mizu 0.1.1 → 0.1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/ext/mizu/picohttpparser/.clang-format +7 -0
- data/ext/mizu/picohttpparser/.gitattributes +1 -0
- data/ext/mizu/picohttpparser/.gitmodules +3 -0
- data/ext/mizu/picohttpparser/.travis.yml +6 -0
- data/ext/mizu/picohttpparser/Jamfile +7 -0
- data/ext/mizu/picohttpparser/Makefile +40 -0
- data/ext/mizu/picohttpparser/README.md +116 -0
- data/ext/mizu/picohttpparser/bench.c +66 -0
- data/ext/mizu/picohttpparser/picohttpparser.c +620 -0
- data/ext/mizu/picohttpparser/picohttpparser.h +89 -0
- data/ext/mizu/picohttpparser/test.c +419 -0
- metadata +12 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 56ef92253b94222a789b40bd651258ad52e48da43e0452527535b043b681b7a6
|
4
|
+
data.tar.gz: ea19e559607cbdc6c2337b6b6e2ad5df0d019a1d6ec15237c64009ced8460f57
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 68588cee02aeeb9eee02f53cf827c581379d2a7177beecc890da892074a9b1a6424e2912232f20379e2ee4f34b4ad35d111cf5facfb592849b3bca4aeb0a64a5
|
7
|
+
data.tar.gz: be1177e812bd050d699450243c3b1ad013631f37294b4737846aa730271b11157fbf261e7df42794ebe3de30b701d29f410926c7e4d14163281eab77f18d203e
|
@@ -0,0 +1 @@
|
|
1
|
+
picohttpparser.* ident
|
@@ -0,0 +1,40 @@
|
|
1
|
+
#
|
2
|
+
# Copyright (c) 2009-2014 Kazuho Oku, Tokuhiro Matsuno, Daisuke Murase,
|
3
|
+
# Shigeo Mitsunari
|
4
|
+
#
|
5
|
+
# The software is licensed under either the MIT License (below) or the Perl
|
6
|
+
# license.
|
7
|
+
#
|
8
|
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
9
|
+
# of this software and associated documentation files (the "Software"), to
|
10
|
+
# deal in the Software without restriction, including without limitation the
|
11
|
+
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
12
|
+
# sell copies of the Software, and to permit persons to whom the Software is
|
13
|
+
# furnished to do so, subject to the following conditions:
|
14
|
+
#
|
15
|
+
# The above copyright notice and this permission notice shall be included in
|
16
|
+
# all copies or substantial portions of the Software.
|
17
|
+
#
|
18
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
19
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
20
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
21
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
22
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
23
|
+
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
24
|
+
# IN THE SOFTWARE.
|
25
|
+
|
26
|
+
CC?=gcc
|
27
|
+
PROVE?=prove
|
28
|
+
|
29
|
+
all:
|
30
|
+
|
31
|
+
test: test-bin
|
32
|
+
$(PROVE) -v ./test-bin
|
33
|
+
|
34
|
+
test-bin: picohttpparser.c picotest/picotest.c test.c
|
35
|
+
$(CC) -Wall $(CFLAGS) $(LDFLAGS) -o $@ $^
|
36
|
+
|
37
|
+
clean:
|
38
|
+
rm -f test-bin
|
39
|
+
|
40
|
+
.PHONY: test
|
@@ -0,0 +1,116 @@
|
|
1
|
+
PicoHTTPParser
|
2
|
+
=============
|
3
|
+
|
4
|
+
Copyright (c) 2009-2014 [Kazuho Oku](https://github.com/kazuho), [Tokuhiro Matsuno](https://github.com/tokuhirom), [Daisuke Murase](https://github.com/typester), [Shigeo Mitsunari](https://github.com/herumi)
|
5
|
+
|
6
|
+
PicoHTTPParser is a tiny, primitive, fast HTTP request/response parser.
|
7
|
+
|
8
|
+
Unlike most parsers, it is stateless and does not allocate memory by itself.
|
9
|
+
All it does is accept pointer to buffer and the output structure, and setups the pointers in the latter to point at the necessary portions of the buffer.
|
10
|
+
|
11
|
+
The code is widely deployed within Perl applications through popular modules that use it, including [Plack](https://metacpan.org/pod/Plack), [Starman](https://metacpan.org/pod/Starman), [Starlet](https://metacpan.org/pod/Starlet), [Furl](https://metacpan.org/pod/Furl). It is also the HTTP/1 parser of [H2O](https://github.com/h2o/h2o).
|
12
|
+
|
13
|
+
Check out [test.c] to find out how to use the parser.
|
14
|
+
|
15
|
+
The software is dual-licensed under the Perl License or the MIT License.
|
16
|
+
|
17
|
+
Usage
|
18
|
+
-----
|
19
|
+
|
20
|
+
The library exposes four functions: `phr_parse_request`, `phr_parse_response`, `phr_parse_headers`, `phr_decode_chunked`.
|
21
|
+
|
22
|
+
### phr_parse_request
|
23
|
+
|
24
|
+
The example below reads an HTTP request from socket `sock` using `read(2)`, parses it using `phr_parse_request`, and prints the details.
|
25
|
+
|
26
|
+
```c
|
27
|
+
char buf[4096], *method, *path;
|
28
|
+
int pret, minor_version;
|
29
|
+
struct phr_header headers[100];
|
30
|
+
size_t buflen = 0, prevbuflen = 0, method_len, path_len, num_headers;
|
31
|
+
ssize_t rret;
|
32
|
+
|
33
|
+
while (1) {
|
34
|
+
/* read the request */
|
35
|
+
while ((rret = read(sock, buf + buflen, sizeof(buf) - buflen)) == -1 && errno == EINTR)
|
36
|
+
;
|
37
|
+
if (rret <= 0)
|
38
|
+
return IOError;
|
39
|
+
prevbuflen = buflen;
|
40
|
+
buflen += rret;
|
41
|
+
/* parse the request */
|
42
|
+
num_headers = sizeof(headers) / sizeof(headers[0]);
|
43
|
+
pret = phr_parse_request(buf, buflen, &method, &method_len, &path, &path_len,
|
44
|
+
&minor_version, headers, &num_headers, prevbuflen);
|
45
|
+
if (pret > 0)
|
46
|
+
break; /* successfully parsed the request */
|
47
|
+
else if (pret == -1)
|
48
|
+
return ParseError;
|
49
|
+
/* request is incomplete, continue the loop */
|
50
|
+
assert(pret == -2);
|
51
|
+
if (buflen == sizeof(buf))
|
52
|
+
return RequestIsTooLongError;
|
53
|
+
}
|
54
|
+
|
55
|
+
printf("request is %d bytes long\n", pret);
|
56
|
+
printf("method is %.*s\n", (int)method_len, method);
|
57
|
+
printf("path is %.*s\n", (int)path_len, path);
|
58
|
+
printf("HTTP version is 1.%d\n", minor_version);
|
59
|
+
printf("headers:\n");
|
60
|
+
for (i = 0; i != num_headers; ++i) {
|
61
|
+
printf("%.*s: %.*s\n", (int)headers[i].name_len, headers[i].name,
|
62
|
+
(int)headers[i].value_len, headers[i].value);
|
63
|
+
}
|
64
|
+
```
|
65
|
+
|
66
|
+
### phr_parse_response, phr_parse_headers
|
67
|
+
|
68
|
+
`phr_parse_response` and `phr_parse_headers` provide similar interfaces as `phr_parse_request`. `phr_parse_response` parses an HTTP response, and `phr_parse_headers` parses the headers only.
|
69
|
+
|
70
|
+
### phr_decode_chunked
|
71
|
+
|
72
|
+
The example below decodes incoming data in chunked-encoding. The data is decoded in-place.
|
73
|
+
|
74
|
+
```c
|
75
|
+
struct phr_chunked_decoder decoder = {}; /* zero-clear */
|
76
|
+
char *buf = malloc(4096);
|
77
|
+
size_t size = 0, capacity = 4096, rsize;
|
78
|
+
ssize_t rret, pret;
|
79
|
+
|
80
|
+
/* set consume_trailer to 1 to discard the trailing header, or the application
|
81
|
+
* should call phr_parse_headers to parse the trailing header */
|
82
|
+
decoder.consume_trailer = 1;
|
83
|
+
|
84
|
+
do {
|
85
|
+
/* expand the buffer if necessary */
|
86
|
+
if (size == capacity) {
|
87
|
+
capacity *= 2;
|
88
|
+
buf = realloc(buf, capacity);
|
89
|
+
assert(buf != NULL);
|
90
|
+
}
|
91
|
+
/* read */
|
92
|
+
while ((rret = read(sock, buf + size, capacity - size)) == -1 && errno == EINTR)
|
93
|
+
;
|
94
|
+
if (rret <= 0)
|
95
|
+
return IOError;
|
96
|
+
/* decode */
|
97
|
+
rsize = rret;
|
98
|
+
pret = phr_decode_chunked(&decoder, buf + size, &rsize);
|
99
|
+
if (pret == -1)
|
100
|
+
return ParseError;
|
101
|
+
size += rsize;
|
102
|
+
} while (pret == -2);
|
103
|
+
|
104
|
+
/* successfully decoded the chunked data */
|
105
|
+
assert(pret >= 0);
|
106
|
+
printf("decoded data is at %p (%zu bytes)\n", buf, size);
|
107
|
+
```
|
108
|
+
|
109
|
+
Benchmark
|
110
|
+
---------
|
111
|
+
|
112
|
+
![benchmark results](http://i.gyazo.com/a85c18d3162dfb46b485bb41e0ad443a.png)
|
113
|
+
|
114
|
+
The benchmark code is from [fukamachi/fast-http@6b91103](https://github.com/fukamachi/fast-http/tree/6b9110347c7a3407310c08979aefd65078518478).
|
115
|
+
|
116
|
+
The internals of picohttpparser has been described to some extent in [my blog entry]( http://blog.kazuhooku.com/2014/11/the-internals-h2o-or-how-to-write-fast.html).
|
@@ -0,0 +1,66 @@
|
|
1
|
+
/*
|
2
|
+
* Copyright (c) 2009-2014 Kazuho Oku, Tokuhiro Matsuno, Daisuke Murase,
|
3
|
+
* Shigeo Mitsunari
|
4
|
+
*
|
5
|
+
* The software is licensed under either the MIT License (below) or the Perl
|
6
|
+
* license.
|
7
|
+
*
|
8
|
+
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
9
|
+
* of this software and associated documentation files (the "Software"), to
|
10
|
+
* deal in the Software without restriction, including without limitation the
|
11
|
+
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
12
|
+
* sell copies of the Software, and to permit persons to whom the Software is
|
13
|
+
* furnished to do so, subject to the following conditions:
|
14
|
+
*
|
15
|
+
* The above copyright notice and this permission notice shall be included in
|
16
|
+
* all copies or substantial portions of the Software.
|
17
|
+
*
|
18
|
+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
19
|
+
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
20
|
+
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
21
|
+
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
22
|
+
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
23
|
+
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
24
|
+
* IN THE SOFTWARE.
|
25
|
+
*/
|
26
|
+
|
27
|
+
#include <assert.h>
|
28
|
+
#include <stdio.h>
|
29
|
+
#include "picohttpparser.h"
|
30
|
+
|
31
|
+
#define REQ \
|
32
|
+
"GET /wp-content/uploads/2010/03/hello-kitty-darth-vader-pink.jpg HTTP/1.1\r\n" \
|
33
|
+
"Host: www.kittyhell.com\r\n" \
|
34
|
+
"User-Agent: Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.6; ja-JP-mac; rv:1.9.2.3) Gecko/20100401 Firefox/3.6.3 " \
|
35
|
+
"Pathtraq/0.9\r\n" \
|
36
|
+
"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8\r\n" \
|
37
|
+
"Accept-Language: ja,en-us;q=0.7,en;q=0.3\r\n" \
|
38
|
+
"Accept-Encoding: gzip,deflate\r\n" \
|
39
|
+
"Accept-Charset: Shift_JIS,utf-8;q=0.7,*;q=0.7\r\n" \
|
40
|
+
"Keep-Alive: 115\r\n" \
|
41
|
+
"Connection: keep-alive\r\n" \
|
42
|
+
"Cookie: wp_ozh_wsa_visits=2; wp_ozh_wsa_visit_lasttime=xxxxxxxxxx; " \
|
43
|
+
"__utma=xxxxxxxxx.xxxxxxxxxx.xxxxxxxxxx.xxxxxxxxxx.xxxxxxxxxx.x; " \
|
44
|
+
"__utmz=xxxxxxxxx.xxxxxxxxxx.x.x.utmccn=(referral)|utmcsr=reader.livedoor.com|utmcct=/reader/|utmcmd=referral\r\n" \
|
45
|
+
"\r\n"
|
46
|
+
|
47
|
+
int main(void)
|
48
|
+
{
|
49
|
+
const char *method;
|
50
|
+
size_t method_len;
|
51
|
+
const char *path;
|
52
|
+
size_t path_len;
|
53
|
+
int minor_version;
|
54
|
+
struct phr_header headers[32];
|
55
|
+
size_t num_headers;
|
56
|
+
int i, ret;
|
57
|
+
|
58
|
+
for (i = 0; i < 10000000; i++) {
|
59
|
+
num_headers = sizeof(headers) / sizeof(headers[0]);
|
60
|
+
ret = phr_parse_request(REQ, sizeof(REQ) - 1, &method, &method_len, &path, &path_len, &minor_version, headers, &num_headers,
|
61
|
+
0);
|
62
|
+
assert(ret == sizeof(REQ) - 1);
|
63
|
+
}
|
64
|
+
|
65
|
+
return 0;
|
66
|
+
}
|
@@ -0,0 +1,620 @@
|
|
1
|
+
/*
|
2
|
+
* Copyright (c) 2009-2014 Kazuho Oku, Tokuhiro Matsuno, Daisuke Murase,
|
3
|
+
* Shigeo Mitsunari
|
4
|
+
*
|
5
|
+
* The software is licensed under either the MIT License (below) or the Perl
|
6
|
+
* license.
|
7
|
+
*
|
8
|
+
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
9
|
+
* of this software and associated documentation files (the "Software"), to
|
10
|
+
* deal in the Software without restriction, including without limitation the
|
11
|
+
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
12
|
+
* sell copies of the Software, and to permit persons to whom the Software is
|
13
|
+
* furnished to do so, subject to the following conditions:
|
14
|
+
*
|
15
|
+
* The above copyright notice and this permission notice shall be included in
|
16
|
+
* all copies or substantial portions of the Software.
|
17
|
+
*
|
18
|
+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
19
|
+
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
20
|
+
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
21
|
+
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
22
|
+
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
23
|
+
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
24
|
+
* IN THE SOFTWARE.
|
25
|
+
*/
|
26
|
+
|
27
|
+
#include <assert.h>
|
28
|
+
#include <stddef.h>
|
29
|
+
#include <string.h>
|
30
|
+
#ifdef __SSE4_2__
|
31
|
+
#ifdef _MSC_VER
|
32
|
+
#include <nmmintrin.h>
|
33
|
+
#else
|
34
|
+
#include <x86intrin.h>
|
35
|
+
#endif
|
36
|
+
#endif
|
37
|
+
#include "picohttpparser.h"
|
38
|
+
|
39
|
+
/* $Id$ */
|
40
|
+
|
41
|
+
#if __GNUC__ >= 3
|
42
|
+
#define likely(x) __builtin_expect(!!(x), 1)
|
43
|
+
#define unlikely(x) __builtin_expect(!!(x), 0)
|
44
|
+
#else
|
45
|
+
#define likely(x) (x)
|
46
|
+
#define unlikely(x) (x)
|
47
|
+
#endif
|
48
|
+
|
49
|
+
#ifdef _MSC_VER
|
50
|
+
#define ALIGNED(n) _declspec(align(n))
|
51
|
+
#else
|
52
|
+
#define ALIGNED(n) __attribute__((aligned(n)))
|
53
|
+
#endif
|
54
|
+
|
55
|
+
#define IS_PRINTABLE_ASCII(c) ((unsigned char)(c)-040u < 0137u)
|
56
|
+
|
57
|
+
#define CHECK_EOF() \
|
58
|
+
if (buf == buf_end) { \
|
59
|
+
*ret = -2; \
|
60
|
+
return NULL; \
|
61
|
+
}
|
62
|
+
|
63
|
+
#define EXPECT_CHAR_NO_CHECK(ch) \
|
64
|
+
if (*buf++ != ch) { \
|
65
|
+
*ret = -1; \
|
66
|
+
return NULL; \
|
67
|
+
}
|
68
|
+
|
69
|
+
#define EXPECT_CHAR(ch) \
|
70
|
+
CHECK_EOF(); \
|
71
|
+
EXPECT_CHAR_NO_CHECK(ch);
|
72
|
+
|
73
|
+
#define ADVANCE_TOKEN(tok, toklen) \
|
74
|
+
do { \
|
75
|
+
const char *tok_start = buf; \
|
76
|
+
static const char ALIGNED(16) ranges2[] = "\000\040\177\177"; \
|
77
|
+
int found2; \
|
78
|
+
buf = findchar_fast(buf, buf_end, ranges2, sizeof(ranges2) - 1, &found2); \
|
79
|
+
if (!found2) { \
|
80
|
+
CHECK_EOF(); \
|
81
|
+
} \
|
82
|
+
while (1) { \
|
83
|
+
if (*buf == ' ') { \
|
84
|
+
break; \
|
85
|
+
} else if (unlikely(!IS_PRINTABLE_ASCII(*buf))) { \
|
86
|
+
if ((unsigned char)*buf < '\040' || *buf == '\177') { \
|
87
|
+
*ret = -1; \
|
88
|
+
return NULL; \
|
89
|
+
} \
|
90
|
+
} \
|
91
|
+
++buf; \
|
92
|
+
CHECK_EOF(); \
|
93
|
+
} \
|
94
|
+
tok = tok_start; \
|
95
|
+
toklen = buf - tok_start; \
|
96
|
+
} while (0)
|
97
|
+
|
98
|
+
static const char *token_char_map = "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
|
99
|
+
"\0\1\0\1\1\1\1\1\0\0\1\1\0\1\1\0\1\1\1\1\1\1\1\1\1\1\0\0\0\0\0\0"
|
100
|
+
"\0\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\0\0\1\1"
|
101
|
+
"\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\1\0\1\0"
|
102
|
+
"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
|
103
|
+
"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
|
104
|
+
"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
|
105
|
+
"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0";
|
106
|
+
|
107
|
+
static const char *findchar_fast(const char *buf, const char *buf_end, const char *ranges, size_t ranges_size, int *found)
|
108
|
+
{
|
109
|
+
*found = 0;
|
110
|
+
#if __SSE4_2__
|
111
|
+
if (likely(buf_end - buf >= 16)) {
|
112
|
+
__m128i ranges16 = _mm_loadu_si128((const __m128i *)ranges);
|
113
|
+
|
114
|
+
size_t left = (buf_end - buf) & ~15;
|
115
|
+
do {
|
116
|
+
__m128i b16 = _mm_loadu_si128((const __m128i *)buf);
|
117
|
+
int r = _mm_cmpestri(ranges16, ranges_size, b16, 16, _SIDD_LEAST_SIGNIFICANT | _SIDD_CMP_RANGES | _SIDD_UBYTE_OPS);
|
118
|
+
if (unlikely(r != 16)) {
|
119
|
+
buf += r;
|
120
|
+
*found = 1;
|
121
|
+
break;
|
122
|
+
}
|
123
|
+
buf += 16;
|
124
|
+
left -= 16;
|
125
|
+
} while (likely(left != 0));
|
126
|
+
}
|
127
|
+
#else
|
128
|
+
/* suppress unused parameter warning */
|
129
|
+
(void)buf_end;
|
130
|
+
(void)ranges;
|
131
|
+
(void)ranges_size;
|
132
|
+
#endif
|
133
|
+
return buf;
|
134
|
+
}
|
135
|
+
|
136
|
+
static const char *get_token_to_eol(const char *buf, const char *buf_end, const char **token, size_t *token_len, int *ret)
|
137
|
+
{
|
138
|
+
const char *token_start = buf;
|
139
|
+
|
140
|
+
#ifdef __SSE4_2__
|
141
|
+
static const char ranges1[] = "\0\010"
|
142
|
+
/* allow HT */
|
143
|
+
"\012\037"
|
144
|
+
/* allow SP and up to but not including DEL */
|
145
|
+
"\177\177"
|
146
|
+
/* allow chars w. MSB set */
|
147
|
+
;
|
148
|
+
int found;
|
149
|
+
buf = findchar_fast(buf, buf_end, ranges1, sizeof(ranges1) - 1, &found);
|
150
|
+
if (found)
|
151
|
+
goto FOUND_CTL;
|
152
|
+
#else
|
153
|
+
/* find non-printable char within the next 8 bytes, this is the hottest code; manually inlined */
|
154
|
+
while (likely(buf_end - buf >= 8)) {
|
155
|
+
#define DOIT() \
|
156
|
+
do { \
|
157
|
+
if (unlikely(!IS_PRINTABLE_ASCII(*buf))) \
|
158
|
+
goto NonPrintable; \
|
159
|
+
++buf; \
|
160
|
+
} while (0)
|
161
|
+
DOIT();
|
162
|
+
DOIT();
|
163
|
+
DOIT();
|
164
|
+
DOIT();
|
165
|
+
DOIT();
|
166
|
+
DOIT();
|
167
|
+
DOIT();
|
168
|
+
DOIT();
|
169
|
+
#undef DOIT
|
170
|
+
continue;
|
171
|
+
NonPrintable:
|
172
|
+
if ((likely((unsigned char)*buf < '\040') && likely(*buf != '\011')) || unlikely(*buf == '\177')) {
|
173
|
+
goto FOUND_CTL;
|
174
|
+
}
|
175
|
+
++buf;
|
176
|
+
}
|
177
|
+
#endif
|
178
|
+
for (;; ++buf) {
|
179
|
+
CHECK_EOF();
|
180
|
+
if (unlikely(!IS_PRINTABLE_ASCII(*buf))) {
|
181
|
+
if ((likely((unsigned char)*buf < '\040') && likely(*buf != '\011')) || unlikely(*buf == '\177')) {
|
182
|
+
goto FOUND_CTL;
|
183
|
+
}
|
184
|
+
}
|
185
|
+
}
|
186
|
+
FOUND_CTL:
|
187
|
+
if (likely(*buf == '\015')) {
|
188
|
+
++buf;
|
189
|
+
EXPECT_CHAR('\012');
|
190
|
+
*token_len = buf - 2 - token_start;
|
191
|
+
} else if (*buf == '\012') {
|
192
|
+
*token_len = buf - token_start;
|
193
|
+
++buf;
|
194
|
+
} else {
|
195
|
+
*ret = -1;
|
196
|
+
return NULL;
|
197
|
+
}
|
198
|
+
*token = token_start;
|
199
|
+
|
200
|
+
return buf;
|
201
|
+
}
|
202
|
+
|
203
|
+
static const char *is_complete(const char *buf, const char *buf_end, size_t last_len, int *ret)
|
204
|
+
{
|
205
|
+
int ret_cnt = 0;
|
206
|
+
buf = last_len < 3 ? buf : buf + last_len - 3;
|
207
|
+
|
208
|
+
while (1) {
|
209
|
+
CHECK_EOF();
|
210
|
+
if (*buf == '\015') {
|
211
|
+
++buf;
|
212
|
+
CHECK_EOF();
|
213
|
+
EXPECT_CHAR('\012');
|
214
|
+
++ret_cnt;
|
215
|
+
} else if (*buf == '\012') {
|
216
|
+
++buf;
|
217
|
+
++ret_cnt;
|
218
|
+
} else {
|
219
|
+
++buf;
|
220
|
+
ret_cnt = 0;
|
221
|
+
}
|
222
|
+
if (ret_cnt == 2) {
|
223
|
+
return buf;
|
224
|
+
}
|
225
|
+
}
|
226
|
+
|
227
|
+
*ret = -2;
|
228
|
+
return NULL;
|
229
|
+
}
|
230
|
+
|
231
|
+
#define PARSE_INT(valp_, mul_) \
|
232
|
+
if (*buf < '0' || '9' < *buf) { \
|
233
|
+
buf++; \
|
234
|
+
*ret = -1; \
|
235
|
+
return NULL; \
|
236
|
+
} \
|
237
|
+
*(valp_) = (mul_) * (*buf++ - '0');
|
238
|
+
|
239
|
+
#define PARSE_INT_3(valp_) \
|
240
|
+
do { \
|
241
|
+
int res_ = 0; \
|
242
|
+
PARSE_INT(&res_, 100) \
|
243
|
+
*valp_ = res_; \
|
244
|
+
PARSE_INT(&res_, 10) \
|
245
|
+
*valp_ += res_; \
|
246
|
+
PARSE_INT(&res_, 1) \
|
247
|
+
*valp_ += res_; \
|
248
|
+
} while (0)
|
249
|
+
|
250
|
+
/* returned pointer is always within [buf, buf_end), or null */
|
251
|
+
static const char *parse_http_version(const char *buf, const char *buf_end, int *minor_version, int *ret)
|
252
|
+
{
|
253
|
+
/* we want at least [HTTP/1.<two chars>] to try to parse */
|
254
|
+
if (buf_end - buf < 9) {
|
255
|
+
*ret = -2;
|
256
|
+
return NULL;
|
257
|
+
}
|
258
|
+
EXPECT_CHAR_NO_CHECK('H');
|
259
|
+
EXPECT_CHAR_NO_CHECK('T');
|
260
|
+
EXPECT_CHAR_NO_CHECK('T');
|
261
|
+
EXPECT_CHAR_NO_CHECK('P');
|
262
|
+
EXPECT_CHAR_NO_CHECK('/');
|
263
|
+
EXPECT_CHAR_NO_CHECK('1');
|
264
|
+
EXPECT_CHAR_NO_CHECK('.');
|
265
|
+
PARSE_INT(minor_version, 1);
|
266
|
+
return buf;
|
267
|
+
}
|
268
|
+
|
269
|
+
static const char *parse_headers(const char *buf, const char *buf_end, struct phr_header *headers, size_t *num_headers,
|
270
|
+
size_t max_headers, int *ret)
|
271
|
+
{
|
272
|
+
for (;; ++*num_headers) {
|
273
|
+
CHECK_EOF();
|
274
|
+
if (*buf == '\015') {
|
275
|
+
++buf;
|
276
|
+
EXPECT_CHAR('\012');
|
277
|
+
break;
|
278
|
+
} else if (*buf == '\012') {
|
279
|
+
++buf;
|
280
|
+
break;
|
281
|
+
}
|
282
|
+
if (*num_headers == max_headers) {
|
283
|
+
*ret = -1;
|
284
|
+
return NULL;
|
285
|
+
}
|
286
|
+
if (!(*num_headers != 0 && (*buf == ' ' || *buf == '\t'))) {
|
287
|
+
/* parsing name, but do not discard SP before colon, see
|
288
|
+
* http://www.mozilla.org/security/announce/2006/mfsa2006-33.html */
|
289
|
+
headers[*num_headers].name = buf;
|
290
|
+
static const char ALIGNED(16) ranges1[] = "\x00 " /* control chars and up to SP */
|
291
|
+
"\"\"" /* 0x22 */
|
292
|
+
"()" /* 0x28,0x29 */
|
293
|
+
",," /* 0x2c */
|
294
|
+
"//" /* 0x2f */
|
295
|
+
":@" /* 0x3a-0x40 */
|
296
|
+
"[]" /* 0x5b-0x5d */
|
297
|
+
"{\377"; /* 0x7b-0xff */
|
298
|
+
int found;
|
299
|
+
buf = findchar_fast(buf, buf_end, ranges1, sizeof(ranges1) - 1, &found);
|
300
|
+
if (!found) {
|
301
|
+
CHECK_EOF();
|
302
|
+
}
|
303
|
+
while (1) {
|
304
|
+
if (*buf == ':') {
|
305
|
+
break;
|
306
|
+
} else if (!token_char_map[(unsigned char)*buf]) {
|
307
|
+
*ret = -1;
|
308
|
+
return NULL;
|
309
|
+
}
|
310
|
+
++buf;
|
311
|
+
CHECK_EOF();
|
312
|
+
}
|
313
|
+
if ((headers[*num_headers].name_len = buf - headers[*num_headers].name) == 0) {
|
314
|
+
*ret = -1;
|
315
|
+
return NULL;
|
316
|
+
}
|
317
|
+
++buf;
|
318
|
+
for (;; ++buf) {
|
319
|
+
CHECK_EOF();
|
320
|
+
if (!(*buf == ' ' || *buf == '\t')) {
|
321
|
+
break;
|
322
|
+
}
|
323
|
+
}
|
324
|
+
} else {
|
325
|
+
headers[*num_headers].name = NULL;
|
326
|
+
headers[*num_headers].name_len = 0;
|
327
|
+
}
|
328
|
+
if ((buf = get_token_to_eol(buf, buf_end, &headers[*num_headers].value, &headers[*num_headers].value_len, ret)) == NULL) {
|
329
|
+
return NULL;
|
330
|
+
}
|
331
|
+
}
|
332
|
+
return buf;
|
333
|
+
}
|
334
|
+
|
335
|
+
static const char *parse_request(const char *buf, const char *buf_end, const char **method, size_t *method_len, const char **path,
|
336
|
+
size_t *path_len, int *minor_version, struct phr_header *headers, size_t *num_headers,
|
337
|
+
size_t max_headers, int *ret)
|
338
|
+
{
|
339
|
+
/* skip first empty line (some clients add CRLF after POST content) */
|
340
|
+
CHECK_EOF();
|
341
|
+
if (*buf == '\015') {
|
342
|
+
++buf;
|
343
|
+
EXPECT_CHAR('\012');
|
344
|
+
} else if (*buf == '\012') {
|
345
|
+
++buf;
|
346
|
+
}
|
347
|
+
|
348
|
+
/* parse request line */
|
349
|
+
ADVANCE_TOKEN(*method, *method_len);
|
350
|
+
++buf;
|
351
|
+
ADVANCE_TOKEN(*path, *path_len);
|
352
|
+
++buf;
|
353
|
+
if ((buf = parse_http_version(buf, buf_end, minor_version, ret)) == NULL) {
|
354
|
+
return NULL;
|
355
|
+
}
|
356
|
+
if (*buf == '\015') {
|
357
|
+
++buf;
|
358
|
+
EXPECT_CHAR('\012');
|
359
|
+
} else if (*buf == '\012') {
|
360
|
+
++buf;
|
361
|
+
} else {
|
362
|
+
*ret = -1;
|
363
|
+
return NULL;
|
364
|
+
}
|
365
|
+
|
366
|
+
return parse_headers(buf, buf_end, headers, num_headers, max_headers, ret);
|
367
|
+
}
|
368
|
+
|
369
|
+
int phr_parse_request(const char *buf_start, size_t len, const char **method, size_t *method_len, const char **path,
|
370
|
+
size_t *path_len, int *minor_version, struct phr_header *headers, size_t *num_headers, size_t last_len)
|
371
|
+
{
|
372
|
+
const char *buf = buf_start, *buf_end = buf_start + len;
|
373
|
+
size_t max_headers = *num_headers;
|
374
|
+
int r;
|
375
|
+
|
376
|
+
*method = NULL;
|
377
|
+
*method_len = 0;
|
378
|
+
*path = NULL;
|
379
|
+
*path_len = 0;
|
380
|
+
*minor_version = -1;
|
381
|
+
*num_headers = 0;
|
382
|
+
|
383
|
+
/* if last_len != 0, check if the request is complete (a fast countermeasure
|
384
|
+
againt slowloris */
|
385
|
+
if (last_len != 0 && is_complete(buf, buf_end, last_len, &r) == NULL) {
|
386
|
+
return r;
|
387
|
+
}
|
388
|
+
|
389
|
+
if ((buf = parse_request(buf, buf_end, method, method_len, path, path_len, minor_version, headers, num_headers, max_headers,
|
390
|
+
&r)) == NULL) {
|
391
|
+
return r;
|
392
|
+
}
|
393
|
+
|
394
|
+
return (int)(buf - buf_start);
|
395
|
+
}
|
396
|
+
|
397
|
+
static const char *parse_response(const char *buf, const char *buf_end, int *minor_version, int *status, const char **msg,
|
398
|
+
size_t *msg_len, struct phr_header *headers, size_t *num_headers, size_t max_headers, int *ret)
|
399
|
+
{
|
400
|
+
/* parse "HTTP/1.x" */
|
401
|
+
if ((buf = parse_http_version(buf, buf_end, minor_version, ret)) == NULL) {
|
402
|
+
return NULL;
|
403
|
+
}
|
404
|
+
/* skip space */
|
405
|
+
if (*buf++ != ' ') {
|
406
|
+
*ret = -1;
|
407
|
+
return NULL;
|
408
|
+
}
|
409
|
+
/* parse status code, we want at least [:digit:][:digit:][:digit:]<other char> to try to parse */
|
410
|
+
if (buf_end - buf < 4) {
|
411
|
+
*ret = -2;
|
412
|
+
return NULL;
|
413
|
+
}
|
414
|
+
PARSE_INT_3(status);
|
415
|
+
|
416
|
+
/* skip space */
|
417
|
+
if (*buf++ != ' ') {
|
418
|
+
*ret = -1;
|
419
|
+
return NULL;
|
420
|
+
}
|
421
|
+
/* get message */
|
422
|
+
if ((buf = get_token_to_eol(buf, buf_end, msg, msg_len, ret)) == NULL) {
|
423
|
+
return NULL;
|
424
|
+
}
|
425
|
+
|
426
|
+
return parse_headers(buf, buf_end, headers, num_headers, max_headers, ret);
|
427
|
+
}
|
428
|
+
|
429
|
+
int phr_parse_response(const char *buf_start, size_t len, int *minor_version, int *status, const char **msg, size_t *msg_len,
|
430
|
+
struct phr_header *headers, size_t *num_headers, size_t last_len)
|
431
|
+
{
|
432
|
+
const char *buf = buf_start, *buf_end = buf + len;
|
433
|
+
size_t max_headers = *num_headers;
|
434
|
+
int r;
|
435
|
+
|
436
|
+
*minor_version = -1;
|
437
|
+
*status = 0;
|
438
|
+
*msg = NULL;
|
439
|
+
*msg_len = 0;
|
440
|
+
*num_headers = 0;
|
441
|
+
|
442
|
+
/* if last_len != 0, check if the response is complete (a fast countermeasure
|
443
|
+
against slowloris */
|
444
|
+
if (last_len != 0 && is_complete(buf, buf_end, last_len, &r) == NULL) {
|
445
|
+
return r;
|
446
|
+
}
|
447
|
+
|
448
|
+
if ((buf = parse_response(buf, buf_end, minor_version, status, msg, msg_len, headers, num_headers, max_headers, &r)) == NULL) {
|
449
|
+
return r;
|
450
|
+
}
|
451
|
+
|
452
|
+
return (int)(buf - buf_start);
|
453
|
+
}
|
454
|
+
|
455
|
+
int phr_parse_headers(const char *buf_start, size_t len, struct phr_header *headers, size_t *num_headers, size_t last_len)
|
456
|
+
{
|
457
|
+
const char *buf = buf_start, *buf_end = buf + len;
|
458
|
+
size_t max_headers = *num_headers;
|
459
|
+
int r;
|
460
|
+
|
461
|
+
*num_headers = 0;
|
462
|
+
|
463
|
+
/* if last_len != 0, check if the response is complete (a fast countermeasure
|
464
|
+
against slowloris */
|
465
|
+
if (last_len != 0 && is_complete(buf, buf_end, last_len, &r) == NULL) {
|
466
|
+
return r;
|
467
|
+
}
|
468
|
+
|
469
|
+
if ((buf = parse_headers(buf, buf_end, headers, num_headers, max_headers, &r)) == NULL) {
|
470
|
+
return r;
|
471
|
+
}
|
472
|
+
|
473
|
+
return (int)(buf - buf_start);
|
474
|
+
}
|
475
|
+
|
476
|
+
enum {
|
477
|
+
CHUNKED_IN_CHUNK_SIZE,
|
478
|
+
CHUNKED_IN_CHUNK_EXT,
|
479
|
+
CHUNKED_IN_CHUNK_DATA,
|
480
|
+
CHUNKED_IN_CHUNK_CRLF,
|
481
|
+
CHUNKED_IN_TRAILERS_LINE_HEAD,
|
482
|
+
CHUNKED_IN_TRAILERS_LINE_MIDDLE
|
483
|
+
};
|
484
|
+
|
485
|
+
static int decode_hex(int ch)
|
486
|
+
{
|
487
|
+
if ('0' <= ch && ch <= '9') {
|
488
|
+
return ch - '0';
|
489
|
+
} else if ('A' <= ch && ch <= 'F') {
|
490
|
+
return ch - 'A' + 0xa;
|
491
|
+
} else if ('a' <= ch && ch <= 'f') {
|
492
|
+
return ch - 'a' + 0xa;
|
493
|
+
} else {
|
494
|
+
return -1;
|
495
|
+
}
|
496
|
+
}
|
497
|
+
|
498
|
+
ssize_t phr_decode_chunked(struct phr_chunked_decoder *decoder, char *buf, size_t *_bufsz)
|
499
|
+
{
|
500
|
+
size_t dst = 0, src = 0, bufsz = *_bufsz;
|
501
|
+
ssize_t ret = -2; /* incomplete */
|
502
|
+
|
503
|
+
while (1) {
|
504
|
+
switch (decoder->_state) {
|
505
|
+
case CHUNKED_IN_CHUNK_SIZE:
|
506
|
+
for (;; ++src) {
|
507
|
+
int v;
|
508
|
+
if (src == bufsz)
|
509
|
+
goto Exit;
|
510
|
+
if ((v = decode_hex(buf[src])) == -1) {
|
511
|
+
if (decoder->_hex_count == 0) {
|
512
|
+
ret = -1;
|
513
|
+
goto Exit;
|
514
|
+
}
|
515
|
+
break;
|
516
|
+
}
|
517
|
+
if (decoder->_hex_count == sizeof(size_t) * 2) {
|
518
|
+
ret = -1;
|
519
|
+
goto Exit;
|
520
|
+
}
|
521
|
+
decoder->bytes_left_in_chunk = decoder->bytes_left_in_chunk * 16 + v;
|
522
|
+
++decoder->_hex_count;
|
523
|
+
}
|
524
|
+
decoder->_hex_count = 0;
|
525
|
+
decoder->_state = CHUNKED_IN_CHUNK_EXT;
|
526
|
+
/* fallthru */
|
527
|
+
case CHUNKED_IN_CHUNK_EXT:
|
528
|
+
/* RFC 7230 A.2 "Line folding in chunk extensions is disallowed" */
|
529
|
+
for (;; ++src) {
|
530
|
+
if (src == bufsz)
|
531
|
+
goto Exit;
|
532
|
+
if (buf[src] == '\012')
|
533
|
+
break;
|
534
|
+
}
|
535
|
+
++src;
|
536
|
+
if (decoder->bytes_left_in_chunk == 0) {
|
537
|
+
if (decoder->consume_trailer) {
|
538
|
+
decoder->_state = CHUNKED_IN_TRAILERS_LINE_HEAD;
|
539
|
+
break;
|
540
|
+
} else {
|
541
|
+
goto Complete;
|
542
|
+
}
|
543
|
+
}
|
544
|
+
decoder->_state = CHUNKED_IN_CHUNK_DATA;
|
545
|
+
/* fallthru */
|
546
|
+
case CHUNKED_IN_CHUNK_DATA: {
|
547
|
+
size_t avail = bufsz - src;
|
548
|
+
if (avail < decoder->bytes_left_in_chunk) {
|
549
|
+
if (dst != src)
|
550
|
+
memmove(buf + dst, buf + src, avail);
|
551
|
+
src += avail;
|
552
|
+
dst += avail;
|
553
|
+
decoder->bytes_left_in_chunk -= avail;
|
554
|
+
goto Exit;
|
555
|
+
}
|
556
|
+
if (dst != src)
|
557
|
+
memmove(buf + dst, buf + src, decoder->bytes_left_in_chunk);
|
558
|
+
src += decoder->bytes_left_in_chunk;
|
559
|
+
dst += decoder->bytes_left_in_chunk;
|
560
|
+
decoder->bytes_left_in_chunk = 0;
|
561
|
+
decoder->_state = CHUNKED_IN_CHUNK_CRLF;
|
562
|
+
}
|
563
|
+
/* fallthru */
|
564
|
+
case CHUNKED_IN_CHUNK_CRLF:
|
565
|
+
for (;; ++src) {
|
566
|
+
if (src == bufsz)
|
567
|
+
goto Exit;
|
568
|
+
if (buf[src] != '\015')
|
569
|
+
break;
|
570
|
+
}
|
571
|
+
if (buf[src] != '\012') {
|
572
|
+
ret = -1;
|
573
|
+
goto Exit;
|
574
|
+
}
|
575
|
+
++src;
|
576
|
+
decoder->_state = CHUNKED_IN_CHUNK_SIZE;
|
577
|
+
break;
|
578
|
+
case CHUNKED_IN_TRAILERS_LINE_HEAD:
|
579
|
+
for (;; ++src) {
|
580
|
+
if (src == bufsz)
|
581
|
+
goto Exit;
|
582
|
+
if (buf[src] != '\015')
|
583
|
+
break;
|
584
|
+
}
|
585
|
+
if (buf[src++] == '\012')
|
586
|
+
goto Complete;
|
587
|
+
decoder->_state = CHUNKED_IN_TRAILERS_LINE_MIDDLE;
|
588
|
+
/* fallthru */
|
589
|
+
case CHUNKED_IN_TRAILERS_LINE_MIDDLE:
|
590
|
+
for (;; ++src) {
|
591
|
+
if (src == bufsz)
|
592
|
+
goto Exit;
|
593
|
+
if (buf[src] == '\012')
|
594
|
+
break;
|
595
|
+
}
|
596
|
+
++src;
|
597
|
+
decoder->_state = CHUNKED_IN_TRAILERS_LINE_HEAD;
|
598
|
+
break;
|
599
|
+
default:
|
600
|
+
assert(!"decoder is corrupt");
|
601
|
+
}
|
602
|
+
}
|
603
|
+
|
604
|
+
Complete:
|
605
|
+
ret = bufsz - src;
|
606
|
+
Exit:
|
607
|
+
if (dst != src)
|
608
|
+
memmove(buf + dst, buf + src, bufsz - src);
|
609
|
+
*_bufsz = dst;
|
610
|
+
return ret;
|
611
|
+
}
|
612
|
+
|
613
|
+
int phr_decode_chunked_is_in_data(struct phr_chunked_decoder *decoder)
|
614
|
+
{
|
615
|
+
return decoder->_state == CHUNKED_IN_CHUNK_DATA;
|
616
|
+
}
|
617
|
+
|
618
|
+
#undef CHECK_EOF
|
619
|
+
#undef EXPECT_CHAR
|
620
|
+
#undef ADVANCE_TOKEN
|