rhebok 0.8.6 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 1dbfe9f10796f8d8851faa4f2a642d8e462fed15
4
- data.tar.gz: 865e2b6464856104ad2cba761e2a82c1967a30f6
3
+ metadata.gz: 7b58143758b183d03af523fa633a56783e030abd
4
+ data.tar.gz: 6a44fcfad56efa0fbdf28a595f552bec9dda9842
5
5
  SHA512:
6
- metadata.gz: 1bdea8576b3aff0eaec288a7e8613e47d8e7efa7a1b54a55f5e84eb5148e9781896b90b559c4e117104d4ea795a36e80a09e7f23c315ff0188fa845f43717216
7
- data.tar.gz: a7973f26793013d7c208b6578076738102df80e42e8eaf5c8b6a379416fbb10247564de242f6986de9d749e50332df615d46fb8818b78aaf437dbf62065d9a20
6
+ metadata.gz: ec40342c239b048255213965936191a83b9d4c41a6883783ac49ac9c544d0de677917315651441ce1b3abaaa7ca0d04f37f5642292a12a3311748baa213b09b3
7
+ data.tar.gz: 5855c448e72d86aa99fc13eff0a25cf8e3fc70502ce27d44457134715f4d67e3b6be27ca87b356200ef199070fafe3b29f45b82c15ce59e0cfb0df9a1feab654
data/Changes CHANGED
@@ -1,3 +1,8 @@
1
+ 0.9.0 2015-11-13T23:44:13
2
+
3
+ - safe graceful shutdown
4
+ - fix iovcnt calculation
5
+
1
6
  0.8.6 2015-01-26T14:53:06Z
2
7
 
3
8
  - add some chunked transfer test. refactor around sending chunk
@@ -0,0 +1,6 @@
1
+ # requires clang-format >= 3.6
2
+ BasedOnStyle: "LLVM"
3
+ IndentWidth: 4
4
+ ColumnLimit: 132
5
+ BreakBeforeBraces: Linux
6
+ AllowShortFunctionsOnASingleLine: None
@@ -0,0 +1,7 @@
1
+ project picohttpparser ;
2
+
3
+ lib picohttpparser : picohttpparser.c ;
4
+
5
+ unit-test test
6
+ : picohttpparser picotest/picotest.c test.c
7
+ : <testing.launcher>prove ;
@@ -14,6 +14,98 @@ Check out [test.c] to find out how to use the parser.
14
14
 
15
15
  The software is dual-licensed under the Perl License or the MIT License.
16
16
 
17
+ Usage
18
+ -----
19
+
20
+ The library exposes four functions: `phr_parse_request`, `phr_parse_response`, `phr_parse_headers`, and `phr_decode_chunked`.
21
+
22
+ ### phr_parse_request
23
+
24
+ The example below reads an HTTP request from socket `sock` using `read(2)`, parses it using `phr_parse_request`, and prints the details.
25
+
26
+ ```
27
+ char buf[4096], *method, *path;
28
+ int pret, minor_version;
29
+ struct phr_header headers[100];
30
+ size_t buflen = 0, prevbuflen = 0, method_len, path_len, num_headers;
31
+ ssize_t rret;
32
+
33
+ while (1) {
34
+ /* read the request */
35
+ while ((rret = read(sock, buf + buflen, sizeof(buf) - buflen)) == -1 && errno == EINTR)
36
+ ;
37
+ if (rret <= 0)
38
+ return IOError;
39
+ prevbuflen = buflen;
40
+ buflen += rret;
41
+ /* parse the request */
42
+ num_headers = sizeof(headers) / sizeof(headers[0]);
43
+ pret = phr_parse_request(buf, buflen, &method, &method_len, &path, &path_len,
44
+ &minor_version, headers, &num_headers, prevbuflen);
45
+ if (pret > 0)
46
+ break; /* successfully parsed the request */
47
+ else if (pret == -1)
48
+ return ParseError;
49
+ /* request is incomplete, continue the loop */
50
+ assert(pret == -2);
51
+ if (buflen == sizeof(buf))
52
+ return RequestIsTooLongError;
53
+ }
54
+
55
+ printf("request is %d bytes long\n", pret);
56
+ printf("method is %.*s\n", (int)method_len, method);
57
+ printf("path is %.*s\n", (int)path_len, path);
58
+ printf("HTTP version is 1.%d\n", minor_version);
59
+ printf("headers:\n");
60
+ for (i = 0; i != num_headers; ++i) {
61
+ printf("%.*s: %.*s\n", (int)headers[i].name_len, headers[i].name,
62
+ (int)headers[i].value_len, headers[i].value);
63
+ }
64
+ ```
65
+
66
+ ### phr_parse_response, phr_parse_headers
67
+
68
+ `phr_parse_response` and `phr_parse_headers` provide interfaces similar to `phr_parse_request`. `phr_parse_response` parses an HTTP response, and `phr_parse_headers` parses the headers only.
69
+
70
+ ### phr_decode_chunked
71
+
72
+ The example below decodes incoming data sent with chunked transfer encoding. The data is decoded in place.
73
+
74
+ ```
75
+ struct phr_chunked_decoder decoder = {}; /* zero-clear */
76
+ char *buf = malloc(4096);
77
+ size_t size = 0, capacity = 4096, rsize;
78
+ ssize_t rret, pret;
79
+
80
+ /* set consume_trailer to 1 to discard the trailing header, or the application
81
+ * should call phr_parse_headers to parse the trailing header */
82
+ decoder.consume_trailer = 1;
83
+
84
+ do {
85
+ /* expand the buffer if necessary */
86
+ if (size == capacity) {
87
+ capacity *= 2;
88
+ buf = realloc(buf, capacity);
89
+ assert(buf != NULL);
90
+ }
91
+ /* read */
92
+ while ((rret = read(sock, buf + size, capacity - size)) == -1 && errno == EINTR)
93
+ ;
94
+ if (rret <= 0)
95
+ return IOError;
96
+ /* decode */
97
+ rsize = rret;
98
+ pret = phr_decode_chunked(&decoder, buf + size, &rsize);
99
+ if (pret == -1)
100
+ return ParseError;
101
+ size += rsize;
102
+ } while (pret == -2);
103
+
104
+ /* successfully decoded the chunked data */
105
+ assert(pret >= 0);
106
+ printf("decoded data is at %p (%zu bytes)\n", buf, size);
107
+ ```
108
+
17
109
  Benchmark
18
110
  ---------
19
111
 
@@ -28,26 +28,39 @@
28
28
  #include <stdio.h>
29
29
  #include "picohttpparser.h"
30
30
 
31
- #define REQ "GET /wp-content/uploads/2010/03/hello-kitty-darth-vader-pink.jpg HTTP/1.1\r\nHost: www.kittyhell.com\r\nUser-Agent: Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.6; ja-JP-mac; rv:1.9.2.3) Gecko/20100401 Firefox/3.6.3 Pathtraq/0.9\r\nAccept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8\r\nAccept-Language: ja,en-us;q=0.7,en;q=0.3\r\nAccept-Encoding: gzip,deflate\r\nAccept-Charset: Shift_JIS,utf-8;q=0.7,*;q=0.7\r\nKeep-Alive: 115\r\nConnection: keep-alive\r\nCookie: wp_ozh_wsa_visits=2; wp_ozh_wsa_visit_lasttime=xxxxxxxxxx; __utma=xxxxxxxxx.xxxxxxxxxx.xxxxxxxxxx.xxxxxxxxxx.xxxxxxxxxx.x; __utmz=xxxxxxxxx.xxxxxxxxxx.x.x.utmccn=(referral)|utmcsr=reader.livedoor.com|utmcct=/reader/|utmcmd=referral\r\n\r\n"
31
+ #define REQ \
32
+ "GET /wp-content/uploads/2010/03/hello-kitty-darth-vader-pink.jpg HTTP/1.1\r\n" \
33
+ "Host: www.kittyhell.com\r\n" \
34
+ "User-Agent: Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.6; ja-JP-mac; rv:1.9.2.3) Gecko/20100401 Firefox/3.6.3 " \
35
+ "Pathtraq/0.9\r\n" \
36
+ "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8\r\n" \
37
+ "Accept-Language: ja,en-us;q=0.7,en;q=0.3\r\n" \
38
+ "Accept-Encoding: gzip,deflate\r\n" \
39
+ "Accept-Charset: Shift_JIS,utf-8;q=0.7,*;q=0.7\r\n" \
40
+ "Keep-Alive: 115\r\n" \
41
+ "Connection: keep-alive\r\n" \
42
+ "Cookie: wp_ozh_wsa_visits=2; wp_ozh_wsa_visit_lasttime=xxxxxxxxxx; " \
43
+ "__utma=xxxxxxxxx.xxxxxxxxxx.xxxxxxxxxx.xxxxxxxxxx.xxxxxxxxxx.x; " \
44
+ "__utmz=xxxxxxxxx.xxxxxxxxxx.x.x.utmccn=(referral)|utmcsr=reader.livedoor.com|utmcct=/reader/|utmcmd=referral\r\n" \
45
+ "\r\n"
32
46
 
33
47
  int main(void)
34
48
  {
35
- const char* method;
36
- size_t method_len;
37
- const char* path;
38
- size_t path_len;
39
- int minor_version;
40
- struct phr_header headers[32];
41
- size_t num_headers;
42
- int i, ret;
43
-
44
- for (i = 0; i < 10000000; i++) {
45
- num_headers = sizeof(headers) / sizeof(headers[0]);
46
- ret = phr_parse_request(REQ, sizeof(REQ) - 1, &method, &method_len, &path,
47
- &path_len, &minor_version, headers, &num_headers,
48
- 0);
49
- assert(ret == sizeof(REQ) - 1);
50
- }
51
-
52
- return 0;
49
+ const char *method;
50
+ size_t method_len;
51
+ const char *path;
52
+ size_t path_len;
53
+ int minor_version;
54
+ struct phr_header headers[32];
55
+ size_t num_headers;
56
+ int i, ret;
57
+
58
+ for (i = 0; i < 10000000; i++) {
59
+ num_headers = sizeof(headers) / sizeof(headers[0]);
60
+ ret = phr_parse_request(REQ, sizeof(REQ) - 1, &method, &method_len, &path, &path_len, &minor_version, headers, &num_headers,
61
+ 0);
62
+ assert(ret == sizeof(REQ) - 1);
63
+ }
64
+
65
+ return 0;
53
66
  }
@@ -24,409 +24,576 @@
24
24
  * IN THE SOFTWARE.
25
25
  */
26
26
 
27
+ #include <assert.h>
27
28
  #include <stddef.h>
29
+ #include <string.h>
28
30
  #ifdef __SSE4_2__
29
- # include <x86intrin.h>
31
+ #ifdef _MSC_VER
32
+ #include <nmmintrin.h>
33
+ #else
34
+ #include <x86intrin.h>
35
+ #endif
30
36
  #endif
31
37
  #include "picohttpparser.h"
32
38
 
33
39
  /* $Id$ */
34
40
 
35
41
  #if __GNUC__ >= 3
36
- # define likely(x) __builtin_expect(!!(x), 1)
37
- # define unlikely(x) __builtin_expect(!!(x), 0)
42
+ #define likely(x) __builtin_expect(!!(x), 1)
43
+ #define unlikely(x) __builtin_expect(!!(x), 0)
44
+ #else
45
+ #define likely(x) (x)
46
+ #define unlikely(x) (x)
47
+ #endif
48
+
49
+ #ifdef _MSC_VER
50
+ #define ALIGNED(n) _declspec(align(n))
38
51
  #else
39
- # define likely(x) (x)
40
- # define unlikely(x) (x)
52
+ #define ALIGNED(n) __attribute__((aligned(n)))
41
53
  #endif
42
54
 
43
- #define IS_PRINTABLE_ASCII(c) ((unsigned char)(c) - 040u < 0137u)
44
-
45
- #define CHECK_EOF() \
46
- if (buf == buf_end) { \
47
- *ret = -2; \
48
- return NULL; \
49
- }
50
-
51
- #define EXPECT_CHAR(ch) \
52
- CHECK_EOF(); \
53
- if (*buf++ != ch) { \
54
- *ret = -1; \
55
- return NULL; \
56
- }
57
-
58
- #define ADVANCE_TOKEN(tok, toklen) do { \
59
- const char* tok_start = buf; \
60
- static const char ranges2[] __attribute__((aligned(16))) = "\000\040\177\177"; \
61
- int found2; \
62
- buf = findchar_fast(buf, buf_end, ranges2, sizeof(ranges2) - 1, &found2); \
63
- if (! found2) { \
64
- CHECK_EOF(); \
65
- } \
66
- while (1) { \
67
- if (*buf == ' ') { \
68
- break; \
69
- } else if (unlikely(! IS_PRINTABLE_ASCII(*buf))) { \
70
- if ((unsigned char)*buf < '\040' || *buf == '\177') { \
71
- *ret = -1; \
72
- return NULL; \
73
- } \
74
- } \
75
- ++buf; \
76
- CHECK_EOF(); \
77
- } \
78
- tok = tok_start; \
79
- toklen = buf - tok_start; \
80
- } while (0)
81
-
82
- static const char* token_char_map =
83
- "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
84
- "\0\1\1\1\1\1\1\1\0\0\1\1\0\1\1\0\1\1\1\1\1\1\1\1\1\1\0\0\0\0\0\0"
85
- "\0\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\0\0\1\1"
86
- "\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\1\0\1\0"
87
- "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
88
- "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
89
- "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
90
- "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0";
91
-
92
- static const char* findchar_fast(const char* buf, const char* buf_end, const char *ranges, size_t ranges_size, int* found)
55
+ #define IS_PRINTABLE_ASCII(c) ((unsigned char)(c)-040u < 0137u)
56
+
57
+ #define CHECK_EOF() \
58
+ if (buf == buf_end) { \
59
+ *ret = -2; \
60
+ return NULL; \
61
+ }
62
+
63
+ #define EXPECT_CHAR(ch) \
64
+ CHECK_EOF(); \
65
+ if (*buf++ != ch) { \
66
+ *ret = -1; \
67
+ return NULL; \
68
+ }
69
+
70
+ #define ADVANCE_TOKEN(tok, toklen) \
71
+ do { \
72
+ const char *tok_start = buf; \
73
+ static const char ALIGNED(16) ranges2[] = "\000\040\177\177"; \
74
+ int found2; \
75
+ buf = findchar_fast(buf, buf_end, ranges2, sizeof(ranges2) - 1, &found2); \
76
+ if (!found2) { \
77
+ CHECK_EOF(); \
78
+ } \
79
+ while (1) { \
80
+ if (*buf == ' ') { \
81
+ break; \
82
+ } else if (unlikely(!IS_PRINTABLE_ASCII(*buf))) { \
83
+ if ((unsigned char)*buf < '\040' || *buf == '\177') { \
84
+ *ret = -1; \
85
+ return NULL; \
86
+ } \
87
+ } \
88
+ ++buf; \
89
+ CHECK_EOF(); \
90
+ } \
91
+ tok = tok_start; \
92
+ toklen = buf - tok_start; \
93
+ } while (0)
94
+
95
+ static const char *token_char_map = "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
96
+ "\0\1\1\1\1\1\1\1\0\0\1\1\0\1\1\0\1\1\1\1\1\1\1\1\1\1\0\0\0\0\0\0"
97
+ "\0\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\0\0\1\1"
98
+ "\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\1\0\1\0"
99
+ "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
100
+ "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
101
+ "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
102
+ "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0";
103
+
104
+ static const char *findchar_fast(const char *buf, const char *buf_end, const char *ranges, size_t ranges_size, int *found)
93
105
  {
94
- *found = 0;
106
+ *found = 0;
95
107
  #if __SSE4_2__
96
- if (likely(buf_end - buf >= 16)) {
97
- __m128i ranges16 = _mm_loadu_si128((const __m128i*)ranges);
98
-
99
- size_t left = (buf_end - buf) & ~15;
100
- do {
101
- __m128i b16 = _mm_loadu_si128((void*)buf);
102
- int r = _mm_cmpestri(ranges16, ranges_size, b16, 16, _SIDD_LEAST_SIGNIFICANT | _SIDD_CMP_RANGES | _SIDD_UBYTE_OPS);
103
- if (unlikely(r != 16)) {
104
- buf += r;
105
- *found = 1;
106
- break;
107
- }
108
- buf += 16;
109
- left -= 16;
110
- } while (likely(left != 0));
111
- }
108
+ if (likely(buf_end - buf >= 16)) {
109
+ __m128i ranges16 = _mm_loadu_si128((const __m128i *)ranges);
110
+
111
+ size_t left = (buf_end - buf) & ~15;
112
+ do {
113
+ __m128i b16 = _mm_loadu_si128((void *)buf);
114
+ int r = _mm_cmpestri(ranges16, ranges_size, b16, 16, _SIDD_LEAST_SIGNIFICANT | _SIDD_CMP_RANGES | _SIDD_UBYTE_OPS);
115
+ if (unlikely(r != 16)) {
116
+ buf += r;
117
+ *found = 1;
118
+ break;
119
+ }
120
+ buf += 16;
121
+ left -= 16;
122
+ } while (likely(left != 0));
123
+ }
124
+ #else
125
+ /* suppress unused parameter warning */
126
+ (void)buf_end;
127
+ (void)ranges;
128
+ (void)ranges_size;
112
129
  #endif
113
- return buf;
130
+ return buf;
114
131
  }
115
132
 
116
- static const char* get_token_to_eol(const char* buf, const char* buf_end,
117
- const char** token, size_t* token_len,
118
- int* ret)
133
+ static const char *get_token_to_eol(const char *buf, const char *buf_end, const char **token, size_t *token_len, int *ret)
119
134
  {
120
- const char* token_start = buf;
121
-
135
+ const char *token_start = buf;
136
+
122
137
  #ifdef __SSE4_2__
123
- static const char ranges1[] =
124
- "\0\010"
125
- /* allow HT */
126
- "\012\037"
127
- /* allow SP and up to but not including DEL */
128
- "\177\177"
129
- /* allow chars w. MSB set */
130
- ;
131
- int found;
132
- buf = findchar_fast(buf, buf_end, ranges1, sizeof(ranges1) - 1, &found);
133
- if (found)
134
- goto FOUND_CTL;
138
+ static const char ranges1[] = "\0\010"
139
+ /* allow HT */
140
+ "\012\037"
141
+ /* allow SP and up to but not including DEL */
142
+ "\177\177"
143
+ /* allow chars w. MSB set */
144
+ ;
145
+ int found;
146
+ buf = findchar_fast(buf, buf_end, ranges1, sizeof(ranges1) - 1, &found);
147
+ if (found)
148
+ goto FOUND_CTL;
135
149
  #else
136
- /* find non-printable char within the next 8 bytes, this is the hottest code; manually inlined */
137
- while (likely(buf_end - buf >= 8)) {
138
- #define DOIT() if (unlikely(! IS_PRINTABLE_ASCII(*buf))) goto NonPrintable; ++buf
139
- DOIT(); DOIT(); DOIT(); DOIT();
140
- DOIT(); DOIT(); DOIT(); DOIT();
150
+ /* find non-printable char within the next 8 bytes, this is the hottest code; manually inlined */
151
+ while (likely(buf_end - buf >= 8)) {
152
+ #define DOIT() \
153
+ do { \
154
+ if (unlikely(!IS_PRINTABLE_ASCII(*buf))) \
155
+ goto NonPrintable; \
156
+ ++buf; \
157
+ } while (0)
158
+ DOIT();
159
+ DOIT();
160
+ DOIT();
161
+ DOIT();
162
+ DOIT();
163
+ DOIT();
164
+ DOIT();
165
+ DOIT();
141
166
  #undef DOIT
142
- continue;
143
- NonPrintable:
144
- if ((likely((unsigned char)*buf < '\040') && likely(*buf != '\011')) || unlikely(*buf == '\177')) {
145
- goto FOUND_CTL;
167
+ continue;
168
+ NonPrintable:
169
+ if ((likely((unsigned char)*buf < '\040') && likely(*buf != '\011')) || unlikely(*buf == '\177')) {
170
+ goto FOUND_CTL;
171
+ }
172
+ ++buf;
146
173
  }
147
- ++buf;
148
- }
149
174
  #endif
150
- for (; ; ++buf) {
151
- CHECK_EOF();
152
- if (unlikely(! IS_PRINTABLE_ASCII(*buf))) {
153
- if ((likely((unsigned char)*buf < '\040') && likely(*buf != '\011')) || unlikely(*buf == '\177')) {
154
- goto FOUND_CTL;
155
- }
175
+ for (;; ++buf) {
176
+ CHECK_EOF();
177
+ if (unlikely(!IS_PRINTABLE_ASCII(*buf))) {
178
+ if ((likely((unsigned char)*buf < '\040') && likely(*buf != '\011')) || unlikely(*buf == '\177')) {
179
+ goto FOUND_CTL;
180
+ }
181
+ }
156
182
  }
157
- }
158
- FOUND_CTL:
159
- if (likely(*buf == '\015')) {
160
- ++buf;
161
- EXPECT_CHAR('\012');
162
- *token_len = buf - 2 - token_start;
163
- } else if (*buf == '\012') {
164
- *token_len = buf - token_start;
165
- ++buf;
166
- } else {
167
- *ret = -1;
168
- return NULL;
169
- }
170
- *token = token_start;
171
-
172
- return buf;
173
- }
174
-
175
- static const char* is_complete(const char* buf, const char* buf_end,
176
- size_t last_len, int* ret)
177
- {
178
- int ret_cnt = 0;
179
- buf = last_len < 3 ? buf : buf + last_len - 3;
180
-
181
- while (1) {
182
- CHECK_EOF();
183
- if (*buf == '\015') {
184
- ++buf;
185
- CHECK_EOF();
186
- EXPECT_CHAR('\012');
187
- ++ret_cnt;
183
+ FOUND_CTL:
184
+ if (likely(*buf == '\015')) {
185
+ ++buf;
186
+ EXPECT_CHAR('\012');
187
+ *token_len = buf - 2 - token_start;
188
188
  } else if (*buf == '\012') {
189
- ++buf;
190
- ++ret_cnt;
189
+ *token_len = buf - token_start;
190
+ ++buf;
191
191
  } else {
192
- ++buf;
193
- ret_cnt = 0;
192
+ *ret = -1;
193
+ return NULL;
194
194
  }
195
- if (ret_cnt == 2) {
196
- return buf;
195
+ *token = token_start;
196
+
197
+ return buf;
198
+ }
199
+
200
+ static const char *is_complete(const char *buf, const char *buf_end, size_t last_len, int *ret)
201
+ {
202
+ int ret_cnt = 0;
203
+ buf = last_len < 3 ? buf : buf + last_len - 3;
204
+
205
+ while (1) {
206
+ CHECK_EOF();
207
+ if (*buf == '\015') {
208
+ ++buf;
209
+ CHECK_EOF();
210
+ EXPECT_CHAR('\012');
211
+ ++ret_cnt;
212
+ } else if (*buf == '\012') {
213
+ ++buf;
214
+ ++ret_cnt;
215
+ } else {
216
+ ++buf;
217
+ ret_cnt = 0;
218
+ }
219
+ if (ret_cnt == 2) {
220
+ return buf;
221
+ }
197
222
  }
198
- }
199
-
200
- *ret = -2;
201
- return NULL;
223
+
224
+ *ret = -2;
225
+ return NULL;
202
226
  }
203
227
 
204
228
  /* *_buf is always within [buf, buf_end) upon success */
205
- static const char* parse_int(const char* buf, const char* buf_end, int* value,
206
- int* ret)
229
+ static const char *parse_int(const char *buf, const char *buf_end, int *value, int *ret)
207
230
  {
208
- int v;
209
- CHECK_EOF();
210
- if (! ('0' <= *buf && *buf <= '9')) {
211
- *ret = -1;
212
- return NULL;
213
- }
214
- v = 0;
215
- for (; ; ++buf) {
231
+ int v;
216
232
  CHECK_EOF();
217
- if ('0' <= *buf && *buf <= '9') {
218
- v = v * 10 + *buf - '0';
219
- } else {
220
- break;
233
+ if (!('0' <= *buf && *buf <= '9')) {
234
+ *ret = -1;
235
+ return NULL;
236
+ }
237
+ v = 0;
238
+ for (;; ++buf) {
239
+ CHECK_EOF();
240
+ if ('0' <= *buf && *buf <= '9') {
241
+ v = v * 10 + *buf - '0';
242
+ } else {
243
+ break;
244
+ }
221
245
  }
222
- }
223
-
224
- *value = v;
225
- return buf;
246
+
247
+ *value = v;
248
+ return buf;
226
249
  }
227
250
 
228
251
  /* returned pointer is always within [buf, buf_end), or null */
229
- static const char* parse_http_version(const char* buf, const char* buf_end,
230
- int* minor_version, int* ret)
252
+ static const char *parse_http_version(const char *buf, const char *buf_end, int *minor_version, int *ret)
231
253
  {
232
- EXPECT_CHAR('H'); EXPECT_CHAR('T'); EXPECT_CHAR('T'); EXPECT_CHAR('P');
233
- EXPECT_CHAR('/'); EXPECT_CHAR('1'); EXPECT_CHAR('.');
234
- return parse_int(buf, buf_end, minor_version, ret);
254
+ EXPECT_CHAR('H');
255
+ EXPECT_CHAR('T');
256
+ EXPECT_CHAR('T');
257
+ EXPECT_CHAR('P');
258
+ EXPECT_CHAR('/');
259
+ EXPECT_CHAR('1');
260
+ EXPECT_CHAR('.');
261
+ return parse_int(buf, buf_end, minor_version, ret);
235
262
  }
236
263
 
237
- static const char* parse_headers(const char* buf, const char* buf_end,
238
- struct phr_header* headers,
239
- size_t* num_headers, size_t max_headers,
240
- int* ret)
264
+ static const char *parse_headers(const char *buf, const char *buf_end, struct phr_header *headers, size_t *num_headers,
265
+ size_t max_headers, int *ret)
241
266
  {
242
- for (; ; ++*num_headers) {
267
+ for (;; ++*num_headers) {
268
+ CHECK_EOF();
269
+ if (*buf == '\015') {
270
+ ++buf;
271
+ EXPECT_CHAR('\012');
272
+ break;
273
+ } else if (*buf == '\012') {
274
+ ++buf;
275
+ break;
276
+ }
277
+ if (*num_headers == max_headers) {
278
+ *ret = -1;
279
+ return NULL;
280
+ }
281
+ if (!(*num_headers != 0 && (*buf == ' ' || *buf == '\t'))) {
282
+ static const char ALIGNED(16) ranges1[] = "::\x00\037";
283
+ int found;
284
+ if (!token_char_map[(unsigned char)*buf]) {
285
+ *ret = -1;
286
+ return NULL;
287
+ }
288
+ /* parsing name, but do not discard SP before colon, see
289
+ * http://www.mozilla.org/security/announce/2006/mfsa2006-33.html */
290
+ headers[*num_headers].name = buf;
291
+ buf = findchar_fast(buf, buf_end, ranges1, sizeof(ranges1) - 1, &found);
292
+ if (!found) {
293
+ CHECK_EOF();
294
+ }
295
+ while (1) {
296
+ if (*buf == ':') {
297
+ break;
298
+ } else if (*buf < ' ') {
299
+ *ret = -1;
300
+ return NULL;
301
+ }
302
+ ++buf;
303
+ CHECK_EOF();
304
+ }
305
+ headers[*num_headers].name_len = buf - headers[*num_headers].name;
306
+ ++buf;
307
+ for (;; ++buf) {
308
+ CHECK_EOF();
309
+ if (!(*buf == ' ' || *buf == '\t')) {
310
+ break;
311
+ }
312
+ }
313
+ } else {
314
+ headers[*num_headers].name = NULL;
315
+ headers[*num_headers].name_len = 0;
316
+ }
317
+ if ((buf = get_token_to_eol(buf, buf_end, &headers[*num_headers].value, &headers[*num_headers].value_len, ret)) == NULL) {
318
+ return NULL;
319
+ }
320
+ }
321
+ return buf;
322
+ }
323
+
324
+ static const char *parse_request(const char *buf, const char *buf_end, const char **method, size_t *method_len, const char **path,
325
+ size_t *path_len, int *minor_version, struct phr_header *headers, size_t *num_headers,
326
+ size_t max_headers, int *ret)
327
+ {
328
+ /* skip first empty line (some clients add CRLF after POST content) */
243
329
  CHECK_EOF();
244
330
  if (*buf == '\015') {
245
- ++buf;
246
- EXPECT_CHAR('\012');
247
- break;
331
+ ++buf;
332
+ EXPECT_CHAR('\012');
248
333
  } else if (*buf == '\012') {
249
- ++buf;
250
- break;
251
- }
252
- if (*num_headers == max_headers) {
253
- *ret = -1;
254
- return NULL;
334
+ ++buf;
255
335
  }
256
- if (! (*num_headers != 0 && (*buf == ' ' || *buf == '\t'))) {
257
- if (! token_char_map[(unsigned char)*buf]) {
258
- *ret = -1;
336
+
337
+ /* parse request line */
338
+ ADVANCE_TOKEN(*method, *method_len);
339
+ ++buf;
340
+ ADVANCE_TOKEN(*path, *path_len);
341
+ ++buf;
342
+ if ((buf = parse_http_version(buf, buf_end, minor_version, ret)) == NULL) {
259
343
  return NULL;
260
- }
261
- /* parsing name, but do not discard SP before colon, see
262
- * http://www.mozilla.org/security/announce/2006/mfsa2006-33.html */
263
- headers[*num_headers].name = buf;
264
- static const char ranges1[] __attribute__((aligned(16))) = "::\x00\037";
265
- int found;
266
- buf = findchar_fast(buf, buf_end, ranges1, sizeof(ranges1) - 1, &found);
267
- if (! found) {
268
- CHECK_EOF();
269
- }
270
- while (1) {
271
- if (*buf == ':') {
272
- break;
273
- } else if (*buf < ' ') {
274
- *ret = -1;
275
- return NULL;
276
- }
344
+ }
345
+ if (*buf == '\015') {
346
+ ++buf;
347
+ EXPECT_CHAR('\012');
348
+ } else if (*buf == '\012') {
277
349
  ++buf;
278
- CHECK_EOF();
279
- }
280
- headers[*num_headers].name_len = buf - headers[*num_headers].name;
281
- ++buf;
282
- for (; ; ++buf) {
283
- CHECK_EOF();
284
- if (! (*buf == ' ' || *buf == '\t')) {
285
- break;
286
- }
287
- }
288
350
  } else {
289
- headers[*num_headers].name = NULL;
290
- headers[*num_headers].name_len = 0;
351
+ *ret = -1;
352
+ return NULL;
353
+ }
354
+
355
+ return parse_headers(buf, buf_end, headers, num_headers, max_headers, ret);
356
+ }
357
+
358
+ int phr_parse_request(const char *buf_start, size_t len, const char **method, size_t *method_len, const char **path,
359
+ size_t *path_len, int *minor_version, struct phr_header *headers, size_t *num_headers, size_t last_len)
360
+ {
361
+ const char *buf = buf_start, *buf_end = buf_start + len;
362
+ size_t max_headers = *num_headers;
363
+ int r;
364
+
365
+ *method = NULL;
366
+ *method_len = 0;
367
+ *path = NULL;
368
+ *path_len = 0;
369
+ *minor_version = -1;
370
+ *num_headers = 0;
371
+
372
+ /* if last_len != 0, check if the request is complete (a fast countermeasure
373
+ againt slowloris */
374
+ if (last_len != 0 && is_complete(buf, buf_end, last_len, &r) == NULL) {
375
+ return r;
291
376
  }
292
- if ((buf = get_token_to_eol(buf, buf_end, &headers[*num_headers].value,
293
- &headers[*num_headers].value_len, ret))
294
- == NULL) {
295
- return NULL;
377
+
378
+ if ((buf = parse_request(buf, buf_end, method, method_len, path, path_len, minor_version, headers, num_headers, max_headers,
379
+ &r)) == NULL) {
380
+ return r;
296
381
  }
297
- }
298
- return buf;
382
+
383
+ return (int)(buf - buf_start);
299
384
  }
300
385
 
301
- const char* parse_request(const char* buf, const char* buf_end,
302
- const char** method, size_t* method_len,
303
- const char** path, size_t* path_len,
304
- int* minor_version, struct phr_header* headers,
305
- size_t* num_headers, size_t max_headers, int* ret)
386
+ static const char *parse_response(const char *buf, const char *buf_end, int *minor_version, int *status, const char **msg,
387
+ size_t *msg_len, struct phr_header *headers, size_t *num_headers, size_t max_headers, int *ret)
306
388
  {
307
- /* skip first empty line (some clients add CRLF after POST content) */
308
- CHECK_EOF();
309
- if (*buf == '\015') {
310
- ++buf;
311
- EXPECT_CHAR('\012');
312
- } else if (*buf == '\012') {
313
- ++buf;
314
- }
315
-
316
- /* parse request line */
317
- ADVANCE_TOKEN(*method, *method_len);
318
- ++buf;
319
- ADVANCE_TOKEN(*path, *path_len);
320
- ++buf;
321
- if ((buf = parse_http_version(buf, buf_end, minor_version, ret)) == NULL) {
322
- return NULL;
323
- }
324
- if (*buf == '\015') {
325
- ++buf;
326
- EXPECT_CHAR('\012');
327
- } else if (*buf == '\012') {
328
- ++buf;
329
- } else {
330
- *ret = -1;
331
- return NULL;
332
- }
333
-
334
- return parse_headers(buf, buf_end, headers, num_headers, max_headers, ret);
389
+ /* parse "HTTP/1.x" */
390
+ if ((buf = parse_http_version(buf, buf_end, minor_version, ret)) == NULL) {
391
+ return NULL;
392
+ }
393
+ /* skip space */
394
+ if (*buf++ != ' ') {
395
+ *ret = -1;
396
+ return NULL;
397
+ }
398
+ /* parse status code */
399
+ if ((buf = parse_int(buf, buf_end, status, ret)) == NULL) {
400
+ return NULL;
401
+ }
402
+ /* skip space */
403
+ if (*buf++ != ' ') {
404
+ *ret = -1;
405
+ return NULL;
406
+ }
407
+ /* get message */
408
+ if ((buf = get_token_to_eol(buf, buf_end, msg, msg_len, ret)) == NULL) {
409
+ return NULL;
410
+ }
411
+
412
+ return parse_headers(buf, buf_end, headers, num_headers, max_headers, ret);
335
413
  }
336
414
 
337
- int phr_parse_request(const char* buf_start, size_t len, const char** method,
338
- size_t* method_len, const char** path, size_t* path_len,
339
- int* minor_version, struct phr_header* headers,
340
- size_t* num_headers, size_t last_len)
415
+ int phr_parse_response(const char *buf_start, size_t len, int *minor_version, int *status, const char **msg, size_t *msg_len,
416
+ struct phr_header *headers, size_t *num_headers, size_t last_len)
341
417
  {
342
- const char * buf = buf_start, * buf_end = buf_start + len;
343
- size_t max_headers = *num_headers;
344
- int r;
345
-
346
- *method = NULL;
347
- *method_len = 0;
348
- *path = NULL;
349
- *path_len = 0;
350
- *minor_version = -1;
351
- *num_headers = 0;
352
-
353
- /* if last_len != 0, check if the request is complete (a fast countermeasure
354
- againt slowloris */
355
- if (last_len != 0 && is_complete(buf, buf_end, last_len, &r) == NULL) {
356
- return r;
357
- }
358
-
359
- if ((buf = parse_request(buf, buf_end, method, method_len, path, path_len,
360
- minor_version, headers, num_headers, max_headers,
361
- &r))
362
- == NULL) {
363
- return r;
364
- }
365
-
366
- return (int)(buf - buf_start);
418
+ const char *buf = buf_start, *buf_end = buf + len;
419
+ size_t max_headers = *num_headers;
420
+ int r;
421
+
422
+ *minor_version = -1;
423
+ *status = 0;
424
+ *msg = NULL;
425
+ *msg_len = 0;
426
+ *num_headers = 0;
427
+
428
+ /* if last_len != 0, check if the response is complete (a fast countermeasure
429
+ against slowloris */
430
+ if (last_len != 0 && is_complete(buf, buf_end, last_len, &r) == NULL) {
431
+ return r;
432
+ }
433
+
434
+ if ((buf = parse_response(buf, buf_end, minor_version, status, msg, msg_len, headers, num_headers, max_headers, &r)) == NULL) {
435
+ return r;
436
+ }
437
+
438
+ return (int)(buf - buf_start);
367
439
  }
368
440
 
369
- static const char* parse_response(const char* buf, const char* buf_end,
370
- int* minor_version, int* status,
371
- const char** msg, size_t* msg_len,
372
- struct phr_header* headers,
373
- size_t* num_headers, size_t max_headers,
374
- int* ret)
441
+ int phr_parse_headers(const char *buf_start, size_t len, struct phr_header *headers, size_t *num_headers, size_t last_len)
375
442
  {
376
- /* parse "HTTP/1.x" */
377
- if ((buf = parse_http_version(buf, buf_end, minor_version, ret)) == NULL) {
378
- return NULL;
379
- }
380
- /* skip space */
381
- if (*buf++ != ' ') {
382
- *ret = -1;
383
- return NULL;
384
- }
385
- /* parse status code */
386
- if ((buf = parse_int(buf, buf_end, status, ret)) == NULL) {
387
- return NULL;
388
- }
389
- /* skip space */
390
- if (*buf++ != ' ') {
391
- *ret = -1;
392
- return NULL;
393
- }
394
- /* get message */
395
- if ((buf = get_token_to_eol(buf, buf_end, msg, msg_len, ret)) == NULL) {
396
- return NULL;
397
- }
398
-
399
- return parse_headers(buf, buf_end, headers, num_headers, max_headers, ret);
443
+ const char *buf = buf_start, *buf_end = buf + len;
444
+ size_t max_headers = *num_headers;
445
+ int r;
446
+
447
+ *num_headers = 0;
448
+
449
+ /* if last_len != 0, check if the response is complete (a fast countermeasure
450
+ against slowloris */
451
+ if (last_len != 0 && is_complete(buf, buf_end, last_len, &r) == NULL) {
452
+ return r;
453
+ }
454
+
455
+ if ((buf = parse_headers(buf, buf_end, headers, num_headers, max_headers, &r)) == NULL) {
456
+ return r;
457
+ }
458
+
459
+ return (int)(buf - buf_start);
400
460
  }
401
461
 
402
- int phr_parse_response(const char* buf_start, size_t len, int* minor_version,
403
- int* status, const char** msg, size_t* msg_len,
404
- struct phr_header* headers, size_t* num_headers,
405
- size_t last_len)
462
+ enum {
463
+ CHUNKED_IN_CHUNK_SIZE,
464
+ CHUNKED_IN_CHUNK_EXT,
465
+ CHUNKED_IN_CHUNK_DATA,
466
+ CHUNKED_IN_CHUNK_CRLF,
467
+ CHUNKED_IN_TRAILERS_LINE_HEAD,
468
+ CHUNKED_IN_TRAILERS_LINE_MIDDLE
469
+ };
470
+
471
+ static int decode_hex(int ch)
406
472
  {
407
- const char * buf = buf_start, * buf_end = buf + len;
408
- size_t max_headers = *num_headers;
409
- int r;
410
-
411
- *minor_version = -1;
412
- *status = 0;
413
- *msg = NULL;
414
- *msg_len = 0;
415
- *num_headers = 0;
416
-
417
- /* if last_len != 0, check if the response is complete (a fast countermeasure
418
- against slowloris */
419
- if (last_len != 0 && is_complete(buf, buf_end, last_len, &r) == NULL) {
420
- return r;
421
- }
422
-
423
- if ((buf = parse_response(buf, buf_end, minor_version, status, msg, msg_len,
424
- headers, num_headers, max_headers, &r))
425
- == NULL) {
426
- return r;
427
- }
428
-
429
- return (int)(buf - buf_start);
473
+ if ('0' <= ch && ch <= '9') {
474
+ return ch - '0';
475
+ } else if ('A' <= ch && ch <= 'F') {
476
+ return ch - 'A' + 0xa;
477
+ } else if ('a' <= ch && ch <= 'f') {
478
+ return ch - 'a' + 0xa;
479
+ } else {
480
+ return -1;
481
+ }
482
+ }
483
+
484
+ ssize_t phr_decode_chunked(struct phr_chunked_decoder *decoder, char *buf, size_t *_bufsz)
485
+ {
486
+ size_t dst = 0, src = 0, bufsz = *_bufsz;
487
+ ssize_t ret = -2; /* incomplete */
488
+
489
+ while (1) {
490
+ switch (decoder->_state) {
491
+ case CHUNKED_IN_CHUNK_SIZE:
492
+ for (;; ++src) {
493
+ int v;
494
+ if (src == bufsz)
495
+ goto Exit;
496
+ if ((v = decode_hex(buf[src])) == -1) {
497
+ if (decoder->_hex_count == 0) {
498
+ ret = -1;
499
+ goto Exit;
500
+ }
501
+ break;
502
+ }
503
+ if (decoder->_hex_count == sizeof(size_t) * 2) {
504
+ ret = -1;
505
+ goto Exit;
506
+ }
507
+ decoder->bytes_left_in_chunk = decoder->bytes_left_in_chunk * 16 + v;
508
+ ++decoder->_hex_count;
509
+ }
510
+ decoder->_hex_count = 0;
511
+ decoder->_state = CHUNKED_IN_CHUNK_EXT;
512
+ /* fallthru */
513
+ case CHUNKED_IN_CHUNK_EXT:
514
+ /* RFC 7230 A.2 "Line folding in chunk extensions is disallowed" */
515
+ for (;; ++src) {
516
+ if (src == bufsz)
517
+ goto Exit;
518
+ if (buf[src] == '\012')
519
+ break;
520
+ }
521
+ ++src;
522
+ if (decoder->bytes_left_in_chunk == 0) {
523
+ if (decoder->consume_trailer) {
524
+ decoder->_state = CHUNKED_IN_TRAILERS_LINE_HEAD;
525
+ break;
526
+ } else {
527
+ goto Complete;
528
+ }
529
+ }
530
+ decoder->_state = CHUNKED_IN_CHUNK_DATA;
531
+ /* fallthru */
532
+ case CHUNKED_IN_CHUNK_DATA: {
533
+ size_t avail = bufsz - src;
534
+ if (avail < decoder->bytes_left_in_chunk) {
535
+ if (dst != src)
536
+ memmove(buf + dst, buf + src, avail);
537
+ src += avail;
538
+ dst += avail;
539
+ decoder->bytes_left_in_chunk -= avail;
540
+ goto Exit;
541
+ }
542
+ if (dst != src)
543
+ memmove(buf + dst, buf + src, decoder->bytes_left_in_chunk);
544
+ src += decoder->bytes_left_in_chunk;
545
+ dst += decoder->bytes_left_in_chunk;
546
+ decoder->bytes_left_in_chunk = 0;
547
+ decoder->_state = CHUNKED_IN_CHUNK_CRLF;
548
+ }
549
+ /* fallthru */
550
+ case CHUNKED_IN_CHUNK_CRLF:
551
+ for (;; ++src) {
552
+ if (src == bufsz)
553
+ goto Exit;
554
+ if (buf[src] != '\015')
555
+ break;
556
+ }
557
+ if (buf[src] != '\012') {
558
+ ret = -1;
559
+ goto Exit;
560
+ }
561
+ ++src;
562
+ decoder->_state = CHUNKED_IN_CHUNK_SIZE;
563
+ break;
564
+ case CHUNKED_IN_TRAILERS_LINE_HEAD:
565
+ for (;; ++src) {
566
+ if (src == bufsz)
567
+ goto Exit;
568
+ if (buf[src] != '\015')
569
+ break;
570
+ }
571
+ if (buf[src++] == '\012')
572
+ goto Complete;
573
+ decoder->_state = CHUNKED_IN_TRAILERS_LINE_MIDDLE;
574
+ /* fallthru */
575
+ case CHUNKED_IN_TRAILERS_LINE_MIDDLE:
576
+ for (;; ++src) {
577
+ if (src == bufsz)
578
+ goto Exit;
579
+ if (buf[src] == '\012')
580
+ break;
581
+ }
582
+ ++src;
583
+ decoder->_state = CHUNKED_IN_TRAILERS_LINE_HEAD;
584
+ break;
585
+ default:
586
+ assert(!"decoder is corrupt");
587
+ }
588
+ }
589
+
590
+ Complete:
591
+ ret = bufsz - src;
592
+ Exit:
593
+ if (dst != src)
594
+ memmove(buf + dst, buf + src, bufsz - src);
595
+ *_bufsz = dst;
596
+ return ret;
430
597
  }
431
598
 
432
599
  #undef CHECK_EOF