pico_http_parser 0.0.3 → 0.0.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 417f57e6c7289be7606e9fdda1b65346751f3c10
4
- data.tar.gz: 420957b9c7d8ed9338c29d20030109ea0f6c17c0
3
+ metadata.gz: 984780783c1c9bfce3fdcc058187bfe3fd553ceb
4
+ data.tar.gz: 2ad841f14ab2c12a79211634ae6758e22009e0f8
5
5
  SHA512:
6
- metadata.gz: 1cd7184656ea42efb5f22aa27a42d30ed381f241c665859fbc9e94937195248db6216a4be25903799841148c51842fd2fa93bf0ddf39b699cb48192eb44d0e1a
7
- data.tar.gz: 8656caa1f569d72acc097392e67c9316e26c7aa5546d165d1d600566e80a6e1057660696e8096fb00db9a6b7a88860fa890d8a801dbdbedf9f877c4f06e51f62
6
+ metadata.gz: c3dca6150fc78bd0946f4787b7f91dad54a7942ff7bc0f301a7580e277851fed9256008f740d930c4e757055230066af2c0d5ea72c3283f4d99db2f09a8ef1c4
7
+ data.tar.gz: 623cdcfc79c5c5cec7293e72996a92826a64b1e3d6fa05b1449631067b8cd99c2e80685b6c648e898042aa9813af5c7cb117ef80134c09ba32f28f942ebbefae
@@ -2,13 +2,19 @@ require File.expand_path(File.dirname(__FILE__) + '/bench_helper')
2
2
 
3
3
  require 'pico_http_parser'
4
4
 
5
- # request_body = "GET /foo/bar/baz.html?key=value HTTP/1.0\r\nHost: blooperblorp\r\n\r\n"
6
- request_body = "GET /foo/bar/baz.html?key=value HTTP/1.0\r\nHost: blooperblorp\r\nCookie: foobar\r\nX-Forwared-For: 127.0.0.1\r\n\r\n"
7
- # request_body = "GET /foo/bar/baz.html?key=value HTTP/1.0\r\n\r\n"
5
+ request_bodys = [
6
+ "GET /foo/bar/baz.html?key=value HTTP/1.0\r\nHost: blooperblorp\r\n\r\n",
7
+ "GET /foo/bar/baz.html?key=value HTTP/1.0\r\nHost: blooperblorp\r\nUser-Agent: Mozilla/5.0\r\n\r\n",
8
+ "GET /foo/bar/baz.html?key=value HTTP/1.0\r\nHost: blooperblorp\r\nCookie: foobar\r\nX-Forwarded-For: 127.0.0.1\r\nUser-Agent: Mozilla/5.0\r\n\r\n",
9
+ "GET /foo/bar/baz.html?key=value HTTP/1.0\r\nHost: blooperblorp\r\nCookie: foobar\r\nX-Forwarded-For: 127.0.0.1\r\nUser-Agent: Mozilla/5.0\r\nAccept: X-5\r\nConnection: XXXXXX-6\r\nReferer: XXXXXXXX-7\r\nAccept-Encoding: XXXXXXX8\r\nCache-Control: XXXXXXXX9\r\nIf-Modified-Since: XXXXXXXXXXXXXXX10\r\n\r\n",
10
+ "GET /foo/bar/baz.html?key=value HTTP/1.0\r\n\r\n"
11
+ ];
8
12
 
13
+ request_bodys.each do |request_body|
14
+ puts("benchmark #{request_body}");
9
15
  Benchmark.ips do |x|
10
16
  x.time = 5
11
- x.warmup = 2
17
+ x.warmup = 1
12
18
 
13
19
  x.report("PicoHTTPParser") {
14
20
  env = {}
@@ -21,7 +27,7 @@ Benchmark.ips do |x|
21
27
  x.report("Unicorn's HttpParser") {
22
28
  parser = HttpParser.new
23
29
  parser.buf << request_body
24
- parser.parse
30
+ env = parser.parse
25
31
  }
26
32
  rescue LoadError
27
33
  puts("Can't benchmark unicorn as it couldn't be loaded.")
@@ -29,3 +35,5 @@ Benchmark.ips do |x|
29
35
 
30
36
  x.compare!
31
37
  end
38
+ end
39
+
@@ -14,6 +14,11 @@ static VALUE script_name_key;
14
14
  static VALUE server_protocol_key;
15
15
  static VALUE query_string_key;
16
16
 
17
+ static VALUE vacant_string_val;
18
+
19
+ static VALUE http10_val;
20
+ static VALUE http11_val;
21
+
17
22
  struct common_header {
18
23
  const char * name;
19
24
  size_t name_len;
@@ -147,8 +152,8 @@ VALUE phr_parse_http_request(VALUE self, VALUE buf, VALUE envref)
147
152
  char tmp[MAX_HEADER_NAME_LEN + sizeof("HTTP_") - 1];
148
153
  VALUE last_value;
149
154
 
150
- buf_str = StringValuePtr(buf);
151
- buf_len = strlen(buf_str);
155
+ buf_str = RSTRING_PTR(buf);
156
+ buf_len = RSTRING_LEN(buf);
152
157
  num_headers = MAX_HEADERS;
153
158
  ret = phr_parse_request(buf_str, buf_len, &method, &method_len, &path,
154
159
  &path_len, &minor_version, headers, &num_headers, 0);
@@ -157,10 +162,8 @@ VALUE phr_parse_http_request(VALUE self, VALUE buf, VALUE envref)
157
162
 
158
163
  rb_hash_aset(envref, request_method_key, rb_str_new(method,method_len));
159
164
  rb_hash_aset(envref, request_uri_key, rb_str_new(path, path_len));
160
- rb_hash_aset(envref, script_name_key, rb_str_new2(""));
161
- strcpy(tmp, "HTTP/1.");
162
- tmp[7] = 48 + ((minor_version > 1 || minor_version < 0 ) ? 0 : minor_version);
163
- rb_hash_aset(envref, server_protocol_key, rb_str_new(tmp, sizeof("HTTP/1.0") - 1));
165
+ rb_hash_aset(envref, script_name_key, vacant_string_val);
166
+ rb_hash_aset(envref, server_protocol_key, (minor_version == 1) ? http11_val : http10_val);
164
167
 
165
168
  /* PATH_INFO QUERY_STRING */
166
169
  path_len = find_ch(path, path_len, '#'); /* strip off all text after # after storing request_uri */
@@ -172,7 +175,7 @@ VALUE phr_parse_http_request(VALUE self, VALUE buf, VALUE envref)
172
175
  }
173
176
  if (question_at != path_len) ++question_at;
174
177
  rb_hash_aset(envref, query_string_key, rb_str_new(path + question_at, path_len - question_at));
175
-
178
+
176
179
  last_value = Qnil;
177
180
  for (i = 0; i < num_headers; ++i) {
178
181
  if (headers[i].name != NULL) {
@@ -200,6 +203,7 @@ VALUE phr_parse_http_request(VALUE self, VALUE buf, VALUE envref)
200
203
  env_key = rb_str_new(name, name_len);
201
204
  }
202
205
  }
206
+
203
207
  slot = rb_hash_aref(envref, env_key);
204
208
  if ( slot != Qnil ) {
205
209
  rb_str_cat2(slot, ", ");
@@ -209,11 +213,13 @@ VALUE phr_parse_http_request(VALUE self, VALUE buf, VALUE envref)
209
213
  rb_hash_aset(envref, env_key, slot);
210
214
  last_value = slot;
211
215
  }
216
+
212
217
  } else {
213
218
  /* continuing lines of a multiline header */
214
219
  if ( last_value != Qnil )
215
220
  rb_str_cat(last_value, headers[i].value, headers[i].value_len);
216
221
  }
222
+
217
223
  }
218
224
 
219
225
  done:
@@ -247,6 +253,13 @@ void Init_pico_http_parser()
247
253
  set_common_header("USER-AGENT",sizeof("USER-AGENT") - 1, 0);
248
254
  set_common_header("X-FORWARDED-FOR",sizeof("X-FORWARDED-FOR") - 1, 0);
249
255
 
256
+ http10_val = rb_obj_freeze(rb_str_new2("HTTP/1.0"));
257
+ rb_gc_register_address(&http10_val);
258
+ http11_val = rb_obj_freeze(rb_str_new2("HTTP/1.1"));
259
+ rb_gc_register_address(&http11_val);
260
+ vacant_string_val = rb_obj_freeze(rb_str_new("",0));
261
+ rb_gc_register_address(&vacant_string_val);
262
+
250
263
  cPicoHTTPParser = rb_const_get(rb_cObject, rb_intern("PicoHTTPParser"));
251
264
  rb_define_module_function(cPicoHTTPParser, "parse_http_request", phr_parse_http_request, 2);
252
265
  }
@@ -0,0 +1,6 @@
1
+ # requires clang-format >= 3.6
2
+ BasedOnStyle: "LLVM"
3
+ IndentWidth: 4
4
+ ColumnLimit: 132
5
+ BreakBeforeBraces: Linux
6
+ AllowShortFunctionsOnASingleLine: None
@@ -0,0 +1,7 @@
1
+ project picohttpparser ;
2
+
3
+ lib picohttpparser : picohttpparser.c ;
4
+
5
+ unit-test test
6
+ : picohttpparser picotest/picotest.c test.c
7
+ : <testing.launcher>prove ;
@@ -14,6 +14,98 @@ Check out [test.c] to find out how to use the parser.
14
14
 
15
15
  The software is dual-licensed under the Perl License or the MIT License.
16
16
 
17
+ Usage
18
+ -----
19
+
20
+ The library exposes four functions: `phr_parse_request`, `phr_parse_response`, `phr_parse_headers`, and `phr_decode_chunked`.
21
+
22
+ ### phr_parse_request
23
+
24
+ The example below reads an HTTP request from socket `sock` using `read(2)`, parses it using `phr_parse_request`, and prints the details.
25
+
26
+ ```
27
+ char buf[4096], *method, *path;
28
+ int pret, minor_version;
29
+ struct phr_header headers[100];
30
+ size_t buflen = 0, prevbuflen = 0, method_len, path_len, num_headers;
31
+ ssize_t rret;
32
+
33
+ while (1) {
34
+ /* read the request */
35
+ while ((rret = read(sock, buf + buflen, sizeof(buf) - buflen)) == -1 && errno == EINTR)
36
+ ;
37
+ if (rret <= 0)
38
+ return IOError;
39
+ prevbuflen = buflen;
40
+ buflen += rret;
41
+ /* parse the request */
42
+ num_headers = sizeof(headers) / sizeof(headers[0]);
43
+ pret = phr_parse_request(buf, buflen, &method, &method_len, &path, &path_len,
44
+ &minor_version, headers, &num_headers, prevbuflen);
45
+ if (pret > 0)
46
+ break; /* successfully parsed the request */
47
+ else if (pret == -1)
48
+ return ParseError;
49
+ /* request is incomplete, continue the loop */
50
+ assert(pret == -2);
51
+ if (buflen == sizeof(buf))
52
+ return RequestIsTooLongError;
53
+ }
54
+
55
+ printf("request is %d bytes long\n", pret);
56
+ printf("method is %.*s\n", (int)method_len, method);
57
+ printf("path is %.*s\n", (int)path_len, path);
58
+ printf("HTTP version is 1.%d\n", minor_version);
59
+ printf("headers:\n");
60
+ for (i = 0; i != num_headers; ++i) {
61
+ printf("%.*s: %.*s\n", (int)headers[i].name_len, headers[i].name,
62
+ (int)headers[i].value_len, headers[i].value);
63
+ }
64
+ ```
65
+
66
+ ### phr_parse_response, phr_parse_headers
67
+
68
+ `phr_parse_response` and `phr_parse_headers` provide interfaces similar to `phr_parse_request`. `phr_parse_response` parses an HTTP response, and `phr_parse_headers` parses the headers only.
69
+
70
+ ### phr_decode_chunked
71
+
72
+ The example below decodes incoming data that uses chunked transfer encoding. The data is decoded in place.
73
+
74
+ ```
75
+ struct phr_chunked_decoder decoder = {}; /* zero-clear */
76
+ char *buf = malloc(4096);
77
+ size_t size = 0, capacity = 4096, rsize;
78
+ ssize_t rret, pret;
79
+
80
+ /* set consume_trailer to 1 to discard the trailing header, or the application
81
+ * should call phr_parse_headers to parse the trailing header */
82
+ decoder.consume_trailer = 1;
83
+
84
+ do {
85
+ /* expand the buffer if necessary */
86
+ if (size == capacity) {
87
+ capacity *= 2;
88
+ buf = realloc(buf, capacity);
89
+ assert(buf != NULL);
90
+ }
91
+ /* read */
92
+ while ((rret = read(sock, buf + size, capacity - size)) == -1 && errno == EINTR)
93
+ ;
94
+ if (rret <= 0)
95
+ return IOError;
96
+ /* decode */
97
+ rsize = rret;
98
+ pret = phr_decode_chunked(&decoder, buf + size, &rsize);
99
+ if (pret == -1)
100
+ return ParseError;
101
+ size += rsize;
102
+ } while (pret == -2);
103
+
104
+ /* successfully decoded the chunked data */
105
+ assert(pret >= 0);
106
+ printf("decoded data is at %p (%zu bytes)\n", buf, size);
107
+ ```
108
+
17
109
  Benchmark
18
110
  ---------
19
111
 
@@ -28,26 +28,39 @@
28
28
  #include <stdio.h>
29
29
  #include "picohttpparser.h"
30
30
 
31
- #define REQ "GET /wp-content/uploads/2010/03/hello-kitty-darth-vader-pink.jpg HTTP/1.1\r\nHost: www.kittyhell.com\r\nUser-Agent: Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.6; ja-JP-mac; rv:1.9.2.3) Gecko/20100401 Firefox/3.6.3 Pathtraq/0.9\r\nAccept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8\r\nAccept-Language: ja,en-us;q=0.7,en;q=0.3\r\nAccept-Encoding: gzip,deflate\r\nAccept-Charset: Shift_JIS,utf-8;q=0.7,*;q=0.7\r\nKeep-Alive: 115\r\nConnection: keep-alive\r\nCookie: wp_ozh_wsa_visits=2; wp_ozh_wsa_visit_lasttime=xxxxxxxxxx; __utma=xxxxxxxxx.xxxxxxxxxx.xxxxxxxxxx.xxxxxxxxxx.xxxxxxxxxx.x; __utmz=xxxxxxxxx.xxxxxxxxxx.x.x.utmccn=(referral)|utmcsr=reader.livedoor.com|utmcct=/reader/|utmcmd=referral\r\n\r\n"
31
+ #define REQ \
32
+ "GET /wp-content/uploads/2010/03/hello-kitty-darth-vader-pink.jpg HTTP/1.1\r\n" \
33
+ "Host: www.kittyhell.com\r\n" \
34
+ "User-Agent: Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.6; ja-JP-mac; rv:1.9.2.3) Gecko/20100401 Firefox/3.6.3 " \
35
+ "Pathtraq/0.9\r\n" \
36
+ "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8\r\n" \
37
+ "Accept-Language: ja,en-us;q=0.7,en;q=0.3\r\n" \
38
+ "Accept-Encoding: gzip,deflate\r\n" \
39
+ "Accept-Charset: Shift_JIS,utf-8;q=0.7,*;q=0.7\r\n" \
40
+ "Keep-Alive: 115\r\n" \
41
+ "Connection: keep-alive\r\n" \
42
+ "Cookie: wp_ozh_wsa_visits=2; wp_ozh_wsa_visit_lasttime=xxxxxxxxxx; " \
43
+ "__utma=xxxxxxxxx.xxxxxxxxxx.xxxxxxxxxx.xxxxxxxxxx.xxxxxxxxxx.x; " \
44
+ "__utmz=xxxxxxxxx.xxxxxxxxxx.x.x.utmccn=(referral)|utmcsr=reader.livedoor.com|utmcct=/reader/|utmcmd=referral\r\n" \
45
+ "\r\n"
32
46
 
33
47
  int main(void)
34
48
  {
35
- const char* method;
36
- size_t method_len;
37
- const char* path;
38
- size_t path_len;
39
- int minor_version;
40
- struct phr_header headers[32];
41
- size_t num_headers;
42
- int i, ret;
43
-
44
- for (i = 0; i < 10000000; i++) {
45
- num_headers = sizeof(headers) / sizeof(headers[0]);
46
- ret = phr_parse_request(REQ, sizeof(REQ) - 1, &method, &method_len, &path,
47
- &path_len, &minor_version, headers, &num_headers,
48
- 0);
49
- assert(ret == sizeof(REQ) - 1);
50
- }
51
-
52
- return 0;
49
+ const char *method;
50
+ size_t method_len;
51
+ const char *path;
52
+ size_t path_len;
53
+ int minor_version;
54
+ struct phr_header headers[32];
55
+ size_t num_headers;
56
+ int i, ret;
57
+
58
+ for (i = 0; i < 10000000; i++) {
59
+ num_headers = sizeof(headers) / sizeof(headers[0]);
60
+ ret = phr_parse_request(REQ, sizeof(REQ) - 1, &method, &method_len, &path, &path_len, &minor_version, headers, &num_headers,
61
+ 0);
62
+ assert(ret == sizeof(REQ) - 1);
63
+ }
64
+
65
+ return 0;
53
66
  }
@@ -24,409 +24,576 @@
24
24
  * IN THE SOFTWARE.
25
25
  */
26
26
 
27
+ #include <assert.h>
27
28
  #include <stddef.h>
29
+ #include <string.h>
28
30
  #ifdef __SSE4_2__
29
- # include <x86intrin.h>
31
+ #ifdef _MSC_VER
32
+ #include <nmmintrin.h>
33
+ #else
34
+ #include <x86intrin.h>
35
+ #endif
30
36
  #endif
31
37
  #include "picohttpparser.h"
32
38
 
33
39
  /* $Id$ */
34
40
 
35
41
  #if __GNUC__ >= 3
36
- # define likely(x) __builtin_expect(!!(x), 1)
37
- # define unlikely(x) __builtin_expect(!!(x), 0)
42
+ #define likely(x) __builtin_expect(!!(x), 1)
43
+ #define unlikely(x) __builtin_expect(!!(x), 0)
44
+ #else
45
+ #define likely(x) (x)
46
+ #define unlikely(x) (x)
47
+ #endif
48
+
49
+ #ifdef _MSC_VER
50
+ #define ALIGNED(n) _declspec(align(n))
38
51
  #else
39
- # define likely(x) (x)
40
- # define unlikely(x) (x)
52
+ #define ALIGNED(n) __attribute__((aligned(n)))
41
53
  #endif
42
54
 
43
- #define IS_PRINTABLE_ASCII(c) ((unsigned char)(c) - 040u < 0137u)
44
-
45
- #define CHECK_EOF() \
46
- if (buf == buf_end) { \
47
- *ret = -2; \
48
- return NULL; \
49
- }
50
-
51
- #define EXPECT_CHAR(ch) \
52
- CHECK_EOF(); \
53
- if (*buf++ != ch) { \
54
- *ret = -1; \
55
- return NULL; \
56
- }
57
-
58
- #define ADVANCE_TOKEN(tok, toklen) do { \
59
- const char* tok_start = buf; \
60
- static const char ranges2[] __attribute__((aligned(16))) = "\000\040\177\177"; \
61
- int found2; \
62
- buf = findchar_fast(buf, buf_end, ranges2, sizeof(ranges2) - 1, &found2); \
63
- if (! found2) { \
64
- CHECK_EOF(); \
65
- } \
66
- while (1) { \
67
- if (*buf == ' ') { \
68
- break; \
69
- } else if (unlikely(! IS_PRINTABLE_ASCII(*buf))) { \
70
- if ((unsigned char)*buf < '\040' || *buf == '\177') { \
71
- *ret = -1; \
72
- return NULL; \
73
- } \
74
- } \
75
- ++buf; \
76
- CHECK_EOF(); \
77
- } \
78
- tok = tok_start; \
79
- toklen = buf - tok_start; \
80
- } while (0)
81
-
82
- static const char* token_char_map =
83
- "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
84
- "\0\1\1\1\1\1\1\1\0\0\1\1\0\1\1\0\1\1\1\1\1\1\1\1\1\1\0\0\0\0\0\0"
85
- "\0\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\0\0\1\1"
86
- "\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\1\0\1\0"
87
- "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
88
- "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
89
- "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
90
- "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0";
91
-
92
- static const char* findchar_fast(const char* buf, const char* buf_end, const char *ranges, size_t ranges_size, int* found)
55
+ #define IS_PRINTABLE_ASCII(c) ((unsigned char)(c)-040u < 0137u)
56
+
57
+ #define CHECK_EOF() \
58
+ if (buf == buf_end) { \
59
+ *ret = -2; \
60
+ return NULL; \
61
+ }
62
+
63
+ #define EXPECT_CHAR(ch) \
64
+ CHECK_EOF(); \
65
+ if (*buf++ != ch) { \
66
+ *ret = -1; \
67
+ return NULL; \
68
+ }
69
+
70
+ #define ADVANCE_TOKEN(tok, toklen) \
71
+ do { \
72
+ const char *tok_start = buf; \
73
+ static const char ALIGNED(16) ranges2[] = "\000\040\177\177"; \
74
+ int found2; \
75
+ buf = findchar_fast(buf, buf_end, ranges2, sizeof(ranges2) - 1, &found2); \
76
+ if (!found2) { \
77
+ CHECK_EOF(); \
78
+ } \
79
+ while (1) { \
80
+ if (*buf == ' ') { \
81
+ break; \
82
+ } else if (unlikely(!IS_PRINTABLE_ASCII(*buf))) { \
83
+ if ((unsigned char)*buf < '\040' || *buf == '\177') { \
84
+ *ret = -1; \
85
+ return NULL; \
86
+ } \
87
+ } \
88
+ ++buf; \
89
+ CHECK_EOF(); \
90
+ } \
91
+ tok = tok_start; \
92
+ toklen = buf - tok_start; \
93
+ } while (0)
94
+
95
+ static const char *token_char_map = "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
96
+ "\0\1\1\1\1\1\1\1\0\0\1\1\0\1\1\0\1\1\1\1\1\1\1\1\1\1\0\0\0\0\0\0"
97
+ "\0\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\0\0\1\1"
98
+ "\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\1\0\1\0"
99
+ "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
100
+ "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
101
+ "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
102
+ "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0";
103
+
104
+ static const char *findchar_fast(const char *buf, const char *buf_end, const char *ranges, size_t ranges_size, int *found)
93
105
  {
94
- *found = 0;
106
+ *found = 0;
95
107
  #if __SSE4_2__
96
- if (likely(buf_end - buf >= 16)) {
97
- __m128i ranges16 = _mm_loadu_si128((const __m128i*)ranges);
98
-
99
- size_t left = (buf_end - buf) & ~15;
100
- do {
101
- __m128i b16 = _mm_loadu_si128((void*)buf);
102
- int r = _mm_cmpestri(ranges16, ranges_size, b16, 16, _SIDD_LEAST_SIGNIFICANT | _SIDD_CMP_RANGES | _SIDD_UBYTE_OPS);
103
- if (unlikely(r != 16)) {
104
- buf += r;
105
- *found = 1;
106
- break;
107
- }
108
- buf += 16;
109
- left -= 16;
110
- } while (likely(left != 0));
111
- }
108
+ if (likely(buf_end - buf >= 16)) {
109
+ __m128i ranges16 = _mm_loadu_si128((const __m128i *)ranges);
110
+
111
+ size_t left = (buf_end - buf) & ~15;
112
+ do {
113
+ __m128i b16 = _mm_loadu_si128((void *)buf);
114
+ int r = _mm_cmpestri(ranges16, ranges_size, b16, 16, _SIDD_LEAST_SIGNIFICANT | _SIDD_CMP_RANGES | _SIDD_UBYTE_OPS);
115
+ if (unlikely(r != 16)) {
116
+ buf += r;
117
+ *found = 1;
118
+ break;
119
+ }
120
+ buf += 16;
121
+ left -= 16;
122
+ } while (likely(left != 0));
123
+ }
124
+ #else
125
+ /* suppress unused parameter warning */
126
+ (void)buf_end;
127
+ (void)ranges;
128
+ (void)ranges_size;
112
129
  #endif
113
- return buf;
130
+ return buf;
114
131
  }
115
132
 
116
- static const char* get_token_to_eol(const char* buf, const char* buf_end,
117
- const char** token, size_t* token_len,
118
- int* ret)
133
+ static const char *get_token_to_eol(const char *buf, const char *buf_end, const char **token, size_t *token_len, int *ret)
119
134
  {
120
- const char* token_start = buf;
121
-
135
+ const char *token_start = buf;
136
+
122
137
  #ifdef __SSE4_2__
123
- static const char ranges1[] =
124
- "\0\010"
125
- /* allow HT */
126
- "\012\037"
127
- /* allow SP and up to but not including DEL */
128
- "\177\177"
129
- /* allow chars w. MSB set */
130
- ;
131
- int found;
132
- buf = findchar_fast(buf, buf_end, ranges1, sizeof(ranges1) - 1, &found);
133
- if (found)
134
- goto FOUND_CTL;
138
+ static const char ranges1[] = "\0\010"
139
+ /* allow HT */
140
+ "\012\037"
141
+ /* allow SP and up to but not including DEL */
142
+ "\177\177"
143
+ /* allow chars w. MSB set */
144
+ ;
145
+ int found;
146
+ buf = findchar_fast(buf, buf_end, ranges1, sizeof(ranges1) - 1, &found);
147
+ if (found)
148
+ goto FOUND_CTL;
135
149
  #else
136
- /* find non-printable char within the next 8 bytes, this is the hottest code; manually inlined */
137
- while (likely(buf_end - buf >= 8)) {
138
- #define DOIT() if (unlikely(! IS_PRINTABLE_ASCII(*buf))) goto NonPrintable; ++buf
139
- DOIT(); DOIT(); DOIT(); DOIT();
140
- DOIT(); DOIT(); DOIT(); DOIT();
150
+ /* find non-printable char within the next 8 bytes, this is the hottest code; manually inlined */
151
+ while (likely(buf_end - buf >= 8)) {
152
+ #define DOIT() \
153
+ do { \
154
+ if (unlikely(!IS_PRINTABLE_ASCII(*buf))) \
155
+ goto NonPrintable; \
156
+ ++buf; \
157
+ } while (0)
158
+ DOIT();
159
+ DOIT();
160
+ DOIT();
161
+ DOIT();
162
+ DOIT();
163
+ DOIT();
164
+ DOIT();
165
+ DOIT();
141
166
  #undef DOIT
142
- continue;
143
- NonPrintable:
144
- if ((likely((unsigned char)*buf < '\040') && likely(*buf != '\011')) || unlikely(*buf == '\177')) {
145
- goto FOUND_CTL;
167
+ continue;
168
+ NonPrintable:
169
+ if ((likely((unsigned char)*buf < '\040') && likely(*buf != '\011')) || unlikely(*buf == '\177')) {
170
+ goto FOUND_CTL;
171
+ }
172
+ ++buf;
146
173
  }
147
- ++buf;
148
- }
149
174
  #endif
150
- for (; ; ++buf) {
151
- CHECK_EOF();
152
- if (unlikely(! IS_PRINTABLE_ASCII(*buf))) {
153
- if ((likely((unsigned char)*buf < '\040') && likely(*buf != '\011')) || unlikely(*buf == '\177')) {
154
- goto FOUND_CTL;
155
- }
175
+ for (;; ++buf) {
176
+ CHECK_EOF();
177
+ if (unlikely(!IS_PRINTABLE_ASCII(*buf))) {
178
+ if ((likely((unsigned char)*buf < '\040') && likely(*buf != '\011')) || unlikely(*buf == '\177')) {
179
+ goto FOUND_CTL;
180
+ }
181
+ }
156
182
  }
157
- }
158
- FOUND_CTL:
159
- if (likely(*buf == '\015')) {
160
- ++buf;
161
- EXPECT_CHAR('\012');
162
- *token_len = buf - 2 - token_start;
163
- } else if (*buf == '\012') {
164
- *token_len = buf - token_start;
165
- ++buf;
166
- } else {
167
- *ret = -1;
168
- return NULL;
169
- }
170
- *token = token_start;
171
-
172
- return buf;
173
- }
174
-
175
- static const char* is_complete(const char* buf, const char* buf_end,
176
- size_t last_len, int* ret)
177
- {
178
- int ret_cnt = 0;
179
- buf = last_len < 3 ? buf : buf + last_len - 3;
180
-
181
- while (1) {
182
- CHECK_EOF();
183
- if (*buf == '\015') {
184
- ++buf;
185
- CHECK_EOF();
186
- EXPECT_CHAR('\012');
187
- ++ret_cnt;
183
+ FOUND_CTL:
184
+ if (likely(*buf == '\015')) {
185
+ ++buf;
186
+ EXPECT_CHAR('\012');
187
+ *token_len = buf - 2 - token_start;
188
188
  } else if (*buf == '\012') {
189
- ++buf;
190
- ++ret_cnt;
189
+ *token_len = buf - token_start;
190
+ ++buf;
191
191
  } else {
192
- ++buf;
193
- ret_cnt = 0;
192
+ *ret = -1;
193
+ return NULL;
194
194
  }
195
- if (ret_cnt == 2) {
196
- return buf;
195
+ *token = token_start;
196
+
197
+ return buf;
198
+ }
199
+
200
+ static const char *is_complete(const char *buf, const char *buf_end, size_t last_len, int *ret)
201
+ {
202
+ int ret_cnt = 0;
203
+ buf = last_len < 3 ? buf : buf + last_len - 3;
204
+
205
+ while (1) {
206
+ CHECK_EOF();
207
+ if (*buf == '\015') {
208
+ ++buf;
209
+ CHECK_EOF();
210
+ EXPECT_CHAR('\012');
211
+ ++ret_cnt;
212
+ } else if (*buf == '\012') {
213
+ ++buf;
214
+ ++ret_cnt;
215
+ } else {
216
+ ++buf;
217
+ ret_cnt = 0;
218
+ }
219
+ if (ret_cnt == 2) {
220
+ return buf;
221
+ }
197
222
  }
198
- }
199
-
200
- *ret = -2;
201
- return NULL;
223
+
224
+ *ret = -2;
225
+ return NULL;
202
226
  }
203
227
 
204
228
  /* *_buf is always within [buf, buf_end) upon success */
205
- static const char* parse_int(const char* buf, const char* buf_end, int* value,
206
- int* ret)
229
+ static const char *parse_int(const char *buf, const char *buf_end, int *value, int *ret)
207
230
  {
208
- int v;
209
- CHECK_EOF();
210
- if (! ('0' <= *buf && *buf <= '9')) {
211
- *ret = -1;
212
- return NULL;
213
- }
214
- v = 0;
215
- for (; ; ++buf) {
231
+ int v;
216
232
  CHECK_EOF();
217
- if ('0' <= *buf && *buf <= '9') {
218
- v = v * 10 + *buf - '0';
219
- } else {
220
- break;
233
+ if (!('0' <= *buf && *buf <= '9')) {
234
+ *ret = -1;
235
+ return NULL;
236
+ }
237
+ v = 0;
238
+ for (;; ++buf) {
239
+ CHECK_EOF();
240
+ if ('0' <= *buf && *buf <= '9') {
241
+ v = v * 10 + *buf - '0';
242
+ } else {
243
+ break;
244
+ }
221
245
  }
222
- }
223
-
224
- *value = v;
225
- return buf;
246
+
247
+ *value = v;
248
+ return buf;
226
249
  }
227
250
 
228
251
  /* returned pointer is always within [buf, buf_end), or null */
229
- static const char* parse_http_version(const char* buf, const char* buf_end,
230
- int* minor_version, int* ret)
252
+ static const char *parse_http_version(const char *buf, const char *buf_end, int *minor_version, int *ret)
231
253
  {
232
- EXPECT_CHAR('H'); EXPECT_CHAR('T'); EXPECT_CHAR('T'); EXPECT_CHAR('P');
233
- EXPECT_CHAR('/'); EXPECT_CHAR('1'); EXPECT_CHAR('.');
234
- return parse_int(buf, buf_end, minor_version, ret);
254
+ EXPECT_CHAR('H');
255
+ EXPECT_CHAR('T');
256
+ EXPECT_CHAR('T');
257
+ EXPECT_CHAR('P');
258
+ EXPECT_CHAR('/');
259
+ EXPECT_CHAR('1');
260
+ EXPECT_CHAR('.');
261
+ return parse_int(buf, buf_end, minor_version, ret);
235
262
  }
236
263
 
237
- static const char* parse_headers(const char* buf, const char* buf_end,
238
- struct phr_header* headers,
239
- size_t* num_headers, size_t max_headers,
240
- int* ret)
264
+ static const char *parse_headers(const char *buf, const char *buf_end, struct phr_header *headers, size_t *num_headers,
265
+ size_t max_headers, int *ret)
241
266
  {
242
- for (; ; ++*num_headers) {
267
+ for (;; ++*num_headers) {
268
+ CHECK_EOF();
269
+ if (*buf == '\015') {
270
+ ++buf;
271
+ EXPECT_CHAR('\012');
272
+ break;
273
+ } else if (*buf == '\012') {
274
+ ++buf;
275
+ break;
276
+ }
277
+ if (*num_headers == max_headers) {
278
+ *ret = -1;
279
+ return NULL;
280
+ }
281
+ if (!(*num_headers != 0 && (*buf == ' ' || *buf == '\t'))) {
282
+ static const char ALIGNED(16) ranges1[] = "::\x00\037";
283
+ int found;
284
+ if (!token_char_map[(unsigned char)*buf]) {
285
+ *ret = -1;
286
+ return NULL;
287
+ }
288
+ /* parsing name, but do not discard SP before colon, see
289
+ * http://www.mozilla.org/security/announce/2006/mfsa2006-33.html */
290
+ headers[*num_headers].name = buf;
291
+ buf = findchar_fast(buf, buf_end, ranges1, sizeof(ranges1) - 1, &found);
292
+ if (!found) {
293
+ CHECK_EOF();
294
+ }
295
+ while (1) {
296
+ if (*buf == ':') {
297
+ break;
298
+ } else if (*buf < ' ') {
299
+ *ret = -1;
300
+ return NULL;
301
+ }
302
+ ++buf;
303
+ CHECK_EOF();
304
+ }
305
+ headers[*num_headers].name_len = buf - headers[*num_headers].name;
306
+ ++buf;
307
+ for (;; ++buf) {
308
+ CHECK_EOF();
309
+ if (!(*buf == ' ' || *buf == '\t')) {
310
+ break;
311
+ }
312
+ }
313
+ } else {
314
+ headers[*num_headers].name = NULL;
315
+ headers[*num_headers].name_len = 0;
316
+ }
317
+ if ((buf = get_token_to_eol(buf, buf_end, &headers[*num_headers].value, &headers[*num_headers].value_len, ret)) == NULL) {
318
+ return NULL;
319
+ }
320
+ }
321
+ return buf;
322
+ }
323
+
324
+ static const char *parse_request(const char *buf, const char *buf_end, const char **method, size_t *method_len, const char **path,
325
+ size_t *path_len, int *minor_version, struct phr_header *headers, size_t *num_headers,
326
+ size_t max_headers, int *ret)
327
+ {
328
+ /* skip first empty line (some clients add CRLF after POST content) */
243
329
  CHECK_EOF();
244
330
  if (*buf == '\015') {
245
- ++buf;
246
- EXPECT_CHAR('\012');
247
- break;
331
+ ++buf;
332
+ EXPECT_CHAR('\012');
248
333
  } else if (*buf == '\012') {
249
- ++buf;
250
- break;
251
- }
252
- if (*num_headers == max_headers) {
253
- *ret = -1;
254
- return NULL;
334
+ ++buf;
255
335
  }
256
- if (! (*num_headers != 0 && (*buf == ' ' || *buf == '\t'))) {
257
- if (! token_char_map[(unsigned char)*buf]) {
258
- *ret = -1;
336
+
337
+ /* parse request line */
338
+ ADVANCE_TOKEN(*method, *method_len);
339
+ ++buf;
340
+ ADVANCE_TOKEN(*path, *path_len);
341
+ ++buf;
342
+ if ((buf = parse_http_version(buf, buf_end, minor_version, ret)) == NULL) {
259
343
  return NULL;
260
- }
261
- /* parsing name, but do not discard SP before colon, see
262
- * http://www.mozilla.org/security/announce/2006/mfsa2006-33.html */
263
- headers[*num_headers].name = buf;
264
- static const char ranges1[] __attribute__((aligned(16))) = "::\x00\037";
265
- int found;
266
- buf = findchar_fast(buf, buf_end, ranges1, sizeof(ranges1) - 1, &found);
267
- if (! found) {
268
- CHECK_EOF();
269
- }
270
- while (1) {
271
- if (*buf == ':') {
272
- break;
273
- } else if (*buf < ' ') {
274
- *ret = -1;
275
- return NULL;
276
- }
344
+ }
345
+ if (*buf == '\015') {
346
+ ++buf;
347
+ EXPECT_CHAR('\012');
348
+ } else if (*buf == '\012') {
277
349
  ++buf;
278
- CHECK_EOF();
279
- }
280
- headers[*num_headers].name_len = buf - headers[*num_headers].name;
281
- ++buf;
282
- for (; ; ++buf) {
283
- CHECK_EOF();
284
- if (! (*buf == ' ' || *buf == '\t')) {
285
- break;
286
- }
287
- }
288
350
  } else {
289
- headers[*num_headers].name = NULL;
290
- headers[*num_headers].name_len = 0;
351
+ *ret = -1;
352
+ return NULL;
353
+ }
354
+
355
+ return parse_headers(buf, buf_end, headers, num_headers, max_headers, ret);
356
+ }
357
+
358
+ int phr_parse_request(const char *buf_start, size_t len, const char **method, size_t *method_len, const char **path,
359
+ size_t *path_len, int *minor_version, struct phr_header *headers, size_t *num_headers, size_t last_len)
360
+ {
361
+ const char *buf = buf_start, *buf_end = buf_start + len;
362
+ size_t max_headers = *num_headers;
363
+ int r;
364
+
365
+ *method = NULL;
366
+ *method_len = 0;
367
+ *path = NULL;
368
+ *path_len = 0;
369
+ *minor_version = -1;
370
+ *num_headers = 0;
371
+
372
+ /* if last_len != 0, check if the request is complete (a fast countermeasure
373
+ againt slowloris */
374
+ if (last_len != 0 && is_complete(buf, buf_end, last_len, &r) == NULL) {
375
+ return r;
291
376
  }
292
- if ((buf = get_token_to_eol(buf, buf_end, &headers[*num_headers].value,
293
- &headers[*num_headers].value_len, ret))
294
- == NULL) {
295
- return NULL;
377
+
378
+ if ((buf = parse_request(buf, buf_end, method, method_len, path, path_len, minor_version, headers, num_headers, max_headers,
379
+ &r)) == NULL) {
380
+ return r;
296
381
  }
297
- }
298
- return buf;
382
+
383
+ return (int)(buf - buf_start);
299
384
  }
300
385
 
301
- const char* parse_request(const char* buf, const char* buf_end,
302
- const char** method, size_t* method_len,
303
- const char** path, size_t* path_len,
304
- int* minor_version, struct phr_header* headers,
305
- size_t* num_headers, size_t max_headers, int* ret)
386
+ static const char *parse_response(const char *buf, const char *buf_end, int *minor_version, int *status, const char **msg,
387
+ size_t *msg_len, struct phr_header *headers, size_t *num_headers, size_t max_headers, int *ret)
306
388
  {
307
- /* skip first empty line (some clients add CRLF after POST content) */
308
- CHECK_EOF();
309
- if (*buf == '\015') {
310
- ++buf;
311
- EXPECT_CHAR('\012');
312
- } else if (*buf == '\012') {
313
- ++buf;
314
- }
315
-
316
- /* parse request line */
317
- ADVANCE_TOKEN(*method, *method_len);
318
- ++buf;
319
- ADVANCE_TOKEN(*path, *path_len);
320
- ++buf;
321
- if ((buf = parse_http_version(buf, buf_end, minor_version, ret)) == NULL) {
322
- return NULL;
323
- }
324
- if (*buf == '\015') {
325
- ++buf;
326
- EXPECT_CHAR('\012');
327
- } else if (*buf == '\012') {
328
- ++buf;
329
- } else {
330
- *ret = -1;
331
- return NULL;
332
- }
333
-
334
- return parse_headers(buf, buf_end, headers, num_headers, max_headers, ret);
389
+ /* parse "HTTP/1.x" */
390
+ if ((buf = parse_http_version(buf, buf_end, minor_version, ret)) == NULL) {
391
+ return NULL;
392
+ }
393
+ /* skip space */
394
+ if (*buf++ != ' ') {
395
+ *ret = -1;
396
+ return NULL;
397
+ }
398
+ /* parse status code */
399
+ if ((buf = parse_int(buf, buf_end, status, ret)) == NULL) {
400
+ return NULL;
401
+ }
402
+ /* skip space */
403
+ if (*buf++ != ' ') {
404
+ *ret = -1;
405
+ return NULL;
406
+ }
407
+ /* get message */
408
+ if ((buf = get_token_to_eol(buf, buf_end, msg, msg_len, ret)) == NULL) {
409
+ return NULL;
410
+ }
411
+
412
+ return parse_headers(buf, buf_end, headers, num_headers, max_headers, ret);
335
413
  }
336
414
 
337
- int phr_parse_request(const char* buf_start, size_t len, const char** method,
338
- size_t* method_len, const char** path, size_t* path_len,
339
- int* minor_version, struct phr_header* headers,
340
- size_t* num_headers, size_t last_len)
415
+ int phr_parse_response(const char *buf_start, size_t len, int *minor_version, int *status, const char **msg, size_t *msg_len,
416
+ struct phr_header *headers, size_t *num_headers, size_t last_len)
341
417
  {
342
- const char * buf = buf_start, * buf_end = buf_start + len;
343
- size_t max_headers = *num_headers;
344
- int r;
345
-
346
- *method = NULL;
347
- *method_len = 0;
348
- *path = NULL;
349
- *path_len = 0;
350
- *minor_version = -1;
351
- *num_headers = 0;
352
-
353
- /* if last_len != 0, check if the request is complete (a fast countermeasure
354
- againt slowloris */
355
- if (last_len != 0 && is_complete(buf, buf_end, last_len, &r) == NULL) {
356
- return r;
357
- }
358
-
359
- if ((buf = parse_request(buf, buf_end, method, method_len, path, path_len,
360
- minor_version, headers, num_headers, max_headers,
361
- &r))
362
- == NULL) {
363
- return r;
364
- }
365
-
366
- return (int)(buf - buf_start);
418
+ const char *buf = buf_start, *buf_end = buf + len;
419
+ size_t max_headers = *num_headers;
420
+ int r;
421
+
422
+ *minor_version = -1;
423
+ *status = 0;
424
+ *msg = NULL;
425
+ *msg_len = 0;
426
+ *num_headers = 0;
427
+
428
+ /* if last_len != 0, check if the response is complete (a fast countermeasure
429
+ against slowloris) */
430
+ if (last_len != 0 && is_complete(buf, buf_end, last_len, &r) == NULL) {
431
+ return r;
432
+ }
433
+
434
+ if ((buf = parse_response(buf, buf_end, minor_version, status, msg, msg_len, headers, num_headers, max_headers, &r)) == NULL) {
435
+ return r;
436
+ }
437
+
438
+ return (int)(buf - buf_start);
367
439
  }
368
440
 
369
- static const char* parse_response(const char* buf, const char* buf_end,
370
- int* minor_version, int* status,
371
- const char** msg, size_t* msg_len,
372
- struct phr_header* headers,
373
- size_t* num_headers, size_t max_headers,
374
- int* ret)
441
+ int phr_parse_headers(const char *buf_start, size_t len, struct phr_header *headers, size_t *num_headers, size_t last_len)
375
442
  {
376
- /* parse "HTTP/1.x" */
377
- if ((buf = parse_http_version(buf, buf_end, minor_version, ret)) == NULL) {
378
- return NULL;
379
- }
380
- /* skip space */
381
- if (*buf++ != ' ') {
382
- *ret = -1;
383
- return NULL;
384
- }
385
- /* parse status code */
386
- if ((buf = parse_int(buf, buf_end, status, ret)) == NULL) {
387
- return NULL;
388
- }
389
- /* skip space */
390
- if (*buf++ != ' ') {
391
- *ret = -1;
392
- return NULL;
393
- }
394
- /* get message */
395
- if ((buf = get_token_to_eol(buf, buf_end, msg, msg_len, ret)) == NULL) {
396
- return NULL;
397
- }
398
-
399
- return parse_headers(buf, buf_end, headers, num_headers, max_headers, ret);
443
+ const char *buf = buf_start, *buf_end = buf + len;
444
+ size_t max_headers = *num_headers;
445
+ int r;
446
+
447
+ *num_headers = 0;
448
+
449
+ /* if last_len != 0, check if the headers are complete (a fast countermeasure
450
+ against slowloris) */
451
+ if (last_len != 0 && is_complete(buf, buf_end, last_len, &r) == NULL) {
452
+ return r;
453
+ }
454
+
455
+ if ((buf = parse_headers(buf, buf_end, headers, num_headers, max_headers, &r)) == NULL) {
456
+ return r;
457
+ }
458
+
459
+ return (int)(buf - buf_start);
400
460
  }
401
461
 
402
- int phr_parse_response(const char* buf_start, size_t len, int* minor_version,
403
- int* status, const char** msg, size_t* msg_len,
404
- struct phr_header* headers, size_t* num_headers,
405
- size_t last_len)
462
+ enum {
463
+ CHUNKED_IN_CHUNK_SIZE,
464
+ CHUNKED_IN_CHUNK_EXT,
465
+ CHUNKED_IN_CHUNK_DATA,
466
+ CHUNKED_IN_CHUNK_CRLF,
467
+ CHUNKED_IN_TRAILERS_LINE_HEAD,
468
+ CHUNKED_IN_TRAILERS_LINE_MIDDLE
469
+ };
470
+
471
+ static int decode_hex(int ch)
406
472
  {
407
- const char * buf = buf_start, * buf_end = buf + len;
408
- size_t max_headers = *num_headers;
409
- int r;
410
-
411
- *minor_version = -1;
412
- *status = 0;
413
- *msg = NULL;
414
- *msg_len = 0;
415
- *num_headers = 0;
416
-
417
- /* if last_len != 0, check if the response is complete (a fast countermeasure
418
- against slowloris */
419
- if (last_len != 0 && is_complete(buf, buf_end, last_len, &r) == NULL) {
420
- return r;
421
- }
422
-
423
- if ((buf = parse_response(buf, buf_end, minor_version, status, msg, msg_len,
424
- headers, num_headers, max_headers, &r))
425
- == NULL) {
426
- return r;
427
- }
428
-
429
- return (int)(buf - buf_start);
473
+ if ('0' <= ch && ch <= '9') {
474
+ return ch - '0';
475
+ } else if ('A' <= ch && ch <= 'F') {
476
+ return ch - 'A' + 0xa;
477
+ } else if ('a' <= ch && ch <= 'f') {
478
+ return ch - 'a' + 0xa;
479
+ } else {
480
+ return -1;
481
+ }
482
+ }
483
+
484
+ ssize_t phr_decode_chunked(struct phr_chunked_decoder *decoder, char *buf, size_t *_bufsz)
485
+ {
486
+ size_t dst = 0, src = 0, bufsz = *_bufsz;
487
+ ssize_t ret = -2; /* incomplete */
488
+
489
+ while (1) {
490
+ switch (decoder->_state) {
491
+ case CHUNKED_IN_CHUNK_SIZE:
492
+ for (;; ++src) {
493
+ int v;
494
+ if (src == bufsz)
495
+ goto Exit;
496
+ if ((v = decode_hex(buf[src])) == -1) {
497
+ if (decoder->_hex_count == 0) {
498
+ ret = -1;
499
+ goto Exit;
500
+ }
501
+ break;
502
+ }
503
+ if (decoder->_hex_count == sizeof(size_t) * 2) {
504
+ ret = -1;
505
+ goto Exit;
506
+ }
507
+ decoder->bytes_left_in_chunk = decoder->bytes_left_in_chunk * 16 + v;
508
+ ++decoder->_hex_count;
509
+ }
510
+ decoder->_hex_count = 0;
511
+ decoder->_state = CHUNKED_IN_CHUNK_EXT;
512
+ /* fallthru */
513
+ case CHUNKED_IN_CHUNK_EXT:
514
+ /* RFC 7230 A.2 "Line folding in chunk extensions is disallowed" */
515
+ for (;; ++src) {
516
+ if (src == bufsz)
517
+ goto Exit;
518
+ if (buf[src] == '\012')
519
+ break;
520
+ }
521
+ ++src;
522
+ if (decoder->bytes_left_in_chunk == 0) {
523
+ if (decoder->consume_trailer) {
524
+ decoder->_state = CHUNKED_IN_TRAILERS_LINE_HEAD;
525
+ break;
526
+ } else {
527
+ goto Complete;
528
+ }
529
+ }
530
+ decoder->_state = CHUNKED_IN_CHUNK_DATA;
531
+ /* fallthru */
532
+ case CHUNKED_IN_CHUNK_DATA: {
533
+ size_t avail = bufsz - src;
534
+ if (avail < decoder->bytes_left_in_chunk) {
535
+ if (dst != src)
536
+ memmove(buf + dst, buf + src, avail);
537
+ src += avail;
538
+ dst += avail;
539
+ decoder->bytes_left_in_chunk -= avail;
540
+ goto Exit;
541
+ }
542
+ if (dst != src)
543
+ memmove(buf + dst, buf + src, decoder->bytes_left_in_chunk);
544
+ src += decoder->bytes_left_in_chunk;
545
+ dst += decoder->bytes_left_in_chunk;
546
+ decoder->bytes_left_in_chunk = 0;
547
+ decoder->_state = CHUNKED_IN_CHUNK_CRLF;
548
+ }
549
+ /* fallthru */
550
+ case CHUNKED_IN_CHUNK_CRLF:
551
+ for (;; ++src) {
552
+ if (src == bufsz)
553
+ goto Exit;
554
+ if (buf[src] != '\015')
555
+ break;
556
+ }
557
+ if (buf[src] != '\012') {
558
+ ret = -1;
559
+ goto Exit;
560
+ }
561
+ ++src;
562
+ decoder->_state = CHUNKED_IN_CHUNK_SIZE;
563
+ break;
564
+ case CHUNKED_IN_TRAILERS_LINE_HEAD:
565
+ for (;; ++src) {
566
+ if (src == bufsz)
567
+ goto Exit;
568
+ if (buf[src] != '\015')
569
+ break;
570
+ }
571
+ if (buf[src++] == '\012')
572
+ goto Complete;
573
+ decoder->_state = CHUNKED_IN_TRAILERS_LINE_MIDDLE;
574
+ /* fallthru */
575
+ case CHUNKED_IN_TRAILERS_LINE_MIDDLE:
576
+ for (;; ++src) {
577
+ if (src == bufsz)
578
+ goto Exit;
579
+ if (buf[src] == '\012')
580
+ break;
581
+ }
582
+ ++src;
583
+ decoder->_state = CHUNKED_IN_TRAILERS_LINE_HEAD;
584
+ break;
585
+ default:
586
+ assert(!"decoder is corrupt");
587
+ }
588
+ }
589
+
590
+ Complete:
591
+ ret = bufsz - src;
592
+ Exit:
593
+ if (dst != src)
594
+ memmove(buf + dst, buf + src, bufsz - src);
595
+ *_bufsz = dst;
596
+ return ret;
430
597
  }
431
598
 
432
599
  #undef CHECK_EOF