pico_http_parser 0.0.3 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 417f57e6c7289be7606e9fdda1b65346751f3c10
4
- data.tar.gz: 420957b9c7d8ed9338c29d20030109ea0f6c17c0
3
+ metadata.gz: 984780783c1c9bfce3fdcc058187bfe3fd553ceb
4
+ data.tar.gz: 2ad841f14ab2c12a79211634ae6758e22009e0f8
5
5
  SHA512:
6
- metadata.gz: 1cd7184656ea42efb5f22aa27a42d30ed381f241c665859fbc9e94937195248db6216a4be25903799841148c51842fd2fa93bf0ddf39b699cb48192eb44d0e1a
7
- data.tar.gz: 8656caa1f569d72acc097392e67c9316e26c7aa5546d165d1d600566e80a6e1057660696e8096fb00db9a6b7a88860fa890d8a801dbdbedf9f877c4f06e51f62
6
+ metadata.gz: c3dca6150fc78bd0946f4787b7f91dad54a7942ff7bc0f301a7580e277851fed9256008f740d930c4e757055230066af2c0d5ea72c3283f4d99db2f09a8ef1c4
7
+ data.tar.gz: 623cdcfc79c5c5cec7293e72996a92826a64b1e3d6fa05b1449631067b8cd99c2e80685b6c648e898042aa9813af5c7cb117ef80134c09ba32f28f942ebbefae
@@ -2,13 +2,19 @@ require File.expand_path(File.dirname(__FILE__) + '/bench_helper')
2
2
 
3
3
  require 'pico_http_parser'
4
4
 
5
- # request_body = "GET /foo/bar/baz.html?key=value HTTP/1.0\r\nHost: blooperblorp\r\n\r\n"
6
- request_body = "GET /foo/bar/baz.html?key=value HTTP/1.0\r\nHost: blooperblorp\r\nCookie: foobar\r\nX-Forwared-For: 127.0.0.1\r\n\r\n"
7
- # request_body = "GET /foo/bar/baz.html?key=value HTTP/1.0\r\n\r\n"
5
+ request_bodys = [
6
+ "GET /foo/bar/baz.html?key=value HTTP/1.0\r\nHost: blooperblorp\r\n\r\n",
7
+ "GET /foo/bar/baz.html?key=value HTTP/1.0\r\nHost: blooperblorp\r\nUser-Agent: Mozilla/5.0\r\n\r\n",
8
+ "GET /foo/bar/baz.html?key=value HTTP/1.0\r\nHost: blooperblorp\r\nCookie: foobar\r\nX-Forwarded-For: 127.0.0.1\r\nUser-Agent: Mozilla/5.0\r\n\r\n",
9
+ "GET /foo/bar/baz.html?key=value HTTP/1.0\r\nHost: blooperblorp\r\nCookie: foobar\r\nX-Forwarded-For: 127.0.0.1\r\nUser-Agent: Mozilla/5.0\r\nAccept: X-5\r\nConnection: XXXXXX-6\r\nReferer: XXXXXXXX-7\r\nAccept-Encoding: XXXXXXX8\r\nCache-Control: XXXXXXXX9\r\nIf-Modified-Since: XXXXXXXXXXXXXXX10\r\n\r\n",
10
+ "GET /foo/bar/baz.html?key=value HTTP/1.0\r\n\r\n"
11
+ ];
8
12
 
13
+ request_bodys.each do |request_body|
14
+ puts("benchmark #{request_body}");
9
15
  Benchmark.ips do |x|
10
16
  x.time = 5
11
- x.warmup = 2
17
+ x.warmup = 1
12
18
 
13
19
  x.report("PicoHTTPParser") {
14
20
  env = {}
@@ -21,7 +27,7 @@ Benchmark.ips do |x|
21
27
  x.report("Unicorn's HttpParser") {
22
28
  parser = HttpParser.new
23
29
  parser.buf << request_body
24
- parser.parse
30
+ env = parser.parse
25
31
  }
26
32
  rescue LoadError
27
33
  puts("Can't benchmark unicorn as it couldn't be loaded.")
@@ -29,3 +35,5 @@ Benchmark.ips do |x|
29
35
 
30
36
  x.compare!
31
37
  end
38
+ end
39
+
@@ -14,6 +14,11 @@ static VALUE script_name_key;
14
14
  static VALUE server_protocol_key;
15
15
  static VALUE query_string_key;
16
16
 
17
+ static VALUE vacant_string_val;
18
+
19
+ static VALUE http10_val;
20
+ static VALUE http11_val;
21
+
17
22
  struct common_header {
18
23
  const char * name;
19
24
  size_t name_len;
@@ -147,8 +152,8 @@ VALUE phr_parse_http_request(VALUE self, VALUE buf, VALUE envref)
147
152
  char tmp[MAX_HEADER_NAME_LEN + sizeof("HTTP_") - 1];
148
153
  VALUE last_value;
149
154
 
150
- buf_str = StringValuePtr(buf);
151
- buf_len = strlen(buf_str);
155
+ buf_str = RSTRING_PTR(buf);
156
+ buf_len = RSTRING_LEN(buf);
152
157
  num_headers = MAX_HEADERS;
153
158
  ret = phr_parse_request(buf_str, buf_len, &method, &method_len, &path,
154
159
  &path_len, &minor_version, headers, &num_headers, 0);
@@ -157,10 +162,8 @@ VALUE phr_parse_http_request(VALUE self, VALUE buf, VALUE envref)
157
162
 
158
163
  rb_hash_aset(envref, request_method_key, rb_str_new(method,method_len));
159
164
  rb_hash_aset(envref, request_uri_key, rb_str_new(path, path_len));
160
- rb_hash_aset(envref, script_name_key, rb_str_new2(""));
161
- strcpy(tmp, "HTTP/1.");
162
- tmp[7] = 48 + ((minor_version > 1 || minor_version < 0 ) ? 0 : minor_version);
163
- rb_hash_aset(envref, server_protocol_key, rb_str_new(tmp, sizeof("HTTP/1.0") - 1));
165
+ rb_hash_aset(envref, script_name_key, vacant_string_val);
166
+ rb_hash_aset(envref, server_protocol_key, (minor_version == 1) ? http11_val : http10_val);
164
167
 
165
168
  /* PATH_INFO QUERY_STRING */
166
169
  path_len = find_ch(path, path_len, '#'); /* strip off all text after # after storing request_uri */
@@ -172,7 +175,7 @@ VALUE phr_parse_http_request(VALUE self, VALUE buf, VALUE envref)
172
175
  }
173
176
  if (question_at != path_len) ++question_at;
174
177
  rb_hash_aset(envref, query_string_key, rb_str_new(path + question_at, path_len - question_at));
175
-
178
+
176
179
  last_value = Qnil;
177
180
  for (i = 0; i < num_headers; ++i) {
178
181
  if (headers[i].name != NULL) {
@@ -200,6 +203,7 @@ VALUE phr_parse_http_request(VALUE self, VALUE buf, VALUE envref)
200
203
  env_key = rb_str_new(name, name_len);
201
204
  }
202
205
  }
206
+
203
207
  slot = rb_hash_aref(envref, env_key);
204
208
  if ( slot != Qnil ) {
205
209
  rb_str_cat2(slot, ", ");
@@ -209,11 +213,13 @@ VALUE phr_parse_http_request(VALUE self, VALUE buf, VALUE envref)
209
213
  rb_hash_aset(envref, env_key, slot);
210
214
  last_value = slot;
211
215
  }
216
+
212
217
  } else {
213
218
  /* continuing lines of a multiline header */
214
219
  if ( last_value != Qnil )
215
220
  rb_str_cat(last_value, headers[i].value, headers[i].value_len);
216
221
  }
222
+
217
223
  }
218
224
 
219
225
  done:
@@ -247,6 +253,13 @@ void Init_pico_http_parser()
247
253
  set_common_header("USER-AGENT",sizeof("USER-AGENT") - 1, 0);
248
254
  set_common_header("X-FORWARDED-FOR",sizeof("X-FORWARDED-FOR") - 1, 0);
249
255
 
256
+ http10_val = rb_obj_freeze(rb_str_new2("HTTP/1.0"));
257
+ rb_gc_register_address(&http10_val);
258
+ http11_val = rb_obj_freeze(rb_str_new2("HTTP/1.1"));
259
+ rb_gc_register_address(&http11_val);
260
+ vacant_string_val = rb_obj_freeze(rb_str_new("",0));
261
+ rb_gc_register_address(&vacant_string_val);
262
+
250
263
  cPicoHTTPParser = rb_const_get(rb_cObject, rb_intern("PicoHTTPParser"));
251
264
  rb_define_module_function(cPicoHTTPParser, "parse_http_request", phr_parse_http_request, 2);
252
265
  }
@@ -0,0 +1,6 @@
1
+ # requires clang-format >= 3.6
2
+ BasedOnStyle: "LLVM"
3
+ IndentWidth: 4
4
+ ColumnLimit: 132
5
+ BreakBeforeBraces: Linux
6
+ AllowShortFunctionsOnASingleLine: None
@@ -0,0 +1,7 @@
1
+ project picohttpparser ;
2
+
3
+ lib picohttpparser : picohttpparser.c ;
4
+
5
+ unit-test test
6
+ : picohttpparser picotest/picotest.c test.c
7
+ : <testing.launcher>prove ;
@@ -14,6 +14,98 @@ Check out [test.c] to find out how to use the parser.
14
14
 
15
15
  The software is dual-licensed under the Perl License or the MIT License.
16
16
 
17
+ Usage
18
+ -----
19
+
20
+ The library exposes four functions: `phr_parse_request`, `phr_parse_response`, `phr_parse_headers`, `phr_decode_chunked`.
21
+
22
+ ### phr_parse_request
23
+
24
+ The example below reads an HTTP request from socket `sock` using `read(2)`, parses it using `phr_parse_request`, and prints the details.
25
+
26
+ ```
27
+ char buf[4096], *method, *path;
28
+ int pret, minor_version;
29
+ struct phr_header headers[100];
30
+ size_t buflen = 0, prevbuflen = 0, method_len, path_len, num_headers;
31
+ ssize_t rret;
32
+
33
+ while (1) {
34
+ /* read the request */
35
+ while ((rret = read(sock, buf + buflen, sizeof(buf) - buflen)) == -1 && errno == EINTR)
36
+ ;
37
+ if (rret <= 0)
38
+ return IOError;
39
+ prevbuflen = buflen;
40
+ buflen += rret;
41
+ /* parse the request */
42
+ num_headers = sizeof(headers) / sizeof(headers[0]);
43
+ pret = phr_parse_request(buf, buflen, &method, &method_len, &path, &path_len,
44
+ &minor_version, headers, &num_headers, prevbuflen);
45
+ if (pret > 0)
46
+ break; /* successfully parsed the request */
47
+ else if (pret == -1)
48
+ return ParseError;
49
+ /* request is incomplete, continue the loop */
50
+ assert(pret == -2);
51
+ if (buflen == sizeof(buf))
52
+ return RequestIsTooLongError;
53
+ }
54
+
55
+ printf("request is %d bytes long\n", pret);
56
+ printf("method is %.*s\n", (int)method_len, method);
57
+ printf("path is %.*s\n", (int)path_len, path);
58
+ printf("HTTP version is 1.%d\n", minor_version);
59
+ printf("headers:\n");
60
+ for (i = 0; i != num_headers; ++i) {
61
+ printf("%.*s: %.*s\n", (int)headers[i].name_len, headers[i].name,
62
+ (int)headers[i].value_len, headers[i].value);
63
+ }
64
+ ```
65
+
66
+ ### phr_parse_response, phr_parse_headers
67
+
68
+ `phr_parse_response` and `phr_parse_headers` provide interfaces similar to `phr_parse_request`. `phr_parse_response` parses an HTTP response, and `phr_parse_headers` parses the headers only.
69
+
70
+ ### phr_decode_chunked
71
+
72
+ The example below decodes incoming data in chunked-encoding. The data is decoded in-place.
73
+
74
+ ```
75
+ struct phr_chunked_decoder decoder = {}; /* zero-clear */
76
+ char *buf = malloc(4096);
77
+ size_t size = 0, capacity = 4096, rsize;
78
+ ssize_t rret, pret;
79
+
80
+ /* set consume_trailer to 1 to discard the trailing header, or the application
81
+ * should call phr_parse_headers to parse the trailing header */
82
+ decoder.consume_trailer = 1;
83
+
84
+ do {
85
+ /* expand the buffer if necessary */
86
+ if (size == capacity) {
87
+ capacity *= 2;
88
+ buf = realloc(buf, capacity);
89
+ assert(buf != NULL);
90
+ }
91
+ /* read */
92
+ while ((rret = read(sock, buf + size, capacity - size)) == -1 && errno == EINTR)
93
+ ;
94
+ if (rret <= 0)
95
+ return IOError;
96
+ /* decode */
97
+ rsize = rret;
98
+ pret = phr_decode_chunked(&decoder, buf + size, &rsize);
99
+ if (pret == -1)
100
+ return ParseError;
101
+ size += rsize;
102
+ } while (pret == -2);
103
+
104
+ /* successfully decoded the chunked data */
105
+ assert(pret >= 0);
106
+ printf("decoded data is at %p (%zu bytes)\n", buf, size);
107
+ ```
108
+
17
109
  Benchmark
18
110
  ---------
19
111
 
@@ -28,26 +28,39 @@
28
28
  #include <stdio.h>
29
29
  #include "picohttpparser.h"
30
30
 
31
- #define REQ "GET /wp-content/uploads/2010/03/hello-kitty-darth-vader-pink.jpg HTTP/1.1\r\nHost: www.kittyhell.com\r\nUser-Agent: Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.6; ja-JP-mac; rv:1.9.2.3) Gecko/20100401 Firefox/3.6.3 Pathtraq/0.9\r\nAccept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8\r\nAccept-Language: ja,en-us;q=0.7,en;q=0.3\r\nAccept-Encoding: gzip,deflate\r\nAccept-Charset: Shift_JIS,utf-8;q=0.7,*;q=0.7\r\nKeep-Alive: 115\r\nConnection: keep-alive\r\nCookie: wp_ozh_wsa_visits=2; wp_ozh_wsa_visit_lasttime=xxxxxxxxxx; __utma=xxxxxxxxx.xxxxxxxxxx.xxxxxxxxxx.xxxxxxxxxx.xxxxxxxxxx.x; __utmz=xxxxxxxxx.xxxxxxxxxx.x.x.utmccn=(referral)|utmcsr=reader.livedoor.com|utmcct=/reader/|utmcmd=referral\r\n\r\n"
31
+ #define REQ \
32
+ "GET /wp-content/uploads/2010/03/hello-kitty-darth-vader-pink.jpg HTTP/1.1\r\n" \
33
+ "Host: www.kittyhell.com\r\n" \
34
+ "User-Agent: Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.6; ja-JP-mac; rv:1.9.2.3) Gecko/20100401 Firefox/3.6.3 " \
35
+ "Pathtraq/0.9\r\n" \
36
+ "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8\r\n" \
37
+ "Accept-Language: ja,en-us;q=0.7,en;q=0.3\r\n" \
38
+ "Accept-Encoding: gzip,deflate\r\n" \
39
+ "Accept-Charset: Shift_JIS,utf-8;q=0.7,*;q=0.7\r\n" \
40
+ "Keep-Alive: 115\r\n" \
41
+ "Connection: keep-alive\r\n" \
42
+ "Cookie: wp_ozh_wsa_visits=2; wp_ozh_wsa_visit_lasttime=xxxxxxxxxx; " \
43
+ "__utma=xxxxxxxxx.xxxxxxxxxx.xxxxxxxxxx.xxxxxxxxxx.xxxxxxxxxx.x; " \
44
+ "__utmz=xxxxxxxxx.xxxxxxxxxx.x.x.utmccn=(referral)|utmcsr=reader.livedoor.com|utmcct=/reader/|utmcmd=referral\r\n" \
45
+ "\r\n"
32
46
 
33
47
  int main(void)
34
48
  {
35
- const char* method;
36
- size_t method_len;
37
- const char* path;
38
- size_t path_len;
39
- int minor_version;
40
- struct phr_header headers[32];
41
- size_t num_headers;
42
- int i, ret;
43
-
44
- for (i = 0; i < 10000000; i++) {
45
- num_headers = sizeof(headers) / sizeof(headers[0]);
46
- ret = phr_parse_request(REQ, sizeof(REQ) - 1, &method, &method_len, &path,
47
- &path_len, &minor_version, headers, &num_headers,
48
- 0);
49
- assert(ret == sizeof(REQ) - 1);
50
- }
51
-
52
- return 0;
49
+ const char *method;
50
+ size_t method_len;
51
+ const char *path;
52
+ size_t path_len;
53
+ int minor_version;
54
+ struct phr_header headers[32];
55
+ size_t num_headers;
56
+ int i, ret;
57
+
58
+ for (i = 0; i < 10000000; i++) {
59
+ num_headers = sizeof(headers) / sizeof(headers[0]);
60
+ ret = phr_parse_request(REQ, sizeof(REQ) - 1, &method, &method_len, &path, &path_len, &minor_version, headers, &num_headers,
61
+ 0);
62
+ assert(ret == sizeof(REQ) - 1);
63
+ }
64
+
65
+ return 0;
53
66
  }
@@ -24,409 +24,576 @@
24
24
  * IN THE SOFTWARE.
25
25
  */
26
26
 
27
+ #include <assert.h>
27
28
  #include <stddef.h>
29
+ #include <string.h>
28
30
  #ifdef __SSE4_2__
29
- # include <x86intrin.h>
31
+ #ifdef _MSC_VER
32
+ #include <nmmintrin.h>
33
+ #else
34
+ #include <x86intrin.h>
35
+ #endif
30
36
  #endif
31
37
  #include "picohttpparser.h"
32
38
 
33
39
  /* $Id$ */
34
40
 
35
41
  #if __GNUC__ >= 3
36
- # define likely(x) __builtin_expect(!!(x), 1)
37
- # define unlikely(x) __builtin_expect(!!(x), 0)
42
+ #define likely(x) __builtin_expect(!!(x), 1)
43
+ #define unlikely(x) __builtin_expect(!!(x), 0)
44
+ #else
45
+ #define likely(x) (x)
46
+ #define unlikely(x) (x)
47
+ #endif
48
+
49
+ #ifdef _MSC_VER
50
+ #define ALIGNED(n) _declspec(align(n))
38
51
  #else
39
- # define likely(x) (x)
40
- # define unlikely(x) (x)
52
+ #define ALIGNED(n) __attribute__((aligned(n)))
41
53
  #endif
42
54
 
43
- #define IS_PRINTABLE_ASCII(c) ((unsigned char)(c) - 040u < 0137u)
44
-
45
- #define CHECK_EOF() \
46
- if (buf == buf_end) { \
47
- *ret = -2; \
48
- return NULL; \
49
- }
50
-
51
- #define EXPECT_CHAR(ch) \
52
- CHECK_EOF(); \
53
- if (*buf++ != ch) { \
54
- *ret = -1; \
55
- return NULL; \
56
- }
57
-
58
- #define ADVANCE_TOKEN(tok, toklen) do { \
59
- const char* tok_start = buf; \
60
- static const char ranges2[] __attribute__((aligned(16))) = "\000\040\177\177"; \
61
- int found2; \
62
- buf = findchar_fast(buf, buf_end, ranges2, sizeof(ranges2) - 1, &found2); \
63
- if (! found2) { \
64
- CHECK_EOF(); \
65
- } \
66
- while (1) { \
67
- if (*buf == ' ') { \
68
- break; \
69
- } else if (unlikely(! IS_PRINTABLE_ASCII(*buf))) { \
70
- if ((unsigned char)*buf < '\040' || *buf == '\177') { \
71
- *ret = -1; \
72
- return NULL; \
73
- } \
74
- } \
75
- ++buf; \
76
- CHECK_EOF(); \
77
- } \
78
- tok = tok_start; \
79
- toklen = buf - tok_start; \
80
- } while (0)
81
-
82
- static const char* token_char_map =
83
- "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
84
- "\0\1\1\1\1\1\1\1\0\0\1\1\0\1\1\0\1\1\1\1\1\1\1\1\1\1\0\0\0\0\0\0"
85
- "\0\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\0\0\1\1"
86
- "\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\1\0\1\0"
87
- "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
88
- "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
89
- "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
90
- "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0";
91
-
92
- static const char* findchar_fast(const char* buf, const char* buf_end, const char *ranges, size_t ranges_size, int* found)
55
+ #define IS_PRINTABLE_ASCII(c) ((unsigned char)(c)-040u < 0137u)
56
+
57
+ #define CHECK_EOF() \
58
+ if (buf == buf_end) { \
59
+ *ret = -2; \
60
+ return NULL; \
61
+ }
62
+
63
+ #define EXPECT_CHAR(ch) \
64
+ CHECK_EOF(); \
65
+ if (*buf++ != ch) { \
66
+ *ret = -1; \
67
+ return NULL; \
68
+ }
69
+
70
+ #define ADVANCE_TOKEN(tok, toklen) \
71
+ do { \
72
+ const char *tok_start = buf; \
73
+ static const char ALIGNED(16) ranges2[] = "\000\040\177\177"; \
74
+ int found2; \
75
+ buf = findchar_fast(buf, buf_end, ranges2, sizeof(ranges2) - 1, &found2); \
76
+ if (!found2) { \
77
+ CHECK_EOF(); \
78
+ } \
79
+ while (1) { \
80
+ if (*buf == ' ') { \
81
+ break; \
82
+ } else if (unlikely(!IS_PRINTABLE_ASCII(*buf))) { \
83
+ if ((unsigned char)*buf < '\040' || *buf == '\177') { \
84
+ *ret = -1; \
85
+ return NULL; \
86
+ } \
87
+ } \
88
+ ++buf; \
89
+ CHECK_EOF(); \
90
+ } \
91
+ tok = tok_start; \
92
+ toklen = buf - tok_start; \
93
+ } while (0)
94
+
95
+ static const char *token_char_map = "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
96
+ "\0\1\1\1\1\1\1\1\0\0\1\1\0\1\1\0\1\1\1\1\1\1\1\1\1\1\0\0\0\0\0\0"
97
+ "\0\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\0\0\1\1"
98
+ "\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\1\0\1\0"
99
+ "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
100
+ "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
101
+ "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
102
+ "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0";
103
+
104
+ static const char *findchar_fast(const char *buf, const char *buf_end, const char *ranges, size_t ranges_size, int *found)
93
105
  {
94
- *found = 0;
106
+ *found = 0;
95
107
  #if __SSE4_2__
96
- if (likely(buf_end - buf >= 16)) {
97
- __m128i ranges16 = _mm_loadu_si128((const __m128i*)ranges);
98
-
99
- size_t left = (buf_end - buf) & ~15;
100
- do {
101
- __m128i b16 = _mm_loadu_si128((void*)buf);
102
- int r = _mm_cmpestri(ranges16, ranges_size, b16, 16, _SIDD_LEAST_SIGNIFICANT | _SIDD_CMP_RANGES | _SIDD_UBYTE_OPS);
103
- if (unlikely(r != 16)) {
104
- buf += r;
105
- *found = 1;
106
- break;
107
- }
108
- buf += 16;
109
- left -= 16;
110
- } while (likely(left != 0));
111
- }
108
+ if (likely(buf_end - buf >= 16)) {
109
+ __m128i ranges16 = _mm_loadu_si128((const __m128i *)ranges);
110
+
111
+ size_t left = (buf_end - buf) & ~15;
112
+ do {
113
+ __m128i b16 = _mm_loadu_si128((void *)buf);
114
+ int r = _mm_cmpestri(ranges16, ranges_size, b16, 16, _SIDD_LEAST_SIGNIFICANT | _SIDD_CMP_RANGES | _SIDD_UBYTE_OPS);
115
+ if (unlikely(r != 16)) {
116
+ buf += r;
117
+ *found = 1;
118
+ break;
119
+ }
120
+ buf += 16;
121
+ left -= 16;
122
+ } while (likely(left != 0));
123
+ }
124
+ #else
125
+ /* suppress unused parameter warning */
126
+ (void)buf_end;
127
+ (void)ranges;
128
+ (void)ranges_size;
112
129
  #endif
113
- return buf;
130
+ return buf;
114
131
  }
115
132
 
116
- static const char* get_token_to_eol(const char* buf, const char* buf_end,
117
- const char** token, size_t* token_len,
118
- int* ret)
133
+ static const char *get_token_to_eol(const char *buf, const char *buf_end, const char **token, size_t *token_len, int *ret)
119
134
  {
120
- const char* token_start = buf;
121
-
135
+ const char *token_start = buf;
136
+
122
137
  #ifdef __SSE4_2__
123
- static const char ranges1[] =
124
- "\0\010"
125
- /* allow HT */
126
- "\012\037"
127
- /* allow SP and up to but not including DEL */
128
- "\177\177"
129
- /* allow chars w. MSB set */
130
- ;
131
- int found;
132
- buf = findchar_fast(buf, buf_end, ranges1, sizeof(ranges1) - 1, &found);
133
- if (found)
134
- goto FOUND_CTL;
138
+ static const char ranges1[] = "\0\010"
139
+ /* allow HT */
140
+ "\012\037"
141
+ /* allow SP and up to but not including DEL */
142
+ "\177\177"
143
+ /* allow chars w. MSB set */
144
+ ;
145
+ int found;
146
+ buf = findchar_fast(buf, buf_end, ranges1, sizeof(ranges1) - 1, &found);
147
+ if (found)
148
+ goto FOUND_CTL;
135
149
  #else
136
- /* find non-printable char within the next 8 bytes, this is the hottest code; manually inlined */
137
- while (likely(buf_end - buf >= 8)) {
138
- #define DOIT() if (unlikely(! IS_PRINTABLE_ASCII(*buf))) goto NonPrintable; ++buf
139
- DOIT(); DOIT(); DOIT(); DOIT();
140
- DOIT(); DOIT(); DOIT(); DOIT();
150
+ /* find non-printable char within the next 8 bytes, this is the hottest code; manually inlined */
151
+ while (likely(buf_end - buf >= 8)) {
152
+ #define DOIT() \
153
+ do { \
154
+ if (unlikely(!IS_PRINTABLE_ASCII(*buf))) \
155
+ goto NonPrintable; \
156
+ ++buf; \
157
+ } while (0)
158
+ DOIT();
159
+ DOIT();
160
+ DOIT();
161
+ DOIT();
162
+ DOIT();
163
+ DOIT();
164
+ DOIT();
165
+ DOIT();
141
166
  #undef DOIT
142
- continue;
143
- NonPrintable:
144
- if ((likely((unsigned char)*buf < '\040') && likely(*buf != '\011')) || unlikely(*buf == '\177')) {
145
- goto FOUND_CTL;
167
+ continue;
168
+ NonPrintable:
169
+ if ((likely((unsigned char)*buf < '\040') && likely(*buf != '\011')) || unlikely(*buf == '\177')) {
170
+ goto FOUND_CTL;
171
+ }
172
+ ++buf;
146
173
  }
147
- ++buf;
148
- }
149
174
  #endif
150
- for (; ; ++buf) {
151
- CHECK_EOF();
152
- if (unlikely(! IS_PRINTABLE_ASCII(*buf))) {
153
- if ((likely((unsigned char)*buf < '\040') && likely(*buf != '\011')) || unlikely(*buf == '\177')) {
154
- goto FOUND_CTL;
155
- }
175
+ for (;; ++buf) {
176
+ CHECK_EOF();
177
+ if (unlikely(!IS_PRINTABLE_ASCII(*buf))) {
178
+ if ((likely((unsigned char)*buf < '\040') && likely(*buf != '\011')) || unlikely(*buf == '\177')) {
179
+ goto FOUND_CTL;
180
+ }
181
+ }
156
182
  }
157
- }
158
- FOUND_CTL:
159
- if (likely(*buf == '\015')) {
160
- ++buf;
161
- EXPECT_CHAR('\012');
162
- *token_len = buf - 2 - token_start;
163
- } else if (*buf == '\012') {
164
- *token_len = buf - token_start;
165
- ++buf;
166
- } else {
167
- *ret = -1;
168
- return NULL;
169
- }
170
- *token = token_start;
171
-
172
- return buf;
173
- }
174
-
175
- static const char* is_complete(const char* buf, const char* buf_end,
176
- size_t last_len, int* ret)
177
- {
178
- int ret_cnt = 0;
179
- buf = last_len < 3 ? buf : buf + last_len - 3;
180
-
181
- while (1) {
182
- CHECK_EOF();
183
- if (*buf == '\015') {
184
- ++buf;
185
- CHECK_EOF();
186
- EXPECT_CHAR('\012');
187
- ++ret_cnt;
183
+ FOUND_CTL:
184
+ if (likely(*buf == '\015')) {
185
+ ++buf;
186
+ EXPECT_CHAR('\012');
187
+ *token_len = buf - 2 - token_start;
188
188
  } else if (*buf == '\012') {
189
- ++buf;
190
- ++ret_cnt;
189
+ *token_len = buf - token_start;
190
+ ++buf;
191
191
  } else {
192
- ++buf;
193
- ret_cnt = 0;
192
+ *ret = -1;
193
+ return NULL;
194
194
  }
195
- if (ret_cnt == 2) {
196
- return buf;
195
+ *token = token_start;
196
+
197
+ return buf;
198
+ }
199
+
200
+ static const char *is_complete(const char *buf, const char *buf_end, size_t last_len, int *ret)
201
+ {
202
+ int ret_cnt = 0;
203
+ buf = last_len < 3 ? buf : buf + last_len - 3;
204
+
205
+ while (1) {
206
+ CHECK_EOF();
207
+ if (*buf == '\015') {
208
+ ++buf;
209
+ CHECK_EOF();
210
+ EXPECT_CHAR('\012');
211
+ ++ret_cnt;
212
+ } else if (*buf == '\012') {
213
+ ++buf;
214
+ ++ret_cnt;
215
+ } else {
216
+ ++buf;
217
+ ret_cnt = 0;
218
+ }
219
+ if (ret_cnt == 2) {
220
+ return buf;
221
+ }
197
222
  }
198
- }
199
-
200
- *ret = -2;
201
- return NULL;
223
+
224
+ *ret = -2;
225
+ return NULL;
202
226
  }
203
227
 
204
228
  /* *_buf is always within [buf, buf_end) upon success */
205
- static const char* parse_int(const char* buf, const char* buf_end, int* value,
206
- int* ret)
229
+ static const char *parse_int(const char *buf, const char *buf_end, int *value, int *ret)
207
230
  {
208
- int v;
209
- CHECK_EOF();
210
- if (! ('0' <= *buf && *buf <= '9')) {
211
- *ret = -1;
212
- return NULL;
213
- }
214
- v = 0;
215
- for (; ; ++buf) {
231
+ int v;
216
232
  CHECK_EOF();
217
- if ('0' <= *buf && *buf <= '9') {
218
- v = v * 10 + *buf - '0';
219
- } else {
220
- break;
233
+ if (!('0' <= *buf && *buf <= '9')) {
234
+ *ret = -1;
235
+ return NULL;
236
+ }
237
+ v = 0;
238
+ for (;; ++buf) {
239
+ CHECK_EOF();
240
+ if ('0' <= *buf && *buf <= '9') {
241
+ v = v * 10 + *buf - '0';
242
+ } else {
243
+ break;
244
+ }
221
245
  }
222
- }
223
-
224
- *value = v;
225
- return buf;
246
+
247
+ *value = v;
248
+ return buf;
226
249
  }
227
250
 
228
251
  /* returned pointer is always within [buf, buf_end), or null */
229
- static const char* parse_http_version(const char* buf, const char* buf_end,
230
- int* minor_version, int* ret)
252
+ static const char *parse_http_version(const char *buf, const char *buf_end, int *minor_version, int *ret)
231
253
  {
232
- EXPECT_CHAR('H'); EXPECT_CHAR('T'); EXPECT_CHAR('T'); EXPECT_CHAR('P');
233
- EXPECT_CHAR('/'); EXPECT_CHAR('1'); EXPECT_CHAR('.');
234
- return parse_int(buf, buf_end, minor_version, ret);
254
+ EXPECT_CHAR('H');
255
+ EXPECT_CHAR('T');
256
+ EXPECT_CHAR('T');
257
+ EXPECT_CHAR('P');
258
+ EXPECT_CHAR('/');
259
+ EXPECT_CHAR('1');
260
+ EXPECT_CHAR('.');
261
+ return parse_int(buf, buf_end, minor_version, ret);
235
262
  }
236
263
 
237
- static const char* parse_headers(const char* buf, const char* buf_end,
238
- struct phr_header* headers,
239
- size_t* num_headers, size_t max_headers,
240
- int* ret)
264
+ static const char *parse_headers(const char *buf, const char *buf_end, struct phr_header *headers, size_t *num_headers,
265
+ size_t max_headers, int *ret)
241
266
  {
242
- for (; ; ++*num_headers) {
267
+ for (;; ++*num_headers) {
268
+ CHECK_EOF();
269
+ if (*buf == '\015') {
270
+ ++buf;
271
+ EXPECT_CHAR('\012');
272
+ break;
273
+ } else if (*buf == '\012') {
274
+ ++buf;
275
+ break;
276
+ }
277
+ if (*num_headers == max_headers) {
278
+ *ret = -1;
279
+ return NULL;
280
+ }
281
+ if (!(*num_headers != 0 && (*buf == ' ' || *buf == '\t'))) {
282
+ static const char ALIGNED(16) ranges1[] = "::\x00\037";
283
+ int found;
284
+ if (!token_char_map[(unsigned char)*buf]) {
285
+ *ret = -1;
286
+ return NULL;
287
+ }
288
+ /* parsing name, but do not discard SP before colon, see
289
+ * http://www.mozilla.org/security/announce/2006/mfsa2006-33.html */
290
+ headers[*num_headers].name = buf;
291
+ buf = findchar_fast(buf, buf_end, ranges1, sizeof(ranges1) - 1, &found);
292
+ if (!found) {
293
+ CHECK_EOF();
294
+ }
295
+ while (1) {
296
+ if (*buf == ':') {
297
+ break;
298
+ } else if (*buf < ' ') {
299
+ *ret = -1;
300
+ return NULL;
301
+ }
302
+ ++buf;
303
+ CHECK_EOF();
304
+ }
305
+ headers[*num_headers].name_len = buf - headers[*num_headers].name;
306
+ ++buf;
307
+ for (;; ++buf) {
308
+ CHECK_EOF();
309
+ if (!(*buf == ' ' || *buf == '\t')) {
310
+ break;
311
+ }
312
+ }
313
+ } else {
314
+ headers[*num_headers].name = NULL;
315
+ headers[*num_headers].name_len = 0;
316
+ }
317
+ if ((buf = get_token_to_eol(buf, buf_end, &headers[*num_headers].value, &headers[*num_headers].value_len, ret)) == NULL) {
318
+ return NULL;
319
+ }
320
+ }
321
+ return buf;
322
+ }
323
+
324
+ static const char *parse_request(const char *buf, const char *buf_end, const char **method, size_t *method_len, const char **path,
325
+ size_t *path_len, int *minor_version, struct phr_header *headers, size_t *num_headers,
326
+ size_t max_headers, int *ret)
327
+ {
328
+ /* skip first empty line (some clients add CRLF after POST content) */
243
329
  CHECK_EOF();
244
330
  if (*buf == '\015') {
245
- ++buf;
246
- EXPECT_CHAR('\012');
247
- break;
331
+ ++buf;
332
+ EXPECT_CHAR('\012');
248
333
  } else if (*buf == '\012') {
249
- ++buf;
250
- break;
251
- }
252
- if (*num_headers == max_headers) {
253
- *ret = -1;
254
- return NULL;
334
+ ++buf;
255
335
  }
256
- if (! (*num_headers != 0 && (*buf == ' ' || *buf == '\t'))) {
257
- if (! token_char_map[(unsigned char)*buf]) {
258
- *ret = -1;
336
+
337
+ /* parse request line */
338
+ ADVANCE_TOKEN(*method, *method_len);
339
+ ++buf;
340
+ ADVANCE_TOKEN(*path, *path_len);
341
+ ++buf;
342
+ if ((buf = parse_http_version(buf, buf_end, minor_version, ret)) == NULL) {
259
343
  return NULL;
260
- }
261
- /* parsing name, but do not discard SP before colon, see
262
- * http://www.mozilla.org/security/announce/2006/mfsa2006-33.html */
263
- headers[*num_headers].name = buf;
264
- static const char ranges1[] __attribute__((aligned(16))) = "::\x00\037";
265
- int found;
266
- buf = findchar_fast(buf, buf_end, ranges1, sizeof(ranges1) - 1, &found);
267
- if (! found) {
268
- CHECK_EOF();
269
- }
270
- while (1) {
271
- if (*buf == ':') {
272
- break;
273
- } else if (*buf < ' ') {
274
- *ret = -1;
275
- return NULL;
276
- }
344
+ }
345
+ if (*buf == '\015') {
346
+ ++buf;
347
+ EXPECT_CHAR('\012');
348
+ } else if (*buf == '\012') {
277
349
  ++buf;
278
- CHECK_EOF();
279
- }
280
- headers[*num_headers].name_len = buf - headers[*num_headers].name;
281
- ++buf;
282
- for (; ; ++buf) {
283
- CHECK_EOF();
284
- if (! (*buf == ' ' || *buf == '\t')) {
285
- break;
286
- }
287
- }
288
350
  } else {
289
- headers[*num_headers].name = NULL;
290
- headers[*num_headers].name_len = 0;
351
+ *ret = -1;
352
+ return NULL;
353
+ }
354
+
355
+ return parse_headers(buf, buf_end, headers, num_headers, max_headers, ret);
356
+ }
357
+
358
+ int phr_parse_request(const char *buf_start, size_t len, const char **method, size_t *method_len, const char **path,
359
+ size_t *path_len, int *minor_version, struct phr_header *headers, size_t *num_headers, size_t last_len)
360
+ {
361
+ const char *buf = buf_start, *buf_end = buf_start + len;
362
+ size_t max_headers = *num_headers;
363
+ int r;
364
+
365
+ *method = NULL;
366
+ *method_len = 0;
367
+ *path = NULL;
368
+ *path_len = 0;
369
+ *minor_version = -1;
370
+ *num_headers = 0;
371
+
372
+ /* if last_len != 0, check if the request is complete (a fast countermeasure
373
+ against slowloris) */
374
+ if (last_len != 0 && is_complete(buf, buf_end, last_len, &r) == NULL) {
375
+ return r;
291
376
  }
292
- if ((buf = get_token_to_eol(buf, buf_end, &headers[*num_headers].value,
293
- &headers[*num_headers].value_len, ret))
294
- == NULL) {
295
- return NULL;
377
+
378
+ if ((buf = parse_request(buf, buf_end, method, method_len, path, path_len, minor_version, headers, num_headers, max_headers,
379
+ &r)) == NULL) {
380
+ return r;
296
381
  }
297
- }
298
- return buf;
382
+
383
+ return (int)(buf - buf_start);
299
384
  }
300
385
 
301
- const char* parse_request(const char* buf, const char* buf_end,
302
- const char** method, size_t* method_len,
303
- const char** path, size_t* path_len,
304
- int* minor_version, struct phr_header* headers,
305
- size_t* num_headers, size_t max_headers, int* ret)
386
+ static const char *parse_response(const char *buf, const char *buf_end, int *minor_version, int *status, const char **msg,
387
+ size_t *msg_len, struct phr_header *headers, size_t *num_headers, size_t max_headers, int *ret)
306
388
  {
307
- /* skip first empty line (some clients add CRLF after POST content) */
308
- CHECK_EOF();
309
- if (*buf == '\015') {
310
- ++buf;
311
- EXPECT_CHAR('\012');
312
- } else if (*buf == '\012') {
313
- ++buf;
314
- }
315
-
316
- /* parse request line */
317
- ADVANCE_TOKEN(*method, *method_len);
318
- ++buf;
319
- ADVANCE_TOKEN(*path, *path_len);
320
- ++buf;
321
- if ((buf = parse_http_version(buf, buf_end, minor_version, ret)) == NULL) {
322
- return NULL;
323
- }
324
- if (*buf == '\015') {
325
- ++buf;
326
- EXPECT_CHAR('\012');
327
- } else if (*buf == '\012') {
328
- ++buf;
329
- } else {
330
- *ret = -1;
331
- return NULL;
332
- }
333
-
334
- return parse_headers(buf, buf_end, headers, num_headers, max_headers, ret);
389
+ /* parse "HTTP/1.x" */
390
+ if ((buf = parse_http_version(buf, buf_end, minor_version, ret)) == NULL) {
391
+ return NULL;
392
+ }
393
+ /* skip space */
394
+ if (*buf++ != ' ') {
395
+ *ret = -1;
396
+ return NULL;
397
+ }
398
+ /* parse status code */
399
+ if ((buf = parse_int(buf, buf_end, status, ret)) == NULL) {
400
+ return NULL;
401
+ }
402
+ /* skip space */
403
+ if (*buf++ != ' ') {
404
+ *ret = -1;
405
+ return NULL;
406
+ }
407
+ /* get message */
408
+ if ((buf = get_token_to_eol(buf, buf_end, msg, msg_len, ret)) == NULL) {
409
+ return NULL;
410
+ }
411
+
412
+ return parse_headers(buf, buf_end, headers, num_headers, max_headers, ret);
335
413
  }
336
414
 
337
- int phr_parse_request(const char* buf_start, size_t len, const char** method,
338
- size_t* method_len, const char** path, size_t* path_len,
339
- int* minor_version, struct phr_header* headers,
340
- size_t* num_headers, size_t last_len)
415
+ int phr_parse_response(const char *buf_start, size_t len, int *minor_version, int *status, const char **msg, size_t *msg_len,
416
+ struct phr_header *headers, size_t *num_headers, size_t last_len)
341
417
  {
342
- const char * buf = buf_start, * buf_end = buf_start + len;
343
- size_t max_headers = *num_headers;
344
- int r;
345
-
346
- *method = NULL;
347
- *method_len = 0;
348
- *path = NULL;
349
- *path_len = 0;
350
- *minor_version = -1;
351
- *num_headers = 0;
352
-
353
- /* if last_len != 0, check if the request is complete (a fast countermeasure
354
- againt slowloris */
355
- if (last_len != 0 && is_complete(buf, buf_end, last_len, &r) == NULL) {
356
- return r;
357
- }
358
-
359
- if ((buf = parse_request(buf, buf_end, method, method_len, path, path_len,
360
- minor_version, headers, num_headers, max_headers,
361
- &r))
362
- == NULL) {
363
- return r;
364
- }
365
-
366
- return (int)(buf - buf_start);
418
+ const char *buf = buf_start, *buf_end = buf + len;
419
+ size_t max_headers = *num_headers;
420
+ int r;
421
+
422
+ *minor_version = -1;
423
+ *status = 0;
424
+ *msg = NULL;
425
+ *msg_len = 0;
426
+ *num_headers = 0;
427
+
428
+ /* if last_len != 0, check if the response is complete (a fast countermeasure
429
+ against slowloris) */
430
+ if (last_len != 0 && is_complete(buf, buf_end, last_len, &r) == NULL) {
431
+ return r;
432
+ }
433
+
434
+ if ((buf = parse_response(buf, buf_end, minor_version, status, msg, msg_len, headers, num_headers, max_headers, &r)) == NULL) {
435
+ return r;
436
+ }
437
+
438
+ return (int)(buf - buf_start);
367
439
  }
368
440
 
369
- static const char* parse_response(const char* buf, const char* buf_end,
370
- int* minor_version, int* status,
371
- const char** msg, size_t* msg_len,
372
- struct phr_header* headers,
373
- size_t* num_headers, size_t max_headers,
374
- int* ret)
441
+ int phr_parse_headers(const char *buf_start, size_t len, struct phr_header *headers, size_t *num_headers, size_t last_len)
375
442
  {
376
- /* parse "HTTP/1.x" */
377
- if ((buf = parse_http_version(buf, buf_end, minor_version, ret)) == NULL) {
378
- return NULL;
379
- }
380
- /* skip space */
381
- if (*buf++ != ' ') {
382
- *ret = -1;
383
- return NULL;
384
- }
385
- /* parse status code */
386
- if ((buf = parse_int(buf, buf_end, status, ret)) == NULL) {
387
- return NULL;
388
- }
389
- /* skip space */
390
- if (*buf++ != ' ') {
391
- *ret = -1;
392
- return NULL;
393
- }
394
- /* get message */
395
- if ((buf = get_token_to_eol(buf, buf_end, msg, msg_len, ret)) == NULL) {
396
- return NULL;
397
- }
398
-
399
- return parse_headers(buf, buf_end, headers, num_headers, max_headers, ret);
443
+ const char *buf = buf_start, *buf_end = buf + len;
444
+ size_t max_headers = *num_headers;
445
+ int r;
446
+
447
+ *num_headers = 0;
448
+
449
+ /* if last_len != 0, check if the headers are complete (a fast countermeasure
450
+ against slowloris) */
451
+ if (last_len != 0 && is_complete(buf, buf_end, last_len, &r) == NULL) {
452
+ return r;
453
+ }
454
+
455
+ if ((buf = parse_headers(buf, buf_end, headers, num_headers, max_headers, &r)) == NULL) {
456
+ return r;
457
+ }
458
+
459
+ return (int)(buf - buf_start);
400
460
  }
401
461
 
402
- int phr_parse_response(const char* buf_start, size_t len, int* minor_version,
403
- int* status, const char** msg, size_t* msg_len,
404
- struct phr_header* headers, size_t* num_headers,
405
- size_t last_len)
462
+ enum {
463
+ CHUNKED_IN_CHUNK_SIZE,
464
+ CHUNKED_IN_CHUNK_EXT,
465
+ CHUNKED_IN_CHUNK_DATA,
466
+ CHUNKED_IN_CHUNK_CRLF,
467
+ CHUNKED_IN_TRAILERS_LINE_HEAD,
468
+ CHUNKED_IN_TRAILERS_LINE_MIDDLE
469
+ };
470
+
471
+ static int decode_hex(int ch)
406
472
  {
407
- const char * buf = buf_start, * buf_end = buf + len;
408
- size_t max_headers = *num_headers;
409
- int r;
410
-
411
- *minor_version = -1;
412
- *status = 0;
413
- *msg = NULL;
414
- *msg_len = 0;
415
- *num_headers = 0;
416
-
417
- /* if last_len != 0, check if the response is complete (a fast countermeasure
418
- against slowloris */
419
- if (last_len != 0 && is_complete(buf, buf_end, last_len, &r) == NULL) {
420
- return r;
421
- }
422
-
423
- if ((buf = parse_response(buf, buf_end, minor_version, status, msg, msg_len,
424
- headers, num_headers, max_headers, &r))
425
- == NULL) {
426
- return r;
427
- }
428
-
429
- return (int)(buf - buf_start);
473
+ if ('0' <= ch && ch <= '9') {
474
+ return ch - '0';
475
+ } else if ('A' <= ch && ch <= 'F') {
476
+ return ch - 'A' + 0xa;
477
+ } else if ('a' <= ch && ch <= 'f') {
478
+ return ch - 'a' + 0xa;
479
+ } else {
480
+ return -1;
481
+ }
482
+ }
483
+
484
+ ssize_t phr_decode_chunked(struct phr_chunked_decoder *decoder, char *buf, size_t *_bufsz)
485
+ {
486
+ size_t dst = 0, src = 0, bufsz = *_bufsz;
487
+ ssize_t ret = -2; /* incomplete */
488
+
489
+ while (1) {
490
+ switch (decoder->_state) {
491
+ case CHUNKED_IN_CHUNK_SIZE:
492
+ for (;; ++src) {
493
+ int v;
494
+ if (src == bufsz)
495
+ goto Exit;
496
+ if ((v = decode_hex(buf[src])) == -1) {
497
+ if (decoder->_hex_count == 0) {
498
+ ret = -1;
499
+ goto Exit;
500
+ }
501
+ break;
502
+ }
503
+ if (decoder->_hex_count == sizeof(size_t) * 2) {
504
+ ret = -1;
505
+ goto Exit;
506
+ }
507
+ decoder->bytes_left_in_chunk = decoder->bytes_left_in_chunk * 16 + v;
508
+ ++decoder->_hex_count;
509
+ }
510
+ decoder->_hex_count = 0;
511
+ decoder->_state = CHUNKED_IN_CHUNK_EXT;
512
+ /* fallthru */
513
+ case CHUNKED_IN_CHUNK_EXT:
514
+ /* RFC 7230 A.2 "Line folding in chunk extensions is disallowed" */
515
+ for (;; ++src) {
516
+ if (src == bufsz)
517
+ goto Exit;
518
+ if (buf[src] == '\012')
519
+ break;
520
+ }
521
+ ++src;
522
+ if (decoder->bytes_left_in_chunk == 0) {
523
+ if (decoder->consume_trailer) {
524
+ decoder->_state = CHUNKED_IN_TRAILERS_LINE_HEAD;
525
+ break;
526
+ } else {
527
+ goto Complete;
528
+ }
529
+ }
530
+ decoder->_state = CHUNKED_IN_CHUNK_DATA;
531
+ /* fallthru */
532
+ case CHUNKED_IN_CHUNK_DATA: {
533
+ size_t avail = bufsz - src;
534
+ if (avail < decoder->bytes_left_in_chunk) {
535
+ if (dst != src)
536
+ memmove(buf + dst, buf + src, avail);
537
+ src += avail;
538
+ dst += avail;
539
+ decoder->bytes_left_in_chunk -= avail;
540
+ goto Exit;
541
+ }
542
+ if (dst != src)
543
+ memmove(buf + dst, buf + src, decoder->bytes_left_in_chunk);
544
+ src += decoder->bytes_left_in_chunk;
545
+ dst += decoder->bytes_left_in_chunk;
546
+ decoder->bytes_left_in_chunk = 0;
547
+ decoder->_state = CHUNKED_IN_CHUNK_CRLF;
548
+ }
549
+ /* fallthru */
550
+ case CHUNKED_IN_CHUNK_CRLF:
551
+ for (;; ++src) {
552
+ if (src == bufsz)
553
+ goto Exit;
554
+ if (buf[src] != '\015')
555
+ break;
556
+ }
557
+ if (buf[src] != '\012') {
558
+ ret = -1;
559
+ goto Exit;
560
+ }
561
+ ++src;
562
+ decoder->_state = CHUNKED_IN_CHUNK_SIZE;
563
+ break;
564
+ case CHUNKED_IN_TRAILERS_LINE_HEAD:
565
+ for (;; ++src) {
566
+ if (src == bufsz)
567
+ goto Exit;
568
+ if (buf[src] != '\015')
569
+ break;
570
+ }
571
+ if (buf[src++] == '\012')
572
+ goto Complete;
573
+ decoder->_state = CHUNKED_IN_TRAILERS_LINE_MIDDLE;
574
+ /* fallthru */
575
+ case CHUNKED_IN_TRAILERS_LINE_MIDDLE:
576
+ for (;; ++src) {
577
+ if (src == bufsz)
578
+ goto Exit;
579
+ if (buf[src] == '\012')
580
+ break;
581
+ }
582
+ ++src;
583
+ decoder->_state = CHUNKED_IN_TRAILERS_LINE_HEAD;
584
+ break;
585
+ default:
586
+ assert(!"decoder is corrupt");
587
+ }
588
+ }
589
+
590
+ Complete:
591
+ ret = bufsz - src;
592
+ Exit:
593
+ if (dst != src)
594
+ memmove(buf + dst, buf + src, bufsz - src);
595
+ *_bufsz = dst;
596
+ return ret;
430
597
  }
431
598
 
432
599
  #undef CHECK_EOF