pico_http_parser 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/Rakefile ADDED
@@ -0,0 +1,19 @@
1
require 'bundler/gem_tasks'
require 'rake/extensiontask'
require 'rspec/core/rake_task'

# Compile the C extension and place the built library under lib/pico_http_parser.
Rake::ExtensionTask.new('pico_http_parser') { |ext| ext.lib_dir = 'lib/pico_http_parser' }

# `rake spec` runs every spec file directly under spec/.
RSpec::Core::RakeTask.new(:spec) do |t|
  t.pattern = 'spec/*_spec.rb'
  # t.rspec_opts = ['-cfs']
end

# `rake test`: clean rebuild of the extension, then the specs.
task :test do
  %w[clobber compile spec].each { |task_name| Rake::Task[task_name].invoke }
end
19
+
@@ -0,0 +1,5 @@
1
# Make the benchmark directory and the gem's lib directory requirable.
# The lib directory is unshifted last, so it ends up first on the load path.
bench_dir = File.dirname(__FILE__)
[bench_dir, File.join(bench_dir, '..', 'lib')].each { |dir| $LOAD_PATH.unshift(dir) }

require 'rubygems'
require 'benchmark'
@@ -0,0 +1,35 @@
1
require File.expand_path('bench_helper', File.dirname(__FILE__))

require 'pico_http_parser'

# Minimal HTTP/1.1 request. The "\r" escapes combined with the heredoc's own
# newlines give every line a CRLF terminator.
request_body = <<REQ
GET /blakjsdfkas HTTP/1.1\r
Host: blooperblorp\r
Cookie: blah=woop\r
\r
REQ

# Number of parses per report. Named `iterations` rather than `loop` so that
# Kernel#loop is not shadowed, and driven with `times` so exactly this many
# parses are executed.
iterations = 300_000

# File.read(File.expand_path(File.dirname(__FILE__) + '/sample_request.http'))

Benchmark.bmbm(20) do |bm|
  bm.report("PicoHTTPParser") do
    iterations.times do
      env = {}
      PicoHTTPParser.parse_http_request(request_body, env)
    end
  end

  # Unicorn is an optional comparison target; skip its report when the gem is
  # not installed. The parser is referenced by its qualified name so nothing
  # is mixed into Object.
  begin
    require 'unicorn'
    bm.report("HttpParser") do
      iterations.times do
        parser = Unicorn::HttpParser.new
        parser.buf << request_body
        parser.parse
      end
    end
  rescue LoadError
    puts("Can't benchmark unicorn as it couldn't be loaded.")
  end
end
@@ -0,0 +1,2 @@
1
# Generates the Makefile for the C extension, targeting
# pico_http_parser/pico_http_parser.
require 'mkmf'

create_makefile 'pico_http_parser/pico_http_parser'
@@ -0,0 +1,167 @@
1
+ #include <ruby.h>
2
+ #include <ctype.h>
3
+ #include "picohttpparser/picohttpparser.c"
4
+
5
/* Longest header name (excluding the "HTTP_" prefix) that is converted. */
#define MAX_HEADER_NAME_LEN 1024
/* Capacity of the header array handed to the parser. */
#define MAX_HEADERS 128
/*
 * ASCII-only upper-casing of a single character. The argument is
 * parenthesized so that expressions expand correctly; it is evaluated more
 * than once, so it must be free of side effects.
 */
#define TOU(ch) (('a' <= (ch) && (ch) <= 'z') ? (ch) - ('a' - 'A') : (ch))
8
+
9
+ VALUE cPicoHTTPParser;
10
+
11
/*
 * Returns the offset of the first occurrence of ch within s[0..len), or len
 * when ch does not occur.
 */
static size_t find_ch(const char* s, size_t len, char ch)
{
    size_t pos = 0;

    while (pos != len && s[pos] != ch)
        pos++;
    return pos;
}
20
+
21
+ static
22
+ int header_is(const struct phr_header* header, const char* name,
23
+ size_t len)
24
+ {
25
+ const char* x, * y;
26
+ if (header->name_len != len)
27
+ return 0;
28
+ for (x = header->name, y = name; len != 0; --len, ++x, ++y)
29
+ if (TOU(*x) != *y)
30
+ return 0;
31
+ return 1;
32
+ }
33
+
34
+
35
+ static
36
+ int store_path_info(VALUE envref, const char* src, size_t src_len) {
37
+ size_t dlen = 0, i = 0;
38
+ char *d;
39
+ char s2, s3;
40
+
41
+ d = (char*)malloc(src_len * 3 + 1);
42
+ for (i = 0; i < src_len; i++ ) {
43
+ if ( src[i] == '%' ) {
44
+ if ( !isxdigit(src[i+1]) || !isxdigit(src[i+2]) ) {
45
+ free(d);
46
+ return -1;
47
+ }
48
+ s2 = src[i+1];
49
+ s3 = src[i+2];
50
+ s2 -= s2 <= '9' ? '0'
51
+ : s2 <= 'F' ? 'A' - 10
52
+ : 'a' - 10;
53
+ s3 -= s3 <= '9' ? '0'
54
+ : s3 <= 'F' ? 'A' - 10
55
+ : 'a' - 10;
56
+ d[dlen++] = s2 * 16 + s3;
57
+ i += 2;
58
+ }
59
+ else {
60
+ d[dlen++] = src[i];
61
+ }
62
+ }
63
+ d[dlen]='0';
64
+ rb_hash_aset(envref, rb_str_new2("PATH_INFO"), rb_str_new(d, dlen));
65
+ free(d);
66
+ return dlen;
67
+ }
68
+
69
+
70
+ static
71
+ VALUE phr_parse_http_request(VALUE self, VALUE buf, VALUE envref)
72
+ {
73
+ const char* buf_str;
74
+ size_t buf_len;
75
+ const char* method;
76
+ size_t method_len;
77
+ const char* path;
78
+ size_t path_len;
79
+ size_t o_path_len;
80
+ int minor_version;
81
+ struct phr_header headers[MAX_HEADERS];
82
+ size_t num_headers, question_at;
83
+ size_t i;
84
+ int ret;
85
+ char tmp[MAX_HEADER_NAME_LEN + sizeof("HTTP_") - 1];
86
+ VALUE last_value;
87
+
88
+ buf_str = StringValuePtr(buf);
89
+ buf_len = strlen(buf_str);
90
+ num_headers = MAX_HEADERS;
91
+ ret = phr_parse_request(buf_str, buf_len, &method, &method_len, &path,
92
+ &path_len, &minor_version, headers, &num_headers, 0);
93
+ if (ret < 0)
94
+ goto done;
95
+
96
+ rb_hash_aset(envref, rb_str_new2("REQUEST_METHOD"), rb_str_new(method,method_len));
97
+ rb_hash_aset(envref, rb_str_new2("REQUEST_URI"), rb_str_new(path, path_len));
98
+ rb_hash_aset(envref, rb_str_new2("SCRIPT_NAME"), rb_str_new2(""));
99
+ i = sprintf(tmp,"HTTP/1.%d",minor_version);
100
+ rb_hash_aset(envref, rb_str_new2("SERVER_PROTOCOL"), rb_str_new(tmp, i));
101
+
102
+ /* PATH_INFO QUERY_STRING */
103
+ path_len = find_ch(path, path_len, '#'); /* strip off all text after # after storing request_uri */
104
+ question_at = find_ch(path, path_len, '?');
105
+ if ( store_path_info(envref, path, question_at) < 0 ) {
106
+ rb_hash_clear(envref);
107
+ ret = -1;
108
+ goto done;
109
+ }
110
+ if (question_at != path_len) ++question_at;
111
+ rb_hash_aset(envref, rb_str_new2("QUERY_STRING"), rb_str_new(path + question_at, path_len - question_at));
112
+
113
+ last_value = Qnil;
114
+ for (i = 0; i < num_headers; ++i) {
115
+ if (headers[i].name != NULL) {
116
+ const char* name;
117
+ size_t name_len;
118
+ VALUE slot;
119
+ if (header_is(headers + i, "CONTENT-TYPE", sizeof("CONTENT-TYPE") - 1)) {
120
+ name = "CONTENT_TYPE";
121
+ name_len = sizeof("CONTENT_TYPE") - 1;
122
+ } else if (header_is(headers + i, "CONTENT-LENGTH", sizeof("CONTENT-LENGTH") - 1)) {
123
+ name = "CONTENT_LENGTH";
124
+ name_len = sizeof("CONTENT_LENGTH") - 1;
125
+ } else {
126
+ const char* s;
127
+ char* d;
128
+ size_t n;
129
+ if (sizeof(tmp) - 5 < headers[i].name_len) {
130
+ rb_hash_clear(envref);
131
+ ret = -1;
132
+ goto done;
133
+ }
134
+ strcpy(tmp, "HTTP_");
135
+ for (s = headers[i].name, n = headers[i].name_len, d = tmp + 5;
136
+ n != 0;
137
+ s++, --n, d++) {
138
+ *d = *s == '-' ? '_' : TOU(*s);
139
+ name = tmp;
140
+ name_len = headers[i].name_len + 5;
141
+ }
142
+ }
143
+ slot = rb_hash_aref(envref, rb_str_new(name, name_len));
144
+ if ( slot != Qnil ) {
145
+ rb_str_cat2(slot, ", ");
146
+ rb_str_cat(slot, headers[i].value, headers[i].value_len);
147
+ } else {
148
+ slot = rb_str_new(headers[i].value, headers[i].value_len);
149
+ rb_hash_aset(envref, rb_str_new(name, name_len), slot);
150
+ last_value = slot;
151
+ }
152
+ } else {
153
+ /* continuing lines of a mulitiline header */
154
+ rb_str_cat(last_value, headers[i].value, headers[i].value_len);
155
+ }
156
+ }
157
+
158
+ done:
159
+ return rb_int_new(ret);
160
+ }
161
+
162
+ void Init_pico_http_parser()
163
+ {
164
+
165
+ cPicoHTTPParser = rb_const_get(rb_cObject, rb_intern("PicoHTTPParser"));
166
+ rb_define_module_function(cPicoHTTPParser, "parse_http_request", phr_parse_http_request, 2);
167
+ }
@@ -0,0 +1 @@
1
+ picohttpparser.* ident
@@ -0,0 +1,3 @@
1
+ [submodule "picotest"]
2
+ path = picotest
3
+ url = https://github.com/h2o/picotest.git
@@ -0,0 +1,6 @@
1
+ language: c
2
+ compiler:
3
+ - gcc
4
+ - clang
5
+ script:
6
+ - make test
@@ -0,0 +1,39 @@
1
+ #
2
+ # Copyright (c) 2009-2014 Kazuho Oku, Tokuhiro Matsuno, Daisuke Murase
3
+ #
4
+ # The software is licensed under either the MIT License (below) or the Perl
5
+ # license.
6
+ #
7
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
8
+ # of this software and associated documentation files (the "Software"), to
9
+ # deal in the Software without restriction, including without limitation the
10
+ # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
11
+ # sell copies of the Software, and to permit persons to whom the Software is
12
+ # furnished to do so, subject to the following conditions:
13
+ #
14
+ # The above copyright notice and this permission notice shall be included in
15
+ # all copies or substantial portions of the Software.
16
+ #
17
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22
+ # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
23
+ # IN THE SOFTWARE.
24
+
25
CC?=gcc
PROVE?=prove

all:

# Build the test binary and run it under prove.
test: test-bin
	$(PROVE) -v ./test-bin

test-bin: picohttpparser.c picotest/picotest.c test.c
	$(CC) -Wall $(CFLAGS) $(LDFLAGS) -o $@ $^

clean:
	rm -f test-bin

# None of these targets produce a file of the same name; declaring them phony
# keeps a stray file called "all", "test" or "clean" from disabling the rule.
.PHONY: all test clean
@@ -0,0 +1,24 @@
1
+ PicoHTTPParser
2
+ =============
3
+
4
+ Copyright (c) 2009-2014 [Kazuho Oku](https://github.com/kazuho), [Tokuhiro Matsuno](https://github.com/tokuhirom), [Daisuke Murase](https://github.com/typester)
5
+
6
+ PicoHTTPParser is a tiny, primitive, fast HTTP request/response parser.
7
+
8
+ Unlike most parsers, it is stateless and does not allocate memory by itself.
9
+ All it does is accept a pointer to a buffer and an output structure, and set up the pointers in the latter to point at the necessary portions of the buffer.
10
+
11
+ The code is widely deployed within Perl applications through popular modules that use it, including [Plack](https://metacpan.org/pod/Plack), [Starman](https://metacpan.org/pod/Starman), [Starlet](https://metacpan.org/pod/Starlet), [Furl](https://metacpan.org/pod/Furl). It is also the HTTP/1 parser of [H2O](https://github.com/h2o/h2o).
12
+
13
+ Check out [test.c](test.c) to find out how to use the parser.
14
+
15
+ The software is dual-licensed under the Perl License or the MIT License.
16
+
17
+ Benchmark
18
+ ---------
19
+
20
+ ![benchmark results](http://i.gyazo.com/7e098703c29128d69d02c9a216bfb6fb.png)
21
+
22
+ The benchmark code is from [fukamachi/fast-http](https://github.com/fukamachi/fast-http/).
23
+
24
+ The internals of picohttpparser have been described to some extent in [my blog entry](http://blog.kazuhooku.com/2014/11/the-internals-h2o-or-how-to-write-fast.html).
@@ -0,0 +1,52 @@
1
+ /*
2
+ * Copyright (c) 2009-2014 Kazuho Oku, Tokuhiro Matsuno, Daisuke Murase
3
+ *
4
+ * The software is licensed under either the MIT License (below) or the Perl
5
+ * license.
6
+ *
7
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
8
+ * of this software and associated documentation files (the "Software"), to
9
+ * deal in the Software without restriction, including without limitation the
10
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
11
+ * sell copies of the Software, and to permit persons to whom the Software is
12
+ * furnished to do so, subject to the following conditions:
13
+ *
14
+ * The above copyright notice and this permission notice shall be included in
15
+ * all copies or substantial portions of the Software.
16
+ *
17
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
23
+ * IN THE SOFTWARE.
24
+ */
25
+
26
+ #include <assert.h>
27
+ #include <stdio.h>
28
+ #include "picohttpparser.h"
29
+
30
+ #define REQ "GET /wp-content/uploads/2010/03/hello-kitty-darth-vader-pink.jpg HTTP/1.1\r\nHost: www.kittyhell.com\r\nUser-Agent: Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.6; ja-JP-mac; rv:1.9.2.3) Gecko/20100401 Firefox/3.6.3 Pathtraq/0.9\r\nAccept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8\r\nAccept-Language: ja,en-us;q=0.7,en;q=0.3\r\nAccept-Encoding: gzip,deflate\r\nAccept-Charset: Shift_JIS,utf-8;q=0.7,*;q=0.7\r\nKeep-Alive: 115\r\nConnection: keep-alive\r\nCookie: wp_ozh_wsa_visits=2; wp_ozh_wsa_visit_lasttime=xxxxxxxxxx; __utma=xxxxxxxxx.xxxxxxxxxx.xxxxxxxxxx.xxxxxxxxxx.xxxxxxxxxx.x; __utmz=xxxxxxxxx.xxxxxxxxxx.x.x.utmccn=(referral)|utmcsr=reader.livedoor.com|utmcct=/reader/|utmcmd=referral\r\n\r\n"
31
+
32
+ int main(void)
33
+ {
34
+ const char* method;
35
+ size_t method_len;
36
+ const char* path;
37
+ size_t path_len;
38
+ int minor_version;
39
+ struct phr_header headers[32];
40
+ size_t num_headers;
41
+ int i, ret;
42
+
43
+ for (i = 0; i < 1000000; i++) {
44
+ num_headers = sizeof(headers) / sizeof(headers[0]);
45
+ ret = phr_parse_request(REQ, sizeof(REQ) - 1, &method, &method_len, &path,
46
+ &path_len, &minor_version, headers, &num_headers,
47
+ 0);
48
+ assert(ret == sizeof(REQ) - 1);
49
+ }
50
+
51
+ return 0;
52
+ }
@@ -0,0 +1,377 @@
1
+ /*
2
+ * Copyright (c) 2009-2014 Kazuho Oku, Tokuhiro Matsuno, Daisuke Murase
3
+ *
4
+ * The software is licensed under either the MIT License (below) or the Perl
5
+ * license.
6
+ *
7
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
8
+ * of this software and associated documentation files (the "Software"), to
9
+ * deal in the Software without restriction, including without limitation the
10
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
11
+ * sell copies of the Software, and to permit persons to whom the Software is
12
+ * furnished to do so, subject to the following conditions:
13
+ *
14
+ * The above copyright notice and this permission notice shall be included in
15
+ * all copies or substantial portions of the Software.
16
+ *
17
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
23
+ * IN THE SOFTWARE.
24
+ */
25
+
26
#include <limits.h>
#include <stddef.h>
#include "picohttpparser.h"
28
+
29
+ /* $Id$ */
30
+
31
/* Branch-prediction hints; plain pass-through where __builtin_expect is not available. */
#if defined(__GNUC__) && __GNUC__ >= 3
# define likely(x) __builtin_expect(!!(x), 1)
# define unlikely(x) __builtin_expect(!!(x), 0)
#else
# define likely(x) (x)
# define unlikely(x) (x)
#endif

/* True for bytes 0x20..0x7e (single unsigned range check). */
#define IS_PRINTABLE_ASCII(c) ((unsigned char)(c) - 0x20u < 0x5fu)

/*
 * The macros below expect `buf`, `buf_end` and `ret` to be in scope and are
 * only usable inside functions returning a pointer.
 */

/* Bail out with *ret = -2 when the input is exhausted. */
#define CHECK_EOF()                 \
    if (buf == buf_end) {           \
        *ret = -2;                  \
        return NULL;                \
    }

/* Consume one byte, which must be `ch`; *ret = -1 on mismatch, -2 at end of input. */
#define EXPECT_CHAR(ch)             \
    CHECK_EOF();                    \
    if (*buf++ != ch) {             \
        *ret = -1;                  \
        return NULL;                \
    }

/*
 * Scan up to (not including) the next space and report the span through
 * tok/toklen. Control characters (< 0x20) and DEL fail with *ret = -1;
 * bytes >= 0x80 are accepted.
 */
#define ADVANCE_TOKEN(tok, toklen) do {                                 \
        const char* tok_start = buf;                                    \
        for (;; ++buf) {                                                \
            CHECK_EOF();                                                \
            if (*buf == ' ')                                            \
                break;                                                  \
            if (unlikely(! IS_PRINTABLE_ASCII(*buf)) &&                 \
                ((unsigned char)*buf < '\040' || *buf == '\177')) {     \
                *ret = -1;                                              \
                return NULL;                                            \
            }                                                           \
        }                                                               \
        tok = tok_start;                                                \
        toklen = buf - tok_start;                                       \
    } while (0)
70
+
71
/*
 * Lookup table indexed by byte value: 1 when the byte may appear in an HTTP
 * token (RFC 7230 tchar: ALPHA, DIGIT and ! # $ % & ' * + - . ^ _ ` | ~),
 * 0 otherwise. The double quote (0x22) is a delimiter, not a token
 * character, and is therefore 0.
 */
static const char* token_char_map =
    "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
    "\0\1\0\1\1\1\1\1\0\0\1\1\0\1\1\0\1\1\1\1\1\1\1\1\1\1\0\0\0\0\0\0"
    "\0\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\0\0\1\1"
    "\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\1\0\1\0"
    "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
    "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
    "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
    "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0";
80
+
81
/*
 * Scans from buf to the end of the current line. On success stores the line
 * content (without its CRLF or LF terminator) in *token / *token_len and
 * returns a pointer just past the terminator. TAB and bytes >= 0x80 are
 * accepted inside the line; any other control character or DEL that is not
 * the line terminator fails with *ret = -1. Running out of input sets
 * *ret = -2. Returns NULL on failure.
 */
static const char* get_token_to_eol(const char* buf, const char* buf_end,
                                    const char** token, size_t* token_len,
                                    int* ret)
{
    const char* const line_start = buf;

/* control character other than TAB, or DEL */
#define ENDS_TOKEN(c) \
    ((likely((unsigned char)(c) < '\040') && likely((c) != '\011')) || unlikely((c) == '\177'))
#define STEP() if (unlikely(! IS_PRINTABLE_ASCII(*buf))) goto NonPrintable; ++buf

    /* hottest code: examine 8 bytes per iteration, manually unrolled */
    while (likely(buf_end - buf >= 8)) {
        STEP(); STEP(); STEP(); STEP();
        STEP(); STEP(); STEP(); STEP();
        continue;
    NonPrintable:
        if (ENDS_TOKEN(*buf))
            goto FOUND_CTL;
        ++buf;
    }
#undef STEP

    /* fewer than 8 bytes remain: byte-at-a-time with end-of-input checks */
    for (;; ++buf) {
        CHECK_EOF();
        if (unlikely(! IS_PRINTABLE_ASCII(*buf)) && ENDS_TOKEN(*buf))
            goto FOUND_CTL;
    }
#undef ENDS_TOKEN

 FOUND_CTL:
    if (likely(*buf == '\015')) {
        ++buf;
        EXPECT_CHAR('\012');
        *token_len = buf - 2 - line_start; /* drop the CRLF */
    } else if (*buf == '\012') {
        *token_len = buf - line_start;
        ++buf;
    } else {
        *ret = -1;
        return NULL;
    }
    *token = line_start;

    return buf;
}
124
+
125
/*
 * Looks for two consecutive line terminators (CRLF or LF), i.e. the end of
 * the header section. When last_len >= 3 the scan starts 3 bytes before
 * offset last_len. Returns a pointer just past the second terminator; when
 * the input ends first, returns NULL with *ret = -2. A CR not followed by
 * LF yields NULL with *ret = -1.
 */
static const char* is_complete(const char* buf, const char* buf_end,
                               size_t last_len, int* ret)
{
    int eol_run = 0; /* consecutive line terminators seen so far */

    if (last_len >= 3)
        buf = buf + last_len - 3;

    for (;;) {
        CHECK_EOF();
        if (*buf == '\015') {
            ++buf;
            EXPECT_CHAR('\012'); /* performs its own end-of-input check */
            ++eol_run;
        } else if (*buf == '\012') {
            ++buf;
            ++eol_run;
        } else {
            ++buf;
            eol_run = 0;
        }
        if (eol_run == 2)
            return buf;
    }

    /* not reached */
    *ret = -2;
    return NULL;
}
153
+
154
+ /* *_buf is always within [buf, buf_end) upon success */
155
/*
 * Parses a run of one or more decimal digits starting at buf.
 *
 * On success stores the number in *value and returns a pointer to the first
 * non-digit byte, which is always inside [buf, buf_end). Returns NULL with
 * *ret = -2 when the input ends before a non-digit is seen, and NULL with
 * *ret = -1 when buf does not start with a digit or the number does not fit
 * in an int.
 */
static const char* parse_int(const char* buf, const char* buf_end, int* value,
                             int* ret)
{
    int v = 0;

    if (buf == buf_end) {
        *ret = -2;
        return NULL;
    }
    if (! ('0' <= *buf && *buf <= '9')) {
        *ret = -1;
        return NULL;
    }
    for (; ; ++buf) {
        int digit;
        if (buf == buf_end) {
            *ret = -2;
            return NULL;
        }
        if (! ('0' <= *buf && *buf <= '9')) {
            break;
        }
        digit = *buf - '0';
        /* reject before v * 10 + digit could overflow (undefined behaviour) */
        if (v > (INT_MAX - digit) / 10) {
            *ret = -1;
            return NULL;
        }
        v = v * 10 + digit;
    }

    *value = v;
    return buf;
}
177
+
178
+ /* returned pointer is always within [buf, buf_end), or null */
179
/*
 * Consumes the literal "HTTP/1." and then the minor version number, which
 * is stored in *minor_version. Returns the position after the number, or
 * NULL with *ret set (-1 on mismatch, -2 when the input ends early).
 */
static const char* parse_http_version(const char* buf, const char* buf_end,
                                      int* minor_version, int* ret)
{
    static const char prefix[] = "HTTP/1.";
    const char* expected;

    for (expected = prefix; *expected != '\0'; ++expected) {
        EXPECT_CHAR(*expected);
    }
    return parse_int(buf, buf_end, minor_version, ret);
}
186
+
187
+ static const char* parse_headers(const char* buf, const char* buf_end,
188
+ struct phr_header* headers,
189
+ size_t* num_headers, size_t max_headers,
190
+ int* ret)
191
+ {
192
+ for (; ; ++*num_headers) {
193
+ CHECK_EOF();
194
+ if (*buf == '\015') {
195
+ ++buf;
196
+ EXPECT_CHAR('\012');
197
+ break;
198
+ } else if (*buf == '\012') {
199
+ ++buf;
200
+ break;
201
+ }
202
+ if (*num_headers == max_headers) {
203
+ *ret = -1;
204
+ return NULL;
205
+ }
206
+ if (! (*num_headers != 0 && (*buf == ' ' || *buf == '\t'))) {
207
+ if (! token_char_map[(unsigned char)*buf]) {
208
+ *ret = -1;
209
+ return NULL;
210
+ }
211
+ /* parsing name, but do not discard SP before colon, see
212
+ * http://www.mozilla.org/security/announce/2006/mfsa2006-33.html */
213
+ headers[*num_headers].name = buf;
214
+ for (; ; ++buf) {
215
+ CHECK_EOF();
216
+ if (*buf == ':') {
217
+ break;
218
+ } else if (*buf < ' ') {
219
+ *ret = -1;
220
+ return NULL;
221
+ }
222
+ }
223
+ headers[*num_headers].name_len = buf - headers[*num_headers].name;
224
+ ++buf;
225
+ for (; ; ++buf) {
226
+ CHECK_EOF();
227
+ if (! (*buf == ' ' || *buf == '\t')) {
228
+ break;
229
+ }
230
+ }
231
+ } else {
232
+ headers[*num_headers].name = NULL;
233
+ headers[*num_headers].name_len = 0;
234
+ }
235
+ if ((buf = get_token_to_eol(buf, buf_end, &headers[*num_headers].value,
236
+ &headers[*num_headers].value_len, ret))
237
+ == NULL) {
238
+ return NULL;
239
+ }
240
+ }
241
+ return buf;
242
+ }
243
+
244
/*
 * Parses the request line ("METHOD SP PATH SP HTTP/1.x" followed by CRLF or
 * LF) and then the headers. Returns the position after the header section,
 * or NULL with *ret set to -1 (malformed) or -2 (input ended early).
 */
const char* parse_request(const char* buf, const char* buf_end,
                          const char** method, size_t* method_len,
                          const char** path, size_t* path_len,
                          int* minor_version, struct phr_header* headers,
                          size_t* num_headers, size_t max_headers, int* ret)
{
    /* skip first empty line (some clients add CRLF after POST content) */
    CHECK_EOF();
    switch (*buf) {
    case '\015':
        ++buf;
        EXPECT_CHAR('\012');
        break;
    case '\012':
        ++buf;
        break;
    default:
        break;
    }

    /* request line: each token ends at a space, which is stepped over */
    ADVANCE_TOKEN(*method, *method_len);
    ++buf;
    ADVANCE_TOKEN(*path, *path_len);
    ++buf;
    buf = parse_http_version(buf, buf_end, minor_version, ret);
    if (buf == NULL)
        return NULL;

    /* the request line must end right after the version */
    switch (*buf) {
    case '\015':
        ++buf;
        EXPECT_CHAR('\012');
        break;
    case '\012':
        ++buf;
        break;
    default:
        *ret = -1;
        return NULL;
    }

    return parse_headers(buf, buf_end, headers, num_headers, max_headers, ret);
}
279
+
280
/*
 * Parses an HTTP request from buf_start[0..len).
 *
 * *num_headers is in/out: the capacity of headers[] on entry, the number of
 * header lines stored on return. All other outputs are reset before
 * parsing. Returns the number of bytes consumed on success, otherwise the
 * negative code set by the parsing helpers (-1 malformed, -2 incomplete).
 */
int phr_parse_request(const char* buf_start, size_t len, const char** method,
                      size_t* method_len, const char** path, size_t* path_len,
                      int* minor_version, struct phr_header* headers,
                      size_t* num_headers, size_t last_len)
{
    const char* const buf_end = buf_start + len;
    const char* parsed_to;
    const size_t max_headers = *num_headers;
    int r;

    *method = NULL;
    *method_len = 0;
    *path = NULL;
    *path_len = 0;
    *minor_version = -1;
    *num_headers = 0;

    /* if last_len != 0, check if the request is complete (a fast
       countermeasure against slowloris) */
    if (last_len != 0 && is_complete(buf_start, buf_end, last_len, &r) == NULL)
        return r;

    parsed_to = parse_request(buf_start, buf_end, method, method_len, path,
                              path_len, minor_version, headers, num_headers,
                              max_headers, &r);
    if (parsed_to == NULL)
        return r;

    return (int)(parsed_to - buf_start);
}
311
+
312
/*
 * Parses the status line ("HTTP/1.x SP status SP message" followed by CRLF
 * or LF) and then the headers. Returns the position after the header
 * section, or NULL with *ret set to -1 (malformed) or -2 (input ended
 * early).
 */
static const char* parse_response(const char* buf, const char* buf_end,
                                  int* minor_version, int* status,
                                  const char** msg, size_t* msg_len,
                                  struct phr_header* headers,
                                  size_t* num_headers, size_t max_headers,
                                  int* ret)
{
    /* "HTTP/1.x" */
    buf = parse_http_version(buf, buf_end, minor_version, ret);
    if (buf == NULL)
        return NULL;
    /* exactly one space separates version and status code */
    if (*buf++ != ' ') {
        *ret = -1;
        return NULL;
    }
    /* status code */
    buf = parse_int(buf, buf_end, status, ret);
    if (buf == NULL)
        return NULL;
    /* exactly one space separates status code and message */
    if (*buf++ != ' ') {
        *ret = -1;
        return NULL;
    }
    /* the message runs to the end of the line */
    buf = get_token_to_eol(buf, buf_end, msg, msg_len, ret);
    if (buf == NULL)
        return NULL;

    return parse_headers(buf, buf_end, headers, num_headers, max_headers, ret);
}
344
+
345
/*
 * Parses an HTTP response from buf_start[0..len).
 *
 * *num_headers is in/out: the capacity of headers[] on entry, the number of
 * header lines stored on return. All other outputs are reset before
 * parsing. Returns the number of bytes consumed on success, otherwise the
 * negative code set by the parsing helpers (-1 malformed, -2 incomplete).
 */
int phr_parse_response(const char* buf_start, size_t len, int* minor_version,
                       int* status, const char** msg, size_t* msg_len,
                       struct phr_header* headers, size_t* num_headers,
                       size_t last_len)
{
    const char* const buf_end = buf_start + len;
    const char* parsed_to;
    const size_t max_headers = *num_headers;
    int r;

    *minor_version = -1;
    *status = 0;
    *msg = NULL;
    *msg_len = 0;
    *num_headers = 0;

    /* if last_len != 0, check if the response is complete (a fast
       countermeasure against slowloris) */
    if (last_len != 0 && is_complete(buf_start, buf_end, last_len, &r) == NULL)
        return r;

    parsed_to = parse_response(buf_start, buf_end, minor_version, status, msg,
                               msg_len, headers, num_headers, max_headers, &r);
    if (parsed_to == NULL)
        return r;

    return (int)(parsed_to - buf_start);
}
374
+
375
+ #undef CHECK_EOF
376
+ #undef EXPECT_CHAR
377
+ #undef ADVANCE_TOKEN