http-parser 1.0.5 → 1.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 8c93598e31fa92f5acb831a75e7514f21180af80
4
- data.tar.gz: 935ef47585bf17f3a1b4a107d07774c60e300f6b
3
+ metadata.gz: 521dd5304a0b3ff3e6c1a1ffbec3f383b5ad3665
4
+ data.tar.gz: a79ef2fcbc2020577e2060f65b929c7d2cdaeaf0
5
5
  SHA512:
6
- metadata.gz: 271c6b54982d6854b00873a2d217dba0551adc69a920fb4748a6f22d9d43573d1cb8f96e3b7b06942db510ac4d27906ae0e9cde60a5ee78f08f47524fca83643
7
- data.tar.gz: 159035cb10bc1b947b29cdb3ee27f6e1ae1570e7cf4df42544c1cbf7a81065a2e32e7390deae2d686d491bc6b7301e216446c97f43f69f711f7de61bb1afddd1
6
+ metadata.gz: a6fb5d0850ee732007b4ce10edb55fd3dc2bc60f2872cf06d19705db5f7a196daa478aeaf5867362d1ef7ae98c5b22e179d86a7d6d0ed03d1a6fcfa2e476e648
7
+ data.tar.gz: d7648c23965f6e6d19b2a3f12a1776726dc028ad6c6a3c5d7543253f14d346c961a965455e293d15ea71e6e9edae19c08be383a3b580d41921908cc0be88d4c7
data/LICENSE CHANGED
@@ -1,20 +1,20 @@
1
- The MIT License (MIT)
2
-
3
- Copyright (c) 2013 CoTag Media
4
-
5
- Permission is hereby granted, free of charge, to any person obtaining a copy of
6
- this software and associated documentation files (the "Software"), to deal in
7
- the Software without restriction, including without limitation the rights to
8
- use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
9
- the Software, and to permit persons to whom the Software is furnished to do so,
10
- subject to the following conditions:
11
-
12
- The above copyright notice and this permission notice shall be included in all
13
- copies or substantial portions of the Software.
14
-
15
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
17
- FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
18
- COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
19
- IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20
- CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2013 CoTag Media
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy of
6
+ this software and associated documentation files (the "Software"), to deal in
7
+ the Software without restriction, including without limitation the rights to
8
+ use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
9
+ the Software, and to permit persons to whom the Software is furnished to do so,
10
+ subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
17
+ FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
18
+ COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
19
+ IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20
+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md CHANGED
@@ -1,70 +1,70 @@
1
- # http-parser
2
-
3
- Ruby FFI bindings to [http-parser](https://github.com/joyent/http-parser) [![Build Status](https://travis-ci.org/cotag/http-parser.png)](https://travis-ci.org/cotag/http-parser)
4
-
5
- ## Install
6
-
7
- ```shell
8
- gem install http-parser
9
- ```
10
- This gem will compile a local copy of http-parser
11
-
12
-
13
- ## Usage
14
-
15
- ```ruby
16
- require 'rubygems'
17
- require 'http-parser'
18
-
19
- #
20
- # Create a shared parser
21
- #
22
- parser = HttpParser::Parser.new do |parser|
23
- parser.on_message_begin do |inst|
24
- puts "message begin"
25
- end
26
-
27
- parser.on_message_complete do |inst|
28
- puts "message end"
29
- end
30
-
31
- parser.on_url do |inst, data|
32
- puts "url: #{data}"
33
- end
34
-
35
- parser.on_header_field do |inst, data|
36
- puts "field: #{data}"
37
- end
38
-
39
- parser.on_header_value do |inst, data|
40
- puts "value: #{data}"
41
- end
42
- end
43
-
44
- #
45
- # Create state objects to track requests through the parser
46
- #
47
- request = HttpParser::Parser.new_instance do |inst|
48
- inst.type = :request
49
- end
50
-
51
- #
52
- # Parse requests
53
- #
54
- parser.parse request, "GET /foo HTTP/1.1\r\n"
55
- sleep 3
56
- parser.parse request, "Host: example.com\r\n"
57
- sleep 3
58
- parser.parse request, "\r\n"
59
-
60
- #
61
- # Re-use the memory for another request
62
- #
63
- request.reset!
64
- ```
65
-
66
- ## Acknowledgements
67
-
68
- * https://github.com/joyent/http-parser#readme
69
- * https://github.com/postmodern/ffi-http-parser#readme
1
+ # http-parser
2
+
3
+ Ruby FFI bindings to [http-parser](https://github.com/joyent/http-parser) [![Build Status](https://travis-ci.org/cotag/http-parser.png)](https://travis-ci.org/cotag/http-parser)
4
+
5
+ ## Install
6
+
7
+ ```shell
8
+ gem install http-parser
9
+ ```
10
+ This gem will compile a local copy of http-parser
11
+
12
+
13
+ ## Usage
14
+
15
+ ```ruby
16
+ require 'rubygems'
17
+ require 'http-parser'
18
+
19
+ #
20
+ # Create a shared parser
21
+ #
22
+ parser = HttpParser::Parser.new do |parser|
23
+ parser.on_message_begin do |inst|
24
+ puts "message begin"
25
+ end
26
+
27
+ parser.on_message_complete do |inst|
28
+ puts "message end"
29
+ end
30
+
31
+ parser.on_url do |inst, data|
32
+ puts "url: #{data}"
33
+ end
34
+
35
+ parser.on_header_field do |inst, data|
36
+ puts "field: #{data}"
37
+ end
38
+
39
+ parser.on_header_value do |inst, data|
40
+ puts "value: #{data}"
41
+ end
42
+ end
43
+
44
+ #
45
+ # Create state objects to track requests through the parser
46
+ #
47
+ request = HttpParser::Parser.new_instance do |inst|
48
+ inst.type = :request
49
+ end
50
+
51
+ #
52
+ # Parse requests
53
+ #
54
+ parser.parse request, "GET /foo HTTP/1.1\r\n"
55
+ sleep 3
56
+ parser.parse request, "Host: example.com\r\n"
57
+ sleep 3
58
+ parser.parse request, "\r\n"
59
+
60
+ #
61
+ # Re-use the memory for another request
62
+ #
63
+ request.reset!
64
+ ```
65
+
66
+ ## Acknowledgements
67
+
68
+ * https://github.com/joyent/http-parser#readme
69
+ * https://github.com/postmodern/ffi-http-parser#readme
70
70
  * https://github.com/deepfryed/http-parser-lite#readme
data/Rakefile CHANGED
@@ -1,19 +1,19 @@
1
- require 'rubygems'
2
- require 'rake'
3
- require 'rspec/core/rake_task'
4
-
5
- task :default => [:compile, :test]
6
-
7
- task :compile do
8
- protect = ['http_parser.c', 'http_parser.h']
9
- Dir["ext/http-parser/**/*"].each do |file|
10
- begin
11
- next if protect.include? File.basename(file)
12
- FileUtils.rm file
13
- rescue
14
- end
15
- end
16
- system 'cd ext && rake'
17
- end
18
-
19
- RSpec::Core::RakeTask.new(:test)
1
+ require 'rubygems'
2
+ require 'rake'
3
+ require 'rspec/core/rake_task'
4
+
5
+ task :default => [:compile, :test]
6
+
7
+ task :compile do
8
+ protect = ['http_parser.c', 'http_parser.h']
9
+ Dir["ext/http-parser/**/*"].each do |file|
10
+ begin
11
+ next if protect.include? File.basename(file)
12
+ FileUtils.rm file
13
+ rescue
14
+ end
15
+ end
16
+ system 'cd ext && rake'
17
+ end
18
+
19
+ RSpec::Core::RakeTask.new(:test)
@@ -1,8 +1,8 @@
1
- require 'ffi-compiler/compile_task'
2
-
3
- FFI::Compiler::CompileTask.new('http-parser-ext') do |t|
4
- t.cflags << "-Wall -Wextra -O3"
5
- t.cflags << "-D_GNU_SOURCE=1" if RbConfig::CONFIG["host_os"].downcase =~ /mingw/
6
- t.cflags << "-arch x86_64 -arch i386" if t.platform.mac?
7
- t.ldflags << "-arch x86_64 -arch i386" if t.platform.mac?
8
- end
1
+ require 'ffi-compiler/compile_task'
2
+
3
+ FFI::Compiler::CompileTask.new('http-parser-ext') do |t|
4
+ t.cflags << "-Wall -Wextra -O3"
5
+ t.cflags << "-D_GNU_SOURCE=1" if RbConfig::CONFIG["host_os"].downcase =~ /mingw/
6
+ t.cflags << "-arch x86_64 -arch i386" if t.platform.mac?
7
+ t.ldflags << "-arch x86_64 -arch i386" if t.platform.mac?
8
+ end
@@ -1,2234 +1,2470 @@
1
- /* Based on src/http/ngx_http_parse.c from NGINX copyright Igor Sysoev
2
- *
3
- * Additional changes are licensed under the same terms as NGINX and
4
- * copyright Joyent, Inc. and other Node contributors. All rights reserved.
5
- *
6
- * Permission is hereby granted, free of charge, to any person obtaining a copy
7
- * of this software and associated documentation files (the "Software"), to
8
- * deal in the Software without restriction, including without limitation the
9
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10
- * sell copies of the Software, and to permit persons to whom the Software is
11
- * furnished to do so, subject to the following conditions:
12
- *
13
- * The above copyright notice and this permission notice shall be included in
14
- * all copies or substantial portions of the Software.
15
- *
16
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22
- * IN THE SOFTWARE.
23
- */
24
- #include "http_parser.h"
25
- #include <assert.h>
26
- #include <stddef.h>
27
- #include <ctype.h>
28
- #include <stdlib.h>
29
- #include <string.h>
30
- #include <limits.h>
31
-
32
- #ifndef ULLONG_MAX
33
- # define ULLONG_MAX ((uint64_t) -1) /* 2^64-1 */
34
- #endif
35
-
36
- #ifndef MIN
37
- # define MIN(a,b) ((a) < (b) ? (a) : (b))
38
- #endif
39
-
40
- #ifndef ARRAY_SIZE
41
- # define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
42
- #endif
43
-
44
- #ifndef BIT_AT
45
- # define BIT_AT(a, i) \
46
- (!!((unsigned int) (a)[(unsigned int) (i) >> 3] & \
47
- (1 << ((unsigned int) (i) & 7))))
48
- #endif
49
-
50
- #ifndef ELEM_AT
51
- # define ELEM_AT(a, i, v) ((unsigned int) (i) < ARRAY_SIZE(a) ? (a)[(i)] : (v))
52
- #endif
53
-
54
- #define SET_ERRNO(e) \
55
- do { \
56
- parser->http_errno = (e); \
57
- } while(0)
58
-
59
-
60
- /* Run the notify callback FOR, returning ER if it fails */
61
- #define CALLBACK_NOTIFY_(FOR, ER) \
62
- do { \
63
- assert(HTTP_PARSER_ERRNO(parser) == HPE_OK); \
64
- \
65
- if (settings->on_##FOR) { \
66
- if (0 != settings->on_##FOR(parser)) { \
67
- SET_ERRNO(HPE_CB_##FOR); \
68
- } \
69
- \
70
- /* We either errored above or got paused; get out */ \
71
- if (HTTP_PARSER_ERRNO(parser) != HPE_OK) { \
72
- return (ER); \
73
- } \
74
- } \
75
- } while (0)
76
-
77
- /* Run the notify callback FOR and consume the current byte */
78
- #define CALLBACK_NOTIFY(FOR) CALLBACK_NOTIFY_(FOR, p - data + 1)
79
-
80
- /* Run the notify callback FOR and don't consume the current byte */
81
- #define CALLBACK_NOTIFY_NOADVANCE(FOR) CALLBACK_NOTIFY_(FOR, p - data)
82
-
83
- /* Run data callback FOR with LEN bytes, returning ER if it fails */
84
- #define CALLBACK_DATA_(FOR, LEN, ER) \
85
- do { \
86
- assert(HTTP_PARSER_ERRNO(parser) == HPE_OK); \
87
- \
88
- if (FOR##_mark) { \
89
- if (settings->on_##FOR) { \
90
- if (0 != settings->on_##FOR(parser, FOR##_mark, (LEN))) { \
91
- SET_ERRNO(HPE_CB_##FOR); \
92
- } \
93
- \
94
- /* We either errored above or got paused; get out */ \
95
- if (HTTP_PARSER_ERRNO(parser) != HPE_OK) { \
96
- return (ER); \
97
- } \
98
- } \
99
- FOR##_mark = NULL; \
100
- } \
101
- } while (0)
102
-
103
- /* Run the data callback FOR and consume the current byte */
104
- #define CALLBACK_DATA(FOR) \
105
- CALLBACK_DATA_(FOR, p - FOR##_mark, p - data + 1)
106
-
107
- /* Run the data callback FOR and don't consume the current byte */
108
- #define CALLBACK_DATA_NOADVANCE(FOR) \
109
- CALLBACK_DATA_(FOR, p - FOR##_mark, p - data)
110
-
111
- /* Set the mark FOR; non-destructive if mark is already set */
112
- #define MARK(FOR) \
113
- do { \
114
- if (!FOR##_mark) { \
115
- FOR##_mark = p; \
116
- } \
117
- } while (0)
118
-
119
-
120
- #define PROXY_CONNECTION "proxy-connection"
121
- #define CONNECTION "connection"
122
- #define CONTENT_LENGTH "content-length"
123
- #define TRANSFER_ENCODING "transfer-encoding"
124
- #define UPGRADE "upgrade"
125
- #define CHUNKED "chunked"
126
- #define KEEP_ALIVE "keep-alive"
127
- #define CLOSE "close"
128
-
129
-
130
- static const char *method_strings[] =
131
- {
132
- #define XX(num, name, string) #string,
133
- HTTP_METHOD_MAP(XX)
134
- #undef XX
135
- };
136
-
137
-
138
- /* Tokens as defined by rfc 2616. Also lowercases them.
139
- * token = 1*<any CHAR except CTLs or separators>
140
- * separators = "(" | ")" | "<" | ">" | "@"
141
- * | "," | ";" | ":" | "\" | <">
142
- * | "/" | "[" | "]" | "?" | "="
143
- * | "{" | "}" | SP | HT
144
- */
145
- static const char tokens[256] = {
146
- /* 0 nul 1 soh 2 stx 3 etx 4 eot 5 enq 6 ack 7 bel */
147
- 0, 0, 0, 0, 0, 0, 0, 0,
148
- /* 8 bs 9 ht 10 nl 11 vt 12 np 13 cr 14 so 15 si */
149
- 0, 0, 0, 0, 0, 0, 0, 0,
150
- /* 16 dle 17 dc1 18 dc2 19 dc3 20 dc4 21 nak 22 syn 23 etb */
151
- 0, 0, 0, 0, 0, 0, 0, 0,
152
- /* 24 can 25 em 26 sub 27 esc 28 fs 29 gs 30 rs 31 us */
153
- 0, 0, 0, 0, 0, 0, 0, 0,
154
- /* 32 sp 33 ! 34 " 35 # 36 $ 37 % 38 & 39 ' */
155
- 0, '!', 0, '#', '$', '%', '&', '\'',
156
- /* 40 ( 41 ) 42 * 43 + 44 , 45 - 46 . 47 / */
157
- 0, 0, '*', '+', 0, '-', '.', 0,
158
- /* 48 0 49 1 50 2 51 3 52 4 53 5 54 6 55 7 */
159
- '0', '1', '2', '3', '4', '5', '6', '7',
160
- /* 56 8 57 9 58 : 59 ; 60 < 61 = 62 > 63 ? */
161
- '8', '9', 0, 0, 0, 0, 0, 0,
162
- /* 64 @ 65 A 66 B 67 C 68 D 69 E 70 F 71 G */
163
- 0, 'a', 'b', 'c', 'd', 'e', 'f', 'g',
164
- /* 72 H 73 I 74 J 75 K 76 L 77 M 78 N 79 O */
165
- 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
166
- /* 80 P 81 Q 82 R 83 S 84 T 85 U 86 V 87 W */
167
- 'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
168
- /* 88 X 89 Y 90 Z 91 [ 92 \ 93 ] 94 ^ 95 _ */
169
- 'x', 'y', 'z', 0, 0, 0, '^', '_',
170
- /* 96 ` 97 a 98 b 99 c 100 d 101 e 102 f 103 g */
171
- '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g',
172
- /* 104 h 105 i 106 j 107 k 108 l 109 m 110 n 111 o */
173
- 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
174
- /* 112 p 113 q 114 r 115 s 116 t 117 u 118 v 119 w */
175
- 'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
176
- /* 120 x 121 y 122 z 123 { 124 | 125 } 126 ~ 127 del */
177
- 'x', 'y', 'z', 0, '|', 0, '~', 0 };
178
-
179
-
180
- static const int8_t unhex[256] =
181
- {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
182
- ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
183
- ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
184
- , 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1
185
- ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1
186
- ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
187
- ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1
188
- ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
189
- };
190
-
191
-
192
- #if HTTP_PARSER_STRICT
193
- # define T(v) 0
194
- #else
195
- # define T(v) v
196
- #endif
197
-
198
-
199
- static const uint8_t normal_url_char[32] = {
200
- /* 0 nul 1 soh 2 stx 3 etx 4 eot 5 enq 6 ack 7 bel */
201
- 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0,
202
- /* 8 bs 9 ht 10 nl 11 vt 12 np 13 cr 14 so 15 si */
203
- 0 | T(2) | 0 | 0 | T(16) | 0 | 0 | 0,
204
- /* 16 dle 17 dc1 18 dc2 19 dc3 20 dc4 21 nak 22 syn 23 etb */
205
- 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0,
206
- /* 24 can 25 em 26 sub 27 esc 28 fs 29 gs 30 rs 31 us */
207
- 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0,
208
- /* 32 sp 33 ! 34 " 35 # 36 $ 37 % 38 & 39 ' */
209
- 0 | 2 | 4 | 0 | 16 | 32 | 64 | 128,
210
- /* 40 ( 41 ) 42 * 43 + 44 , 45 - 46 . 47 / */
211
- 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
212
- /* 48 0 49 1 50 2 51 3 52 4 53 5 54 6 55 7 */
213
- 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
214
- /* 56 8 57 9 58 : 59 ; 60 < 61 = 62 > 63 ? */
215
- 1 | 2 | 4 | 8 | 16 | 32 | 64 | 0,
216
- /* 64 @ 65 A 66 B 67 C 68 D 69 E 70 F 71 G */
217
- 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
218
- /* 72 H 73 I 74 J 75 K 76 L 77 M 78 N 79 O */
219
- 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
220
- /* 80 P 81 Q 82 R 83 S 84 T 85 U 86 V 87 W */
221
- 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
222
- /* 88 X 89 Y 90 Z 91 [ 92 \ 93 ] 94 ^ 95 _ */
223
- 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
224
- /* 96 ` 97 a 98 b 99 c 100 d 101 e 102 f 103 g */
225
- 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
226
- /* 104 h 105 i 106 j 107 k 108 l 109 m 110 n 111 o */
227
- 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
228
- /* 112 p 113 q 114 r 115 s 116 t 117 u 118 v 119 w */
229
- 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
230
- /* 120 x 121 y 122 z 123 { 124 | 125 } 126 ~ 127 del */
231
- 1 | 2 | 4 | 8 | 16 | 32 | 64 | 0, };
232
-
233
- #undef T
234
-
235
- enum state
236
- { s_dead = 1 /* important that this is > 0 */
237
-
238
- , s_start_req_or_res
239
- , s_res_or_resp_H
240
- , s_start_res
241
- , s_res_H
242
- , s_res_HT
243
- , s_res_HTT
244
- , s_res_HTTP
245
- , s_res_first_http_major
246
- , s_res_http_major
247
- , s_res_first_http_minor
248
- , s_res_http_minor
249
- , s_res_first_status_code
250
- , s_res_status_code
251
- , s_res_status_start
252
- , s_res_status
253
- , s_res_line_almost_done
254
-
255
- , s_start_req
256
-
257
- , s_req_method
258
- , s_req_spaces_before_url
259
- , s_req_schema
260
- , s_req_schema_slash
261
- , s_req_schema_slash_slash
262
- , s_req_server_start
263
- , s_req_server
264
- , s_req_server_with_at
265
- , s_req_path
266
- , s_req_query_string_start
267
- , s_req_query_string
268
- , s_req_fragment_start
269
- , s_req_fragment
270
- , s_req_http_start
271
- , s_req_http_H
272
- , s_req_http_HT
273
- , s_req_http_HTT
274
- , s_req_http_HTTP
275
- , s_req_first_http_major
276
- , s_req_http_major
277
- , s_req_first_http_minor
278
- , s_req_http_minor
279
- , s_req_line_almost_done
280
-
281
- , s_header_field_start
282
- , s_header_field
283
- , s_header_value_start
284
- , s_header_value
285
- , s_header_value_lws
286
-
287
- , s_header_almost_done
288
-
289
- , s_chunk_size_start
290
- , s_chunk_size
291
- , s_chunk_parameters
292
- , s_chunk_size_almost_done
293
-
294
- , s_headers_almost_done
295
- , s_headers_done
296
-
297
- /* Important: 's_headers_done' must be the last 'header' state. All
298
- * states beyond this must be 'body' states. It is used for overflow
299
- * checking. See the PARSING_HEADER() macro.
300
- */
301
-
302
- , s_chunk_data
303
- , s_chunk_data_almost_done
304
- , s_chunk_data_done
305
-
306
- , s_body_identity
307
- , s_body_identity_eof
308
-
309
- , s_message_done
310
- };
311
-
312
-
313
- #define PARSING_HEADER(state) (state <= s_headers_done)
314
-
315
-
316
- enum header_states
317
- { h_general = 0
318
- , h_C
319
- , h_CO
320
- , h_CON
321
-
322
- , h_matching_connection
323
- , h_matching_proxy_connection
324
- , h_matching_content_length
325
- , h_matching_transfer_encoding
326
- , h_matching_upgrade
327
-
328
- , h_connection
329
- , h_content_length
330
- , h_transfer_encoding
331
- , h_upgrade
332
-
333
- , h_matching_transfer_encoding_chunked
334
- , h_matching_connection_keep_alive
335
- , h_matching_connection_close
336
-
337
- , h_transfer_encoding_chunked
338
- , h_connection_keep_alive
339
- , h_connection_close
340
- };
341
-
342
- enum http_host_state
343
- {
344
- s_http_host_dead = 1
345
- , s_http_userinfo_start
346
- , s_http_userinfo
347
- , s_http_host_start
348
- , s_http_host_v6_start
349
- , s_http_host
350
- , s_http_host_v6
351
- , s_http_host_v6_end
352
- , s_http_host_port_start
353
- , s_http_host_port
354
- };
355
-
356
- /* Macros for character classes; depends on strict-mode */
357
- #define CR '\r'
358
- #define LF '\n'
359
- #define LOWER(c) (unsigned char)(c | 0x20)
360
- #define IS_ALPHA(c) (LOWER(c) >= 'a' && LOWER(c) <= 'z')
361
- #define IS_NUM(c) ((c) >= '0' && (c) <= '9')
362
- #define IS_ALPHANUM(c) (IS_ALPHA(c) || IS_NUM(c))
363
- #define IS_HEX(c) (IS_NUM(c) || (LOWER(c) >= 'a' && LOWER(c) <= 'f'))
364
- #define IS_MARK(c) ((c) == '-' || (c) == '_' || (c) == '.' || \
365
- (c) == '!' || (c) == '~' || (c) == '*' || (c) == '\'' || (c) == '(' || \
366
- (c) == ')')
367
- #define IS_USERINFO_CHAR(c) (IS_ALPHANUM(c) || IS_MARK(c) || (c) == '%' || \
368
- (c) == ';' || (c) == ':' || (c) == '&' || (c) == '=' || (c) == '+' || \
369
- (c) == '$' || (c) == ',')
370
-
371
- #if HTTP_PARSER_STRICT
372
- #define TOKEN(c) (tokens[(unsigned char)c])
373
- #define IS_URL_CHAR(c) (BIT_AT(normal_url_char, (unsigned char)c))
374
- #define IS_HOST_CHAR(c) (IS_ALPHANUM(c) || (c) == '.' || (c) == '-')
375
- #else
376
- #define TOKEN(c) ((c == ' ') ? ' ' : tokens[(unsigned char)c])
377
- #define IS_URL_CHAR(c) \
378
- (BIT_AT(normal_url_char, (unsigned char)c) || ((c) & 0x80))
379
- #define IS_HOST_CHAR(c) \
380
- (IS_ALPHANUM(c) || (c) == '.' || (c) == '-' || (c) == '_')
381
- #endif
382
-
383
-
384
- #define start_state (parser->type == HTTP_REQUEST ? s_start_req : s_start_res)
385
-
386
-
387
- #if HTTP_PARSER_STRICT
388
- # define STRICT_CHECK(cond) \
389
- do { \
390
- if (cond) { \
391
- SET_ERRNO(HPE_STRICT); \
392
- goto error; \
393
- } \
394
- } while (0)
395
- # define NEW_MESSAGE() (http_should_keep_alive(parser) ? start_state : s_dead)
396
- #else
397
- # define STRICT_CHECK(cond)
398
- # define NEW_MESSAGE() start_state
399
- #endif
400
-
401
-
402
- /* Map errno values to strings for human-readable output */
403
- #define HTTP_STRERROR_GEN(n, s) { "HPE_" #n, s },
404
- static struct {
405
- const char *name;
406
- const char *description;
407
- } http_strerror_tab[] = {
408
- HTTP_ERRNO_MAP(HTTP_STRERROR_GEN)
409
- };
410
- #undef HTTP_STRERROR_GEN
411
-
412
- int http_message_needs_eof(const http_parser *parser);
413
-
414
- /* Our URL parser.
415
- *
416
- * This is designed to be shared by http_parser_execute() for URL validation,
417
- * hence it has a state transition + byte-for-byte interface. In addition, it
418
- * is meant to be embedded in http_parser_parse_url(), which does the dirty
419
- * work of turning state transitions URL components for its API.
420
- *
421
- * This function should only be invoked with non-space characters. It is
422
- * assumed that the caller cares about (and can detect) the transition between
423
- * URL and non-URL states by looking for these.
424
- */
425
- static enum state
426
- parse_url_char(enum state s, const char ch)
427
- {
428
- if (ch == ' ' || ch == '\r' || ch == '\n') {
429
- return s_dead;
430
- }
431
-
432
- #if HTTP_PARSER_STRICT
433
- if (ch == '\t' || ch == '\f') {
434
- return s_dead;
435
- }
436
- #endif
437
-
438
- switch (s) {
439
- case s_req_spaces_before_url:
440
- /* Proxied requests are followed by scheme of an absolute URI (alpha).
441
- * All methods except CONNECT are followed by '/' or '*'.
442
- */
443
-
444
- if (ch == '/' || ch == '*') {
445
- return s_req_path;
446
- }
447
-
448
- if (IS_ALPHA(ch)) {
449
- return s_req_schema;
450
- }
451
-
452
- break;
453
-
454
- case s_req_schema:
455
- if (IS_ALPHA(ch)) {
456
- return s;
457
- }
458
-
459
- if (ch == ':') {
460
- return s_req_schema_slash;
461
- }
462
-
463
- break;
464
-
465
- case s_req_schema_slash:
466
- if (ch == '/') {
467
- return s_req_schema_slash_slash;
468
- }
469
-
470
- break;
471
-
472
- case s_req_schema_slash_slash:
473
- if (ch == '/') {
474
- return s_req_server_start;
475
- }
476
-
477
- break;
478
-
479
- case s_req_server_with_at:
480
- if (ch == '@') {
481
- return s_dead;
482
- }
483
-
484
- /* FALLTHROUGH */
485
- case s_req_server_start:
486
- case s_req_server:
487
- if (ch == '/') {
488
- return s_req_path;
489
- }
490
-
491
- if (ch == '?') {
492
- return s_req_query_string_start;
493
- }
494
-
495
- if (ch == '@') {
496
- return s_req_server_with_at;
497
- }
498
-
499
- if (IS_USERINFO_CHAR(ch) || ch == '[' || ch == ']') {
500
- return s_req_server;
501
- }
502
-
503
- break;
504
-
505
- case s_req_path:
506
- if (IS_URL_CHAR(ch)) {
507
- return s;
508
- }
509
-
510
- switch (ch) {
511
- case '?':
512
- return s_req_query_string_start;
513
-
514
- case '#':
515
- return s_req_fragment_start;
516
- }
517
-
518
- break;
519
-
520
- case s_req_query_string_start:
521
- case s_req_query_string:
522
- if (IS_URL_CHAR(ch)) {
523
- return s_req_query_string;
524
- }
525
-
526
- switch (ch) {
527
- case '?':
528
- /* allow extra '?' in query string */
529
- return s_req_query_string;
530
-
531
- case '#':
532
- return s_req_fragment_start;
533
- }
534
-
535
- break;
536
-
537
- case s_req_fragment_start:
538
- if (IS_URL_CHAR(ch)) {
539
- return s_req_fragment;
540
- }
541
-
542
- switch (ch) {
543
- case '?':
544
- return s_req_fragment;
545
-
546
- case '#':
547
- return s;
548
- }
549
-
550
- break;
551
-
552
- case s_req_fragment:
553
- if (IS_URL_CHAR(ch)) {
554
- return s;
555
- }
556
-
557
- switch (ch) {
558
- case '?':
559
- case '#':
560
- return s;
561
- }
562
-
563
- break;
564
-
565
- default:
566
- break;
567
- }
568
-
569
- /* We should never fall out of the switch above unless there's an error */
570
- return s_dead;
571
- }
572
-
573
- size_t http_parser_execute (http_parser *parser,
574
- const http_parser_settings *settings,
575
- const char *data,
576
- size_t len)
577
- {
578
- char c, ch;
579
- int8_t unhex_val;
580
- const char *p = data;
581
- const char *header_field_mark = 0;
582
- const char *header_value_mark = 0;
583
- const char *url_mark = 0;
584
- const char *body_mark = 0;
585
- const char *status_mark = 0;
586
-
587
- /* We're in an error state. Don't bother doing anything. */
588
- if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
589
- return 0;
590
- }
591
-
592
- if (len == 0) {
593
- switch (parser->state) {
594
- case s_body_identity_eof:
595
- /* Use of CALLBACK_NOTIFY() here would erroneously return 1 byte read if
596
- * we got paused.
597
- */
598
- CALLBACK_NOTIFY_NOADVANCE(message_complete);
599
- return 0;
600
-
601
- case s_dead:
602
- case s_start_req_or_res:
603
- case s_start_res:
604
- case s_start_req:
605
- return 0;
606
-
607
- default:
608
- SET_ERRNO(HPE_INVALID_EOF_STATE);
609
- return 1;
610
- }
611
- }
612
-
613
-
614
- if (parser->state == s_header_field)
615
- header_field_mark = data;
616
- if (parser->state == s_header_value)
617
- header_value_mark = data;
618
- switch (parser->state) {
619
- case s_req_path:
620
- case s_req_schema:
621
- case s_req_schema_slash:
622
- case s_req_schema_slash_slash:
623
- case s_req_server_start:
624
- case s_req_server:
625
- case s_req_server_with_at:
626
- case s_req_query_string_start:
627
- case s_req_query_string:
628
- case s_req_fragment_start:
629
- case s_req_fragment:
630
- url_mark = data;
631
- break;
632
- case s_res_status:
633
- status_mark = data;
634
- break;
635
- }
636
-
637
- for (p=data; p != data + len; p++) {
638
- ch = *p;
639
-
640
- if (PARSING_HEADER(parser->state)) {
641
- ++parser->nread;
642
- /* Don't allow the total size of the HTTP headers (including the status
643
- * line) to exceed HTTP_MAX_HEADER_SIZE. This check is here to protect
644
- * embedders against denial-of-service attacks where the attacker feeds
645
- * us a never-ending header that the embedder keeps buffering.
646
- *
647
- * This check is arguably the responsibility of embedders but we're doing
648
- * it on the embedder's behalf because most won't bother and this way we
649
- * make the web a little safer. HTTP_MAX_HEADER_SIZE is still far bigger
650
- * than any reasonable request or response so this should never affect
651
- * day-to-day operation.
652
- */
653
- if (parser->nread > HTTP_MAX_HEADER_SIZE) {
654
- SET_ERRNO(HPE_HEADER_OVERFLOW);
655
- goto error;
656
- }
657
- }
658
-
659
- reexecute_byte:
660
- switch (parser->state) {
661
-
662
- case s_dead:
663
- /* this state is used after a 'Connection: close' message
664
- * the parser will error out if it reads another message
665
- */
666
- if (ch == CR || ch == LF)
667
- break;
668
-
669
- SET_ERRNO(HPE_CLOSED_CONNECTION);
670
- goto error;
671
-
672
- case s_start_req_or_res:
673
- {
674
- if (ch == CR || ch == LF)
675
- break;
676
- parser->flags = 0;
677
- parser->content_length = ULLONG_MAX;
678
-
679
- if (ch == 'H') {
680
- parser->state = s_res_or_resp_H;
681
-
682
- CALLBACK_NOTIFY(message_begin);
683
- } else {
684
- parser->type = HTTP_REQUEST;
685
- parser->state = s_start_req;
686
- goto reexecute_byte;
687
- }
688
-
689
- break;
690
- }
691
-
692
- case s_res_or_resp_H:
693
- if (ch == 'T') {
694
- parser->type = HTTP_RESPONSE;
695
- parser->state = s_res_HT;
696
- } else {
697
- if (ch != 'E') {
698
- SET_ERRNO(HPE_INVALID_CONSTANT);
699
- goto error;
700
- }
701
-
702
- parser->type = HTTP_REQUEST;
703
- parser->method = HTTP_HEAD;
704
- parser->index = 2;
705
- parser->state = s_req_method;
706
- }
707
- break;
708
-
709
- case s_start_res:
710
- {
711
- parser->flags = 0;
712
- parser->content_length = ULLONG_MAX;
713
-
714
- switch (ch) {
715
- case 'H':
716
- parser->state = s_res_H;
717
- break;
718
-
719
- case CR:
720
- case LF:
721
- break;
722
-
723
- default:
724
- SET_ERRNO(HPE_INVALID_CONSTANT);
725
- goto error;
726
- }
727
-
728
- CALLBACK_NOTIFY(message_begin);
729
- break;
730
- }
731
-
732
- case s_res_H:
733
- STRICT_CHECK(ch != 'T');
734
- parser->state = s_res_HT;
735
- break;
736
-
737
- case s_res_HT:
738
- STRICT_CHECK(ch != 'T');
739
- parser->state = s_res_HTT;
740
- break;
741
-
742
- case s_res_HTT:
743
- STRICT_CHECK(ch != 'P');
744
- parser->state = s_res_HTTP;
745
- break;
746
-
747
- case s_res_HTTP:
748
- STRICT_CHECK(ch != '/');
749
- parser->state = s_res_first_http_major;
750
- break;
751
-
752
- case s_res_first_http_major:
753
- if (ch < '0' || ch > '9') {
754
- SET_ERRNO(HPE_INVALID_VERSION);
755
- goto error;
756
- }
757
-
758
- parser->http_major = ch - '0';
759
- parser->state = s_res_http_major;
760
- break;
761
-
762
- /* major HTTP version or dot */
763
- case s_res_http_major:
764
- {
765
- if (ch == '.') {
766
- parser->state = s_res_first_http_minor;
767
- break;
768
- }
769
-
770
- if (!IS_NUM(ch)) {
771
- SET_ERRNO(HPE_INVALID_VERSION);
772
- goto error;
773
- }
774
-
775
- parser->http_major *= 10;
776
- parser->http_major += ch - '0';
777
-
778
- if (parser->http_major > 999) {
779
- SET_ERRNO(HPE_INVALID_VERSION);
780
- goto error;
781
- }
782
-
783
- break;
784
- }
785
-
786
- /* first digit of minor HTTP version */
787
- case s_res_first_http_minor:
788
- if (!IS_NUM(ch)) {
789
- SET_ERRNO(HPE_INVALID_VERSION);
790
- goto error;
791
- }
792
-
793
- parser->http_minor = ch - '0';
794
- parser->state = s_res_http_minor;
795
- break;
796
-
797
- /* minor HTTP version or end of request line */
798
- case s_res_http_minor:
799
- {
800
- if (ch == ' ') {
801
- parser->state = s_res_first_status_code;
802
- break;
803
- }
804
-
805
- if (!IS_NUM(ch)) {
806
- SET_ERRNO(HPE_INVALID_VERSION);
807
- goto error;
808
- }
809
-
810
- parser->http_minor *= 10;
811
- parser->http_minor += ch - '0';
812
-
813
- if (parser->http_minor > 999) {
814
- SET_ERRNO(HPE_INVALID_VERSION);
815
- goto error;
816
- }
817
-
818
- break;
819
- }
820
-
821
- case s_res_first_status_code:
822
- {
823
- if (!IS_NUM(ch)) {
824
- if (ch == ' ') {
825
- break;
826
- }
827
-
828
- SET_ERRNO(HPE_INVALID_STATUS);
829
- goto error;
830
- }
831
- parser->status_code = ch - '0';
832
- parser->state = s_res_status_code;
833
- break;
834
- }
835
-
836
- case s_res_status_code:
837
- {
838
- if (!IS_NUM(ch)) {
839
- switch (ch) {
840
- case ' ':
841
- parser->state = s_res_status_start;
842
- break;
843
- case CR:
844
- parser->state = s_res_line_almost_done;
845
- break;
846
- case LF:
847
- parser->state = s_header_field_start;
848
- break;
849
- default:
850
- SET_ERRNO(HPE_INVALID_STATUS);
851
- goto error;
852
- }
853
- break;
854
- }
855
-
856
- parser->status_code *= 10;
857
- parser->status_code += ch - '0';
858
-
859
- if (parser->status_code > 999) {
860
- SET_ERRNO(HPE_INVALID_STATUS);
861
- goto error;
862
- }
863
-
864
- break;
865
- }
866
-
867
- case s_res_status_start:
868
- {
869
- if (ch == CR) {
870
- parser->state = s_res_line_almost_done;
871
- break;
872
- }
873
-
874
- if (ch == LF) {
875
- parser->state = s_header_field_start;
876
- break;
877
- }
878
-
879
- MARK(status);
880
- parser->state = s_res_status;
881
- parser->index = 0;
882
- break;
883
- }
884
-
885
- case s_res_status:
886
- if (ch == CR) {
887
- parser->state = s_res_line_almost_done;
888
- CALLBACK_DATA(status);
889
- break;
890
- }
891
-
892
- if (ch == LF) {
893
- parser->state = s_header_field_start;
894
- CALLBACK_DATA(status);
895
- break;
896
- }
897
-
898
- break;
899
-
900
- case s_res_line_almost_done:
901
- STRICT_CHECK(ch != LF);
902
- parser->state = s_header_field_start;
903
- break;
904
-
905
- case s_start_req:
906
- {
907
- if (ch == CR || ch == LF)
908
- break;
909
- parser->flags = 0;
910
- parser->content_length = ULLONG_MAX;
911
-
912
- if (!IS_ALPHA(ch)) {
913
- SET_ERRNO(HPE_INVALID_METHOD);
914
- goto error;
915
- }
916
-
917
- parser->method = (enum http_method) 0;
918
- parser->index = 1;
919
- switch (ch) {
920
- case 'C': parser->method = HTTP_CONNECT; /* or COPY, CHECKOUT */ break;
921
- case 'D': parser->method = HTTP_DELETE; break;
922
- case 'G': parser->method = HTTP_GET; break;
923
- case 'H': parser->method = HTTP_HEAD; break;
924
- case 'L': parser->method = HTTP_LOCK; break;
925
- case 'M': parser->method = HTTP_MKCOL; /* or MOVE, MKACTIVITY, MERGE, M-SEARCH */ break;
926
- case 'N': parser->method = HTTP_NOTIFY; break;
927
- case 'O': parser->method = HTTP_OPTIONS; break;
928
- case 'P': parser->method = HTTP_POST;
929
- /* or PROPFIND|PROPPATCH|PUT|PATCH|PURGE */
930
- break;
931
- case 'R': parser->method = HTTP_REPORT; break;
932
- case 'S': parser->method = HTTP_SUBSCRIBE; /* or SEARCH */ break;
933
- case 'T': parser->method = HTTP_TRACE; break;
934
- case 'U': parser->method = HTTP_UNLOCK; /* or UNSUBSCRIBE */ break;
935
- default:
936
- SET_ERRNO(HPE_INVALID_METHOD);
937
- goto error;
938
- }
939
- parser->state = s_req_method;
940
-
941
- CALLBACK_NOTIFY(message_begin);
942
-
943
- break;
944
- }
945
-
946
- case s_req_method:
947
- {
948
- const char *matcher;
949
- if (ch == '\0') {
950
- SET_ERRNO(HPE_INVALID_METHOD);
951
- goto error;
952
- }
953
-
954
- matcher = method_strings[parser->method];
955
- if (ch == ' ' && matcher[parser->index] == '\0') {
956
- parser->state = s_req_spaces_before_url;
957
- } else if (ch == matcher[parser->index]) {
958
- ; /* nada */
959
- } else if (parser->method == HTTP_CONNECT) {
960
- if (parser->index == 1 && ch == 'H') {
961
- parser->method = HTTP_CHECKOUT;
962
- } else if (parser->index == 2 && ch == 'P') {
963
- parser->method = HTTP_COPY;
964
- } else {
965
- SET_ERRNO(HPE_INVALID_METHOD);
966
- goto error;
967
- }
968
- } else if (parser->method == HTTP_MKCOL) {
969
- if (parser->index == 1 && ch == 'O') {
970
- parser->method = HTTP_MOVE;
971
- } else if (parser->index == 1 && ch == 'E') {
972
- parser->method = HTTP_MERGE;
973
- } else if (parser->index == 1 && ch == '-') {
974
- parser->method = HTTP_MSEARCH;
975
- } else if (parser->index == 2 && ch == 'A') {
976
- parser->method = HTTP_MKACTIVITY;
977
- } else {
978
- SET_ERRNO(HPE_INVALID_METHOD);
979
- goto error;
980
- }
981
- } else if (parser->method == HTTP_SUBSCRIBE) {
982
- if (parser->index == 1 && ch == 'E') {
983
- parser->method = HTTP_SEARCH;
984
- } else {
985
- SET_ERRNO(HPE_INVALID_METHOD);
986
- goto error;
987
- }
988
- } else if (parser->index == 1 && parser->method == HTTP_POST) {
989
- if (ch == 'R') {
990
- parser->method = HTTP_PROPFIND; /* or HTTP_PROPPATCH */
991
- } else if (ch == 'U') {
992
- parser->method = HTTP_PUT; /* or HTTP_PURGE */
993
- } else if (ch == 'A') {
994
- parser->method = HTTP_PATCH;
995
- } else {
996
- SET_ERRNO(HPE_INVALID_METHOD);
997
- goto error;
998
- }
999
- } else if (parser->index == 2) {
1000
- if (parser->method == HTTP_PUT) {
1001
- if (ch == 'R') {
1002
- parser->method = HTTP_PURGE;
1003
- } else {
1004
- SET_ERRNO(HPE_INVALID_METHOD);
1005
- goto error;
1006
- }
1007
- } else if (parser->method == HTTP_UNLOCK) {
1008
- if (ch == 'S') {
1009
- parser->method = HTTP_UNSUBSCRIBE;
1010
- } else {
1011
- SET_ERRNO(HPE_INVALID_METHOD);
1012
- goto error;
1013
- }
1014
- } else {
1015
- SET_ERRNO(HPE_INVALID_METHOD);
1016
- goto error;
1017
- }
1018
- } else if (parser->index == 4 && parser->method == HTTP_PROPFIND && ch == 'P') {
1019
- parser->method = HTTP_PROPPATCH;
1020
- } else {
1021
- SET_ERRNO(HPE_INVALID_METHOD);
1022
- goto error;
1023
- }
1024
-
1025
- ++parser->index;
1026
- break;
1027
- }
1028
-
1029
- case s_req_spaces_before_url:
1030
- {
1031
- if (ch == ' ') break;
1032
-
1033
- MARK(url);
1034
- if (parser->method == HTTP_CONNECT) {
1035
- parser->state = s_req_server_start;
1036
- }
1037
-
1038
- parser->state = parse_url_char((enum state)parser->state, ch);
1039
- if (parser->state == s_dead) {
1040
- SET_ERRNO(HPE_INVALID_URL);
1041
- goto error;
1042
- }
1043
-
1044
- break;
1045
- }
1046
-
1047
- case s_req_schema:
1048
- case s_req_schema_slash:
1049
- case s_req_schema_slash_slash:
1050
- case s_req_server_start:
1051
- {
1052
- switch (ch) {
1053
- /* No whitespace allowed here */
1054
- case ' ':
1055
- case CR:
1056
- case LF:
1057
- SET_ERRNO(HPE_INVALID_URL);
1058
- goto error;
1059
- default:
1060
- parser->state = parse_url_char((enum state)parser->state, ch);
1061
- if (parser->state == s_dead) {
1062
- SET_ERRNO(HPE_INVALID_URL);
1063
- goto error;
1064
- }
1065
- }
1066
-
1067
- break;
1068
- }
1069
-
1070
- case s_req_server:
1071
- case s_req_server_with_at:
1072
- case s_req_path:
1073
- case s_req_query_string_start:
1074
- case s_req_query_string:
1075
- case s_req_fragment_start:
1076
- case s_req_fragment:
1077
- {
1078
- switch (ch) {
1079
- case ' ':
1080
- parser->state = s_req_http_start;
1081
- CALLBACK_DATA(url);
1082
- break;
1083
- case CR:
1084
- case LF:
1085
- parser->http_major = 0;
1086
- parser->http_minor = 9;
1087
- parser->state = (ch == CR) ?
1088
- s_req_line_almost_done :
1089
- s_header_field_start;
1090
- CALLBACK_DATA(url);
1091
- break;
1092
- default:
1093
- parser->state = parse_url_char((enum state)parser->state, ch);
1094
- if (parser->state == s_dead) {
1095
- SET_ERRNO(HPE_INVALID_URL);
1096
- goto error;
1097
- }
1098
- }
1099
- break;
1100
- }
1101
-
1102
- case s_req_http_start:
1103
- switch (ch) {
1104
- case 'H':
1105
- parser->state = s_req_http_H;
1106
- break;
1107
- case ' ':
1108
- break;
1109
- default:
1110
- SET_ERRNO(HPE_INVALID_CONSTANT);
1111
- goto error;
1112
- }
1113
- break;
1114
-
1115
- case s_req_http_H:
1116
- STRICT_CHECK(ch != 'T');
1117
- parser->state = s_req_http_HT;
1118
- break;
1119
-
1120
- case s_req_http_HT:
1121
- STRICT_CHECK(ch != 'T');
1122
- parser->state = s_req_http_HTT;
1123
- break;
1124
-
1125
- case s_req_http_HTT:
1126
- STRICT_CHECK(ch != 'P');
1127
- parser->state = s_req_http_HTTP;
1128
- break;
1129
-
1130
- case s_req_http_HTTP:
1131
- STRICT_CHECK(ch != '/');
1132
- parser->state = s_req_first_http_major;
1133
- break;
1134
-
1135
- /* first digit of major HTTP version */
1136
- case s_req_first_http_major:
1137
- if (ch < '1' || ch > '9') {
1138
- SET_ERRNO(HPE_INVALID_VERSION);
1139
- goto error;
1140
- }
1141
-
1142
- parser->http_major = ch - '0';
1143
- parser->state = s_req_http_major;
1144
- break;
1145
-
1146
- /* major HTTP version or dot */
1147
- case s_req_http_major:
1148
- {
1149
- if (ch == '.') {
1150
- parser->state = s_req_first_http_minor;
1151
- break;
1152
- }
1153
-
1154
- if (!IS_NUM(ch)) {
1155
- SET_ERRNO(HPE_INVALID_VERSION);
1156
- goto error;
1157
- }
1158
-
1159
- parser->http_major *= 10;
1160
- parser->http_major += ch - '0';
1161
-
1162
- if (parser->http_major > 999) {
1163
- SET_ERRNO(HPE_INVALID_VERSION);
1164
- goto error;
1165
- }
1166
-
1167
- break;
1168
- }
1169
-
1170
- /* first digit of minor HTTP version */
1171
- case s_req_first_http_minor:
1172
- if (!IS_NUM(ch)) {
1173
- SET_ERRNO(HPE_INVALID_VERSION);
1174
- goto error;
1175
- }
1176
-
1177
- parser->http_minor = ch - '0';
1178
- parser->state = s_req_http_minor;
1179
- break;
1180
-
1181
- /* minor HTTP version or end of request line */
1182
- case s_req_http_minor:
1183
- {
1184
- if (ch == CR) {
1185
- parser->state = s_req_line_almost_done;
1186
- break;
1187
- }
1188
-
1189
- if (ch == LF) {
1190
- parser->state = s_header_field_start;
1191
- break;
1192
- }
1193
-
1194
- /* XXX allow spaces after digit? */
1195
-
1196
- if (!IS_NUM(ch)) {
1197
- SET_ERRNO(HPE_INVALID_VERSION);
1198
- goto error;
1199
- }
1200
-
1201
- parser->http_minor *= 10;
1202
- parser->http_minor += ch - '0';
1203
-
1204
- if (parser->http_minor > 999) {
1205
- SET_ERRNO(HPE_INVALID_VERSION);
1206
- goto error;
1207
- }
1208
-
1209
- break;
1210
- }
1211
-
1212
- /* end of request line */
1213
- case s_req_line_almost_done:
1214
- {
1215
- if (ch != LF) {
1216
- SET_ERRNO(HPE_LF_EXPECTED);
1217
- goto error;
1218
- }
1219
-
1220
- parser->state = s_header_field_start;
1221
- break;
1222
- }
1223
-
1224
- case s_header_field_start:
1225
- {
1226
- if (ch == CR) {
1227
- parser->state = s_headers_almost_done;
1228
- break;
1229
- }
1230
-
1231
- if (ch == LF) {
1232
- /* they might be just sending \n instead of \r\n so this would be
1233
- * the second \n to denote the end of headers*/
1234
- parser->state = s_headers_almost_done;
1235
- goto reexecute_byte;
1236
- }
1237
-
1238
- c = TOKEN(ch);
1239
-
1240
- if (!c) {
1241
- SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1242
- goto error;
1243
- }
1244
-
1245
- MARK(header_field);
1246
-
1247
- parser->index = 0;
1248
- parser->state = s_header_field;
1249
-
1250
- switch (c) {
1251
- case 'c':
1252
- parser->header_state = h_C;
1253
- break;
1254
-
1255
- case 'p':
1256
- parser->header_state = h_matching_proxy_connection;
1257
- break;
1258
-
1259
- case 't':
1260
- parser->header_state = h_matching_transfer_encoding;
1261
- break;
1262
-
1263
- case 'u':
1264
- parser->header_state = h_matching_upgrade;
1265
- break;
1266
-
1267
- default:
1268
- parser->header_state = h_general;
1269
- break;
1270
- }
1271
- break;
1272
- }
1273
-
1274
- case s_header_field:
1275
- {
1276
- c = TOKEN(ch);
1277
-
1278
- if (c) {
1279
- switch (parser->header_state) {
1280
- case h_general:
1281
- break;
1282
-
1283
- case h_C:
1284
- parser->index++;
1285
- parser->header_state = (c == 'o' ? h_CO : h_general);
1286
- break;
1287
-
1288
- case h_CO:
1289
- parser->index++;
1290
- parser->header_state = (c == 'n' ? h_CON : h_general);
1291
- break;
1292
-
1293
- case h_CON:
1294
- parser->index++;
1295
- switch (c) {
1296
- case 'n':
1297
- parser->header_state = h_matching_connection;
1298
- break;
1299
- case 't':
1300
- parser->header_state = h_matching_content_length;
1301
- break;
1302
- default:
1303
- parser->header_state = h_general;
1304
- break;
1305
- }
1306
- break;
1307
-
1308
- /* connection */
1309
-
1310
- case h_matching_connection:
1311
- parser->index++;
1312
- if (parser->index > sizeof(CONNECTION)-1
1313
- || c != CONNECTION[parser->index]) {
1314
- parser->header_state = h_general;
1315
- } else if (parser->index == sizeof(CONNECTION)-2) {
1316
- parser->header_state = h_connection;
1317
- }
1318
- break;
1319
-
1320
- /* proxy-connection */
1321
-
1322
- case h_matching_proxy_connection:
1323
- parser->index++;
1324
- if (parser->index > sizeof(PROXY_CONNECTION)-1
1325
- || c != PROXY_CONNECTION[parser->index]) {
1326
- parser->header_state = h_general;
1327
- } else if (parser->index == sizeof(PROXY_CONNECTION)-2) {
1328
- parser->header_state = h_connection;
1329
- }
1330
- break;
1331
-
1332
- /* content-length */
1333
-
1334
- case h_matching_content_length:
1335
- parser->index++;
1336
- if (parser->index > sizeof(CONTENT_LENGTH)-1
1337
- || c != CONTENT_LENGTH[parser->index]) {
1338
- parser->header_state = h_general;
1339
- } else if (parser->index == sizeof(CONTENT_LENGTH)-2) {
1340
- parser->header_state = h_content_length;
1341
- }
1342
- break;
1343
-
1344
- /* transfer-encoding */
1345
-
1346
- case h_matching_transfer_encoding:
1347
- parser->index++;
1348
- if (parser->index > sizeof(TRANSFER_ENCODING)-1
1349
- || c != TRANSFER_ENCODING[parser->index]) {
1350
- parser->header_state = h_general;
1351
- } else if (parser->index == sizeof(TRANSFER_ENCODING)-2) {
1352
- parser->header_state = h_transfer_encoding;
1353
- }
1354
- break;
1355
-
1356
- /* upgrade */
1357
-
1358
- case h_matching_upgrade:
1359
- parser->index++;
1360
- if (parser->index > sizeof(UPGRADE)-1
1361
- || c != UPGRADE[parser->index]) {
1362
- parser->header_state = h_general;
1363
- } else if (parser->index == sizeof(UPGRADE)-2) {
1364
- parser->header_state = h_upgrade;
1365
- }
1366
- break;
1367
-
1368
- case h_connection:
1369
- case h_content_length:
1370
- case h_transfer_encoding:
1371
- case h_upgrade:
1372
- if (ch != ' ') parser->header_state = h_general;
1373
- break;
1374
-
1375
- default:
1376
- assert(0 && "Unknown header_state");
1377
- break;
1378
- }
1379
- break;
1380
- }
1381
-
1382
- if (ch == ':') {
1383
- parser->state = s_header_value_start;
1384
- CALLBACK_DATA(header_field);
1385
- break;
1386
- }
1387
-
1388
- if (ch == CR) {
1389
- parser->state = s_header_almost_done;
1390
- CALLBACK_DATA(header_field);
1391
- break;
1392
- }
1393
-
1394
- if (ch == LF) {
1395
- parser->state = s_header_field_start;
1396
- CALLBACK_DATA(header_field);
1397
- break;
1398
- }
1399
-
1400
- SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1401
- goto error;
1402
- }
1403
-
1404
- case s_header_value_start:
1405
- {
1406
- if (ch == ' ' || ch == '\t') break;
1407
-
1408
- MARK(header_value);
1409
-
1410
- parser->state = s_header_value;
1411
- parser->index = 0;
1412
-
1413
- if (ch == CR) {
1414
- parser->header_state = h_general;
1415
- parser->state = s_header_almost_done;
1416
- CALLBACK_DATA(header_value);
1417
- break;
1418
- }
1419
-
1420
- if (ch == LF) {
1421
- parser->state = s_header_field_start;
1422
- CALLBACK_DATA(header_value);
1423
- break;
1424
- }
1425
-
1426
- c = LOWER(ch);
1427
-
1428
- switch (parser->header_state) {
1429
- case h_upgrade:
1430
- parser->flags |= F_UPGRADE;
1431
- parser->header_state = h_general;
1432
- break;
1433
-
1434
- case h_transfer_encoding:
1435
- /* looking for 'Transfer-Encoding: chunked' */
1436
- if ('c' == c) {
1437
- parser->header_state = h_matching_transfer_encoding_chunked;
1438
- } else {
1439
- parser->header_state = h_general;
1440
- }
1441
- break;
1442
-
1443
- case h_content_length:
1444
- if (!IS_NUM(ch)) {
1445
- SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1446
- goto error;
1447
- }
1448
-
1449
- parser->content_length = ch - '0';
1450
- break;
1451
-
1452
- case h_connection:
1453
- /* looking for 'Connection: keep-alive' */
1454
- if (c == 'k') {
1455
- parser->header_state = h_matching_connection_keep_alive;
1456
- /* looking for 'Connection: close' */
1457
- } else if (c == 'c') {
1458
- parser->header_state = h_matching_connection_close;
1459
- } else {
1460
- parser->header_state = h_general;
1461
- }
1462
- break;
1463
-
1464
- default:
1465
- parser->header_state = h_general;
1466
- break;
1467
- }
1468
- break;
1469
- }
1470
-
1471
- case s_header_value:
1472
- {
1473
-
1474
- if (ch == CR) {
1475
- parser->state = s_header_almost_done;
1476
- CALLBACK_DATA(header_value);
1477
- break;
1478
- }
1479
-
1480
- if (ch == LF) {
1481
- parser->state = s_header_almost_done;
1482
- CALLBACK_DATA_NOADVANCE(header_value);
1483
- goto reexecute_byte;
1484
- }
1485
-
1486
- c = LOWER(ch);
1487
-
1488
- switch (parser->header_state) {
1489
- case h_general:
1490
- break;
1491
-
1492
- case h_connection:
1493
- case h_transfer_encoding:
1494
- assert(0 && "Shouldn't get here.");
1495
- break;
1496
-
1497
- case h_content_length:
1498
- {
1499
- uint64_t t;
1500
-
1501
- if (ch == ' ') break;
1502
-
1503
- if (!IS_NUM(ch)) {
1504
- SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1505
- goto error;
1506
- }
1507
-
1508
- t = parser->content_length;
1509
- t *= 10;
1510
- t += ch - '0';
1511
-
1512
- /* Overflow? Test against a conservative limit for simplicity. */
1513
- if ((ULLONG_MAX - 10) / 10 < parser->content_length) {
1514
- SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1515
- goto error;
1516
- }
1517
-
1518
- parser->content_length = t;
1519
- break;
1520
- }
1521
-
1522
- /* Transfer-Encoding: chunked */
1523
- case h_matching_transfer_encoding_chunked:
1524
- parser->index++;
1525
- if (parser->index > sizeof(CHUNKED)-1
1526
- || c != CHUNKED[parser->index]) {
1527
- parser->header_state = h_general;
1528
- } else if (parser->index == sizeof(CHUNKED)-2) {
1529
- parser->header_state = h_transfer_encoding_chunked;
1530
- }
1531
- break;
1532
-
1533
- /* looking for 'Connection: keep-alive' */
1534
- case h_matching_connection_keep_alive:
1535
- parser->index++;
1536
- if (parser->index > sizeof(KEEP_ALIVE)-1
1537
- || c != KEEP_ALIVE[parser->index]) {
1538
- parser->header_state = h_general;
1539
- } else if (parser->index == sizeof(KEEP_ALIVE)-2) {
1540
- parser->header_state = h_connection_keep_alive;
1541
- }
1542
- break;
1543
-
1544
- /* looking for 'Connection: close' */
1545
- case h_matching_connection_close:
1546
- parser->index++;
1547
- if (parser->index > sizeof(CLOSE)-1 || c != CLOSE[parser->index]) {
1548
- parser->header_state = h_general;
1549
- } else if (parser->index == sizeof(CLOSE)-2) {
1550
- parser->header_state = h_connection_close;
1551
- }
1552
- break;
1553
-
1554
- case h_transfer_encoding_chunked:
1555
- case h_connection_keep_alive:
1556
- case h_connection_close:
1557
- if (ch != ' ') parser->header_state = h_general;
1558
- break;
1559
-
1560
- default:
1561
- parser->state = s_header_value;
1562
- parser->header_state = h_general;
1563
- break;
1564
- }
1565
- break;
1566
- }
1567
-
1568
- case s_header_almost_done:
1569
- {
1570
- STRICT_CHECK(ch != LF);
1571
-
1572
- parser->state = s_header_value_lws;
1573
-
1574
- switch (parser->header_state) {
1575
- case h_connection_keep_alive:
1576
- parser->flags |= F_CONNECTION_KEEP_ALIVE;
1577
- break;
1578
- case h_connection_close:
1579
- parser->flags |= F_CONNECTION_CLOSE;
1580
- break;
1581
- case h_transfer_encoding_chunked:
1582
- parser->flags |= F_CHUNKED;
1583
- break;
1584
- default:
1585
- break;
1586
- }
1587
-
1588
- break;
1589
- }
1590
-
1591
- case s_header_value_lws:
1592
- {
1593
- if (ch == ' ' || ch == '\t')
1594
- parser->state = s_header_value_start;
1595
- else
1596
- {
1597
- parser->state = s_header_field_start;
1598
- goto reexecute_byte;
1599
- }
1600
- break;
1601
- }
1602
-
1603
- case s_headers_almost_done:
1604
- {
1605
- STRICT_CHECK(ch != LF);
1606
-
1607
- if (parser->flags & F_TRAILING) {
1608
- /* End of a chunked request */
1609
- parser->state = NEW_MESSAGE();
1610
- CALLBACK_NOTIFY(message_complete);
1611
- break;
1612
- }
1613
-
1614
- parser->state = s_headers_done;
1615
-
1616
- /* Set this here so that on_headers_complete() callbacks can see it */
1617
- parser->upgrade =
1618
- (parser->flags & F_UPGRADE || parser->method == HTTP_CONNECT);
1619
-
1620
- /* Here we call the headers_complete callback. This is somewhat
1621
- * different than other callbacks because if the user returns 1, we
1622
- * will interpret that as saying that this message has no body. This
1623
- * is needed for the annoying case of recieving a response to a HEAD
1624
- * request.
1625
- *
1626
- * We'd like to use CALLBACK_NOTIFY_NOADVANCE() here but we cannot, so
1627
- * we have to simulate it by handling a change in errno below.
1628
- */
1629
- if (settings->on_headers_complete) {
1630
- switch (settings->on_headers_complete(parser)) {
1631
- case 0:
1632
- break;
1633
-
1634
- case 1:
1635
- parser->flags |= F_SKIPBODY;
1636
- break;
1637
-
1638
- default:
1639
- SET_ERRNO(HPE_CB_headers_complete);
1640
- return p - data; /* Error */
1641
- }
1642
- }
1643
-
1644
- if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
1645
- return p - data;
1646
- }
1647
-
1648
- goto reexecute_byte;
1649
- }
1650
-
1651
- case s_headers_done:
1652
- {
1653
- STRICT_CHECK(ch != LF);
1654
-
1655
- parser->nread = 0;
1656
-
1657
- /* Exit, the rest of the connect is in a different protocol. */
1658
- if (parser->upgrade) {
1659
- parser->state = NEW_MESSAGE();
1660
- CALLBACK_NOTIFY(message_complete);
1661
- return (p - data) + 1;
1662
- }
1663
-
1664
- if (parser->flags & F_SKIPBODY) {
1665
- parser->state = NEW_MESSAGE();
1666
- CALLBACK_NOTIFY(message_complete);
1667
- } else if (parser->flags & F_CHUNKED) {
1668
- /* chunked encoding - ignore Content-Length header */
1669
- parser->state = s_chunk_size_start;
1670
- } else {
1671
- if (parser->content_length == 0) {
1672
- /* Content-Length header given but zero: Content-Length: 0\r\n */
1673
- parser->state = NEW_MESSAGE();
1674
- CALLBACK_NOTIFY(message_complete);
1675
- } else if (parser->content_length != ULLONG_MAX) {
1676
- /* Content-Length header given and non-zero */
1677
- parser->state = s_body_identity;
1678
- } else {
1679
- if (parser->type == HTTP_REQUEST ||
1680
- !http_message_needs_eof(parser)) {
1681
- /* Assume content-length 0 - read the next */
1682
- parser->state = NEW_MESSAGE();
1683
- CALLBACK_NOTIFY(message_complete);
1684
- } else {
1685
- /* Read body until EOF */
1686
- parser->state = s_body_identity_eof;
1687
- }
1688
- }
1689
- }
1690
-
1691
- break;
1692
- }
1693
-
1694
- case s_body_identity:
1695
- {
1696
- uint64_t to_read = MIN(parser->content_length,
1697
- (uint64_t) ((data + len) - p));
1698
-
1699
- assert(parser->content_length != 0
1700
- && parser->content_length != ULLONG_MAX);
1701
-
1702
- /* The difference between advancing content_length and p is because
1703
- * the latter will automaticaly advance on the next loop iteration.
1704
- * Further, if content_length ends up at 0, we want to see the last
1705
- * byte again for our message complete callback.
1706
- */
1707
- MARK(body);
1708
- parser->content_length -= to_read;
1709
- p += to_read - 1;
1710
-
1711
- if (parser->content_length == 0) {
1712
- parser->state = s_message_done;
1713
-
1714
- /* Mimic CALLBACK_DATA_NOADVANCE() but with one extra byte.
1715
- *
1716
- * The alternative to doing this is to wait for the next byte to
1717
- * trigger the data callback, just as in every other case. The
1718
- * problem with this is that this makes it difficult for the test
1719
- * harness to distinguish between complete-on-EOF and
1720
- * complete-on-length. It's not clear that this distinction is
1721
- * important for applications, but let's keep it for now.
1722
- */
1723
- CALLBACK_DATA_(body, p - body_mark + 1, p - data);
1724
- goto reexecute_byte;
1725
- }
1726
-
1727
- break;
1728
- }
1729
-
1730
- /* read until EOF */
1731
- case s_body_identity_eof:
1732
- MARK(body);
1733
- p = data + len - 1;
1734
-
1735
- break;
1736
-
1737
- case s_message_done:
1738
- parser->state = NEW_MESSAGE();
1739
- CALLBACK_NOTIFY(message_complete);
1740
- break;
1741
-
1742
- case s_chunk_size_start:
1743
- {
1744
- assert(parser->nread == 1);
1745
- assert(parser->flags & F_CHUNKED);
1746
-
1747
- unhex_val = unhex[(unsigned char)ch];
1748
- if (unhex_val == -1) {
1749
- SET_ERRNO(HPE_INVALID_CHUNK_SIZE);
1750
- goto error;
1751
- }
1752
-
1753
- parser->content_length = unhex_val;
1754
- parser->state = s_chunk_size;
1755
- break;
1756
- }
1757
-
1758
- case s_chunk_size:
1759
- {
1760
- uint64_t t;
1761
-
1762
- assert(parser->flags & F_CHUNKED);
1763
-
1764
- if (ch == CR) {
1765
- parser->state = s_chunk_size_almost_done;
1766
- break;
1767
- }
1768
-
1769
- unhex_val = unhex[(unsigned char)ch];
1770
-
1771
- if (unhex_val == -1) {
1772
- if (ch == ';' || ch == ' ') {
1773
- parser->state = s_chunk_parameters;
1774
- break;
1775
- }
1776
-
1777
- SET_ERRNO(HPE_INVALID_CHUNK_SIZE);
1778
- goto error;
1779
- }
1780
-
1781
- t = parser->content_length;
1782
- t *= 16;
1783
- t += unhex_val;
1784
-
1785
- /* Overflow? Test against a conservative limit for simplicity. */
1786
- if ((ULLONG_MAX - 16) / 16 < parser->content_length) {
1787
- SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1788
- goto error;
1789
- }
1790
-
1791
- parser->content_length = t;
1792
- break;
1793
- }
1794
-
1795
- case s_chunk_parameters:
1796
- {
1797
- assert(parser->flags & F_CHUNKED);
1798
- /* just ignore this shit. TODO check for overflow */
1799
- if (ch == CR) {
1800
- parser->state = s_chunk_size_almost_done;
1801
- break;
1802
- }
1803
- break;
1804
- }
1805
-
1806
- case s_chunk_size_almost_done:
1807
- {
1808
- assert(parser->flags & F_CHUNKED);
1809
- STRICT_CHECK(ch != LF);
1810
-
1811
- parser->nread = 0;
1812
-
1813
- if (parser->content_length == 0) {
1814
- parser->flags |= F_TRAILING;
1815
- parser->state = s_header_field_start;
1816
- } else {
1817
- parser->state = s_chunk_data;
1818
- }
1819
- break;
1820
- }
1821
-
1822
- case s_chunk_data:
1823
- {
1824
- uint64_t to_read = MIN(parser->content_length,
1825
- (uint64_t) ((data + len) - p));
1826
-
1827
- assert(parser->flags & F_CHUNKED);
1828
- assert(parser->content_length != 0
1829
- && parser->content_length != ULLONG_MAX);
1830
-
1831
- /* See the explanation in s_body_identity for why the content
1832
- * length and data pointers are managed this way.
1833
- */
1834
- MARK(body);
1835
- parser->content_length -= to_read;
1836
- p += to_read - 1;
1837
-
1838
- if (parser->content_length == 0) {
1839
- parser->state = s_chunk_data_almost_done;
1840
- }
1841
-
1842
- break;
1843
- }
1844
-
1845
- case s_chunk_data_almost_done:
1846
- assert(parser->flags & F_CHUNKED);
1847
- assert(parser->content_length == 0);
1848
- STRICT_CHECK(ch != CR);
1849
- parser->state = s_chunk_data_done;
1850
- CALLBACK_DATA(body);
1851
- break;
1852
-
1853
- case s_chunk_data_done:
1854
- assert(parser->flags & F_CHUNKED);
1855
- STRICT_CHECK(ch != LF);
1856
- parser->nread = 0;
1857
- parser->state = s_chunk_size_start;
1858
- break;
1859
-
1860
- default:
1861
- assert(0 && "unhandled state");
1862
- SET_ERRNO(HPE_INVALID_INTERNAL_STATE);
1863
- goto error;
1864
- }
1865
- }
1866
-
1867
- /* Run callbacks for any marks that we have leftover after we ran our of
1868
- * bytes. There should be at most one of these set, so it's OK to invoke
1869
- * them in series (unset marks will not result in callbacks).
1870
- *
1871
- * We use the NOADVANCE() variety of callbacks here because 'p' has already
1872
- * overflowed 'data' and this allows us to correct for the off-by-one that
1873
- * we'd otherwise have (since CALLBACK_DATA() is meant to be run with a 'p'
1874
- * value that's in-bounds).
1875
- */
1876
-
1877
- assert(((header_field_mark ? 1 : 0) +
1878
- (header_value_mark ? 1 : 0) +
1879
- (url_mark ? 1 : 0) +
1880
- (body_mark ? 1 : 0) +
1881
- (status_mark ? 1 : 0)) <= 1);
1882
-
1883
- CALLBACK_DATA_NOADVANCE(header_field);
1884
- CALLBACK_DATA_NOADVANCE(header_value);
1885
- CALLBACK_DATA_NOADVANCE(url);
1886
- CALLBACK_DATA_NOADVANCE(body);
1887
- CALLBACK_DATA_NOADVANCE(status);
1888
-
1889
- return len;
1890
-
1891
- error:
1892
- if (HTTP_PARSER_ERRNO(parser) == HPE_OK) {
1893
- SET_ERRNO(HPE_UNKNOWN);
1894
- }
1895
-
1896
- return (p - data);
1897
- }
1898
-
1899
-
1900
- /* Does the parser need to see an EOF to find the end of the message? */
1901
- int
1902
- http_message_needs_eof (const http_parser *parser)
1903
- {
1904
- if (parser->type == HTTP_REQUEST) {
1905
- return 0;
1906
- }
1907
-
1908
- /* See RFC 2616 section 4.4 */
1909
- if (parser->status_code / 100 == 1 || /* 1xx e.g. Continue */
1910
- parser->status_code == 204 || /* No Content */
1911
- parser->status_code == 304 || /* Not Modified */
1912
- parser->flags & F_SKIPBODY) { /* response to a HEAD request */
1913
- return 0;
1914
- }
1915
-
1916
- if ((parser->flags & F_CHUNKED) || parser->content_length != ULLONG_MAX) {
1917
- return 0;
1918
- }
1919
-
1920
- return 1;
1921
- }
1922
-
1923
-
1924
- int
1925
- http_should_keep_alive (const http_parser *parser)
1926
- {
1927
- if (parser->http_major > 0 && parser->http_minor > 0) {
1928
- /* HTTP/1.1 */
1929
- if (parser->flags & F_CONNECTION_CLOSE) {
1930
- return 0;
1931
- }
1932
- } else {
1933
- /* HTTP/1.0 or earlier */
1934
- if (!(parser->flags & F_CONNECTION_KEEP_ALIVE)) {
1935
- return 0;
1936
- }
1937
- }
1938
-
1939
- return !http_message_needs_eof(parser);
1940
- }
1941
-
1942
-
1943
- const char *
1944
- http_method_str (enum http_method m)
1945
- {
1946
- return ELEM_AT(method_strings, m, "<unknown>");
1947
- }
1948
-
1949
-
1950
- void
1951
- http_parser_init (http_parser *parser, enum http_parser_type t)
1952
- {
1953
- void *data = parser->data; /* preserve application data */
1954
- memset(parser, 0, sizeof(*parser));
1955
- parser->data = data;
1956
- parser->type = t;
1957
- parser->state = (t == HTTP_REQUEST ? s_start_req : (t == HTTP_RESPONSE ? s_start_res : s_start_req_or_res));
1958
- parser->http_errno = HPE_OK;
1959
- }
1960
-
1961
- const char *
1962
- http_errno_name(enum http_errno err) {
1963
- assert(err < (sizeof(http_strerror_tab)/sizeof(http_strerror_tab[0])));
1964
- return http_strerror_tab[err].name;
1965
- }
1966
-
1967
- const char *
1968
- http_errno_description(enum http_errno err) {
1969
- assert(err < (sizeof(http_strerror_tab)/sizeof(http_strerror_tab[0])));
1970
- return http_strerror_tab[err].description;
1971
- }
1972
-
1973
- static enum http_host_state
1974
- http_parse_host_char(enum http_host_state s, const char ch) {
1975
- switch(s) {
1976
- case s_http_userinfo:
1977
- case s_http_userinfo_start:
1978
- if (ch == '@') {
1979
- return s_http_host_start;
1980
- }
1981
-
1982
- if (IS_USERINFO_CHAR(ch)) {
1983
- return s_http_userinfo;
1984
- }
1985
- break;
1986
-
1987
- case s_http_host_start:
1988
- if (ch == '[') {
1989
- return s_http_host_v6_start;
1990
- }
1991
-
1992
- if (IS_HOST_CHAR(ch)) {
1993
- return s_http_host;
1994
- }
1995
-
1996
- break;
1997
-
1998
- case s_http_host:
1999
- if (IS_HOST_CHAR(ch)) {
2000
- return s_http_host;
2001
- }
2002
-
2003
- /* FALLTHROUGH */
2004
- case s_http_host_v6_end:
2005
- if (ch == ':') {
2006
- return s_http_host_port_start;
2007
- }
2008
-
2009
- break;
2010
-
2011
- case s_http_host_v6:
2012
- if (ch == ']') {
2013
- return s_http_host_v6_end;
2014
- }
2015
-
2016
- /* FALLTHROUGH */
2017
- case s_http_host_v6_start:
2018
- if (IS_HEX(ch) || ch == ':' || ch == '.') {
2019
- return s_http_host_v6;
2020
- }
2021
-
2022
- break;
2023
-
2024
- case s_http_host_port:
2025
- case s_http_host_port_start:
2026
- if (IS_NUM(ch)) {
2027
- return s_http_host_port;
2028
- }
2029
-
2030
- break;
2031
-
2032
- default:
2033
- break;
2034
- }
2035
- return s_http_host_dead;
2036
- }
2037
-
2038
- static int
2039
- http_parse_host(const char * buf, struct http_parser_url *u, int found_at) {
2040
- enum http_host_state s;
2041
-
2042
- const char *p;
2043
- size_t buflen = u->field_data[UF_HOST].off + u->field_data[UF_HOST].len;
2044
-
2045
- u->field_data[UF_HOST].len = 0;
2046
-
2047
- s = found_at ? s_http_userinfo_start : s_http_host_start;
2048
-
2049
- for (p = buf + u->field_data[UF_HOST].off; p < buf + buflen; p++) {
2050
- enum http_host_state new_s = http_parse_host_char(s, *p);
2051
-
2052
- if (new_s == s_http_host_dead) {
2053
- return 1;
2054
- }
2055
-
2056
- switch(new_s) {
2057
- case s_http_host:
2058
- if (s != s_http_host) {
2059
- u->field_data[UF_HOST].off = p - buf;
2060
- }
2061
- u->field_data[UF_HOST].len++;
2062
- break;
2063
-
2064
- case s_http_host_v6:
2065
- if (s != s_http_host_v6) {
2066
- u->field_data[UF_HOST].off = p - buf;
2067
- }
2068
- u->field_data[UF_HOST].len++;
2069
- break;
2070
-
2071
- case s_http_host_port:
2072
- if (s != s_http_host_port) {
2073
- u->field_data[UF_PORT].off = p - buf;
2074
- u->field_data[UF_PORT].len = 0;
2075
- u->field_set |= (1 << UF_PORT);
2076
- }
2077
- u->field_data[UF_PORT].len++;
2078
- break;
2079
-
2080
- case s_http_userinfo:
2081
- if (s != s_http_userinfo) {
2082
- u->field_data[UF_USERINFO].off = p - buf ;
2083
- u->field_data[UF_USERINFO].len = 0;
2084
- u->field_set |= (1 << UF_USERINFO);
2085
- }
2086
- u->field_data[UF_USERINFO].len++;
2087
- break;
2088
-
2089
- default:
2090
- break;
2091
- }
2092
- s = new_s;
2093
- }
2094
-
2095
- /* Make sure we don't end somewhere unexpected */
2096
- switch (s) {
2097
- case s_http_host_start:
2098
- case s_http_host_v6_start:
2099
- case s_http_host_v6:
2100
- case s_http_host_port_start:
2101
- case s_http_userinfo:
2102
- case s_http_userinfo_start:
2103
- return 1;
2104
- default:
2105
- break;
2106
- }
2107
-
2108
- return 0;
2109
- }
2110
-
2111
- int
2112
- http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
2113
- struct http_parser_url *u)
2114
- {
2115
- enum state s;
2116
- const char *p;
2117
- enum http_parser_url_fields uf, old_uf;
2118
- int found_at = 0;
2119
-
2120
- u->port = u->field_set = 0;
2121
- s = is_connect ? s_req_server_start : s_req_spaces_before_url;
2122
- uf = old_uf = UF_MAX;
2123
-
2124
- for (p = buf; p < buf + buflen; p++) {
2125
- s = parse_url_char(s, *p);
2126
-
2127
- /* Figure out the next field that we're operating on */
2128
- switch (s) {
2129
- case s_dead:
2130
- return 1;
2131
-
2132
- /* Skip delimiters */
2133
- case s_req_schema_slash:
2134
- case s_req_schema_slash_slash:
2135
- case s_req_server_start:
2136
- case s_req_query_string_start:
2137
- case s_req_fragment_start:
2138
- continue;
2139
-
2140
- case s_req_schema:
2141
- uf = UF_SCHEMA;
2142
- break;
2143
-
2144
- case s_req_server_with_at:
2145
- found_at = 1;
2146
-
2147
- /* FALLTHROUGH */
2148
- case s_req_server:
2149
- uf = UF_HOST;
2150
- break;
2151
-
2152
- case s_req_path:
2153
- uf = UF_PATH;
2154
- break;
2155
-
2156
- case s_req_query_string:
2157
- uf = UF_QUERY;
2158
- break;
2159
-
2160
- case s_req_fragment:
2161
- uf = UF_FRAGMENT;
2162
- break;
2163
-
2164
- default:
2165
- assert(!"Unexpected state");
2166
- return 1;
2167
- }
2168
-
2169
- /* Nothing's changed; soldier on */
2170
- if (uf == old_uf) {
2171
- u->field_data[uf].len++;
2172
- continue;
2173
- }
2174
-
2175
- u->field_data[uf].off = p - buf;
2176
- u->field_data[uf].len = 1;
2177
-
2178
- u->field_set |= (1 << uf);
2179
- old_uf = uf;
2180
- }
2181
-
2182
- /* host must be present if there is a schema */
2183
- /* parsing http:///toto will fail */
2184
- if ((u->field_set & ((1 << UF_SCHEMA) | (1 << UF_HOST))) != 0) {
2185
- if (http_parse_host(buf, u, found_at) != 0) {
2186
- return 1;
2187
- }
2188
- }
2189
-
2190
- /* CONNECT requests can only contain "hostname:port" */
2191
- if (is_connect && u->field_set != ((1 << UF_HOST)|(1 << UF_PORT))) {
2192
- return 1;
2193
- }
2194
-
2195
- if (u->field_set & (1 << UF_PORT)) {
2196
- /* Don't bother with endp; we've already validated the string */
2197
- unsigned long v = strtoul(buf + u->field_data[UF_PORT].off, NULL, 10);
2198
-
2199
- /* Ports have a max value of 2^16 - 1 (0xffff) */
2200
- if (v > 0xffff) {
2201
- return 1;
2202
- }
2203
-
2204
- u->port = (uint16_t) v;
2205
- }
2206
-
2207
- return 0;
2208
- }
2209
-
2210
- void
2211
- http_parser_pause(http_parser *parser, int paused) {
2212
- /* Users should only be pausing/unpausing a parser that is not in an error
2213
- * state. In non-debug builds, there's not much that we can do about this
2214
- * other than ignore it.
2215
- */
2216
- if (HTTP_PARSER_ERRNO(parser) == HPE_OK ||
2217
- HTTP_PARSER_ERRNO(parser) == HPE_PAUSED) {
2218
- SET_ERRNO((paused) ? HPE_PAUSED : HPE_OK);
2219
- } else {
2220
- assert(0 && "Attempting to pause parser in error state");
2221
- }
2222
- }
2223
-
2224
- int
2225
- http_body_is_final(const struct http_parser *parser) {
2226
- return parser->state == s_message_done;
2227
- }
2228
-
2229
- unsigned long
2230
- http_parser_version(void) {
2231
- return HTTP_PARSER_VERSION_MAJOR * 0x10000 |
2232
- HTTP_PARSER_VERSION_MINOR * 0x00100 |
2233
- HTTP_PARSER_VERSION_PATCH * 0x00001;
2234
- }
1
+ /* Based on src/http/ngx_http_parse.c from NGINX copyright Igor Sysoev
2
+ *
3
+ * Additional changes are licensed under the same terms as NGINX and
4
+ * copyright Joyent, Inc. and other Node contributors. All rights reserved.
5
+ *
6
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
7
+ * of this software and associated documentation files (the "Software"), to
8
+ * deal in the Software without restriction, including without limitation the
9
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10
+ * sell copies of the Software, and to permit persons to whom the Software is
11
+ * furnished to do so, subject to the following conditions:
12
+ *
13
+ * The above copyright notice and this permission notice shall be included in
14
+ * all copies or substantial portions of the Software.
15
+ *
16
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22
+ * IN THE SOFTWARE.
23
+ */
24
+ #include "http_parser.h"
25
+ #include <assert.h>
26
+ #include <stddef.h>
27
+ #include <ctype.h>
28
+ #include <stdlib.h>
29
+ #include <string.h>
30
+ #include <limits.h>
31
+
32
+ #ifndef ULLONG_MAX
33
+ # define ULLONG_MAX ((uint64_t) -1) /* 2^64-1 */
34
+ #endif
35
+
36
+ #ifndef MIN
37
+ # define MIN(a,b) ((a) < (b) ? (a) : (b))
38
+ #endif
39
+
40
+ #ifndef ARRAY_SIZE
41
+ # define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
42
+ #endif
43
+
44
+ #ifndef BIT_AT
45
+ # define BIT_AT(a, i) \
46
+ (!!((unsigned int) (a)[(unsigned int) (i) >> 3] & \
47
+ (1 << ((unsigned int) (i) & 7))))
48
+ #endif
49
+
50
+ #ifndef ELEM_AT
51
+ # define ELEM_AT(a, i, v) ((unsigned int) (i) < ARRAY_SIZE(a) ? (a)[(i)] : (v))
52
+ #endif
53
+
54
+ #define SET_ERRNO(e) \
55
+ do { \
56
+ parser->http_errno = (e); \
57
+ } while(0)
58
+
59
+ #define CURRENT_STATE() p_state
60
+ #define UPDATE_STATE(V) p_state = (enum state) (V);
61
+ #define RETURN(V) \
62
+ do { \
63
+ parser->state = CURRENT_STATE(); \
64
+ return (V); \
65
+ } while (0);
66
+ #define REEXECUTE() \
67
+ goto reexecute; \
68
+
69
+
70
+ #ifdef __GNUC__
71
+ # define LIKELY(X) __builtin_expect(!!(X), 1)
72
+ # define UNLIKELY(X) __builtin_expect(!!(X), 0)
73
+ #else
74
+ # define LIKELY(X) (X)
75
+ # define UNLIKELY(X) (X)
76
+ #endif
77
+
78
+
79
+ /* Run the notify callback FOR, returning ER if it fails */
80
+ #define CALLBACK_NOTIFY_(FOR, ER) \
81
+ do { \
82
+ assert(HTTP_PARSER_ERRNO(parser) == HPE_OK); \
83
+ \
84
+ if (LIKELY(settings->on_##FOR)) { \
85
+ parser->state = CURRENT_STATE(); \
86
+ if (UNLIKELY(0 != settings->on_##FOR(parser))) { \
87
+ SET_ERRNO(HPE_CB_##FOR); \
88
+ } \
89
+ UPDATE_STATE(parser->state); \
90
+ \
91
+ /* We either errored above or got paused; get out */ \
92
+ if (UNLIKELY(HTTP_PARSER_ERRNO(parser) != HPE_OK)) { \
93
+ return (ER); \
94
+ } \
95
+ } \
96
+ } while (0)
97
+
98
+ /* Run the notify callback FOR and consume the current byte */
99
+ #define CALLBACK_NOTIFY(FOR) CALLBACK_NOTIFY_(FOR, p - data + 1)
100
+
101
+ /* Run the notify callback FOR and don't consume the current byte */
102
+ #define CALLBACK_NOTIFY_NOADVANCE(FOR) CALLBACK_NOTIFY_(FOR, p - data)
103
+
104
+ /* Run data callback FOR with LEN bytes, returning ER if it fails */
105
+ #define CALLBACK_DATA_(FOR, LEN, ER) \
106
+ do { \
107
+ assert(HTTP_PARSER_ERRNO(parser) == HPE_OK); \
108
+ \
109
+ if (FOR##_mark) { \
110
+ if (LIKELY(settings->on_##FOR)) { \
111
+ parser->state = CURRENT_STATE(); \
112
+ if (UNLIKELY(0 != \
113
+ settings->on_##FOR(parser, FOR##_mark, (LEN)))) { \
114
+ SET_ERRNO(HPE_CB_##FOR); \
115
+ } \
116
+ UPDATE_STATE(parser->state); \
117
+ \
118
+ /* We either errored above or got paused; get out */ \
119
+ if (UNLIKELY(HTTP_PARSER_ERRNO(parser) != HPE_OK)) { \
120
+ return (ER); \
121
+ } \
122
+ } \
123
+ FOR##_mark = NULL; \
124
+ } \
125
+ } while (0)
126
+
127
+ /* Run the data callback FOR and consume the current byte */
128
+ #define CALLBACK_DATA(FOR) \
129
+ CALLBACK_DATA_(FOR, p - FOR##_mark, p - data + 1)
130
+
131
+ /* Run the data callback FOR and don't consume the current byte */
132
+ #define CALLBACK_DATA_NOADVANCE(FOR) \
133
+ CALLBACK_DATA_(FOR, p - FOR##_mark, p - data)
134
+
135
+ /* Set the mark FOR; non-destructive if mark is already set */
136
+ #define MARK(FOR) \
137
+ do { \
138
+ if (!FOR##_mark) { \
139
+ FOR##_mark = p; \
140
+ } \
141
+ } while (0)
142
+
143
+ /* Don't allow the total size of the HTTP headers (including the status
144
+ * line) to exceed HTTP_MAX_HEADER_SIZE. This check is here to protect
145
+ * embedders against denial-of-service attacks where the attacker feeds
146
+ * us a never-ending header that the embedder keeps buffering.
147
+ *
148
+ * This check is arguably the responsibility of embedders but we're doing
149
+ * it on the embedder's behalf because most won't bother and this way we
150
+ * make the web a little safer. HTTP_MAX_HEADER_SIZE is still far bigger
151
+ * than any reasonable request or response so this should never affect
152
+ * day-to-day operation.
153
+ */
154
+ #define COUNT_HEADER_SIZE(V) \
155
+ do { \
156
+ parser->nread += (V); \
157
+ if (UNLIKELY(parser->nread > (HTTP_MAX_HEADER_SIZE))) { \
158
+ SET_ERRNO(HPE_HEADER_OVERFLOW); \
159
+ goto error; \
160
+ } \
161
+ } while (0)
162
+
163
+
164
+ #define PROXY_CONNECTION "proxy-connection"
165
+ #define CONNECTION "connection"
166
+ #define CONTENT_LENGTH "content-length"
167
+ #define TRANSFER_ENCODING "transfer-encoding"
168
+ #define UPGRADE "upgrade"
169
+ #define CHUNKED "chunked"
170
+ #define KEEP_ALIVE "keep-alive"
171
+ #define CLOSE "close"
172
+
173
+
174
+ static const char *method_strings[] =
175
+ {
176
+ #define XX(num, name, string) #string,
177
+ HTTP_METHOD_MAP(XX)
178
+ #undef XX
179
+ };
180
+
181
+
182
+ /* Tokens as defined by rfc 2616. Also lowercases them.
183
+ * token = 1*<any CHAR except CTLs or separators>
184
+ * separators = "(" | ")" | "<" | ">" | "@"
185
+ * | "," | ";" | ":" | "\" | <">
186
+ * | "/" | "[" | "]" | "?" | "="
187
+ * | "{" | "}" | SP | HT
188
+ */
189
+ static const char tokens[256] = {
190
+ /* 0 nul 1 soh 2 stx 3 etx 4 eot 5 enq 6 ack 7 bel */
191
+ 0, 0, 0, 0, 0, 0, 0, 0,
192
+ /* 8 bs 9 ht 10 nl 11 vt 12 np 13 cr 14 so 15 si */
193
+ 0, 0, 0, 0, 0, 0, 0, 0,
194
+ /* 16 dle 17 dc1 18 dc2 19 dc3 20 dc4 21 nak 22 syn 23 etb */
195
+ 0, 0, 0, 0, 0, 0, 0, 0,
196
+ /* 24 can 25 em 26 sub 27 esc 28 fs 29 gs 30 rs 31 us */
197
+ 0, 0, 0, 0, 0, 0, 0, 0,
198
+ /* 32 sp 33 ! 34 " 35 # 36 $ 37 % 38 & 39 ' */
199
+ 0, '!', 0, '#', '$', '%', '&', '\'',
200
+ /* 40 ( 41 ) 42 * 43 + 44 , 45 - 46 . 47 / */
201
+ 0, 0, '*', '+', 0, '-', '.', 0,
202
+ /* 48 0 49 1 50 2 51 3 52 4 53 5 54 6 55 7 */
203
+ '0', '1', '2', '3', '4', '5', '6', '7',
204
+ /* 56 8 57 9 58 : 59 ; 60 < 61 = 62 > 63 ? */
205
+ '8', '9', 0, 0, 0, 0, 0, 0,
206
+ /* 64 @ 65 A 66 B 67 C 68 D 69 E 70 F 71 G */
207
+ 0, 'a', 'b', 'c', 'd', 'e', 'f', 'g',
208
+ /* 72 H 73 I 74 J 75 K 76 L 77 M 78 N 79 O */
209
+ 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
210
+ /* 80 P 81 Q 82 R 83 S 84 T 85 U 86 V 87 W */
211
+ 'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
212
+ /* 88 X 89 Y 90 Z 91 [ 92 \ 93 ] 94 ^ 95 _ */
213
+ 'x', 'y', 'z', 0, 0, 0, '^', '_',
214
+ /* 96 ` 97 a 98 b 99 c 100 d 101 e 102 f 103 g */
215
+ '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g',
216
+ /* 104 h 105 i 106 j 107 k 108 l 109 m 110 n 111 o */
217
+ 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
218
+ /* 112 p 113 q 114 r 115 s 116 t 117 u 118 v 119 w */
219
+ 'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
220
+ /* 120 x 121 y 122 z 123 { 124 | 125 } 126 ~ 127 del */
221
+ 'x', 'y', 'z', 0, '|', 0, '~', 0 };
222
+
223
+
224
+ static const int8_t unhex[256] =
225
+ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
226
+ ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
227
+ ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
228
+ , 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1
229
+ ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1
230
+ ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
231
+ ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1
232
+ ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
233
+ };
234
+
235
+
236
+ #if HTTP_PARSER_STRICT
237
+ # define T(v) 0
238
+ #else
239
+ # define T(v) v
240
+ #endif
241
+
242
+
243
+ static const uint8_t normal_url_char[32] = {
244
+ /* 0 nul 1 soh 2 stx 3 etx 4 eot 5 enq 6 ack 7 bel */
245
+ 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0,
246
+ /* 8 bs 9 ht 10 nl 11 vt 12 np 13 cr 14 so 15 si */
247
+ 0 | T(2) | 0 | 0 | T(16) | 0 | 0 | 0,
248
+ /* 16 dle 17 dc1 18 dc2 19 dc3 20 dc4 21 nak 22 syn 23 etb */
249
+ 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0,
250
+ /* 24 can 25 em 26 sub 27 esc 28 fs 29 gs 30 rs 31 us */
251
+ 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0,
252
+ /* 32 sp 33 ! 34 " 35 # 36 $ 37 % 38 & 39 ' */
253
+ 0 | 2 | 4 | 0 | 16 | 32 | 64 | 128,
254
+ /* 40 ( 41 ) 42 * 43 + 44 , 45 - 46 . 47 / */
255
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
256
+ /* 48 0 49 1 50 2 51 3 52 4 53 5 54 6 55 7 */
257
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
258
+ /* 56 8 57 9 58 : 59 ; 60 < 61 = 62 > 63 ? */
259
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 0,
260
+ /* 64 @ 65 A 66 B 67 C 68 D 69 E 70 F 71 G */
261
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
262
+ /* 72 H 73 I 74 J 75 K 76 L 77 M 78 N 79 O */
263
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
264
+ /* 80 P 81 Q 82 R 83 S 84 T 85 U 86 V 87 W */
265
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
266
+ /* 88 X 89 Y 90 Z 91 [ 92 \ 93 ] 94 ^ 95 _ */
267
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
268
+ /* 96 ` 97 a 98 b 99 c 100 d 101 e 102 f 103 g */
269
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
270
+ /* 104 h 105 i 106 j 107 k 108 l 109 m 110 n 111 o */
271
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
272
+ /* 112 p 113 q 114 r 115 s 116 t 117 u 118 v 119 w */
273
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
274
+ /* 120 x 121 y 122 z 123 { 124 | 125 } 126 ~ 127 del */
275
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 0, };
276
+
277
+ #undef T
278
+
279
+ enum state
280
+ { s_dead = 1 /* important that this is > 0 */
281
+
282
+ , s_start_req_or_res
283
+ , s_res_or_resp_H
284
+ , s_start_res
285
+ , s_res_H
286
+ , s_res_HT
287
+ , s_res_HTT
288
+ , s_res_HTTP
289
+ , s_res_first_http_major
290
+ , s_res_http_major
291
+ , s_res_first_http_minor
292
+ , s_res_http_minor
293
+ , s_res_first_status_code
294
+ , s_res_status_code
295
+ , s_res_status_start
296
+ , s_res_status
297
+ , s_res_line_almost_done
298
+
299
+ , s_start_req
300
+
301
+ , s_req_method
302
+ , s_req_spaces_before_url
303
+ , s_req_schema
304
+ , s_req_schema_slash
305
+ , s_req_schema_slash_slash
306
+ , s_req_server_start
307
+ , s_req_server
308
+ , s_req_server_with_at
309
+ , s_req_path
310
+ , s_req_query_string_start
311
+ , s_req_query_string
312
+ , s_req_fragment_start
313
+ , s_req_fragment
314
+ , s_req_http_start
315
+ , s_req_http_H
316
+ , s_req_http_HT
317
+ , s_req_http_HTT
318
+ , s_req_http_HTTP
319
+ , s_req_first_http_major
320
+ , s_req_http_major
321
+ , s_req_first_http_minor
322
+ , s_req_http_minor
323
+ , s_req_line_almost_done
324
+
325
+ , s_header_field_start
326
+ , s_header_field
327
+ , s_header_value_discard_ws
328
+ , s_header_value_discard_ws_almost_done
329
+ , s_header_value_discard_lws
330
+ , s_header_value_start
331
+ , s_header_value
332
+ , s_header_value_lws
333
+
334
+ , s_header_almost_done
335
+
336
+ , s_chunk_size_start
337
+ , s_chunk_size
338
+ , s_chunk_parameters
339
+ , s_chunk_size_almost_done
340
+
341
+ , s_headers_almost_done
342
+ , s_headers_done
343
+
344
+ /* Important: 's_headers_done' must be the last 'header' state. All
345
+ * states beyond this must be 'body' states. It is used for overflow
346
+ * checking. See the PARSING_HEADER() macro.
347
+ */
348
+
349
+ , s_chunk_data
350
+ , s_chunk_data_almost_done
351
+ , s_chunk_data_done
352
+
353
+ , s_body_identity
354
+ , s_body_identity_eof
355
+
356
+ , s_message_done
357
+ };
358
+
359
+
360
+ #define PARSING_HEADER(state) (state <= s_headers_done)
361
+
362
+
363
+ enum header_states
364
+ { h_general = 0
365
+ , h_C
366
+ , h_CO
367
+ , h_CON
368
+
369
+ , h_matching_connection
370
+ , h_matching_proxy_connection
371
+ , h_matching_content_length
372
+ , h_matching_transfer_encoding
373
+ , h_matching_upgrade
374
+
375
+ , h_connection
376
+ , h_content_length
377
+ , h_transfer_encoding
378
+ , h_upgrade
379
+
380
+ , h_matching_transfer_encoding_chunked
381
+ , h_matching_connection_token_start
382
+ , h_matching_connection_keep_alive
383
+ , h_matching_connection_close
384
+ , h_matching_connection_upgrade
385
+ , h_matching_connection_token
386
+
387
+ , h_transfer_encoding_chunked
388
+ , h_connection_keep_alive
389
+ , h_connection_close
390
+ , h_connection_upgrade
391
+ };
392
+
393
+ enum http_host_state
394
+ {
395
+ s_http_host_dead = 1
396
+ , s_http_userinfo_start
397
+ , s_http_userinfo
398
+ , s_http_host_start
399
+ , s_http_host_v6_start
400
+ , s_http_host
401
+ , s_http_host_v6
402
+ , s_http_host_v6_end
403
+ , s_http_host_v6_zone_start
404
+ , s_http_host_v6_zone
405
+ , s_http_host_port_start
406
+ , s_http_host_port
407
+ };
408
+
409
+ /* Macros for character classes; depends on strict-mode */
410
+ #define CR '\r'
411
+ #define LF '\n'
412
+ #define LOWER(c) (unsigned char)(c | 0x20)
413
+ #define IS_ALPHA(c) (LOWER(c) >= 'a' && LOWER(c) <= 'z')
414
+ #define IS_NUM(c) ((c) >= '0' && (c) <= '9')
415
+ #define IS_ALPHANUM(c) (IS_ALPHA(c) || IS_NUM(c))
416
+ #define IS_HEX(c) (IS_NUM(c) || (LOWER(c) >= 'a' && LOWER(c) <= 'f'))
417
+ #define IS_MARK(c) ((c) == '-' || (c) == '_' || (c) == '.' || \
418
+ (c) == '!' || (c) == '~' || (c) == '*' || (c) == '\'' || (c) == '(' || \
419
+ (c) == ')')
420
+ #define IS_USERINFO_CHAR(c) (IS_ALPHANUM(c) || IS_MARK(c) || (c) == '%' || \
421
+ (c) == ';' || (c) == ':' || (c) == '&' || (c) == '=' || (c) == '+' || \
422
+ (c) == '$' || (c) == ',')
423
+
424
+ #define STRICT_TOKEN(c) (tokens[(unsigned char)c])
425
+
426
+ #if HTTP_PARSER_STRICT
427
+ #define TOKEN(c) (tokens[(unsigned char)c])
428
+ #define IS_URL_CHAR(c) (BIT_AT(normal_url_char, (unsigned char)c))
429
+ #define IS_HOST_CHAR(c) (IS_ALPHANUM(c) || (c) == '.' || (c) == '-')
430
+ #else
431
+ #define TOKEN(c) ((c == ' ') ? ' ' : tokens[(unsigned char)c])
432
+ #define IS_URL_CHAR(c) \
433
+ (BIT_AT(normal_url_char, (unsigned char)c) || ((c) & 0x80))
434
+ #define IS_HOST_CHAR(c) \
435
+ (IS_ALPHANUM(c) || (c) == '.' || (c) == '-' || (c) == '_')
436
+ #endif
437
+
438
+ /**
439
+ * Verify that a char is a valid visible (printable) US-ASCII
440
+ * character or %x80-FF
441
+ **/
442
+ #define IS_HEADER_CHAR(ch) \
443
+ (ch == CR || ch == LF || ch == 9 || ((unsigned char)ch > 31 && ch != 127))
444
+
445
+ #define start_state (parser->type == HTTP_REQUEST ? s_start_req : s_start_res)
446
+
447
+
448
+ #if HTTP_PARSER_STRICT
449
+ # define STRICT_CHECK(cond) \
450
+ do { \
451
+ if (cond) { \
452
+ SET_ERRNO(HPE_STRICT); \
453
+ goto error; \
454
+ } \
455
+ } while (0)
456
+ # define NEW_MESSAGE() (http_should_keep_alive(parser) ? start_state : s_dead)
457
+ #else
458
+ # define STRICT_CHECK(cond)
459
+ # define NEW_MESSAGE() start_state
460
+ #endif
461
+
462
+
463
+ /* Map errno values to strings for human-readable output */
464
+ #define HTTP_STRERROR_GEN(n, s) { "HPE_" #n, s },
465
+ static struct {
466
+ const char *name;
467
+ const char *description;
468
+ } http_strerror_tab[] = {
469
+ HTTP_ERRNO_MAP(HTTP_STRERROR_GEN)
470
+ };
471
+ #undef HTTP_STRERROR_GEN
472
+
473
+ int http_message_needs_eof(const http_parser *parser);
474
+
475
+ /* Our URL parser.
476
+ *
477
+ * This is designed to be shared by http_parser_execute() for URL validation,
478
+ * hence it has a state transition + byte-for-byte interface. In addition, it
479
+ * is meant to be embedded in http_parser_parse_url(), which does the dirty
480
+ * work of turning state transitions into URL components for its API.
481
+ *
482
+ * This function should only be invoked with non-space characters. It is
483
+ * assumed that the caller cares about (and can detect) the transition between
484
+ * URL and non-URL states by looking for these.
485
+ */
486
+ static enum state
487
+ parse_url_char(enum state s, const char ch)
488
+ {
489
+ if (ch == ' ' || ch == '\r' || ch == '\n') {
490
+ return s_dead;
491
+ }
492
+
493
+ #if HTTP_PARSER_STRICT
494
+ if (ch == '\t' || ch == '\f') {
495
+ return s_dead;
496
+ }
497
+ #endif
498
+
499
+ switch (s) {
500
+ case s_req_spaces_before_url:
501
+ /* Proxied requests are followed by scheme of an absolute URI (alpha).
502
+ * All methods except CONNECT are followed by '/' or '*'.
503
+ */
504
+
505
+ if (ch == '/' || ch == '*') {
506
+ return s_req_path;
507
+ }
508
+
509
+ if (IS_ALPHA(ch)) {
510
+ return s_req_schema;
511
+ }
512
+
513
+ break;
514
+
515
+ case s_req_schema:
516
+ if (IS_ALPHA(ch)) {
517
+ return s;
518
+ }
519
+
520
+ if (ch == ':') {
521
+ return s_req_schema_slash;
522
+ }
523
+
524
+ break;
525
+
526
+ case s_req_schema_slash:
527
+ if (ch == '/') {
528
+ return s_req_schema_slash_slash;
529
+ }
530
+
531
+ break;
532
+
533
+ case s_req_schema_slash_slash:
534
+ if (ch == '/') {
535
+ return s_req_server_start;
536
+ }
537
+
538
+ break;
539
+
540
+ case s_req_server_with_at:
541
+ if (ch == '@') {
542
+ return s_dead;
543
+ }
544
+
545
+ /* FALLTHROUGH */
546
+ case s_req_server_start:
547
+ case s_req_server:
548
+ if (ch == '/') {
549
+ return s_req_path;
550
+ }
551
+
552
+ if (ch == '?') {
553
+ return s_req_query_string_start;
554
+ }
555
+
556
+ if (ch == '@') {
557
+ return s_req_server_with_at;
558
+ }
559
+
560
+ if (IS_USERINFO_CHAR(ch) || ch == '[' || ch == ']') {
561
+ return s_req_server;
562
+ }
563
+
564
+ break;
565
+
566
+ case s_req_path:
567
+ if (IS_URL_CHAR(ch)) {
568
+ return s;
569
+ }
570
+
571
+ switch (ch) {
572
+ case '?':
573
+ return s_req_query_string_start;
574
+
575
+ case '#':
576
+ return s_req_fragment_start;
577
+ }
578
+
579
+ break;
580
+
581
+ case s_req_query_string_start:
582
+ case s_req_query_string:
583
+ if (IS_URL_CHAR(ch)) {
584
+ return s_req_query_string;
585
+ }
586
+
587
+ switch (ch) {
588
+ case '?':
589
+ /* allow extra '?' in query string */
590
+ return s_req_query_string;
591
+
592
+ case '#':
593
+ return s_req_fragment_start;
594
+ }
595
+
596
+ break;
597
+
598
+ case s_req_fragment_start:
599
+ if (IS_URL_CHAR(ch)) {
600
+ return s_req_fragment;
601
+ }
602
+
603
+ switch (ch) {
604
+ case '?':
605
+ return s_req_fragment;
606
+
607
+ case '#':
608
+ return s;
609
+ }
610
+
611
+ break;
612
+
613
+ case s_req_fragment:
614
+ if (IS_URL_CHAR(ch)) {
615
+ return s;
616
+ }
617
+
618
+ switch (ch) {
619
+ case '?':
620
+ case '#':
621
+ return s;
622
+ }
623
+
624
+ break;
625
+
626
+ default:
627
+ break;
628
+ }
629
+
630
+ /* We should never fall out of the switch above unless there's an error */
631
+ return s_dead;
632
+ }
633
+
634
+ size_t http_parser_execute (http_parser *parser,
635
+ const http_parser_settings *settings,
636
+ const char *data,
637
+ size_t len)
638
+ {
639
+ char c, ch;
640
+ int8_t unhex_val;
641
+ const char *p = data;
642
+ const char *header_field_mark = 0;
643
+ const char *header_value_mark = 0;
644
+ const char *url_mark = 0;
645
+ const char *body_mark = 0;
646
+ const char *status_mark = 0;
647
+ enum state p_state = (enum state) parser->state;
648
+ const unsigned int lenient = parser->lenient_http_headers;
649
+
650
+ /* We're in an error state. Don't bother doing anything. */
651
+ if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
652
+ return 0;
653
+ }
654
+
655
+ if (len == 0) {
656
+ switch (CURRENT_STATE()) {
657
+ case s_body_identity_eof:
658
+ /* Use of CALLBACK_NOTIFY() here would erroneously return 1 byte read if
659
+ * we got paused.
660
+ */
661
+ CALLBACK_NOTIFY_NOADVANCE(message_complete);
662
+ return 0;
663
+
664
+ case s_dead:
665
+ case s_start_req_or_res:
666
+ case s_start_res:
667
+ case s_start_req:
668
+ return 0;
669
+
670
+ default:
671
+ SET_ERRNO(HPE_INVALID_EOF_STATE);
672
+ return 1;
673
+ }
674
+ }
675
+
676
+
677
+ if (CURRENT_STATE() == s_header_field)
678
+ header_field_mark = data;
679
+ if (CURRENT_STATE() == s_header_value)
680
+ header_value_mark = data;
681
+ switch (CURRENT_STATE()) {
682
+ case s_req_path:
683
+ case s_req_schema:
684
+ case s_req_schema_slash:
685
+ case s_req_schema_slash_slash:
686
+ case s_req_server_start:
687
+ case s_req_server:
688
+ case s_req_server_with_at:
689
+ case s_req_query_string_start:
690
+ case s_req_query_string:
691
+ case s_req_fragment_start:
692
+ case s_req_fragment:
693
+ url_mark = data;
694
+ break;
695
+ case s_res_status:
696
+ status_mark = data;
697
+ break;
698
+ default:
699
+ break;
700
+ }
701
+
702
+ for (p=data; p != data + len; p++) {
703
+ ch = *p;
704
+
705
+ if (PARSING_HEADER(CURRENT_STATE()))
706
+ COUNT_HEADER_SIZE(1);
707
+
708
+ reexecute:
709
+ switch (CURRENT_STATE()) {
710
+
711
+ case s_dead:
712
+ /* this state is used after a 'Connection: close' message
713
+ * the parser will error out if it reads another message
714
+ */
715
+ if (LIKELY(ch == CR || ch == LF))
716
+ break;
717
+
718
+ SET_ERRNO(HPE_CLOSED_CONNECTION);
719
+ goto error;
720
+
721
+ case s_start_req_or_res:
722
+ {
723
+ if (ch == CR || ch == LF)
724
+ break;
725
+ parser->flags = 0;
726
+ parser->content_length = ULLONG_MAX;
727
+
728
+ if (ch == 'H') {
729
+ UPDATE_STATE(s_res_or_resp_H);
730
+
731
+ CALLBACK_NOTIFY(message_begin);
732
+ } else {
733
+ parser->type = HTTP_REQUEST;
734
+ UPDATE_STATE(s_start_req);
735
+ REEXECUTE();
736
+ }
737
+
738
+ break;
739
+ }
740
+
741
+ case s_res_or_resp_H:
742
+ if (ch == 'T') {
743
+ parser->type = HTTP_RESPONSE;
744
+ UPDATE_STATE(s_res_HT);
745
+ } else {
746
+ if (UNLIKELY(ch != 'E')) {
747
+ SET_ERRNO(HPE_INVALID_CONSTANT);
748
+ goto error;
749
+ }
750
+
751
+ parser->type = HTTP_REQUEST;
752
+ parser->method = HTTP_HEAD;
753
+ parser->index = 2;
754
+ UPDATE_STATE(s_req_method);
755
+ }
756
+ break;
757
+
758
+ case s_start_res:
759
+ {
760
+ parser->flags = 0;
761
+ parser->content_length = ULLONG_MAX;
762
+
763
+ switch (ch) {
764
+ case 'H':
765
+ UPDATE_STATE(s_res_H);
766
+ break;
767
+
768
+ case CR:
769
+ case LF:
770
+ break;
771
+
772
+ default:
773
+ SET_ERRNO(HPE_INVALID_CONSTANT);
774
+ goto error;
775
+ }
776
+
777
+ CALLBACK_NOTIFY(message_begin);
778
+ break;
779
+ }
780
+
781
+ case s_res_H:
782
+ STRICT_CHECK(ch != 'T');
783
+ UPDATE_STATE(s_res_HT);
784
+ break;
785
+
786
+ case s_res_HT:
787
+ STRICT_CHECK(ch != 'T');
788
+ UPDATE_STATE(s_res_HTT);
789
+ break;
790
+
791
+ case s_res_HTT:
792
+ STRICT_CHECK(ch != 'P');
793
+ UPDATE_STATE(s_res_HTTP);
794
+ break;
795
+
796
+ case s_res_HTTP:
797
+ STRICT_CHECK(ch != '/');
798
+ UPDATE_STATE(s_res_first_http_major);
799
+ break;
800
+
801
+ case s_res_first_http_major:
802
+ if (UNLIKELY(ch < '0' || ch > '9')) {
803
+ SET_ERRNO(HPE_INVALID_VERSION);
804
+ goto error;
805
+ }
806
+
807
+ parser->http_major = ch - '0';
808
+ UPDATE_STATE(s_res_http_major);
809
+ break;
810
+
811
+ /* major HTTP version or dot */
812
+ case s_res_http_major:
813
+ {
814
+ if (ch == '.') {
815
+ UPDATE_STATE(s_res_first_http_minor);
816
+ break;
817
+ }
818
+
819
+ if (!IS_NUM(ch)) {
820
+ SET_ERRNO(HPE_INVALID_VERSION);
821
+ goto error;
822
+ }
823
+
824
+ parser->http_major *= 10;
825
+ parser->http_major += ch - '0';
826
+
827
+ if (UNLIKELY(parser->http_major > 999)) {
828
+ SET_ERRNO(HPE_INVALID_VERSION);
829
+ goto error;
830
+ }
831
+
832
+ break;
833
+ }
834
+
835
+ /* first digit of minor HTTP version */
836
+ case s_res_first_http_minor:
837
+ if (UNLIKELY(!IS_NUM(ch))) {
838
+ SET_ERRNO(HPE_INVALID_VERSION);
839
+ goto error;
840
+ }
841
+
842
+ parser->http_minor = ch - '0';
843
+ UPDATE_STATE(s_res_http_minor);
844
+ break;
845
+
846
+ /* minor HTTP version or the space before the status code */
847
+ case s_res_http_minor:
848
+ {
849
+ if (ch == ' ') {
850
+ UPDATE_STATE(s_res_first_status_code);
851
+ break;
852
+ }
853
+
854
+ if (UNLIKELY(!IS_NUM(ch))) {
855
+ SET_ERRNO(HPE_INVALID_VERSION);
856
+ goto error;
857
+ }
858
+
859
+ parser->http_minor *= 10;
860
+ parser->http_minor += ch - '0';
861
+
862
+ if (UNLIKELY(parser->http_minor > 999)) {
863
+ SET_ERRNO(HPE_INVALID_VERSION);
864
+ goto error;
865
+ }
866
+
867
+ break;
868
+ }
869
+
870
+ case s_res_first_status_code:
871
+ {
872
+ if (!IS_NUM(ch)) {
873
+ if (ch == ' ') {
874
+ break;
875
+ }
876
+
877
+ SET_ERRNO(HPE_INVALID_STATUS);
878
+ goto error;
879
+ }
880
+ parser->status_code = ch - '0';
881
+ UPDATE_STATE(s_res_status_code);
882
+ break;
883
+ }
884
+
885
+ case s_res_status_code:
886
+ {
887
+ if (!IS_NUM(ch)) {
888
+ switch (ch) {
889
+ case ' ':
890
+ UPDATE_STATE(s_res_status_start);
891
+ break;
892
+ case CR:
893
+ UPDATE_STATE(s_res_line_almost_done);
894
+ break;
895
+ case LF:
896
+ UPDATE_STATE(s_header_field_start);
897
+ break;
898
+ default:
899
+ SET_ERRNO(HPE_INVALID_STATUS);
900
+ goto error;
901
+ }
902
+ break;
903
+ }
904
+
905
+ parser->status_code *= 10;
906
+ parser->status_code += ch - '0';
907
+
908
+ if (UNLIKELY(parser->status_code > 999)) {
909
+ SET_ERRNO(HPE_INVALID_STATUS);
910
+ goto error;
911
+ }
912
+
913
+ break;
914
+ }
915
+
916
+ case s_res_status_start:
917
+ {
918
+ if (ch == CR) {
919
+ UPDATE_STATE(s_res_line_almost_done);
920
+ break;
921
+ }
922
+
923
+ if (ch == LF) {
924
+ UPDATE_STATE(s_header_field_start);
925
+ break;
926
+ }
927
+
928
+ MARK(status);
929
+ UPDATE_STATE(s_res_status);
930
+ parser->index = 0;
931
+ break;
932
+ }
933
+
934
+ case s_res_status:
935
+ if (ch == CR) {
936
+ UPDATE_STATE(s_res_line_almost_done);
937
+ CALLBACK_DATA(status);
938
+ break;
939
+ }
940
+
941
+ if (ch == LF) {
942
+ UPDATE_STATE(s_header_field_start);
943
+ CALLBACK_DATA(status);
944
+ break;
945
+ }
946
+
947
+ break;
948
+
949
+ case s_res_line_almost_done:
950
+ STRICT_CHECK(ch != LF);
951
+ UPDATE_STATE(s_header_field_start);
952
+ break;
953
+
954
+ case s_start_req:
955
+ {
956
+ if (ch == CR || ch == LF)
957
+ break;
958
+ parser->flags = 0;
959
+ parser->content_length = ULLONG_MAX;
960
+
961
+ if (UNLIKELY(!IS_ALPHA(ch))) {
962
+ SET_ERRNO(HPE_INVALID_METHOD);
963
+ goto error;
964
+ }
965
+
966
+ parser->method = (enum http_method) 0;
967
+ parser->index = 1;
968
+ switch (ch) {
969
+ case 'A': parser->method = HTTP_ACL; break;
970
+ case 'B': parser->method = HTTP_BIND; break;
971
+ case 'C': parser->method = HTTP_CONNECT; /* or COPY, CHECKOUT */ break;
972
+ case 'D': parser->method = HTTP_DELETE; break;
973
+ case 'G': parser->method = HTTP_GET; break;
974
+ case 'H': parser->method = HTTP_HEAD; break;
975
+ case 'L': parser->method = HTTP_LOCK; /* or LINK */ break;
976
+ case 'M': parser->method = HTTP_MKCOL; /* or MOVE, MKACTIVITY, MERGE, M-SEARCH, MKCALENDAR */ break;
977
+ case 'N': parser->method = HTTP_NOTIFY; break;
978
+ case 'O': parser->method = HTTP_OPTIONS; break;
979
+ case 'P': parser->method = HTTP_POST;
980
+ /* or PROPFIND|PROPPATCH|PUT|PATCH|PURGE */
981
+ break;
982
+ case 'R': parser->method = HTTP_REPORT; /* or REBIND */ break;
983
+ case 'S': parser->method = HTTP_SUBSCRIBE; /* or SEARCH */ break;
984
+ case 'T': parser->method = HTTP_TRACE; break;
985
+ case 'U': parser->method = HTTP_UNLOCK; /* or UNSUBSCRIBE, UNBIND, UNLINK */ break;
986
+ default:
987
+ SET_ERRNO(HPE_INVALID_METHOD);
988
+ goto error;
989
+ }
990
+ UPDATE_STATE(s_req_method);
991
+
992
+ CALLBACK_NOTIFY(message_begin);
993
+
994
+ break;
995
+ }
996
+
997
+ case s_req_method:
998
+ {
999
+ const char *matcher;
1000
+ if (UNLIKELY(ch == '\0')) {
1001
+ SET_ERRNO(HPE_INVALID_METHOD);
1002
+ goto error;
1003
+ }
1004
+
1005
+ matcher = method_strings[parser->method];
1006
+ if (ch == ' ' && matcher[parser->index] == '\0') {
1007
+ UPDATE_STATE(s_req_spaces_before_url);
1008
+ } else if (ch == matcher[parser->index]) {
1009
+ ; /* nada */
1010
+ } else if (IS_ALPHA(ch)) {
1011
+
1012
+ switch (parser->method << 16 | parser->index << 8 | ch) {
1013
+ #define XX(meth, pos, ch, new_meth) \
1014
+ case (HTTP_##meth << 16 | pos << 8 | ch): \
1015
+ parser->method = HTTP_##new_meth; break;
1016
+
1017
+ XX(POST, 1, 'U', PUT)
1018
+ XX(POST, 1, 'A', PATCH)
1019
+ XX(CONNECT, 1, 'H', CHECKOUT)
1020
+ XX(CONNECT, 2, 'P', COPY)
1021
+ XX(MKCOL, 1, 'O', MOVE)
1022
+ XX(MKCOL, 1, 'E', MERGE)
1023
+ XX(MKCOL, 2, 'A', MKACTIVITY)
1024
+ XX(MKCOL, 3, 'A', MKCALENDAR)
1025
+ XX(SUBSCRIBE, 1, 'E', SEARCH)
1026
+ XX(REPORT, 2, 'B', REBIND)
1027
+ XX(POST, 1, 'R', PROPFIND)
1028
+ XX(PROPFIND, 4, 'P', PROPPATCH)
1029
+ XX(PUT, 2, 'R', PURGE)
1030
+ XX(LOCK, 1, 'I', LINK)
1031
+ XX(UNLOCK, 2, 'S', UNSUBSCRIBE)
1032
+ XX(UNLOCK, 2, 'B', UNBIND)
1033
+ XX(UNLOCK, 3, 'I', UNLINK)
1034
+ #undef XX
1035
+
1036
+ default:
1037
+ SET_ERRNO(HPE_INVALID_METHOD);
1038
+ goto error;
1039
+ }
1040
+ } else if (ch == '-' &&
1041
+ parser->index == 1 &&
1042
+ parser->method == HTTP_MKCOL) {
1043
+ parser->method = HTTP_MSEARCH;
1044
+ } else {
1045
+ SET_ERRNO(HPE_INVALID_METHOD);
1046
+ goto error;
1047
+ }
1048
+
1049
+ ++parser->index;
1050
+ break;
1051
+ }
1052
+
1053
+ case s_req_spaces_before_url:
1054
+ {
1055
+ if (ch == ' ') break;
1056
+
1057
+ MARK(url);
1058
+ if (parser->method == HTTP_CONNECT) {
1059
+ UPDATE_STATE(s_req_server_start);
1060
+ }
1061
+
1062
+ UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch));
1063
+ if (UNLIKELY(CURRENT_STATE() == s_dead)) {
1064
+ SET_ERRNO(HPE_INVALID_URL);
1065
+ goto error;
1066
+ }
1067
+
1068
+ break;
1069
+ }
1070
+
1071
+ case s_req_schema:
1072
+ case s_req_schema_slash:
1073
+ case s_req_schema_slash_slash:
1074
+ case s_req_server_start:
1075
+ {
1076
+ switch (ch) {
1077
+ /* No whitespace allowed here */
1078
+ case ' ':
1079
+ case CR:
1080
+ case LF:
1081
+ SET_ERRNO(HPE_INVALID_URL);
1082
+ goto error;
1083
+ default:
1084
+ UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch));
1085
+ if (UNLIKELY(CURRENT_STATE() == s_dead)) {
1086
+ SET_ERRNO(HPE_INVALID_URL);
1087
+ goto error;
1088
+ }
1089
+ }
1090
+
1091
+ break;
1092
+ }
1093
+
1094
+ case s_req_server:
1095
+ case s_req_server_with_at:
1096
+ case s_req_path:
1097
+ case s_req_query_string_start:
1098
+ case s_req_query_string:
1099
+ case s_req_fragment_start:
1100
+ case s_req_fragment:
1101
+ {
1102
+ switch (ch) {
1103
+ case ' ':
1104
+ UPDATE_STATE(s_req_http_start);
1105
+ CALLBACK_DATA(url);
1106
+ break;
1107
+ case CR:
1108
+ case LF:
1109
+ parser->http_major = 0;
1110
+ parser->http_minor = 9;
1111
+ UPDATE_STATE((ch == CR) ?
1112
+ s_req_line_almost_done :
1113
+ s_header_field_start);
1114
+ CALLBACK_DATA(url);
1115
+ break;
1116
+ default:
1117
+ UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch));
1118
+ if (UNLIKELY(CURRENT_STATE() == s_dead)) {
1119
+ SET_ERRNO(HPE_INVALID_URL);
1120
+ goto error;
1121
+ }
1122
+ }
1123
+ break;
1124
+ }
1125
+
1126
+ case s_req_http_start:
1127
+ switch (ch) {
1128
+ case 'H':
1129
+ UPDATE_STATE(s_req_http_H);
1130
+ break;
1131
+ case ' ':
1132
+ break;
1133
+ default:
1134
+ SET_ERRNO(HPE_INVALID_CONSTANT);
1135
+ goto error;
1136
+ }
1137
+ break;
1138
+
1139
+ case s_req_http_H:
1140
+ STRICT_CHECK(ch != 'T');
1141
+ UPDATE_STATE(s_req_http_HT);
1142
+ break;
1143
+
1144
+ case s_req_http_HT:
1145
+ STRICT_CHECK(ch != 'T');
1146
+ UPDATE_STATE(s_req_http_HTT);
1147
+ break;
1148
+
1149
+ case s_req_http_HTT:
1150
+ STRICT_CHECK(ch != 'P');
1151
+ UPDATE_STATE(s_req_http_HTTP);
1152
+ break;
1153
+
1154
+ case s_req_http_HTTP:
1155
+ STRICT_CHECK(ch != '/');
1156
+ UPDATE_STATE(s_req_first_http_major);
1157
+ break;
1158
+
1159
+ /* first digit of major HTTP version */
1160
+ case s_req_first_http_major:
1161
+ if (UNLIKELY(ch < '1' || ch > '9')) {
1162
+ SET_ERRNO(HPE_INVALID_VERSION);
1163
+ goto error;
1164
+ }
1165
+
1166
+ parser->http_major = ch - '0';
1167
+ UPDATE_STATE(s_req_http_major);
1168
+ break;
1169
+
1170
+ /* major HTTP version or dot */
1171
+ case s_req_http_major:
1172
+ {
1173
+ if (ch == '.') {
1174
+ UPDATE_STATE(s_req_first_http_minor);
1175
+ break;
1176
+ }
1177
+
1178
+ if (UNLIKELY(!IS_NUM(ch))) {
1179
+ SET_ERRNO(HPE_INVALID_VERSION);
1180
+ goto error;
1181
+ }
1182
+
1183
+ parser->http_major *= 10;
1184
+ parser->http_major += ch - '0';
1185
+
1186
+ if (UNLIKELY(parser->http_major > 999)) {
1187
+ SET_ERRNO(HPE_INVALID_VERSION);
1188
+ goto error;
1189
+ }
1190
+
1191
+ break;
1192
+ }
1193
+
1194
+ /* first digit of minor HTTP version */
1195
+ case s_req_first_http_minor:
1196
+ if (UNLIKELY(!IS_NUM(ch))) {
1197
+ SET_ERRNO(HPE_INVALID_VERSION);
1198
+ goto error;
1199
+ }
1200
+
1201
+ parser->http_minor = ch - '0';
1202
+ UPDATE_STATE(s_req_http_minor);
1203
+ break;
1204
+
1205
+ /* minor HTTP version or end of request line */
1206
+ case s_req_http_minor:
1207
+ {
1208
+ if (ch == CR) {
1209
+ UPDATE_STATE(s_req_line_almost_done);
1210
+ break;
1211
+ }
1212
+
1213
+ if (ch == LF) {
1214
+ UPDATE_STATE(s_header_field_start);
1215
+ break;
1216
+ }
1217
+
1218
+ /* XXX allow spaces after digit? */
1219
+
1220
+ if (UNLIKELY(!IS_NUM(ch))) {
1221
+ SET_ERRNO(HPE_INVALID_VERSION);
1222
+ goto error;
1223
+ }
1224
+
1225
+ parser->http_minor *= 10;
1226
+ parser->http_minor += ch - '0';
1227
+
1228
+ if (UNLIKELY(parser->http_minor > 999)) {
1229
+ SET_ERRNO(HPE_INVALID_VERSION);
1230
+ goto error;
1231
+ }
1232
+
1233
+ break;
1234
+ }
1235
+
1236
+ /* end of request line */
1237
+ case s_req_line_almost_done:
1238
+ {
1239
+ if (UNLIKELY(ch != LF)) {
1240
+ SET_ERRNO(HPE_LF_EXPECTED);
1241
+ goto error;
1242
+ }
1243
+
1244
+ UPDATE_STATE(s_header_field_start);
1245
+ break;
1246
+ }
1247
+
1248
+ case s_header_field_start:
1249
+ {
1250
+ if (ch == CR) {
1251
+ UPDATE_STATE(s_headers_almost_done);
1252
+ break;
1253
+ }
1254
+
1255
+ if (ch == LF) {
1256
+ /* they might be just sending \n instead of \r\n so this would be
1257
+ * the second \n to denote the end of headers*/
1258
+ UPDATE_STATE(s_headers_almost_done);
1259
+ REEXECUTE();
1260
+ }
1261
+
1262
+ c = TOKEN(ch);
1263
+
1264
+ if (UNLIKELY(!c)) {
1265
+ SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1266
+ goto error;
1267
+ }
1268
+
1269
+ MARK(header_field);
1270
+
1271
+ parser->index = 0;
1272
+ UPDATE_STATE(s_header_field);
1273
+
1274
+ switch (c) {
1275
+ case 'c':
1276
+ parser->header_state = h_C;
1277
+ break;
1278
+
1279
+ case 'p':
1280
+ parser->header_state = h_matching_proxy_connection;
1281
+ break;
1282
+
1283
+ case 't':
1284
+ parser->header_state = h_matching_transfer_encoding;
1285
+ break;
1286
+
1287
+ case 'u':
1288
+ parser->header_state = h_matching_upgrade;
1289
+ break;
1290
+
1291
+ default:
1292
+ parser->header_state = h_general;
1293
+ break;
1294
+ }
1295
+ break;
1296
+ }
1297
+
1298
+ case s_header_field:
1299
+ {
1300
+ const char* start = p;
1301
+ for (; p != data + len; p++) {
1302
+ ch = *p;
1303
+ c = TOKEN(ch);
1304
+
1305
+ if (!c)
1306
+ break;
1307
+
1308
+ switch (parser->header_state) {
1309
+ case h_general:
1310
+ break;
1311
+
1312
+ case h_C:
1313
+ parser->index++;
1314
+ parser->header_state = (c == 'o' ? h_CO : h_general);
1315
+ break;
1316
+
1317
+ case h_CO:
1318
+ parser->index++;
1319
+ parser->header_state = (c == 'n' ? h_CON : h_general);
1320
+ break;
1321
+
1322
+ case h_CON:
1323
+ parser->index++;
1324
+ switch (c) {
1325
+ case 'n':
1326
+ parser->header_state = h_matching_connection;
1327
+ break;
1328
+ case 't':
1329
+ parser->header_state = h_matching_content_length;
1330
+ break;
1331
+ default:
1332
+ parser->header_state = h_general;
1333
+ break;
1334
+ }
1335
+ break;
1336
+
1337
+ /* connection */
1338
+
1339
+ case h_matching_connection:
1340
+ parser->index++;
1341
+ if (parser->index > sizeof(CONNECTION)-1
1342
+ || c != CONNECTION[parser->index]) {
1343
+ parser->header_state = h_general;
1344
+ } else if (parser->index == sizeof(CONNECTION)-2) {
1345
+ parser->header_state = h_connection;
1346
+ }
1347
+ break;
1348
+
1349
+ /* proxy-connection */
1350
+
1351
+ case h_matching_proxy_connection:
1352
+ parser->index++;
1353
+ if (parser->index > sizeof(PROXY_CONNECTION)-1
1354
+ || c != PROXY_CONNECTION[parser->index]) {
1355
+ parser->header_state = h_general;
1356
+ } else if (parser->index == sizeof(PROXY_CONNECTION)-2) {
1357
+ parser->header_state = h_connection;
1358
+ }
1359
+ break;
1360
+
1361
+ /* content-length */
1362
+
1363
+ case h_matching_content_length:
1364
+ parser->index++;
1365
+ if (parser->index > sizeof(CONTENT_LENGTH)-1
1366
+ || c != CONTENT_LENGTH[parser->index]) {
1367
+ parser->header_state = h_general;
1368
+ } else if (parser->index == sizeof(CONTENT_LENGTH)-2) {
1369
+ parser->header_state = h_content_length;
1370
+ }
1371
+ break;
1372
+
1373
+ /* transfer-encoding */
1374
+
1375
+ case h_matching_transfer_encoding:
1376
+ parser->index++;
1377
+ if (parser->index > sizeof(TRANSFER_ENCODING)-1
1378
+ || c != TRANSFER_ENCODING[parser->index]) {
1379
+ parser->header_state = h_general;
1380
+ } else if (parser->index == sizeof(TRANSFER_ENCODING)-2) {
1381
+ parser->header_state = h_transfer_encoding;
1382
+ }
1383
+ break;
1384
+
1385
+ /* upgrade */
1386
+
1387
+ case h_matching_upgrade:
1388
+ parser->index++;
1389
+ if (parser->index > sizeof(UPGRADE)-1
1390
+ || c != UPGRADE[parser->index]) {
1391
+ parser->header_state = h_general;
1392
+ } else if (parser->index == sizeof(UPGRADE)-2) {
1393
+ parser->header_state = h_upgrade;
1394
+ }
1395
+ break;
1396
+
1397
+ case h_connection:
1398
+ case h_content_length:
1399
+ case h_transfer_encoding:
1400
+ case h_upgrade:
1401
+ if (ch != ' ') parser->header_state = h_general;
1402
+ break;
1403
+
1404
+ default:
1405
+ assert(0 && "Unknown header_state");
1406
+ break;
1407
+ }
1408
+ }
1409
+
1410
+ COUNT_HEADER_SIZE(p - start);
1411
+
1412
+ if (p == data + len) {
1413
+ --p;
1414
+ break;
1415
+ }
1416
+
1417
+ if (ch == ':') {
1418
+ UPDATE_STATE(s_header_value_discard_ws);
1419
+ CALLBACK_DATA(header_field);
1420
+ break;
1421
+ }
1422
+
1423
+ SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1424
+ goto error;
1425
+ }
1426
+
1427
+ case s_header_value_discard_ws:
1428
+ if (ch == ' ' || ch == '\t') break;
1429
+
1430
+ if (ch == CR) {
1431
+ UPDATE_STATE(s_header_value_discard_ws_almost_done);
1432
+ break;
1433
+ }
1434
+
1435
+ if (ch == LF) {
1436
+ UPDATE_STATE(s_header_value_discard_lws);
1437
+ break;
1438
+ }
1439
+
1440
+ /* FALLTHROUGH */
1441
+
1442
+ case s_header_value_start:
1443
+ {
1444
+ MARK(header_value);
1445
+
1446
+ UPDATE_STATE(s_header_value);
1447
+ parser->index = 0;
1448
+
1449
+ c = LOWER(ch);
1450
+
1451
+ switch (parser->header_state) {
1452
+ case h_upgrade:
1453
+ parser->flags |= F_UPGRADE;
1454
+ parser->header_state = h_general;
1455
+ break;
1456
+
1457
+ case h_transfer_encoding:
1458
+ /* looking for 'Transfer-Encoding: chunked' */
1459
+ if ('c' == c) {
1460
+ parser->header_state = h_matching_transfer_encoding_chunked;
1461
+ } else {
1462
+ parser->header_state = h_general;
1463
+ }
1464
+ break;
1465
+
1466
+ case h_content_length:
1467
+ if (UNLIKELY(!IS_NUM(ch))) {
1468
+ SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1469
+ goto error;
1470
+ }
1471
+
1472
+ if (parser->flags & F_CONTENTLENGTH) {
1473
+ SET_ERRNO(HPE_UNEXPECTED_CONTENT_LENGTH);
1474
+ goto error;
1475
+ }
1476
+
1477
+ parser->flags |= F_CONTENTLENGTH;
1478
+ parser->content_length = ch - '0';
1479
+ break;
1480
+
1481
+ case h_connection:
1482
+ /* looking for 'Connection: keep-alive' */
1483
+ if (c == 'k') {
1484
+ parser->header_state = h_matching_connection_keep_alive;
1485
+ /* looking for 'Connection: close' */
1486
+ } else if (c == 'c') {
1487
+ parser->header_state = h_matching_connection_close;
1488
+ } else if (c == 'u') {
1489
+ parser->header_state = h_matching_connection_upgrade;
1490
+ } else {
1491
+ parser->header_state = h_matching_connection_token;
1492
+ }
1493
+ break;
1494
+
1495
+ /* Multi-value `Connection` header */
1496
+ case h_matching_connection_token_start:
1497
+ break;
1498
+
1499
+ default:
1500
+ parser->header_state = h_general;
1501
+ break;
1502
+ }
1503
+ break;
1504
+ }
1505
+
1506
+ case s_header_value:
1507
+ {
1508
+ const char* start = p;
1509
+ enum header_states h_state = (enum header_states) parser->header_state;
1510
+ for (; p != data + len; p++) {
1511
+ ch = *p;
1512
+ if (ch == CR) {
1513
+ UPDATE_STATE(s_header_almost_done);
1514
+ parser->header_state = h_state;
1515
+ CALLBACK_DATA(header_value);
1516
+ break;
1517
+ }
1518
+
1519
+ if (ch == LF) {
1520
+ UPDATE_STATE(s_header_almost_done);
1521
+ COUNT_HEADER_SIZE(p - start);
1522
+ parser->header_state = h_state;
1523
+ CALLBACK_DATA_NOADVANCE(header_value);
1524
+ REEXECUTE();
1525
+ }
1526
+
1527
+ if (!lenient && !IS_HEADER_CHAR(ch)) {
1528
+ SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1529
+ goto error;
1530
+ }
1531
+
1532
+ c = LOWER(ch);
1533
+
1534
+ switch (h_state) {
1535
+ case h_general:
1536
+ {
1537
+ const char* p_cr;
1538
+ const char* p_lf;
1539
+ size_t limit = data + len - p;
1540
+
1541
+ limit = MIN(limit, HTTP_MAX_HEADER_SIZE);
1542
+
1543
+ p_cr = (const char*) memchr(p, CR, limit);
1544
+ p_lf = (const char*) memchr(p, LF, limit);
1545
+ if (p_cr != NULL) {
1546
+ if (p_lf != NULL && p_cr >= p_lf)
1547
+ p = p_lf;
1548
+ else
1549
+ p = p_cr;
1550
+ } else if (UNLIKELY(p_lf != NULL)) {
1551
+ p = p_lf;
1552
+ } else {
1553
+ p = data + len;
1554
+ }
1555
+ --p;
1556
+
1557
+ break;
1558
+ }
1559
+
1560
+ case h_connection:
1561
+ case h_transfer_encoding:
1562
+ assert(0 && "Shouldn't get here.");
1563
+ break;
1564
+
1565
+ case h_content_length:
1566
+ {
1567
+ uint64_t t;
1568
+
1569
+ if (ch == ' ') break;
1570
+
1571
+ if (UNLIKELY(!IS_NUM(ch))) {
1572
+ SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1573
+ parser->header_state = h_state;
1574
+ goto error;
1575
+ }
1576
+
1577
+ t = parser->content_length;
1578
+ t *= 10;
1579
+ t += ch - '0';
1580
+
1581
+ /* Overflow? Test against a conservative limit for simplicity. */
1582
+ if (UNLIKELY((ULLONG_MAX - 10) / 10 < parser->content_length)) {
1583
+ SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1584
+ parser->header_state = h_state;
1585
+ goto error;
1586
+ }
1587
+
1588
+ parser->content_length = t;
1589
+ break;
1590
+ }
1591
+
1592
+ /* Transfer-Encoding: chunked */
1593
+ case h_matching_transfer_encoding_chunked:
1594
+ parser->index++;
1595
+ if (parser->index > sizeof(CHUNKED)-1
1596
+ || c != CHUNKED[parser->index]) {
1597
+ h_state = h_general;
1598
+ } else if (parser->index == sizeof(CHUNKED)-2) {
1599
+ h_state = h_transfer_encoding_chunked;
1600
+ }
1601
+ break;
1602
+
1603
+ case h_matching_connection_token_start:
1604
+ /* looking for 'Connection: keep-alive' */
1605
+ if (c == 'k') {
1606
+ h_state = h_matching_connection_keep_alive;
1607
+ /* looking for 'Connection: close' */
1608
+ } else if (c == 'c') {
1609
+ h_state = h_matching_connection_close;
1610
+ } else if (c == 'u') {
1611
+ h_state = h_matching_connection_upgrade;
1612
+ } else if (STRICT_TOKEN(c)) {
1613
+ h_state = h_matching_connection_token;
1614
+ } else if (c == ' ' || c == '\t') {
1615
+ /* Skip lws */
1616
+ } else {
1617
+ h_state = h_general;
1618
+ }
1619
+ break;
1620
+
1621
+ /* looking for 'Connection: keep-alive' */
1622
+ case h_matching_connection_keep_alive:
1623
+ parser->index++;
1624
+ if (parser->index > sizeof(KEEP_ALIVE)-1
1625
+ || c != KEEP_ALIVE[parser->index]) {
1626
+ h_state = h_matching_connection_token;
1627
+ } else if (parser->index == sizeof(KEEP_ALIVE)-2) {
1628
+ h_state = h_connection_keep_alive;
1629
+ }
1630
+ break;
1631
+
1632
+ /* looking for 'Connection: close' */
1633
+ case h_matching_connection_close:
1634
+ parser->index++;
1635
+ if (parser->index > sizeof(CLOSE)-1 || c != CLOSE[parser->index]) {
1636
+ h_state = h_matching_connection_token;
1637
+ } else if (parser->index == sizeof(CLOSE)-2) {
1638
+ h_state = h_connection_close;
1639
+ }
1640
+ break;
1641
+
1642
+ /* looking for 'Connection: upgrade' */
1643
+ case h_matching_connection_upgrade:
1644
+ parser->index++;
1645
+ if (parser->index > sizeof(UPGRADE) - 1 ||
1646
+ c != UPGRADE[parser->index]) {
1647
+ h_state = h_matching_connection_token;
1648
+ } else if (parser->index == sizeof(UPGRADE)-2) {
1649
+ h_state = h_connection_upgrade;
1650
+ }
1651
+ break;
1652
+
1653
+ case h_matching_connection_token:
1654
+ if (ch == ',') {
1655
+ h_state = h_matching_connection_token_start;
1656
+ parser->index = 0;
1657
+ }
1658
+ break;
1659
+
1660
+ case h_transfer_encoding_chunked:
1661
+ if (ch != ' ') h_state = h_general;
1662
+ break;
1663
+
1664
+ case h_connection_keep_alive:
1665
+ case h_connection_close:
1666
+ case h_connection_upgrade:
1667
+ if (ch == ',') {
1668
+ if (h_state == h_connection_keep_alive) {
1669
+ parser->flags |= F_CONNECTION_KEEP_ALIVE;
1670
+ } else if (h_state == h_connection_close) {
1671
+ parser->flags |= F_CONNECTION_CLOSE;
1672
+ } else if (h_state == h_connection_upgrade) {
1673
+ parser->flags |= F_CONNECTION_UPGRADE;
1674
+ }
1675
+ h_state = h_matching_connection_token_start;
1676
+ parser->index = 0;
1677
+ } else if (ch != ' ') {
1678
+ h_state = h_matching_connection_token;
1679
+ }
1680
+ break;
1681
+
1682
+ default:
1683
+ UPDATE_STATE(s_header_value);
1684
+ h_state = h_general;
1685
+ break;
1686
+ }
1687
+ }
1688
+ parser->header_state = h_state;
1689
+
1690
+ COUNT_HEADER_SIZE(p - start);
1691
+
1692
+ if (p == data + len)
1693
+ --p;
1694
+ break;
1695
+ }
1696
+
1697
+ case s_header_almost_done:
1698
+ {
1699
+ if (UNLIKELY(ch != LF)) {
1700
+ SET_ERRNO(HPE_LF_EXPECTED);
1701
+ goto error;
1702
+ }
1703
+
1704
+ UPDATE_STATE(s_header_value_lws);
1705
+ break;
1706
+ }
1707
+
1708
+ case s_header_value_lws:
1709
+ {
1710
+ if (ch == ' ' || ch == '\t') {
1711
+ UPDATE_STATE(s_header_value_start);
1712
+ REEXECUTE();
1713
+ }
1714
+
1715
+ /* finished the header */
1716
+ switch (parser->header_state) {
1717
+ case h_connection_keep_alive:
1718
+ parser->flags |= F_CONNECTION_KEEP_ALIVE;
1719
+ break;
1720
+ case h_connection_close:
1721
+ parser->flags |= F_CONNECTION_CLOSE;
1722
+ break;
1723
+ case h_transfer_encoding_chunked:
1724
+ parser->flags |= F_CHUNKED;
1725
+ break;
1726
+ case h_connection_upgrade:
1727
+ parser->flags |= F_CONNECTION_UPGRADE;
1728
+ break;
1729
+ default:
1730
+ break;
1731
+ }
1732
+
1733
+ UPDATE_STATE(s_header_field_start);
1734
+ REEXECUTE();
1735
+ }
1736
+
1737
+ case s_header_value_discard_ws_almost_done:
1738
+ {
1739
+ STRICT_CHECK(ch != LF);
1740
+ UPDATE_STATE(s_header_value_discard_lws);
1741
+ break;
1742
+ }
1743
+
1744
+ case s_header_value_discard_lws:
1745
+ {
1746
+ if (ch == ' ' || ch == '\t') {
1747
+ UPDATE_STATE(s_header_value_discard_ws);
1748
+ break;
1749
+ } else {
1750
+ switch (parser->header_state) {
1751
+ case h_connection_keep_alive:
1752
+ parser->flags |= F_CONNECTION_KEEP_ALIVE;
1753
+ break;
1754
+ case h_connection_close:
1755
+ parser->flags |= F_CONNECTION_CLOSE;
1756
+ break;
1757
+ case h_connection_upgrade:
1758
+ parser->flags |= F_CONNECTION_UPGRADE;
1759
+ break;
1760
+ case h_transfer_encoding_chunked:
1761
+ parser->flags |= F_CHUNKED;
1762
+ break;
1763
+ default:
1764
+ break;
1765
+ }
1766
+
1767
+ /* header value was empty */
1768
+ MARK(header_value);
1769
+ UPDATE_STATE(s_header_field_start);
1770
+ CALLBACK_DATA_NOADVANCE(header_value);
1771
+ REEXECUTE();
1772
+ }
1773
+ }
1774
+
1775
+ case s_headers_almost_done:
1776
+ {
1777
+ STRICT_CHECK(ch != LF);
1778
+
1779
+ if (parser->flags & F_TRAILING) {
1780
+ /* End of a chunked request */
1781
+ UPDATE_STATE(s_message_done);
1782
+ CALLBACK_NOTIFY_NOADVANCE(chunk_complete);
1783
+ REEXECUTE();
1784
+ }
1785
+
1786
+ /* Cannot use chunked encoding and a content-length header together
1787
+ per the HTTP specification. */
1788
+ if ((parser->flags & F_CHUNKED) &&
1789
+ (parser->flags & F_CONTENTLENGTH)) {
1790
+ SET_ERRNO(HPE_UNEXPECTED_CONTENT_LENGTH);
1791
+ goto error;
1792
+ }
1793
+
1794
+ UPDATE_STATE(s_headers_done);
1795
+
1796
+ /* Set this here so that on_headers_complete() callbacks can see it */
1797
+ parser->upgrade =
1798
+ ((parser->flags & (F_UPGRADE | F_CONNECTION_UPGRADE)) ==
1799
+ (F_UPGRADE | F_CONNECTION_UPGRADE) ||
1800
+ parser->method == HTTP_CONNECT);
1801
+
1802
+ /* Here we call the headers_complete callback. This is somewhat
1803
+ * different than other callbacks because if the user returns 1, we
1804
+ * will interpret that as saying that this message has no body. This
1805
+ * is needed for the annoying case of receiving a response to a HEAD
1806
+ * request.
1807
+ *
1808
+ * We'd like to use CALLBACK_NOTIFY_NOADVANCE() here but we cannot, so
1809
+ * we have to simulate it by handling a change in errno below.
1810
+ */
1811
+ if (settings->on_headers_complete) {
1812
+ switch (settings->on_headers_complete(parser)) {
1813
+ case 0:
1814
+ break;
1815
+
1816
+ case 2:
1817
+ parser->upgrade = 1;
1818
+
1819
+ case 1:
1820
+ parser->flags |= F_SKIPBODY;
1821
+ break;
1822
+
1823
+ default:
1824
+ SET_ERRNO(HPE_CB_headers_complete);
1825
+ RETURN(p - data); /* Error */
1826
+ }
1827
+ }
1828
+
1829
+ if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
1830
+ RETURN(p - data);
1831
+ }
1832
+
1833
+ REEXECUTE();
1834
+ }
1835
+
1836
+ case s_headers_done:
1837
+ {
1838
+ int hasBody;
1839
+ STRICT_CHECK(ch != LF);
1840
+
1841
+ parser->nread = 0;
1842
+
1843
+ hasBody = parser->flags & F_CHUNKED ||
1844
+ (parser->content_length > 0 && parser->content_length != ULLONG_MAX);
1845
+ if (parser->upgrade && (parser->method == HTTP_CONNECT ||
1846
+ (parser->flags & F_SKIPBODY) || !hasBody)) {
1847
+ /* Exit, the rest of the message is in a different protocol. */
1848
+ UPDATE_STATE(NEW_MESSAGE());
1849
+ CALLBACK_NOTIFY(message_complete);
1850
+ RETURN((p - data) + 1);
1851
+ }
1852
+
1853
+ if (parser->flags & F_SKIPBODY) {
1854
+ UPDATE_STATE(NEW_MESSAGE());
1855
+ CALLBACK_NOTIFY(message_complete);
1856
+ } else if (parser->flags & F_CHUNKED) {
1857
+ /* chunked encoding - ignore Content-Length header */
1858
+ UPDATE_STATE(s_chunk_size_start);
1859
+ } else {
1860
+ if (parser->content_length == 0) {
1861
+ /* Content-Length header given but zero: Content-Length: 0\r\n */
1862
+ UPDATE_STATE(NEW_MESSAGE());
1863
+ CALLBACK_NOTIFY(message_complete);
1864
+ } else if (parser->content_length != ULLONG_MAX) {
1865
+ /* Content-Length header given and non-zero */
1866
+ UPDATE_STATE(s_body_identity);
1867
+ } else {
1868
+ if (!http_message_needs_eof(parser)) {
1869
+ /* Assume content-length 0 - read the next */
1870
+ UPDATE_STATE(NEW_MESSAGE());
1871
+ CALLBACK_NOTIFY(message_complete);
1872
+ } else {
1873
+ /* Read body until EOF */
1874
+ UPDATE_STATE(s_body_identity_eof);
1875
+ }
1876
+ }
1877
+ }
1878
+
1879
+ break;
1880
+ }
1881
+
1882
+ case s_body_identity:
1883
+ {
1884
+ uint64_t to_read = MIN(parser->content_length,
1885
+ (uint64_t) ((data + len) - p));
1886
+
1887
+ assert(parser->content_length != 0
1888
+ && parser->content_length != ULLONG_MAX);
1889
+
1890
+ /* The difference between advancing content_length and p is because
1891
+ * the latter will automatically advance on the next loop iteration.
1892
+ * Further, if content_length ends up at 0, we want to see the last
1893
+ * byte again for our message complete callback.
1894
+ */
1895
+ MARK(body);
1896
+ parser->content_length -= to_read;
1897
+ p += to_read - 1;
1898
+
1899
+ if (parser->content_length == 0) {
1900
+ UPDATE_STATE(s_message_done);
1901
+
1902
+ /* Mimic CALLBACK_DATA_NOADVANCE() but with one extra byte.
1903
+ *
1904
+ * The alternative to doing this is to wait for the next byte to
1905
+ * trigger the data callback, just as in every other case. The
1906
+ * problem with this is that this makes it difficult for the test
1907
+ * harness to distinguish between complete-on-EOF and
1908
+ * complete-on-length. It's not clear that this distinction is
1909
+ * important for applications, but let's keep it for now.
1910
+ */
1911
+ CALLBACK_DATA_(body, p - body_mark + 1, p - data);
1912
+ REEXECUTE();
1913
+ }
1914
+
1915
+ break;
1916
+ }
1917
+
1918
+ /* read until EOF */
1919
+ case s_body_identity_eof:
1920
+ MARK(body);
1921
+ p = data + len - 1;
1922
+
1923
+ break;
1924
+
1925
+ case s_message_done:
1926
+ UPDATE_STATE(NEW_MESSAGE());
1927
+ CALLBACK_NOTIFY(message_complete);
1928
+ if (parser->upgrade) {
1929
+ /* Exit, the rest of the message is in a different protocol. */
1930
+ RETURN((p - data) + 1);
1931
+ }
1932
+ break;
1933
+
1934
+ case s_chunk_size_start:
1935
+ {
1936
+ assert(parser->nread == 1);
1937
+ assert(parser->flags & F_CHUNKED);
1938
+
1939
+ unhex_val = unhex[(unsigned char)ch];
1940
+ if (UNLIKELY(unhex_val == -1)) {
1941
+ SET_ERRNO(HPE_INVALID_CHUNK_SIZE);
1942
+ goto error;
1943
+ }
1944
+
1945
+ parser->content_length = unhex_val;
1946
+ UPDATE_STATE(s_chunk_size);
1947
+ break;
1948
+ }
1949
+
1950
+ case s_chunk_size:
1951
+ {
1952
+ uint64_t t;
1953
+
1954
+ assert(parser->flags & F_CHUNKED);
1955
+
1956
+ if (ch == CR) {
1957
+ UPDATE_STATE(s_chunk_size_almost_done);
1958
+ break;
1959
+ }
1960
+
1961
+ unhex_val = unhex[(unsigned char)ch];
1962
+
1963
+ if (unhex_val == -1) {
1964
+ if (ch == ';' || ch == ' ') {
1965
+ UPDATE_STATE(s_chunk_parameters);
1966
+ break;
1967
+ }
1968
+
1969
+ SET_ERRNO(HPE_INVALID_CHUNK_SIZE);
1970
+ goto error;
1971
+ }
1972
+
1973
+ t = parser->content_length;
1974
+ t *= 16;
1975
+ t += unhex_val;
1976
+
1977
+ /* Overflow? Test against a conservative limit for simplicity. */
1978
+ if (UNLIKELY((ULLONG_MAX - 16) / 16 < parser->content_length)) {
1979
+ SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1980
+ goto error;
1981
+ }
1982
+
1983
+ parser->content_length = t;
1984
+ break;
1985
+ }
1986
+
1987
+ case s_chunk_parameters:
1988
+ {
1989
+ assert(parser->flags & F_CHUNKED);
1990
+ /* just ignore this shit. TODO check for overflow */
1991
+ if (ch == CR) {
1992
+ UPDATE_STATE(s_chunk_size_almost_done);
1993
+ break;
1994
+ }
1995
+ break;
1996
+ }
1997
+
1998
+ case s_chunk_size_almost_done:
1999
+ {
2000
+ assert(parser->flags & F_CHUNKED);
2001
+ STRICT_CHECK(ch != LF);
2002
+
2003
+ parser->nread = 0;
2004
+
2005
+ if (parser->content_length == 0) {
2006
+ parser->flags |= F_TRAILING;
2007
+ UPDATE_STATE(s_header_field_start);
2008
+ } else {
2009
+ UPDATE_STATE(s_chunk_data);
2010
+ }
2011
+ CALLBACK_NOTIFY(chunk_header);
2012
+ break;
2013
+ }
2014
+
2015
+ case s_chunk_data:
2016
+ {
2017
+ uint64_t to_read = MIN(parser->content_length,
2018
+ (uint64_t) ((data + len) - p));
2019
+
2020
+ assert(parser->flags & F_CHUNKED);
2021
+ assert(parser->content_length != 0
2022
+ && parser->content_length != ULLONG_MAX);
2023
+
2024
+ /* See the explanation in s_body_identity for why the content
2025
+ * length and data pointers are managed this way.
2026
+ */
2027
+ MARK(body);
2028
+ parser->content_length -= to_read;
2029
+ p += to_read - 1;
2030
+
2031
+ if (parser->content_length == 0) {
2032
+ UPDATE_STATE(s_chunk_data_almost_done);
2033
+ }
2034
+
2035
+ break;
2036
+ }
2037
+
2038
+ case s_chunk_data_almost_done:
2039
+ assert(parser->flags & F_CHUNKED);
2040
+ assert(parser->content_length == 0);
2041
+ STRICT_CHECK(ch != CR);
2042
+ UPDATE_STATE(s_chunk_data_done);
2043
+ CALLBACK_DATA(body);
2044
+ break;
2045
+
2046
+ case s_chunk_data_done:
2047
+ assert(parser->flags & F_CHUNKED);
2048
+ STRICT_CHECK(ch != LF);
2049
+ parser->nread = 0;
2050
+ UPDATE_STATE(s_chunk_size_start);
2051
+ CALLBACK_NOTIFY(chunk_complete);
2052
+ break;
2053
+
2054
+ default:
2055
+ assert(0 && "unhandled state");
2056
+ SET_ERRNO(HPE_INVALID_INTERNAL_STATE);
2057
+ goto error;
2058
+ }
2059
+ }
2060
+
2061
+ /* Run callbacks for any marks that we have leftover after we ran out of
2062
+ * bytes. There should be at most one of these set, so it's OK to invoke
2063
+ * them in series (unset marks will not result in callbacks).
2064
+ *
2065
+ * We use the NOADVANCE() variety of callbacks here because 'p' has already
2066
+ * overflowed 'data' and this allows us to correct for the off-by-one that
2067
+ * we'd otherwise have (since CALLBACK_DATA() is meant to be run with a 'p'
2068
+ * value that's in-bounds).
2069
+ */
2070
+
2071
+ assert(((header_field_mark ? 1 : 0) +
2072
+ (header_value_mark ? 1 : 0) +
2073
+ (url_mark ? 1 : 0) +
2074
+ (body_mark ? 1 : 0) +
2075
+ (status_mark ? 1 : 0)) <= 1);
2076
+
2077
+ CALLBACK_DATA_NOADVANCE(header_field);
2078
+ CALLBACK_DATA_NOADVANCE(header_value);
2079
+ CALLBACK_DATA_NOADVANCE(url);
2080
+ CALLBACK_DATA_NOADVANCE(body);
2081
+ CALLBACK_DATA_NOADVANCE(status);
2082
+
2083
+ RETURN(len);
2084
+
2085
+ error:
2086
+ if (HTTP_PARSER_ERRNO(parser) == HPE_OK) {
2087
+ SET_ERRNO(HPE_UNKNOWN);
2088
+ }
2089
+
2090
+ RETURN(p - data);
2091
+ }
2092
+
2093
+
2094
+ /* Does the parser need to see an EOF to find the end of the message? */
2095
+ int
2096
+ http_message_needs_eof (const http_parser *parser)
2097
+ {
2098
+ if (parser->type == HTTP_REQUEST) {
2099
+ return 0;
2100
+ }
2101
+
2102
+ /* See RFC 2616 section 4.4 */
2103
+ if (parser->status_code / 100 == 1 || /* 1xx e.g. Continue */
2104
+ parser->status_code == 204 || /* No Content */
2105
+ parser->status_code == 304 || /* Not Modified */
2106
+ parser->flags & F_SKIPBODY) { /* response to a HEAD request */
2107
+ return 0;
2108
+ }
2109
+
2110
+ if ((parser->flags & F_CHUNKED) || parser->content_length != ULLONG_MAX) {
2111
+ return 0;
2112
+ }
2113
+
2114
+ return 1;
2115
+ }
2116
+
2117
+
2118
+ int
2119
+ http_should_keep_alive (const http_parser *parser)
2120
+ {
2121
+ if (parser->http_major > 0 && parser->http_minor > 0) {
2122
+ /* HTTP/1.1 */
2123
+ if (parser->flags & F_CONNECTION_CLOSE) {
2124
+ return 0;
2125
+ }
2126
+ } else {
2127
+ /* HTTP/1.0 or earlier */
2128
+ if (!(parser->flags & F_CONNECTION_KEEP_ALIVE)) {
2129
+ return 0;
2130
+ }
2131
+ }
2132
+
2133
+ return !http_message_needs_eof(parser);
2134
+ }
2135
+
2136
+
2137
+ const char *
2138
+ http_method_str (enum http_method m)
2139
+ {
2140
+ return ELEM_AT(method_strings, m, "<unknown>");
2141
+ }
2142
+
2143
+
2144
+ void
2145
+ http_parser_init (http_parser *parser, enum http_parser_type t)
2146
+ {
2147
+ void *data = parser->data; /* preserve application data */
2148
+ memset(parser, 0, sizeof(*parser));
2149
+ parser->data = data;
2150
+ parser->type = t;
2151
+ parser->state = (t == HTTP_REQUEST ? s_start_req : (t == HTTP_RESPONSE ? s_start_res : s_start_req_or_res));
2152
+ parser->http_errno = HPE_OK;
2153
+ }
2154
+
2155
+ void
2156
+ http_parser_settings_init(http_parser_settings *settings)
2157
+ {
2158
+ memset(settings, 0, sizeof(*settings));
2159
+ }
2160
+
2161
+ const char *
2162
+ http_errno_name(enum http_errno err) {
2163
+ assert(((size_t) err) < ARRAY_SIZE(http_strerror_tab));
2164
+ return http_strerror_tab[err].name;
2165
+ }
2166
+
2167
+ const char *
2168
+ http_errno_description(enum http_errno err) {
2169
+ assert(((size_t) err) < ARRAY_SIZE(http_strerror_tab));
2170
+ return http_strerror_tab[err].description;
2171
+ }
2172
+
2173
+ static enum http_host_state
2174
+ http_parse_host_char(enum http_host_state s, const char ch) {
2175
+ switch(s) {
2176
+ case s_http_userinfo:
2177
+ case s_http_userinfo_start:
2178
+ if (ch == '@') {
2179
+ return s_http_host_start;
2180
+ }
2181
+
2182
+ if (IS_USERINFO_CHAR(ch)) {
2183
+ return s_http_userinfo;
2184
+ }
2185
+ break;
2186
+
2187
+ case s_http_host_start:
2188
+ if (ch == '[') {
2189
+ return s_http_host_v6_start;
2190
+ }
2191
+
2192
+ if (IS_HOST_CHAR(ch)) {
2193
+ return s_http_host;
2194
+ }
2195
+
2196
+ break;
2197
+
2198
+ case s_http_host:
2199
+ if (IS_HOST_CHAR(ch)) {
2200
+ return s_http_host;
2201
+ }
2202
+
2203
+ /* FALLTHROUGH */
2204
+ case s_http_host_v6_end:
2205
+ if (ch == ':') {
2206
+ return s_http_host_port_start;
2207
+ }
2208
+
2209
+ break;
2210
+
2211
+ case s_http_host_v6:
2212
+ if (ch == ']') {
2213
+ return s_http_host_v6_end;
2214
+ }
2215
+
2216
+ /* FALLTHROUGH */
2217
+ case s_http_host_v6_start:
2218
+ if (IS_HEX(ch) || ch == ':' || ch == '.') {
2219
+ return s_http_host_v6;
2220
+ }
2221
+
2222
+ if (s == s_http_host_v6 && ch == '%') {
2223
+ return s_http_host_v6_zone_start;
2224
+ }
2225
+ break;
2226
+
2227
+ case s_http_host_v6_zone:
2228
+ if (ch == ']') {
2229
+ return s_http_host_v6_end;
2230
+ }
2231
+
2232
+ /* FALLTHROUGH */
2233
+ case s_http_host_v6_zone_start:
2234
+ /* RFC 6874 Zone ID consists of 1*( unreserved / pct-encoded) */
2235
+ if (IS_ALPHANUM(ch) || ch == '%' || ch == '.' || ch == '-' || ch == '_' ||
2236
+ ch == '~') {
2237
+ return s_http_host_v6_zone;
2238
+ }
2239
+ break;
2240
+
2241
+ case s_http_host_port:
2242
+ case s_http_host_port_start:
2243
+ if (IS_NUM(ch)) {
2244
+ return s_http_host_port;
2245
+ }
2246
+
2247
+ break;
2248
+
2249
+ default:
2250
+ break;
2251
+ }
2252
+ return s_http_host_dead;
2253
+ }
2254
+
2255
+ static int
2256
+ http_parse_host(const char * buf, struct http_parser_url *u, int found_at) {
2257
+ enum http_host_state s;
2258
+
2259
+ const char *p;
2260
+ size_t buflen = u->field_data[UF_HOST].off + u->field_data[UF_HOST].len;
2261
+
2262
+ assert(u->field_set & (1 << UF_HOST));
2263
+
2264
+ u->field_data[UF_HOST].len = 0;
2265
+
2266
+ s = found_at ? s_http_userinfo_start : s_http_host_start;
2267
+
2268
+ for (p = buf + u->field_data[UF_HOST].off; p < buf + buflen; p++) {
2269
+ enum http_host_state new_s = http_parse_host_char(s, *p);
2270
+
2271
+ if (new_s == s_http_host_dead) {
2272
+ return 1;
2273
+ }
2274
+
2275
+ switch(new_s) {
2276
+ case s_http_host:
2277
+ if (s != s_http_host) {
2278
+ u->field_data[UF_HOST].off = p - buf;
2279
+ }
2280
+ u->field_data[UF_HOST].len++;
2281
+ break;
2282
+
2283
+ case s_http_host_v6:
2284
+ if (s != s_http_host_v6) {
2285
+ u->field_data[UF_HOST].off = p - buf;
2286
+ }
2287
+ u->field_data[UF_HOST].len++;
2288
+ break;
2289
+
2290
+ case s_http_host_v6_zone_start:
2291
+ case s_http_host_v6_zone:
2292
+ u->field_data[UF_HOST].len++;
2293
+ break;
2294
+
2295
+ case s_http_host_port:
2296
+ if (s != s_http_host_port) {
2297
+ u->field_data[UF_PORT].off = p - buf;
2298
+ u->field_data[UF_PORT].len = 0;
2299
+ u->field_set |= (1 << UF_PORT);
2300
+ }
2301
+ u->field_data[UF_PORT].len++;
2302
+ break;
2303
+
2304
+ case s_http_userinfo:
2305
+ if (s != s_http_userinfo) {
2306
+ u->field_data[UF_USERINFO].off = p - buf ;
2307
+ u->field_data[UF_USERINFO].len = 0;
2308
+ u->field_set |= (1 << UF_USERINFO);
2309
+ }
2310
+ u->field_data[UF_USERINFO].len++;
2311
+ break;
2312
+
2313
+ default:
2314
+ break;
2315
+ }
2316
+ s = new_s;
2317
+ }
2318
+
2319
+ /* Make sure we don't end somewhere unexpected */
2320
+ switch (s) {
2321
+ case s_http_host_start:
2322
+ case s_http_host_v6_start:
2323
+ case s_http_host_v6:
2324
+ case s_http_host_v6_zone_start:
2325
+ case s_http_host_v6_zone:
2326
+ case s_http_host_port_start:
2327
+ case s_http_userinfo:
2328
+ case s_http_userinfo_start:
2329
+ return 1;
2330
+ default:
2331
+ break;
2332
+ }
2333
+
2334
+ return 0;
2335
+ }
2336
+
2337
+ void
2338
+ http_parser_url_init(struct http_parser_url *u) {
2339
+ memset(u, 0, sizeof(*u));
2340
+ }
2341
+
2342
+ int
2343
+ http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
2344
+ struct http_parser_url *u)
2345
+ {
2346
+ enum state s;
2347
+ const char *p;
2348
+ enum http_parser_url_fields uf, old_uf;
2349
+ int found_at = 0;
2350
+
2351
+ u->port = u->field_set = 0;
2352
+ s = is_connect ? s_req_server_start : s_req_spaces_before_url;
2353
+ old_uf = UF_MAX;
2354
+
2355
+ for (p = buf; p < buf + buflen; p++) {
2356
+ s = parse_url_char(s, *p);
2357
+
2358
+ /* Figure out the next field that we're operating on */
2359
+ switch (s) {
2360
+ case s_dead:
2361
+ return 1;
2362
+
2363
+ /* Skip delimeters */
2364
+ case s_req_schema_slash:
2365
+ case s_req_schema_slash_slash:
2366
+ case s_req_server_start:
2367
+ case s_req_query_string_start:
2368
+ case s_req_fragment_start:
2369
+ continue;
2370
+
2371
+ case s_req_schema:
2372
+ uf = UF_SCHEMA;
2373
+ break;
2374
+
2375
+ case s_req_server_with_at:
2376
+ found_at = 1;
2377
+
2378
+ /* FALLTROUGH */
2379
+ case s_req_server:
2380
+ uf = UF_HOST;
2381
+ break;
2382
+
2383
+ case s_req_path:
2384
+ uf = UF_PATH;
2385
+ break;
2386
+
2387
+ case s_req_query_string:
2388
+ uf = UF_QUERY;
2389
+ break;
2390
+
2391
+ case s_req_fragment:
2392
+ uf = UF_FRAGMENT;
2393
+ break;
2394
+
2395
+ default:
2396
+ assert(!"Unexpected state");
2397
+ return 1;
2398
+ }
2399
+
2400
+ /* Nothing's changed; soldier on */
2401
+ if (uf == old_uf) {
2402
+ u->field_data[uf].len++;
2403
+ continue;
2404
+ }
2405
+
2406
+ u->field_data[uf].off = p - buf;
2407
+ u->field_data[uf].len = 1;
2408
+
2409
+ u->field_set |= (1 << uf);
2410
+ old_uf = uf;
2411
+ }
2412
+
2413
+ /* host must be present if there is a schema */
2414
+ /* parsing http:///toto will fail */
2415
+ if ((u->field_set & (1 << UF_SCHEMA)) &&
2416
+ (u->field_set & (1 << UF_HOST)) == 0) {
2417
+ return 1;
2418
+ }
2419
+
2420
+ if (u->field_set & (1 << UF_HOST)) {
2421
+ if (http_parse_host(buf, u, found_at) != 0) {
2422
+ return 1;
2423
+ }
2424
+ }
2425
+
2426
+ /* CONNECT requests can only contain "hostname:port" */
2427
+ if (is_connect && u->field_set != ((1 << UF_HOST)|(1 << UF_PORT))) {
2428
+ return 1;
2429
+ }
2430
+
2431
+ if (u->field_set & (1 << UF_PORT)) {
2432
+ /* Don't bother with endp; we've already validated the string */
2433
+ unsigned long v = strtoul(buf + u->field_data[UF_PORT].off, NULL, 10);
2434
+
2435
+ /* Ports have a max value of 2^16 */
2436
+ if (v > 0xffff) {
2437
+ return 1;
2438
+ }
2439
+
2440
+ u->port = (uint16_t) v;
2441
+ }
2442
+
2443
+ return 0;
2444
+ }
2445
+
2446
+ void
2447
+ http_parser_pause(http_parser *parser, int paused) {
2448
+ /* Users should only be pausing/unpausing a parser that is not in an error
2449
+ * state. In non-debug builds, there's not much that we can do about this
2450
+ * other than ignore it.
2451
+ */
2452
+ if (HTTP_PARSER_ERRNO(parser) == HPE_OK ||
2453
+ HTTP_PARSER_ERRNO(parser) == HPE_PAUSED) {
2454
+ SET_ERRNO((paused) ? HPE_PAUSED : HPE_OK);
2455
+ } else {
2456
+ assert(0 && "Attempting to pause parser in error state");
2457
+ }
2458
+ }
2459
+
2460
+ int
2461
+ http_body_is_final(const struct http_parser *parser) {
2462
+ return parser->state == s_message_done;
2463
+ }
2464
+
2465
+ unsigned long
2466
+ http_parser_version(void) {
2467
+ return HTTP_PARSER_VERSION_MAJOR * 0x10000 |
2468
+ HTTP_PARSER_VERSION_MINOR * 0x00100 |
2469
+ HTTP_PARSER_VERSION_PATCH * 0x00001;
2470
+ }