http-parser 1.0.4 → 1.0.5

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: c7c15574adec9aefd70f9ad96c542b70ce3b3727
4
- data.tar.gz: ebcfbcfbe13fa5e1155e4c0a41f242ea96b1e8a7
3
+ metadata.gz: 8c93598e31fa92f5acb831a75e7514f21180af80
4
+ data.tar.gz: 935ef47585bf17f3a1b4a107d07774c60e300f6b
5
5
  SHA512:
6
- metadata.gz: 0e3a987b169359afe6229340fb4c28e74c411beb302f767bb45f836cc8579598c1d9b89286e9aeb4bf3c114d4fed9b87b5952510c36da676b063e4a60007ee4c
7
- data.tar.gz: 6a531fd9df9c4f011a7163eb1e87dc260fa0a9ee6c201c9e8156f7b86db0b636f579ca3e094535d16a1bc0541c909f276026811de2ee9f5176177c17cb19f05d
6
+ metadata.gz: 271c6b54982d6854b00873a2d217dba0551adc69a920fb4748a6f22d9d43573d1cb8f96e3b7b06942db510ac4d27906ae0e9cde60a5ee78f08f47524fca83643
7
+ data.tar.gz: 159035cb10bc1b947b29cdb3ee27f6e1ae1570e7cf4df42544c1cbf7a81065a2e32e7390deae2d686d491bc6b7301e216446c97f43f69f711f7de61bb1afddd1
data/LICENSE CHANGED
@@ -1,20 +1,20 @@
1
- The MIT License (MIT)
2
-
3
- Copyright (c) 2013 CoTag Media
4
-
5
- Permission is hereby granted, free of charge, to any person obtaining a copy of
6
- this software and associated documentation files (the "Software"), to deal in
7
- the Software without restriction, including without limitation the rights to
8
- use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
9
- the Software, and to permit persons to whom the Software is furnished to do so,
10
- subject to the following conditions:
11
-
12
- The above copyright notice and this permission notice shall be included in all
13
- copies or substantial portions of the Software.
14
-
15
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
17
- FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
18
- COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
19
- IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20
- CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2013 CoTag Media
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy of
6
+ this software and associated documentation files (the "Software"), to deal in
7
+ the Software without restriction, including without limitation the rights to
8
+ use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
9
+ the Software, and to permit persons to whom the Software is furnished to do so,
10
+ subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
17
+ FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
18
+ COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
19
+ IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20
+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md CHANGED
@@ -1,70 +1,70 @@
1
- # http-parser
2
-
3
- Ruby FFI bindings to [http-parser](https://github.com/joyent/http-parser) [![Build Status](https://travis-ci.org/cotag/http-parser.png)](https://travis-ci.org/cotag/http-parser)
4
-
5
- ## Install
6
-
7
- ```shell
8
- gem install http-parser
9
- ```
10
- This gem will compile a local copy of http-parser
11
-
12
-
13
- ## Usage
14
-
15
- ```ruby
16
- require 'rubygems'
17
- require 'http-parser'
18
-
19
- #
20
- # Create a shared parser
21
- #
22
- parser = HttpParser::Parser.new do |parser|
23
- parser.on_message_begin do |inst|
24
- puts "message begin"
25
- end
26
-
27
- parser.on_message_complete do |inst|
28
- puts "message end"
29
- end
30
-
31
- parser.on_url do |inst, data|
32
- puts "url: #{data}"
33
- end
34
-
35
- parser.on_header_field do |inst, data|
36
- puts "field: #{data}"
37
- end
38
-
39
- parser.on_header_value do |inst, data|
40
- puts "value: #{data}"
41
- end
42
- end
43
-
44
- #
45
- # Create state objects to track requests through the parser
46
- #
47
- request = HttpParser::Parser.new_instance do |inst|
48
- inst.type = :request
49
- end
50
-
51
- #
52
- # Parse requests
53
- #
54
- parser.parse request, "GET /foo HTTP/1.1\r\n"
55
- sleep 3
56
- parser.parse request, "Host: example.com\r\n"
57
- sleep 3
58
- parser.parse request, "\r\n"
59
-
60
- #
61
- # Re-use the memory for another request
62
- #
63
- request.reset!
64
- ```
65
-
66
- ## Acknowledgements
67
-
68
- * https://github.com/joyent/http-parser#readme
69
- * https://github.com/postmodern/ffi-http-parser#readme
1
+ # http-parser
2
+
3
+ Ruby FFI bindings to [http-parser](https://github.com/joyent/http-parser) [![Build Status](https://travis-ci.org/cotag/http-parser.png)](https://travis-ci.org/cotag/http-parser)
4
+
5
+ ## Install
6
+
7
+ ```shell
8
+ gem install http-parser
9
+ ```
10
+ This gem will compile a local copy of http-parser
11
+
12
+
13
+ ## Usage
14
+
15
+ ```ruby
16
+ require 'rubygems'
17
+ require 'http-parser'
18
+
19
+ #
20
+ # Create a shared parser
21
+ #
22
+ parser = HttpParser::Parser.new do |parser|
23
+ parser.on_message_begin do |inst|
24
+ puts "message begin"
25
+ end
26
+
27
+ parser.on_message_complete do |inst|
28
+ puts "message end"
29
+ end
30
+
31
+ parser.on_url do |inst, data|
32
+ puts "url: #{data}"
33
+ end
34
+
35
+ parser.on_header_field do |inst, data|
36
+ puts "field: #{data}"
37
+ end
38
+
39
+ parser.on_header_value do |inst, data|
40
+ puts "value: #{data}"
41
+ end
42
+ end
43
+
44
+ #
45
+ # Create state objects to track requests through the parser
46
+ #
47
+ request = HttpParser::Parser.new_instance do |inst|
48
+ inst.type = :request
49
+ end
50
+
51
+ #
52
+ # Parse requests
53
+ #
54
+ parser.parse request, "GET /foo HTTP/1.1\r\n"
55
+ sleep 3
56
+ parser.parse request, "Host: example.com\r\n"
57
+ sleep 3
58
+ parser.parse request, "\r\n"
59
+
60
+ #
61
+ # Re-use the memory for another request
62
+ #
63
+ request.reset!
64
+ ```
65
+
66
+ ## Acknowledgements
67
+
68
+ * https://github.com/joyent/http-parser#readme
69
+ * https://github.com/postmodern/ffi-http-parser#readme
70
70
  * https://github.com/deepfryed/http-parser-lite#readme
data/Rakefile CHANGED
@@ -1,19 +1,19 @@
1
- require 'rubygems'
2
- require 'rake'
3
- require 'rspec/core/rake_task'
4
-
5
- task :default => [:compile, :test]
6
-
7
- task :compile do
8
- protect = ['http_parser.c', 'http_parser.h']
9
- Dir["ext/http-parser/**/*"].each do |file|
10
- begin
11
- next if protect.include? File.basename(file)
12
- FileUtils.rm file
13
- rescue
14
- end
15
- end
16
- system 'cd ext && rake'
17
- end
18
-
19
- RSpec::Core::RakeTask.new(:test)
1
+ require 'rubygems'
2
+ require 'rake'
3
+ require 'rspec/core/rake_task'
4
+
5
+ task :default => [:compile, :test]
6
+
7
+ task :compile do
8
+ protect = ['http_parser.c', 'http_parser.h']
9
+ Dir["ext/http-parser/**/*"].each do |file|
10
+ begin
11
+ next if protect.include? File.basename(file)
12
+ FileUtils.rm file
13
+ rescue
14
+ end
15
+ end
16
+ system 'cd ext && rake'
17
+ end
18
+
19
+ RSpec::Core::RakeTask.new(:test)
@@ -1,8 +1,8 @@
1
- require 'ffi-compiler/compile_task'
2
-
3
- FFI::Compiler::CompileTask.new('http-parser-ext') do |t|
4
- t.cflags << "-Wall -Wextra -O3"
5
- t.cflags << "-D_GNU_SOURCE=1" if RbConfig::CONFIG["host_os"].downcase =~ /mingw/
6
- t.cflags << "-arch x86_64 -arch i386" if t.platform.mac?
7
- t.ldflags << "-arch x86_64 -arch i386" if t.platform.mac?
8
- end
1
+ require 'ffi-compiler/compile_task'
2
+
3
+ FFI::Compiler::CompileTask.new('http-parser-ext') do |t|
4
+ t.cflags << "-Wall -Wextra -O3"
5
+ t.cflags << "-D_GNU_SOURCE=1" if RbConfig::CONFIG["host_os"].downcase =~ /mingw/
6
+ t.cflags << "-arch x86_64 -arch i386" if t.platform.mac?
7
+ t.ldflags << "-arch x86_64 -arch i386" if t.platform.mac?
8
+ end
@@ -1,2234 +1,2234 @@
1
- /* Based on src/http/ngx_http_parse.c from NGINX copyright Igor Sysoev
2
- *
3
- * Additional changes are licensed under the same terms as NGINX and
4
- * copyright Joyent, Inc. and other Node contributors. All rights reserved.
5
- *
6
- * Permission is hereby granted, free of charge, to any person obtaining a copy
7
- * of this software and associated documentation files (the "Software"), to
8
- * deal in the Software without restriction, including without limitation the
9
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10
- * sell copies of the Software, and to permit persons to whom the Software is
11
- * furnished to do so, subject to the following conditions:
12
- *
13
- * The above copyright notice and this permission notice shall be included in
14
- * all copies or substantial portions of the Software.
15
- *
16
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22
- * IN THE SOFTWARE.
23
- */
24
- #include "http_parser.h"
25
- #include <assert.h>
26
- #include <stddef.h>
27
- #include <ctype.h>
28
- #include <stdlib.h>
29
- #include <string.h>
30
- #include <limits.h>
31
-
32
- #ifndef ULLONG_MAX
33
- # define ULLONG_MAX ((uint64_t) -1) /* 2^64-1 */
34
- #endif
35
-
36
- #ifndef MIN
37
- # define MIN(a,b) ((a) < (b) ? (a) : (b))
38
- #endif
39
-
40
- #ifndef ARRAY_SIZE
41
- # define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
42
- #endif
43
-
44
- #ifndef BIT_AT
45
- # define BIT_AT(a, i) \
46
- (!!((unsigned int) (a)[(unsigned int) (i) >> 3] & \
47
- (1 << ((unsigned int) (i) & 7))))
48
- #endif
49
-
50
- #ifndef ELEM_AT
51
- # define ELEM_AT(a, i, v) ((unsigned int) (i) < ARRAY_SIZE(a) ? (a)[(i)] : (v))
52
- #endif
53
-
54
- #define SET_ERRNO(e) \
55
- do { \
56
- parser->http_errno = (e); \
57
- } while(0)
58
-
59
-
60
- /* Run the notify callback FOR, returning ER if it fails */
61
- #define CALLBACK_NOTIFY_(FOR, ER) \
62
- do { \
63
- assert(HTTP_PARSER_ERRNO(parser) == HPE_OK); \
64
- \
65
- if (settings->on_##FOR) { \
66
- if (0 != settings->on_##FOR(parser)) { \
67
- SET_ERRNO(HPE_CB_##FOR); \
68
- } \
69
- \
70
- /* We either errored above or got paused; get out */ \
71
- if (HTTP_PARSER_ERRNO(parser) != HPE_OK) { \
72
- return (ER); \
73
- } \
74
- } \
75
- } while (0)
76
-
77
- /* Run the notify callback FOR and consume the current byte */
78
- #define CALLBACK_NOTIFY(FOR) CALLBACK_NOTIFY_(FOR, p - data + 1)
79
-
80
- /* Run the notify callback FOR and don't consume the current byte */
81
- #define CALLBACK_NOTIFY_NOADVANCE(FOR) CALLBACK_NOTIFY_(FOR, p - data)
82
-
83
- /* Run data callback FOR with LEN bytes, returning ER if it fails */
84
- #define CALLBACK_DATA_(FOR, LEN, ER) \
85
- do { \
86
- assert(HTTP_PARSER_ERRNO(parser) == HPE_OK); \
87
- \
88
- if (FOR##_mark) { \
89
- if (settings->on_##FOR) { \
90
- if (0 != settings->on_##FOR(parser, FOR##_mark, (LEN))) { \
91
- SET_ERRNO(HPE_CB_##FOR); \
92
- } \
93
- \
94
- /* We either errored above or got paused; get out */ \
95
- if (HTTP_PARSER_ERRNO(parser) != HPE_OK) { \
96
- return (ER); \
97
- } \
98
- } \
99
- FOR##_mark = NULL; \
100
- } \
101
- } while (0)
102
-
103
- /* Run the data callback FOR and consume the current byte */
104
- #define CALLBACK_DATA(FOR) \
105
- CALLBACK_DATA_(FOR, p - FOR##_mark, p - data + 1)
106
-
107
- /* Run the data callback FOR and don't consume the current byte */
108
- #define CALLBACK_DATA_NOADVANCE(FOR) \
109
- CALLBACK_DATA_(FOR, p - FOR##_mark, p - data)
110
-
111
- /* Set the mark FOR; non-destructive if mark is already set */
112
- #define MARK(FOR) \
113
- do { \
114
- if (!FOR##_mark) { \
115
- FOR##_mark = p; \
116
- } \
117
- } while (0)
118
-
119
-
120
- #define PROXY_CONNECTION "proxy-connection"
121
- #define CONNECTION "connection"
122
- #define CONTENT_LENGTH "content-length"
123
- #define TRANSFER_ENCODING "transfer-encoding"
124
- #define UPGRADE "upgrade"
125
- #define CHUNKED "chunked"
126
- #define KEEP_ALIVE "keep-alive"
127
- #define CLOSE "close"
128
-
129
-
130
- static const char *method_strings[] =
131
- {
132
- #define XX(num, name, string) #string,
133
- HTTP_METHOD_MAP(XX)
134
- #undef XX
135
- };
136
-
137
-
138
- /* Tokens as defined by rfc 2616. Also lowercases them.
139
- * token = 1*<any CHAR except CTLs or separators>
140
- * separators = "(" | ")" | "<" | ">" | "@"
141
- * | "," | ";" | ":" | "\" | <">
142
- * | "/" | "[" | "]" | "?" | "="
143
- * | "{" | "}" | SP | HT
144
- */
145
- static const char tokens[256] = {
146
- /* 0 nul 1 soh 2 stx 3 etx 4 eot 5 enq 6 ack 7 bel */
147
- 0, 0, 0, 0, 0, 0, 0, 0,
148
- /* 8 bs 9 ht 10 nl 11 vt 12 np 13 cr 14 so 15 si */
149
- 0, 0, 0, 0, 0, 0, 0, 0,
150
- /* 16 dle 17 dc1 18 dc2 19 dc3 20 dc4 21 nak 22 syn 23 etb */
151
- 0, 0, 0, 0, 0, 0, 0, 0,
152
- /* 24 can 25 em 26 sub 27 esc 28 fs 29 gs 30 rs 31 us */
153
- 0, 0, 0, 0, 0, 0, 0, 0,
154
- /* 32 sp 33 ! 34 " 35 # 36 $ 37 % 38 & 39 ' */
155
- 0, '!', 0, '#', '$', '%', '&', '\'',
156
- /* 40 ( 41 ) 42 * 43 + 44 , 45 - 46 . 47 / */
157
- 0, 0, '*', '+', 0, '-', '.', 0,
158
- /* 48 0 49 1 50 2 51 3 52 4 53 5 54 6 55 7 */
159
- '0', '1', '2', '3', '4', '5', '6', '7',
160
- /* 56 8 57 9 58 : 59 ; 60 < 61 = 62 > 63 ? */
161
- '8', '9', 0, 0, 0, 0, 0, 0,
162
- /* 64 @ 65 A 66 B 67 C 68 D 69 E 70 F 71 G */
163
- 0, 'a', 'b', 'c', 'd', 'e', 'f', 'g',
164
- /* 72 H 73 I 74 J 75 K 76 L 77 M 78 N 79 O */
165
- 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
166
- /* 80 P 81 Q 82 R 83 S 84 T 85 U 86 V 87 W */
167
- 'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
168
- /* 88 X 89 Y 90 Z 91 [ 92 \ 93 ] 94 ^ 95 _ */
169
- 'x', 'y', 'z', 0, 0, 0, '^', '_',
170
- /* 96 ` 97 a 98 b 99 c 100 d 101 e 102 f 103 g */
171
- '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g',
172
- /* 104 h 105 i 106 j 107 k 108 l 109 m 110 n 111 o */
173
- 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
174
- /* 112 p 113 q 114 r 115 s 116 t 117 u 118 v 119 w */
175
- 'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
176
- /* 120 x 121 y 122 z 123 { 124 | 125 } 126 ~ 127 del */
177
- 'x', 'y', 'z', 0, '|', 0, '~', 0 };
178
-
179
-
180
- static const int8_t unhex[256] =
181
- {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
182
- ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
183
- ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
184
- , 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1
185
- ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1
186
- ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
187
- ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1
188
- ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
189
- };
190
-
191
-
192
- #if HTTP_PARSER_STRICT
193
- # define T(v) 0
194
- #else
195
- # define T(v) v
196
- #endif
197
-
198
-
199
- static const uint8_t normal_url_char[32] = {
200
- /* 0 nul 1 soh 2 stx 3 etx 4 eot 5 enq 6 ack 7 bel */
201
- 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0,
202
- /* 8 bs 9 ht 10 nl 11 vt 12 np 13 cr 14 so 15 si */
203
- 0 | T(2) | 0 | 0 | T(16) | 0 | 0 | 0,
204
- /* 16 dle 17 dc1 18 dc2 19 dc3 20 dc4 21 nak 22 syn 23 etb */
205
- 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0,
206
- /* 24 can 25 em 26 sub 27 esc 28 fs 29 gs 30 rs 31 us */
207
- 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0,
208
- /* 32 sp 33 ! 34 " 35 # 36 $ 37 % 38 & 39 ' */
209
- 0 | 2 | 4 | 0 | 16 | 32 | 64 | 128,
210
- /* 40 ( 41 ) 42 * 43 + 44 , 45 - 46 . 47 / */
211
- 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
212
- /* 48 0 49 1 50 2 51 3 52 4 53 5 54 6 55 7 */
213
- 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
214
- /* 56 8 57 9 58 : 59 ; 60 < 61 = 62 > 63 ? */
215
- 1 | 2 | 4 | 8 | 16 | 32 | 64 | 0,
216
- /* 64 @ 65 A 66 B 67 C 68 D 69 E 70 F 71 G */
217
- 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
218
- /* 72 H 73 I 74 J 75 K 76 L 77 M 78 N 79 O */
219
- 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
220
- /* 80 P 81 Q 82 R 83 S 84 T 85 U 86 V 87 W */
221
- 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
222
- /* 88 X 89 Y 90 Z 91 [ 92 \ 93 ] 94 ^ 95 _ */
223
- 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
224
- /* 96 ` 97 a 98 b 99 c 100 d 101 e 102 f 103 g */
225
- 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
226
- /* 104 h 105 i 106 j 107 k 108 l 109 m 110 n 111 o */
227
- 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
228
- /* 112 p 113 q 114 r 115 s 116 t 117 u 118 v 119 w */
229
- 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
230
- /* 120 x 121 y 122 z 123 { 124 | 125 } 126 ~ 127 del */
231
- 1 | 2 | 4 | 8 | 16 | 32 | 64 | 0, };
232
-
233
- #undef T
234
-
235
- enum state
236
- { s_dead = 1 /* important that this is > 0 */
237
-
238
- , s_start_req_or_res
239
- , s_res_or_resp_H
240
- , s_start_res
241
- , s_res_H
242
- , s_res_HT
243
- , s_res_HTT
244
- , s_res_HTTP
245
- , s_res_first_http_major
246
- , s_res_http_major
247
- , s_res_first_http_minor
248
- , s_res_http_minor
249
- , s_res_first_status_code
250
- , s_res_status_code
251
- , s_res_status_start
252
- , s_res_status
253
- , s_res_line_almost_done
254
-
255
- , s_start_req
256
-
257
- , s_req_method
258
- , s_req_spaces_before_url
259
- , s_req_schema
260
- , s_req_schema_slash
261
- , s_req_schema_slash_slash
262
- , s_req_server_start
263
- , s_req_server
264
- , s_req_server_with_at
265
- , s_req_path
266
- , s_req_query_string_start
267
- , s_req_query_string
268
- , s_req_fragment_start
269
- , s_req_fragment
270
- , s_req_http_start
271
- , s_req_http_H
272
- , s_req_http_HT
273
- , s_req_http_HTT
274
- , s_req_http_HTTP
275
- , s_req_first_http_major
276
- , s_req_http_major
277
- , s_req_first_http_minor
278
- , s_req_http_minor
279
- , s_req_line_almost_done
280
-
281
- , s_header_field_start
282
- , s_header_field
283
- , s_header_value_start
284
- , s_header_value
285
- , s_header_value_lws
286
-
287
- , s_header_almost_done
288
-
289
- , s_chunk_size_start
290
- , s_chunk_size
291
- , s_chunk_parameters
292
- , s_chunk_size_almost_done
293
-
294
- , s_headers_almost_done
295
- , s_headers_done
296
-
297
- /* Important: 's_headers_done' must be the last 'header' state. All
298
- * states beyond this must be 'body' states. It is used for overflow
299
- * checking. See the PARSING_HEADER() macro.
300
- */
301
-
302
- , s_chunk_data
303
- , s_chunk_data_almost_done
304
- , s_chunk_data_done
305
-
306
- , s_body_identity
307
- , s_body_identity_eof
308
-
309
- , s_message_done
310
- };
311
-
312
-
313
- #define PARSING_HEADER(state) (state <= s_headers_done)
314
-
315
-
316
- enum header_states
317
- { h_general = 0
318
- , h_C
319
- , h_CO
320
- , h_CON
321
-
322
- , h_matching_connection
323
- , h_matching_proxy_connection
324
- , h_matching_content_length
325
- , h_matching_transfer_encoding
326
- , h_matching_upgrade
327
-
328
- , h_connection
329
- , h_content_length
330
- , h_transfer_encoding
331
- , h_upgrade
332
-
333
- , h_matching_transfer_encoding_chunked
334
- , h_matching_connection_keep_alive
335
- , h_matching_connection_close
336
-
337
- , h_transfer_encoding_chunked
338
- , h_connection_keep_alive
339
- , h_connection_close
340
- };
341
-
342
- enum http_host_state
343
- {
344
- s_http_host_dead = 1
345
- , s_http_userinfo_start
346
- , s_http_userinfo
347
- , s_http_host_start
348
- , s_http_host_v6_start
349
- , s_http_host
350
- , s_http_host_v6
351
- , s_http_host_v6_end
352
- , s_http_host_port_start
353
- , s_http_host_port
354
- };
355
-
356
- /* Macros for character classes; depends on strict-mode */
357
- #define CR '\r'
358
- #define LF '\n'
359
- #define LOWER(c) (unsigned char)(c | 0x20)
360
- #define IS_ALPHA(c) (LOWER(c) >= 'a' && LOWER(c) <= 'z')
361
- #define IS_NUM(c) ((c) >= '0' && (c) <= '9')
362
- #define IS_ALPHANUM(c) (IS_ALPHA(c) || IS_NUM(c))
363
- #define IS_HEX(c) (IS_NUM(c) || (LOWER(c) >= 'a' && LOWER(c) <= 'f'))
364
- #define IS_MARK(c) ((c) == '-' || (c) == '_' || (c) == '.' || \
365
- (c) == '!' || (c) == '~' || (c) == '*' || (c) == '\'' || (c) == '(' || \
366
- (c) == ')')
367
- #define IS_USERINFO_CHAR(c) (IS_ALPHANUM(c) || IS_MARK(c) || (c) == '%' || \
368
- (c) == ';' || (c) == ':' || (c) == '&' || (c) == '=' || (c) == '+' || \
369
- (c) == '$' || (c) == ',')
370
-
371
- #if HTTP_PARSER_STRICT
372
- #define TOKEN(c) (tokens[(unsigned char)c])
373
- #define IS_URL_CHAR(c) (BIT_AT(normal_url_char, (unsigned char)c))
374
- #define IS_HOST_CHAR(c) (IS_ALPHANUM(c) || (c) == '.' || (c) == '-')
375
- #else
376
- #define TOKEN(c) ((c == ' ') ? ' ' : tokens[(unsigned char)c])
377
- #define IS_URL_CHAR(c) \
378
- (BIT_AT(normal_url_char, (unsigned char)c) || ((c) & 0x80))
379
- #define IS_HOST_CHAR(c) \
380
- (IS_ALPHANUM(c) || (c) == '.' || (c) == '-' || (c) == '_')
381
- #endif
382
-
383
-
384
- #define start_state (parser->type == HTTP_REQUEST ? s_start_req : s_start_res)
385
-
386
-
387
- #if HTTP_PARSER_STRICT
388
- # define STRICT_CHECK(cond) \
389
- do { \
390
- if (cond) { \
391
- SET_ERRNO(HPE_STRICT); \
392
- goto error; \
393
- } \
394
- } while (0)
395
- # define NEW_MESSAGE() (http_should_keep_alive(parser) ? start_state : s_dead)
396
- #else
397
- # define STRICT_CHECK(cond)
398
- # define NEW_MESSAGE() start_state
399
- #endif
400
-
401
-
402
- /* Map errno values to strings for human-readable output */
403
- #define HTTP_STRERROR_GEN(n, s) { "HPE_" #n, s },
404
- static struct {
405
- const char *name;
406
- const char *description;
407
- } http_strerror_tab[] = {
408
- HTTP_ERRNO_MAP(HTTP_STRERROR_GEN)
409
- };
410
- #undef HTTP_STRERROR_GEN
411
-
412
- int http_message_needs_eof(const http_parser *parser);
413
-
414
- /* Our URL parser.
415
- *
416
- * This is designed to be shared by http_parser_execute() for URL validation,
417
- * hence it has a state transition + byte-for-byte interface. In addition, it
418
- * is meant to be embedded in http_parser_parse_url(), which does the dirty
419
- * work of turning state transitions URL components for its API.
420
- *
421
- * This function should only be invoked with non-space characters. It is
422
- * assumed that the caller cares about (and can detect) the transition between
423
- * URL and non-URL states by looking for these.
424
- */
425
- static enum state
426
- parse_url_char(enum state s, const char ch)
427
- {
428
- if (ch == ' ' || ch == '\r' || ch == '\n') {
429
- return s_dead;
430
- }
431
-
432
- #if HTTP_PARSER_STRICT
433
- if (ch == '\t' || ch == '\f') {
434
- return s_dead;
435
- }
436
- #endif
437
-
438
- switch (s) {
439
- case s_req_spaces_before_url:
440
- /* Proxied requests are followed by scheme of an absolute URI (alpha).
441
- * All methods except CONNECT are followed by '/' or '*'.
442
- */
443
-
444
- if (ch == '/' || ch == '*') {
445
- return s_req_path;
446
- }
447
-
448
- if (IS_ALPHA(ch)) {
449
- return s_req_schema;
450
- }
451
-
452
- break;
453
-
454
- case s_req_schema:
455
- if (IS_ALPHA(ch)) {
456
- return s;
457
- }
458
-
459
- if (ch == ':') {
460
- return s_req_schema_slash;
461
- }
462
-
463
- break;
464
-
465
- case s_req_schema_slash:
466
- if (ch == '/') {
467
- return s_req_schema_slash_slash;
468
- }
469
-
470
- break;
471
-
472
- case s_req_schema_slash_slash:
473
- if (ch == '/') {
474
- return s_req_server_start;
475
- }
476
-
477
- break;
478
-
479
- case s_req_server_with_at:
480
- if (ch == '@') {
481
- return s_dead;
482
- }
483
-
484
- /* FALLTHROUGH */
485
- case s_req_server_start:
486
- case s_req_server:
487
- if (ch == '/') {
488
- return s_req_path;
489
- }
490
-
491
- if (ch == '?') {
492
- return s_req_query_string_start;
493
- }
494
-
495
- if (ch == '@') {
496
- return s_req_server_with_at;
497
- }
498
-
499
- if (IS_USERINFO_CHAR(ch) || ch == '[' || ch == ']') {
500
- return s_req_server;
501
- }
502
-
503
- break;
504
-
505
- case s_req_path:
506
- if (IS_URL_CHAR(ch)) {
507
- return s;
508
- }
509
-
510
- switch (ch) {
511
- case '?':
512
- return s_req_query_string_start;
513
-
514
- case '#':
515
- return s_req_fragment_start;
516
- }
517
-
518
- break;
519
-
520
- case s_req_query_string_start:
521
- case s_req_query_string:
522
- if (IS_URL_CHAR(ch)) {
523
- return s_req_query_string;
524
- }
525
-
526
- switch (ch) {
527
- case '?':
528
- /* allow extra '?' in query string */
529
- return s_req_query_string;
530
-
531
- case '#':
532
- return s_req_fragment_start;
533
- }
534
-
535
- break;
536
-
537
- case s_req_fragment_start:
538
- if (IS_URL_CHAR(ch)) {
539
- return s_req_fragment;
540
- }
541
-
542
- switch (ch) {
543
- case '?':
544
- return s_req_fragment;
545
-
546
- case '#':
547
- return s;
548
- }
549
-
550
- break;
551
-
552
- case s_req_fragment:
553
- if (IS_URL_CHAR(ch)) {
554
- return s;
555
- }
556
-
557
- switch (ch) {
558
- case '?':
559
- case '#':
560
- return s;
561
- }
562
-
563
- break;
564
-
565
- default:
566
- break;
567
- }
568
-
569
- /* We should never fall out of the switch above unless there's an error */
570
- return s_dead;
571
- }
572
-
573
- size_t http_parser_execute (http_parser *parser,
574
- const http_parser_settings *settings,
575
- const char *data,
576
- size_t len)
577
- {
578
- char c, ch;
579
- int8_t unhex_val;
580
- const char *p = data;
581
- const char *header_field_mark = 0;
582
- const char *header_value_mark = 0;
583
- const char *url_mark = 0;
584
- const char *body_mark = 0;
585
- const char *status_mark = 0;
586
-
587
- /* We're in an error state. Don't bother doing anything. */
588
- if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
589
- return 0;
590
- }
591
-
592
- if (len == 0) {
593
- switch (parser->state) {
594
- case s_body_identity_eof:
595
- /* Use of CALLBACK_NOTIFY() here would erroneously return 1 byte read if
596
- * we got paused.
597
- */
598
- CALLBACK_NOTIFY_NOADVANCE(message_complete);
599
- return 0;
600
-
601
- case s_dead:
602
- case s_start_req_or_res:
603
- case s_start_res:
604
- case s_start_req:
605
- return 0;
606
-
607
- default:
608
- SET_ERRNO(HPE_INVALID_EOF_STATE);
609
- return 1;
610
- }
611
- }
612
-
613
-
614
- if (parser->state == s_header_field)
615
- header_field_mark = data;
616
- if (parser->state == s_header_value)
617
- header_value_mark = data;
618
- switch (parser->state) {
619
- case s_req_path:
620
- case s_req_schema:
621
- case s_req_schema_slash:
622
- case s_req_schema_slash_slash:
623
- case s_req_server_start:
624
- case s_req_server:
625
- case s_req_server_with_at:
626
- case s_req_query_string_start:
627
- case s_req_query_string:
628
- case s_req_fragment_start:
629
- case s_req_fragment:
630
- url_mark = data;
631
- break;
632
- case s_res_status:
633
- status_mark = data;
634
- break;
635
- }
636
-
637
- for (p=data; p != data + len; p++) {
638
- ch = *p;
639
-
640
- if (PARSING_HEADER(parser->state)) {
641
- ++parser->nread;
642
- /* Don't allow the total size of the HTTP headers (including the status
643
- * line) to exceed HTTP_MAX_HEADER_SIZE. This check is here to protect
644
- * embedders against denial-of-service attacks where the attacker feeds
645
- * us a never-ending header that the embedder keeps buffering.
646
- *
647
- * This check is arguably the responsibility of embedders but we're doing
648
- * it on the embedder's behalf because most won't bother and this way we
649
- * make the web a little safer. HTTP_MAX_HEADER_SIZE is still far bigger
650
- * than any reasonable request or response so this should never affect
651
- * day-to-day operation.
652
- */
653
- if (parser->nread > HTTP_MAX_HEADER_SIZE) {
654
- SET_ERRNO(HPE_HEADER_OVERFLOW);
655
- goto error;
656
- }
657
- }
658
-
659
- reexecute_byte:
660
- switch (parser->state) {
661
-
662
- case s_dead:
663
- /* this state is used after a 'Connection: close' message
664
- * the parser will error out if it reads another message
665
- */
666
- if (ch == CR || ch == LF)
667
- break;
668
-
669
- SET_ERRNO(HPE_CLOSED_CONNECTION);
670
- goto error;
671
-
672
- case s_start_req_or_res:
673
- {
674
- if (ch == CR || ch == LF)
675
- break;
676
- parser->flags = 0;
677
- parser->content_length = ULLONG_MAX;
678
-
679
- if (ch == 'H') {
680
- parser->state = s_res_or_resp_H;
681
-
682
- CALLBACK_NOTIFY(message_begin);
683
- } else {
684
- parser->type = HTTP_REQUEST;
685
- parser->state = s_start_req;
686
- goto reexecute_byte;
687
- }
688
-
689
- break;
690
- }
691
-
692
- case s_res_or_resp_H:
693
- if (ch == 'T') {
694
- parser->type = HTTP_RESPONSE;
695
- parser->state = s_res_HT;
696
- } else {
697
- if (ch != 'E') {
698
- SET_ERRNO(HPE_INVALID_CONSTANT);
699
- goto error;
700
- }
701
-
702
- parser->type = HTTP_REQUEST;
703
- parser->method = HTTP_HEAD;
704
- parser->index = 2;
705
- parser->state = s_req_method;
706
- }
707
- break;
708
-
709
- case s_start_res:
710
- {
711
- parser->flags = 0;
712
- parser->content_length = ULLONG_MAX;
713
-
714
- switch (ch) {
715
- case 'H':
716
- parser->state = s_res_H;
717
- break;
718
-
719
- case CR:
720
- case LF:
721
- break;
722
-
723
- default:
724
- SET_ERRNO(HPE_INVALID_CONSTANT);
725
- goto error;
726
- }
727
-
728
- CALLBACK_NOTIFY(message_begin);
729
- break;
730
- }
731
-
732
- case s_res_H:
733
- STRICT_CHECK(ch != 'T');
734
- parser->state = s_res_HT;
735
- break;
736
-
737
- case s_res_HT:
738
- STRICT_CHECK(ch != 'T');
739
- parser->state = s_res_HTT;
740
- break;
741
-
742
- case s_res_HTT:
743
- STRICT_CHECK(ch != 'P');
744
- parser->state = s_res_HTTP;
745
- break;
746
-
747
- case s_res_HTTP:
748
- STRICT_CHECK(ch != '/');
749
- parser->state = s_res_first_http_major;
750
- break;
751
-
752
- case s_res_first_http_major:
753
- if (ch < '0' || ch > '9') {
754
- SET_ERRNO(HPE_INVALID_VERSION);
755
- goto error;
756
- }
757
-
758
- parser->http_major = ch - '0';
759
- parser->state = s_res_http_major;
760
- break;
761
-
762
- /* major HTTP version or dot */
763
- case s_res_http_major:
764
- {
765
- if (ch == '.') {
766
- parser->state = s_res_first_http_minor;
767
- break;
768
- }
769
-
770
- if (!IS_NUM(ch)) {
771
- SET_ERRNO(HPE_INVALID_VERSION);
772
- goto error;
773
- }
774
-
775
- parser->http_major *= 10;
776
- parser->http_major += ch - '0';
777
-
778
- if (parser->http_major > 999) {
779
- SET_ERRNO(HPE_INVALID_VERSION);
780
- goto error;
781
- }
782
-
783
- break;
784
- }
785
-
786
- /* first digit of minor HTTP version */
787
- case s_res_first_http_minor:
788
- if (!IS_NUM(ch)) {
789
- SET_ERRNO(HPE_INVALID_VERSION);
790
- goto error;
791
- }
792
-
793
- parser->http_minor = ch - '0';
794
- parser->state = s_res_http_minor;
795
- break;
796
-
797
- /* minor HTTP version or end of request line */
798
- case s_res_http_minor:
799
- {
800
- if (ch == ' ') {
801
- parser->state = s_res_first_status_code;
802
- break;
803
- }
804
-
805
- if (!IS_NUM(ch)) {
806
- SET_ERRNO(HPE_INVALID_VERSION);
807
- goto error;
808
- }
809
-
810
- parser->http_minor *= 10;
811
- parser->http_minor += ch - '0';
812
-
813
- if (parser->http_minor > 999) {
814
- SET_ERRNO(HPE_INVALID_VERSION);
815
- goto error;
816
- }
817
-
818
- break;
819
- }
820
-
821
- case s_res_first_status_code:
822
- {
823
- if (!IS_NUM(ch)) {
824
- if (ch == ' ') {
825
- break;
826
- }
827
-
828
- SET_ERRNO(HPE_INVALID_STATUS);
829
- goto error;
830
- }
831
- parser->status_code = ch - '0';
832
- parser->state = s_res_status_code;
833
- break;
834
- }
835
-
836
- case s_res_status_code:
837
- {
838
- if (!IS_NUM(ch)) {
839
- switch (ch) {
840
- case ' ':
841
- parser->state = s_res_status_start;
842
- break;
843
- case CR:
844
- parser->state = s_res_line_almost_done;
845
- break;
846
- case LF:
847
- parser->state = s_header_field_start;
848
- break;
849
- default:
850
- SET_ERRNO(HPE_INVALID_STATUS);
851
- goto error;
852
- }
853
- break;
854
- }
855
-
856
- parser->status_code *= 10;
857
- parser->status_code += ch - '0';
858
-
859
- if (parser->status_code > 999) {
860
- SET_ERRNO(HPE_INVALID_STATUS);
861
- goto error;
862
- }
863
-
864
- break;
865
- }
866
-
867
- case s_res_status_start:
868
- {
869
- if (ch == CR) {
870
- parser->state = s_res_line_almost_done;
871
- break;
872
- }
873
-
874
- if (ch == LF) {
875
- parser->state = s_header_field_start;
876
- break;
877
- }
878
-
879
- MARK(status);
880
- parser->state = s_res_status;
881
- parser->index = 0;
882
- break;
883
- }
884
-
885
- case s_res_status:
886
- if (ch == CR) {
887
- parser->state = s_res_line_almost_done;
888
- CALLBACK_DATA(status);
889
- break;
890
- }
891
-
892
- if (ch == LF) {
893
- parser->state = s_header_field_start;
894
- CALLBACK_DATA(status);
895
- break;
896
- }
897
-
898
- break;
899
-
900
- case s_res_line_almost_done:
901
- STRICT_CHECK(ch != LF);
902
- parser->state = s_header_field_start;
903
- break;
904
-
905
- case s_start_req:
906
- {
907
- if (ch == CR || ch == LF)
908
- break;
909
- parser->flags = 0;
910
- parser->content_length = ULLONG_MAX;
911
-
912
- if (!IS_ALPHA(ch)) {
913
- SET_ERRNO(HPE_INVALID_METHOD);
914
- goto error;
915
- }
916
-
917
- parser->method = (enum http_method) 0;
918
- parser->index = 1;
919
- switch (ch) {
920
- case 'C': parser->method = HTTP_CONNECT; /* or COPY, CHECKOUT */ break;
921
- case 'D': parser->method = HTTP_DELETE; break;
922
- case 'G': parser->method = HTTP_GET; break;
923
- case 'H': parser->method = HTTP_HEAD; break;
924
- case 'L': parser->method = HTTP_LOCK; break;
925
- case 'M': parser->method = HTTP_MKCOL; /* or MOVE, MKACTIVITY, MERGE, M-SEARCH */ break;
926
- case 'N': parser->method = HTTP_NOTIFY; break;
927
- case 'O': parser->method = HTTP_OPTIONS; break;
928
- case 'P': parser->method = HTTP_POST;
929
- /* or PROPFIND|PROPPATCH|PUT|PATCH|PURGE */
930
- break;
931
- case 'R': parser->method = HTTP_REPORT; break;
932
- case 'S': parser->method = HTTP_SUBSCRIBE; /* or SEARCH */ break;
933
- case 'T': parser->method = HTTP_TRACE; break;
934
- case 'U': parser->method = HTTP_UNLOCK; /* or UNSUBSCRIBE */ break;
935
- default:
936
- SET_ERRNO(HPE_INVALID_METHOD);
937
- goto error;
938
- }
939
- parser->state = s_req_method;
940
-
941
- CALLBACK_NOTIFY(message_begin);
942
-
943
- break;
944
- }
945
-
946
- case s_req_method:
947
- {
948
- const char *matcher;
949
- if (ch == '\0') {
950
- SET_ERRNO(HPE_INVALID_METHOD);
951
- goto error;
952
- }
953
-
954
- matcher = method_strings[parser->method];
955
- if (ch == ' ' && matcher[parser->index] == '\0') {
956
- parser->state = s_req_spaces_before_url;
957
- } else if (ch == matcher[parser->index]) {
958
- ; /* nada */
959
- } else if (parser->method == HTTP_CONNECT) {
960
- if (parser->index == 1 && ch == 'H') {
961
- parser->method = HTTP_CHECKOUT;
962
- } else if (parser->index == 2 && ch == 'P') {
963
- parser->method = HTTP_COPY;
964
- } else {
965
- SET_ERRNO(HPE_INVALID_METHOD);
966
- goto error;
967
- }
968
- } else if (parser->method == HTTP_MKCOL) {
969
- if (parser->index == 1 && ch == 'O') {
970
- parser->method = HTTP_MOVE;
971
- } else if (parser->index == 1 && ch == 'E') {
972
- parser->method = HTTP_MERGE;
973
- } else if (parser->index == 1 && ch == '-') {
974
- parser->method = HTTP_MSEARCH;
975
- } else if (parser->index == 2 && ch == 'A') {
976
- parser->method = HTTP_MKACTIVITY;
977
- } else {
978
- SET_ERRNO(HPE_INVALID_METHOD);
979
- goto error;
980
- }
981
- } else if (parser->method == HTTP_SUBSCRIBE) {
982
- if (parser->index == 1 && ch == 'E') {
983
- parser->method = HTTP_SEARCH;
984
- } else {
985
- SET_ERRNO(HPE_INVALID_METHOD);
986
- goto error;
987
- }
988
- } else if (parser->index == 1 && parser->method == HTTP_POST) {
989
- if (ch == 'R') {
990
- parser->method = HTTP_PROPFIND; /* or HTTP_PROPPATCH */
991
- } else if (ch == 'U') {
992
- parser->method = HTTP_PUT; /* or HTTP_PURGE */
993
- } else if (ch == 'A') {
994
- parser->method = HTTP_PATCH;
995
- } else {
996
- SET_ERRNO(HPE_INVALID_METHOD);
997
- goto error;
998
- }
999
- } else if (parser->index == 2) {
1000
- if (parser->method == HTTP_PUT) {
1001
- if (ch == 'R') {
1002
- parser->method = HTTP_PURGE;
1003
- } else {
1004
- SET_ERRNO(HPE_INVALID_METHOD);
1005
- goto error;
1006
- }
1007
- } else if (parser->method == HTTP_UNLOCK) {
1008
- if (ch == 'S') {
1009
- parser->method = HTTP_UNSUBSCRIBE;
1010
- } else {
1011
- SET_ERRNO(HPE_INVALID_METHOD);
1012
- goto error;
1013
- }
1014
- } else {
1015
- SET_ERRNO(HPE_INVALID_METHOD);
1016
- goto error;
1017
- }
1018
- } else if (parser->index == 4 && parser->method == HTTP_PROPFIND && ch == 'P') {
1019
- parser->method = HTTP_PROPPATCH;
1020
- } else {
1021
- SET_ERRNO(HPE_INVALID_METHOD);
1022
- goto error;
1023
- }
1024
-
1025
- ++parser->index;
1026
- break;
1027
- }
1028
-
1029
- case s_req_spaces_before_url:
1030
- {
1031
- if (ch == ' ') break;
1032
-
1033
- MARK(url);
1034
- if (parser->method == HTTP_CONNECT) {
1035
- parser->state = s_req_server_start;
1036
- }
1037
-
1038
- parser->state = parse_url_char((enum state)parser->state, ch);
1039
- if (parser->state == s_dead) {
1040
- SET_ERRNO(HPE_INVALID_URL);
1041
- goto error;
1042
- }
1043
-
1044
- break;
1045
- }
1046
-
1047
- case s_req_schema:
1048
- case s_req_schema_slash:
1049
- case s_req_schema_slash_slash:
1050
- case s_req_server_start:
1051
- {
1052
- switch (ch) {
1053
- /* No whitespace allowed here */
1054
- case ' ':
1055
- case CR:
1056
- case LF:
1057
- SET_ERRNO(HPE_INVALID_URL);
1058
- goto error;
1059
- default:
1060
- parser->state = parse_url_char((enum state)parser->state, ch);
1061
- if (parser->state == s_dead) {
1062
- SET_ERRNO(HPE_INVALID_URL);
1063
- goto error;
1064
- }
1065
- }
1066
-
1067
- break;
1068
- }
1069
-
1070
- case s_req_server:
1071
- case s_req_server_with_at:
1072
- case s_req_path:
1073
- case s_req_query_string_start:
1074
- case s_req_query_string:
1075
- case s_req_fragment_start:
1076
- case s_req_fragment:
1077
- {
1078
- switch (ch) {
1079
- case ' ':
1080
- parser->state = s_req_http_start;
1081
- CALLBACK_DATA(url);
1082
- break;
1083
- case CR:
1084
- case LF:
1085
- parser->http_major = 0;
1086
- parser->http_minor = 9;
1087
- parser->state = (ch == CR) ?
1088
- s_req_line_almost_done :
1089
- s_header_field_start;
1090
- CALLBACK_DATA(url);
1091
- break;
1092
- default:
1093
- parser->state = parse_url_char((enum state)parser->state, ch);
1094
- if (parser->state == s_dead) {
1095
- SET_ERRNO(HPE_INVALID_URL);
1096
- goto error;
1097
- }
1098
- }
1099
- break;
1100
- }
1101
-
1102
- case s_req_http_start:
1103
- switch (ch) {
1104
- case 'H':
1105
- parser->state = s_req_http_H;
1106
- break;
1107
- case ' ':
1108
- break;
1109
- default:
1110
- SET_ERRNO(HPE_INVALID_CONSTANT);
1111
- goto error;
1112
- }
1113
- break;
1114
-
1115
- case s_req_http_H:
1116
- STRICT_CHECK(ch != 'T');
1117
- parser->state = s_req_http_HT;
1118
- break;
1119
-
1120
- case s_req_http_HT:
1121
- STRICT_CHECK(ch != 'T');
1122
- parser->state = s_req_http_HTT;
1123
- break;
1124
-
1125
- case s_req_http_HTT:
1126
- STRICT_CHECK(ch != 'P');
1127
- parser->state = s_req_http_HTTP;
1128
- break;
1129
-
1130
- case s_req_http_HTTP:
1131
- STRICT_CHECK(ch != '/');
1132
- parser->state = s_req_first_http_major;
1133
- break;
1134
-
1135
- /* first digit of major HTTP version */
1136
- case s_req_first_http_major:
1137
- if (ch < '1' || ch > '9') {
1138
- SET_ERRNO(HPE_INVALID_VERSION);
1139
- goto error;
1140
- }
1141
-
1142
- parser->http_major = ch - '0';
1143
- parser->state = s_req_http_major;
1144
- break;
1145
-
1146
- /* major HTTP version or dot */
1147
- case s_req_http_major:
1148
- {
1149
- if (ch == '.') {
1150
- parser->state = s_req_first_http_minor;
1151
- break;
1152
- }
1153
-
1154
- if (!IS_NUM(ch)) {
1155
- SET_ERRNO(HPE_INVALID_VERSION);
1156
- goto error;
1157
- }
1158
-
1159
- parser->http_major *= 10;
1160
- parser->http_major += ch - '0';
1161
-
1162
- if (parser->http_major > 999) {
1163
- SET_ERRNO(HPE_INVALID_VERSION);
1164
- goto error;
1165
- }
1166
-
1167
- break;
1168
- }
1169
-
1170
- /* first digit of minor HTTP version */
1171
- case s_req_first_http_minor:
1172
- if (!IS_NUM(ch)) {
1173
- SET_ERRNO(HPE_INVALID_VERSION);
1174
- goto error;
1175
- }
1176
-
1177
- parser->http_minor = ch - '0';
1178
- parser->state = s_req_http_minor;
1179
- break;
1180
-
1181
- /* minor HTTP version or end of request line */
1182
- case s_req_http_minor:
1183
- {
1184
- if (ch == CR) {
1185
- parser->state = s_req_line_almost_done;
1186
- break;
1187
- }
1188
-
1189
- if (ch == LF) {
1190
- parser->state = s_header_field_start;
1191
- break;
1192
- }
1193
-
1194
- /* XXX allow spaces after digit? */
1195
-
1196
- if (!IS_NUM(ch)) {
1197
- SET_ERRNO(HPE_INVALID_VERSION);
1198
- goto error;
1199
- }
1200
-
1201
- parser->http_minor *= 10;
1202
- parser->http_minor += ch - '0';
1203
-
1204
- if (parser->http_minor > 999) {
1205
- SET_ERRNO(HPE_INVALID_VERSION);
1206
- goto error;
1207
- }
1208
-
1209
- break;
1210
- }
1211
-
1212
- /* end of request line */
1213
- case s_req_line_almost_done:
1214
- {
1215
- if (ch != LF) {
1216
- SET_ERRNO(HPE_LF_EXPECTED);
1217
- goto error;
1218
- }
1219
-
1220
- parser->state = s_header_field_start;
1221
- break;
1222
- }
1223
-
1224
- case s_header_field_start:
1225
- {
1226
- if (ch == CR) {
1227
- parser->state = s_headers_almost_done;
1228
- break;
1229
- }
1230
-
1231
- if (ch == LF) {
1232
- /* they might be just sending \n instead of \r\n so this would be
1233
- * the second \n to denote the end of headers*/
1234
- parser->state = s_headers_almost_done;
1235
- goto reexecute_byte;
1236
- }
1237
-
1238
- c = TOKEN(ch);
1239
-
1240
- if (!c) {
1241
- SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1242
- goto error;
1243
- }
1244
-
1245
- MARK(header_field);
1246
-
1247
- parser->index = 0;
1248
- parser->state = s_header_field;
1249
-
1250
- switch (c) {
1251
- case 'c':
1252
- parser->header_state = h_C;
1253
- break;
1254
-
1255
- case 'p':
1256
- parser->header_state = h_matching_proxy_connection;
1257
- break;
1258
-
1259
- case 't':
1260
- parser->header_state = h_matching_transfer_encoding;
1261
- break;
1262
-
1263
- case 'u':
1264
- parser->header_state = h_matching_upgrade;
1265
- break;
1266
-
1267
- default:
1268
- parser->header_state = h_general;
1269
- break;
1270
- }
1271
- break;
1272
- }
1273
-
1274
- case s_header_field:
1275
- {
1276
- c = TOKEN(ch);
1277
-
1278
- if (c) {
1279
- switch (parser->header_state) {
1280
- case h_general:
1281
- break;
1282
-
1283
- case h_C:
1284
- parser->index++;
1285
- parser->header_state = (c == 'o' ? h_CO : h_general);
1286
- break;
1287
-
1288
- case h_CO:
1289
- parser->index++;
1290
- parser->header_state = (c == 'n' ? h_CON : h_general);
1291
- break;
1292
-
1293
- case h_CON:
1294
- parser->index++;
1295
- switch (c) {
1296
- case 'n':
1297
- parser->header_state = h_matching_connection;
1298
- break;
1299
- case 't':
1300
- parser->header_state = h_matching_content_length;
1301
- break;
1302
- default:
1303
- parser->header_state = h_general;
1304
- break;
1305
- }
1306
- break;
1307
-
1308
- /* connection */
1309
-
1310
- case h_matching_connection:
1311
- parser->index++;
1312
- if (parser->index > sizeof(CONNECTION)-1
1313
- || c != CONNECTION[parser->index]) {
1314
- parser->header_state = h_general;
1315
- } else if (parser->index == sizeof(CONNECTION)-2) {
1316
- parser->header_state = h_connection;
1317
- }
1318
- break;
1319
-
1320
- /* proxy-connection */
1321
-
1322
- case h_matching_proxy_connection:
1323
- parser->index++;
1324
- if (parser->index > sizeof(PROXY_CONNECTION)-1
1325
- || c != PROXY_CONNECTION[parser->index]) {
1326
- parser->header_state = h_general;
1327
- } else if (parser->index == sizeof(PROXY_CONNECTION)-2) {
1328
- parser->header_state = h_connection;
1329
- }
1330
- break;
1331
-
1332
- /* content-length */
1333
-
1334
- case h_matching_content_length:
1335
- parser->index++;
1336
- if (parser->index > sizeof(CONTENT_LENGTH)-1
1337
- || c != CONTENT_LENGTH[parser->index]) {
1338
- parser->header_state = h_general;
1339
- } else if (parser->index == sizeof(CONTENT_LENGTH)-2) {
1340
- parser->header_state = h_content_length;
1341
- }
1342
- break;
1343
-
1344
- /* transfer-encoding */
1345
-
1346
- case h_matching_transfer_encoding:
1347
- parser->index++;
1348
- if (parser->index > sizeof(TRANSFER_ENCODING)-1
1349
- || c != TRANSFER_ENCODING[parser->index]) {
1350
- parser->header_state = h_general;
1351
- } else if (parser->index == sizeof(TRANSFER_ENCODING)-2) {
1352
- parser->header_state = h_transfer_encoding;
1353
- }
1354
- break;
1355
-
1356
- /* upgrade */
1357
-
1358
- case h_matching_upgrade:
1359
- parser->index++;
1360
- if (parser->index > sizeof(UPGRADE)-1
1361
- || c != UPGRADE[parser->index]) {
1362
- parser->header_state = h_general;
1363
- } else if (parser->index == sizeof(UPGRADE)-2) {
1364
- parser->header_state = h_upgrade;
1365
- }
1366
- break;
1367
-
1368
- case h_connection:
1369
- case h_content_length:
1370
- case h_transfer_encoding:
1371
- case h_upgrade:
1372
- if (ch != ' ') parser->header_state = h_general;
1373
- break;
1374
-
1375
- default:
1376
- assert(0 && "Unknown header_state");
1377
- break;
1378
- }
1379
- break;
1380
- }
1381
-
1382
- if (ch == ':') {
1383
- parser->state = s_header_value_start;
1384
- CALLBACK_DATA(header_field);
1385
- break;
1386
- }
1387
-
1388
- if (ch == CR) {
1389
- parser->state = s_header_almost_done;
1390
- CALLBACK_DATA(header_field);
1391
- break;
1392
- }
1393
-
1394
- if (ch == LF) {
1395
- parser->state = s_header_field_start;
1396
- CALLBACK_DATA(header_field);
1397
- break;
1398
- }
1399
-
1400
- SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1401
- goto error;
1402
- }
1403
-
1404
- case s_header_value_start:
1405
- {
1406
- if (ch == ' ' || ch == '\t') break;
1407
-
1408
- MARK(header_value);
1409
-
1410
- parser->state = s_header_value;
1411
- parser->index = 0;
1412
-
1413
- if (ch == CR) {
1414
- parser->header_state = h_general;
1415
- parser->state = s_header_almost_done;
1416
- CALLBACK_DATA(header_value);
1417
- break;
1418
- }
1419
-
1420
- if (ch == LF) {
1421
- parser->state = s_header_field_start;
1422
- CALLBACK_DATA(header_value);
1423
- break;
1424
- }
1425
-
1426
- c = LOWER(ch);
1427
-
1428
- switch (parser->header_state) {
1429
- case h_upgrade:
1430
- parser->flags |= F_UPGRADE;
1431
- parser->header_state = h_general;
1432
- break;
1433
-
1434
- case h_transfer_encoding:
1435
- /* looking for 'Transfer-Encoding: chunked' */
1436
- if ('c' == c) {
1437
- parser->header_state = h_matching_transfer_encoding_chunked;
1438
- } else {
1439
- parser->header_state = h_general;
1440
- }
1441
- break;
1442
-
1443
- case h_content_length:
1444
- if (!IS_NUM(ch)) {
1445
- SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1446
- goto error;
1447
- }
1448
-
1449
- parser->content_length = ch - '0';
1450
- break;
1451
-
1452
- case h_connection:
1453
- /* looking for 'Connection: keep-alive' */
1454
- if (c == 'k') {
1455
- parser->header_state = h_matching_connection_keep_alive;
1456
- /* looking for 'Connection: close' */
1457
- } else if (c == 'c') {
1458
- parser->header_state = h_matching_connection_close;
1459
- } else {
1460
- parser->header_state = h_general;
1461
- }
1462
- break;
1463
-
1464
- default:
1465
- parser->header_state = h_general;
1466
- break;
1467
- }
1468
- break;
1469
- }
1470
-
1471
- case s_header_value:
1472
- {
1473
-
1474
- if (ch == CR) {
1475
- parser->state = s_header_almost_done;
1476
- CALLBACK_DATA(header_value);
1477
- break;
1478
- }
1479
-
1480
- if (ch == LF) {
1481
- parser->state = s_header_almost_done;
1482
- CALLBACK_DATA_NOADVANCE(header_value);
1483
- goto reexecute_byte;
1484
- }
1485
-
1486
- c = LOWER(ch);
1487
-
1488
- switch (parser->header_state) {
1489
- case h_general:
1490
- break;
1491
-
1492
- case h_connection:
1493
- case h_transfer_encoding:
1494
- assert(0 && "Shouldn't get here.");
1495
- break;
1496
-
1497
- case h_content_length:
1498
- {
1499
- uint64_t t;
1500
-
1501
- if (ch == ' ') break;
1502
-
1503
- if (!IS_NUM(ch)) {
1504
- SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1505
- goto error;
1506
- }
1507
-
1508
- t = parser->content_length;
1509
- t *= 10;
1510
- t += ch - '0';
1511
-
1512
- /* Overflow? */
1513
- if (t < parser->content_length || t == ULLONG_MAX) {
1514
- SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1515
- goto error;
1516
- }
1517
-
1518
- parser->content_length = t;
1519
- break;
1520
- }
1521
-
1522
- /* Transfer-Encoding: chunked */
1523
- case h_matching_transfer_encoding_chunked:
1524
- parser->index++;
1525
- if (parser->index > sizeof(CHUNKED)-1
1526
- || c != CHUNKED[parser->index]) {
1527
- parser->header_state = h_general;
1528
- } else if (parser->index == sizeof(CHUNKED)-2) {
1529
- parser->header_state = h_transfer_encoding_chunked;
1530
- }
1531
- break;
1532
-
1533
- /* looking for 'Connection: keep-alive' */
1534
- case h_matching_connection_keep_alive:
1535
- parser->index++;
1536
- if (parser->index > sizeof(KEEP_ALIVE)-1
1537
- || c != KEEP_ALIVE[parser->index]) {
1538
- parser->header_state = h_general;
1539
- } else if (parser->index == sizeof(KEEP_ALIVE)-2) {
1540
- parser->header_state = h_connection_keep_alive;
1541
- }
1542
- break;
1543
-
1544
- /* looking for 'Connection: close' */
1545
- case h_matching_connection_close:
1546
- parser->index++;
1547
- if (parser->index > sizeof(CLOSE)-1 || c != CLOSE[parser->index]) {
1548
- parser->header_state = h_general;
1549
- } else if (parser->index == sizeof(CLOSE)-2) {
1550
- parser->header_state = h_connection_close;
1551
- }
1552
- break;
1553
-
1554
- case h_transfer_encoding_chunked:
1555
- case h_connection_keep_alive:
1556
- case h_connection_close:
1557
- if (ch != ' ') parser->header_state = h_general;
1558
- break;
1559
-
1560
- default:
1561
- parser->state = s_header_value;
1562
- parser->header_state = h_general;
1563
- break;
1564
- }
1565
- break;
1566
- }
1567
-
1568
- case s_header_almost_done:
1569
- {
1570
- STRICT_CHECK(ch != LF);
1571
-
1572
- parser->state = s_header_value_lws;
1573
-
1574
- switch (parser->header_state) {
1575
- case h_connection_keep_alive:
1576
- parser->flags |= F_CONNECTION_KEEP_ALIVE;
1577
- break;
1578
- case h_connection_close:
1579
- parser->flags |= F_CONNECTION_CLOSE;
1580
- break;
1581
- case h_transfer_encoding_chunked:
1582
- parser->flags |= F_CHUNKED;
1583
- break;
1584
- default:
1585
- break;
1586
- }
1587
-
1588
- break;
1589
- }
1590
-
1591
- case s_header_value_lws:
1592
- {
1593
- if (ch == ' ' || ch == '\t')
1594
- parser->state = s_header_value_start;
1595
- else
1596
- {
1597
- parser->state = s_header_field_start;
1598
- goto reexecute_byte;
1599
- }
1600
- break;
1601
- }
1602
-
1603
- case s_headers_almost_done:
1604
- {
1605
- STRICT_CHECK(ch != LF);
1606
-
1607
- if (parser->flags & F_TRAILING) {
1608
- /* End of a chunked request */
1609
- parser->state = NEW_MESSAGE();
1610
- CALLBACK_NOTIFY(message_complete);
1611
- break;
1612
- }
1613
-
1614
- parser->state = s_headers_done;
1615
-
1616
- /* Set this here so that on_headers_complete() callbacks can see it */
1617
- parser->upgrade =
1618
- (parser->flags & F_UPGRADE || parser->method == HTTP_CONNECT);
1619
-
1620
- /* Here we call the headers_complete callback. This is somewhat
1621
- * different than other callbacks because if the user returns 1, we
1622
- * will interpret that as saying that this message has no body. This
1623
- * is needed for the annoying case of receiving a response to a HEAD
1624
- * request.
1625
- *
1626
- * We'd like to use CALLBACK_NOTIFY_NOADVANCE() here but we cannot, so
1627
- * we have to simulate it by handling a change in errno below.
1628
- */
1629
- if (settings->on_headers_complete) {
1630
- switch (settings->on_headers_complete(parser)) {
1631
- case 0:
1632
- break;
1633
-
1634
- case 1:
1635
- parser->flags |= F_SKIPBODY;
1636
- break;
1637
-
1638
- default:
1639
- SET_ERRNO(HPE_CB_headers_complete);
1640
- return p - data; /* Error */
1641
- }
1642
- }
1643
-
1644
- if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
1645
- return p - data;
1646
- }
1647
-
1648
- goto reexecute_byte;
1649
- }
1650
-
1651
- case s_headers_done:
1652
- {
1653
- STRICT_CHECK(ch != LF);
1654
-
1655
- parser->nread = 0;
1656
-
1657
- /* Exit, the rest of the connect is in a different protocol. */
1658
- if (parser->upgrade) {
1659
- parser->state = NEW_MESSAGE();
1660
- CALLBACK_NOTIFY(message_complete);
1661
- return (p - data) + 1;
1662
- }
1663
-
1664
- if (parser->flags & F_SKIPBODY) {
1665
- parser->state = NEW_MESSAGE();
1666
- CALLBACK_NOTIFY(message_complete);
1667
- } else if (parser->flags & F_CHUNKED) {
1668
- /* chunked encoding - ignore Content-Length header */
1669
- parser->state = s_chunk_size_start;
1670
- } else {
1671
- if (parser->content_length == 0) {
1672
- /* Content-Length header given but zero: Content-Length: 0\r\n */
1673
- parser->state = NEW_MESSAGE();
1674
- CALLBACK_NOTIFY(message_complete);
1675
- } else if (parser->content_length != ULLONG_MAX) {
1676
- /* Content-Length header given and non-zero */
1677
- parser->state = s_body_identity;
1678
- } else {
1679
- if (parser->type == HTTP_REQUEST ||
1680
- !http_message_needs_eof(parser)) {
1681
- /* Assume content-length 0 - read the next */
1682
- parser->state = NEW_MESSAGE();
1683
- CALLBACK_NOTIFY(message_complete);
1684
- } else {
1685
- /* Read body until EOF */
1686
- parser->state = s_body_identity_eof;
1687
- }
1688
- }
1689
- }
1690
-
1691
- break;
1692
- }
1693
-
1694
- case s_body_identity:
1695
- {
1696
- uint64_t to_read = MIN(parser->content_length,
1697
- (uint64_t) ((data + len) - p));
1698
-
1699
- assert(parser->content_length != 0
1700
- && parser->content_length != ULLONG_MAX);
1701
-
1702
- /* The difference between advancing content_length and p is because
1703
- * the latter will automatically advance on the next loop iteration.
1704
- * Further, if content_length ends up at 0, we want to see the last
1705
- * byte again for our message complete callback.
1706
- */
1707
- MARK(body);
1708
- parser->content_length -= to_read;
1709
- p += to_read - 1;
1710
-
1711
- if (parser->content_length == 0) {
1712
- parser->state = s_message_done;
1713
-
1714
- /* Mimic CALLBACK_DATA_NOADVANCE() but with one extra byte.
1715
- *
1716
- * The alternative to doing this is to wait for the next byte to
1717
- * trigger the data callback, just as in every other case. The
1718
- * problem with this is that this makes it difficult for the test
1719
- * harness to distinguish between complete-on-EOF and
1720
- * complete-on-length. It's not clear that this distinction is
1721
- * important for applications, but let's keep it for now.
1722
- */
1723
- CALLBACK_DATA_(body, p - body_mark + 1, p - data);
1724
- goto reexecute_byte;
1725
- }
1726
-
1727
- break;
1728
- }
1729
-
1730
- /* read until EOF */
1731
- case s_body_identity_eof:
1732
- MARK(body);
1733
- p = data + len - 1;
1734
-
1735
- break;
1736
-
1737
- case s_message_done:
1738
- parser->state = NEW_MESSAGE();
1739
- CALLBACK_NOTIFY(message_complete);
1740
- break;
1741
-
1742
- case s_chunk_size_start:
1743
- {
1744
- assert(parser->nread == 1);
1745
- assert(parser->flags & F_CHUNKED);
1746
-
1747
- unhex_val = unhex[(unsigned char)ch];
1748
- if (unhex_val == -1) {
1749
- SET_ERRNO(HPE_INVALID_CHUNK_SIZE);
1750
- goto error;
1751
- }
1752
-
1753
- parser->content_length = unhex_val;
1754
- parser->state = s_chunk_size;
1755
- break;
1756
- }
1757
-
1758
- case s_chunk_size:
1759
- {
1760
- uint64_t t;
1761
-
1762
- assert(parser->flags & F_CHUNKED);
1763
-
1764
- if (ch == CR) {
1765
- parser->state = s_chunk_size_almost_done;
1766
- break;
1767
- }
1768
-
1769
- unhex_val = unhex[(unsigned char)ch];
1770
-
1771
- if (unhex_val == -1) {
1772
- if (ch == ';' || ch == ' ') {
1773
- parser->state = s_chunk_parameters;
1774
- break;
1775
- }
1776
-
1777
- SET_ERRNO(HPE_INVALID_CHUNK_SIZE);
1778
- goto error;
1779
- }
1780
-
1781
- t = parser->content_length;
1782
- t *= 16;
1783
- t += unhex_val;
1784
-
1785
- /* Overflow? */
1786
- if (t < parser->content_length || t == ULLONG_MAX) {
1787
- SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1788
- goto error;
1789
- }
1790
-
1791
- parser->content_length = t;
1792
- break;
1793
- }
1794
-
1795
- case s_chunk_parameters:
1796
- {
1797
- assert(parser->flags & F_CHUNKED);
1798
- /* just ignore this shit. TODO check for overflow */
1799
- if (ch == CR) {
1800
- parser->state = s_chunk_size_almost_done;
1801
- break;
1802
- }
1803
- break;
1804
- }
1805
-
1806
- case s_chunk_size_almost_done:
1807
- {
1808
- assert(parser->flags & F_CHUNKED);
1809
- STRICT_CHECK(ch != LF);
1810
-
1811
- parser->nread = 0;
1812
-
1813
- if (parser->content_length == 0) {
1814
- parser->flags |= F_TRAILING;
1815
- parser->state = s_header_field_start;
1816
- } else {
1817
- parser->state = s_chunk_data;
1818
- }
1819
- break;
1820
- }
1821
-
1822
- case s_chunk_data:
1823
- {
1824
- uint64_t to_read = MIN(parser->content_length,
1825
- (uint64_t) ((data + len) - p));
1826
-
1827
- assert(parser->flags & F_CHUNKED);
1828
- assert(parser->content_length != 0
1829
- && parser->content_length != ULLONG_MAX);
1830
-
1831
- /* See the explanation in s_body_identity for why the content
1832
- * length and data pointers are managed this way.
1833
- */
1834
- MARK(body);
1835
- parser->content_length -= to_read;
1836
- p += to_read - 1;
1837
-
1838
- if (parser->content_length == 0) {
1839
- parser->state = s_chunk_data_almost_done;
1840
- }
1841
-
1842
- break;
1843
- }
1844
-
1845
- case s_chunk_data_almost_done:
1846
- assert(parser->flags & F_CHUNKED);
1847
- assert(parser->content_length == 0);
1848
- STRICT_CHECK(ch != CR);
1849
- parser->state = s_chunk_data_done;
1850
- CALLBACK_DATA(body);
1851
- break;
1852
-
1853
- case s_chunk_data_done:
1854
- assert(parser->flags & F_CHUNKED);
1855
- STRICT_CHECK(ch != LF);
1856
- parser->nread = 0;
1857
- parser->state = s_chunk_size_start;
1858
- break;
1859
-
1860
- default:
1861
- assert(0 && "unhandled state");
1862
- SET_ERRNO(HPE_INVALID_INTERNAL_STATE);
1863
- goto error;
1864
- }
1865
- }
1866
-
1867
- /* Run callbacks for any marks that we have leftover after we ran out of
1868
- * bytes. There should be at most one of these set, so it's OK to invoke
1869
- * them in series (unset marks will not result in callbacks).
1870
- *
1871
- * We use the NOADVANCE() variety of callbacks here because 'p' has already
1872
- * overflowed 'data' and this allows us to correct for the off-by-one that
1873
- * we'd otherwise have (since CALLBACK_DATA() is meant to be run with a 'p'
1874
- * value that's in-bounds).
1875
- */
1876
-
1877
- assert(((header_field_mark ? 1 : 0) +
1878
- (header_value_mark ? 1 : 0) +
1879
- (url_mark ? 1 : 0) +
1880
- (body_mark ? 1 : 0) +
1881
- (status_mark ? 1 : 0)) <= 1);
1882
-
1883
- CALLBACK_DATA_NOADVANCE(header_field);
1884
- CALLBACK_DATA_NOADVANCE(header_value);
1885
- CALLBACK_DATA_NOADVANCE(url);
1886
- CALLBACK_DATA_NOADVANCE(body);
1887
- CALLBACK_DATA_NOADVANCE(status);
1888
-
1889
- return len;
1890
-
1891
- error:
1892
- if (HTTP_PARSER_ERRNO(parser) == HPE_OK) {
1893
- SET_ERRNO(HPE_UNKNOWN);
1894
- }
1895
-
1896
- return (p - data);
1897
- }
1898
-
1899
-
1900
- /* Does the parser need to see an EOF to find the end of the message? */
1901
- int
1902
- http_message_needs_eof (const http_parser *parser)
1903
- {
1904
- if (parser->type == HTTP_REQUEST) {
1905
- return 0;
1906
- }
1907
-
1908
- /* See RFC 2616 section 4.4 */
1909
- if (parser->status_code / 100 == 1 || /* 1xx e.g. Continue */
1910
- parser->status_code == 204 || /* No Content */
1911
- parser->status_code == 304 || /* Not Modified */
1912
- parser->flags & F_SKIPBODY) { /* response to a HEAD request */
1913
- return 0;
1914
- }
1915
-
1916
- if ((parser->flags & F_CHUNKED) || parser->content_length != ULLONG_MAX) {
1917
- return 0;
1918
- }
1919
-
1920
- return 1;
1921
- }
1922
-
1923
-
1924
- int
1925
- http_should_keep_alive (const http_parser *parser)
1926
- {
1927
- if (parser->http_major > 0 && parser->http_minor > 0) {
1928
- /* HTTP/1.1 */
1929
- if (parser->flags & F_CONNECTION_CLOSE) {
1930
- return 0;
1931
- }
1932
- } else {
1933
- /* HTTP/1.0 or earlier */
1934
- if (!(parser->flags & F_CONNECTION_KEEP_ALIVE)) {
1935
- return 0;
1936
- }
1937
- }
1938
-
1939
- return !http_message_needs_eof(parser);
1940
- }
1941
-
1942
-
1943
- const char *
1944
- http_method_str (enum http_method m)
1945
- {
1946
- return ELEM_AT(method_strings, m, "<unknown>");
1947
- }
1948
-
1949
-
1950
- void
1951
- http_parser_init (http_parser *parser, enum http_parser_type t)
1952
- {
1953
- void *data = parser->data; /* preserve application data */
1954
- memset(parser, 0, sizeof(*parser));
1955
- parser->data = data;
1956
- parser->type = t;
1957
- parser->state = (t == HTTP_REQUEST ? s_start_req : (t == HTTP_RESPONSE ? s_start_res : s_start_req_or_res));
1958
- parser->http_errno = HPE_OK;
1959
- }
1960
-
1961
- const char *
1962
- http_errno_name(enum http_errno err) {
1963
- assert(err < (sizeof(http_strerror_tab)/sizeof(http_strerror_tab[0])));
1964
- return http_strerror_tab[err].name;
1965
- }
1966
-
1967
- const char *
1968
- http_errno_description(enum http_errno err) {
1969
- assert(err < (sizeof(http_strerror_tab)/sizeof(http_strerror_tab[0])));
1970
- return http_strerror_tab[err].description;
1971
- }
1972
-
1973
- static enum http_host_state
1974
- http_parse_host_char(enum http_host_state s, const char ch) {
1975
- switch(s) {
1976
- case s_http_userinfo:
1977
- case s_http_userinfo_start:
1978
- if (ch == '@') {
1979
- return s_http_host_start;
1980
- }
1981
-
1982
- if (IS_USERINFO_CHAR(ch)) {
1983
- return s_http_userinfo;
1984
- }
1985
- break;
1986
-
1987
- case s_http_host_start:
1988
- if (ch == '[') {
1989
- return s_http_host_v6_start;
1990
- }
1991
-
1992
- if (IS_HOST_CHAR(ch)) {
1993
- return s_http_host;
1994
- }
1995
-
1996
- break;
1997
-
1998
- case s_http_host:
1999
- if (IS_HOST_CHAR(ch)) {
2000
- return s_http_host;
2001
- }
2002
-
2003
- /* FALLTHROUGH */
2004
- case s_http_host_v6_end:
2005
- if (ch == ':') {
2006
- return s_http_host_port_start;
2007
- }
2008
-
2009
- break;
2010
-
2011
- case s_http_host_v6:
2012
- if (ch == ']') {
2013
- return s_http_host_v6_end;
2014
- }
2015
-
2016
- /* FALLTHROUGH */
2017
- case s_http_host_v6_start:
2018
- if (IS_HEX(ch) || ch == ':' || ch == '.') {
2019
- return s_http_host_v6;
2020
- }
2021
-
2022
- break;
2023
-
2024
- case s_http_host_port:
2025
- case s_http_host_port_start:
2026
- if (IS_NUM(ch)) {
2027
- return s_http_host_port;
2028
- }
2029
-
2030
- break;
2031
-
2032
- default:
2033
- break;
2034
- }
2035
- return s_http_host_dead;
2036
- }
2037
-
2038
- static int
2039
- http_parse_host(const char * buf, struct http_parser_url *u, int found_at) {
2040
- enum http_host_state s;
2041
-
2042
- const char *p;
2043
- size_t buflen = u->field_data[UF_HOST].off + u->field_data[UF_HOST].len;
2044
-
2045
- u->field_data[UF_HOST].len = 0;
2046
-
2047
- s = found_at ? s_http_userinfo_start : s_http_host_start;
2048
-
2049
- for (p = buf + u->field_data[UF_HOST].off; p < buf + buflen; p++) {
2050
- enum http_host_state new_s = http_parse_host_char(s, *p);
2051
-
2052
- if (new_s == s_http_host_dead) {
2053
- return 1;
2054
- }
2055
-
2056
- switch(new_s) {
2057
- case s_http_host:
2058
- if (s != s_http_host) {
2059
- u->field_data[UF_HOST].off = p - buf;
2060
- }
2061
- u->field_data[UF_HOST].len++;
2062
- break;
2063
-
2064
- case s_http_host_v6:
2065
- if (s != s_http_host_v6) {
2066
- u->field_data[UF_HOST].off = p - buf;
2067
- }
2068
- u->field_data[UF_HOST].len++;
2069
- break;
2070
-
2071
- case s_http_host_port:
2072
- if (s != s_http_host_port) {
2073
- u->field_data[UF_PORT].off = p - buf;
2074
- u->field_data[UF_PORT].len = 0;
2075
- u->field_set |= (1 << UF_PORT);
2076
- }
2077
- u->field_data[UF_PORT].len++;
2078
- break;
2079
-
2080
- case s_http_userinfo:
2081
- if (s != s_http_userinfo) {
2082
- u->field_data[UF_USERINFO].off = p - buf ;
2083
- u->field_data[UF_USERINFO].len = 0;
2084
- u->field_set |= (1 << UF_USERINFO);
2085
- }
2086
- u->field_data[UF_USERINFO].len++;
2087
- break;
2088
-
2089
- default:
2090
- break;
2091
- }
2092
- s = new_s;
2093
- }
2094
-
2095
- /* Make sure we don't end somewhere unexpected */
2096
- switch (s) {
2097
- case s_http_host_start:
2098
- case s_http_host_v6_start:
2099
- case s_http_host_v6:
2100
- case s_http_host_port_start:
2101
- case s_http_userinfo:
2102
- case s_http_userinfo_start:
2103
- return 1;
2104
- default:
2105
- break;
2106
- }
2107
-
2108
- return 0;
2109
- }
2110
-
2111
- int
2112
- http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
2113
- struct http_parser_url *u)
2114
- {
2115
- enum state s;
2116
- const char *p;
2117
- enum http_parser_url_fields uf, old_uf;
2118
- int found_at = 0;
2119
-
2120
- u->port = u->field_set = 0;
2121
- s = is_connect ? s_req_server_start : s_req_spaces_before_url;
2122
- uf = old_uf = UF_MAX;
2123
-
2124
- for (p = buf; p < buf + buflen; p++) {
2125
- s = parse_url_char(s, *p);
2126
-
2127
- /* Figure out the next field that we're operating on */
2128
- switch (s) {
2129
- case s_dead:
2130
- return 1;
2131
-
2132
- /* Skip delimeters */
2133
- case s_req_schema_slash:
2134
- case s_req_schema_slash_slash:
2135
- case s_req_server_start:
2136
- case s_req_query_string_start:
2137
- case s_req_fragment_start:
2138
- continue;
2139
-
2140
- case s_req_schema:
2141
- uf = UF_SCHEMA;
2142
- break;
2143
-
2144
- case s_req_server_with_at:
2145
- found_at = 1;
2146
-
2147
- /* FALLTROUGH */
2148
- case s_req_server:
2149
- uf = UF_HOST;
2150
- break;
2151
-
2152
- case s_req_path:
2153
- uf = UF_PATH;
2154
- break;
2155
-
2156
- case s_req_query_string:
2157
- uf = UF_QUERY;
2158
- break;
2159
-
2160
- case s_req_fragment:
2161
- uf = UF_FRAGMENT;
2162
- break;
2163
-
2164
- default:
2165
- assert(!"Unexpected state");
2166
- return 1;
2167
- }
2168
-
2169
- /* Nothing's changed; soldier on */
2170
- if (uf == old_uf) {
2171
- u->field_data[uf].len++;
2172
- continue;
2173
- }
2174
-
2175
- u->field_data[uf].off = p - buf;
2176
- u->field_data[uf].len = 1;
2177
-
2178
- u->field_set |= (1 << uf);
2179
- old_uf = uf;
2180
- }
2181
-
2182
- /* host must be present if there is a schema */
2183
- /* parsing http:///toto will fail */
2184
- if ((u->field_set & ((1 << UF_SCHEMA) | (1 << UF_HOST))) != 0) {
2185
- if (http_parse_host(buf, u, found_at) != 0) {
2186
- return 1;
2187
- }
2188
- }
2189
-
2190
- /* CONNECT requests can only contain "hostname:port" */
2191
- if (is_connect && u->field_set != ((1 << UF_HOST)|(1 << UF_PORT))) {
2192
- return 1;
2193
- }
2194
-
2195
- if (u->field_set & (1 << UF_PORT)) {
2196
- /* Don't bother with endp; we've already validated the string */
2197
- unsigned long v = strtoul(buf + u->field_data[UF_PORT].off, NULL, 10);
2198
-
2199
- /* Ports have a max value of 2^16 */
2200
- if (v > 0xffff) {
2201
- return 1;
2202
- }
2203
-
2204
- u->port = (uint16_t) v;
2205
- }
2206
-
2207
- return 0;
2208
- }
2209
-
2210
- void
2211
- http_parser_pause(http_parser *parser, int paused) {
2212
- /* Users should only be pausing/unpausing a parser that is not in an error
2213
- * state. In non-debug builds, there's not much that we can do about this
2214
- * other than ignore it.
2215
- */
2216
- if (HTTP_PARSER_ERRNO(parser) == HPE_OK ||
2217
- HTTP_PARSER_ERRNO(parser) == HPE_PAUSED) {
2218
- SET_ERRNO((paused) ? HPE_PAUSED : HPE_OK);
2219
- } else {
2220
- assert(0 && "Attempting to pause parser in error state");
2221
- }
2222
- }
2223
-
2224
- int
2225
- http_body_is_final(const struct http_parser *parser) {
2226
- return parser->state == s_message_done;
2227
- }
2228
-
2229
- unsigned long
2230
- http_parser_version(void) {
2231
- return HTTP_PARSER_VERSION_MAJOR * 0x10000 |
2232
- HTTP_PARSER_VERSION_MINOR * 0x00100 |
2233
- HTTP_PARSER_VERSION_PATCH * 0x00001;
2234
- }
1
+ /* Based on src/http/ngx_http_parse.c from NGINX copyright Igor Sysoev
2
+ *
3
+ * Additional changes are licensed under the same terms as NGINX and
4
+ * copyright Joyent, Inc. and other Node contributors. All rights reserved.
5
+ *
6
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
7
+ * of this software and associated documentation files (the "Software"), to
8
+ * deal in the Software without restriction, including without limitation the
9
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10
+ * sell copies of the Software, and to permit persons to whom the Software is
11
+ * furnished to do so, subject to the following conditions:
12
+ *
13
+ * The above copyright notice and this permission notice shall be included in
14
+ * all copies or substantial portions of the Software.
15
+ *
16
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22
+ * IN THE SOFTWARE.
23
+ */
24
+ #include "http_parser.h"
25
+ #include <assert.h>
26
+ #include <stddef.h>
27
+ #include <ctype.h>
28
+ #include <stdlib.h>
29
+ #include <string.h>
30
+ #include <limits.h>
31
+
32
+ #ifndef ULLONG_MAX
33
+ # define ULLONG_MAX ((uint64_t) -1) /* 2^64-1 */
34
+ #endif
35
+
36
+ #ifndef MIN
37
+ # define MIN(a,b) ((a) < (b) ? (a) : (b))
38
+ #endif
39
+
40
+ #ifndef ARRAY_SIZE
41
+ # define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
42
+ #endif
43
+
44
+ #ifndef BIT_AT
45
+ # define BIT_AT(a, i) \
46
+ (!!((unsigned int) (a)[(unsigned int) (i) >> 3] & \
47
+ (1 << ((unsigned int) (i) & 7))))
48
+ #endif
49
+
50
+ #ifndef ELEM_AT
51
+ # define ELEM_AT(a, i, v) ((unsigned int) (i) < ARRAY_SIZE(a) ? (a)[(i)] : (v))
52
+ #endif
53
+
54
+ #define SET_ERRNO(e) \
55
+ do { \
56
+ parser->http_errno = (e); \
57
+ } while(0)
58
+
59
+
60
+ /* Run the notify callback FOR, returning ER if it fails */
61
+ #define CALLBACK_NOTIFY_(FOR, ER) \
62
+ do { \
63
+ assert(HTTP_PARSER_ERRNO(parser) == HPE_OK); \
64
+ \
65
+ if (settings->on_##FOR) { \
66
+ if (0 != settings->on_##FOR(parser)) { \
67
+ SET_ERRNO(HPE_CB_##FOR); \
68
+ } \
69
+ \
70
+ /* We either errored above or got paused; get out */ \
71
+ if (HTTP_PARSER_ERRNO(parser) != HPE_OK) { \
72
+ return (ER); \
73
+ } \
74
+ } \
75
+ } while (0)
76
+
77
+ /* Run the notify callback FOR and consume the current byte */
78
+ #define CALLBACK_NOTIFY(FOR) CALLBACK_NOTIFY_(FOR, p - data + 1)
79
+
80
+ /* Run the notify callback FOR and don't consume the current byte */
81
+ #define CALLBACK_NOTIFY_NOADVANCE(FOR) CALLBACK_NOTIFY_(FOR, p - data)
82
+
83
+ /* Run data callback FOR with LEN bytes, returning ER if it fails */
84
+ #define CALLBACK_DATA_(FOR, LEN, ER) \
85
+ do { \
86
+ assert(HTTP_PARSER_ERRNO(parser) == HPE_OK); \
87
+ \
88
+ if (FOR##_mark) { \
89
+ if (settings->on_##FOR) { \
90
+ if (0 != settings->on_##FOR(parser, FOR##_mark, (LEN))) { \
91
+ SET_ERRNO(HPE_CB_##FOR); \
92
+ } \
93
+ \
94
+ /* We either errored above or got paused; get out */ \
95
+ if (HTTP_PARSER_ERRNO(parser) != HPE_OK) { \
96
+ return (ER); \
97
+ } \
98
+ } \
99
+ FOR##_mark = NULL; \
100
+ } \
101
+ } while (0)
102
+
103
+ /* Run the data callback FOR and consume the current byte */
104
+ #define CALLBACK_DATA(FOR) \
105
+ CALLBACK_DATA_(FOR, p - FOR##_mark, p - data + 1)
106
+
107
+ /* Run the data callback FOR and don't consume the current byte */
108
+ #define CALLBACK_DATA_NOADVANCE(FOR) \
109
+ CALLBACK_DATA_(FOR, p - FOR##_mark, p - data)
110
+
111
+ /* Set the mark FOR; non-destructive if mark is already set */
112
+ #define MARK(FOR) \
113
+ do { \
114
+ if (!FOR##_mark) { \
115
+ FOR##_mark = p; \
116
+ } \
117
+ } while (0)
118
+
119
+
120
+ #define PROXY_CONNECTION "proxy-connection"
121
+ #define CONNECTION "connection"
122
+ #define CONTENT_LENGTH "content-length"
123
+ #define TRANSFER_ENCODING "transfer-encoding"
124
+ #define UPGRADE "upgrade"
125
+ #define CHUNKED "chunked"
126
+ #define KEEP_ALIVE "keep-alive"
127
+ #define CLOSE "close"
128
+
129
+
130
+ static const char *method_strings[] =
131
+ {
132
+ #define XX(num, name, string) #string,
133
+ HTTP_METHOD_MAP(XX)
134
+ #undef XX
135
+ };
136
+
137
+
138
+ /* Tokens as defined by rfc 2616. Also lowercases them.
139
+ * token = 1*<any CHAR except CTLs or separators>
140
+ * separators = "(" | ")" | "<" | ">" | "@"
141
+ * | "," | ";" | ":" | "\" | <">
142
+ * | "/" | "[" | "]" | "?" | "="
143
+ * | "{" | "}" | SP | HT
144
+ */
145
+ static const char tokens[256] = {
146
+ /* 0 nul 1 soh 2 stx 3 etx 4 eot 5 enq 6 ack 7 bel */
147
+ 0, 0, 0, 0, 0, 0, 0, 0,
148
+ /* 8 bs 9 ht 10 nl 11 vt 12 np 13 cr 14 so 15 si */
149
+ 0, 0, 0, 0, 0, 0, 0, 0,
150
+ /* 16 dle 17 dc1 18 dc2 19 dc3 20 dc4 21 nak 22 syn 23 etb */
151
+ 0, 0, 0, 0, 0, 0, 0, 0,
152
+ /* 24 can 25 em 26 sub 27 esc 28 fs 29 gs 30 rs 31 us */
153
+ 0, 0, 0, 0, 0, 0, 0, 0,
154
+ /* 32 sp 33 ! 34 " 35 # 36 $ 37 % 38 & 39 ' */
155
+ 0, '!', 0, '#', '$', '%', '&', '\'',
156
+ /* 40 ( 41 ) 42 * 43 + 44 , 45 - 46 . 47 / */
157
+ 0, 0, '*', '+', 0, '-', '.', 0,
158
+ /* 48 0 49 1 50 2 51 3 52 4 53 5 54 6 55 7 */
159
+ '0', '1', '2', '3', '4', '5', '6', '7',
160
+ /* 56 8 57 9 58 : 59 ; 60 < 61 = 62 > 63 ? */
161
+ '8', '9', 0, 0, 0, 0, 0, 0,
162
+ /* 64 @ 65 A 66 B 67 C 68 D 69 E 70 F 71 G */
163
+ 0, 'a', 'b', 'c', 'd', 'e', 'f', 'g',
164
+ /* 72 H 73 I 74 J 75 K 76 L 77 M 78 N 79 O */
165
+ 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
166
+ /* 80 P 81 Q 82 R 83 S 84 T 85 U 86 V 87 W */
167
+ 'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
168
+ /* 88 X 89 Y 90 Z 91 [ 92 \ 93 ] 94 ^ 95 _ */
169
+ 'x', 'y', 'z', 0, 0, 0, '^', '_',
170
+ /* 96 ` 97 a 98 b 99 c 100 d 101 e 102 f 103 g */
171
+ '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g',
172
+ /* 104 h 105 i 106 j 107 k 108 l 109 m 110 n 111 o */
173
+ 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
174
+ /* 112 p 113 q 114 r 115 s 116 t 117 u 118 v 119 w */
175
+ 'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
176
+ /* 120 x 121 y 122 z 123 { 124 | 125 } 126 ~ 127 del */
177
+ 'x', 'y', 'z', 0, '|', 0, '~', 0 };
178
+
179
+
180
/* Hex-digit lookup, indexed by byte value: 0-15 for '0'-'9', 'A'-'F' and
 * 'a'-'f', and -1 for every other byte.
 *
 * All 256 entries are spelled out. Leaving the upper half to default
 * initialisation would make bytes 0x80-0xFF read as 0, i.e. as the valid hex
 * digit zero, instead of -1.
 */
static const int8_t unhex[256] = {
  /* 0x00 */ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  /* 0x10 */ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  /* 0x20 */ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  /* 0x30 */  0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1,
  /* 0x40 */ -1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  /* 0x50 */ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  /* 0x60 */ -1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  /* 0x70 */ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  /* 0x80 */ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  /* 0x90 */ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  /* 0xa0 */ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  /* 0xb0 */ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  /* 0xc0 */ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  /* 0xd0 */ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  /* 0xe0 */ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  /* 0xf0 */ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
};
190
+
191
+
192
+ #if HTTP_PARSER_STRICT
193
+ # define T(v) 0
194
+ #else
195
+ # define T(v) v
196
+ #endif
197
+
198
+
199
+ static const uint8_t normal_url_char[32] = {
200
+ /* 0 nul 1 soh 2 stx 3 etx 4 eot 5 enq 6 ack 7 bel */
201
+ 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0,
202
+ /* 8 bs 9 ht 10 nl 11 vt 12 np 13 cr 14 so 15 si */
203
+ 0 | T(2) | 0 | 0 | T(16) | 0 | 0 | 0,
204
+ /* 16 dle 17 dc1 18 dc2 19 dc3 20 dc4 21 nak 22 syn 23 etb */
205
+ 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0,
206
+ /* 24 can 25 em 26 sub 27 esc 28 fs 29 gs 30 rs 31 us */
207
+ 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0,
208
+ /* 32 sp 33 ! 34 " 35 # 36 $ 37 % 38 & 39 ' */
209
+ 0 | 2 | 4 | 0 | 16 | 32 | 64 | 128,
210
+ /* 40 ( 41 ) 42 * 43 + 44 , 45 - 46 . 47 / */
211
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
212
+ /* 48 0 49 1 50 2 51 3 52 4 53 5 54 6 55 7 */
213
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
214
+ /* 56 8 57 9 58 : 59 ; 60 < 61 = 62 > 63 ? */
215
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 0,
216
+ /* 64 @ 65 A 66 B 67 C 68 D 69 E 70 F 71 G */
217
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
218
+ /* 72 H 73 I 74 J 75 K 76 L 77 M 78 N 79 O */
219
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
220
+ /* 80 P 81 Q 82 R 83 S 84 T 85 U 86 V 87 W */
221
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
222
+ /* 88 X 89 Y 90 Z 91 [ 92 \ 93 ] 94 ^ 95 _ */
223
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
224
+ /* 96 ` 97 a 98 b 99 c 100 d 101 e 102 f 103 g */
225
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
226
+ /* 104 h 105 i 106 j 107 k 108 l 109 m 110 n 111 o */
227
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
228
+ /* 112 p 113 q 114 r 115 s 116 t 117 u 118 v 119 w */
229
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
230
+ /* 120 x 121 y 122 z 123 { 124 | 125 } 126 ~ 127 del */
231
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 0, };
232
+
233
+ #undef T
234
+
235
+ enum state
236
+ { s_dead = 1 /* important that this is > 0 */
237
+
238
+ , s_start_req_or_res
239
+ , s_res_or_resp_H
240
+ , s_start_res
241
+ , s_res_H
242
+ , s_res_HT
243
+ , s_res_HTT
244
+ , s_res_HTTP
245
+ , s_res_first_http_major
246
+ , s_res_http_major
247
+ , s_res_first_http_minor
248
+ , s_res_http_minor
249
+ , s_res_first_status_code
250
+ , s_res_status_code
251
+ , s_res_status_start
252
+ , s_res_status
253
+ , s_res_line_almost_done
254
+
255
+ , s_start_req
256
+
257
+ , s_req_method
258
+ , s_req_spaces_before_url
259
+ , s_req_schema
260
+ , s_req_schema_slash
261
+ , s_req_schema_slash_slash
262
+ , s_req_server_start
263
+ , s_req_server
264
+ , s_req_server_with_at
265
+ , s_req_path
266
+ , s_req_query_string_start
267
+ , s_req_query_string
268
+ , s_req_fragment_start
269
+ , s_req_fragment
270
+ , s_req_http_start
271
+ , s_req_http_H
272
+ , s_req_http_HT
273
+ , s_req_http_HTT
274
+ , s_req_http_HTTP
275
+ , s_req_first_http_major
276
+ , s_req_http_major
277
+ , s_req_first_http_minor
278
+ , s_req_http_minor
279
+ , s_req_line_almost_done
280
+
281
+ , s_header_field_start
282
+ , s_header_field
283
+ , s_header_value_start
284
+ , s_header_value
285
+ , s_header_value_lws
286
+
287
+ , s_header_almost_done
288
+
289
+ , s_chunk_size_start
290
+ , s_chunk_size
291
+ , s_chunk_parameters
292
+ , s_chunk_size_almost_done
293
+
294
+ , s_headers_almost_done
295
+ , s_headers_done
296
+
297
+ /* Important: 's_headers_done' must be the last 'header' state. All
298
+ * states beyond this must be 'body' states. It is used for overflow
299
+ * checking. See the PARSING_HEADER() macro.
300
+ */
301
+
302
+ , s_chunk_data
303
+ , s_chunk_data_almost_done
304
+ , s_chunk_data_done
305
+
306
+ , s_body_identity
307
+ , s_body_identity_eof
308
+
309
+ , s_message_done
310
+ };
311
+
312
+
313
+ #define PARSING_HEADER(state) (state <= s_headers_done)
314
+
315
+
316
+ enum header_states
317
+ { h_general = 0
318
+ , h_C
319
+ , h_CO
320
+ , h_CON
321
+
322
+ , h_matching_connection
323
+ , h_matching_proxy_connection
324
+ , h_matching_content_length
325
+ , h_matching_transfer_encoding
326
+ , h_matching_upgrade
327
+
328
+ , h_connection
329
+ , h_content_length
330
+ , h_transfer_encoding
331
+ , h_upgrade
332
+
333
+ , h_matching_transfer_encoding_chunked
334
+ , h_matching_connection_keep_alive
335
+ , h_matching_connection_close
336
+
337
+ , h_transfer_encoding_chunked
338
+ , h_connection_keep_alive
339
+ , h_connection_close
340
+ };
341
+
342
+ enum http_host_state
343
+ {
344
+ s_http_host_dead = 1
345
+ , s_http_userinfo_start
346
+ , s_http_userinfo
347
+ , s_http_host_start
348
+ , s_http_host_v6_start
349
+ , s_http_host
350
+ , s_http_host_v6
351
+ , s_http_host_v6_end
352
+ , s_http_host_port_start
353
+ , s_http_host_port
354
+ };
355
+
356
/* Macros for character classes; depends on strict-mode.
 *
 * Every macro parenthesizes its argument and its whole expansion so that it
 * can be handed an arbitrary expression (e.g. a conditional) without operator
 * precedence changing its meaning. Arguments may be evaluated more than once,
 * so do not pass expressions with side effects.
 */
#define CR                  '\r'
#define LF                  '\n'
/* ASCII-only case fold: sets bit 0x20, mapping 'A'-'Z' onto 'a'-'z'. */
#define LOWER(c)            ((unsigned char)((c) | 0x20))
#define IS_ALPHA(c)         (LOWER(c) >= 'a' && LOWER(c) <= 'z')
#define IS_NUM(c)           ((c) >= '0' && (c) <= '9')
#define IS_ALPHANUM(c)      (IS_ALPHA(c) || IS_NUM(c))
#define IS_HEX(c)           (IS_NUM(c) || (LOWER(c) >= 'a' && LOWER(c) <= 'f'))
#define IS_MARK(c)          ((c) == '-' || (c) == '_' || (c) == '.' || \
  (c) == '!' || (c) == '~' || (c) == '*' || (c) == '\'' || (c) == '(' || \
  (c) == ')')
#define IS_USERINFO_CHAR(c) (IS_ALPHANUM(c) || IS_MARK(c) || (c) == '%' || \
  (c) == ';' || (c) == ':' || (c) == '&' || (c) == '=' || (c) == '+' || \
  (c) == '$' || (c) == ',')

#if HTTP_PARSER_STRICT
#define TOKEN(c)            (tokens[(unsigned char)(c)])
#define IS_URL_CHAR(c)      (BIT_AT(normal_url_char, (unsigned char)(c)))
#define IS_HOST_CHAR(c)     (IS_ALPHANUM(c) || (c) == '.' || (c) == '-')
#else
/* Non-strict mode additionally accepts ' ' as a token character, any byte
 * with the high bit set as a URL character, and '_' in host names. */
#define TOKEN(c)            (((c) == ' ') ? ' ' : tokens[(unsigned char)(c)])
#define IS_URL_CHAR(c)                                                         \
  (BIT_AT(normal_url_char, (unsigned char)(c)) || ((c) & 0x80))
#define IS_HOST_CHAR(c)                                                        \
  (IS_ALPHANUM(c) || (c) == '.' || (c) == '-' || (c) == '_')
#endif
382
+
383
+
384
+ #define start_state (parser->type == HTTP_REQUEST ? s_start_req : s_start_res)
385
+
386
+
387
+ #if HTTP_PARSER_STRICT
388
+ # define STRICT_CHECK(cond) \
389
+ do { \
390
+ if (cond) { \
391
+ SET_ERRNO(HPE_STRICT); \
392
+ goto error; \
393
+ } \
394
+ } while (0)
395
+ # define NEW_MESSAGE() (http_should_keep_alive(parser) ? start_state : s_dead)
396
+ #else
397
+ # define STRICT_CHECK(cond)
398
+ # define NEW_MESSAGE() start_state
399
+ #endif
400
+
401
+
402
+ /* Map errno values to strings for human-readable output */
403
+ #define HTTP_STRERROR_GEN(n, s) { "HPE_" #n, s },
404
+ static struct {
405
+ const char *name;
406
+ const char *description;
407
+ } http_strerror_tab[] = {
408
+ HTTP_ERRNO_MAP(HTTP_STRERROR_GEN)
409
+ };
410
+ #undef HTTP_STRERROR_GEN
411
+
412
+ int http_message_needs_eof(const http_parser *parser);
413
+
414
+ /* Our URL parser.
415
+ *
416
+ * This is designed to be shared by http_parser_execute() for URL validation,
417
+ * hence it has a state transition + byte-for-byte interface. In addition, it
418
+ * is meant to be embedded in http_parser_parse_url(), which does the dirty
419
+ * work of turning state transitions URL components for its API.
420
+ *
421
+ * This function should only be invoked with non-space characters. It is
422
+ * assumed that the caller cares about (and can detect) the transition between
423
+ * URL and non-URL states by looking for these.
424
+ */
425
+ static enum state
426
+ parse_url_char(enum state s, const char ch)
427
+ {
428
+ if (ch == ' ' || ch == '\r' || ch == '\n') {
429
+ return s_dead;
430
+ }
431
+
432
+ #if HTTP_PARSER_STRICT
433
+ if (ch == '\t' || ch == '\f') {
434
+ return s_dead;
435
+ }
436
+ #endif
437
+
438
+ switch (s) {
439
+ case s_req_spaces_before_url:
440
+ /* Proxied requests are followed by scheme of an absolute URI (alpha).
441
+ * All methods except CONNECT are followed by '/' or '*'.
442
+ */
443
+
444
+ if (ch == '/' || ch == '*') {
445
+ return s_req_path;
446
+ }
447
+
448
+ if (IS_ALPHA(ch)) {
449
+ return s_req_schema;
450
+ }
451
+
452
+ break;
453
+
454
+ case s_req_schema:
455
+ if (IS_ALPHA(ch)) {
456
+ return s;
457
+ }
458
+
459
+ if (ch == ':') {
460
+ return s_req_schema_slash;
461
+ }
462
+
463
+ break;
464
+
465
+ case s_req_schema_slash:
466
+ if (ch == '/') {
467
+ return s_req_schema_slash_slash;
468
+ }
469
+
470
+ break;
471
+
472
+ case s_req_schema_slash_slash:
473
+ if (ch == '/') {
474
+ return s_req_server_start;
475
+ }
476
+
477
+ break;
478
+
479
+ case s_req_server_with_at:
480
+ if (ch == '@') {
481
+ return s_dead;
482
+ }
483
+
484
+ /* FALLTHROUGH */
485
+ case s_req_server_start:
486
+ case s_req_server:
487
+ if (ch == '/') {
488
+ return s_req_path;
489
+ }
490
+
491
+ if (ch == '?') {
492
+ return s_req_query_string_start;
493
+ }
494
+
495
+ if (ch == '@') {
496
+ return s_req_server_with_at;
497
+ }
498
+
499
+ if (IS_USERINFO_CHAR(ch) || ch == '[' || ch == ']') {
500
+ return s_req_server;
501
+ }
502
+
503
+ break;
504
+
505
+ case s_req_path:
506
+ if (IS_URL_CHAR(ch)) {
507
+ return s;
508
+ }
509
+
510
+ switch (ch) {
511
+ case '?':
512
+ return s_req_query_string_start;
513
+
514
+ case '#':
515
+ return s_req_fragment_start;
516
+ }
517
+
518
+ break;
519
+
520
+ case s_req_query_string_start:
521
+ case s_req_query_string:
522
+ if (IS_URL_CHAR(ch)) {
523
+ return s_req_query_string;
524
+ }
525
+
526
+ switch (ch) {
527
+ case '?':
528
+ /* allow extra '?' in query string */
529
+ return s_req_query_string;
530
+
531
+ case '#':
532
+ return s_req_fragment_start;
533
+ }
534
+
535
+ break;
536
+
537
+ case s_req_fragment_start:
538
+ if (IS_URL_CHAR(ch)) {
539
+ return s_req_fragment;
540
+ }
541
+
542
+ switch (ch) {
543
+ case '?':
544
+ return s_req_fragment;
545
+
546
+ case '#':
547
+ return s;
548
+ }
549
+
550
+ break;
551
+
552
+ case s_req_fragment:
553
+ if (IS_URL_CHAR(ch)) {
554
+ return s;
555
+ }
556
+
557
+ switch (ch) {
558
+ case '?':
559
+ case '#':
560
+ return s;
561
+ }
562
+
563
+ break;
564
+
565
+ default:
566
+ break;
567
+ }
568
+
569
+ /* We should never fall out of the switch above unless there's an error */
570
+ return s_dead;
571
+ }
572
+
573
+ size_t http_parser_execute (http_parser *parser,
574
+ const http_parser_settings *settings,
575
+ const char *data,
576
+ size_t len)
577
+ {
578
+ char c, ch;
579
+ int8_t unhex_val;
580
+ const char *p = data;
581
+ const char *header_field_mark = 0;
582
+ const char *header_value_mark = 0;
583
+ const char *url_mark = 0;
584
+ const char *body_mark = 0;
585
+ const char *status_mark = 0;
586
+
587
+ /* We're in an error state. Don't bother doing anything. */
588
+ if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
589
+ return 0;
590
+ }
591
+
592
+ if (len == 0) {
593
+ switch (parser->state) {
594
+ case s_body_identity_eof:
595
+ /* Use of CALLBACK_NOTIFY() here would erroneously return 1 byte read if
596
+ * we got paused.
597
+ */
598
+ CALLBACK_NOTIFY_NOADVANCE(message_complete);
599
+ return 0;
600
+
601
+ case s_dead:
602
+ case s_start_req_or_res:
603
+ case s_start_res:
604
+ case s_start_req:
605
+ return 0;
606
+
607
+ default:
608
+ SET_ERRNO(HPE_INVALID_EOF_STATE);
609
+ return 1;
610
+ }
611
+ }
612
+
613
+
614
+ if (parser->state == s_header_field)
615
+ header_field_mark = data;
616
+ if (parser->state == s_header_value)
617
+ header_value_mark = data;
618
+ switch (parser->state) {
619
+ case s_req_path:
620
+ case s_req_schema:
621
+ case s_req_schema_slash:
622
+ case s_req_schema_slash_slash:
623
+ case s_req_server_start:
624
+ case s_req_server:
625
+ case s_req_server_with_at:
626
+ case s_req_query_string_start:
627
+ case s_req_query_string:
628
+ case s_req_fragment_start:
629
+ case s_req_fragment:
630
+ url_mark = data;
631
+ break;
632
+ case s_res_status:
633
+ status_mark = data;
634
+ break;
635
+ }
636
+
637
+ for (p=data; p != data + len; p++) {
638
+ ch = *p;
639
+
640
+ if (PARSING_HEADER(parser->state)) {
641
+ ++parser->nread;
642
+ /* Don't allow the total size of the HTTP headers (including the status
643
+ * line) to exceed HTTP_MAX_HEADER_SIZE. This check is here to protect
644
+ * embedders against denial-of-service attacks where the attacker feeds
645
+ * us a never-ending header that the embedder keeps buffering.
646
+ *
647
+ * This check is arguably the responsibility of embedders but we're doing
648
+ * it on the embedder's behalf because most won't bother and this way we
649
+ * make the web a little safer. HTTP_MAX_HEADER_SIZE is still far bigger
650
+ * than any reasonable request or response so this should never affect
651
+ * day-to-day operation.
652
+ */
653
+ if (parser->nread > HTTP_MAX_HEADER_SIZE) {
654
+ SET_ERRNO(HPE_HEADER_OVERFLOW);
655
+ goto error;
656
+ }
657
+ }
658
+
659
+ reexecute_byte:
660
+ switch (parser->state) {
661
+
662
+ case s_dead:
663
+ /* this state is used after a 'Connection: close' message
664
+ * the parser will error out if it reads another message
665
+ */
666
+ if (ch == CR || ch == LF)
667
+ break;
668
+
669
+ SET_ERRNO(HPE_CLOSED_CONNECTION);
670
+ goto error;
671
+
672
+ case s_start_req_or_res:
673
+ {
674
+ if (ch == CR || ch == LF)
675
+ break;
676
+ parser->flags = 0;
677
+ parser->content_length = ULLONG_MAX;
678
+
679
+ if (ch == 'H') {
680
+ parser->state = s_res_or_resp_H;
681
+
682
+ CALLBACK_NOTIFY(message_begin);
683
+ } else {
684
+ parser->type = HTTP_REQUEST;
685
+ parser->state = s_start_req;
686
+ goto reexecute_byte;
687
+ }
688
+
689
+ break;
690
+ }
691
+
692
+ case s_res_or_resp_H:
693
+ if (ch == 'T') {
694
+ parser->type = HTTP_RESPONSE;
695
+ parser->state = s_res_HT;
696
+ } else {
697
+ if (ch != 'E') {
698
+ SET_ERRNO(HPE_INVALID_CONSTANT);
699
+ goto error;
700
+ }
701
+
702
+ parser->type = HTTP_REQUEST;
703
+ parser->method = HTTP_HEAD;
704
+ parser->index = 2;
705
+ parser->state = s_req_method;
706
+ }
707
+ break;
708
+
709
+ case s_start_res:
710
+ {
711
+ parser->flags = 0;
712
+ parser->content_length = ULLONG_MAX;
713
+
714
+ switch (ch) {
715
+ case 'H':
716
+ parser->state = s_res_H;
717
+ break;
718
+
719
+ case CR:
720
+ case LF:
721
+ break;
722
+
723
+ default:
724
+ SET_ERRNO(HPE_INVALID_CONSTANT);
725
+ goto error;
726
+ }
727
+
728
+ CALLBACK_NOTIFY(message_begin);
729
+ break;
730
+ }
731
+
732
+ case s_res_H:
733
+ STRICT_CHECK(ch != 'T');
734
+ parser->state = s_res_HT;
735
+ break;
736
+
737
+ case s_res_HT:
738
+ STRICT_CHECK(ch != 'T');
739
+ parser->state = s_res_HTT;
740
+ break;
741
+
742
+ case s_res_HTT:
743
+ STRICT_CHECK(ch != 'P');
744
+ parser->state = s_res_HTTP;
745
+ break;
746
+
747
+ case s_res_HTTP:
748
+ STRICT_CHECK(ch != '/');
749
+ parser->state = s_res_first_http_major;
750
+ break;
751
+
752
+ case s_res_first_http_major:
753
+ if (ch < '0' || ch > '9') {
754
+ SET_ERRNO(HPE_INVALID_VERSION);
755
+ goto error;
756
+ }
757
+
758
+ parser->http_major = ch - '0';
759
+ parser->state = s_res_http_major;
760
+ break;
761
+
762
+ /* major HTTP version or dot */
763
+ case s_res_http_major:
764
+ {
765
+ if (ch == '.') {
766
+ parser->state = s_res_first_http_minor;
767
+ break;
768
+ }
769
+
770
+ if (!IS_NUM(ch)) {
771
+ SET_ERRNO(HPE_INVALID_VERSION);
772
+ goto error;
773
+ }
774
+
775
+ parser->http_major *= 10;
776
+ parser->http_major += ch - '0';
777
+
778
+ if (parser->http_major > 999) {
779
+ SET_ERRNO(HPE_INVALID_VERSION);
780
+ goto error;
781
+ }
782
+
783
+ break;
784
+ }
785
+
786
+ /* first digit of minor HTTP version */
787
+ case s_res_first_http_minor:
788
+ if (!IS_NUM(ch)) {
789
+ SET_ERRNO(HPE_INVALID_VERSION);
790
+ goto error;
791
+ }
792
+
793
+ parser->http_minor = ch - '0';
794
+ parser->state = s_res_http_minor;
795
+ break;
796
+
797
+ /* minor HTTP version or end of request line */
798
+ case s_res_http_minor:
799
+ {
800
+ if (ch == ' ') {
801
+ parser->state = s_res_first_status_code;
802
+ break;
803
+ }
804
+
805
+ if (!IS_NUM(ch)) {
806
+ SET_ERRNO(HPE_INVALID_VERSION);
807
+ goto error;
808
+ }
809
+
810
+ parser->http_minor *= 10;
811
+ parser->http_minor += ch - '0';
812
+
813
+ if (parser->http_minor > 999) {
814
+ SET_ERRNO(HPE_INVALID_VERSION);
815
+ goto error;
816
+ }
817
+
818
+ break;
819
+ }
820
+
821
+ case s_res_first_status_code:
822
+ {
823
+ if (!IS_NUM(ch)) {
824
+ if (ch == ' ') {
825
+ break;
826
+ }
827
+
828
+ SET_ERRNO(HPE_INVALID_STATUS);
829
+ goto error;
830
+ }
831
+ parser->status_code = ch - '0';
832
+ parser->state = s_res_status_code;
833
+ break;
834
+ }
835
+
836
+ case s_res_status_code:
837
+ {
838
+ if (!IS_NUM(ch)) {
839
+ switch (ch) {
840
+ case ' ':
841
+ parser->state = s_res_status_start;
842
+ break;
843
+ case CR:
844
+ parser->state = s_res_line_almost_done;
845
+ break;
846
+ case LF:
847
+ parser->state = s_header_field_start;
848
+ break;
849
+ default:
850
+ SET_ERRNO(HPE_INVALID_STATUS);
851
+ goto error;
852
+ }
853
+ break;
854
+ }
855
+
856
+ parser->status_code *= 10;
857
+ parser->status_code += ch - '0';
858
+
859
+ if (parser->status_code > 999) {
860
+ SET_ERRNO(HPE_INVALID_STATUS);
861
+ goto error;
862
+ }
863
+
864
+ break;
865
+ }
866
+
867
+ case s_res_status_start:
868
+ {
869
+ if (ch == CR) {
870
+ parser->state = s_res_line_almost_done;
871
+ break;
872
+ }
873
+
874
+ if (ch == LF) {
875
+ parser->state = s_header_field_start;
876
+ break;
877
+ }
878
+
879
+ MARK(status);
880
+ parser->state = s_res_status;
881
+ parser->index = 0;
882
+ break;
883
+ }
884
+
885
+ case s_res_status:
886
+ if (ch == CR) {
887
+ parser->state = s_res_line_almost_done;
888
+ CALLBACK_DATA(status);
889
+ break;
890
+ }
891
+
892
+ if (ch == LF) {
893
+ parser->state = s_header_field_start;
894
+ CALLBACK_DATA(status);
895
+ break;
896
+ }
897
+
898
+ break;
899
+
900
+ case s_res_line_almost_done:
901
+ STRICT_CHECK(ch != LF);
902
+ parser->state = s_header_field_start;
903
+ break;
904
+
905
+ case s_start_req:
906
+ {
907
+ if (ch == CR || ch == LF)
908
+ break;
909
+ parser->flags = 0;
910
+ parser->content_length = ULLONG_MAX;
911
+
912
+ if (!IS_ALPHA(ch)) {
913
+ SET_ERRNO(HPE_INVALID_METHOD);
914
+ goto error;
915
+ }
916
+
917
+ parser->method = (enum http_method) 0;
918
+ parser->index = 1;
919
+ switch (ch) {
920
+ case 'C': parser->method = HTTP_CONNECT; /* or COPY, CHECKOUT */ break;
921
+ case 'D': parser->method = HTTP_DELETE; break;
922
+ case 'G': parser->method = HTTP_GET; break;
923
+ case 'H': parser->method = HTTP_HEAD; break;
924
+ case 'L': parser->method = HTTP_LOCK; break;
925
+ case 'M': parser->method = HTTP_MKCOL; /* or MOVE, MKACTIVITY, MERGE, M-SEARCH */ break;
926
+ case 'N': parser->method = HTTP_NOTIFY; break;
927
+ case 'O': parser->method = HTTP_OPTIONS; break;
928
+ case 'P': parser->method = HTTP_POST;
929
+ /* or PROPFIND|PROPPATCH|PUT|PATCH|PURGE */
930
+ break;
931
+ case 'R': parser->method = HTTP_REPORT; break;
932
+ case 'S': parser->method = HTTP_SUBSCRIBE; /* or SEARCH */ break;
933
+ case 'T': parser->method = HTTP_TRACE; break;
934
+ case 'U': parser->method = HTTP_UNLOCK; /* or UNSUBSCRIBE */ break;
935
+ default:
936
+ SET_ERRNO(HPE_INVALID_METHOD);
937
+ goto error;
938
+ }
939
+ parser->state = s_req_method;
940
+
941
+ CALLBACK_NOTIFY(message_begin);
942
+
943
+ break;
944
+ }
945
+
946
+ case s_req_method:
947
+ {
948
+ const char *matcher;
949
+ if (ch == '\0') {
950
+ SET_ERRNO(HPE_INVALID_METHOD);
951
+ goto error;
952
+ }
953
+
954
+ matcher = method_strings[parser->method];
955
+ if (ch == ' ' && matcher[parser->index] == '\0') {
956
+ parser->state = s_req_spaces_before_url;
957
+ } else if (ch == matcher[parser->index]) {
958
+ ; /* nada */
959
+ } else if (parser->method == HTTP_CONNECT) {
960
+ if (parser->index == 1 && ch == 'H') {
961
+ parser->method = HTTP_CHECKOUT;
962
+ } else if (parser->index == 2 && ch == 'P') {
963
+ parser->method = HTTP_COPY;
964
+ } else {
965
+ SET_ERRNO(HPE_INVALID_METHOD);
966
+ goto error;
967
+ }
968
+ } else if (parser->method == HTTP_MKCOL) {
969
+ if (parser->index == 1 && ch == 'O') {
970
+ parser->method = HTTP_MOVE;
971
+ } else if (parser->index == 1 && ch == 'E') {
972
+ parser->method = HTTP_MERGE;
973
+ } else if (parser->index == 1 && ch == '-') {
974
+ parser->method = HTTP_MSEARCH;
975
+ } else if (parser->index == 2 && ch == 'A') {
976
+ parser->method = HTTP_MKACTIVITY;
977
+ } else {
978
+ SET_ERRNO(HPE_INVALID_METHOD);
979
+ goto error;
980
+ }
981
+ } else if (parser->method == HTTP_SUBSCRIBE) {
982
+ if (parser->index == 1 && ch == 'E') {
983
+ parser->method = HTTP_SEARCH;
984
+ } else {
985
+ SET_ERRNO(HPE_INVALID_METHOD);
986
+ goto error;
987
+ }
988
+ } else if (parser->index == 1 && parser->method == HTTP_POST) {
989
+ if (ch == 'R') {
990
+ parser->method = HTTP_PROPFIND; /* or HTTP_PROPPATCH */
991
+ } else if (ch == 'U') {
992
+ parser->method = HTTP_PUT; /* or HTTP_PURGE */
993
+ } else if (ch == 'A') {
994
+ parser->method = HTTP_PATCH;
995
+ } else {
996
+ SET_ERRNO(HPE_INVALID_METHOD);
997
+ goto error;
998
+ }
999
+ } else if (parser->index == 2) {
1000
+ if (parser->method == HTTP_PUT) {
1001
+ if (ch == 'R') {
1002
+ parser->method = HTTP_PURGE;
1003
+ } else {
1004
+ SET_ERRNO(HPE_INVALID_METHOD);
1005
+ goto error;
1006
+ }
1007
+ } else if (parser->method == HTTP_UNLOCK) {
1008
+ if (ch == 'S') {
1009
+ parser->method = HTTP_UNSUBSCRIBE;
1010
+ } else {
1011
+ SET_ERRNO(HPE_INVALID_METHOD);
1012
+ goto error;
1013
+ }
1014
+ } else {
1015
+ SET_ERRNO(HPE_INVALID_METHOD);
1016
+ goto error;
1017
+ }
1018
+ } else if (parser->index == 4 && parser->method == HTTP_PROPFIND && ch == 'P') {
1019
+ parser->method = HTTP_PROPPATCH;
1020
+ } else {
1021
+ SET_ERRNO(HPE_INVALID_METHOD);
1022
+ goto error;
1023
+ }
1024
+
1025
+ ++parser->index;
1026
+ break;
1027
+ }
1028
+
1029
+ case s_req_spaces_before_url:
1030
+ {
1031
+ if (ch == ' ') break;
1032
+
1033
+ MARK(url);
1034
+ if (parser->method == HTTP_CONNECT) {
1035
+ parser->state = s_req_server_start;
1036
+ }
1037
+
1038
+ parser->state = parse_url_char((enum state)parser->state, ch);
1039
+ if (parser->state == s_dead) {
1040
+ SET_ERRNO(HPE_INVALID_URL);
1041
+ goto error;
1042
+ }
1043
+
1044
+ break;
1045
+ }
1046
+
1047
+ case s_req_schema:
1048
+ case s_req_schema_slash:
1049
+ case s_req_schema_slash_slash:
1050
+ case s_req_server_start:
1051
+ {
1052
+ switch (ch) {
1053
+ /* No whitespace allowed here */
1054
+ case ' ':
1055
+ case CR:
1056
+ case LF:
1057
+ SET_ERRNO(HPE_INVALID_URL);
1058
+ goto error;
1059
+ default:
1060
+ parser->state = parse_url_char((enum state)parser->state, ch);
1061
+ if (parser->state == s_dead) {
1062
+ SET_ERRNO(HPE_INVALID_URL);
1063
+ goto error;
1064
+ }
1065
+ }
1066
+
1067
+ break;
1068
+ }
1069
+
1070
+ case s_req_server:
1071
+ case s_req_server_with_at:
1072
+ case s_req_path:
1073
+ case s_req_query_string_start:
1074
+ case s_req_query_string:
1075
+ case s_req_fragment_start:
1076
+ case s_req_fragment:
1077
+ {
1078
+ switch (ch) {
1079
+ case ' ':
1080
+ parser->state = s_req_http_start;
1081
+ CALLBACK_DATA(url);
1082
+ break;
1083
+ case CR:
1084
+ case LF:
1085
+ parser->http_major = 0;
1086
+ parser->http_minor = 9;
1087
+ parser->state = (ch == CR) ?
1088
+ s_req_line_almost_done :
1089
+ s_header_field_start;
1090
+ CALLBACK_DATA(url);
1091
+ break;
1092
+ default:
1093
+ parser->state = parse_url_char((enum state)parser->state, ch);
1094
+ if (parser->state == s_dead) {
1095
+ SET_ERRNO(HPE_INVALID_URL);
1096
+ goto error;
1097
+ }
1098
+ }
1099
+ break;
1100
+ }
1101
+
1102
+ case s_req_http_start:
1103
+ switch (ch) {
1104
+ case 'H':
1105
+ parser->state = s_req_http_H;
1106
+ break;
1107
+ case ' ':
1108
+ break;
1109
+ default:
1110
+ SET_ERRNO(HPE_INVALID_CONSTANT);
1111
+ goto error;
1112
+ }
1113
+ break;
1114
+
1115
+ case s_req_http_H:
1116
+ STRICT_CHECK(ch != 'T');
1117
+ parser->state = s_req_http_HT;
1118
+ break;
1119
+
1120
+ case s_req_http_HT:
1121
+ STRICT_CHECK(ch != 'T');
1122
+ parser->state = s_req_http_HTT;
1123
+ break;
1124
+
1125
+ case s_req_http_HTT:
1126
+ STRICT_CHECK(ch != 'P');
1127
+ parser->state = s_req_http_HTTP;
1128
+ break;
1129
+
1130
+ case s_req_http_HTTP:
1131
+ STRICT_CHECK(ch != '/');
1132
+ parser->state = s_req_first_http_major;
1133
+ break;
1134
+
1135
+ /* first digit of major HTTP version */
1136
+ case s_req_first_http_major:
1137
+ if (ch < '1' || ch > '9') {
1138
+ SET_ERRNO(HPE_INVALID_VERSION);
1139
+ goto error;
1140
+ }
1141
+
1142
+ parser->http_major = ch - '0';
1143
+ parser->state = s_req_http_major;
1144
+ break;
1145
+
1146
+ /* major HTTP version or dot */
1147
+ case s_req_http_major:
1148
+ {
1149
+ if (ch == '.') {
1150
+ parser->state = s_req_first_http_minor;
1151
+ break;
1152
+ }
1153
+
1154
+ if (!IS_NUM(ch)) {
1155
+ SET_ERRNO(HPE_INVALID_VERSION);
1156
+ goto error;
1157
+ }
1158
+
1159
+ parser->http_major *= 10;
1160
+ parser->http_major += ch - '0';
1161
+
1162
+ if (parser->http_major > 999) {
1163
+ SET_ERRNO(HPE_INVALID_VERSION);
1164
+ goto error;
1165
+ }
1166
+
1167
+ break;
1168
+ }
1169
+
1170
+ /* first digit of minor HTTP version */
1171
+ case s_req_first_http_minor:
1172
+ if (!IS_NUM(ch)) {
1173
+ SET_ERRNO(HPE_INVALID_VERSION);
1174
+ goto error;
1175
+ }
1176
+
1177
+ parser->http_minor = ch - '0';
1178
+ parser->state = s_req_http_minor;
1179
+ break;
1180
+
1181
+ /* minor HTTP version or end of request line */
1182
+ case s_req_http_minor:
1183
+ {
1184
+ if (ch == CR) {
1185
+ parser->state = s_req_line_almost_done;
1186
+ break;
1187
+ }
1188
+
1189
+ if (ch == LF) {
1190
+ parser->state = s_header_field_start;
1191
+ break;
1192
+ }
1193
+
1194
+ /* XXX allow spaces after digit? */
1195
+
1196
+ if (!IS_NUM(ch)) {
1197
+ SET_ERRNO(HPE_INVALID_VERSION);
1198
+ goto error;
1199
+ }
1200
+
1201
+ parser->http_minor *= 10;
1202
+ parser->http_minor += ch - '0';
1203
+
1204
+ if (parser->http_minor > 999) {
1205
+ SET_ERRNO(HPE_INVALID_VERSION);
1206
+ goto error;
1207
+ }
1208
+
1209
+ break;
1210
+ }
1211
+
1212
+ /* end of request line */
1213
+ case s_req_line_almost_done:
1214
+ {
1215
+ if (ch != LF) {
1216
+ SET_ERRNO(HPE_LF_EXPECTED);
1217
+ goto error;
1218
+ }
1219
+
1220
+ parser->state = s_header_field_start;
1221
+ break;
1222
+ }
1223
+
1224
+ case s_header_field_start:
1225
+ {
1226
+ if (ch == CR) {
1227
+ parser->state = s_headers_almost_done;
1228
+ break;
1229
+ }
1230
+
1231
+ if (ch == LF) {
1232
+ /* they might be just sending \n instead of \r\n so this would be
1233
+ * the second \n to denote the end of headers*/
1234
+ parser->state = s_headers_almost_done;
1235
+ goto reexecute_byte;
1236
+ }
1237
+
1238
+ c = TOKEN(ch);
1239
+
1240
+ if (!c) {
1241
+ SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1242
+ goto error;
1243
+ }
1244
+
1245
+ MARK(header_field);
1246
+
1247
+ parser->index = 0;
1248
+ parser->state = s_header_field;
1249
+
1250
+ switch (c) {
1251
+ case 'c':
1252
+ parser->header_state = h_C;
1253
+ break;
1254
+
1255
+ case 'p':
1256
+ parser->header_state = h_matching_proxy_connection;
1257
+ break;
1258
+
1259
+ case 't':
1260
+ parser->header_state = h_matching_transfer_encoding;
1261
+ break;
1262
+
1263
+ case 'u':
1264
+ parser->header_state = h_matching_upgrade;
1265
+ break;
1266
+
1267
+ default:
1268
+ parser->header_state = h_general;
1269
+ break;
1270
+ }
1271
+ break;
1272
+ }
1273
+
1274
+ case s_header_field:
1275
+ {
1276
+ c = TOKEN(ch);
1277
+
1278
+ if (c) {
1279
+ switch (parser->header_state) {
1280
+ case h_general:
1281
+ break;
1282
+
1283
+ case h_C:
1284
+ parser->index++;
1285
+ parser->header_state = (c == 'o' ? h_CO : h_general);
1286
+ break;
1287
+
1288
+ case h_CO:
1289
+ parser->index++;
1290
+ parser->header_state = (c == 'n' ? h_CON : h_general);
1291
+ break;
1292
+
1293
+ case h_CON:
1294
+ parser->index++;
1295
+ switch (c) {
1296
+ case 'n':
1297
+ parser->header_state = h_matching_connection;
1298
+ break;
1299
+ case 't':
1300
+ parser->header_state = h_matching_content_length;
1301
+ break;
1302
+ default:
1303
+ parser->header_state = h_general;
1304
+ break;
1305
+ }
1306
+ break;
1307
+
1308
+ /* connection */
1309
+
1310
+ case h_matching_connection:
1311
+ parser->index++;
1312
+ if (parser->index > sizeof(CONNECTION)-1
1313
+ || c != CONNECTION[parser->index]) {
1314
+ parser->header_state = h_general;
1315
+ } else if (parser->index == sizeof(CONNECTION)-2) {
1316
+ parser->header_state = h_connection;
1317
+ }
1318
+ break;
1319
+
1320
+ /* proxy-connection */
1321
+
1322
+ case h_matching_proxy_connection:
1323
+ parser->index++;
1324
+ if (parser->index > sizeof(PROXY_CONNECTION)-1
1325
+ || c != PROXY_CONNECTION[parser->index]) {
1326
+ parser->header_state = h_general;
1327
+ } else if (parser->index == sizeof(PROXY_CONNECTION)-2) {
1328
+ parser->header_state = h_connection;
1329
+ }
1330
+ break;
1331
+
1332
+ /* content-length */
1333
+
1334
+ case h_matching_content_length:
1335
+ parser->index++;
1336
+ if (parser->index > sizeof(CONTENT_LENGTH)-1
1337
+ || c != CONTENT_LENGTH[parser->index]) {
1338
+ parser->header_state = h_general;
1339
+ } else if (parser->index == sizeof(CONTENT_LENGTH)-2) {
1340
+ parser->header_state = h_content_length;
1341
+ }
1342
+ break;
1343
+
1344
+ /* transfer-encoding */
1345
+
1346
+ case h_matching_transfer_encoding:
1347
+ parser->index++;
1348
+ if (parser->index > sizeof(TRANSFER_ENCODING)-1
1349
+ || c != TRANSFER_ENCODING[parser->index]) {
1350
+ parser->header_state = h_general;
1351
+ } else if (parser->index == sizeof(TRANSFER_ENCODING)-2) {
1352
+ parser->header_state = h_transfer_encoding;
1353
+ }
1354
+ break;
1355
+
1356
+ /* upgrade */
1357
+
1358
+ case h_matching_upgrade:
1359
+ parser->index++;
1360
+ if (parser->index > sizeof(UPGRADE)-1
1361
+ || c != UPGRADE[parser->index]) {
1362
+ parser->header_state = h_general;
1363
+ } else if (parser->index == sizeof(UPGRADE)-2) {
1364
+ parser->header_state = h_upgrade;
1365
+ }
1366
+ break;
1367
+
1368
+ case h_connection:
1369
+ case h_content_length:
1370
+ case h_transfer_encoding:
1371
+ case h_upgrade:
1372
+ if (ch != ' ') parser->header_state = h_general;
1373
+ break;
1374
+
1375
+ default:
1376
+ assert(0 && "Unknown header_state");
1377
+ break;
1378
+ }
1379
+ break;
1380
+ }
1381
+
1382
+ if (ch == ':') {
1383
+ parser->state = s_header_value_start;
1384
+ CALLBACK_DATA(header_field);
1385
+ break;
1386
+ }
1387
+
1388
+ if (ch == CR) {
1389
+ parser->state = s_header_almost_done;
1390
+ CALLBACK_DATA(header_field);
1391
+ break;
1392
+ }
1393
+
1394
+ if (ch == LF) {
1395
+ parser->state = s_header_field_start;
1396
+ CALLBACK_DATA(header_field);
1397
+ break;
1398
+ }
1399
+
1400
+ SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1401
+ goto error;
1402
+ }
1403
+
1404
+ case s_header_value_start:
1405
+ {
1406
+ if (ch == ' ' || ch == '\t') break;
1407
+
1408
+ MARK(header_value);
1409
+
1410
+ parser->state = s_header_value;
1411
+ parser->index = 0;
1412
+
1413
+ if (ch == CR) {
1414
+ parser->header_state = h_general;
1415
+ parser->state = s_header_almost_done;
1416
+ CALLBACK_DATA(header_value);
1417
+ break;
1418
+ }
1419
+
1420
+ if (ch == LF) {
1421
+ parser->state = s_header_field_start;
1422
+ CALLBACK_DATA(header_value);
1423
+ break;
1424
+ }
1425
+
1426
+ c = LOWER(ch);
1427
+
1428
+ switch (parser->header_state) {
1429
+ case h_upgrade:
1430
+ parser->flags |= F_UPGRADE;
1431
+ parser->header_state = h_general;
1432
+ break;
1433
+
1434
+ case h_transfer_encoding:
1435
+ /* looking for 'Transfer-Encoding: chunked' */
1436
+ if ('c' == c) {
1437
+ parser->header_state = h_matching_transfer_encoding_chunked;
1438
+ } else {
1439
+ parser->header_state = h_general;
1440
+ }
1441
+ break;
1442
+
1443
+ case h_content_length:
1444
+ if (!IS_NUM(ch)) {
1445
+ SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1446
+ goto error;
1447
+ }
1448
+
1449
+ parser->content_length = ch - '0';
1450
+ break;
1451
+
1452
+ case h_connection:
1453
+ /* looking for 'Connection: keep-alive' */
1454
+ if (c == 'k') {
1455
+ parser->header_state = h_matching_connection_keep_alive;
1456
+ /* looking for 'Connection: close' */
1457
+ } else if (c == 'c') {
1458
+ parser->header_state = h_matching_connection_close;
1459
+ } else {
1460
+ parser->header_state = h_general;
1461
+ }
1462
+ break;
1463
+
1464
+ default:
1465
+ parser->header_state = h_general;
1466
+ break;
1467
+ }
1468
+ break;
1469
+ }
1470
+
1471
+ case s_header_value:
1472
+ {
1473
+
1474
+ if (ch == CR) {
1475
+ parser->state = s_header_almost_done;
1476
+ CALLBACK_DATA(header_value);
1477
+ break;
1478
+ }
1479
+
1480
+ if (ch == LF) {
1481
+ parser->state = s_header_almost_done;
1482
+ CALLBACK_DATA_NOADVANCE(header_value);
1483
+ goto reexecute_byte;
1484
+ }
1485
+
1486
+ c = LOWER(ch);
1487
+
1488
+ switch (parser->header_state) {
1489
+ case h_general:
1490
+ break;
1491
+
1492
+ case h_connection:
1493
+ case h_transfer_encoding:
1494
+ assert(0 && "Shouldn't get here.");
1495
+ break;
1496
+
1497
+ case h_content_length:
1498
+ {
1499
+ uint64_t t;
1500
+
1501
+ if (ch == ' ') break;
1502
+
1503
+ if (!IS_NUM(ch)) {
1504
+ SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1505
+ goto error;
1506
+ }
1507
+
1508
+ t = parser->content_length;
1509
+ t *= 10;
1510
+ t += ch - '0';
1511
+
1512
+ /* Overflow? Test against a conservative limit for simplicity. */
1513
+ if ((ULLONG_MAX - 10) / 10 < parser->content_length) {
1514
+ SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1515
+ goto error;
1516
+ }
1517
+
1518
+ parser->content_length = t;
1519
+ break;
1520
+ }
1521
+
1522
+ /* Transfer-Encoding: chunked */
1523
+ case h_matching_transfer_encoding_chunked:
1524
+ parser->index++;
1525
+ if (parser->index > sizeof(CHUNKED)-1
1526
+ || c != CHUNKED[parser->index]) {
1527
+ parser->header_state = h_general;
1528
+ } else if (parser->index == sizeof(CHUNKED)-2) {
1529
+ parser->header_state = h_transfer_encoding_chunked;
1530
+ }
1531
+ break;
1532
+
1533
+ /* looking for 'Connection: keep-alive' */
1534
+ case h_matching_connection_keep_alive:
1535
+ parser->index++;
1536
+ if (parser->index > sizeof(KEEP_ALIVE)-1
1537
+ || c != KEEP_ALIVE[parser->index]) {
1538
+ parser->header_state = h_general;
1539
+ } else if (parser->index == sizeof(KEEP_ALIVE)-2) {
1540
+ parser->header_state = h_connection_keep_alive;
1541
+ }
1542
+ break;
1543
+
1544
+ /* looking for 'Connection: close' */
1545
+ case h_matching_connection_close:
1546
+ parser->index++;
1547
+ if (parser->index > sizeof(CLOSE)-1 || c != CLOSE[parser->index]) {
1548
+ parser->header_state = h_general;
1549
+ } else if (parser->index == sizeof(CLOSE)-2) {
1550
+ parser->header_state = h_connection_close;
1551
+ }
1552
+ break;
1553
+
1554
+ case h_transfer_encoding_chunked:
1555
+ case h_connection_keep_alive:
1556
+ case h_connection_close:
1557
+ if (ch != ' ') parser->header_state = h_general;
1558
+ break;
1559
+
1560
+ default:
1561
+ parser->state = s_header_value;
1562
+ parser->header_state = h_general;
1563
+ break;
1564
+ }
1565
+ break;
1566
+ }
1567
+
1568
+ case s_header_almost_done:
1569
+ {
1570
+ STRICT_CHECK(ch != LF);
1571
+
1572
+ parser->state = s_header_value_lws;
1573
+
1574
+ switch (parser->header_state) {
1575
+ case h_connection_keep_alive:
1576
+ parser->flags |= F_CONNECTION_KEEP_ALIVE;
1577
+ break;
1578
+ case h_connection_close:
1579
+ parser->flags |= F_CONNECTION_CLOSE;
1580
+ break;
1581
+ case h_transfer_encoding_chunked:
1582
+ parser->flags |= F_CHUNKED;
1583
+ break;
1584
+ default:
1585
+ break;
1586
+ }
1587
+
1588
+ break;
1589
+ }
1590
+
1591
+ case s_header_value_lws:
1592
+ {
1593
+ if (ch == ' ' || ch == '\t')
1594
+ parser->state = s_header_value_start;
1595
+ else
1596
+ {
1597
+ parser->state = s_header_field_start;
1598
+ goto reexecute_byte;
1599
+ }
1600
+ break;
1601
+ }
1602
+
1603
+ case s_headers_almost_done:
1604
+ {
1605
+ STRICT_CHECK(ch != LF);
1606
+
1607
+ if (parser->flags & F_TRAILING) {
1608
+ /* End of a chunked request */
1609
+ parser->state = NEW_MESSAGE();
1610
+ CALLBACK_NOTIFY(message_complete);
1611
+ break;
1612
+ }
1613
+
1614
+ parser->state = s_headers_done;
1615
+
1616
+ /* Set this here so that on_headers_complete() callbacks can see it */
1617
+ parser->upgrade =
1618
+ (parser->flags & F_UPGRADE || parser->method == HTTP_CONNECT);
1619
+
1620
+ /* Here we call the headers_complete callback. This is somewhat
1621
+ * different than other callbacks because if the user returns 1, we
1622
+ * will interpret that as saying that this message has no body. This
1623
+ * is needed for the annoying case of receiving a response to a HEAD
1624
+ * request.
1625
+ *
1626
+ * We'd like to use CALLBACK_NOTIFY_NOADVANCE() here but we cannot, so
1627
+ * we have to simulate it by handling a change in errno below.
1628
+ */
1629
+ if (settings->on_headers_complete) {
1630
+ switch (settings->on_headers_complete(parser)) {
1631
+ case 0:
1632
+ break;
1633
+
1634
+ case 1:
1635
+ parser->flags |= F_SKIPBODY;
1636
+ break;
1637
+
1638
+ default:
1639
+ SET_ERRNO(HPE_CB_headers_complete);
1640
+ return p - data; /* Error */
1641
+ }
1642
+ }
1643
+
1644
+ if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
1645
+ return p - data;
1646
+ }
1647
+
1648
+ goto reexecute_byte;
1649
+ }
1650
+
1651
+ case s_headers_done:
1652
+ {
1653
+ STRICT_CHECK(ch != LF);
1654
+
1655
+ parser->nread = 0;
1656
+
1657
+ /* Exit, the rest of the connect is in a different protocol. */
1658
+ if (parser->upgrade) {
1659
+ parser->state = NEW_MESSAGE();
1660
+ CALLBACK_NOTIFY(message_complete);
1661
+ return (p - data) + 1;
1662
+ }
1663
+
1664
+ if (parser->flags & F_SKIPBODY) {
1665
+ parser->state = NEW_MESSAGE();
1666
+ CALLBACK_NOTIFY(message_complete);
1667
+ } else if (parser->flags & F_CHUNKED) {
1668
+ /* chunked encoding - ignore Content-Length header */
1669
+ parser->state = s_chunk_size_start;
1670
+ } else {
1671
+ if (parser->content_length == 0) {
1672
+ /* Content-Length header given but zero: Content-Length: 0\r\n */
1673
+ parser->state = NEW_MESSAGE();
1674
+ CALLBACK_NOTIFY(message_complete);
1675
+ } else if (parser->content_length != ULLONG_MAX) {
1676
+ /* Content-Length header given and non-zero */
1677
+ parser->state = s_body_identity;
1678
+ } else {
1679
+ if (parser->type == HTTP_REQUEST ||
1680
+ !http_message_needs_eof(parser)) {
1681
+ /* Assume content-length 0 - read the next */
1682
+ parser->state = NEW_MESSAGE();
1683
+ CALLBACK_NOTIFY(message_complete);
1684
+ } else {
1685
+ /* Read body until EOF */
1686
+ parser->state = s_body_identity_eof;
1687
+ }
1688
+ }
1689
+ }
1690
+
1691
+ break;
1692
+ }
1693
+
1694
+ case s_body_identity:
1695
+ {
1696
+ uint64_t to_read = MIN(parser->content_length,
1697
+ (uint64_t) ((data + len) - p));
1698
+
1699
+ assert(parser->content_length != 0
1700
+ && parser->content_length != ULLONG_MAX);
1701
+
1702
+ /* The difference between advancing content_length and p is because
1703
+ * the latter will automatically advance on the next loop iteration.
1704
+ * Further, if content_length ends up at 0, we want to see the last
1705
+ * byte again for our message complete callback.
1706
+ */
1707
+ MARK(body);
1708
+ parser->content_length -= to_read;
1709
+ p += to_read - 1;
1710
+
1711
+ if (parser->content_length == 0) {
1712
+ parser->state = s_message_done;
1713
+
1714
+ /* Mimic CALLBACK_DATA_NOADVANCE() but with one extra byte.
1715
+ *
1716
+ * The alternative to doing this is to wait for the next byte to
1717
+ * trigger the data callback, just as in every other case. The
1718
+ * problem with this is that this makes it difficult for the test
1719
+ * harness to distinguish between complete-on-EOF and
1720
+ * complete-on-length. It's not clear that this distinction is
1721
+ * important for applications, but let's keep it for now.
1722
+ */
1723
+ CALLBACK_DATA_(body, p - body_mark + 1, p - data);
1724
+ goto reexecute_byte;
1725
+ }
1726
+
1727
+ break;
1728
+ }
1729
+
1730
+ /* read until EOF */
1731
+ case s_body_identity_eof:
1732
+ MARK(body);
1733
+ p = data + len - 1;
1734
+
1735
+ break;
1736
+
1737
+ case s_message_done:
1738
+ parser->state = NEW_MESSAGE();
1739
+ CALLBACK_NOTIFY(message_complete);
1740
+ break;
1741
+
1742
+ case s_chunk_size_start:
1743
+ {
1744
+ assert(parser->nread == 1);
1745
+ assert(parser->flags & F_CHUNKED);
1746
+
1747
+ unhex_val = unhex[(unsigned char)ch];
1748
+ if (unhex_val == -1) {
1749
+ SET_ERRNO(HPE_INVALID_CHUNK_SIZE);
1750
+ goto error;
1751
+ }
1752
+
1753
+ parser->content_length = unhex_val;
1754
+ parser->state = s_chunk_size;
1755
+ break;
1756
+ }
1757
+
1758
+ case s_chunk_size:
1759
+ {
1760
+ uint64_t t;
1761
+
1762
+ assert(parser->flags & F_CHUNKED);
1763
+
1764
+ if (ch == CR) {
1765
+ parser->state = s_chunk_size_almost_done;
1766
+ break;
1767
+ }
1768
+
1769
+ unhex_val = unhex[(unsigned char)ch];
1770
+
1771
+ if (unhex_val == -1) {
1772
+ if (ch == ';' || ch == ' ') {
1773
+ parser->state = s_chunk_parameters;
1774
+ break;
1775
+ }
1776
+
1777
+ SET_ERRNO(HPE_INVALID_CHUNK_SIZE);
1778
+ goto error;
1779
+ }
1780
+
1781
+ t = parser->content_length;
1782
+ t *= 16;
1783
+ t += unhex_val;
1784
+
1785
+ /* Overflow? Test against a conservative limit for simplicity. */
1786
+ if ((ULLONG_MAX - 16) / 16 < parser->content_length) {
1787
+ SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1788
+ goto error;
1789
+ }
1790
+
1791
+ parser->content_length = t;
1792
+ break;
1793
+ }
1794
+
1795
+ case s_chunk_parameters:
1796
+ {
1797
+ assert(parser->flags & F_CHUNKED);
1798
+ /* just ignore this shit. TODO check for overflow */
1799
+ if (ch == CR) {
1800
+ parser->state = s_chunk_size_almost_done;
1801
+ break;
1802
+ }
1803
+ break;
1804
+ }
1805
+
1806
+ case s_chunk_size_almost_done:
1807
+ {
1808
+ assert(parser->flags & F_CHUNKED);
1809
+ STRICT_CHECK(ch != LF);
1810
+
1811
+ parser->nread = 0;
1812
+
1813
+ if (parser->content_length == 0) {
1814
+ parser->flags |= F_TRAILING;
1815
+ parser->state = s_header_field_start;
1816
+ } else {
1817
+ parser->state = s_chunk_data;
1818
+ }
1819
+ break;
1820
+ }
1821
+
1822
+ case s_chunk_data:
1823
+ {
1824
+ uint64_t to_read = MIN(parser->content_length,
1825
+ (uint64_t) ((data + len) - p));
1826
+
1827
+ assert(parser->flags & F_CHUNKED);
1828
+ assert(parser->content_length != 0
1829
+ && parser->content_length != ULLONG_MAX);
1830
+
1831
+ /* See the explanation in s_body_identity for why the content
1832
+ * length and data pointers are managed this way.
1833
+ */
1834
+ MARK(body);
1835
+ parser->content_length -= to_read;
1836
+ p += to_read - 1;
1837
+
1838
+ if (parser->content_length == 0) {
1839
+ parser->state = s_chunk_data_almost_done;
1840
+ }
1841
+
1842
+ break;
1843
+ }
1844
+
1845
+ case s_chunk_data_almost_done:
1846
+ assert(parser->flags & F_CHUNKED);
1847
+ assert(parser->content_length == 0);
1848
+ STRICT_CHECK(ch != CR);
1849
+ parser->state = s_chunk_data_done;
1850
+ CALLBACK_DATA(body);
1851
+ break;
1852
+
1853
+ case s_chunk_data_done:
1854
+ assert(parser->flags & F_CHUNKED);
1855
+ STRICT_CHECK(ch != LF);
1856
+ parser->nread = 0;
1857
+ parser->state = s_chunk_size_start;
1858
+ break;
1859
+
1860
+ default:
1861
+ assert(0 && "unhandled state");
1862
+ SET_ERRNO(HPE_INVALID_INTERNAL_STATE);
1863
+ goto error;
1864
+ }
1865
+ }
1866
+
1867
+ /* Run callbacks for any marks that we have leftover after we ran out of
1868
+ * bytes. There should be at most one of these set, so it's OK to invoke
1869
+ * them in series (unset marks will not result in callbacks).
1870
+ *
1871
+ * We use the NOADVANCE() variety of callbacks here because 'p' has already
1872
+ * overflowed 'data' and this allows us to correct for the off-by-one that
1873
+ * we'd otherwise have (since CALLBACK_DATA() is meant to be run with a 'p'
1874
+ * value that's in-bounds).
1875
+ */
1876
+
1877
+ assert(((header_field_mark ? 1 : 0) +
1878
+ (header_value_mark ? 1 : 0) +
1879
+ (url_mark ? 1 : 0) +
1880
+ (body_mark ? 1 : 0) +
1881
+ (status_mark ? 1 : 0)) <= 1);
1882
+
1883
+ CALLBACK_DATA_NOADVANCE(header_field);
1884
+ CALLBACK_DATA_NOADVANCE(header_value);
1885
+ CALLBACK_DATA_NOADVANCE(url);
1886
+ CALLBACK_DATA_NOADVANCE(body);
1887
+ CALLBACK_DATA_NOADVANCE(status);
1888
+
1889
+ return len;
1890
+
1891
+ error:
1892
+ if (HTTP_PARSER_ERRNO(parser) == HPE_OK) {
1893
+ SET_ERRNO(HPE_UNKNOWN);
1894
+ }
1895
+
1896
+ return (p - data);
1897
+ }
1898
+
1899
+
1900
+ /* Does the parser need to see an EOF to find the end of the message? */
1901
+ int
1902
+ http_message_needs_eof (const http_parser *parser)
1903
+ {
1904
+ if (parser->type == HTTP_REQUEST) {
1905
+ return 0;
1906
+ }
1907
+
1908
+ /* See RFC 2616 section 4.4 */
1909
+ if (parser->status_code / 100 == 1 || /* 1xx e.g. Continue */
1910
+ parser->status_code == 204 || /* No Content */
1911
+ parser->status_code == 304 || /* Not Modified */
1912
+ parser->flags & F_SKIPBODY) { /* response to a HEAD request */
1913
+ return 0;
1914
+ }
1915
+
1916
+ if ((parser->flags & F_CHUNKED) || parser->content_length != ULLONG_MAX) {
1917
+ return 0;
1918
+ }
1919
+
1920
+ return 1;
1921
+ }
1922
+
1923
+
1924
+ int
1925
+ http_should_keep_alive (const http_parser *parser)
1926
+ {
1927
+ if (parser->http_major > 0 && parser->http_minor > 0) {
1928
+ /* HTTP/1.1 */
1929
+ if (parser->flags & F_CONNECTION_CLOSE) {
1930
+ return 0;
1931
+ }
1932
+ } else {
1933
+ /* HTTP/1.0 or earlier */
1934
+ if (!(parser->flags & F_CONNECTION_KEEP_ALIVE)) {
1935
+ return 0;
1936
+ }
1937
+ }
1938
+
1939
+ return !http_message_needs_eof(parser);
1940
+ }
1941
+
1942
+
1943
+ const char *
1944
+ http_method_str (enum http_method m)
1945
+ {
1946
+ return ELEM_AT(method_strings, m, "<unknown>");
1947
+ }
1948
+
1949
+
1950
+ void
1951
+ http_parser_init (http_parser *parser, enum http_parser_type t)
1952
+ {
1953
+ void *data = parser->data; /* preserve application data */
1954
+ memset(parser, 0, sizeof(*parser));
1955
+ parser->data = data;
1956
+ parser->type = t;
1957
+ parser->state = (t == HTTP_REQUEST ? s_start_req : (t == HTTP_RESPONSE ? s_start_res : s_start_req_or_res));
1958
+ parser->http_errno = HPE_OK;
1959
+ }
1960
+
1961
+ const char *
1962
+ http_errno_name(enum http_errno err) {
1963
+ assert(err < (sizeof(http_strerror_tab)/sizeof(http_strerror_tab[0])));
1964
+ return http_strerror_tab[err].name;
1965
+ }
1966
+
1967
+ const char *
1968
+ http_errno_description(enum http_errno err) {
1969
+ assert(err < (sizeof(http_strerror_tab)/sizeof(http_strerror_tab[0])));
1970
+ return http_strerror_tab[err].description;
1971
+ }
1972
+
1973
+ static enum http_host_state
1974
+ http_parse_host_char(enum http_host_state s, const char ch) {
1975
+ switch(s) {
1976
+ case s_http_userinfo:
1977
+ case s_http_userinfo_start:
1978
+ if (ch == '@') {
1979
+ return s_http_host_start;
1980
+ }
1981
+
1982
+ if (IS_USERINFO_CHAR(ch)) {
1983
+ return s_http_userinfo;
1984
+ }
1985
+ break;
1986
+
1987
+ case s_http_host_start:
1988
+ if (ch == '[') {
1989
+ return s_http_host_v6_start;
1990
+ }
1991
+
1992
+ if (IS_HOST_CHAR(ch)) {
1993
+ return s_http_host;
1994
+ }
1995
+
1996
+ break;
1997
+
1998
+ case s_http_host:
1999
+ if (IS_HOST_CHAR(ch)) {
2000
+ return s_http_host;
2001
+ }
2002
+
2003
+ /* FALLTHROUGH */
2004
+ case s_http_host_v6_end:
2005
+ if (ch == ':') {
2006
+ return s_http_host_port_start;
2007
+ }
2008
+
2009
+ break;
2010
+
2011
+ case s_http_host_v6:
2012
+ if (ch == ']') {
2013
+ return s_http_host_v6_end;
2014
+ }
2015
+
2016
+ /* FALLTHROUGH */
2017
+ case s_http_host_v6_start:
2018
+ if (IS_HEX(ch) || ch == ':' || ch == '.') {
2019
+ return s_http_host_v6;
2020
+ }
2021
+
2022
+ break;
2023
+
2024
+ case s_http_host_port:
2025
+ case s_http_host_port_start:
2026
+ if (IS_NUM(ch)) {
2027
+ return s_http_host_port;
2028
+ }
2029
+
2030
+ break;
2031
+
2032
+ default:
2033
+ break;
2034
+ }
2035
+ return s_http_host_dead;
2036
+ }
2037
+
2038
+ static int
2039
+ http_parse_host(const char * buf, struct http_parser_url *u, int found_at) {
2040
+ enum http_host_state s;
2041
+
2042
+ const char *p;
2043
+ size_t buflen = u->field_data[UF_HOST].off + u->field_data[UF_HOST].len;
2044
+
2045
+ u->field_data[UF_HOST].len = 0;
2046
+
2047
+ s = found_at ? s_http_userinfo_start : s_http_host_start;
2048
+
2049
+ for (p = buf + u->field_data[UF_HOST].off; p < buf + buflen; p++) {
2050
+ enum http_host_state new_s = http_parse_host_char(s, *p);
2051
+
2052
+ if (new_s == s_http_host_dead) {
2053
+ return 1;
2054
+ }
2055
+
2056
+ switch(new_s) {
2057
+ case s_http_host:
2058
+ if (s != s_http_host) {
2059
+ u->field_data[UF_HOST].off = p - buf;
2060
+ }
2061
+ u->field_data[UF_HOST].len++;
2062
+ break;
2063
+
2064
+ case s_http_host_v6:
2065
+ if (s != s_http_host_v6) {
2066
+ u->field_data[UF_HOST].off = p - buf;
2067
+ }
2068
+ u->field_data[UF_HOST].len++;
2069
+ break;
2070
+
2071
+ case s_http_host_port:
2072
+ if (s != s_http_host_port) {
2073
+ u->field_data[UF_PORT].off = p - buf;
2074
+ u->field_data[UF_PORT].len = 0;
2075
+ u->field_set |= (1 << UF_PORT);
2076
+ }
2077
+ u->field_data[UF_PORT].len++;
2078
+ break;
2079
+
2080
+ case s_http_userinfo:
2081
+ if (s != s_http_userinfo) {
2082
+ u->field_data[UF_USERINFO].off = p - buf ;
2083
+ u->field_data[UF_USERINFO].len = 0;
2084
+ u->field_set |= (1 << UF_USERINFO);
2085
+ }
2086
+ u->field_data[UF_USERINFO].len++;
2087
+ break;
2088
+
2089
+ default:
2090
+ break;
2091
+ }
2092
+ s = new_s;
2093
+ }
2094
+
2095
+ /* Make sure we don't end somewhere unexpected */
2096
+ switch (s) {
2097
+ case s_http_host_start:
2098
+ case s_http_host_v6_start:
2099
+ case s_http_host_v6:
2100
+ case s_http_host_port_start:
2101
+ case s_http_userinfo:
2102
+ case s_http_userinfo_start:
2103
+ return 1;
2104
+ default:
2105
+ break;
2106
+ }
2107
+
2108
+ return 0;
2109
+ }
2110
+
2111
+ int
2112
+ http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
2113
+ struct http_parser_url *u)
2114
+ {
2115
+ enum state s;
2116
+ const char *p;
2117
+ enum http_parser_url_fields uf, old_uf;
2118
+ int found_at = 0;
2119
+
2120
+ u->port = u->field_set = 0;
2121
+ s = is_connect ? s_req_server_start : s_req_spaces_before_url;
2122
+ uf = old_uf = UF_MAX;
2123
+
2124
+ for (p = buf; p < buf + buflen; p++) {
2125
+ s = parse_url_char(s, *p);
2126
+
2127
+ /* Figure out the next field that we're operating on */
2128
+ switch (s) {
2129
+ case s_dead:
2130
+ return 1;
2131
+
2132
+ /* Skip delimeters */
2133
+ case s_req_schema_slash:
2134
+ case s_req_schema_slash_slash:
2135
+ case s_req_server_start:
2136
+ case s_req_query_string_start:
2137
+ case s_req_fragment_start:
2138
+ continue;
2139
+
2140
+ case s_req_schema:
2141
+ uf = UF_SCHEMA;
2142
+ break;
2143
+
2144
+ case s_req_server_with_at:
2145
+ found_at = 1;
2146
+
2147
+ /* FALLTROUGH */
2148
+ case s_req_server:
2149
+ uf = UF_HOST;
2150
+ break;
2151
+
2152
+ case s_req_path:
2153
+ uf = UF_PATH;
2154
+ break;
2155
+
2156
+ case s_req_query_string:
2157
+ uf = UF_QUERY;
2158
+ break;
2159
+
2160
+ case s_req_fragment:
2161
+ uf = UF_FRAGMENT;
2162
+ break;
2163
+
2164
+ default:
2165
+ assert(!"Unexpected state");
2166
+ return 1;
2167
+ }
2168
+
2169
+ /* Nothing's changed; soldier on */
2170
+ if (uf == old_uf) {
2171
+ u->field_data[uf].len++;
2172
+ continue;
2173
+ }
2174
+
2175
+ u->field_data[uf].off = p - buf;
2176
+ u->field_data[uf].len = 1;
2177
+
2178
+ u->field_set |= (1 << uf);
2179
+ old_uf = uf;
2180
+ }
2181
+
2182
+ /* host must be present if there is a schema */
2183
+ /* parsing http:///toto will fail */
2184
+ if ((u->field_set & ((1 << UF_SCHEMA) | (1 << UF_HOST))) != 0) {
2185
+ if (http_parse_host(buf, u, found_at) != 0) {
2186
+ return 1;
2187
+ }
2188
+ }
2189
+
2190
+ /* CONNECT requests can only contain "hostname:port" */
2191
+ if (is_connect && u->field_set != ((1 << UF_HOST)|(1 << UF_PORT))) {
2192
+ return 1;
2193
+ }
2194
+
2195
+ if (u->field_set & (1 << UF_PORT)) {
2196
+ /* Don't bother with endp; we've already validated the string */
2197
+ unsigned long v = strtoul(buf + u->field_data[UF_PORT].off, NULL, 10);
2198
+
2199
+ /* Ports have a max value of 2^16 */
2200
+ if (v > 0xffff) {
2201
+ return 1;
2202
+ }
2203
+
2204
+ u->port = (uint16_t) v;
2205
+ }
2206
+
2207
+ return 0;
2208
+ }
2209
+
2210
+ void
2211
+ http_parser_pause(http_parser *parser, int paused) {
2212
+ /* Users should only be pausing/unpausing a parser that is not in an error
2213
+ * state. In non-debug builds, there's not much that we can do about this
2214
+ * other than ignore it.
2215
+ */
2216
+ if (HTTP_PARSER_ERRNO(parser) == HPE_OK ||
2217
+ HTTP_PARSER_ERRNO(parser) == HPE_PAUSED) {
2218
+ SET_ERRNO((paused) ? HPE_PAUSED : HPE_OK);
2219
+ } else {
2220
+ assert(0 && "Attempting to pause parser in error state");
2221
+ }
2222
+ }
2223
+
2224
+ int
2225
+ http_body_is_final(const struct http_parser *parser) {
2226
+ return parser->state == s_message_done;
2227
+ }
2228
+
2229
+ unsigned long
2230
+ http_parser_version(void) {
2231
+ return HTTP_PARSER_VERSION_MAJOR * 0x10000 |
2232
+ HTTP_PARSER_VERSION_MINOR * 0x00100 |
2233
+ HTTP_PARSER_VERSION_PATCH * 0x00001;
2234
+ }