http-parser 1.0.4 → 1.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: c7c15574adec9aefd70f9ad96c542b70ce3b3727
4
- data.tar.gz: ebcfbcfbe13fa5e1155e4c0a41f242ea96b1e8a7
3
+ metadata.gz: 8c93598e31fa92f5acb831a75e7514f21180af80
4
+ data.tar.gz: 935ef47585bf17f3a1b4a107d07774c60e300f6b
5
5
  SHA512:
6
- metadata.gz: 0e3a987b169359afe6229340fb4c28e74c411beb302f767bb45f836cc8579598c1d9b89286e9aeb4bf3c114d4fed9b87b5952510c36da676b063e4a60007ee4c
7
- data.tar.gz: 6a531fd9df9c4f011a7163eb1e87dc260fa0a9ee6c201c9e8156f7b86db0b636f579ca3e094535d16a1bc0541c909f276026811de2ee9f5176177c17cb19f05d
6
+ metadata.gz: 271c6b54982d6854b00873a2d217dba0551adc69a920fb4748a6f22d9d43573d1cb8f96e3b7b06942db510ac4d27906ae0e9cde60a5ee78f08f47524fca83643
7
+ data.tar.gz: 159035cb10bc1b947b29cdb3ee27f6e1ae1570e7cf4df42544c1cbf7a81065a2e32e7390deae2d686d491bc6b7301e216446c97f43f69f711f7de61bb1afddd1
data/LICENSE CHANGED
@@ -1,20 +1,20 @@
1
- The MIT License (MIT)
2
-
3
- Copyright (c) 2013 CoTag Media
4
-
5
- Permission is hereby granted, free of charge, to any person obtaining a copy of
6
- this software and associated documentation files (the "Software"), to deal in
7
- the Software without restriction, including without limitation the rights to
8
- use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
9
- the Software, and to permit persons to whom the Software is furnished to do so,
10
- subject to the following conditions:
11
-
12
- The above copyright notice and this permission notice shall be included in all
13
- copies or substantial portions of the Software.
14
-
15
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
17
- FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
18
- COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
19
- IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20
- CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2013 CoTag Media
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy of
6
+ this software and associated documentation files (the "Software"), to deal in
7
+ the Software without restriction, including without limitation the rights to
8
+ use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
9
+ the Software, and to permit persons to whom the Software is furnished to do so,
10
+ subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
17
+ FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
18
+ COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
19
+ IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20
+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md CHANGED
@@ -1,70 +1,70 @@
1
- # http-parser
2
-
3
- Ruby FFI bindings to [http-parser](https://github.com/joyent/http-parser) [![Build Status](https://travis-ci.org/cotag/http-parser.png)](https://travis-ci.org/cotag/http-parser)
4
-
5
- ## Install
6
-
7
- ```shell
8
- gem install http-parser
9
- ```
10
- This gem will compile a local copy of http-parser
11
-
12
-
13
- ## Usage
14
-
15
- ```ruby
16
- require 'rubygems'
17
- require 'http-parser'
18
-
19
- #
20
- # Create a shared parser
21
- #
22
- parser = HttpParser::Parser.new do |parser|
23
- parser.on_message_begin do |inst|
24
- puts "message begin"
25
- end
26
-
27
- parser.on_message_complete do |inst|
28
- puts "message end"
29
- end
30
-
31
- parser.on_url do |inst, data|
32
- puts "url: #{data}"
33
- end
34
-
35
- parser.on_header_field do |inst, data|
36
- puts "field: #{data}"
37
- end
38
-
39
- parser.on_header_value do |inst, data|
40
- puts "value: #{data}"
41
- end
42
- end
43
-
44
- #
45
- # Create state objects to track requests through the parser
46
- #
47
- request = HttpParser::Parser.new_instance do |inst|
48
- inst.type = :request
49
- end
50
-
51
- #
52
- # Parse requests
53
- #
54
- parser.parse request, "GET /foo HTTP/1.1\r\n"
55
- sleep 3
56
- parser.parse request, "Host: example.com\r\n"
57
- sleep 3
58
- parser.parse request, "\r\n"
59
-
60
- #
61
- # Re-use the memory for another request
62
- #
63
- request.reset!
64
- ```
65
-
66
- ## Acknowledgements
67
-
68
- * https://github.com/joyent/http-parser#readme
69
- * https://github.com/postmodern/ffi-http-parser#readme
1
+ # http-parser
2
+
3
+ Ruby FFI bindings to [http-parser](https://github.com/joyent/http-parser) [![Build Status](https://travis-ci.org/cotag/http-parser.png)](https://travis-ci.org/cotag/http-parser)
4
+
5
+ ## Install
6
+
7
+ ```shell
8
+ gem install http-parser
9
+ ```
10
+ This gem will compile a local copy of http-parser
11
+
12
+
13
+ ## Usage
14
+
15
+ ```ruby
16
+ require 'rubygems'
17
+ require 'http-parser'
18
+
19
+ #
20
+ # Create a shared parser
21
+ #
22
+ parser = HttpParser::Parser.new do |parser|
23
+ parser.on_message_begin do |inst|
24
+ puts "message begin"
25
+ end
26
+
27
+ parser.on_message_complete do |inst|
28
+ puts "message end"
29
+ end
30
+
31
+ parser.on_url do |inst, data|
32
+ puts "url: #{data}"
33
+ end
34
+
35
+ parser.on_header_field do |inst, data|
36
+ puts "field: #{data}"
37
+ end
38
+
39
+ parser.on_header_value do |inst, data|
40
+ puts "value: #{data}"
41
+ end
42
+ end
43
+
44
+ #
45
+ # Create state objects to track requests through the parser
46
+ #
47
+ request = HttpParser::Parser.new_instance do |inst|
48
+ inst.type = :request
49
+ end
50
+
51
+ #
52
+ # Parse requests
53
+ #
54
+ parser.parse request, "GET /foo HTTP/1.1\r\n"
55
+ sleep 3
56
+ parser.parse request, "Host: example.com\r\n"
57
+ sleep 3
58
+ parser.parse request, "\r\n"
59
+
60
+ #
61
+ # Re-use the memory for another request
62
+ #
63
+ request.reset!
64
+ ```
65
+
66
+ ## Acknowledgements
67
+
68
+ * https://github.com/joyent/http-parser#readme
69
+ * https://github.com/postmodern/ffi-http-parser#readme
70
70
  * https://github.com/deepfryed/http-parser-lite#readme
data/Rakefile CHANGED
@@ -1,19 +1,19 @@
1
- require 'rubygems'
2
- require 'rake'
3
- require 'rspec/core/rake_task'
4
-
5
- task :default => [:compile, :test]
6
-
7
- task :compile do
8
- protect = ['http_parser.c', 'http_parser.h']
9
- Dir["ext/http-parser/**/*"].each do |file|
10
- begin
11
- next if protect.include? File.basename(file)
12
- FileUtils.rm file
13
- rescue
14
- end
15
- end
16
- system 'cd ext && rake'
17
- end
18
-
19
- RSpec::Core::RakeTask.new(:test)
1
+ require 'rubygems'
2
+ require 'rake'
3
+ require 'rspec/core/rake_task'
4
+
5
+ task :default => [:compile, :test]
6
+
7
+ task :compile do
8
+ protect = ['http_parser.c', 'http_parser.h']
9
+ Dir["ext/http-parser/**/*"].each do |file|
10
+ begin
11
+ next if protect.include? File.basename(file)
12
+ FileUtils.rm file
13
+ rescue
14
+ end
15
+ end
16
+ system 'cd ext && rake'
17
+ end
18
+
19
+ RSpec::Core::RakeTask.new(:test)
@@ -1,8 +1,8 @@
1
- require 'ffi-compiler/compile_task'
2
-
3
- FFI::Compiler::CompileTask.new('http-parser-ext') do |t|
4
- t.cflags << "-Wall -Wextra -O3"
5
- t.cflags << "-D_GNU_SOURCE=1" if RbConfig::CONFIG["host_os"].downcase =~ /mingw/
6
- t.cflags << "-arch x86_64 -arch i386" if t.platform.mac?
7
- t.ldflags << "-arch x86_64 -arch i386" if t.platform.mac?
8
- end
1
+ require 'ffi-compiler/compile_task'
2
+
3
+ FFI::Compiler::CompileTask.new('http-parser-ext') do |t|
4
+ t.cflags << "-Wall -Wextra -O3"
5
+ t.cflags << "-D_GNU_SOURCE=1" if RbConfig::CONFIG["host_os"].downcase =~ /mingw/
6
+ t.cflags << "-arch x86_64 -arch i386" if t.platform.mac?
7
+ t.ldflags << "-arch x86_64 -arch i386" if t.platform.mac?
8
+ end
@@ -1,2234 +1,2234 @@
1
- /* Based on src/http/ngx_http_parse.c from NGINX copyright Igor Sysoev
2
- *
3
- * Additional changes are licensed under the same terms as NGINX and
4
- * copyright Joyent, Inc. and other Node contributors. All rights reserved.
5
- *
6
- * Permission is hereby granted, free of charge, to any person obtaining a copy
7
- * of this software and associated documentation files (the "Software"), to
8
- * deal in the Software without restriction, including without limitation the
9
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10
- * sell copies of the Software, and to permit persons to whom the Software is
11
- * furnished to do so, subject to the following conditions:
12
- *
13
- * The above copyright notice and this permission notice shall be included in
14
- * all copies or substantial portions of the Software.
15
- *
16
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22
- * IN THE SOFTWARE.
23
- */
24
- #include "http_parser.h"
25
- #include <assert.h>
26
- #include <stddef.h>
27
- #include <ctype.h>
28
- #include <stdlib.h>
29
- #include <string.h>
30
- #include <limits.h>
31
-
32
- #ifndef ULLONG_MAX
33
- # define ULLONG_MAX ((uint64_t) -1) /* 2^64-1 */
34
- #endif
35
-
36
- #ifndef MIN
37
- # define MIN(a,b) ((a) < (b) ? (a) : (b))
38
- #endif
39
-
40
- #ifndef ARRAY_SIZE
41
- # define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
42
- #endif
43
-
44
- #ifndef BIT_AT
45
- # define BIT_AT(a, i) \
46
- (!!((unsigned int) (a)[(unsigned int) (i) >> 3] & \
47
- (1 << ((unsigned int) (i) & 7))))
48
- #endif
49
-
50
- #ifndef ELEM_AT
51
- # define ELEM_AT(a, i, v) ((unsigned int) (i) < ARRAY_SIZE(a) ? (a)[(i)] : (v))
52
- #endif
53
-
54
- #define SET_ERRNO(e) \
55
- do { \
56
- parser->http_errno = (e); \
57
- } while(0)
58
-
59
-
60
- /* Run the notify callback FOR, returning ER if it fails */
61
- #define CALLBACK_NOTIFY_(FOR, ER) \
62
- do { \
63
- assert(HTTP_PARSER_ERRNO(parser) == HPE_OK); \
64
- \
65
- if (settings->on_##FOR) { \
66
- if (0 != settings->on_##FOR(parser)) { \
67
- SET_ERRNO(HPE_CB_##FOR); \
68
- } \
69
- \
70
- /* We either errored above or got paused; get out */ \
71
- if (HTTP_PARSER_ERRNO(parser) != HPE_OK) { \
72
- return (ER); \
73
- } \
74
- } \
75
- } while (0)
76
-
77
- /* Run the notify callback FOR and consume the current byte */
78
- #define CALLBACK_NOTIFY(FOR) CALLBACK_NOTIFY_(FOR, p - data + 1)
79
-
80
- /* Run the notify callback FOR and don't consume the current byte */
81
- #define CALLBACK_NOTIFY_NOADVANCE(FOR) CALLBACK_NOTIFY_(FOR, p - data)
82
-
83
- /* Run data callback FOR with LEN bytes, returning ER if it fails */
84
- #define CALLBACK_DATA_(FOR, LEN, ER) \
85
- do { \
86
- assert(HTTP_PARSER_ERRNO(parser) == HPE_OK); \
87
- \
88
- if (FOR##_mark) { \
89
- if (settings->on_##FOR) { \
90
- if (0 != settings->on_##FOR(parser, FOR##_mark, (LEN))) { \
91
- SET_ERRNO(HPE_CB_##FOR); \
92
- } \
93
- \
94
- /* We either errored above or got paused; get out */ \
95
- if (HTTP_PARSER_ERRNO(parser) != HPE_OK) { \
96
- return (ER); \
97
- } \
98
- } \
99
- FOR##_mark = NULL; \
100
- } \
101
- } while (0)
102
-
103
- /* Run the data callback FOR and consume the current byte */
104
- #define CALLBACK_DATA(FOR) \
105
- CALLBACK_DATA_(FOR, p - FOR##_mark, p - data + 1)
106
-
107
- /* Run the data callback FOR and don't consume the current byte */
108
- #define CALLBACK_DATA_NOADVANCE(FOR) \
109
- CALLBACK_DATA_(FOR, p - FOR##_mark, p - data)
110
-
111
- /* Set the mark FOR; non-destructive if mark is already set */
112
- #define MARK(FOR) \
113
- do { \
114
- if (!FOR##_mark) { \
115
- FOR##_mark = p; \
116
- } \
117
- } while (0)
118
-
119
-
120
- #define PROXY_CONNECTION "proxy-connection"
121
- #define CONNECTION "connection"
122
- #define CONTENT_LENGTH "content-length"
123
- #define TRANSFER_ENCODING "transfer-encoding"
124
- #define UPGRADE "upgrade"
125
- #define CHUNKED "chunked"
126
- #define KEEP_ALIVE "keep-alive"
127
- #define CLOSE "close"
128
-
129
-
130
- static const char *method_strings[] =
131
- {
132
- #define XX(num, name, string) #string,
133
- HTTP_METHOD_MAP(XX)
134
- #undef XX
135
- };
136
-
137
-
138
- /* Tokens as defined by rfc 2616. Also lowercases them.
139
- * token = 1*<any CHAR except CTLs or separators>
140
- * separators = "(" | ")" | "<" | ">" | "@"
141
- * | "," | ";" | ":" | "\" | <">
142
- * | "/" | "[" | "]" | "?" | "="
143
- * | "{" | "}" | SP | HT
144
- */
145
- static const char tokens[256] = {
146
- /* 0 nul 1 soh 2 stx 3 etx 4 eot 5 enq 6 ack 7 bel */
147
- 0, 0, 0, 0, 0, 0, 0, 0,
148
- /* 8 bs 9 ht 10 nl 11 vt 12 np 13 cr 14 so 15 si */
149
- 0, 0, 0, 0, 0, 0, 0, 0,
150
- /* 16 dle 17 dc1 18 dc2 19 dc3 20 dc4 21 nak 22 syn 23 etb */
151
- 0, 0, 0, 0, 0, 0, 0, 0,
152
- /* 24 can 25 em 26 sub 27 esc 28 fs 29 gs 30 rs 31 us */
153
- 0, 0, 0, 0, 0, 0, 0, 0,
154
- /* 32 sp 33 ! 34 " 35 # 36 $ 37 % 38 & 39 ' */
155
- 0, '!', 0, '#', '$', '%', '&', '\'',
156
- /* 40 ( 41 ) 42 * 43 + 44 , 45 - 46 . 47 / */
157
- 0, 0, '*', '+', 0, '-', '.', 0,
158
- /* 48 0 49 1 50 2 51 3 52 4 53 5 54 6 55 7 */
159
- '0', '1', '2', '3', '4', '5', '6', '7',
160
- /* 56 8 57 9 58 : 59 ; 60 < 61 = 62 > 63 ? */
161
- '8', '9', 0, 0, 0, 0, 0, 0,
162
- /* 64 @ 65 A 66 B 67 C 68 D 69 E 70 F 71 G */
163
- 0, 'a', 'b', 'c', 'd', 'e', 'f', 'g',
164
- /* 72 H 73 I 74 J 75 K 76 L 77 M 78 N 79 O */
165
- 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
166
- /* 80 P 81 Q 82 R 83 S 84 T 85 U 86 V 87 W */
167
- 'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
168
- /* 88 X 89 Y 90 Z 91 [ 92 \ 93 ] 94 ^ 95 _ */
169
- 'x', 'y', 'z', 0, 0, 0, '^', '_',
170
- /* 96 ` 97 a 98 b 99 c 100 d 101 e 102 f 103 g */
171
- '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g',
172
- /* 104 h 105 i 106 j 107 k 108 l 109 m 110 n 111 o */
173
- 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
174
- /* 112 p 113 q 114 r 115 s 116 t 117 u 118 v 119 w */
175
- 'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
176
- /* 120 x 121 y 122 z 123 { 124 | 125 } 126 ~ 127 del */
177
- 'x', 'y', 'z', 0, '|', 0, '~', 0 };
178
-
179
-
180
- static const int8_t unhex[256] =
181
- {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
182
- ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
183
- ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
184
- , 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1
185
- ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1
186
- ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
187
- ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1
188
- ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
189
- };
190
-
191
-
192
- #if HTTP_PARSER_STRICT
193
- # define T(v) 0
194
- #else
195
- # define T(v) v
196
- #endif
197
-
198
-
199
- static const uint8_t normal_url_char[32] = {
200
- /* 0 nul 1 soh 2 stx 3 etx 4 eot 5 enq 6 ack 7 bel */
201
- 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0,
202
- /* 8 bs 9 ht 10 nl 11 vt 12 np 13 cr 14 so 15 si */
203
- 0 | T(2) | 0 | 0 | T(16) | 0 | 0 | 0,
204
- /* 16 dle 17 dc1 18 dc2 19 dc3 20 dc4 21 nak 22 syn 23 etb */
205
- 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0,
206
- /* 24 can 25 em 26 sub 27 esc 28 fs 29 gs 30 rs 31 us */
207
- 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0,
208
- /* 32 sp 33 ! 34 " 35 # 36 $ 37 % 38 & 39 ' */
209
- 0 | 2 | 4 | 0 | 16 | 32 | 64 | 128,
210
- /* 40 ( 41 ) 42 * 43 + 44 , 45 - 46 . 47 / */
211
- 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
212
- /* 48 0 49 1 50 2 51 3 52 4 53 5 54 6 55 7 */
213
- 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
214
- /* 56 8 57 9 58 : 59 ; 60 < 61 = 62 > 63 ? */
215
- 1 | 2 | 4 | 8 | 16 | 32 | 64 | 0,
216
- /* 64 @ 65 A 66 B 67 C 68 D 69 E 70 F 71 G */
217
- 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
218
- /* 72 H 73 I 74 J 75 K 76 L 77 M 78 N 79 O */
219
- 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
220
- /* 80 P 81 Q 82 R 83 S 84 T 85 U 86 V 87 W */
221
- 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
222
- /* 88 X 89 Y 90 Z 91 [ 92 \ 93 ] 94 ^ 95 _ */
223
- 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
224
- /* 96 ` 97 a 98 b 99 c 100 d 101 e 102 f 103 g */
225
- 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
226
- /* 104 h 105 i 106 j 107 k 108 l 109 m 110 n 111 o */
227
- 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
228
- /* 112 p 113 q 114 r 115 s 116 t 117 u 118 v 119 w */
229
- 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
230
- /* 120 x 121 y 122 z 123 { 124 | 125 } 126 ~ 127 del */
231
- 1 | 2 | 4 | 8 | 16 | 32 | 64 | 0, };
232
-
233
- #undef T
234
-
235
- enum state
236
- { s_dead = 1 /* important that this is > 0 */
237
-
238
- , s_start_req_or_res
239
- , s_res_or_resp_H
240
- , s_start_res
241
- , s_res_H
242
- , s_res_HT
243
- , s_res_HTT
244
- , s_res_HTTP
245
- , s_res_first_http_major
246
- , s_res_http_major
247
- , s_res_first_http_minor
248
- , s_res_http_minor
249
- , s_res_first_status_code
250
- , s_res_status_code
251
- , s_res_status_start
252
- , s_res_status
253
- , s_res_line_almost_done
254
-
255
- , s_start_req
256
-
257
- , s_req_method
258
- , s_req_spaces_before_url
259
- , s_req_schema
260
- , s_req_schema_slash
261
- , s_req_schema_slash_slash
262
- , s_req_server_start
263
- , s_req_server
264
- , s_req_server_with_at
265
- , s_req_path
266
- , s_req_query_string_start
267
- , s_req_query_string
268
- , s_req_fragment_start
269
- , s_req_fragment
270
- , s_req_http_start
271
- , s_req_http_H
272
- , s_req_http_HT
273
- , s_req_http_HTT
274
- , s_req_http_HTTP
275
- , s_req_first_http_major
276
- , s_req_http_major
277
- , s_req_first_http_minor
278
- , s_req_http_minor
279
- , s_req_line_almost_done
280
-
281
- , s_header_field_start
282
- , s_header_field
283
- , s_header_value_start
284
- , s_header_value
285
- , s_header_value_lws
286
-
287
- , s_header_almost_done
288
-
289
- , s_chunk_size_start
290
- , s_chunk_size
291
- , s_chunk_parameters
292
- , s_chunk_size_almost_done
293
-
294
- , s_headers_almost_done
295
- , s_headers_done
296
-
297
- /* Important: 's_headers_done' must be the last 'header' state. All
298
- * states beyond this must be 'body' states. It is used for overflow
299
- * checking. See the PARSING_HEADER() macro.
300
- */
301
-
302
- , s_chunk_data
303
- , s_chunk_data_almost_done
304
- , s_chunk_data_done
305
-
306
- , s_body_identity
307
- , s_body_identity_eof
308
-
309
- , s_message_done
310
- };
311
-
312
-
313
- #define PARSING_HEADER(state) (state <= s_headers_done)
314
-
315
-
316
- enum header_states
317
- { h_general = 0
318
- , h_C
319
- , h_CO
320
- , h_CON
321
-
322
- , h_matching_connection
323
- , h_matching_proxy_connection
324
- , h_matching_content_length
325
- , h_matching_transfer_encoding
326
- , h_matching_upgrade
327
-
328
- , h_connection
329
- , h_content_length
330
- , h_transfer_encoding
331
- , h_upgrade
332
-
333
- , h_matching_transfer_encoding_chunked
334
- , h_matching_connection_keep_alive
335
- , h_matching_connection_close
336
-
337
- , h_transfer_encoding_chunked
338
- , h_connection_keep_alive
339
- , h_connection_close
340
- };
341
-
342
- enum http_host_state
343
- {
344
- s_http_host_dead = 1
345
- , s_http_userinfo_start
346
- , s_http_userinfo
347
- , s_http_host_start
348
- , s_http_host_v6_start
349
- , s_http_host
350
- , s_http_host_v6
351
- , s_http_host_v6_end
352
- , s_http_host_port_start
353
- , s_http_host_port
354
- };
355
-
356
- /* Macros for character classes; depends on strict-mode */
357
- #define CR '\r'
358
- #define LF '\n'
359
- #define LOWER(c) (unsigned char)(c | 0x20)
360
- #define IS_ALPHA(c) (LOWER(c) >= 'a' && LOWER(c) <= 'z')
361
- #define IS_NUM(c) ((c) >= '0' && (c) <= '9')
362
- #define IS_ALPHANUM(c) (IS_ALPHA(c) || IS_NUM(c))
363
- #define IS_HEX(c) (IS_NUM(c) || (LOWER(c) >= 'a' && LOWER(c) <= 'f'))
364
- #define IS_MARK(c) ((c) == '-' || (c) == '_' || (c) == '.' || \
365
- (c) == '!' || (c) == '~' || (c) == '*' || (c) == '\'' || (c) == '(' || \
366
- (c) == ')')
367
- #define IS_USERINFO_CHAR(c) (IS_ALPHANUM(c) || IS_MARK(c) || (c) == '%' || \
368
- (c) == ';' || (c) == ':' || (c) == '&' || (c) == '=' || (c) == '+' || \
369
- (c) == '$' || (c) == ',')
370
-
371
- #if HTTP_PARSER_STRICT
372
- #define TOKEN(c) (tokens[(unsigned char)c])
373
- #define IS_URL_CHAR(c) (BIT_AT(normal_url_char, (unsigned char)c))
374
- #define IS_HOST_CHAR(c) (IS_ALPHANUM(c) || (c) == '.' || (c) == '-')
375
- #else
376
- #define TOKEN(c) ((c == ' ') ? ' ' : tokens[(unsigned char)c])
377
- #define IS_URL_CHAR(c) \
378
- (BIT_AT(normal_url_char, (unsigned char)c) || ((c) & 0x80))
379
- #define IS_HOST_CHAR(c) \
380
- (IS_ALPHANUM(c) || (c) == '.' || (c) == '-' || (c) == '_')
381
- #endif
382
-
383
-
384
- #define start_state (parser->type == HTTP_REQUEST ? s_start_req : s_start_res)
385
-
386
-
387
- #if HTTP_PARSER_STRICT
388
- # define STRICT_CHECK(cond) \
389
- do { \
390
- if (cond) { \
391
- SET_ERRNO(HPE_STRICT); \
392
- goto error; \
393
- } \
394
- } while (0)
395
- # define NEW_MESSAGE() (http_should_keep_alive(parser) ? start_state : s_dead)
396
- #else
397
- # define STRICT_CHECK(cond)
398
- # define NEW_MESSAGE() start_state
399
- #endif
400
-
401
-
402
- /* Map errno values to strings for human-readable output */
403
- #define HTTP_STRERROR_GEN(n, s) { "HPE_" #n, s },
404
- static struct {
405
- const char *name;
406
- const char *description;
407
- } http_strerror_tab[] = {
408
- HTTP_ERRNO_MAP(HTTP_STRERROR_GEN)
409
- };
410
- #undef HTTP_STRERROR_GEN
411
-
412
- int http_message_needs_eof(const http_parser *parser);
413
-
414
- /* Our URL parser.
415
- *
416
- * This is designed to be shared by http_parser_execute() for URL validation,
417
- * hence it has a state transition + byte-for-byte interface. In addition, it
418
- * is meant to be embedded in http_parser_parse_url(), which does the dirty
419
- * work of turning state transitions URL components for its API.
420
- *
421
- * This function should only be invoked with non-space characters. It is
422
- * assumed that the caller cares about (and can detect) the transition between
423
- * URL and non-URL states by looking for these.
424
- */
425
- static enum state
426
- parse_url_char(enum state s, const char ch)
427
- {
428
- if (ch == ' ' || ch == '\r' || ch == '\n') {
429
- return s_dead;
430
- }
431
-
432
- #if HTTP_PARSER_STRICT
433
- if (ch == '\t' || ch == '\f') {
434
- return s_dead;
435
- }
436
- #endif
437
-
438
- switch (s) {
439
- case s_req_spaces_before_url:
440
- /* Proxied requests are followed by scheme of an absolute URI (alpha).
441
- * All methods except CONNECT are followed by '/' or '*'.
442
- */
443
-
444
- if (ch == '/' || ch == '*') {
445
- return s_req_path;
446
- }
447
-
448
- if (IS_ALPHA(ch)) {
449
- return s_req_schema;
450
- }
451
-
452
- break;
453
-
454
- case s_req_schema:
455
- if (IS_ALPHA(ch)) {
456
- return s;
457
- }
458
-
459
- if (ch == ':') {
460
- return s_req_schema_slash;
461
- }
462
-
463
- break;
464
-
465
- case s_req_schema_slash:
466
- if (ch == '/') {
467
- return s_req_schema_slash_slash;
468
- }
469
-
470
- break;
471
-
472
- case s_req_schema_slash_slash:
473
- if (ch == '/') {
474
- return s_req_server_start;
475
- }
476
-
477
- break;
478
-
479
- case s_req_server_with_at:
480
- if (ch == '@') {
481
- return s_dead;
482
- }
483
-
484
- /* FALLTHROUGH */
485
- case s_req_server_start:
486
- case s_req_server:
487
- if (ch == '/') {
488
- return s_req_path;
489
- }
490
-
491
- if (ch == '?') {
492
- return s_req_query_string_start;
493
- }
494
-
495
- if (ch == '@') {
496
- return s_req_server_with_at;
497
- }
498
-
499
- if (IS_USERINFO_CHAR(ch) || ch == '[' || ch == ']') {
500
- return s_req_server;
501
- }
502
-
503
- break;
504
-
505
- case s_req_path:
506
- if (IS_URL_CHAR(ch)) {
507
- return s;
508
- }
509
-
510
- switch (ch) {
511
- case '?':
512
- return s_req_query_string_start;
513
-
514
- case '#':
515
- return s_req_fragment_start;
516
- }
517
-
518
- break;
519
-
520
- case s_req_query_string_start:
521
- case s_req_query_string:
522
- if (IS_URL_CHAR(ch)) {
523
- return s_req_query_string;
524
- }
525
-
526
- switch (ch) {
527
- case '?':
528
- /* allow extra '?' in query string */
529
- return s_req_query_string;
530
-
531
- case '#':
532
- return s_req_fragment_start;
533
- }
534
-
535
- break;
536
-
537
- case s_req_fragment_start:
538
- if (IS_URL_CHAR(ch)) {
539
- return s_req_fragment;
540
- }
541
-
542
- switch (ch) {
543
- case '?':
544
- return s_req_fragment;
545
-
546
- case '#':
547
- return s;
548
- }
549
-
550
- break;
551
-
552
- case s_req_fragment:
553
- if (IS_URL_CHAR(ch)) {
554
- return s;
555
- }
556
-
557
- switch (ch) {
558
- case '?':
559
- case '#':
560
- return s;
561
- }
562
-
563
- break;
564
-
565
- default:
566
- break;
567
- }
568
-
569
- /* We should never fall out of the switch above unless there's an error */
570
- return s_dead;
571
- }
572
-
573
- size_t http_parser_execute (http_parser *parser,
574
- const http_parser_settings *settings,
575
- const char *data,
576
- size_t len)
577
- {
578
- char c, ch;
579
- int8_t unhex_val;
580
- const char *p = data;
581
- const char *header_field_mark = 0;
582
- const char *header_value_mark = 0;
583
- const char *url_mark = 0;
584
- const char *body_mark = 0;
585
- const char *status_mark = 0;
586
-
587
- /* We're in an error state. Don't bother doing anything. */
588
- if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
589
- return 0;
590
- }
591
-
592
- if (len == 0) {
593
- switch (parser->state) {
594
- case s_body_identity_eof:
595
- /* Use of CALLBACK_NOTIFY() here would erroneously return 1 byte read if
596
- * we got paused.
597
- */
598
- CALLBACK_NOTIFY_NOADVANCE(message_complete);
599
- return 0;
600
-
601
- case s_dead:
602
- case s_start_req_or_res:
603
- case s_start_res:
604
- case s_start_req:
605
- return 0;
606
-
607
- default:
608
- SET_ERRNO(HPE_INVALID_EOF_STATE);
609
- return 1;
610
- }
611
- }
612
-
613
-
614
- if (parser->state == s_header_field)
615
- header_field_mark = data;
616
- if (parser->state == s_header_value)
617
- header_value_mark = data;
618
- switch (parser->state) {
619
- case s_req_path:
620
- case s_req_schema:
621
- case s_req_schema_slash:
622
- case s_req_schema_slash_slash:
623
- case s_req_server_start:
624
- case s_req_server:
625
- case s_req_server_with_at:
626
- case s_req_query_string_start:
627
- case s_req_query_string:
628
- case s_req_fragment_start:
629
- case s_req_fragment:
630
- url_mark = data;
631
- break;
632
- case s_res_status:
633
- status_mark = data;
634
- break;
635
- }
636
-
637
- for (p=data; p != data + len; p++) {
638
- ch = *p;
639
-
640
- if (PARSING_HEADER(parser->state)) {
641
- ++parser->nread;
642
- /* Don't allow the total size of the HTTP headers (including the status
643
- * line) to exceed HTTP_MAX_HEADER_SIZE. This check is here to protect
644
- * embedders against denial-of-service attacks where the attacker feeds
645
- * us a never-ending header that the embedder keeps buffering.
646
- *
647
- * This check is arguably the responsibility of embedders but we're doing
648
- * it on the embedder's behalf because most won't bother and this way we
649
- * make the web a little safer. HTTP_MAX_HEADER_SIZE is still far bigger
650
- * than any reasonable request or response so this should never affect
651
- * day-to-day operation.
652
- */
653
- if (parser->nread > HTTP_MAX_HEADER_SIZE) {
654
- SET_ERRNO(HPE_HEADER_OVERFLOW);
655
- goto error;
656
- }
657
- }
658
-
659
- reexecute_byte:
660
- switch (parser->state) {
661
-
662
- case s_dead:
663
- /* this state is used after a 'Connection: close' message
664
- * the parser will error out if it reads another message
665
- */
666
- if (ch == CR || ch == LF)
667
- break;
668
-
669
- SET_ERRNO(HPE_CLOSED_CONNECTION);
670
- goto error;
671
-
672
- case s_start_req_or_res:
673
- {
674
- if (ch == CR || ch == LF)
675
- break;
676
- parser->flags = 0;
677
- parser->content_length = ULLONG_MAX;
678
-
679
- if (ch == 'H') {
680
- parser->state = s_res_or_resp_H;
681
-
682
- CALLBACK_NOTIFY(message_begin);
683
- } else {
684
- parser->type = HTTP_REQUEST;
685
- parser->state = s_start_req;
686
- goto reexecute_byte;
687
- }
688
-
689
- break;
690
- }
691
-
692
- case s_res_or_resp_H:
693
- if (ch == 'T') {
694
- parser->type = HTTP_RESPONSE;
695
- parser->state = s_res_HT;
696
- } else {
697
- if (ch != 'E') {
698
- SET_ERRNO(HPE_INVALID_CONSTANT);
699
- goto error;
700
- }
701
-
702
- parser->type = HTTP_REQUEST;
703
- parser->method = HTTP_HEAD;
704
- parser->index = 2;
705
- parser->state = s_req_method;
706
- }
707
- break;
708
-
709
- case s_start_res:
710
- {
711
- parser->flags = 0;
712
- parser->content_length = ULLONG_MAX;
713
-
714
- switch (ch) {
715
- case 'H':
716
- parser->state = s_res_H;
717
- break;
718
-
719
- case CR:
720
- case LF:
721
- break;
722
-
723
- default:
724
- SET_ERRNO(HPE_INVALID_CONSTANT);
725
- goto error;
726
- }
727
-
728
- CALLBACK_NOTIFY(message_begin);
729
- break;
730
- }
731
-
732
- case s_res_H:
733
- STRICT_CHECK(ch != 'T');
734
- parser->state = s_res_HT;
735
- break;
736
-
737
- case s_res_HT:
738
- STRICT_CHECK(ch != 'T');
739
- parser->state = s_res_HTT;
740
- break;
741
-
742
- case s_res_HTT:
743
- STRICT_CHECK(ch != 'P');
744
- parser->state = s_res_HTTP;
745
- break;
746
-
747
- case s_res_HTTP:
748
- STRICT_CHECK(ch != '/');
749
- parser->state = s_res_first_http_major;
750
- break;
751
-
752
- case s_res_first_http_major:
753
- if (ch < '0' || ch > '9') {
754
- SET_ERRNO(HPE_INVALID_VERSION);
755
- goto error;
756
- }
757
-
758
- parser->http_major = ch - '0';
759
- parser->state = s_res_http_major;
760
- break;
761
-
762
- /* major HTTP version or dot */
763
- case s_res_http_major:
764
- {
765
- if (ch == '.') {
766
- parser->state = s_res_first_http_minor;
767
- break;
768
- }
769
-
770
- if (!IS_NUM(ch)) {
771
- SET_ERRNO(HPE_INVALID_VERSION);
772
- goto error;
773
- }
774
-
775
- parser->http_major *= 10;
776
- parser->http_major += ch - '0';
777
-
778
- if (parser->http_major > 999) {
779
- SET_ERRNO(HPE_INVALID_VERSION);
780
- goto error;
781
- }
782
-
783
- break;
784
- }
785
-
786
- /* first digit of minor HTTP version */
787
- case s_res_first_http_minor:
788
- if (!IS_NUM(ch)) {
789
- SET_ERRNO(HPE_INVALID_VERSION);
790
- goto error;
791
- }
792
-
793
- parser->http_minor = ch - '0';
794
- parser->state = s_res_http_minor;
795
- break;
796
-
797
- /* minor HTTP version or end of request line */
798
- case s_res_http_minor:
799
- {
800
- if (ch == ' ') {
801
- parser->state = s_res_first_status_code;
802
- break;
803
- }
804
-
805
- if (!IS_NUM(ch)) {
806
- SET_ERRNO(HPE_INVALID_VERSION);
807
- goto error;
808
- }
809
-
810
- parser->http_minor *= 10;
811
- parser->http_minor += ch - '0';
812
-
813
- if (parser->http_minor > 999) {
814
- SET_ERRNO(HPE_INVALID_VERSION);
815
- goto error;
816
- }
817
-
818
- break;
819
- }
820
-
821
- case s_res_first_status_code:
822
- {
823
- if (!IS_NUM(ch)) {
824
- if (ch == ' ') {
825
- break;
826
- }
827
-
828
- SET_ERRNO(HPE_INVALID_STATUS);
829
- goto error;
830
- }
831
- parser->status_code = ch - '0';
832
- parser->state = s_res_status_code;
833
- break;
834
- }
835
-
836
- case s_res_status_code:
837
- {
838
- if (!IS_NUM(ch)) {
839
- switch (ch) {
840
- case ' ':
841
- parser->state = s_res_status_start;
842
- break;
843
- case CR:
844
- parser->state = s_res_line_almost_done;
845
- break;
846
- case LF:
847
- parser->state = s_header_field_start;
848
- break;
849
- default:
850
- SET_ERRNO(HPE_INVALID_STATUS);
851
- goto error;
852
- }
853
- break;
854
- }
855
-
856
- parser->status_code *= 10;
857
- parser->status_code += ch - '0';
858
-
859
- if (parser->status_code > 999) {
860
- SET_ERRNO(HPE_INVALID_STATUS);
861
- goto error;
862
- }
863
-
864
- break;
865
- }
866
-
867
- case s_res_status_start:
868
- {
869
- if (ch == CR) {
870
- parser->state = s_res_line_almost_done;
871
- break;
872
- }
873
-
874
- if (ch == LF) {
875
- parser->state = s_header_field_start;
876
- break;
877
- }
878
-
879
- MARK(status);
880
- parser->state = s_res_status;
881
- parser->index = 0;
882
- break;
883
- }
884
-
885
- case s_res_status:
886
- if (ch == CR) {
887
- parser->state = s_res_line_almost_done;
888
- CALLBACK_DATA(status);
889
- break;
890
- }
891
-
892
- if (ch == LF) {
893
- parser->state = s_header_field_start;
894
- CALLBACK_DATA(status);
895
- break;
896
- }
897
-
898
- break;
899
-
900
- case s_res_line_almost_done:
901
- STRICT_CHECK(ch != LF);
902
- parser->state = s_header_field_start;
903
- break;
904
-
905
- case s_start_req:
906
- {
907
- if (ch == CR || ch == LF)
908
- break;
909
- parser->flags = 0;
910
- parser->content_length = ULLONG_MAX;
911
-
912
- if (!IS_ALPHA(ch)) {
913
- SET_ERRNO(HPE_INVALID_METHOD);
914
- goto error;
915
- }
916
-
917
- parser->method = (enum http_method) 0;
918
- parser->index = 1;
919
- switch (ch) {
920
- case 'C': parser->method = HTTP_CONNECT; /* or COPY, CHECKOUT */ break;
921
- case 'D': parser->method = HTTP_DELETE; break;
922
- case 'G': parser->method = HTTP_GET; break;
923
- case 'H': parser->method = HTTP_HEAD; break;
924
- case 'L': parser->method = HTTP_LOCK; break;
925
- case 'M': parser->method = HTTP_MKCOL; /* or MOVE, MKACTIVITY, MERGE, M-SEARCH */ break;
926
- case 'N': parser->method = HTTP_NOTIFY; break;
927
- case 'O': parser->method = HTTP_OPTIONS; break;
928
- case 'P': parser->method = HTTP_POST;
929
- /* or PROPFIND|PROPPATCH|PUT|PATCH|PURGE */
930
- break;
931
- case 'R': parser->method = HTTP_REPORT; break;
932
- case 'S': parser->method = HTTP_SUBSCRIBE; /* or SEARCH */ break;
933
- case 'T': parser->method = HTTP_TRACE; break;
934
- case 'U': parser->method = HTTP_UNLOCK; /* or UNSUBSCRIBE */ break;
935
- default:
936
- SET_ERRNO(HPE_INVALID_METHOD);
937
- goto error;
938
- }
939
- parser->state = s_req_method;
940
-
941
- CALLBACK_NOTIFY(message_begin);
942
-
943
- break;
944
- }
945
-
946
- case s_req_method:
947
- {
948
- const char *matcher;
949
- if (ch == '\0') {
950
- SET_ERRNO(HPE_INVALID_METHOD);
951
- goto error;
952
- }
953
-
954
- matcher = method_strings[parser->method];
955
- if (ch == ' ' && matcher[parser->index] == '\0') {
956
- parser->state = s_req_spaces_before_url;
957
- } else if (ch == matcher[parser->index]) {
958
- ; /* nada */
959
- } else if (parser->method == HTTP_CONNECT) {
960
- if (parser->index == 1 && ch == 'H') {
961
- parser->method = HTTP_CHECKOUT;
962
- } else if (parser->index == 2 && ch == 'P') {
963
- parser->method = HTTP_COPY;
964
- } else {
965
- SET_ERRNO(HPE_INVALID_METHOD);
966
- goto error;
967
- }
968
- } else if (parser->method == HTTP_MKCOL) {
969
- if (parser->index == 1 && ch == 'O') {
970
- parser->method = HTTP_MOVE;
971
- } else if (parser->index == 1 && ch == 'E') {
972
- parser->method = HTTP_MERGE;
973
- } else if (parser->index == 1 && ch == '-') {
974
- parser->method = HTTP_MSEARCH;
975
- } else if (parser->index == 2 && ch == 'A') {
976
- parser->method = HTTP_MKACTIVITY;
977
- } else {
978
- SET_ERRNO(HPE_INVALID_METHOD);
979
- goto error;
980
- }
981
- } else if (parser->method == HTTP_SUBSCRIBE) {
982
- if (parser->index == 1 && ch == 'E') {
983
- parser->method = HTTP_SEARCH;
984
- } else {
985
- SET_ERRNO(HPE_INVALID_METHOD);
986
- goto error;
987
- }
988
- } else if (parser->index == 1 && parser->method == HTTP_POST) {
989
- if (ch == 'R') {
990
- parser->method = HTTP_PROPFIND; /* or HTTP_PROPPATCH */
991
- } else if (ch == 'U') {
992
- parser->method = HTTP_PUT; /* or HTTP_PURGE */
993
- } else if (ch == 'A') {
994
- parser->method = HTTP_PATCH;
995
- } else {
996
- SET_ERRNO(HPE_INVALID_METHOD);
997
- goto error;
998
- }
999
- } else if (parser->index == 2) {
1000
- if (parser->method == HTTP_PUT) {
1001
- if (ch == 'R') {
1002
- parser->method = HTTP_PURGE;
1003
- } else {
1004
- SET_ERRNO(HPE_INVALID_METHOD);
1005
- goto error;
1006
- }
1007
- } else if (parser->method == HTTP_UNLOCK) {
1008
- if (ch == 'S') {
1009
- parser->method = HTTP_UNSUBSCRIBE;
1010
- } else {
1011
- SET_ERRNO(HPE_INVALID_METHOD);
1012
- goto error;
1013
- }
1014
- } else {
1015
- SET_ERRNO(HPE_INVALID_METHOD);
1016
- goto error;
1017
- }
1018
- } else if (parser->index == 4 && parser->method == HTTP_PROPFIND && ch == 'P') {
1019
- parser->method = HTTP_PROPPATCH;
1020
- } else {
1021
- SET_ERRNO(HPE_INVALID_METHOD);
1022
- goto error;
1023
- }
1024
-
1025
- ++parser->index;
1026
- break;
1027
- }
1028
-
1029
- case s_req_spaces_before_url:
1030
- {
1031
- if (ch == ' ') break;
1032
-
1033
- MARK(url);
1034
- if (parser->method == HTTP_CONNECT) {
1035
- parser->state = s_req_server_start;
1036
- }
1037
-
1038
- parser->state = parse_url_char((enum state)parser->state, ch);
1039
- if (parser->state == s_dead) {
1040
- SET_ERRNO(HPE_INVALID_URL);
1041
- goto error;
1042
- }
1043
-
1044
- break;
1045
- }
1046
-
1047
- case s_req_schema:
1048
- case s_req_schema_slash:
1049
- case s_req_schema_slash_slash:
1050
- case s_req_server_start:
1051
- {
1052
- switch (ch) {
1053
- /* No whitespace allowed here */
1054
- case ' ':
1055
- case CR:
1056
- case LF:
1057
- SET_ERRNO(HPE_INVALID_URL);
1058
- goto error;
1059
- default:
1060
- parser->state = parse_url_char((enum state)parser->state, ch);
1061
- if (parser->state == s_dead) {
1062
- SET_ERRNO(HPE_INVALID_URL);
1063
- goto error;
1064
- }
1065
- }
1066
-
1067
- break;
1068
- }
1069
-
1070
- case s_req_server:
1071
- case s_req_server_with_at:
1072
- case s_req_path:
1073
- case s_req_query_string_start:
1074
- case s_req_query_string:
1075
- case s_req_fragment_start:
1076
- case s_req_fragment:
1077
- {
1078
- switch (ch) {
1079
- case ' ':
1080
- parser->state = s_req_http_start;
1081
- CALLBACK_DATA(url);
1082
- break;
1083
- case CR:
1084
- case LF:
1085
- parser->http_major = 0;
1086
- parser->http_minor = 9;
1087
- parser->state = (ch == CR) ?
1088
- s_req_line_almost_done :
1089
- s_header_field_start;
1090
- CALLBACK_DATA(url);
1091
- break;
1092
- default:
1093
- parser->state = parse_url_char((enum state)parser->state, ch);
1094
- if (parser->state == s_dead) {
1095
- SET_ERRNO(HPE_INVALID_URL);
1096
- goto error;
1097
- }
1098
- }
1099
- break;
1100
- }
1101
-
1102
- case s_req_http_start:
1103
- switch (ch) {
1104
- case 'H':
1105
- parser->state = s_req_http_H;
1106
- break;
1107
- case ' ':
1108
- break;
1109
- default:
1110
- SET_ERRNO(HPE_INVALID_CONSTANT);
1111
- goto error;
1112
- }
1113
- break;
1114
-
1115
- case s_req_http_H:
1116
- STRICT_CHECK(ch != 'T');
1117
- parser->state = s_req_http_HT;
1118
- break;
1119
-
1120
- case s_req_http_HT:
1121
- STRICT_CHECK(ch != 'T');
1122
- parser->state = s_req_http_HTT;
1123
- break;
1124
-
1125
- case s_req_http_HTT:
1126
- STRICT_CHECK(ch != 'P');
1127
- parser->state = s_req_http_HTTP;
1128
- break;
1129
-
1130
- case s_req_http_HTTP:
1131
- STRICT_CHECK(ch != '/');
1132
- parser->state = s_req_first_http_major;
1133
- break;
1134
-
1135
- /* first digit of major HTTP version */
1136
- case s_req_first_http_major:
1137
- if (ch < '1' || ch > '9') {
1138
- SET_ERRNO(HPE_INVALID_VERSION);
1139
- goto error;
1140
- }
1141
-
1142
- parser->http_major = ch - '0';
1143
- parser->state = s_req_http_major;
1144
- break;
1145
-
1146
- /* major HTTP version or dot */
1147
- case s_req_http_major:
1148
- {
1149
- if (ch == '.') {
1150
- parser->state = s_req_first_http_minor;
1151
- break;
1152
- }
1153
-
1154
- if (!IS_NUM(ch)) {
1155
- SET_ERRNO(HPE_INVALID_VERSION);
1156
- goto error;
1157
- }
1158
-
1159
- parser->http_major *= 10;
1160
- parser->http_major += ch - '0';
1161
-
1162
- if (parser->http_major > 999) {
1163
- SET_ERRNO(HPE_INVALID_VERSION);
1164
- goto error;
1165
- }
1166
-
1167
- break;
1168
- }
1169
-
1170
- /* first digit of minor HTTP version */
1171
- case s_req_first_http_minor:
1172
- if (!IS_NUM(ch)) {
1173
- SET_ERRNO(HPE_INVALID_VERSION);
1174
- goto error;
1175
- }
1176
-
1177
- parser->http_minor = ch - '0';
1178
- parser->state = s_req_http_minor;
1179
- break;
1180
-
1181
- /* minor HTTP version or end of request line */
1182
- case s_req_http_minor:
1183
- {
1184
- if (ch == CR) {
1185
- parser->state = s_req_line_almost_done;
1186
- break;
1187
- }
1188
-
1189
- if (ch == LF) {
1190
- parser->state = s_header_field_start;
1191
- break;
1192
- }
1193
-
1194
- /* XXX allow spaces after digit? */
1195
-
1196
- if (!IS_NUM(ch)) {
1197
- SET_ERRNO(HPE_INVALID_VERSION);
1198
- goto error;
1199
- }
1200
-
1201
- parser->http_minor *= 10;
1202
- parser->http_minor += ch - '0';
1203
-
1204
- if (parser->http_minor > 999) {
1205
- SET_ERRNO(HPE_INVALID_VERSION);
1206
- goto error;
1207
- }
1208
-
1209
- break;
1210
- }
1211
-
1212
- /* end of request line */
1213
- case s_req_line_almost_done:
1214
- {
1215
- if (ch != LF) {
1216
- SET_ERRNO(HPE_LF_EXPECTED);
1217
- goto error;
1218
- }
1219
-
1220
- parser->state = s_header_field_start;
1221
- break;
1222
- }
1223
-
1224
- case s_header_field_start:
1225
- {
1226
- if (ch == CR) {
1227
- parser->state = s_headers_almost_done;
1228
- break;
1229
- }
1230
-
1231
- if (ch == LF) {
1232
- /* they might be just sending \n instead of \r\n so this would be
1233
- * the second \n to denote the end of headers*/
1234
- parser->state = s_headers_almost_done;
1235
- goto reexecute_byte;
1236
- }
1237
-
1238
- c = TOKEN(ch);
1239
-
1240
- if (!c) {
1241
- SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1242
- goto error;
1243
- }
1244
-
1245
- MARK(header_field);
1246
-
1247
- parser->index = 0;
1248
- parser->state = s_header_field;
1249
-
1250
- switch (c) {
1251
- case 'c':
1252
- parser->header_state = h_C;
1253
- break;
1254
-
1255
- case 'p':
1256
- parser->header_state = h_matching_proxy_connection;
1257
- break;
1258
-
1259
- case 't':
1260
- parser->header_state = h_matching_transfer_encoding;
1261
- break;
1262
-
1263
- case 'u':
1264
- parser->header_state = h_matching_upgrade;
1265
- break;
1266
-
1267
- default:
1268
- parser->header_state = h_general;
1269
- break;
1270
- }
1271
- break;
1272
- }
1273
-
1274
- case s_header_field:
1275
- {
1276
- c = TOKEN(ch);
1277
-
1278
- if (c) {
1279
- switch (parser->header_state) {
1280
- case h_general:
1281
- break;
1282
-
1283
- case h_C:
1284
- parser->index++;
1285
- parser->header_state = (c == 'o' ? h_CO : h_general);
1286
- break;
1287
-
1288
- case h_CO:
1289
- parser->index++;
1290
- parser->header_state = (c == 'n' ? h_CON : h_general);
1291
- break;
1292
-
1293
- case h_CON:
1294
- parser->index++;
1295
- switch (c) {
1296
- case 'n':
1297
- parser->header_state = h_matching_connection;
1298
- break;
1299
- case 't':
1300
- parser->header_state = h_matching_content_length;
1301
- break;
1302
- default:
1303
- parser->header_state = h_general;
1304
- break;
1305
- }
1306
- break;
1307
-
1308
- /* connection */
1309
-
1310
- case h_matching_connection:
1311
- parser->index++;
1312
- if (parser->index > sizeof(CONNECTION)-1
1313
- || c != CONNECTION[parser->index]) {
1314
- parser->header_state = h_general;
1315
- } else if (parser->index == sizeof(CONNECTION)-2) {
1316
- parser->header_state = h_connection;
1317
- }
1318
- break;
1319
-
1320
- /* proxy-connection */
1321
-
1322
- case h_matching_proxy_connection:
1323
- parser->index++;
1324
- if (parser->index > sizeof(PROXY_CONNECTION)-1
1325
- || c != PROXY_CONNECTION[parser->index]) {
1326
- parser->header_state = h_general;
1327
- } else if (parser->index == sizeof(PROXY_CONNECTION)-2) {
1328
- parser->header_state = h_connection;
1329
- }
1330
- break;
1331
-
1332
- /* content-length */
1333
-
1334
- case h_matching_content_length:
1335
- parser->index++;
1336
- if (parser->index > sizeof(CONTENT_LENGTH)-1
1337
- || c != CONTENT_LENGTH[parser->index]) {
1338
- parser->header_state = h_general;
1339
- } else if (parser->index == sizeof(CONTENT_LENGTH)-2) {
1340
- parser->header_state = h_content_length;
1341
- }
1342
- break;
1343
-
1344
- /* transfer-encoding */
1345
-
1346
- case h_matching_transfer_encoding:
1347
- parser->index++;
1348
- if (parser->index > sizeof(TRANSFER_ENCODING)-1
1349
- || c != TRANSFER_ENCODING[parser->index]) {
1350
- parser->header_state = h_general;
1351
- } else if (parser->index == sizeof(TRANSFER_ENCODING)-2) {
1352
- parser->header_state = h_transfer_encoding;
1353
- }
1354
- break;
1355
-
1356
- /* upgrade */
1357
-
1358
- case h_matching_upgrade:
1359
- parser->index++;
1360
- if (parser->index > sizeof(UPGRADE)-1
1361
- || c != UPGRADE[parser->index]) {
1362
- parser->header_state = h_general;
1363
- } else if (parser->index == sizeof(UPGRADE)-2) {
1364
- parser->header_state = h_upgrade;
1365
- }
1366
- break;
1367
-
1368
- case h_connection:
1369
- case h_content_length:
1370
- case h_transfer_encoding:
1371
- case h_upgrade:
1372
- if (ch != ' ') parser->header_state = h_general;
1373
- break;
1374
-
1375
- default:
1376
- assert(0 && "Unknown header_state");
1377
- break;
1378
- }
1379
- break;
1380
- }
1381
-
1382
- if (ch == ':') {
1383
- parser->state = s_header_value_start;
1384
- CALLBACK_DATA(header_field);
1385
- break;
1386
- }
1387
-
1388
- if (ch == CR) {
1389
- parser->state = s_header_almost_done;
1390
- CALLBACK_DATA(header_field);
1391
- break;
1392
- }
1393
-
1394
- if (ch == LF) {
1395
- parser->state = s_header_field_start;
1396
- CALLBACK_DATA(header_field);
1397
- break;
1398
- }
1399
-
1400
- SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1401
- goto error;
1402
- }
1403
-
1404
- case s_header_value_start:
1405
- {
1406
- if (ch == ' ' || ch == '\t') break;
1407
-
1408
- MARK(header_value);
1409
-
1410
- parser->state = s_header_value;
1411
- parser->index = 0;
1412
-
1413
- if (ch == CR) {
1414
- parser->header_state = h_general;
1415
- parser->state = s_header_almost_done;
1416
- CALLBACK_DATA(header_value);
1417
- break;
1418
- }
1419
-
1420
- if (ch == LF) {
1421
- parser->state = s_header_field_start;
1422
- CALLBACK_DATA(header_value);
1423
- break;
1424
- }
1425
-
1426
- c = LOWER(ch);
1427
-
1428
- switch (parser->header_state) {
1429
- case h_upgrade:
1430
- parser->flags |= F_UPGRADE;
1431
- parser->header_state = h_general;
1432
- break;
1433
-
1434
- case h_transfer_encoding:
1435
- /* looking for 'Transfer-Encoding: chunked' */
1436
- if ('c' == c) {
1437
- parser->header_state = h_matching_transfer_encoding_chunked;
1438
- } else {
1439
- parser->header_state = h_general;
1440
- }
1441
- break;
1442
-
1443
- case h_content_length:
1444
- if (!IS_NUM(ch)) {
1445
- SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1446
- goto error;
1447
- }
1448
-
1449
- parser->content_length = ch - '0';
1450
- break;
1451
-
1452
- case h_connection:
1453
- /* looking for 'Connection: keep-alive' */
1454
- if (c == 'k') {
1455
- parser->header_state = h_matching_connection_keep_alive;
1456
- /* looking for 'Connection: close' */
1457
- } else if (c == 'c') {
1458
- parser->header_state = h_matching_connection_close;
1459
- } else {
1460
- parser->header_state = h_general;
1461
- }
1462
- break;
1463
-
1464
- default:
1465
- parser->header_state = h_general;
1466
- break;
1467
- }
1468
- break;
1469
- }
1470
-
1471
- case s_header_value:
1472
- {
1473
-
1474
- if (ch == CR) {
1475
- parser->state = s_header_almost_done;
1476
- CALLBACK_DATA(header_value);
1477
- break;
1478
- }
1479
-
1480
- if (ch == LF) {
1481
- parser->state = s_header_almost_done;
1482
- CALLBACK_DATA_NOADVANCE(header_value);
1483
- goto reexecute_byte;
1484
- }
1485
-
1486
- c = LOWER(ch);
1487
-
1488
- switch (parser->header_state) {
1489
- case h_general:
1490
- break;
1491
-
1492
- case h_connection:
1493
- case h_transfer_encoding:
1494
- assert(0 && "Shouldn't get here.");
1495
- break;
1496
-
1497
- case h_content_length:
1498
- {
1499
- uint64_t t;
1500
-
1501
- if (ch == ' ') break;
1502
-
1503
- if (!IS_NUM(ch)) {
1504
- SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1505
- goto error;
1506
- }
1507
-
1508
- t = parser->content_length;
1509
- t *= 10;
1510
- t += ch - '0';
1511
-
1512
- /* Overflow? */
1513
- if (t < parser->content_length || t == ULLONG_MAX) {
1514
- SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1515
- goto error;
1516
- }
1517
-
1518
- parser->content_length = t;
1519
- break;
1520
- }
1521
-
1522
- /* Transfer-Encoding: chunked */
1523
- case h_matching_transfer_encoding_chunked:
1524
- parser->index++;
1525
- if (parser->index > sizeof(CHUNKED)-1
1526
- || c != CHUNKED[parser->index]) {
1527
- parser->header_state = h_general;
1528
- } else if (parser->index == sizeof(CHUNKED)-2) {
1529
- parser->header_state = h_transfer_encoding_chunked;
1530
- }
1531
- break;
1532
-
1533
- /* looking for 'Connection: keep-alive' */
1534
- case h_matching_connection_keep_alive:
1535
- parser->index++;
1536
- if (parser->index > sizeof(KEEP_ALIVE)-1
1537
- || c != KEEP_ALIVE[parser->index]) {
1538
- parser->header_state = h_general;
1539
- } else if (parser->index == sizeof(KEEP_ALIVE)-2) {
1540
- parser->header_state = h_connection_keep_alive;
1541
- }
1542
- break;
1543
-
1544
- /* looking for 'Connection: close' */
1545
- case h_matching_connection_close:
1546
- parser->index++;
1547
- if (parser->index > sizeof(CLOSE)-1 || c != CLOSE[parser->index]) {
1548
- parser->header_state = h_general;
1549
- } else if (parser->index == sizeof(CLOSE)-2) {
1550
- parser->header_state = h_connection_close;
1551
- }
1552
- break;
1553
-
1554
- case h_transfer_encoding_chunked:
1555
- case h_connection_keep_alive:
1556
- case h_connection_close:
1557
- if (ch != ' ') parser->header_state = h_general;
1558
- break;
1559
-
1560
- default:
1561
- parser->state = s_header_value;
1562
- parser->header_state = h_general;
1563
- break;
1564
- }
1565
- break;
1566
- }
1567
-
1568
- case s_header_almost_done:
1569
- {
1570
- STRICT_CHECK(ch != LF);
1571
-
1572
- parser->state = s_header_value_lws;
1573
-
1574
- switch (parser->header_state) {
1575
- case h_connection_keep_alive:
1576
- parser->flags |= F_CONNECTION_KEEP_ALIVE;
1577
- break;
1578
- case h_connection_close:
1579
- parser->flags |= F_CONNECTION_CLOSE;
1580
- break;
1581
- case h_transfer_encoding_chunked:
1582
- parser->flags |= F_CHUNKED;
1583
- break;
1584
- default:
1585
- break;
1586
- }
1587
-
1588
- break;
1589
- }
1590
-
1591
- case s_header_value_lws:
1592
- {
1593
- if (ch == ' ' || ch == '\t')
1594
- parser->state = s_header_value_start;
1595
- else
1596
- {
1597
- parser->state = s_header_field_start;
1598
- goto reexecute_byte;
1599
- }
1600
- break;
1601
- }
1602
-
1603
- case s_headers_almost_done:
1604
- {
1605
- STRICT_CHECK(ch != LF);
1606
-
1607
- if (parser->flags & F_TRAILING) {
1608
- /* End of a chunked request */
1609
- parser->state = NEW_MESSAGE();
1610
- CALLBACK_NOTIFY(message_complete);
1611
- break;
1612
- }
1613
-
1614
- parser->state = s_headers_done;
1615
-
1616
- /* Set this here so that on_headers_complete() callbacks can see it */
1617
- parser->upgrade =
1618
- (parser->flags & F_UPGRADE || parser->method == HTTP_CONNECT);
1619
-
1620
- /* Here we call the headers_complete callback. This is somewhat
1621
- * different than other callbacks because if the user returns 1, we
1622
- * will interpret that as saying that this message has no body. This
1623
- * is needed for the annoying case of recieving a response to a HEAD
1624
- * request.
1625
- *
1626
- * We'd like to use CALLBACK_NOTIFY_NOADVANCE() here but we cannot, so
1627
- * we have to simulate it by handling a change in errno below.
1628
- */
1629
- if (settings->on_headers_complete) {
1630
- switch (settings->on_headers_complete(parser)) {
1631
- case 0:
1632
- break;
1633
-
1634
- case 1:
1635
- parser->flags |= F_SKIPBODY;
1636
- break;
1637
-
1638
- default:
1639
- SET_ERRNO(HPE_CB_headers_complete);
1640
- return p - data; /* Error */
1641
- }
1642
- }
1643
-
1644
- if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
1645
- return p - data;
1646
- }
1647
-
1648
- goto reexecute_byte;
1649
- }
1650
-
1651
- case s_headers_done:
1652
- {
1653
- STRICT_CHECK(ch != LF);
1654
-
1655
- parser->nread = 0;
1656
-
1657
- /* Exit, the rest of the connect is in a different protocol. */
1658
- if (parser->upgrade) {
1659
- parser->state = NEW_MESSAGE();
1660
- CALLBACK_NOTIFY(message_complete);
1661
- return (p - data) + 1;
1662
- }
1663
-
1664
- if (parser->flags & F_SKIPBODY) {
1665
- parser->state = NEW_MESSAGE();
1666
- CALLBACK_NOTIFY(message_complete);
1667
- } else if (parser->flags & F_CHUNKED) {
1668
- /* chunked encoding - ignore Content-Length header */
1669
- parser->state = s_chunk_size_start;
1670
- } else {
1671
- if (parser->content_length == 0) {
1672
- /* Content-Length header given but zero: Content-Length: 0\r\n */
1673
- parser->state = NEW_MESSAGE();
1674
- CALLBACK_NOTIFY(message_complete);
1675
- } else if (parser->content_length != ULLONG_MAX) {
1676
- /* Content-Length header given and non-zero */
1677
- parser->state = s_body_identity;
1678
- } else {
1679
- if (parser->type == HTTP_REQUEST ||
1680
- !http_message_needs_eof(parser)) {
1681
- /* Assume content-length 0 - read the next */
1682
- parser->state = NEW_MESSAGE();
1683
- CALLBACK_NOTIFY(message_complete);
1684
- } else {
1685
- /* Read body until EOF */
1686
- parser->state = s_body_identity_eof;
1687
- }
1688
- }
1689
- }
1690
-
1691
- break;
1692
- }
1693
-
1694
- case s_body_identity:
1695
- {
1696
- uint64_t to_read = MIN(parser->content_length,
1697
- (uint64_t) ((data + len) - p));
1698
-
1699
- assert(parser->content_length != 0
1700
- && parser->content_length != ULLONG_MAX);
1701
-
1702
- /* The difference between advancing content_length and p is because
1703
- * the latter will automaticaly advance on the next loop iteration.
1704
- * Further, if content_length ends up at 0, we want to see the last
1705
- * byte again for our message complete callback.
1706
- */
1707
- MARK(body);
1708
- parser->content_length -= to_read;
1709
- p += to_read - 1;
1710
-
1711
- if (parser->content_length == 0) {
1712
- parser->state = s_message_done;
1713
-
1714
- /* Mimic CALLBACK_DATA_NOADVANCE() but with one extra byte.
1715
- *
1716
- * The alternative to doing this is to wait for the next byte to
1717
- * trigger the data callback, just as in every other case. The
1718
- * problem with this is that this makes it difficult for the test
1719
- * harness to distinguish between complete-on-EOF and
1720
- * complete-on-length. It's not clear that this distinction is
1721
- * important for applications, but let's keep it for now.
1722
- */
1723
- CALLBACK_DATA_(body, p - body_mark + 1, p - data);
1724
- goto reexecute_byte;
1725
- }
1726
-
1727
- break;
1728
- }
1729
-
1730
- /* read until EOF */
1731
- case s_body_identity_eof:
1732
- MARK(body);
1733
- p = data + len - 1;
1734
-
1735
- break;
1736
-
1737
- case s_message_done:
1738
- parser->state = NEW_MESSAGE();
1739
- CALLBACK_NOTIFY(message_complete);
1740
- break;
1741
-
1742
- case s_chunk_size_start:
1743
- {
1744
- assert(parser->nread == 1);
1745
- assert(parser->flags & F_CHUNKED);
1746
-
1747
- unhex_val = unhex[(unsigned char)ch];
1748
- if (unhex_val == -1) {
1749
- SET_ERRNO(HPE_INVALID_CHUNK_SIZE);
1750
- goto error;
1751
- }
1752
-
1753
- parser->content_length = unhex_val;
1754
- parser->state = s_chunk_size;
1755
- break;
1756
- }
1757
-
1758
- case s_chunk_size:
1759
- {
1760
- uint64_t t;
1761
-
1762
- assert(parser->flags & F_CHUNKED);
1763
-
1764
- if (ch == CR) {
1765
- parser->state = s_chunk_size_almost_done;
1766
- break;
1767
- }
1768
-
1769
- unhex_val = unhex[(unsigned char)ch];
1770
-
1771
- if (unhex_val == -1) {
1772
- if (ch == ';' || ch == ' ') {
1773
- parser->state = s_chunk_parameters;
1774
- break;
1775
- }
1776
-
1777
- SET_ERRNO(HPE_INVALID_CHUNK_SIZE);
1778
- goto error;
1779
- }
1780
-
1781
- t = parser->content_length;
1782
- t *= 16;
1783
- t += unhex_val;
1784
-
1785
- /* Overflow? */
1786
- if (t < parser->content_length || t == ULLONG_MAX) {
1787
- SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1788
- goto error;
1789
- }
1790
-
1791
- parser->content_length = t;
1792
- break;
1793
- }
1794
-
1795
- case s_chunk_parameters:
1796
- {
1797
- assert(parser->flags & F_CHUNKED);
1798
- /* just ignore this shit. TODO check for overflow */
1799
- if (ch == CR) {
1800
- parser->state = s_chunk_size_almost_done;
1801
- break;
1802
- }
1803
- break;
1804
- }
1805
-
1806
- case s_chunk_size_almost_done:
1807
- {
1808
- assert(parser->flags & F_CHUNKED);
1809
- STRICT_CHECK(ch != LF);
1810
-
1811
- parser->nread = 0;
1812
-
1813
- if (parser->content_length == 0) {
1814
- parser->flags |= F_TRAILING;
1815
- parser->state = s_header_field_start;
1816
- } else {
1817
- parser->state = s_chunk_data;
1818
- }
1819
- break;
1820
- }
1821
-
1822
- case s_chunk_data:
1823
- {
1824
- uint64_t to_read = MIN(parser->content_length,
1825
- (uint64_t) ((data + len) - p));
1826
-
1827
- assert(parser->flags & F_CHUNKED);
1828
- assert(parser->content_length != 0
1829
- && parser->content_length != ULLONG_MAX);
1830
-
1831
- /* See the explanation in s_body_identity for why the content
1832
- * length and data pointers are managed this way.
1833
- */
1834
- MARK(body);
1835
- parser->content_length -= to_read;
1836
- p += to_read - 1;
1837
-
1838
- if (parser->content_length == 0) {
1839
- parser->state = s_chunk_data_almost_done;
1840
- }
1841
-
1842
- break;
1843
- }
1844
-
1845
- case s_chunk_data_almost_done:
1846
- assert(parser->flags & F_CHUNKED);
1847
- assert(parser->content_length == 0);
1848
- STRICT_CHECK(ch != CR);
1849
- parser->state = s_chunk_data_done;
1850
- CALLBACK_DATA(body);
1851
- break;
1852
-
1853
- case s_chunk_data_done:
1854
- assert(parser->flags & F_CHUNKED);
1855
- STRICT_CHECK(ch != LF);
1856
- parser->nread = 0;
1857
- parser->state = s_chunk_size_start;
1858
- break;
1859
-
1860
- default:
1861
- assert(0 && "unhandled state");
1862
- SET_ERRNO(HPE_INVALID_INTERNAL_STATE);
1863
- goto error;
1864
- }
1865
- }
1866
-
1867
- /* Run callbacks for any marks that we have leftover after we ran our of
1868
- * bytes. There should be at most one of these set, so it's OK to invoke
1869
- * them in series (unset marks will not result in callbacks).
1870
- *
1871
- * We use the NOADVANCE() variety of callbacks here because 'p' has already
1872
- * overflowed 'data' and this allows us to correct for the off-by-one that
1873
- * we'd otherwise have (since CALLBACK_DATA() is meant to be run with a 'p'
1874
- * value that's in-bounds).
1875
- */
1876
-
1877
- assert(((header_field_mark ? 1 : 0) +
1878
- (header_value_mark ? 1 : 0) +
1879
- (url_mark ? 1 : 0) +
1880
- (body_mark ? 1 : 0) +
1881
- (status_mark ? 1 : 0)) <= 1);
1882
-
1883
- CALLBACK_DATA_NOADVANCE(header_field);
1884
- CALLBACK_DATA_NOADVANCE(header_value);
1885
- CALLBACK_DATA_NOADVANCE(url);
1886
- CALLBACK_DATA_NOADVANCE(body);
1887
- CALLBACK_DATA_NOADVANCE(status);
1888
-
1889
- return len;
1890
-
1891
- error:
1892
- if (HTTP_PARSER_ERRNO(parser) == HPE_OK) {
1893
- SET_ERRNO(HPE_UNKNOWN);
1894
- }
1895
-
1896
- return (p - data);
1897
- }
1898
-
1899
-
1900
- /* Does the parser need to see an EOF to find the end of the message? */
1901
- int
1902
- http_message_needs_eof (const http_parser *parser)
1903
- {
1904
- if (parser->type == HTTP_REQUEST) {
1905
- return 0;
1906
- }
1907
-
1908
- /* See RFC 2616 section 4.4 */
1909
- if (parser->status_code / 100 == 1 || /* 1xx e.g. Continue */
1910
- parser->status_code == 204 || /* No Content */
1911
- parser->status_code == 304 || /* Not Modified */
1912
- parser->flags & F_SKIPBODY) { /* response to a HEAD request */
1913
- return 0;
1914
- }
1915
-
1916
- if ((parser->flags & F_CHUNKED) || parser->content_length != ULLONG_MAX) {
1917
- return 0;
1918
- }
1919
-
1920
- return 1;
1921
- }
1922
-
1923
-
1924
- int
1925
- http_should_keep_alive (const http_parser *parser)
1926
- {
1927
- if (parser->http_major > 0 && parser->http_minor > 0) {
1928
- /* HTTP/1.1 */
1929
- if (parser->flags & F_CONNECTION_CLOSE) {
1930
- return 0;
1931
- }
1932
- } else {
1933
- /* HTTP/1.0 or earlier */
1934
- if (!(parser->flags & F_CONNECTION_KEEP_ALIVE)) {
1935
- return 0;
1936
- }
1937
- }
1938
-
1939
- return !http_message_needs_eof(parser);
1940
- }
1941
-
1942
-
1943
- const char *
1944
- http_method_str (enum http_method m)
1945
- {
1946
- return ELEM_AT(method_strings, m, "<unknown>");
1947
- }
1948
-
1949
-
1950
- void
1951
- http_parser_init (http_parser *parser, enum http_parser_type t)
1952
- {
1953
- void *data = parser->data; /* preserve application data */
1954
- memset(parser, 0, sizeof(*parser));
1955
- parser->data = data;
1956
- parser->type = t;
1957
- parser->state = (t == HTTP_REQUEST ? s_start_req : (t == HTTP_RESPONSE ? s_start_res : s_start_req_or_res));
1958
- parser->http_errno = HPE_OK;
1959
- }
1960
-
1961
- const char *
1962
- http_errno_name(enum http_errno err) {
1963
- assert(err < (sizeof(http_strerror_tab)/sizeof(http_strerror_tab[0])));
1964
- return http_strerror_tab[err].name;
1965
- }
1966
-
1967
- const char *
1968
- http_errno_description(enum http_errno err) {
1969
- assert(err < (sizeof(http_strerror_tab)/sizeof(http_strerror_tab[0])));
1970
- return http_strerror_tab[err].description;
1971
- }
1972
-
1973
- static enum http_host_state
1974
- http_parse_host_char(enum http_host_state s, const char ch) {
1975
- switch(s) {
1976
- case s_http_userinfo:
1977
- case s_http_userinfo_start:
1978
- if (ch == '@') {
1979
- return s_http_host_start;
1980
- }
1981
-
1982
- if (IS_USERINFO_CHAR(ch)) {
1983
- return s_http_userinfo;
1984
- }
1985
- break;
1986
-
1987
- case s_http_host_start:
1988
- if (ch == '[') {
1989
- return s_http_host_v6_start;
1990
- }
1991
-
1992
- if (IS_HOST_CHAR(ch)) {
1993
- return s_http_host;
1994
- }
1995
-
1996
- break;
1997
-
1998
- case s_http_host:
1999
- if (IS_HOST_CHAR(ch)) {
2000
- return s_http_host;
2001
- }
2002
-
2003
- /* FALLTHROUGH */
2004
- case s_http_host_v6_end:
2005
- if (ch == ':') {
2006
- return s_http_host_port_start;
2007
- }
2008
-
2009
- break;
2010
-
2011
- case s_http_host_v6:
2012
- if (ch == ']') {
2013
- return s_http_host_v6_end;
2014
- }
2015
-
2016
- /* FALLTHROUGH */
2017
- case s_http_host_v6_start:
2018
- if (IS_HEX(ch) || ch == ':' || ch == '.') {
2019
- return s_http_host_v6;
2020
- }
2021
-
2022
- break;
2023
-
2024
- case s_http_host_port:
2025
- case s_http_host_port_start:
2026
- if (IS_NUM(ch)) {
2027
- return s_http_host_port;
2028
- }
2029
-
2030
- break;
2031
-
2032
- default:
2033
- break;
2034
- }
2035
- return s_http_host_dead;
2036
- }
2037
-
2038
- static int
2039
- http_parse_host(const char * buf, struct http_parser_url *u, int found_at) {
2040
- enum http_host_state s;
2041
-
2042
- const char *p;
2043
- size_t buflen = u->field_data[UF_HOST].off + u->field_data[UF_HOST].len;
2044
-
2045
- u->field_data[UF_HOST].len = 0;
2046
-
2047
- s = found_at ? s_http_userinfo_start : s_http_host_start;
2048
-
2049
- for (p = buf + u->field_data[UF_HOST].off; p < buf + buflen; p++) {
2050
- enum http_host_state new_s = http_parse_host_char(s, *p);
2051
-
2052
- if (new_s == s_http_host_dead) {
2053
- return 1;
2054
- }
2055
-
2056
- switch(new_s) {
2057
- case s_http_host:
2058
- if (s != s_http_host) {
2059
- u->field_data[UF_HOST].off = p - buf;
2060
- }
2061
- u->field_data[UF_HOST].len++;
2062
- break;
2063
-
2064
- case s_http_host_v6:
2065
- if (s != s_http_host_v6) {
2066
- u->field_data[UF_HOST].off = p - buf;
2067
- }
2068
- u->field_data[UF_HOST].len++;
2069
- break;
2070
-
2071
- case s_http_host_port:
2072
- if (s != s_http_host_port) {
2073
- u->field_data[UF_PORT].off = p - buf;
2074
- u->field_data[UF_PORT].len = 0;
2075
- u->field_set |= (1 << UF_PORT);
2076
- }
2077
- u->field_data[UF_PORT].len++;
2078
- break;
2079
-
2080
- case s_http_userinfo:
2081
- if (s != s_http_userinfo) {
2082
- u->field_data[UF_USERINFO].off = p - buf ;
2083
- u->field_data[UF_USERINFO].len = 0;
2084
- u->field_set |= (1 << UF_USERINFO);
2085
- }
2086
- u->field_data[UF_USERINFO].len++;
2087
- break;
2088
-
2089
- default:
2090
- break;
2091
- }
2092
- s = new_s;
2093
- }
2094
-
2095
- /* Make sure we don't end somewhere unexpected */
2096
- switch (s) {
2097
- case s_http_host_start:
2098
- case s_http_host_v6_start:
2099
- case s_http_host_v6:
2100
- case s_http_host_port_start:
2101
- case s_http_userinfo:
2102
- case s_http_userinfo_start:
2103
- return 1;
2104
- default:
2105
- break;
2106
- }
2107
-
2108
- return 0;
2109
- }
2110
-
2111
- int
2112
- http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
2113
- struct http_parser_url *u)
2114
- {
2115
- enum state s;
2116
- const char *p;
2117
- enum http_parser_url_fields uf, old_uf;
2118
- int found_at = 0;
2119
-
2120
- u->port = u->field_set = 0;
2121
- s = is_connect ? s_req_server_start : s_req_spaces_before_url;
2122
- uf = old_uf = UF_MAX;
2123
-
2124
- for (p = buf; p < buf + buflen; p++) {
2125
- s = parse_url_char(s, *p);
2126
-
2127
- /* Figure out the next field that we're operating on */
2128
- switch (s) {
2129
- case s_dead:
2130
- return 1;
2131
-
2132
- /* Skip delimeters */
2133
- case s_req_schema_slash:
2134
- case s_req_schema_slash_slash:
2135
- case s_req_server_start:
2136
- case s_req_query_string_start:
2137
- case s_req_fragment_start:
2138
- continue;
2139
-
2140
- case s_req_schema:
2141
- uf = UF_SCHEMA;
2142
- break;
2143
-
2144
- case s_req_server_with_at:
2145
- found_at = 1;
2146
-
2147
- /* FALLTROUGH */
2148
- case s_req_server:
2149
- uf = UF_HOST;
2150
- break;
2151
-
2152
- case s_req_path:
2153
- uf = UF_PATH;
2154
- break;
2155
-
2156
- case s_req_query_string:
2157
- uf = UF_QUERY;
2158
- break;
2159
-
2160
- case s_req_fragment:
2161
- uf = UF_FRAGMENT;
2162
- break;
2163
-
2164
- default:
2165
- assert(!"Unexpected state");
2166
- return 1;
2167
- }
2168
-
2169
- /* Nothing's changed; soldier on */
2170
- if (uf == old_uf) {
2171
- u->field_data[uf].len++;
2172
- continue;
2173
- }
2174
-
2175
- u->field_data[uf].off = p - buf;
2176
- u->field_data[uf].len = 1;
2177
-
2178
- u->field_set |= (1 << uf);
2179
- old_uf = uf;
2180
- }
2181
-
2182
- /* host must be present if there is a schema */
2183
- /* parsing http:///toto will fail */
2184
- if ((u->field_set & ((1 << UF_SCHEMA) | (1 << UF_HOST))) != 0) {
2185
- if (http_parse_host(buf, u, found_at) != 0) {
2186
- return 1;
2187
- }
2188
- }
2189
-
2190
- /* CONNECT requests can only contain "hostname:port" */
2191
- if (is_connect && u->field_set != ((1 << UF_HOST)|(1 << UF_PORT))) {
2192
- return 1;
2193
- }
2194
-
2195
- if (u->field_set & (1 << UF_PORT)) {
2196
- /* Don't bother with endp; we've already validated the string */
2197
- unsigned long v = strtoul(buf + u->field_data[UF_PORT].off, NULL, 10);
2198
-
2199
- /* Ports have a max value of 2^16 */
2200
- if (v > 0xffff) {
2201
- return 1;
2202
- }
2203
-
2204
- u->port = (uint16_t) v;
2205
- }
2206
-
2207
- return 0;
2208
- }
2209
-
2210
- void
2211
- http_parser_pause(http_parser *parser, int paused) {
2212
- /* Users should only be pausing/unpausing a parser that is not in an error
2213
- * state. In non-debug builds, there's not much that we can do about this
2214
- * other than ignore it.
2215
- */
2216
- if (HTTP_PARSER_ERRNO(parser) == HPE_OK ||
2217
- HTTP_PARSER_ERRNO(parser) == HPE_PAUSED) {
2218
- SET_ERRNO((paused) ? HPE_PAUSED : HPE_OK);
2219
- } else {
2220
- assert(0 && "Attempting to pause parser in error state");
2221
- }
2222
- }
2223
-
2224
- int
2225
- http_body_is_final(const struct http_parser *parser) {
2226
- return parser->state == s_message_done;
2227
- }
2228
-
2229
- unsigned long
2230
- http_parser_version(void) {
2231
- return HTTP_PARSER_VERSION_MAJOR * 0x10000 |
2232
- HTTP_PARSER_VERSION_MINOR * 0x00100 |
2233
- HTTP_PARSER_VERSION_PATCH * 0x00001;
2234
- }
1
+ /* Based on src/http/ngx_http_parse.c from NGINX copyright Igor Sysoev
2
+ *
3
+ * Additional changes are licensed under the same terms as NGINX and
4
+ * copyright Joyent, Inc. and other Node contributors. All rights reserved.
5
+ *
6
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
7
+ * of this software and associated documentation files (the "Software"), to
8
+ * deal in the Software without restriction, including without limitation the
9
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10
+ * sell copies of the Software, and to permit persons to whom the Software is
11
+ * furnished to do so, subject to the following conditions:
12
+ *
13
+ * The above copyright notice and this permission notice shall be included in
14
+ * all copies or substantial portions of the Software.
15
+ *
16
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22
+ * IN THE SOFTWARE.
23
+ */
24
+ #include "http_parser.h"
25
+ #include <assert.h>
26
+ #include <stddef.h>
27
+ #include <ctype.h>
28
+ #include <stdlib.h>
29
+ #include <string.h>
30
+ #include <limits.h>
31
+
32
/* Small generic helpers. Each one is defined only when the including
 * environment has not already supplied a macro of the same name. */

/* Largest 64-bit unsigned value (2^64-1), for toolchains lacking it. */
#if !defined(ULLONG_MAX)
# define ULLONG_MAX (~(uint64_t) 0)
#endif

/* Smaller of two values; arguments may be evaluated more than once. */
#if !defined(MIN)
# define MIN(a,b) ((b) > (a) ? (a) : (b))
#endif

/* Element count of a true array (not of a pointer). */
#if !defined(ARRAY_SIZE)
# define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
#endif

/* Bit i of a byte-array bitmap: byte i / 8, bit i % 8. Yields 0 or 1. */
#if !defined(BIT_AT)
# define BIT_AT(a, i)                                                \
  (0 != ((unsigned int) (a)[(unsigned int) (i) >> 3] &               \
         (1 << ((unsigned int) (i) & 7))))
#endif

/* a[i] when i is inside the array, otherwise the fallback value v. */
#if !defined(ELEM_AT)
# define ELEM_AT(a, i, v) (ARRAY_SIZE(a) > (unsigned int) (i) ? (a)[(i)] : (v))
#endif
53
+
54
+ #define SET_ERRNO(e) \
55
+ do { \
56
+ parser->http_errno = (e); \
57
+ } while(0)
58
+
59
+
60
+ /* Run the notify callback FOR, returning ER if it fails */
61
+ #define CALLBACK_NOTIFY_(FOR, ER) \
62
+ do { \
63
+ assert(HTTP_PARSER_ERRNO(parser) == HPE_OK); \
64
+ \
65
+ if (settings->on_##FOR) { \
66
+ if (0 != settings->on_##FOR(parser)) { \
67
+ SET_ERRNO(HPE_CB_##FOR); \
68
+ } \
69
+ \
70
+ /* We either errored above or got paused; get out */ \
71
+ if (HTTP_PARSER_ERRNO(parser) != HPE_OK) { \
72
+ return (ER); \
73
+ } \
74
+ } \
75
+ } while (0)
76
+
77
+ /* Run the notify callback FOR and consume the current byte */
78
+ #define CALLBACK_NOTIFY(FOR) CALLBACK_NOTIFY_(FOR, p - data + 1)
79
+
80
+ /* Run the notify callback FOR and don't consume the current byte */
81
+ #define CALLBACK_NOTIFY_NOADVANCE(FOR) CALLBACK_NOTIFY_(FOR, p - data)
82
+
83
+ /* Run data callback FOR with LEN bytes, returning ER if it fails */
84
+ #define CALLBACK_DATA_(FOR, LEN, ER) \
85
+ do { \
86
+ assert(HTTP_PARSER_ERRNO(parser) == HPE_OK); \
87
+ \
88
+ if (FOR##_mark) { \
89
+ if (settings->on_##FOR) { \
90
+ if (0 != settings->on_##FOR(parser, FOR##_mark, (LEN))) { \
91
+ SET_ERRNO(HPE_CB_##FOR); \
92
+ } \
93
+ \
94
+ /* We either errored above or got paused; get out */ \
95
+ if (HTTP_PARSER_ERRNO(parser) != HPE_OK) { \
96
+ return (ER); \
97
+ } \
98
+ } \
99
+ FOR##_mark = NULL; \
100
+ } \
101
+ } while (0)
102
+
103
+ /* Run the data callback FOR and consume the current byte */
104
+ #define CALLBACK_DATA(FOR) \
105
+ CALLBACK_DATA_(FOR, p - FOR##_mark, p - data + 1)
106
+
107
+ /* Run the data callback FOR and don't consume the current byte */
108
+ #define CALLBACK_DATA_NOADVANCE(FOR) \
109
+ CALLBACK_DATA_(FOR, p - FOR##_mark, p - data)
110
+
111
+ /* Set the mark FOR; non-destructive if mark is already set */
112
+ #define MARK(FOR) \
113
+ do { \
114
+ if (!FOR##_mark) { \
115
+ FOR##_mark = p; \
116
+ } \
117
+ } while (0)
118
+
119
+
120
+ #define PROXY_CONNECTION "proxy-connection"
121
+ #define CONNECTION "connection"
122
+ #define CONTENT_LENGTH "content-length"
123
+ #define TRANSFER_ENCODING "transfer-encoding"
124
+ #define UPGRADE "upgrade"
125
+ #define CHUNKED "chunked"
126
+ #define KEEP_ALIVE "keep-alive"
127
+ #define CLOSE "close"
128
+
129
+
130
+ static const char *method_strings[] =
131
+ {
132
+ #define XX(num, name, string) #string,
133
+ HTTP_METHOD_MAP(XX)
134
+ #undef XX
135
+ };
136
+
137
+
138
+ /* Tokens as defined by rfc 2616. Also lowercases them.
139
+ * token = 1*<any CHAR except CTLs or separators>
140
+ * separators = "(" | ")" | "<" | ">" | "@"
141
+ * | "," | ";" | ":" | "\" | <">
142
+ * | "/" | "[" | "]" | "?" | "="
143
+ * | "{" | "}" | SP | HT
144
+ */
145
+ static const char tokens[256] = {
146
+ /* 0 nul 1 soh 2 stx 3 etx 4 eot 5 enq 6 ack 7 bel */
147
+ 0, 0, 0, 0, 0, 0, 0, 0,
148
+ /* 8 bs 9 ht 10 nl 11 vt 12 np 13 cr 14 so 15 si */
149
+ 0, 0, 0, 0, 0, 0, 0, 0,
150
+ /* 16 dle 17 dc1 18 dc2 19 dc3 20 dc4 21 nak 22 syn 23 etb */
151
+ 0, 0, 0, 0, 0, 0, 0, 0,
152
+ /* 24 can 25 em 26 sub 27 esc 28 fs 29 gs 30 rs 31 us */
153
+ 0, 0, 0, 0, 0, 0, 0, 0,
154
+ /* 32 sp 33 ! 34 " 35 # 36 $ 37 % 38 & 39 ' */
155
+ 0, '!', 0, '#', '$', '%', '&', '\'',
156
+ /* 40 ( 41 ) 42 * 43 + 44 , 45 - 46 . 47 / */
157
+ 0, 0, '*', '+', 0, '-', '.', 0,
158
+ /* 48 0 49 1 50 2 51 3 52 4 53 5 54 6 55 7 */
159
+ '0', '1', '2', '3', '4', '5', '6', '7',
160
+ /* 56 8 57 9 58 : 59 ; 60 < 61 = 62 > 63 ? */
161
+ '8', '9', 0, 0, 0, 0, 0, 0,
162
+ /* 64 @ 65 A 66 B 67 C 68 D 69 E 70 F 71 G */
163
+ 0, 'a', 'b', 'c', 'd', 'e', 'f', 'g',
164
+ /* 72 H 73 I 74 J 75 K 76 L 77 M 78 N 79 O */
165
+ 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
166
+ /* 80 P 81 Q 82 R 83 S 84 T 85 U 86 V 87 W */
167
+ 'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
168
+ /* 88 X 89 Y 90 Z 91 [ 92 \ 93 ] 94 ^ 95 _ */
169
+ 'x', 'y', 'z', 0, 0, 0, '^', '_',
170
+ /* 96 ` 97 a 98 b 99 c 100 d 101 e 102 f 103 g */
171
+ '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g',
172
+ /* 104 h 105 i 106 j 107 k 108 l 109 m 110 n 111 o */
173
+ 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
174
+ /* 112 p 113 q 114 r 115 s 116 t 117 u 118 v 119 w */
175
+ 'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
176
+ /* 120 x 121 y 122 z 123 { 124 | 125 } 126 ~ 127 del */
177
+ 'x', 'y', 'z', 0, '|', 0, '~', 0 };
178
+
179
+
180
/* Hex-digit lookup indexed by byte value: '0'-'9' map to 0-9, 'a'-'f' and
 * 'A'-'F' map to 10-15, every other byte maps to -1.
 *
 * All 256 entries are spelled out. With only the first 128 listed, C
 * zero-initialises the remainder, which made bytes 0x80-0xFF look like the
 * valid digit 0 instead of "not a hex digit".
 */
static const int8_t unhex[256] =
  {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  , 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1
  ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  /* 0x80 - 0xFF: never hex digits */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  };
190
+
191
+
192
+ #if HTTP_PARSER_STRICT
193
+ # define T(v) 0
194
+ #else
195
+ # define T(v) v
196
+ #endif
197
+
198
+
199
+ static const uint8_t normal_url_char[32] = {
200
+ /* 0 nul 1 soh 2 stx 3 etx 4 eot 5 enq 6 ack 7 bel */
201
+ 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0,
202
+ /* 8 bs 9 ht 10 nl 11 vt 12 np 13 cr 14 so 15 si */
203
+ 0 | T(2) | 0 | 0 | T(16) | 0 | 0 | 0,
204
+ /* 16 dle 17 dc1 18 dc2 19 dc3 20 dc4 21 nak 22 syn 23 etb */
205
+ 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0,
206
+ /* 24 can 25 em 26 sub 27 esc 28 fs 29 gs 30 rs 31 us */
207
+ 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0,
208
+ /* 32 sp 33 ! 34 " 35 # 36 $ 37 % 38 & 39 ' */
209
+ 0 | 2 | 4 | 0 | 16 | 32 | 64 | 128,
210
+ /* 40 ( 41 ) 42 * 43 + 44 , 45 - 46 . 47 / */
211
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
212
+ /* 48 0 49 1 50 2 51 3 52 4 53 5 54 6 55 7 */
213
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
214
+ /* 56 8 57 9 58 : 59 ; 60 < 61 = 62 > 63 ? */
215
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 0,
216
+ /* 64 @ 65 A 66 B 67 C 68 D 69 E 70 F 71 G */
217
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
218
+ /* 72 H 73 I 74 J 75 K 76 L 77 M 78 N 79 O */
219
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
220
+ /* 80 P 81 Q 82 R 83 S 84 T 85 U 86 V 87 W */
221
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
222
+ /* 88 X 89 Y 90 Z 91 [ 92 \ 93 ] 94 ^ 95 _ */
223
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
224
+ /* 96 ` 97 a 98 b 99 c 100 d 101 e 102 f 103 g */
225
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
226
+ /* 104 h 105 i 106 j 107 k 108 l 109 m 110 n 111 o */
227
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
228
+ /* 112 p 113 q 114 r 115 s 116 t 117 u 118 v 119 w */
229
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
230
+ /* 120 x 121 y 122 z 123 { 124 | 125 } 126 ~ 127 del */
231
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 0, };
232
+
233
+ #undef T
234
+
235
/* Parser states. The numeric order is significant: every state up to and
 * including s_headers_done counts as a "header" state for PARSING_HEADER(). */
enum state {
  s_dead = 1, /* important that this is > 0 */

  s_start_req_or_res,
  s_res_or_resp_H,
  s_start_res,
  s_res_H,
  s_res_HT,
  s_res_HTT,
  s_res_HTTP,
  s_res_first_http_major,
  s_res_http_major,
  s_res_first_http_minor,
  s_res_http_minor,
  s_res_first_status_code,
  s_res_status_code,
  s_res_status_start,
  s_res_status,
  s_res_line_almost_done,

  s_start_req,

  s_req_method,
  s_req_spaces_before_url,
  s_req_schema,
  s_req_schema_slash,
  s_req_schema_slash_slash,
  s_req_server_start,
  s_req_server,
  s_req_server_with_at,
  s_req_path,
  s_req_query_string_start,
  s_req_query_string,
  s_req_fragment_start,
  s_req_fragment,
  s_req_http_start,
  s_req_http_H,
  s_req_http_HT,
  s_req_http_HTT,
  s_req_http_HTTP,
  s_req_first_http_major,
  s_req_http_major,
  s_req_first_http_minor,
  s_req_http_minor,
  s_req_line_almost_done,

  s_header_field_start,
  s_header_field,
  s_header_value_start,
  s_header_value,
  s_header_value_lws,

  s_header_almost_done,

  s_chunk_size_start,
  s_chunk_size,
  s_chunk_parameters,
  s_chunk_size_almost_done,

  s_headers_almost_done,
  s_headers_done,

  /* Important: 's_headers_done' must be the last 'header' state. All
   * states beyond this must be 'body' states. It is used for overflow
   * checking. See the PARSING_HEADER() macro.
   */

  s_chunk_data,
  s_chunk_data_almost_done,
  s_chunk_data_done,

  s_body_identity,
  s_body_identity_eof,

  s_message_done
};


/* True while `state` is at or before s_headers_done. The argument is
 * parenthesised so that compound expressions compare as a whole. */
#define PARSING_HEADER(state) ((state) <= s_headers_done)
314
+
315
+
316
/* States used while matching header names and values. The names indicate
 * four groups: prefix matching (h_C..h_CON), name matching (h_matching_*),
 * matched names, and value matching/matched values for Transfer-Encoding
 * and Connection. */
enum header_states {
  h_general = 0,
  h_C,
  h_CO,
  h_CON,

  h_matching_connection,
  h_matching_proxy_connection,
  h_matching_content_length,
  h_matching_transfer_encoding,
  h_matching_upgrade,

  h_connection,
  h_content_length,
  h_transfer_encoding,
  h_upgrade,

  h_matching_transfer_encoding_chunked,
  h_matching_connection_keep_alive,
  h_matching_connection_close,

  h_transfer_encoding_chunked,
  h_connection_keep_alive,
  h_connection_close
};
341
+
342
/* States of the authority (userinfo / host / port) scanner.
 * s_http_host_dead is the rejection state and is deliberately non-zero. */
enum http_host_state {
  s_http_host_dead = 1,
  s_http_userinfo_start,
  s_http_userinfo,
  s_http_host_start,
  s_http_host_v6_start,
  s_http_host,
  s_http_host_v6,
  s_http_host_v6_end,
  s_http_host_port_start,
  s_http_host_port
};
355
+
356
/* Macros for character classes; depends on strict-mode.
 *
 * Every macro parenthesises its argument so that compound expressions (for
 * example a conditional expression) are classified as a whole. Arguments may
 * be evaluated more than once, so they must be free of side effects.
 */
#define CR                  '\r'
#define LF                  '\n'
/* ASCII lower-casing by setting bit 0x20; only meaningful for letters. */
#define LOWER(c)            ((unsigned char)((c) | 0x20))
#define IS_ALPHA(c)         (LOWER(c) >= 'a' && LOWER(c) <= 'z')
#define IS_NUM(c)           ((c) >= '0' && (c) <= '9')
#define IS_ALPHANUM(c)      (IS_ALPHA(c) || IS_NUM(c))
#define IS_HEX(c)           (IS_NUM(c) || (LOWER(c) >= 'a' && LOWER(c) <= 'f'))
#define IS_MARK(c)          ((c) == '-' || (c) == '_' || (c) == '.' || \
  (c) == '!' || (c) == '~' || (c) == '*' || (c) == '\'' || (c) == '(' || \
  (c) == ')')
#define IS_USERINFO_CHAR(c) (IS_ALPHANUM(c) || IS_MARK(c) || (c) == '%' || \
  (c) == ';' || (c) == ':' || (c) == '&' || (c) == '=' || (c) == '+' || \
  (c) == '$' || (c) == ',')

#if HTTP_PARSER_STRICT
#define TOKEN(c)            (tokens[(unsigned char)(c)])
#define IS_URL_CHAR(c)      (BIT_AT(normal_url_char, (unsigned char)(c)))
#define IS_HOST_CHAR(c)     (IS_ALPHANUM(c) || (c) == '.' || (c) == '-')
#else
/* Non-strict mode additionally accepts ' ' as a token character, bytes with
 * the high bit set as URL characters, and '_' in host names. */
#define TOKEN(c)            (((c) == ' ') ? ' ' : tokens[(unsigned char)(c)])
#define IS_URL_CHAR(c)                                                         \
  (BIT_AT(normal_url_char, (unsigned char)(c)) || ((c) & 0x80))
#define IS_HOST_CHAR(c)                                                        \
  (IS_ALPHANUM(c) || (c) == '.' || (c) == '-' || (c) == '_')
#endif
382
+
383
+
384
+ #define start_state (parser->type == HTTP_REQUEST ? s_start_req : s_start_res)
385
+
386
+
387
+ #if HTTP_PARSER_STRICT
388
+ # define STRICT_CHECK(cond) \
389
+ do { \
390
+ if (cond) { \
391
+ SET_ERRNO(HPE_STRICT); \
392
+ goto error; \
393
+ } \
394
+ } while (0)
395
+ # define NEW_MESSAGE() (http_should_keep_alive(parser) ? start_state : s_dead)
396
+ #else
397
+ # define STRICT_CHECK(cond)
398
+ # define NEW_MESSAGE() start_state
399
+ #endif
400
+
401
+
402
+ /* Map errno values to strings for human-readable output */
403
+ #define HTTP_STRERROR_GEN(n, s) { "HPE_" #n, s },
404
+ static struct {
405
+ const char *name;
406
+ const char *description;
407
+ } http_strerror_tab[] = {
408
+ HTTP_ERRNO_MAP(HTTP_STRERROR_GEN)
409
+ };
410
+ #undef HTTP_STRERROR_GEN
411
+
412
+ int http_message_needs_eof(const http_parser *parser);
413
+
414
+ /* Our URL parser.
415
+ *
416
+ * This is designed to be shared by http_parser_execute() for URL validation,
417
+ * hence it has a state transition + byte-for-byte interface. In addition, it
418
+ * is meant to be embedded in http_parser_parse_url(), which does the dirty
419
+ * work of turning state transitions URL components for its API.
420
+ *
421
+ * This function should only be invoked with non-space characters. It is
422
+ * assumed that the caller cares about (and can detect) the transition between
423
+ * URL and non-URL states by looking for these.
424
+ */
425
+ static enum state
426
+ parse_url_char(enum state s, const char ch)
427
+ {
428
+ if (ch == ' ' || ch == '\r' || ch == '\n') {
429
+ return s_dead;
430
+ }
431
+
432
+ #if HTTP_PARSER_STRICT
433
+ if (ch == '\t' || ch == '\f') {
434
+ return s_dead;
435
+ }
436
+ #endif
437
+
438
+ switch (s) {
439
+ case s_req_spaces_before_url:
440
+ /* Proxied requests are followed by scheme of an absolute URI (alpha).
441
+ * All methods except CONNECT are followed by '/' or '*'.
442
+ */
443
+
444
+ if (ch == '/' || ch == '*') {
445
+ return s_req_path;
446
+ }
447
+
448
+ if (IS_ALPHA(ch)) {
449
+ return s_req_schema;
450
+ }
451
+
452
+ break;
453
+
454
+ case s_req_schema:
455
+ if (IS_ALPHA(ch)) {
456
+ return s;
457
+ }
458
+
459
+ if (ch == ':') {
460
+ return s_req_schema_slash;
461
+ }
462
+
463
+ break;
464
+
465
+ case s_req_schema_slash:
466
+ if (ch == '/') {
467
+ return s_req_schema_slash_slash;
468
+ }
469
+
470
+ break;
471
+
472
+ case s_req_schema_slash_slash:
473
+ if (ch == '/') {
474
+ return s_req_server_start;
475
+ }
476
+
477
+ break;
478
+
479
+ case s_req_server_with_at:
480
+ if (ch == '@') {
481
+ return s_dead;
482
+ }
483
+
484
+ /* FALLTHROUGH */
485
+ case s_req_server_start:
486
+ case s_req_server:
487
+ if (ch == '/') {
488
+ return s_req_path;
489
+ }
490
+
491
+ if (ch == '?') {
492
+ return s_req_query_string_start;
493
+ }
494
+
495
+ if (ch == '@') {
496
+ return s_req_server_with_at;
497
+ }
498
+
499
+ if (IS_USERINFO_CHAR(ch) || ch == '[' || ch == ']') {
500
+ return s_req_server;
501
+ }
502
+
503
+ break;
504
+
505
+ case s_req_path:
506
+ if (IS_URL_CHAR(ch)) {
507
+ return s;
508
+ }
509
+
510
+ switch (ch) {
511
+ case '?':
512
+ return s_req_query_string_start;
513
+
514
+ case '#':
515
+ return s_req_fragment_start;
516
+ }
517
+
518
+ break;
519
+
520
+ case s_req_query_string_start:
521
+ case s_req_query_string:
522
+ if (IS_URL_CHAR(ch)) {
523
+ return s_req_query_string;
524
+ }
525
+
526
+ switch (ch) {
527
+ case '?':
528
+ /* allow extra '?' in query string */
529
+ return s_req_query_string;
530
+
531
+ case '#':
532
+ return s_req_fragment_start;
533
+ }
534
+
535
+ break;
536
+
537
+ case s_req_fragment_start:
538
+ if (IS_URL_CHAR(ch)) {
539
+ return s_req_fragment;
540
+ }
541
+
542
+ switch (ch) {
543
+ case '?':
544
+ return s_req_fragment;
545
+
546
+ case '#':
547
+ return s;
548
+ }
549
+
550
+ break;
551
+
552
+ case s_req_fragment:
553
+ if (IS_URL_CHAR(ch)) {
554
+ return s;
555
+ }
556
+
557
+ switch (ch) {
558
+ case '?':
559
+ case '#':
560
+ return s;
561
+ }
562
+
563
+ break;
564
+
565
+ default:
566
+ break;
567
+ }
568
+
569
+ /* We should never fall out of the switch above unless there's an error */
570
+ return s_dead;
571
+ }
572
+
573
+ size_t http_parser_execute (http_parser *parser,
574
+ const http_parser_settings *settings,
575
+ const char *data,
576
+ size_t len)
577
+ {
578
+ char c, ch;
579
+ int8_t unhex_val;
580
+ const char *p = data;
581
+ const char *header_field_mark = 0;
582
+ const char *header_value_mark = 0;
583
+ const char *url_mark = 0;
584
+ const char *body_mark = 0;
585
+ const char *status_mark = 0;
586
+
587
+ /* We're in an error state. Don't bother doing anything. */
588
+ if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
589
+ return 0;
590
+ }
591
+
592
+ if (len == 0) {
593
+ switch (parser->state) {
594
+ case s_body_identity_eof:
595
+ /* Use of CALLBACK_NOTIFY() here would erroneously return 1 byte read if
596
+ * we got paused.
597
+ */
598
+ CALLBACK_NOTIFY_NOADVANCE(message_complete);
599
+ return 0;
600
+
601
+ case s_dead:
602
+ case s_start_req_or_res:
603
+ case s_start_res:
604
+ case s_start_req:
605
+ return 0;
606
+
607
+ default:
608
+ SET_ERRNO(HPE_INVALID_EOF_STATE);
609
+ return 1;
610
+ }
611
+ }
612
+
613
+
614
+ if (parser->state == s_header_field)
615
+ header_field_mark = data;
616
+ if (parser->state == s_header_value)
617
+ header_value_mark = data;
618
+ switch (parser->state) {
619
+ case s_req_path:
620
+ case s_req_schema:
621
+ case s_req_schema_slash:
622
+ case s_req_schema_slash_slash:
623
+ case s_req_server_start:
624
+ case s_req_server:
625
+ case s_req_server_with_at:
626
+ case s_req_query_string_start:
627
+ case s_req_query_string:
628
+ case s_req_fragment_start:
629
+ case s_req_fragment:
630
+ url_mark = data;
631
+ break;
632
+ case s_res_status:
633
+ status_mark = data;
634
+ break;
635
+ }
636
+
637
+ for (p=data; p != data + len; p++) {
638
+ ch = *p;
639
+
640
+ if (PARSING_HEADER(parser->state)) {
641
+ ++parser->nread;
642
+ /* Don't allow the total size of the HTTP headers (including the status
643
+ * line) to exceed HTTP_MAX_HEADER_SIZE. This check is here to protect
644
+ * embedders against denial-of-service attacks where the attacker feeds
645
+ * us a never-ending header that the embedder keeps buffering.
646
+ *
647
+ * This check is arguably the responsibility of embedders but we're doing
648
+ * it on the embedder's behalf because most won't bother and this way we
649
+ * make the web a little safer. HTTP_MAX_HEADER_SIZE is still far bigger
650
+ * than any reasonable request or response so this should never affect
651
+ * day-to-day operation.
652
+ */
653
+ if (parser->nread > HTTP_MAX_HEADER_SIZE) {
654
+ SET_ERRNO(HPE_HEADER_OVERFLOW);
655
+ goto error;
656
+ }
657
+ }
658
+
659
+ reexecute_byte:
660
+ switch (parser->state) {
661
+
662
+ case s_dead:
663
+ /* this state is used after a 'Connection: close' message
664
+ * the parser will error out if it reads another message
665
+ */
666
+ if (ch == CR || ch == LF)
667
+ break;
668
+
669
+ SET_ERRNO(HPE_CLOSED_CONNECTION);
670
+ goto error;
671
+
672
+ case s_start_req_or_res:
673
+ {
674
+ if (ch == CR || ch == LF)
675
+ break;
676
+ parser->flags = 0;
677
+ parser->content_length = ULLONG_MAX;
678
+
679
+ if (ch == 'H') {
680
+ parser->state = s_res_or_resp_H;
681
+
682
+ CALLBACK_NOTIFY(message_begin);
683
+ } else {
684
+ parser->type = HTTP_REQUEST;
685
+ parser->state = s_start_req;
686
+ goto reexecute_byte;
687
+ }
688
+
689
+ break;
690
+ }
691
+
692
+ case s_res_or_resp_H:
693
+ if (ch == 'T') {
694
+ parser->type = HTTP_RESPONSE;
695
+ parser->state = s_res_HT;
696
+ } else {
697
+ if (ch != 'E') {
698
+ SET_ERRNO(HPE_INVALID_CONSTANT);
699
+ goto error;
700
+ }
701
+
702
+ parser->type = HTTP_REQUEST;
703
+ parser->method = HTTP_HEAD;
704
+ parser->index = 2;
705
+ parser->state = s_req_method;
706
+ }
707
+ break;
708
+
709
+ case s_start_res:
710
+ {
711
+ parser->flags = 0;
712
+ parser->content_length = ULLONG_MAX;
713
+
714
+ switch (ch) {
715
+ case 'H':
716
+ parser->state = s_res_H;
717
+ break;
718
+
719
+ case CR:
720
+ case LF:
721
+ break;
722
+
723
+ default:
724
+ SET_ERRNO(HPE_INVALID_CONSTANT);
725
+ goto error;
726
+ }
727
+
728
+ CALLBACK_NOTIFY(message_begin);
729
+ break;
730
+ }
731
+
732
+ case s_res_H:
733
+ STRICT_CHECK(ch != 'T');
734
+ parser->state = s_res_HT;
735
+ break;
736
+
737
+ case s_res_HT:
738
+ STRICT_CHECK(ch != 'T');
739
+ parser->state = s_res_HTT;
740
+ break;
741
+
742
+ case s_res_HTT:
743
+ STRICT_CHECK(ch != 'P');
744
+ parser->state = s_res_HTTP;
745
+ break;
746
+
747
+ case s_res_HTTP:
748
+ STRICT_CHECK(ch != '/');
749
+ parser->state = s_res_first_http_major;
750
+ break;
751
+
752
+ case s_res_first_http_major:
753
+ if (ch < '0' || ch > '9') {
754
+ SET_ERRNO(HPE_INVALID_VERSION);
755
+ goto error;
756
+ }
757
+
758
+ parser->http_major = ch - '0';
759
+ parser->state = s_res_http_major;
760
+ break;
761
+
762
+ /* major HTTP version or dot */
763
+ case s_res_http_major:
764
+ {
765
+ if (ch == '.') {
766
+ parser->state = s_res_first_http_minor;
767
+ break;
768
+ }
769
+
770
+ if (!IS_NUM(ch)) {
771
+ SET_ERRNO(HPE_INVALID_VERSION);
772
+ goto error;
773
+ }
774
+
775
+ parser->http_major *= 10;
776
+ parser->http_major += ch - '0';
777
+
778
+ if (parser->http_major > 999) {
779
+ SET_ERRNO(HPE_INVALID_VERSION);
780
+ goto error;
781
+ }
782
+
783
+ break;
784
+ }
785
+
786
+ /* first digit of minor HTTP version */
787
+ case s_res_first_http_minor:
788
+ if (!IS_NUM(ch)) {
789
+ SET_ERRNO(HPE_INVALID_VERSION);
790
+ goto error;
791
+ }
792
+
793
+ parser->http_minor = ch - '0';
794
+ parser->state = s_res_http_minor;
795
+ break;
796
+
797
+ /* minor HTTP version or end of request line */
798
+ case s_res_http_minor:
799
+ {
800
+ if (ch == ' ') {
801
+ parser->state = s_res_first_status_code;
802
+ break;
803
+ }
804
+
805
+ if (!IS_NUM(ch)) {
806
+ SET_ERRNO(HPE_INVALID_VERSION);
807
+ goto error;
808
+ }
809
+
810
+ parser->http_minor *= 10;
811
+ parser->http_minor += ch - '0';
812
+
813
+ if (parser->http_minor > 999) {
814
+ SET_ERRNO(HPE_INVALID_VERSION);
815
+ goto error;
816
+ }
817
+
818
+ break;
819
+ }
820
+
821
+ case s_res_first_status_code:
822
+ {
823
+ if (!IS_NUM(ch)) {
824
+ if (ch == ' ') {
825
+ break;
826
+ }
827
+
828
+ SET_ERRNO(HPE_INVALID_STATUS);
829
+ goto error;
830
+ }
831
+ parser->status_code = ch - '0';
832
+ parser->state = s_res_status_code;
833
+ break;
834
+ }
835
+
836
+ case s_res_status_code:
837
+ {
838
+ if (!IS_NUM(ch)) {
839
+ switch (ch) {
840
+ case ' ':
841
+ parser->state = s_res_status_start;
842
+ break;
843
+ case CR:
844
+ parser->state = s_res_line_almost_done;
845
+ break;
846
+ case LF:
847
+ parser->state = s_header_field_start;
848
+ break;
849
+ default:
850
+ SET_ERRNO(HPE_INVALID_STATUS);
851
+ goto error;
852
+ }
853
+ break;
854
+ }
855
+
856
+ parser->status_code *= 10;
857
+ parser->status_code += ch - '0';
858
+
859
+ if (parser->status_code > 999) {
860
+ SET_ERRNO(HPE_INVALID_STATUS);
861
+ goto error;
862
+ }
863
+
864
+ break;
865
+ }
866
+
867
+ case s_res_status_start:
868
+ {
869
+ if (ch == CR) {
870
+ parser->state = s_res_line_almost_done;
871
+ break;
872
+ }
873
+
874
+ if (ch == LF) {
875
+ parser->state = s_header_field_start;
876
+ break;
877
+ }
878
+
879
+ MARK(status);
880
+ parser->state = s_res_status;
881
+ parser->index = 0;
882
+ break;
883
+ }
884
+
885
+ case s_res_status:
886
+ if (ch == CR) {
887
+ parser->state = s_res_line_almost_done;
888
+ CALLBACK_DATA(status);
889
+ break;
890
+ }
891
+
892
+ if (ch == LF) {
893
+ parser->state = s_header_field_start;
894
+ CALLBACK_DATA(status);
895
+ break;
896
+ }
897
+
898
+ break;
899
+
900
+ case s_res_line_almost_done:
901
+ STRICT_CHECK(ch != LF);
902
+ parser->state = s_header_field_start;
903
+ break;
904
+
905
+ case s_start_req:
906
+ {
907
+ if (ch == CR || ch == LF)
908
+ break;
909
+ parser->flags = 0;
910
+ parser->content_length = ULLONG_MAX;
911
+
912
+ if (!IS_ALPHA(ch)) {
913
+ SET_ERRNO(HPE_INVALID_METHOD);
914
+ goto error;
915
+ }
916
+
917
+ parser->method = (enum http_method) 0;
918
+ parser->index = 1;
919
+ switch (ch) {
920
+ case 'C': parser->method = HTTP_CONNECT; /* or COPY, CHECKOUT */ break;
921
+ case 'D': parser->method = HTTP_DELETE; break;
922
+ case 'G': parser->method = HTTP_GET; break;
923
+ case 'H': parser->method = HTTP_HEAD; break;
924
+ case 'L': parser->method = HTTP_LOCK; break;
925
+ case 'M': parser->method = HTTP_MKCOL; /* or MOVE, MKACTIVITY, MERGE, M-SEARCH */ break;
926
+ case 'N': parser->method = HTTP_NOTIFY; break;
927
+ case 'O': parser->method = HTTP_OPTIONS; break;
928
+ case 'P': parser->method = HTTP_POST;
929
+ /* or PROPFIND|PROPPATCH|PUT|PATCH|PURGE */
930
+ break;
931
+ case 'R': parser->method = HTTP_REPORT; break;
932
+ case 'S': parser->method = HTTP_SUBSCRIBE; /* or SEARCH */ break;
933
+ case 'T': parser->method = HTTP_TRACE; break;
934
+ case 'U': parser->method = HTTP_UNLOCK; /* or UNSUBSCRIBE */ break;
935
+ default:
936
+ SET_ERRNO(HPE_INVALID_METHOD);
937
+ goto error;
938
+ }
939
+ parser->state = s_req_method;
940
+
941
+ CALLBACK_NOTIFY(message_begin);
942
+
943
+ break;
944
+ }
945
+
946
+ case s_req_method:
947
+ {
948
+ const char *matcher;
949
+ if (ch == '\0') {
950
+ SET_ERRNO(HPE_INVALID_METHOD);
951
+ goto error;
952
+ }
953
+
954
+ matcher = method_strings[parser->method];
955
+ if (ch == ' ' && matcher[parser->index] == '\0') {
956
+ parser->state = s_req_spaces_before_url;
957
+ } else if (ch == matcher[parser->index]) {
958
+ ; /* nada */
959
+ } else if (parser->method == HTTP_CONNECT) {
960
+ if (parser->index == 1 && ch == 'H') {
961
+ parser->method = HTTP_CHECKOUT;
962
+ } else if (parser->index == 2 && ch == 'P') {
963
+ parser->method = HTTP_COPY;
964
+ } else {
965
+ SET_ERRNO(HPE_INVALID_METHOD);
966
+ goto error;
967
+ }
968
+ } else if (parser->method == HTTP_MKCOL) {
969
+ if (parser->index == 1 && ch == 'O') {
970
+ parser->method = HTTP_MOVE;
971
+ } else if (parser->index == 1 && ch == 'E') {
972
+ parser->method = HTTP_MERGE;
973
+ } else if (parser->index == 1 && ch == '-') {
974
+ parser->method = HTTP_MSEARCH;
975
+ } else if (parser->index == 2 && ch == 'A') {
976
+ parser->method = HTTP_MKACTIVITY;
977
+ } else {
978
+ SET_ERRNO(HPE_INVALID_METHOD);
979
+ goto error;
980
+ }
981
+ } else if (parser->method == HTTP_SUBSCRIBE) {
982
+ if (parser->index == 1 && ch == 'E') {
983
+ parser->method = HTTP_SEARCH;
984
+ } else {
985
+ SET_ERRNO(HPE_INVALID_METHOD);
986
+ goto error;
987
+ }
988
+ } else if (parser->index == 1 && parser->method == HTTP_POST) {
989
+ if (ch == 'R') {
990
+ parser->method = HTTP_PROPFIND; /* or HTTP_PROPPATCH */
991
+ } else if (ch == 'U') {
992
+ parser->method = HTTP_PUT; /* or HTTP_PURGE */
993
+ } else if (ch == 'A') {
994
+ parser->method = HTTP_PATCH;
995
+ } else {
996
+ SET_ERRNO(HPE_INVALID_METHOD);
997
+ goto error;
998
+ }
999
+ } else if (parser->index == 2) {
1000
+ if (parser->method == HTTP_PUT) {
1001
+ if (ch == 'R') {
1002
+ parser->method = HTTP_PURGE;
1003
+ } else {
1004
+ SET_ERRNO(HPE_INVALID_METHOD);
1005
+ goto error;
1006
+ }
1007
+ } else if (parser->method == HTTP_UNLOCK) {
1008
+ if (ch == 'S') {
1009
+ parser->method = HTTP_UNSUBSCRIBE;
1010
+ } else {
1011
+ SET_ERRNO(HPE_INVALID_METHOD);
1012
+ goto error;
1013
+ }
1014
+ } else {
1015
+ SET_ERRNO(HPE_INVALID_METHOD);
1016
+ goto error;
1017
+ }
1018
+ } else if (parser->index == 4 && parser->method == HTTP_PROPFIND && ch == 'P') {
1019
+ parser->method = HTTP_PROPPATCH;
1020
+ } else {
1021
+ SET_ERRNO(HPE_INVALID_METHOD);
1022
+ goto error;
1023
+ }
1024
+
1025
+ ++parser->index;
1026
+ break;
1027
+ }
1028
+
1029
+ case s_req_spaces_before_url:
1030
+ {
1031
+ if (ch == ' ') break;
1032
+
1033
+ MARK(url);
1034
+ if (parser->method == HTTP_CONNECT) {
1035
+ parser->state = s_req_server_start;
1036
+ }
1037
+
1038
+ parser->state = parse_url_char((enum state)parser->state, ch);
1039
+ if (parser->state == s_dead) {
1040
+ SET_ERRNO(HPE_INVALID_URL);
1041
+ goto error;
1042
+ }
1043
+
1044
+ break;
1045
+ }
1046
+
1047
+ case s_req_schema:
1048
+ case s_req_schema_slash:
1049
+ case s_req_schema_slash_slash:
1050
+ case s_req_server_start:
1051
+ {
1052
+ switch (ch) {
1053
+ /* No whitespace allowed here */
1054
+ case ' ':
1055
+ case CR:
1056
+ case LF:
1057
+ SET_ERRNO(HPE_INVALID_URL);
1058
+ goto error;
1059
+ default:
1060
+ parser->state = parse_url_char((enum state)parser->state, ch);
1061
+ if (parser->state == s_dead) {
1062
+ SET_ERRNO(HPE_INVALID_URL);
1063
+ goto error;
1064
+ }
1065
+ }
1066
+
1067
+ break;
1068
+ }
1069
+
1070
+ case s_req_server:
1071
+ case s_req_server_with_at:
1072
+ case s_req_path:
1073
+ case s_req_query_string_start:
1074
+ case s_req_query_string:
1075
+ case s_req_fragment_start:
1076
+ case s_req_fragment:
1077
+ {
1078
+ switch (ch) {
1079
+ case ' ':
1080
+ parser->state = s_req_http_start;
1081
+ CALLBACK_DATA(url);
1082
+ break;
1083
+ case CR:
1084
+ case LF:
1085
+ parser->http_major = 0;
1086
+ parser->http_minor = 9;
1087
+ parser->state = (ch == CR) ?
1088
+ s_req_line_almost_done :
1089
+ s_header_field_start;
1090
+ CALLBACK_DATA(url);
1091
+ break;
1092
+ default:
1093
+ parser->state = parse_url_char((enum state)parser->state, ch);
1094
+ if (parser->state == s_dead) {
1095
+ SET_ERRNO(HPE_INVALID_URL);
1096
+ goto error;
1097
+ }
1098
+ }
1099
+ break;
1100
+ }
1101
+
1102
+ case s_req_http_start:
1103
+ switch (ch) {
1104
+ case 'H':
1105
+ parser->state = s_req_http_H;
1106
+ break;
1107
+ case ' ':
1108
+ break;
1109
+ default:
1110
+ SET_ERRNO(HPE_INVALID_CONSTANT);
1111
+ goto error;
1112
+ }
1113
+ break;
1114
+
1115
+ case s_req_http_H:
1116
+ STRICT_CHECK(ch != 'T');
1117
+ parser->state = s_req_http_HT;
1118
+ break;
1119
+
1120
+ case s_req_http_HT:
1121
+ STRICT_CHECK(ch != 'T');
1122
+ parser->state = s_req_http_HTT;
1123
+ break;
1124
+
1125
+ case s_req_http_HTT:
1126
+ STRICT_CHECK(ch != 'P');
1127
+ parser->state = s_req_http_HTTP;
1128
+ break;
1129
+
1130
+ case s_req_http_HTTP:
1131
+ STRICT_CHECK(ch != '/');
1132
+ parser->state = s_req_first_http_major;
1133
+ break;
1134
+
1135
+ /* first digit of major HTTP version */
1136
+ case s_req_first_http_major:
1137
+ if (ch < '1' || ch > '9') {
1138
+ SET_ERRNO(HPE_INVALID_VERSION);
1139
+ goto error;
1140
+ }
1141
+
1142
+ parser->http_major = ch - '0';
1143
+ parser->state = s_req_http_major;
1144
+ break;
1145
+
1146
+ /* major HTTP version or dot */
1147
+ case s_req_http_major:
1148
+ {
1149
+ if (ch == '.') {
1150
+ parser->state = s_req_first_http_minor;
1151
+ break;
1152
+ }
1153
+
1154
+ if (!IS_NUM(ch)) {
1155
+ SET_ERRNO(HPE_INVALID_VERSION);
1156
+ goto error;
1157
+ }
1158
+
1159
+ parser->http_major *= 10;
1160
+ parser->http_major += ch - '0';
1161
+
1162
+ if (parser->http_major > 999) {
1163
+ SET_ERRNO(HPE_INVALID_VERSION);
1164
+ goto error;
1165
+ }
1166
+
1167
+ break;
1168
+ }
1169
+
1170
+ /* first digit of minor HTTP version */
1171
+ case s_req_first_http_minor:
1172
+ if (!IS_NUM(ch)) {
1173
+ SET_ERRNO(HPE_INVALID_VERSION);
1174
+ goto error;
1175
+ }
1176
+
1177
+ parser->http_minor = ch - '0';
1178
+ parser->state = s_req_http_minor;
1179
+ break;
1180
+
1181
+ /* minor HTTP version or end of request line */
1182
+ case s_req_http_minor:
1183
+ {
1184
+ if (ch == CR) {
1185
+ parser->state = s_req_line_almost_done;
1186
+ break;
1187
+ }
1188
+
1189
+ if (ch == LF) {
1190
+ parser->state = s_header_field_start;
1191
+ break;
1192
+ }
1193
+
1194
+ /* XXX allow spaces after digit? */
1195
+
1196
+ if (!IS_NUM(ch)) {
1197
+ SET_ERRNO(HPE_INVALID_VERSION);
1198
+ goto error;
1199
+ }
1200
+
1201
+ parser->http_minor *= 10;
1202
+ parser->http_minor += ch - '0';
1203
+
1204
+ if (parser->http_minor > 999) {
1205
+ SET_ERRNO(HPE_INVALID_VERSION);
1206
+ goto error;
1207
+ }
1208
+
1209
+ break;
1210
+ }
1211
+
1212
+ /* end of request line */
1213
+ case s_req_line_almost_done:
1214
+ {
1215
+ if (ch != LF) {
1216
+ SET_ERRNO(HPE_LF_EXPECTED);
1217
+ goto error;
1218
+ }
1219
+
1220
+ parser->state = s_header_field_start;
1221
+ break;
1222
+ }
1223
+
1224
+ case s_header_field_start:
1225
+ {
1226
+ if (ch == CR) {
1227
+ parser->state = s_headers_almost_done;
1228
+ break;
1229
+ }
1230
+
1231
+ if (ch == LF) {
1232
+ /* they might be just sending \n instead of \r\n so this would be
1233
+ * the second \n to denote the end of headers*/
1234
+ parser->state = s_headers_almost_done;
1235
+ goto reexecute_byte;
1236
+ }
1237
+
1238
+ c = TOKEN(ch);
1239
+
1240
+ if (!c) {
1241
+ SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1242
+ goto error;
1243
+ }
1244
+
1245
+ MARK(header_field);
1246
+
1247
+ parser->index = 0;
1248
+ parser->state = s_header_field;
1249
+
1250
+ switch (c) {
1251
+ case 'c':
1252
+ parser->header_state = h_C;
1253
+ break;
1254
+
1255
+ case 'p':
1256
+ parser->header_state = h_matching_proxy_connection;
1257
+ break;
1258
+
1259
+ case 't':
1260
+ parser->header_state = h_matching_transfer_encoding;
1261
+ break;
1262
+
1263
+ case 'u':
1264
+ parser->header_state = h_matching_upgrade;
1265
+ break;
1266
+
1267
+ default:
1268
+ parser->header_state = h_general;
1269
+ break;
1270
+ }
1271
+ break;
1272
+ }
1273
+
1274
+ case s_header_field:
1275
+ {
1276
+ c = TOKEN(ch);
1277
+
1278
+ if (c) {
1279
+ switch (parser->header_state) {
1280
+ case h_general:
1281
+ break;
1282
+
1283
+ case h_C:
1284
+ parser->index++;
1285
+ parser->header_state = (c == 'o' ? h_CO : h_general);
1286
+ break;
1287
+
1288
+ case h_CO:
1289
+ parser->index++;
1290
+ parser->header_state = (c == 'n' ? h_CON : h_general);
1291
+ break;
1292
+
1293
+ case h_CON:
1294
+ parser->index++;
1295
+ switch (c) {
1296
+ case 'n':
1297
+ parser->header_state = h_matching_connection;
1298
+ break;
1299
+ case 't':
1300
+ parser->header_state = h_matching_content_length;
1301
+ break;
1302
+ default:
1303
+ parser->header_state = h_general;
1304
+ break;
1305
+ }
1306
+ break;
1307
+
1308
+ /* connection */
1309
+
1310
+ case h_matching_connection:
1311
+ parser->index++;
1312
+ if (parser->index > sizeof(CONNECTION)-1
1313
+ || c != CONNECTION[parser->index]) {
1314
+ parser->header_state = h_general;
1315
+ } else if (parser->index == sizeof(CONNECTION)-2) {
1316
+ parser->header_state = h_connection;
1317
+ }
1318
+ break;
1319
+
1320
+ /* proxy-connection */
1321
+
1322
+ case h_matching_proxy_connection:
1323
+ parser->index++;
1324
+ if (parser->index > sizeof(PROXY_CONNECTION)-1
1325
+ || c != PROXY_CONNECTION[parser->index]) {
1326
+ parser->header_state = h_general;
1327
+ } else if (parser->index == sizeof(PROXY_CONNECTION)-2) {
1328
+ parser->header_state = h_connection;
1329
+ }
1330
+ break;
1331
+
1332
+ /* content-length */
1333
+
1334
+ case h_matching_content_length:
1335
+ parser->index++;
1336
+ if (parser->index > sizeof(CONTENT_LENGTH)-1
1337
+ || c != CONTENT_LENGTH[parser->index]) {
1338
+ parser->header_state = h_general;
1339
+ } else if (parser->index == sizeof(CONTENT_LENGTH)-2) {
1340
+ parser->header_state = h_content_length;
1341
+ }
1342
+ break;
1343
+
1344
+ /* transfer-encoding */
1345
+
1346
+ case h_matching_transfer_encoding:
1347
+ parser->index++;
1348
+ if (parser->index > sizeof(TRANSFER_ENCODING)-1
1349
+ || c != TRANSFER_ENCODING[parser->index]) {
1350
+ parser->header_state = h_general;
1351
+ } else if (parser->index == sizeof(TRANSFER_ENCODING)-2) {
1352
+ parser->header_state = h_transfer_encoding;
1353
+ }
1354
+ break;
1355
+
1356
+ /* upgrade */
1357
+
1358
+ case h_matching_upgrade:
1359
+ parser->index++;
1360
+ if (parser->index > sizeof(UPGRADE)-1
1361
+ || c != UPGRADE[parser->index]) {
1362
+ parser->header_state = h_general;
1363
+ } else if (parser->index == sizeof(UPGRADE)-2) {
1364
+ parser->header_state = h_upgrade;
1365
+ }
1366
+ break;
1367
+
1368
+ case h_connection:
1369
+ case h_content_length:
1370
+ case h_transfer_encoding:
1371
+ case h_upgrade:
1372
+ if (ch != ' ') parser->header_state = h_general;
1373
+ break;
1374
+
1375
+ default:
1376
+ assert(0 && "Unknown header_state");
1377
+ break;
1378
+ }
1379
+ break;
1380
+ }
1381
+
1382
+ if (ch == ':') {
1383
+ parser->state = s_header_value_start;
1384
+ CALLBACK_DATA(header_field);
1385
+ break;
1386
+ }
1387
+
1388
+ if (ch == CR) {
1389
+ parser->state = s_header_almost_done;
1390
+ CALLBACK_DATA(header_field);
1391
+ break;
1392
+ }
1393
+
1394
+ if (ch == LF) {
1395
+ parser->state = s_header_field_start;
1396
+ CALLBACK_DATA(header_field);
1397
+ break;
1398
+ }
1399
+
1400
+ SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1401
+ goto error;
1402
+ }
1403
+
1404
+ case s_header_value_start:
1405
+ {
1406
+ if (ch == ' ' || ch == '\t') break;
1407
+
1408
+ MARK(header_value);
1409
+
1410
+ parser->state = s_header_value;
1411
+ parser->index = 0;
1412
+
1413
+ if (ch == CR) {
1414
+ parser->header_state = h_general;
1415
+ parser->state = s_header_almost_done;
1416
+ CALLBACK_DATA(header_value);
1417
+ break;
1418
+ }
1419
+
1420
+ if (ch == LF) {
1421
+ parser->state = s_header_field_start;
1422
+ CALLBACK_DATA(header_value);
1423
+ break;
1424
+ }
1425
+
1426
+ c = LOWER(ch);
1427
+
1428
+ switch (parser->header_state) {
1429
+ case h_upgrade:
1430
+ parser->flags |= F_UPGRADE;
1431
+ parser->header_state = h_general;
1432
+ break;
1433
+
1434
+ case h_transfer_encoding:
1435
+ /* looking for 'Transfer-Encoding: chunked' */
1436
+ if ('c' == c) {
1437
+ parser->header_state = h_matching_transfer_encoding_chunked;
1438
+ } else {
1439
+ parser->header_state = h_general;
1440
+ }
1441
+ break;
1442
+
1443
+ case h_content_length:
1444
+ if (!IS_NUM(ch)) {
1445
+ SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1446
+ goto error;
1447
+ }
1448
+
1449
+ parser->content_length = ch - '0';
1450
+ break;
1451
+
1452
+ case h_connection:
1453
+ /* looking for 'Connection: keep-alive' */
1454
+ if (c == 'k') {
1455
+ parser->header_state = h_matching_connection_keep_alive;
1456
+ /* looking for 'Connection: close' */
1457
+ } else if (c == 'c') {
1458
+ parser->header_state = h_matching_connection_close;
1459
+ } else {
1460
+ parser->header_state = h_general;
1461
+ }
1462
+ break;
1463
+
1464
+ default:
1465
+ parser->header_state = h_general;
1466
+ break;
1467
+ }
1468
+ break;
1469
+ }
1470
+
1471
+ case s_header_value:
1472
+ {
1473
+
1474
+ if (ch == CR) {
1475
+ parser->state = s_header_almost_done;
1476
+ CALLBACK_DATA(header_value);
1477
+ break;
1478
+ }
1479
+
1480
+ if (ch == LF) {
1481
+ parser->state = s_header_almost_done;
1482
+ CALLBACK_DATA_NOADVANCE(header_value);
1483
+ goto reexecute_byte;
1484
+ }
1485
+
1486
+ c = LOWER(ch);
1487
+
1488
+ switch (parser->header_state) {
1489
+ case h_general:
1490
+ break;
1491
+
1492
+ case h_connection:
1493
+ case h_transfer_encoding:
1494
+ assert(0 && "Shouldn't get here.");
1495
+ break;
1496
+
1497
+ case h_content_length:
1498
+ {
1499
+ uint64_t t;
1500
+
1501
+ if (ch == ' ') break;
1502
+
1503
+ if (!IS_NUM(ch)) {
1504
+ SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1505
+ goto error;
1506
+ }
1507
+
1508
+ t = parser->content_length;
1509
+ t *= 10;
1510
+ t += ch - '0';
1511
+
1512
+ /* Overflow? Test against a conservative limit for simplicity. */
1513
+ if ((ULLONG_MAX - 10) / 10 < parser->content_length) {
1514
+ SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1515
+ goto error;
1516
+ }
1517
+
1518
+ parser->content_length = t;
1519
+ break;
1520
+ }
1521
+
1522
+ /* Transfer-Encoding: chunked */
1523
+ case h_matching_transfer_encoding_chunked:
1524
+ parser->index++;
1525
+ if (parser->index > sizeof(CHUNKED)-1
1526
+ || c != CHUNKED[parser->index]) {
1527
+ parser->header_state = h_general;
1528
+ } else if (parser->index == sizeof(CHUNKED)-2) {
1529
+ parser->header_state = h_transfer_encoding_chunked;
1530
+ }
1531
+ break;
1532
+
1533
+ /* looking for 'Connection: keep-alive' */
1534
+ case h_matching_connection_keep_alive:
1535
+ parser->index++;
1536
+ if (parser->index > sizeof(KEEP_ALIVE)-1
1537
+ || c != KEEP_ALIVE[parser->index]) {
1538
+ parser->header_state = h_general;
1539
+ } else if (parser->index == sizeof(KEEP_ALIVE)-2) {
1540
+ parser->header_state = h_connection_keep_alive;
1541
+ }
1542
+ break;
1543
+
1544
+ /* looking for 'Connection: close' */
1545
+ case h_matching_connection_close:
1546
+ parser->index++;
1547
+ if (parser->index > sizeof(CLOSE)-1 || c != CLOSE[parser->index]) {
1548
+ parser->header_state = h_general;
1549
+ } else if (parser->index == sizeof(CLOSE)-2) {
1550
+ parser->header_state = h_connection_close;
1551
+ }
1552
+ break;
1553
+
1554
+ case h_transfer_encoding_chunked:
1555
+ case h_connection_keep_alive:
1556
+ case h_connection_close:
1557
+ if (ch != ' ') parser->header_state = h_general;
1558
+ break;
1559
+
1560
+ default:
1561
+ parser->state = s_header_value;
1562
+ parser->header_state = h_general;
1563
+ break;
1564
+ }
1565
+ break;
1566
+ }
1567
+
1568
+ case s_header_almost_done:
1569
+ {
1570
+ STRICT_CHECK(ch != LF);
1571
+
1572
+ parser->state = s_header_value_lws;
1573
+
1574
+ switch (parser->header_state) {
1575
+ case h_connection_keep_alive:
1576
+ parser->flags |= F_CONNECTION_KEEP_ALIVE;
1577
+ break;
1578
+ case h_connection_close:
1579
+ parser->flags |= F_CONNECTION_CLOSE;
1580
+ break;
1581
+ case h_transfer_encoding_chunked:
1582
+ parser->flags |= F_CHUNKED;
1583
+ break;
1584
+ default:
1585
+ break;
1586
+ }
1587
+
1588
+ break;
1589
+ }
1590
+
1591
+ case s_header_value_lws:
1592
+ {
1593
+ if (ch == ' ' || ch == '\t')
1594
+ parser->state = s_header_value_start;
1595
+ else
1596
+ {
1597
+ parser->state = s_header_field_start;
1598
+ goto reexecute_byte;
1599
+ }
1600
+ break;
1601
+ }
1602
+
1603
+ case s_headers_almost_done:
1604
+ {
1605
+ STRICT_CHECK(ch != LF);
1606
+
1607
+ if (parser->flags & F_TRAILING) {
1608
+ /* End of a chunked request */
1609
+ parser->state = NEW_MESSAGE();
1610
+ CALLBACK_NOTIFY(message_complete);
1611
+ break;
1612
+ }
1613
+
1614
+ parser->state = s_headers_done;
1615
+
1616
+ /* Set this here so that on_headers_complete() callbacks can see it */
1617
+ parser->upgrade =
1618
+ (parser->flags & F_UPGRADE || parser->method == HTTP_CONNECT);
1619
+
1620
+ /* Here we call the headers_complete callback. This is somewhat
1621
+ * different than other callbacks because if the user returns 1, we
1622
+ * will interpret that as saying that this message has no body. This
1623
+ * is needed for the annoying case of receiving a response to a HEAD
1624
+ * request.
1625
+ *
1626
+ * We'd like to use CALLBACK_NOTIFY_NOADVANCE() here but we cannot, so
1627
+ * we have to simulate it by handling a change in errno below.
1628
+ */
1629
+ if (settings->on_headers_complete) {
1630
+ switch (settings->on_headers_complete(parser)) {
1631
+ case 0:
1632
+ break;
1633
+
1634
+ case 1:
1635
+ parser->flags |= F_SKIPBODY;
1636
+ break;
1637
+
1638
+ default:
1639
+ SET_ERRNO(HPE_CB_headers_complete);
1640
+ return p - data; /* Error */
1641
+ }
1642
+ }
1643
+
1644
+ if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
1645
+ return p - data;
1646
+ }
1647
+
1648
+ goto reexecute_byte;
1649
+ }
1650
+
1651
+ case s_headers_done:
1652
+ {
1653
+ STRICT_CHECK(ch != LF);
1654
+
1655
+ parser->nread = 0;
1656
+
1657
+ /* Exit, the rest of the connect is in a different protocol. */
1658
+ if (parser->upgrade) {
1659
+ parser->state = NEW_MESSAGE();
1660
+ CALLBACK_NOTIFY(message_complete);
1661
+ return (p - data) + 1;
1662
+ }
1663
+
1664
+ if (parser->flags & F_SKIPBODY) {
1665
+ parser->state = NEW_MESSAGE();
1666
+ CALLBACK_NOTIFY(message_complete);
1667
+ } else if (parser->flags & F_CHUNKED) {
1668
+ /* chunked encoding - ignore Content-Length header */
1669
+ parser->state = s_chunk_size_start;
1670
+ } else {
1671
+ if (parser->content_length == 0) {
1672
+ /* Content-Length header given but zero: Content-Length: 0\r\n */
1673
+ parser->state = NEW_MESSAGE();
1674
+ CALLBACK_NOTIFY(message_complete);
1675
+ } else if (parser->content_length != ULLONG_MAX) {
1676
+ /* Content-Length header given and non-zero */
1677
+ parser->state = s_body_identity;
1678
+ } else {
1679
+ if (parser->type == HTTP_REQUEST ||
1680
+ !http_message_needs_eof(parser)) {
1681
+ /* Assume content-length 0 - read the next */
1682
+ parser->state = NEW_MESSAGE();
1683
+ CALLBACK_NOTIFY(message_complete);
1684
+ } else {
1685
+ /* Read body until EOF */
1686
+ parser->state = s_body_identity_eof;
1687
+ }
1688
+ }
1689
+ }
1690
+
1691
+ break;
1692
+ }
1693
+
1694
+ case s_body_identity:
1695
+ {
1696
+ uint64_t to_read = MIN(parser->content_length,
1697
+ (uint64_t) ((data + len) - p));
1698
+
1699
+ assert(parser->content_length != 0
1700
+ && parser->content_length != ULLONG_MAX);
1701
+
1702
+ /* The difference between advancing content_length and p is because
1703
+ * the latter will automatically advance on the next loop iteration.
1704
+ * Further, if content_length ends up at 0, we want to see the last
1705
+ * byte again for our message complete callback.
1706
+ */
1707
+ MARK(body);
1708
+ parser->content_length -= to_read;
1709
+ p += to_read - 1;
1710
+
1711
+ if (parser->content_length == 0) {
1712
+ parser->state = s_message_done;
1713
+
1714
+ /* Mimic CALLBACK_DATA_NOADVANCE() but with one extra byte.
1715
+ *
1716
+ * The alternative to doing this is to wait for the next byte to
1717
+ * trigger the data callback, just as in every other case. The
1718
+ * problem with this is that this makes it difficult for the test
1719
+ * harness to distinguish between complete-on-EOF and
1720
+ * complete-on-length. It's not clear that this distinction is
1721
+ * important for applications, but let's keep it for now.
1722
+ */
1723
+ CALLBACK_DATA_(body, p - body_mark + 1, p - data);
1724
+ goto reexecute_byte;
1725
+ }
1726
+
1727
+ break;
1728
+ }
1729
+
1730
+ /* read until EOF */
1731
+ case s_body_identity_eof:
1732
+ MARK(body);
1733
+ p = data + len - 1;
1734
+
1735
+ break;
1736
+
1737
+ case s_message_done:
1738
+ parser->state = NEW_MESSAGE();
1739
+ CALLBACK_NOTIFY(message_complete);
1740
+ break;
1741
+
1742
+ case s_chunk_size_start:
1743
+ {
1744
+ assert(parser->nread == 1);
1745
+ assert(parser->flags & F_CHUNKED);
1746
+
1747
+ unhex_val = unhex[(unsigned char)ch];
1748
+ if (unhex_val == -1) {
1749
+ SET_ERRNO(HPE_INVALID_CHUNK_SIZE);
1750
+ goto error;
1751
+ }
1752
+
1753
+ parser->content_length = unhex_val;
1754
+ parser->state = s_chunk_size;
1755
+ break;
1756
+ }
1757
+
1758
+ case s_chunk_size:
1759
+ {
1760
+ uint64_t t;
1761
+
1762
+ assert(parser->flags & F_CHUNKED);
1763
+
1764
+ if (ch == CR) {
1765
+ parser->state = s_chunk_size_almost_done;
1766
+ break;
1767
+ }
1768
+
1769
+ unhex_val = unhex[(unsigned char)ch];
1770
+
1771
+ if (unhex_val == -1) {
1772
+ if (ch == ';' || ch == ' ') {
1773
+ parser->state = s_chunk_parameters;
1774
+ break;
1775
+ }
1776
+
1777
+ SET_ERRNO(HPE_INVALID_CHUNK_SIZE);
1778
+ goto error;
1779
+ }
1780
+
1781
+ t = parser->content_length;
1782
+ t *= 16;
1783
+ t += unhex_val;
1784
+
1785
+ /* Overflow? Test against a conservative limit for simplicity. */
1786
+ if ((ULLONG_MAX - 16) / 16 < parser->content_length) {
1787
+ SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1788
+ goto error;
1789
+ }
1790
+
1791
+ parser->content_length = t;
1792
+ break;
1793
+ }
1794
+
1795
+ case s_chunk_parameters:
1796
+ {
1797
+ assert(parser->flags & F_CHUNKED);
1798
+ /* Chunk parameters are skipped up to the next CR. TODO check for overflow */
1799
+ if (ch == CR) {
1800
+ parser->state = s_chunk_size_almost_done;
1801
+ break;
1802
+ }
1803
+ break;
1804
+ }
1805
+
1806
+ case s_chunk_size_almost_done:
1807
+ {
1808
+ assert(parser->flags & F_CHUNKED);
1809
+ STRICT_CHECK(ch != LF);
1810
+
1811
+ parser->nread = 0;
1812
+
1813
+ if (parser->content_length == 0) {
1814
+ parser->flags |= F_TRAILING;
1815
+ parser->state = s_header_field_start;
1816
+ } else {
1817
+ parser->state = s_chunk_data;
1818
+ }
1819
+ break;
1820
+ }
1821
+
1822
+ case s_chunk_data:
1823
+ {
1824
+ uint64_t to_read = MIN(parser->content_length,
1825
+ (uint64_t) ((data + len) - p));
1826
+
1827
+ assert(parser->flags & F_CHUNKED);
1828
+ assert(parser->content_length != 0
1829
+ && parser->content_length != ULLONG_MAX);
1830
+
1831
+ /* See the explanation in s_body_identity for why the content
1832
+ * length and data pointers are managed this way.
1833
+ */
1834
+ MARK(body);
1835
+ parser->content_length -= to_read;
1836
+ p += to_read - 1;
1837
+
1838
+ if (parser->content_length == 0) {
1839
+ parser->state = s_chunk_data_almost_done;
1840
+ }
1841
+
1842
+ break;
1843
+ }
1844
+
1845
+ case s_chunk_data_almost_done:
1846
+ assert(parser->flags & F_CHUNKED);
1847
+ assert(parser->content_length == 0);
1848
+ STRICT_CHECK(ch != CR);
1849
+ parser->state = s_chunk_data_done;
1850
+ CALLBACK_DATA(body);
1851
+ break;
1852
+
1853
+ case s_chunk_data_done:
1854
+ assert(parser->flags & F_CHUNKED);
1855
+ STRICT_CHECK(ch != LF);
1856
+ parser->nread = 0;
1857
+ parser->state = s_chunk_size_start;
1858
+ break;
1859
+
1860
+ default:
1861
+ assert(0 && "unhandled state");
1862
+ SET_ERRNO(HPE_INVALID_INTERNAL_STATE);
1863
+ goto error;
1864
+ }
1865
+ }
1866
+
1867
+ /* Run callbacks for any marks that we have leftover after we ran out of
1868
+ * bytes. There should be at most one of these set, so it's OK to invoke
1869
+ * them in series (unset marks will not result in callbacks).
1870
+ *
1871
+ * We use the NOADVANCE() variety of callbacks here because 'p' has already
1872
+ * overflowed 'data' and this allows us to correct for the off-by-one that
1873
+ * we'd otherwise have (since CALLBACK_DATA() is meant to be run with a 'p'
1874
+ * value that's in-bounds).
1875
+ */
1876
+
1877
+ assert(((header_field_mark ? 1 : 0) +
1878
+ (header_value_mark ? 1 : 0) +
1879
+ (url_mark ? 1 : 0) +
1880
+ (body_mark ? 1 : 0) +
1881
+ (status_mark ? 1 : 0)) <= 1);
1882
+
1883
+ CALLBACK_DATA_NOADVANCE(header_field);
1884
+ CALLBACK_DATA_NOADVANCE(header_value);
1885
+ CALLBACK_DATA_NOADVANCE(url);
1886
+ CALLBACK_DATA_NOADVANCE(body);
1887
+ CALLBACK_DATA_NOADVANCE(status);
1888
+
1889
+ return len;
1890
+
1891
+ error:
1892
+ if (HTTP_PARSER_ERRNO(parser) == HPE_OK) {
1893
+ SET_ERRNO(HPE_UNKNOWN);
1894
+ }
1895
+
1896
+ return (p - data);
1897
+ }
1898
+
1899
+
1900
+ /* Does the parser need to see an EOF to find the end of the message? */
1901
+ int
1902
+ http_message_needs_eof (const http_parser *parser)
1903
+ {
1904
+ if (parser->type == HTTP_REQUEST) {
1905
+ return 0;
1906
+ }
1907
+
1908
+ /* See RFC 2616 section 4.4 */
1909
+ if (parser->status_code / 100 == 1 || /* 1xx e.g. Continue */
1910
+ parser->status_code == 204 || /* No Content */
1911
+ parser->status_code == 304 || /* Not Modified */
1912
+ parser->flags & F_SKIPBODY) { /* response to a HEAD request */
1913
+ return 0;
1914
+ }
1915
+
1916
+ if ((parser->flags & F_CHUNKED) || parser->content_length != ULLONG_MAX) {
1917
+ return 0;
1918
+ }
1919
+
1920
+ return 1;
1921
+ }
1922
+
1923
+
1924
+ int
1925
+ http_should_keep_alive (const http_parser *parser)
1926
+ {
1927
+ if (parser->http_major > 0 && parser->http_minor > 0) {
1928
+ /* HTTP/1.1 */
1929
+ if (parser->flags & F_CONNECTION_CLOSE) {
1930
+ return 0;
1931
+ }
1932
+ } else {
1933
+ /* HTTP/1.0 or earlier */
1934
+ if (!(parser->flags & F_CONNECTION_KEEP_ALIVE)) {
1935
+ return 0;
1936
+ }
1937
+ }
1938
+
1939
+ return !http_message_needs_eof(parser);
1940
+ }
1941
+
1942
+
1943
+ const char *
1944
+ http_method_str (enum http_method m)
1945
+ {
1946
+ return ELEM_AT(method_strings, m, "<unknown>");
1947
+ }
1948
+
1949
+
1950
+ void
1951
+ http_parser_init (http_parser *parser, enum http_parser_type t)
1952
+ {
1953
+ void *data = parser->data; /* preserve application data */
1954
+ memset(parser, 0, sizeof(*parser));
1955
+ parser->data = data;
1956
+ parser->type = t;
1957
+ parser->state = (t == HTTP_REQUEST ? s_start_req : (t == HTTP_RESPONSE ? s_start_res : s_start_req_or_res));
1958
+ parser->http_errno = HPE_OK;
1959
+ }
1960
+
1961
+ const char *
1962
+ http_errno_name(enum http_errno err) {
1963
+ assert(err < (sizeof(http_strerror_tab)/sizeof(http_strerror_tab[0])));
1964
+ return http_strerror_tab[err].name;
1965
+ }
1966
+
1967
+ const char *
1968
+ http_errno_description(enum http_errno err) {
1969
+ assert(err < (sizeof(http_strerror_tab)/sizeof(http_strerror_tab[0])));
1970
+ return http_strerror_tab[err].description;
1971
+ }
1972
+
1973
+ static enum http_host_state
1974
+ http_parse_host_char(enum http_host_state s, const char ch) {
1975
+ switch(s) {
1976
+ case s_http_userinfo:
1977
+ case s_http_userinfo_start:
1978
+ if (ch == '@') {
1979
+ return s_http_host_start;
1980
+ }
1981
+
1982
+ if (IS_USERINFO_CHAR(ch)) {
1983
+ return s_http_userinfo;
1984
+ }
1985
+ break;
1986
+
1987
+ case s_http_host_start:
1988
+ if (ch == '[') {
1989
+ return s_http_host_v6_start;
1990
+ }
1991
+
1992
+ if (IS_HOST_CHAR(ch)) {
1993
+ return s_http_host;
1994
+ }
1995
+
1996
+ break;
1997
+
1998
+ case s_http_host:
1999
+ if (IS_HOST_CHAR(ch)) {
2000
+ return s_http_host;
2001
+ }
2002
+
2003
+ /* FALLTHROUGH */
2004
+ case s_http_host_v6_end:
2005
+ if (ch == ':') {
2006
+ return s_http_host_port_start;
2007
+ }
2008
+
2009
+ break;
2010
+
2011
+ case s_http_host_v6:
2012
+ if (ch == ']') {
2013
+ return s_http_host_v6_end;
2014
+ }
2015
+
2016
+ /* FALLTHROUGH */
2017
+ case s_http_host_v6_start:
2018
+ if (IS_HEX(ch) || ch == ':' || ch == '.') {
2019
+ return s_http_host_v6;
2020
+ }
2021
+
2022
+ break;
2023
+
2024
+ case s_http_host_port:
2025
+ case s_http_host_port_start:
2026
+ if (IS_NUM(ch)) {
2027
+ return s_http_host_port;
2028
+ }
2029
+
2030
+ break;
2031
+
2032
+ default:
2033
+ break;
2034
+ }
2035
+ return s_http_host_dead;
2036
+ }
2037
+
2038
+ static int
2039
+ http_parse_host(const char * buf, struct http_parser_url *u, int found_at) {
2040
+ enum http_host_state s;
2041
+
2042
+ const char *p;
2043
+ size_t buflen = u->field_data[UF_HOST].off + u->field_data[UF_HOST].len;
2044
+
2045
+ u->field_data[UF_HOST].len = 0;
2046
+
2047
+ s = found_at ? s_http_userinfo_start : s_http_host_start;
2048
+
2049
+ for (p = buf + u->field_data[UF_HOST].off; p < buf + buflen; p++) {
2050
+ enum http_host_state new_s = http_parse_host_char(s, *p);
2051
+
2052
+ if (new_s == s_http_host_dead) {
2053
+ return 1;
2054
+ }
2055
+
2056
+ switch(new_s) {
2057
+ case s_http_host:
2058
+ if (s != s_http_host) {
2059
+ u->field_data[UF_HOST].off = p - buf;
2060
+ }
2061
+ u->field_data[UF_HOST].len++;
2062
+ break;
2063
+
2064
+ case s_http_host_v6:
2065
+ if (s != s_http_host_v6) {
2066
+ u->field_data[UF_HOST].off = p - buf;
2067
+ }
2068
+ u->field_data[UF_HOST].len++;
2069
+ break;
2070
+
2071
+ case s_http_host_port:
2072
+ if (s != s_http_host_port) {
2073
+ u->field_data[UF_PORT].off = p - buf;
2074
+ u->field_data[UF_PORT].len = 0;
2075
+ u->field_set |= (1 << UF_PORT);
2076
+ }
2077
+ u->field_data[UF_PORT].len++;
2078
+ break;
2079
+
2080
+ case s_http_userinfo:
2081
+ if (s != s_http_userinfo) {
2082
+ u->field_data[UF_USERINFO].off = p - buf ;
2083
+ u->field_data[UF_USERINFO].len = 0;
2084
+ u->field_set |= (1 << UF_USERINFO);
2085
+ }
2086
+ u->field_data[UF_USERINFO].len++;
2087
+ break;
2088
+
2089
+ default:
2090
+ break;
2091
+ }
2092
+ s = new_s;
2093
+ }
2094
+
2095
+ /* Make sure we don't end somewhere unexpected */
2096
+ switch (s) {
2097
+ case s_http_host_start:
2098
+ case s_http_host_v6_start:
2099
+ case s_http_host_v6:
2100
+ case s_http_host_port_start:
2101
+ case s_http_userinfo:
2102
+ case s_http_userinfo_start:
2103
+ return 1;
2104
+ default:
2105
+ break;
2106
+ }
2107
+
2108
+ return 0;
2109
+ }
2110
+
2111
+ int
2112
+ http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
2113
+ struct http_parser_url *u)
2114
+ {
2115
+ enum state s;
2116
+ const char *p;
2117
+ enum http_parser_url_fields uf, old_uf;
2118
+ int found_at = 0;
2119
+
2120
+ u->port = u->field_set = 0;
2121
+ s = is_connect ? s_req_server_start : s_req_spaces_before_url;
2122
+ uf = old_uf = UF_MAX;
2123
+
2124
+ for (p = buf; p < buf + buflen; p++) {
2125
+ s = parse_url_char(s, *p);
2126
+
2127
+ /* Figure out the next field that we're operating on */
2128
+ switch (s) {
2129
+ case s_dead:
2130
+ return 1;
2131
+
2132
+ /* Skip delimeters */
2133
+ case s_req_schema_slash:
2134
+ case s_req_schema_slash_slash:
2135
+ case s_req_server_start:
2136
+ case s_req_query_string_start:
2137
+ case s_req_fragment_start:
2138
+ continue;
2139
+
2140
+ case s_req_schema:
2141
+ uf = UF_SCHEMA;
2142
+ break;
2143
+
2144
+ case s_req_server_with_at:
2145
+ found_at = 1;
2146
+
2147
+ /* FALLTROUGH */
2148
+ case s_req_server:
2149
+ uf = UF_HOST;
2150
+ break;
2151
+
2152
+ case s_req_path:
2153
+ uf = UF_PATH;
2154
+ break;
2155
+
2156
+ case s_req_query_string:
2157
+ uf = UF_QUERY;
2158
+ break;
2159
+
2160
+ case s_req_fragment:
2161
+ uf = UF_FRAGMENT;
2162
+ break;
2163
+
2164
+ default:
2165
+ assert(!"Unexpected state");
2166
+ return 1;
2167
+ }
2168
+
2169
+ /* Nothing's changed; soldier on */
2170
+ if (uf == old_uf) {
2171
+ u->field_data[uf].len++;
2172
+ continue;
2173
+ }
2174
+
2175
+ u->field_data[uf].off = p - buf;
2176
+ u->field_data[uf].len = 1;
2177
+
2178
+ u->field_set |= (1 << uf);
2179
+ old_uf = uf;
2180
+ }
2181
+
2182
+ /* host must be present if there is a schema */
2183
+ /* parsing http:///toto will fail */
2184
+ if ((u->field_set & ((1 << UF_SCHEMA) | (1 << UF_HOST))) != 0) {
2185
+ if (http_parse_host(buf, u, found_at) != 0) {
2186
+ return 1;
2187
+ }
2188
+ }
2189
+
2190
+ /* CONNECT requests can only contain "hostname:port" */
2191
+ if (is_connect && u->field_set != ((1 << UF_HOST)|(1 << UF_PORT))) {
2192
+ return 1;
2193
+ }
2194
+
2195
+ if (u->field_set & (1 << UF_PORT)) {
2196
+ /* Don't bother with endp; we've already validated the string */
2197
+ unsigned long v = strtoul(buf + u->field_data[UF_PORT].off, NULL, 10);
2198
+
2199
+ /* Ports have a max value of 2^16 */
2200
+ if (v > 0xffff) {
2201
+ return 1;
2202
+ }
2203
+
2204
+ u->port = (uint16_t) v;
2205
+ }
2206
+
2207
+ return 0;
2208
+ }
2209
+
2210
+ void
2211
+ http_parser_pause(http_parser *parser, int paused) {
2212
+ /* Users should only be pausing/unpausing a parser that is not in an error
2213
+ * state. In non-debug builds, there's not much that we can do about this
2214
+ * other than ignore it.
2215
+ */
2216
+ if (HTTP_PARSER_ERRNO(parser) == HPE_OK ||
2217
+ HTTP_PARSER_ERRNO(parser) == HPE_PAUSED) {
2218
+ SET_ERRNO((paused) ? HPE_PAUSED : HPE_OK);
2219
+ } else {
2220
+ assert(0 && "Attempting to pause parser in error state");
2221
+ }
2222
+ }
2223
+
2224
+ int
2225
+ http_body_is_final(const struct http_parser *parser) {
2226
+ return parser->state == s_message_done;
2227
+ }
2228
+
2229
+ unsigned long
2230
+ http_parser_version(void) {
2231
+ return HTTP_PARSER_VERSION_MAJOR * 0x10000 |
2232
+ HTTP_PARSER_VERSION_MINOR * 0x00100 |
2233
+ HTTP_PARSER_VERSION_PATCH * 0x00001;
2234
+ }