midori_http_parser 0.6.1.1-java

Sign up to get free protection for your applications and to get access to all the features.
Files changed (86) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +12 -0
  3. data/.gitmodules +6 -0
  4. data/.travis.yml +33 -0
  5. data/Gemfile +2 -0
  6. data/LICENSE-MIT +20 -0
  7. data/README.md +90 -0
  8. data/Rakefile +6 -0
  9. data/bench/standalone.rb +23 -0
  10. data/bench/thin.rb +58 -0
  11. data/ext/ruby_http_parser/.gitignore +1 -0
  12. data/ext/ruby_http_parser/RubyHttpParserService.java +18 -0
  13. data/ext/ruby_http_parser/ext_help.h +18 -0
  14. data/ext/ruby_http_parser/extconf.rb +24 -0
  15. data/ext/ruby_http_parser/org/ruby_http_parser/RubyHttpParser.java +495 -0
  16. data/ext/ruby_http_parser/ruby_http_parser.c +516 -0
  17. data/ext/ruby_http_parser/vendor/.gitkeep +0 -0
  18. data/ext/ruby_http_parser/vendor/http-parser-java/AUTHORS +32 -0
  19. data/ext/ruby_http_parser/vendor/http-parser-java/LICENSE-MIT +48 -0
  20. data/ext/ruby_http_parser/vendor/http-parser-java/README.md +183 -0
  21. data/ext/ruby_http_parser/vendor/http-parser-java/TODO +28 -0
  22. data/ext/ruby_http_parser/vendor/http-parser-java/build.xml +74 -0
  23. data/ext/ruby_http_parser/vendor/http-parser-java/http_parser.c +2175 -0
  24. data/ext/ruby_http_parser/vendor/http-parser-java/http_parser.gyp +79 -0
  25. data/ext/ruby_http_parser/vendor/http-parser-java/http_parser.h +304 -0
  26. data/ext/ruby_http_parser/vendor/http-parser-java/src/Http-parser.java.iml +22 -0
  27. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/FieldData.java +41 -0
  28. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/HTTPCallback.java +8 -0
  29. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/HTTPDataCallback.java +34 -0
  30. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/HTTPErrorCallback.java +12 -0
  31. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/HTTPException.java +9 -0
  32. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/HTTPMethod.java +113 -0
  33. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/HTTPParser.java +36 -0
  34. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/HTTPParserUrl.java +76 -0
  35. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/ParserSettings.java +256 -0
  36. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/ParserType.java +13 -0
  37. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/Util.java +111 -0
  38. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/lolevel/HTTPCallback.java +5 -0
  39. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/lolevel/HTTPDataCallback.java +25 -0
  40. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/lolevel/HTTPErrorCallback.java +7 -0
  41. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/lolevel/HTTPParser.java +2171 -0
  42. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/lolevel/ParserSettings.java +83 -0
  43. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/Message.java +374 -0
  44. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/ParseUrl.java +51 -0
  45. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/Requests.java +69 -0
  46. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/Responses.java +52 -0
  47. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/Test.java +16 -0
  48. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/TestHeaderOverflowError.java +48 -0
  49. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/TestLoaderNG.java +212 -0
  50. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/TestNoOverflowLongBody.java +62 -0
  51. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/UnitTest.java +117 -0
  52. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/Upgrade.java +27 -0
  53. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/Url.java +127 -0
  54. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/Util.java +236 -0
  55. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/WrongContentLength.java +59 -0
  56. data/ext/ruby_http_parser/vendor/http-parser-java/test.c +3425 -0
  57. data/ext/ruby_http_parser/vendor/http-parser-java/tests.dumped +845 -0
  58. data/ext/ruby_http_parser/vendor/http-parser-java/tests.utf8 +17 -0
  59. data/ext/ruby_http_parser/vendor/http-parser-java/tools/byte_constants.rb +6 -0
  60. data/ext/ruby_http_parser/vendor/http-parser-java/tools/const_char.rb +13 -0
  61. data/ext/ruby_http_parser/vendor/http-parser-java/tools/lowcase.rb +15 -0
  62. data/ext/ruby_http_parser/vendor/http-parser-java/tools/parse_tests.rb +33 -0
  63. data/ext/ruby_http_parser/vendor/http-parser/AUTHORS +68 -0
  64. data/ext/ruby_http_parser/vendor/http-parser/LICENSE-MIT +23 -0
  65. data/ext/ruby_http_parser/vendor/http-parser/README.md +246 -0
  66. data/ext/ruby_http_parser/vendor/http-parser/bench.c +111 -0
  67. data/ext/ruby_http_parser/vendor/http-parser/contrib/parsertrace.c +160 -0
  68. data/ext/ruby_http_parser/vendor/http-parser/contrib/url_parser.c +47 -0
  69. data/ext/ruby_http_parser/vendor/http-parser/http_parser.c +2470 -0
  70. data/ext/ruby_http_parser/vendor/http-parser/http_parser.gyp +111 -0
  71. data/ext/ruby_http_parser/vendor/http-parser/http_parser.h +432 -0
  72. data/ext/ruby_http_parser/vendor/http-parser/test.c +4226 -0
  73. data/ext/ruby_http_parser/vendor/http-parser/test_fast +0 -0
  74. data/ext/ruby_http_parser/vendor/http-parser/test_g +0 -0
  75. data/lib/http/parser.rb +1 -0
  76. data/lib/http_parser.rb +21 -0
  77. data/midori_http_parser.gemspec +24 -0
  78. data/spec/parser_spec.rb +376 -0
  79. data/spec/spec_helper.rb +1 -0
  80. data/spec/support/requests.json +631 -0
  81. data/spec/support/responses.json +375 -0
  82. data/tasks/compile.rake +42 -0
  83. data/tasks/fixtures.rake +71 -0
  84. data/tasks/spec.rake +5 -0
  85. data/tasks/submodules.rake +7 -0
  86. metadata +206 -0
@@ -0,0 +1,160 @@
1
+ /* Based on src/http/ngx_http_parse.c from NGINX copyright Igor Sysoev
2
+ *
3
+ * Additional changes are licensed under the same terms as NGINX and
4
+ * copyright Joyent, Inc. and other Node contributors. All rights reserved.
5
+ *
6
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
7
+ * of this software and associated documentation files (the "Software"), to
8
+ * deal in the Software without restriction, including without limitation the
9
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10
+ * sell copies of the Software, and to permit persons to whom the Software is
11
+ * furnished to do so, subject to the following conditions:
12
+ *
13
+ * The above copyright notice and this permission notice shall be included in
14
+ * all copies or substantial portions of the Software.
15
+ *
16
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22
+ * IN THE SOFTWARE.
23
+ */
24
+
25
+ /* Dump what the parser finds to stdout as it happens */
26
+
27
+ #include "http_parser.h"
28
+ #include <stdio.h>
29
+ #include <stdlib.h>
30
+ #include <string.h>
31
+
32
+ int on_message_begin(http_parser* _) {
33
+ (void)_;
34
+ printf("\n***MESSAGE BEGIN***\n\n");
35
+ return 0;
36
+ }
37
+
38
+ int on_headers_complete(http_parser* _) {
39
+ (void)_;
40
+ printf("\n***HEADERS COMPLETE***\n\n");
41
+ return 0;
42
+ }
43
+
44
+ int on_message_complete(http_parser* _) {
45
+ (void)_;
46
+ printf("\n***MESSAGE COMPLETE***\n\n");
47
+ return 0;
48
+ }
49
+
50
+ int on_url(http_parser* _, const char* at, size_t length) {
51
+ (void)_;
52
+ printf("Url: %.*s\n", (int)length, at);
53
+ return 0;
54
+ }
55
+
56
+ int on_header_field(http_parser* _, const char* at, size_t length) {
57
+ (void)_;
58
+ printf("Header field: %.*s\n", (int)length, at);
59
+ return 0;
60
+ }
61
+
62
+ int on_header_value(http_parser* _, const char* at, size_t length) {
63
+ (void)_;
64
+ printf("Header value: %.*s\n", (int)length, at);
65
+ return 0;
66
+ }
67
+
68
+ int on_body(http_parser* _, const char* at, size_t length) {
69
+ (void)_;
70
+ printf("Body: %.*s\n", (int)length, at);
71
+ return 0;
72
+ }
73
+
74
/* Print the command-line synopsis to stderr and terminate the process with
 * EXIT_FAILURE. `name` is the program name shown in the synopsis.
 * This function does not return.
 */
void usage(const char* name) {
  fprintf(stderr, "Usage: %s $type $filename\n", name);
  fputs(" type: -x, where x is one of {r,b,q}\n"
        " parses file as a Response, reQuest, or Both\n",
        stderr);
  exit(EXIT_FAILURE);
}
82
+
83
+ int main(int argc, char* argv[]) {
84
+ enum http_parser_type file_type;
85
+
86
+ if (argc != 3) {
87
+ usage(argv[0]);
88
+ }
89
+
90
+ char* type = argv[1];
91
+ if (type[0] != '-') {
92
+ usage(argv[0]);
93
+ }
94
+
95
+ switch (type[1]) {
96
+ /* in the case of "-", type[1] will be NUL */
97
+ case 'r':
98
+ file_type = HTTP_RESPONSE;
99
+ break;
100
+ case 'q':
101
+ file_type = HTTP_REQUEST;
102
+ break;
103
+ case 'b':
104
+ file_type = HTTP_BOTH;
105
+ break;
106
+ default:
107
+ usage(argv[0]);
108
+ }
109
+
110
+ char* filename = argv[2];
111
+ FILE* file = fopen(filename, "r");
112
+ if (file == NULL) {
113
+ perror("fopen");
114
+ goto fail;
115
+ }
116
+
117
+ fseek(file, 0, SEEK_END);
118
+ long file_length = ftell(file);
119
+ if (file_length == -1) {
120
+ perror("ftell");
121
+ goto fail;
122
+ }
123
+ fseek(file, 0, SEEK_SET);
124
+
125
+ char* data = malloc(file_length);
126
+ if (fread(data, 1, file_length, file) != (size_t)file_length) {
127
+ fprintf(stderr, "couldn't read entire file\n");
128
+ free(data);
129
+ goto fail;
130
+ }
131
+
132
+ http_parser_settings settings;
133
+ memset(&settings, 0, sizeof(settings));
134
+ settings.on_message_begin = on_message_begin;
135
+ settings.on_url = on_url;
136
+ settings.on_header_field = on_header_field;
137
+ settings.on_header_value = on_header_value;
138
+ settings.on_headers_complete = on_headers_complete;
139
+ settings.on_body = on_body;
140
+ settings.on_message_complete = on_message_complete;
141
+
142
+ http_parser parser;
143
+ http_parser_init(&parser, file_type);
144
+ size_t nparsed = http_parser_execute(&parser, &settings, data, file_length);
145
+ free(data);
146
+
147
+ if (nparsed != (size_t)file_length) {
148
+ fprintf(stderr,
149
+ "Error: %s (%s)\n",
150
+ http_errno_description(HTTP_PARSER_ERRNO(&parser)),
151
+ http_errno_name(HTTP_PARSER_ERRNO(&parser)));
152
+ goto fail;
153
+ }
154
+
155
+ return EXIT_SUCCESS;
156
+
157
+ fail:
158
+ fclose(file);
159
+ return EXIT_FAILURE;
160
+ }
@@ -0,0 +1,47 @@
1
+ #include "http_parser.h"
2
+ #include <stdio.h>
3
+ #include <string.h>
4
+
5
+ void
6
+ dump_url (const char *url, const struct http_parser_url *u)
7
+ {
8
+ unsigned int i;
9
+
10
+ printf("\tfield_set: 0x%x, port: %u\n", u->field_set, u->port);
11
+ for (i = 0; i < UF_MAX; i++) {
12
+ if ((u->field_set & (1 << i)) == 0) {
13
+ printf("\tfield_data[%u]: unset\n", i);
14
+ continue;
15
+ }
16
+
17
+ printf("\tfield_data[%u]: off: %u, len: %u, part: %.*s\n",
18
+ i,
19
+ u->field_data[i].off,
20
+ u->field_data[i].len,
21
+ u->field_data[i].len,
22
+ url + u->field_data[i].off);
23
+ }
24
+ }
25
+
26
+ int main(int argc, char ** argv) {
27
+ struct http_parser_url u;
28
+ int len, connect, result;
29
+
30
+ if (argc != 3) {
31
+ printf("Syntax : %s connect|get url\n", argv[0]);
32
+ return 1;
33
+ }
34
+ len = strlen(argv[2]);
35
+ connect = strcmp("connect", argv[1]) == 0 ? 1 : 0;
36
+ printf("Parsing %s, connect %d\n", argv[2], connect);
37
+
38
+ http_parser_url_init(&u);
39
+ result = http_parser_parse_url(argv[2], len, connect, &u);
40
+ if (result != 0) {
41
+ printf("Parse error : %d\n", result);
42
+ return result;
43
+ }
44
+ printf("Parse ok, result : \n");
45
+ dump_url(argv[2], &u);
46
+ return 0;
47
+ }
@@ -0,0 +1,2470 @@
1
+ /* Based on src/http/ngx_http_parse.c from NGINX copyright Igor Sysoev
2
+ *
3
+ * Additional changes are licensed under the same terms as NGINX and
4
+ * copyright Joyent, Inc. and other Node contributors. All rights reserved.
5
+ *
6
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
7
+ * of this software and associated documentation files (the "Software"), to
8
+ * deal in the Software without restriction, including without limitation the
9
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10
+ * sell copies of the Software, and to permit persons to whom the Software is
11
+ * furnished to do so, subject to the following conditions:
12
+ *
13
+ * The above copyright notice and this permission notice shall be included in
14
+ * all copies or substantial portions of the Software.
15
+ *
16
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22
+ * IN THE SOFTWARE.
23
+ */
24
+ #include "http_parser.h"
25
+ #include <assert.h>
26
+ #include <stddef.h>
27
+ #include <ctype.h>
28
+ #include <stdlib.h>
29
+ #include <string.h>
30
+ #include <limits.h>
31
+
32
+ #ifndef ULLONG_MAX
33
+ # define ULLONG_MAX ((uint64_t) -1) /* 2^64-1 */
34
+ #endif
35
+
36
+ #ifndef MIN
37
+ # define MIN(a,b) ((a) < (b) ? (a) : (b))
38
+ #endif
39
+
40
+ #ifndef ARRAY_SIZE
41
+ # define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
42
+ #endif
43
+
44
+ #ifndef BIT_AT
45
+ # define BIT_AT(a, i) \
46
+ (!!((unsigned int) (a)[(unsigned int) (i) >> 3] & \
47
+ (1 << ((unsigned int) (i) & 7))))
48
+ #endif
49
+
50
+ #ifndef ELEM_AT
51
+ # define ELEM_AT(a, i, v) ((unsigned int) (i) < ARRAY_SIZE(a) ? (a)[(i)] : (v))
52
+ #endif
53
+
54
+ #define SET_ERRNO(e) \
55
+ do { \
56
+ parser->http_errno = (e); \
57
+ } while(0)
58
+
59
+ #define CURRENT_STATE() p_state
60
+ #define UPDATE_STATE(V) p_state = (enum state) (V);
61
+ #define RETURN(V) \
62
+ do { \
63
+ parser->state = CURRENT_STATE(); \
64
+ return (V); \
65
+ } while (0);
66
+ #define REEXECUTE() \
67
+ goto reexecute; \
68
+
69
+
70
+ #ifdef __GNUC__
71
+ # define LIKELY(X) __builtin_expect(!!(X), 1)
72
+ # define UNLIKELY(X) __builtin_expect(!!(X), 0)
73
+ #else
74
+ # define LIKELY(X) (X)
75
+ # define UNLIKELY(X) (X)
76
+ #endif
77
+
78
+
79
+ /* Run the notify callback FOR, returning ER if it fails */
80
+ #define CALLBACK_NOTIFY_(FOR, ER) \
81
+ do { \
82
+ assert(HTTP_PARSER_ERRNO(parser) == HPE_OK); \
83
+ \
84
+ if (LIKELY(settings->on_##FOR)) { \
85
+ parser->state = CURRENT_STATE(); \
86
+ if (UNLIKELY(0 != settings->on_##FOR(parser))) { \
87
+ SET_ERRNO(HPE_CB_##FOR); \
88
+ } \
89
+ UPDATE_STATE(parser->state); \
90
+ \
91
+ /* We either errored above or got paused; get out */ \
92
+ if (UNLIKELY(HTTP_PARSER_ERRNO(parser) != HPE_OK)) { \
93
+ return (ER); \
94
+ } \
95
+ } \
96
+ } while (0)
97
+
98
+ /* Run the notify callback FOR and consume the current byte */
99
+ #define CALLBACK_NOTIFY(FOR) CALLBACK_NOTIFY_(FOR, p - data + 1)
100
+
101
+ /* Run the notify callback FOR and don't consume the current byte */
102
+ #define CALLBACK_NOTIFY_NOADVANCE(FOR) CALLBACK_NOTIFY_(FOR, p - data)
103
+
104
+ /* Run data callback FOR with LEN bytes, returning ER if it fails */
105
+ #define CALLBACK_DATA_(FOR, LEN, ER) \
106
+ do { \
107
+ assert(HTTP_PARSER_ERRNO(parser) == HPE_OK); \
108
+ \
109
+ if (FOR##_mark) { \
110
+ if (LIKELY(settings->on_##FOR)) { \
111
+ parser->state = CURRENT_STATE(); \
112
+ if (UNLIKELY(0 != \
113
+ settings->on_##FOR(parser, FOR##_mark, (LEN)))) { \
114
+ SET_ERRNO(HPE_CB_##FOR); \
115
+ } \
116
+ UPDATE_STATE(parser->state); \
117
+ \
118
+ /* We either errored above or got paused; get out */ \
119
+ if (UNLIKELY(HTTP_PARSER_ERRNO(parser) != HPE_OK)) { \
120
+ return (ER); \
121
+ } \
122
+ } \
123
+ FOR##_mark = NULL; \
124
+ } \
125
+ } while (0)
126
+
127
+ /* Run the data callback FOR and consume the current byte */
128
+ #define CALLBACK_DATA(FOR) \
129
+ CALLBACK_DATA_(FOR, p - FOR##_mark, p - data + 1)
130
+
131
+ /* Run the data callback FOR and don't consume the current byte */
132
+ #define CALLBACK_DATA_NOADVANCE(FOR) \
133
+ CALLBACK_DATA_(FOR, p - FOR##_mark, p - data)
134
+
135
+ /* Set the mark FOR; non-destructive if mark is already set */
136
+ #define MARK(FOR) \
137
+ do { \
138
+ if (!FOR##_mark) { \
139
+ FOR##_mark = p; \
140
+ } \
141
+ } while (0)
142
+
143
+ /* Don't allow the total size of the HTTP headers (including the status
144
+ * line) to exceed HTTP_MAX_HEADER_SIZE. This check is here to protect
145
+ * embedders against denial-of-service attacks where the attacker feeds
146
+ * us a never-ending header that the embedder keeps buffering.
147
+ *
148
+ * This check is arguably the responsibility of embedders but we're doing
149
+ * it on the embedder's behalf because most won't bother and this way we
150
+ * make the web a little safer. HTTP_MAX_HEADER_SIZE is still far bigger
151
+ * than any reasonable request or response so this should never affect
152
+ * day-to-day operation.
153
+ */
154
+ #define COUNT_HEADER_SIZE(V) \
155
+ do { \
156
+ parser->nread += (V); \
157
+ if (UNLIKELY(parser->nread > (HTTP_MAX_HEADER_SIZE))) { \
158
+ SET_ERRNO(HPE_HEADER_OVERFLOW); \
159
+ goto error; \
160
+ } \
161
+ } while (0)
162
+
163
+
164
+ #define PROXY_CONNECTION "proxy-connection"
165
+ #define CONNECTION "connection"
166
+ #define CONTENT_LENGTH "content-length"
167
+ #define TRANSFER_ENCODING "transfer-encoding"
168
+ #define UPGRADE "upgrade"
169
+ #define CHUNKED "chunked"
170
+ #define KEEP_ALIVE "keep-alive"
171
+ #define CLOSE "close"
172
+
173
+
174
+ static const char *method_strings[] =
175
+ {
176
+ #define XX(num, name, string) #string,
177
+ HTTP_METHOD_MAP(XX)
178
+ #undef XX
179
+ };
180
+
181
+
182
+ /* Tokens as defined by rfc 2616. Also lowercases them.
183
+ * token = 1*<any CHAR except CTLs or separators>
184
+ * separators = "(" | ")" | "<" | ">" | "@"
185
+ * | "," | ";" | ":" | "\" | <">
186
+ * | "/" | "[" | "]" | "?" | "="
187
+ * | "{" | "}" | SP | HT
188
+ */
189
+ static const char tokens[256] = {
190
+ /* 0 nul 1 soh 2 stx 3 etx 4 eot 5 enq 6 ack 7 bel */
191
+ 0, 0, 0, 0, 0, 0, 0, 0,
192
+ /* 8 bs 9 ht 10 nl 11 vt 12 np 13 cr 14 so 15 si */
193
+ 0, 0, 0, 0, 0, 0, 0, 0,
194
+ /* 16 dle 17 dc1 18 dc2 19 dc3 20 dc4 21 nak 22 syn 23 etb */
195
+ 0, 0, 0, 0, 0, 0, 0, 0,
196
+ /* 24 can 25 em 26 sub 27 esc 28 fs 29 gs 30 rs 31 us */
197
+ 0, 0, 0, 0, 0, 0, 0, 0,
198
+ /* 32 sp 33 ! 34 " 35 # 36 $ 37 % 38 & 39 ' */
199
+ 0, '!', 0, '#', '$', '%', '&', '\'',
200
+ /* 40 ( 41 ) 42 * 43 + 44 , 45 - 46 . 47 / */
201
+ 0, 0, '*', '+', 0, '-', '.', 0,
202
+ /* 48 0 49 1 50 2 51 3 52 4 53 5 54 6 55 7 */
203
+ '0', '1', '2', '3', '4', '5', '6', '7',
204
+ /* 56 8 57 9 58 : 59 ; 60 < 61 = 62 > 63 ? */
205
+ '8', '9', 0, 0, 0, 0, 0, 0,
206
+ /* 64 @ 65 A 66 B 67 C 68 D 69 E 70 F 71 G */
207
+ 0, 'a', 'b', 'c', 'd', 'e', 'f', 'g',
208
+ /* 72 H 73 I 74 J 75 K 76 L 77 M 78 N 79 O */
209
+ 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
210
+ /* 80 P 81 Q 82 R 83 S 84 T 85 U 86 V 87 W */
211
+ 'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
212
+ /* 88 X 89 Y 90 Z 91 [ 92 \ 93 ] 94 ^ 95 _ */
213
+ 'x', 'y', 'z', 0, 0, 0, '^', '_',
214
+ /* 96 ` 97 a 98 b 99 c 100 d 101 e 102 f 103 g */
215
+ '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g',
216
+ /* 104 h 105 i 106 j 107 k 108 l 109 m 110 n 111 o */
217
+ 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
218
+ /* 112 p 113 q 114 r 115 s 116 t 117 u 118 v 119 w */
219
+ 'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
220
+ /* 120 x 121 y 122 z 123 { 124 | 125 } 126 ~ 127 del */
221
+ 'x', 'y', 'z', 0, '|', 0, '~', 0 };
222
+
223
+
224
+ static const int8_t unhex[256] =
225
+ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
226
+ ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
227
+ ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
228
+ , 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1
229
+ ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1
230
+ ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
231
+ ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1
232
+ ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
233
+ };
234
+
235
+
236
+ #if HTTP_PARSER_STRICT
237
+ # define T(v) 0
238
+ #else
239
+ # define T(v) v
240
+ #endif
241
+
242
+
243
+ static const uint8_t normal_url_char[32] = {
244
+ /* 0 nul 1 soh 2 stx 3 etx 4 eot 5 enq 6 ack 7 bel */
245
+ 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0,
246
+ /* 8 bs 9 ht 10 nl 11 vt 12 np 13 cr 14 so 15 si */
247
+ 0 | T(2) | 0 | 0 | T(16) | 0 | 0 | 0,
248
+ /* 16 dle 17 dc1 18 dc2 19 dc3 20 dc4 21 nak 22 syn 23 etb */
249
+ 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0,
250
+ /* 24 can 25 em 26 sub 27 esc 28 fs 29 gs 30 rs 31 us */
251
+ 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0,
252
+ /* 32 sp 33 ! 34 " 35 # 36 $ 37 % 38 & 39 ' */
253
+ 0 | 2 | 4 | 0 | 16 | 32 | 64 | 128,
254
+ /* 40 ( 41 ) 42 * 43 + 44 , 45 - 46 . 47 / */
255
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
256
+ /* 48 0 49 1 50 2 51 3 52 4 53 5 54 6 55 7 */
257
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
258
+ /* 56 8 57 9 58 : 59 ; 60 < 61 = 62 > 63 ? */
259
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 0,
260
+ /* 64 @ 65 A 66 B 67 C 68 D 69 E 70 F 71 G */
261
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
262
+ /* 72 H 73 I 74 J 75 K 76 L 77 M 78 N 79 O */
263
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
264
+ /* 80 P 81 Q 82 R 83 S 84 T 85 U 86 V 87 W */
265
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
266
+ /* 88 X 89 Y 90 Z 91 [ 92 \ 93 ] 94 ^ 95 _ */
267
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
268
+ /* 96 ` 97 a 98 b 99 c 100 d 101 e 102 f 103 g */
269
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
270
+ /* 104 h 105 i 106 j 107 k 108 l 109 m 110 n 111 o */
271
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
272
+ /* 112 p 113 q 114 r 115 s 116 t 117 u 118 v 119 w */
273
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
274
+ /* 120 x 121 y 122 z 123 { 124 | 125 } 126 ~ 127 del */
275
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 0, };
276
+
277
+ #undef T
278
+
279
+ enum state
280
+ { s_dead = 1 /* important that this is > 0 */
281
+
282
+ , s_start_req_or_res
283
+ , s_res_or_resp_H
284
+ , s_start_res
285
+ , s_res_H
286
+ , s_res_HT
287
+ , s_res_HTT
288
+ , s_res_HTTP
289
+ , s_res_first_http_major
290
+ , s_res_http_major
291
+ , s_res_first_http_minor
292
+ , s_res_http_minor
293
+ , s_res_first_status_code
294
+ , s_res_status_code
295
+ , s_res_status_start
296
+ , s_res_status
297
+ , s_res_line_almost_done
298
+
299
+ , s_start_req
300
+
301
+ , s_req_method
302
+ , s_req_spaces_before_url
303
+ , s_req_schema
304
+ , s_req_schema_slash
305
+ , s_req_schema_slash_slash
306
+ , s_req_server_start
307
+ , s_req_server
308
+ , s_req_server_with_at
309
+ , s_req_path
310
+ , s_req_query_string_start
311
+ , s_req_query_string
312
+ , s_req_fragment_start
313
+ , s_req_fragment
314
+ , s_req_http_start
315
+ , s_req_http_H
316
+ , s_req_http_HT
317
+ , s_req_http_HTT
318
+ , s_req_http_HTTP
319
+ , s_req_first_http_major
320
+ , s_req_http_major
321
+ , s_req_first_http_minor
322
+ , s_req_http_minor
323
+ , s_req_line_almost_done
324
+
325
+ , s_header_field_start
326
+ , s_header_field
327
+ , s_header_value_discard_ws
328
+ , s_header_value_discard_ws_almost_done
329
+ , s_header_value_discard_lws
330
+ , s_header_value_start
331
+ , s_header_value
332
+ , s_header_value_lws
333
+
334
+ , s_header_almost_done
335
+
336
+ , s_chunk_size_start
337
+ , s_chunk_size
338
+ , s_chunk_parameters
339
+ , s_chunk_size_almost_done
340
+
341
+ , s_headers_almost_done
342
+ , s_headers_done
343
+
344
+ /* Important: 's_headers_done' must be the last 'header' state. All
345
+ * states beyond this must be 'body' states. It is used for overflow
346
+ * checking. See the PARSING_HEADER() macro.
347
+ */
348
+
349
+ , s_chunk_data
350
+ , s_chunk_data_almost_done
351
+ , s_chunk_data_done
352
+
353
+ , s_body_identity
354
+ , s_body_identity_eof
355
+
356
+ , s_message_done
357
+ };
358
+
359
+
360
+ #define PARSING_HEADER(state) (state <= s_headers_done)
361
+
362
+
363
+ enum header_states
364
+ { h_general = 0
365
+ , h_C
366
+ , h_CO
367
+ , h_CON
368
+
369
+ , h_matching_connection
370
+ , h_matching_proxy_connection
371
+ , h_matching_content_length
372
+ , h_matching_transfer_encoding
373
+ , h_matching_upgrade
374
+
375
+ , h_connection
376
+ , h_content_length
377
+ , h_transfer_encoding
378
+ , h_upgrade
379
+
380
+ , h_matching_transfer_encoding_chunked
381
+ , h_matching_connection_token_start
382
+ , h_matching_connection_keep_alive
383
+ , h_matching_connection_close
384
+ , h_matching_connection_upgrade
385
+ , h_matching_connection_token
386
+
387
+ , h_transfer_encoding_chunked
388
+ , h_connection_keep_alive
389
+ , h_connection_close
390
+ , h_connection_upgrade
391
+ };
392
+
393
+ enum http_host_state
394
+ {
395
+ s_http_host_dead = 1
396
+ , s_http_userinfo_start
397
+ , s_http_userinfo
398
+ , s_http_host_start
399
+ , s_http_host_v6_start
400
+ , s_http_host
401
+ , s_http_host_v6
402
+ , s_http_host_v6_end
403
+ , s_http_host_v6_zone_start
404
+ , s_http_host_v6_zone
405
+ , s_http_host_port_start
406
+ , s_http_host_port
407
+ };
408
+
409
+ /* Macros for character classes; depends on strict-mode */
410
+ #define CR '\r'
411
+ #define LF '\n'
412
+ #define LOWER(c) (unsigned char)(c | 0x20)
413
+ #define IS_ALPHA(c) (LOWER(c) >= 'a' && LOWER(c) <= 'z')
414
+ #define IS_NUM(c) ((c) >= '0' && (c) <= '9')
415
+ #define IS_ALPHANUM(c) (IS_ALPHA(c) || IS_NUM(c))
416
+ #define IS_HEX(c) (IS_NUM(c) || (LOWER(c) >= 'a' && LOWER(c) <= 'f'))
417
+ #define IS_MARK(c) ((c) == '-' || (c) == '_' || (c) == '.' || \
418
+ (c) == '!' || (c) == '~' || (c) == '*' || (c) == '\'' || (c) == '(' || \
419
+ (c) == ')')
420
+ #define IS_USERINFO_CHAR(c) (IS_ALPHANUM(c) || IS_MARK(c) || (c) == '%' || \
421
+ (c) == ';' || (c) == ':' || (c) == '&' || (c) == '=' || (c) == '+' || \
422
+ (c) == '$' || (c) == ',')
423
+
424
+ #define STRICT_TOKEN(c) (tokens[(unsigned char)c])
425
+
426
+ #if HTTP_PARSER_STRICT
427
+ #define TOKEN(c) (tokens[(unsigned char)c])
428
+ #define IS_URL_CHAR(c) (BIT_AT(normal_url_char, (unsigned char)c))
429
+ #define IS_HOST_CHAR(c) (IS_ALPHANUM(c) || (c) == '.' || (c) == '-')
430
+ #else
431
+ #define TOKEN(c) ((c == ' ') ? ' ' : tokens[(unsigned char)c])
432
+ #define IS_URL_CHAR(c) \
433
+ (BIT_AT(normal_url_char, (unsigned char)c) || ((c) & 0x80))
434
+ #define IS_HOST_CHAR(c) \
435
+ (IS_ALPHANUM(c) || (c) == '.' || (c) == '-' || (c) == '_')
436
+ #endif
437
+
438
+ /**
439
+ * Verify that a char is a valid visible (printable) US-ASCII
440
+ * character or %x80-FF
441
+ **/
442
+ #define IS_HEADER_CHAR(ch) \
443
+ (ch == CR || ch == LF || ch == 9 || ((unsigned char)ch > 31 && ch != 127))
444
+
445
+ #define start_state (parser->type == HTTP_REQUEST ? s_start_req : s_start_res)
446
+
447
+
448
+ #if HTTP_PARSER_STRICT
449
+ # define STRICT_CHECK(cond) \
450
+ do { \
451
+ if (cond) { \
452
+ SET_ERRNO(HPE_STRICT); \
453
+ goto error; \
454
+ } \
455
+ } while (0)
456
+ # define NEW_MESSAGE() (http_should_keep_alive(parser) ? start_state : s_dead)
457
+ #else
458
+ # define STRICT_CHECK(cond)
459
+ # define NEW_MESSAGE() start_state
460
+ #endif
461
+
462
+
463
+ /* Map errno values to strings for human-readable output */
464
+ #define HTTP_STRERROR_GEN(n, s) { "HPE_" #n, s },
465
+ static struct {
466
+ const char *name;
467
+ const char *description;
468
+ } http_strerror_tab[] = {
469
+ HTTP_ERRNO_MAP(HTTP_STRERROR_GEN)
470
+ };
471
+ #undef HTTP_STRERROR_GEN
472
+
473
+ int http_message_needs_eof(const http_parser *parser);
474
+
475
+ /* Our URL parser.
476
+ *
477
+ * This is designed to be shared by http_parser_execute() for URL validation,
478
+ * hence it has a state transition + byte-for-byte interface. In addition, it
479
+ * is meant to be embedded in http_parser_parse_url(), which does the dirty
480
+ * work of turning state transitions into URL components for its API.
481
+ *
482
+ * This function should only be invoked with non-space characters. It is
483
+ * assumed that the caller cares about (and can detect) the transition between
484
+ * URL and non-URL states by looking for these.
485
+ */
486
+ static enum state
487
+ parse_url_char(enum state s, const char ch)
488
+ {
489
+ if (ch == ' ' || ch == '\r' || ch == '\n') {
490
+ return s_dead;
491
+ }
492
+
493
+ #if HTTP_PARSER_STRICT
494
+ if (ch == '\t' || ch == '\f') {
495
+ return s_dead;
496
+ }
497
+ #endif
498
+
499
+ switch (s) {
500
+ case s_req_spaces_before_url:
501
+ /* Proxied requests are followed by scheme of an absolute URI (alpha).
502
+ * All methods except CONNECT are followed by '/' or '*'.
503
+ */
504
+
505
+ if (ch == '/' || ch == '*') {
506
+ return s_req_path;
507
+ }
508
+
509
+ if (IS_ALPHA(ch)) {
510
+ return s_req_schema;
511
+ }
512
+
513
+ break;
514
+
515
+ case s_req_schema:
516
+ if (IS_ALPHA(ch)) {
517
+ return s;
518
+ }
519
+
520
+ if (ch == ':') {
521
+ return s_req_schema_slash;
522
+ }
523
+
524
+ break;
525
+
526
+ case s_req_schema_slash:
527
+ if (ch == '/') {
528
+ return s_req_schema_slash_slash;
529
+ }
530
+
531
+ break;
532
+
533
+ case s_req_schema_slash_slash:
534
+ if (ch == '/') {
535
+ return s_req_server_start;
536
+ }
537
+
538
+ break;
539
+
540
+ case s_req_server_with_at:
541
+ if (ch == '@') {
542
+ return s_dead;
543
+ }
544
+
545
+ /* FALLTHROUGH */
546
+ case s_req_server_start:
547
+ case s_req_server:
548
+ if (ch == '/') {
549
+ return s_req_path;
550
+ }
551
+
552
+ if (ch == '?') {
553
+ return s_req_query_string_start;
554
+ }
555
+
556
+ if (ch == '@') {
557
+ return s_req_server_with_at;
558
+ }
559
+
560
+ if (IS_USERINFO_CHAR(ch) || ch == '[' || ch == ']') {
561
+ return s_req_server;
562
+ }
563
+
564
+ break;
565
+
566
+ case s_req_path:
567
+ if (IS_URL_CHAR(ch)) {
568
+ return s;
569
+ }
570
+
571
+ switch (ch) {
572
+ case '?':
573
+ return s_req_query_string_start;
574
+
575
+ case '#':
576
+ return s_req_fragment_start;
577
+ }
578
+
579
+ break;
580
+
581
+ case s_req_query_string_start:
582
+ case s_req_query_string:
583
+ if (IS_URL_CHAR(ch)) {
584
+ return s_req_query_string;
585
+ }
586
+
587
+ switch (ch) {
588
+ case '?':
589
+ /* allow extra '?' in query string */
590
+ return s_req_query_string;
591
+
592
+ case '#':
593
+ return s_req_fragment_start;
594
+ }
595
+
596
+ break;
597
+
598
+ case s_req_fragment_start:
599
+ if (IS_URL_CHAR(ch)) {
600
+ return s_req_fragment;
601
+ }
602
+
603
+ switch (ch) {
604
+ case '?':
605
+ return s_req_fragment;
606
+
607
+ case '#':
608
+ return s;
609
+ }
610
+
611
+ break;
612
+
613
+ case s_req_fragment:
614
+ if (IS_URL_CHAR(ch)) {
615
+ return s;
616
+ }
617
+
618
+ switch (ch) {
619
+ case '?':
620
+ case '#':
621
+ return s;
622
+ }
623
+
624
+ break;
625
+
626
+ default:
627
+ break;
628
+ }
629
+
630
+ /* We should never fall out of the switch above unless there's an error */
631
+ return s_dead;
632
+ }
633
+
634
+ size_t http_parser_execute (http_parser *parser,
635
+ const http_parser_settings *settings,
636
+ const char *data,
637
+ size_t len)
638
+ {
639
+ char c, ch;
640
+ int8_t unhex_val;
641
+ const char *p = data;
642
+ const char *header_field_mark = 0;
643
+ const char *header_value_mark = 0;
644
+ const char *url_mark = 0;
645
+ const char *body_mark = 0;
646
+ const char *status_mark = 0;
647
+ enum state p_state = (enum state) parser->state;
648
+ const unsigned int lenient = parser->lenient_http_headers;
649
+
650
+ /* We're in an error state. Don't bother doing anything. */
651
+ if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
652
+ return 0;
653
+ }
654
+
655
+ if (len == 0) {
656
+ switch (CURRENT_STATE()) {
657
+ case s_body_identity_eof:
658
+ /* Use of CALLBACK_NOTIFY() here would erroneously return 1 byte read if
659
+ * we got paused.
660
+ */
661
+ CALLBACK_NOTIFY_NOADVANCE(message_complete);
662
+ return 0;
663
+
664
+ case s_dead:
665
+ case s_start_req_or_res:
666
+ case s_start_res:
667
+ case s_start_req:
668
+ return 0;
669
+
670
+ default:
671
+ SET_ERRNO(HPE_INVALID_EOF_STATE);
672
+ return 1;
673
+ }
674
+ }
675
+
676
+
677
+ if (CURRENT_STATE() == s_header_field)
678
+ header_field_mark = data;
679
+ if (CURRENT_STATE() == s_header_value)
680
+ header_value_mark = data;
681
+ switch (CURRENT_STATE()) {
682
+ case s_req_path:
683
+ case s_req_schema:
684
+ case s_req_schema_slash:
685
+ case s_req_schema_slash_slash:
686
+ case s_req_server_start:
687
+ case s_req_server:
688
+ case s_req_server_with_at:
689
+ case s_req_query_string_start:
690
+ case s_req_query_string:
691
+ case s_req_fragment_start:
692
+ case s_req_fragment:
693
+ url_mark = data;
694
+ break;
695
+ case s_res_status:
696
+ status_mark = data;
697
+ break;
698
+ default:
699
+ break;
700
+ }
701
+
702
+ for (p=data; p != data + len; p++) {
703
+ ch = *p;
704
+
705
+ if (PARSING_HEADER(CURRENT_STATE()))
706
+ COUNT_HEADER_SIZE(1);
707
+
708
+ reexecute:
709
+ switch (CURRENT_STATE()) {
710
+
711
+ case s_dead:
712
+ /* this state is used after a 'Connection: close' message
713
+ * the parser will error out if it reads another message
714
+ */
715
+ if (LIKELY(ch == CR || ch == LF))
716
+ break;
717
+
718
+ SET_ERRNO(HPE_CLOSED_CONNECTION);
719
+ goto error;
720
+
721
+ case s_start_req_or_res:
722
+ {
723
+ if (ch == CR || ch == LF)
724
+ break;
725
+ parser->flags = 0;
726
+ parser->content_length = ULLONG_MAX;
727
+
728
+ if (ch == 'H') {
729
+ UPDATE_STATE(s_res_or_resp_H);
730
+
731
+ CALLBACK_NOTIFY(message_begin);
732
+ } else {
733
+ parser->type = HTTP_REQUEST;
734
+ UPDATE_STATE(s_start_req);
735
+ REEXECUTE();
736
+ }
737
+
738
+ break;
739
+ }
740
+
741
+ case s_res_or_resp_H:
742
+ if (ch == 'T') {
743
+ parser->type = HTTP_RESPONSE;
744
+ UPDATE_STATE(s_res_HT);
745
+ } else {
746
+ if (UNLIKELY(ch != 'E')) {
747
+ SET_ERRNO(HPE_INVALID_CONSTANT);
748
+ goto error;
749
+ }
750
+
751
+ parser->type = HTTP_REQUEST;
752
+ parser->method = HTTP_HEAD;
753
+ parser->index = 2;
754
+ UPDATE_STATE(s_req_method);
755
+ }
756
+ break;
757
+
758
+ case s_start_res:
759
+ {
760
+ parser->flags = 0;
761
+ parser->content_length = ULLONG_MAX;
762
+
763
+ switch (ch) {
764
+ case 'H':
765
+ UPDATE_STATE(s_res_H);
766
+ break;
767
+
768
+ case CR:
769
+ case LF:
770
+ break;
771
+
772
+ default:
773
+ SET_ERRNO(HPE_INVALID_CONSTANT);
774
+ goto error;
775
+ }
776
+
777
+ CALLBACK_NOTIFY(message_begin);
778
+ break;
779
+ }
780
+
781
+ case s_res_H:
782
+ STRICT_CHECK(ch != 'T');
783
+ UPDATE_STATE(s_res_HT);
784
+ break;
785
+
786
+ case s_res_HT:
787
+ STRICT_CHECK(ch != 'T');
788
+ UPDATE_STATE(s_res_HTT);
789
+ break;
790
+
791
+ case s_res_HTT:
792
+ STRICT_CHECK(ch != 'P');
793
+ UPDATE_STATE(s_res_HTTP);
794
+ break;
795
+
796
+ case s_res_HTTP:
797
+ STRICT_CHECK(ch != '/');
798
+ UPDATE_STATE(s_res_first_http_major);
799
+ break;
800
+
801
+ case s_res_first_http_major:
802
+ if (UNLIKELY(ch < '0' || ch > '9')) {
803
+ SET_ERRNO(HPE_INVALID_VERSION);
804
+ goto error;
805
+ }
806
+
807
+ parser->http_major = ch - '0';
808
+ UPDATE_STATE(s_res_http_major);
809
+ break;
810
+
811
+ /* major HTTP version or dot */
812
+ case s_res_http_major:
813
+ {
814
+ if (ch == '.') {
815
+ UPDATE_STATE(s_res_first_http_minor);
816
+ break;
817
+ }
818
+
819
+ if (!IS_NUM(ch)) {
820
+ SET_ERRNO(HPE_INVALID_VERSION);
821
+ goto error;
822
+ }
823
+
824
+ parser->http_major *= 10;
825
+ parser->http_major += ch - '0';
826
+
827
+ if (UNLIKELY(parser->http_major > 999)) {
828
+ SET_ERRNO(HPE_INVALID_VERSION);
829
+ goto error;
830
+ }
831
+
832
+ break;
833
+ }
834
+
835
+ /* first digit of minor HTTP version */
836
+ case s_res_first_http_minor:
837
+ if (UNLIKELY(!IS_NUM(ch))) {
838
+ SET_ERRNO(HPE_INVALID_VERSION);
839
+ goto error;
840
+ }
841
+
842
+ parser->http_minor = ch - '0';
843
+ UPDATE_STATE(s_res_http_minor);
844
+ break;
845
+
846
+ /* minor HTTP version or end of request line */
847
+ case s_res_http_minor:
848
+ {
849
+ if (ch == ' ') {
850
+ UPDATE_STATE(s_res_first_status_code);
851
+ break;
852
+ }
853
+
854
+ if (UNLIKELY(!IS_NUM(ch))) {
855
+ SET_ERRNO(HPE_INVALID_VERSION);
856
+ goto error;
857
+ }
858
+
859
+ parser->http_minor *= 10;
860
+ parser->http_minor += ch - '0';
861
+
862
+ if (UNLIKELY(parser->http_minor > 999)) {
863
+ SET_ERRNO(HPE_INVALID_VERSION);
864
+ goto error;
865
+ }
866
+
867
+ break;
868
+ }
869
+
870
+ case s_res_first_status_code:
871
+ {
872
+ if (!IS_NUM(ch)) {
873
+ if (ch == ' ') {
874
+ break;
875
+ }
876
+
877
+ SET_ERRNO(HPE_INVALID_STATUS);
878
+ goto error;
879
+ }
880
+ parser->status_code = ch - '0';
881
+ UPDATE_STATE(s_res_status_code);
882
+ break;
883
+ }
884
+
885
+ case s_res_status_code:
886
+ {
887
+ if (!IS_NUM(ch)) {
888
+ switch (ch) {
889
+ case ' ':
890
+ UPDATE_STATE(s_res_status_start);
891
+ break;
892
+ case CR:
893
+ UPDATE_STATE(s_res_line_almost_done);
894
+ break;
895
+ case LF:
896
+ UPDATE_STATE(s_header_field_start);
897
+ break;
898
+ default:
899
+ SET_ERRNO(HPE_INVALID_STATUS);
900
+ goto error;
901
+ }
902
+ break;
903
+ }
904
+
905
+ parser->status_code *= 10;
906
+ parser->status_code += ch - '0';
907
+
908
+ if (UNLIKELY(parser->status_code > 999)) {
909
+ SET_ERRNO(HPE_INVALID_STATUS);
910
+ goto error;
911
+ }
912
+
913
+ break;
914
+ }
915
+
916
+ case s_res_status_start:
917
+ {
918
+ if (ch == CR) {
919
+ UPDATE_STATE(s_res_line_almost_done);
920
+ break;
921
+ }
922
+
923
+ if (ch == LF) {
924
+ UPDATE_STATE(s_header_field_start);
925
+ break;
926
+ }
927
+
928
+ MARK(status);
929
+ UPDATE_STATE(s_res_status);
930
+ parser->index = 0;
931
+ break;
932
+ }
933
+
934
+ case s_res_status:
935
+ if (ch == CR) {
936
+ UPDATE_STATE(s_res_line_almost_done);
937
+ CALLBACK_DATA(status);
938
+ break;
939
+ }
940
+
941
+ if (ch == LF) {
942
+ UPDATE_STATE(s_header_field_start);
943
+ CALLBACK_DATA(status);
944
+ break;
945
+ }
946
+
947
+ break;
948
+
949
+ case s_res_line_almost_done:
950
+ STRICT_CHECK(ch != LF);
951
+ UPDATE_STATE(s_header_field_start);
952
+ break;
953
+
954
+ case s_start_req:
955
+ {
956
+ if (ch == CR || ch == LF)
957
+ break;
958
+ parser->flags = 0;
959
+ parser->content_length = ULLONG_MAX;
960
+
961
+ if (UNLIKELY(!IS_ALPHA(ch))) {
962
+ SET_ERRNO(HPE_INVALID_METHOD);
963
+ goto error;
964
+ }
965
+
966
+ parser->method = (enum http_method) 0;
967
+ parser->index = 1;
968
+ switch (ch) {
969
+ case 'A': parser->method = HTTP_ACL; break;
970
+ case 'B': parser->method = HTTP_BIND; break;
971
+ case 'C': parser->method = HTTP_CONNECT; /* or COPY, CHECKOUT */ break;
972
+ case 'D': parser->method = HTTP_DELETE; break;
973
+ case 'G': parser->method = HTTP_GET; break;
974
+ case 'H': parser->method = HTTP_HEAD; break;
975
+ case 'L': parser->method = HTTP_LOCK; /* or LINK */ break;
976
+ case 'M': parser->method = HTTP_MKCOL; /* or MOVE, MKACTIVITY, MERGE, M-SEARCH, MKCALENDAR */ break;
977
+ case 'N': parser->method = HTTP_NOTIFY; break;
978
+ case 'O': parser->method = HTTP_OPTIONS; break;
979
+ case 'P': parser->method = HTTP_POST;
980
+ /* or PROPFIND|PROPPATCH|PUT|PATCH|PURGE */
981
+ break;
982
+ case 'R': parser->method = HTTP_REPORT; /* or REBIND */ break;
983
+ case 'S': parser->method = HTTP_SUBSCRIBE; /* or SEARCH */ break;
984
+ case 'T': parser->method = HTTP_TRACE; break;
985
+ case 'U': parser->method = HTTP_UNLOCK; /* or UNSUBSCRIBE, UNBIND, UNLINK */ break;
986
+ default:
987
+ SET_ERRNO(HPE_INVALID_METHOD);
988
+ goto error;
989
+ }
990
+ UPDATE_STATE(s_req_method);
991
+
992
+ CALLBACK_NOTIFY(message_begin);
993
+
994
+ break;
995
+ }
996
+
997
+ case s_req_method:
998
+ {
999
+ const char *matcher;
1000
+ if (UNLIKELY(ch == '\0')) {
1001
+ SET_ERRNO(HPE_INVALID_METHOD);
1002
+ goto error;
1003
+ }
1004
+
1005
+ matcher = method_strings[parser->method];
1006
+ if (ch == ' ' && matcher[parser->index] == '\0') {
1007
+ UPDATE_STATE(s_req_spaces_before_url);
1008
+ } else if (ch == matcher[parser->index]) {
1009
+ ; /* nada */
1010
+ } else if (IS_ALPHA(ch)) {
1011
+
1012
+ switch (parser->method << 16 | parser->index << 8 | ch) {
1013
+ #define XX(meth, pos, ch, new_meth) \
1014
+ case (HTTP_##meth << 16 | pos << 8 | ch): \
1015
+ parser->method = HTTP_##new_meth; break;
1016
+
1017
+ XX(POST, 1, 'U', PUT)
1018
+ XX(POST, 1, 'A', PATCH)
1019
+ XX(CONNECT, 1, 'H', CHECKOUT)
1020
+ XX(CONNECT, 2, 'P', COPY)
1021
+ XX(MKCOL, 1, 'O', MOVE)
1022
+ XX(MKCOL, 1, 'E', MERGE)
1023
+ XX(MKCOL, 2, 'A', MKACTIVITY)
1024
+ XX(MKCOL, 3, 'A', MKCALENDAR)
1025
+ XX(SUBSCRIBE, 1, 'E', SEARCH)
1026
+ XX(REPORT, 2, 'B', REBIND)
1027
+ XX(POST, 1, 'R', PROPFIND)
1028
+ XX(PROPFIND, 4, 'P', PROPPATCH)
1029
+ XX(PUT, 2, 'R', PURGE)
1030
+ XX(LOCK, 1, 'I', LINK)
1031
+ XX(UNLOCK, 2, 'S', UNSUBSCRIBE)
1032
+ XX(UNLOCK, 2, 'B', UNBIND)
1033
+ XX(UNLOCK, 3, 'I', UNLINK)
1034
+ #undef XX
1035
+
1036
+ default:
1037
+ SET_ERRNO(HPE_INVALID_METHOD);
1038
+ goto error;
1039
+ }
1040
+ } else if (ch == '-' &&
1041
+ parser->index == 1 &&
1042
+ parser->method == HTTP_MKCOL) {
1043
+ parser->method = HTTP_MSEARCH;
1044
+ } else {
1045
+ SET_ERRNO(HPE_INVALID_METHOD);
1046
+ goto error;
1047
+ }
1048
+
1049
+ ++parser->index;
1050
+ break;
1051
+ }
1052
+
1053
+ case s_req_spaces_before_url:
1054
+ {
1055
+ if (ch == ' ') break;
1056
+
1057
+ MARK(url);
1058
+ if (parser->method == HTTP_CONNECT) {
1059
+ UPDATE_STATE(s_req_server_start);
1060
+ }
1061
+
1062
+ UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch));
1063
+ if (UNLIKELY(CURRENT_STATE() == s_dead)) {
1064
+ SET_ERRNO(HPE_INVALID_URL);
1065
+ goto error;
1066
+ }
1067
+
1068
+ break;
1069
+ }
1070
+
1071
+ case s_req_schema:
1072
+ case s_req_schema_slash:
1073
+ case s_req_schema_slash_slash:
1074
+ case s_req_server_start:
1075
+ {
1076
+ switch (ch) {
1077
+ /* No whitespace allowed here */
1078
+ case ' ':
1079
+ case CR:
1080
+ case LF:
1081
+ SET_ERRNO(HPE_INVALID_URL);
1082
+ goto error;
1083
+ default:
1084
+ UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch));
1085
+ if (UNLIKELY(CURRENT_STATE() == s_dead)) {
1086
+ SET_ERRNO(HPE_INVALID_URL);
1087
+ goto error;
1088
+ }
1089
+ }
1090
+
1091
+ break;
1092
+ }
1093
+
1094
+ case s_req_server:
1095
+ case s_req_server_with_at:
1096
+ case s_req_path:
1097
+ case s_req_query_string_start:
1098
+ case s_req_query_string:
1099
+ case s_req_fragment_start:
1100
+ case s_req_fragment:
1101
+ {
1102
+ switch (ch) {
1103
+ case ' ':
1104
+ UPDATE_STATE(s_req_http_start);
1105
+ CALLBACK_DATA(url);
1106
+ break;
1107
+ case CR:
1108
+ case LF:
1109
+ parser->http_major = 0;
1110
+ parser->http_minor = 9;
1111
+ UPDATE_STATE((ch == CR) ?
1112
+ s_req_line_almost_done :
1113
+ s_header_field_start);
1114
+ CALLBACK_DATA(url);
1115
+ break;
1116
+ default:
1117
+ UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch));
1118
+ if (UNLIKELY(CURRENT_STATE() == s_dead)) {
1119
+ SET_ERRNO(HPE_INVALID_URL);
1120
+ goto error;
1121
+ }
1122
+ }
1123
+ break;
1124
+ }
1125
+
1126
+ case s_req_http_start:
1127
+ switch (ch) {
1128
+ case 'H':
1129
+ UPDATE_STATE(s_req_http_H);
1130
+ break;
1131
+ case ' ':
1132
+ break;
1133
+ default:
1134
+ SET_ERRNO(HPE_INVALID_CONSTANT);
1135
+ goto error;
1136
+ }
1137
+ break;
1138
+
1139
+ case s_req_http_H:
1140
+ STRICT_CHECK(ch != 'T');
1141
+ UPDATE_STATE(s_req_http_HT);
1142
+ break;
1143
+
1144
+ case s_req_http_HT:
1145
+ STRICT_CHECK(ch != 'T');
1146
+ UPDATE_STATE(s_req_http_HTT);
1147
+ break;
1148
+
1149
+ case s_req_http_HTT:
1150
+ STRICT_CHECK(ch != 'P');
1151
+ UPDATE_STATE(s_req_http_HTTP);
1152
+ break;
1153
+
1154
+ case s_req_http_HTTP:
1155
+ STRICT_CHECK(ch != '/');
1156
+ UPDATE_STATE(s_req_first_http_major);
1157
+ break;
1158
+
1159
+ /* first digit of major HTTP version */
1160
+ case s_req_first_http_major:
1161
+ if (UNLIKELY(ch < '1' || ch > '9')) {
1162
+ SET_ERRNO(HPE_INVALID_VERSION);
1163
+ goto error;
1164
+ }
1165
+
1166
+ parser->http_major = ch - '0';
1167
+ UPDATE_STATE(s_req_http_major);
1168
+ break;
1169
+
1170
+ /* major HTTP version or dot */
1171
+ case s_req_http_major:
1172
+ {
1173
+ if (ch == '.') {
1174
+ UPDATE_STATE(s_req_first_http_minor);
1175
+ break;
1176
+ }
1177
+
1178
+ if (UNLIKELY(!IS_NUM(ch))) {
1179
+ SET_ERRNO(HPE_INVALID_VERSION);
1180
+ goto error;
1181
+ }
1182
+
1183
+ parser->http_major *= 10;
1184
+ parser->http_major += ch - '0';
1185
+
1186
+ if (UNLIKELY(parser->http_major > 999)) {
1187
+ SET_ERRNO(HPE_INVALID_VERSION);
1188
+ goto error;
1189
+ }
1190
+
1191
+ break;
1192
+ }
1193
+
1194
+ /* first digit of minor HTTP version */
1195
+ case s_req_first_http_minor:
1196
+ if (UNLIKELY(!IS_NUM(ch))) {
1197
+ SET_ERRNO(HPE_INVALID_VERSION);
1198
+ goto error;
1199
+ }
1200
+
1201
+ parser->http_minor = ch - '0';
1202
+ UPDATE_STATE(s_req_http_minor);
1203
+ break;
1204
+
1205
+ /* minor HTTP version or end of request line */
1206
+ case s_req_http_minor:
1207
+ {
1208
+ if (ch == CR) {
1209
+ UPDATE_STATE(s_req_line_almost_done);
1210
+ break;
1211
+ }
1212
+
1213
+ if (ch == LF) {
1214
+ UPDATE_STATE(s_header_field_start);
1215
+ break;
1216
+ }
1217
+
1218
+ /* XXX allow spaces after digit? */
1219
+
1220
+ if (UNLIKELY(!IS_NUM(ch))) {
1221
+ SET_ERRNO(HPE_INVALID_VERSION);
1222
+ goto error;
1223
+ }
1224
+
1225
+ parser->http_minor *= 10;
1226
+ parser->http_minor += ch - '0';
1227
+
1228
+ if (UNLIKELY(parser->http_minor > 999)) {
1229
+ SET_ERRNO(HPE_INVALID_VERSION);
1230
+ goto error;
1231
+ }
1232
+
1233
+ break;
1234
+ }
1235
+
1236
+ /* end of request line */
1237
+ case s_req_line_almost_done:
1238
+ {
1239
+ if (UNLIKELY(ch != LF)) {
1240
+ SET_ERRNO(HPE_LF_EXPECTED);
1241
+ goto error;
1242
+ }
1243
+
1244
+ UPDATE_STATE(s_header_field_start);
1245
+ break;
1246
+ }
1247
+
1248
+ case s_header_field_start:
1249
+ {
1250
+ if (ch == CR) {
1251
+ UPDATE_STATE(s_headers_almost_done);
1252
+ break;
1253
+ }
1254
+
1255
+ if (ch == LF) {
1256
+ /* they might be just sending \n instead of \r\n so this would be
1257
+ * the second \n to denote the end of headers*/
1258
+ UPDATE_STATE(s_headers_almost_done);
1259
+ REEXECUTE();
1260
+ }
1261
+
1262
+ c = TOKEN(ch);
1263
+
1264
+ if (UNLIKELY(!c)) {
1265
+ SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1266
+ goto error;
1267
+ }
1268
+
1269
+ MARK(header_field);
1270
+
1271
+ parser->index = 0;
1272
+ UPDATE_STATE(s_header_field);
1273
+
1274
+ switch (c) {
1275
+ case 'c':
1276
+ parser->header_state = h_C;
1277
+ break;
1278
+
1279
+ case 'p':
1280
+ parser->header_state = h_matching_proxy_connection;
1281
+ break;
1282
+
1283
+ case 't':
1284
+ parser->header_state = h_matching_transfer_encoding;
1285
+ break;
1286
+
1287
+ case 'u':
1288
+ parser->header_state = h_matching_upgrade;
1289
+ break;
1290
+
1291
+ default:
1292
+ parser->header_state = h_general;
1293
+ break;
1294
+ }
1295
+ break;
1296
+ }
1297
+
1298
+ case s_header_field:
1299
+ {
1300
+ const char* start = p;
1301
+ for (; p != data + len; p++) {
1302
+ ch = *p;
1303
+ c = TOKEN(ch);
1304
+
1305
+ if (!c)
1306
+ break;
1307
+
1308
+ switch (parser->header_state) {
1309
+ case h_general:
1310
+ break;
1311
+
1312
+ case h_C:
1313
+ parser->index++;
1314
+ parser->header_state = (c == 'o' ? h_CO : h_general);
1315
+ break;
1316
+
1317
+ case h_CO:
1318
+ parser->index++;
1319
+ parser->header_state = (c == 'n' ? h_CON : h_general);
1320
+ break;
1321
+
1322
+ case h_CON:
1323
+ parser->index++;
1324
+ switch (c) {
1325
+ case 'n':
1326
+ parser->header_state = h_matching_connection;
1327
+ break;
1328
+ case 't':
1329
+ parser->header_state = h_matching_content_length;
1330
+ break;
1331
+ default:
1332
+ parser->header_state = h_general;
1333
+ break;
1334
+ }
1335
+ break;
1336
+
1337
+ /* connection */
1338
+
1339
+ case h_matching_connection:
1340
+ parser->index++;
1341
+ if (parser->index > sizeof(CONNECTION)-1
1342
+ || c != CONNECTION[parser->index]) {
1343
+ parser->header_state = h_general;
1344
+ } else if (parser->index == sizeof(CONNECTION)-2) {
1345
+ parser->header_state = h_connection;
1346
+ }
1347
+ break;
1348
+
1349
+ /* proxy-connection */
1350
+
1351
+ case h_matching_proxy_connection:
1352
+ parser->index++;
1353
+ if (parser->index > sizeof(PROXY_CONNECTION)-1
1354
+ || c != PROXY_CONNECTION[parser->index]) {
1355
+ parser->header_state = h_general;
1356
+ } else if (parser->index == sizeof(PROXY_CONNECTION)-2) {
1357
+ parser->header_state = h_connection;
1358
+ }
1359
+ break;
1360
+
1361
+ /* content-length */
1362
+
1363
+ case h_matching_content_length:
1364
+ parser->index++;
1365
+ if (parser->index > sizeof(CONTENT_LENGTH)-1
1366
+ || c != CONTENT_LENGTH[parser->index]) {
1367
+ parser->header_state = h_general;
1368
+ } else if (parser->index == sizeof(CONTENT_LENGTH)-2) {
1369
+ parser->header_state = h_content_length;
1370
+ }
1371
+ break;
1372
+
1373
+ /* transfer-encoding */
1374
+
1375
+ case h_matching_transfer_encoding:
1376
+ parser->index++;
1377
+ if (parser->index > sizeof(TRANSFER_ENCODING)-1
1378
+ || c != TRANSFER_ENCODING[parser->index]) {
1379
+ parser->header_state = h_general;
1380
+ } else if (parser->index == sizeof(TRANSFER_ENCODING)-2) {
1381
+ parser->header_state = h_transfer_encoding;
1382
+ }
1383
+ break;
1384
+
1385
+ /* upgrade */
1386
+
1387
+ case h_matching_upgrade:
1388
+ parser->index++;
1389
+ if (parser->index > sizeof(UPGRADE)-1
1390
+ || c != UPGRADE[parser->index]) {
1391
+ parser->header_state = h_general;
1392
+ } else if (parser->index == sizeof(UPGRADE)-2) {
1393
+ parser->header_state = h_upgrade;
1394
+ }
1395
+ break;
1396
+
1397
+ case h_connection:
1398
+ case h_content_length:
1399
+ case h_transfer_encoding:
1400
+ case h_upgrade:
1401
+ if (ch != ' ') parser->header_state = h_general;
1402
+ break;
1403
+
1404
+ default:
1405
+ assert(0 && "Unknown header_state");
1406
+ break;
1407
+ }
1408
+ }
1409
+
1410
+ COUNT_HEADER_SIZE(p - start);
1411
+
1412
+ if (p == data + len) {
1413
+ --p;
1414
+ break;
1415
+ }
1416
+
1417
+ if (ch == ':') {
1418
+ UPDATE_STATE(s_header_value_discard_ws);
1419
+ CALLBACK_DATA(header_field);
1420
+ break;
1421
+ }
1422
+
1423
+ SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1424
+ goto error;
1425
+ }
1426
+
1427
+ case s_header_value_discard_ws:
1428
+ if (ch == ' ' || ch == '\t') break;
1429
+
1430
+ if (ch == CR) {
1431
+ UPDATE_STATE(s_header_value_discard_ws_almost_done);
1432
+ break;
1433
+ }
1434
+
1435
+ if (ch == LF) {
1436
+ UPDATE_STATE(s_header_value_discard_lws);
1437
+ break;
1438
+ }
1439
+
1440
+ /* FALLTHROUGH */
1441
+
1442
+ case s_header_value_start:
1443
+ {
1444
+ MARK(header_value);
1445
+
1446
+ UPDATE_STATE(s_header_value);
1447
+ parser->index = 0;
1448
+
1449
+ c = LOWER(ch);
1450
+
1451
+ switch (parser->header_state) {
1452
+ case h_upgrade:
1453
+ parser->flags |= F_UPGRADE;
1454
+ parser->header_state = h_general;
1455
+ break;
1456
+
1457
+ case h_transfer_encoding:
1458
+ /* looking for 'Transfer-Encoding: chunked' */
1459
+ if ('c' == c) {
1460
+ parser->header_state = h_matching_transfer_encoding_chunked;
1461
+ } else {
1462
+ parser->header_state = h_general;
1463
+ }
1464
+ break;
1465
+
1466
+ case h_content_length:
1467
+ if (UNLIKELY(!IS_NUM(ch))) {
1468
+ SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1469
+ goto error;
1470
+ }
1471
+
1472
+ if (parser->flags & F_CONTENTLENGTH) {
1473
+ SET_ERRNO(HPE_UNEXPECTED_CONTENT_LENGTH);
1474
+ goto error;
1475
+ }
1476
+
1477
+ parser->flags |= F_CONTENTLENGTH;
1478
+ parser->content_length = ch - '0';
1479
+ break;
1480
+
1481
+ case h_connection:
1482
+ /* looking for 'Connection: keep-alive' */
1483
+ if (c == 'k') {
1484
+ parser->header_state = h_matching_connection_keep_alive;
1485
+ /* looking for 'Connection: close' */
1486
+ } else if (c == 'c') {
1487
+ parser->header_state = h_matching_connection_close;
1488
+ } else if (c == 'u') {
1489
+ parser->header_state = h_matching_connection_upgrade;
1490
+ } else {
1491
+ parser->header_state = h_matching_connection_token;
1492
+ }
1493
+ break;
1494
+
1495
+ /* Multi-value `Connection` header */
1496
+ case h_matching_connection_token_start:
1497
+ break;
1498
+
1499
+ default:
1500
+ parser->header_state = h_general;
1501
+ break;
1502
+ }
1503
+ break;
1504
+ }
1505
+
1506
+ case s_header_value:
1507
+ {
1508
+ const char* start = p;
1509
+ enum header_states h_state = (enum header_states) parser->header_state;
1510
+ for (; p != data + len; p++) {
1511
+ ch = *p;
1512
+ if (ch == CR) {
1513
+ UPDATE_STATE(s_header_almost_done);
1514
+ parser->header_state = h_state;
1515
+ CALLBACK_DATA(header_value);
1516
+ break;
1517
+ }
1518
+
1519
+ if (ch == LF) {
1520
+ UPDATE_STATE(s_header_almost_done);
1521
+ COUNT_HEADER_SIZE(p - start);
1522
+ parser->header_state = h_state;
1523
+ CALLBACK_DATA_NOADVANCE(header_value);
1524
+ REEXECUTE();
1525
+ }
1526
+
1527
+ if (!lenient && !IS_HEADER_CHAR(ch)) {
1528
+ SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1529
+ goto error;
1530
+ }
1531
+
1532
+ c = LOWER(ch);
1533
+
1534
+ switch (h_state) {
1535
+ case h_general:
1536
+ {
1537
+ const char* p_cr;
1538
+ const char* p_lf;
1539
+ size_t limit = data + len - p;
1540
+
1541
+ limit = MIN(limit, HTTP_MAX_HEADER_SIZE);
1542
+
1543
+ p_cr = (const char*) memchr(p, CR, limit);
1544
+ p_lf = (const char*) memchr(p, LF, limit);
1545
+ if (p_cr != NULL) {
1546
+ if (p_lf != NULL && p_cr >= p_lf)
1547
+ p = p_lf;
1548
+ else
1549
+ p = p_cr;
1550
+ } else if (UNLIKELY(p_lf != NULL)) {
1551
+ p = p_lf;
1552
+ } else {
1553
+ p = data + len;
1554
+ }
1555
+ --p;
1556
+
1557
+ break;
1558
+ }
1559
+
1560
+ case h_connection:
1561
+ case h_transfer_encoding:
1562
+ assert(0 && "Shouldn't get here.");
1563
+ break;
1564
+
1565
+ case h_content_length:
1566
+ {
1567
+ uint64_t t;
1568
+
1569
+ if (ch == ' ') break;
1570
+
1571
+ if (UNLIKELY(!IS_NUM(ch))) {
1572
+ SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1573
+ parser->header_state = h_state;
1574
+ goto error;
1575
+ }
1576
+
1577
+ t = parser->content_length;
1578
+ t *= 10;
1579
+ t += ch - '0';
1580
+
1581
+ /* Overflow? Test against a conservative limit for simplicity. */
1582
+ if (UNLIKELY((ULLONG_MAX - 10) / 10 < parser->content_length)) {
1583
+ SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1584
+ parser->header_state = h_state;
1585
+ goto error;
1586
+ }
1587
+
1588
+ parser->content_length = t;
1589
+ break;
1590
+ }
1591
+
1592
+ /* Transfer-Encoding: chunked */
1593
+ case h_matching_transfer_encoding_chunked:
1594
+ parser->index++;
1595
+ if (parser->index > sizeof(CHUNKED)-1
1596
+ || c != CHUNKED[parser->index]) {
1597
+ h_state = h_general;
1598
+ } else if (parser->index == sizeof(CHUNKED)-2) {
1599
+ h_state = h_transfer_encoding_chunked;
1600
+ }
1601
+ break;
1602
+
1603
+ case h_matching_connection_token_start:
1604
+ /* looking for 'Connection: keep-alive' */
1605
+ if (c == 'k') {
1606
+ h_state = h_matching_connection_keep_alive;
1607
+ /* looking for 'Connection: close' */
1608
+ } else if (c == 'c') {
1609
+ h_state = h_matching_connection_close;
1610
+ } else if (c == 'u') {
1611
+ h_state = h_matching_connection_upgrade;
1612
+ } else if (STRICT_TOKEN(c)) {
1613
+ h_state = h_matching_connection_token;
1614
+ } else if (c == ' ' || c == '\t') {
1615
+ /* Skip lws */
1616
+ } else {
1617
+ h_state = h_general;
1618
+ }
1619
+ break;
1620
+
1621
+ /* looking for 'Connection: keep-alive' */
1622
+ case h_matching_connection_keep_alive:
1623
+ parser->index++;
1624
+ if (parser->index > sizeof(KEEP_ALIVE)-1
1625
+ || c != KEEP_ALIVE[parser->index]) {
1626
+ h_state = h_matching_connection_token;
1627
+ } else if (parser->index == sizeof(KEEP_ALIVE)-2) {
1628
+ h_state = h_connection_keep_alive;
1629
+ }
1630
+ break;
1631
+
1632
+ /* looking for 'Connection: close' */
1633
+ case h_matching_connection_close:
1634
+ parser->index++;
1635
+ if (parser->index > sizeof(CLOSE)-1 || c != CLOSE[parser->index]) {
1636
+ h_state = h_matching_connection_token;
1637
+ } else if (parser->index == sizeof(CLOSE)-2) {
1638
+ h_state = h_connection_close;
1639
+ }
1640
+ break;
1641
+
1642
+ /* looking for 'Connection: upgrade' */
1643
+ case h_matching_connection_upgrade:
1644
+ parser->index++;
1645
+ if (parser->index > sizeof(UPGRADE) - 1 ||
1646
+ c != UPGRADE[parser->index]) {
1647
+ h_state = h_matching_connection_token;
1648
+ } else if (parser->index == sizeof(UPGRADE)-2) {
1649
+ h_state = h_connection_upgrade;
1650
+ }
1651
+ break;
1652
+
1653
+ case h_matching_connection_token:
1654
+ if (ch == ',') {
1655
+ h_state = h_matching_connection_token_start;
1656
+ parser->index = 0;
1657
+ }
1658
+ break;
1659
+
1660
+ case h_transfer_encoding_chunked:
1661
+ if (ch != ' ') h_state = h_general;
1662
+ break;
1663
+
1664
+ case h_connection_keep_alive:
1665
+ case h_connection_close:
1666
+ case h_connection_upgrade:
1667
+ if (ch == ',') {
1668
+ if (h_state == h_connection_keep_alive) {
1669
+ parser->flags |= F_CONNECTION_KEEP_ALIVE;
1670
+ } else if (h_state == h_connection_close) {
1671
+ parser->flags |= F_CONNECTION_CLOSE;
1672
+ } else if (h_state == h_connection_upgrade) {
1673
+ parser->flags |= F_CONNECTION_UPGRADE;
1674
+ }
1675
+ h_state = h_matching_connection_token_start;
1676
+ parser->index = 0;
1677
+ } else if (ch != ' ') {
1678
+ h_state = h_matching_connection_token;
1679
+ }
1680
+ break;
1681
+
1682
+ default:
1683
+ UPDATE_STATE(s_header_value);
1684
+ h_state = h_general;
1685
+ break;
1686
+ }
1687
+ }
1688
+ parser->header_state = h_state;
1689
+
1690
+ COUNT_HEADER_SIZE(p - start);
1691
+
1692
+ if (p == data + len)
1693
+ --p;
1694
+ break;
1695
+ }
1696
+
1697
+ case s_header_almost_done:
1698
+ {
1699
+ if (UNLIKELY(ch != LF)) {
1700
+ SET_ERRNO(HPE_LF_EXPECTED);
1701
+ goto error;
1702
+ }
1703
+
1704
+ UPDATE_STATE(s_header_value_lws);
1705
+ break;
1706
+ }
1707
+
1708
+ case s_header_value_lws:
1709
+ {
1710
+ if (ch == ' ' || ch == '\t') {
1711
+ UPDATE_STATE(s_header_value_start);
1712
+ REEXECUTE();
1713
+ }
1714
+
1715
+ /* finished the header */
1716
+ switch (parser->header_state) {
1717
+ case h_connection_keep_alive:
1718
+ parser->flags |= F_CONNECTION_KEEP_ALIVE;
1719
+ break;
1720
+ case h_connection_close:
1721
+ parser->flags |= F_CONNECTION_CLOSE;
1722
+ break;
1723
+ case h_transfer_encoding_chunked:
1724
+ parser->flags |= F_CHUNKED;
1725
+ break;
1726
+ case h_connection_upgrade:
1727
+ parser->flags |= F_CONNECTION_UPGRADE;
1728
+ break;
1729
+ default:
1730
+ break;
1731
+ }
1732
+
1733
+ UPDATE_STATE(s_header_field_start);
1734
+ REEXECUTE();
1735
+ }
1736
+
1737
+ case s_header_value_discard_ws_almost_done:
1738
+ {
1739
+ STRICT_CHECK(ch != LF);
1740
+ UPDATE_STATE(s_header_value_discard_lws);
1741
+ break;
1742
+ }
1743
+
1744
+ case s_header_value_discard_lws:
1745
+ {
1746
+ if (ch == ' ' || ch == '\t') {
1747
+ UPDATE_STATE(s_header_value_discard_ws);
1748
+ break;
1749
+ } else {
1750
+ switch (parser->header_state) {
1751
+ case h_connection_keep_alive:
1752
+ parser->flags |= F_CONNECTION_KEEP_ALIVE;
1753
+ break;
1754
+ case h_connection_close:
1755
+ parser->flags |= F_CONNECTION_CLOSE;
1756
+ break;
1757
+ case h_connection_upgrade:
1758
+ parser->flags |= F_CONNECTION_UPGRADE;
1759
+ break;
1760
+ case h_transfer_encoding_chunked:
1761
+ parser->flags |= F_CHUNKED;
1762
+ break;
1763
+ default:
1764
+ break;
1765
+ }
1766
+
1767
+ /* header value was empty */
1768
+ MARK(header_value);
1769
+ UPDATE_STATE(s_header_field_start);
1770
+ CALLBACK_DATA_NOADVANCE(header_value);
1771
+ REEXECUTE();
1772
+ }
1773
+ }
1774
+
1775
+ case s_headers_almost_done:
1776
+ {
1777
+ STRICT_CHECK(ch != LF);
1778
+
1779
+ if (parser->flags & F_TRAILING) {
1780
+ /* End of a chunked request */
1781
+ UPDATE_STATE(s_message_done);
1782
+ CALLBACK_NOTIFY_NOADVANCE(chunk_complete);
1783
+ REEXECUTE();
1784
+ }
1785
+
1786
+ /* Cannot use chunked encoding and a content-length header together
1787
+ per the HTTP specification. */
1788
+ if ((parser->flags & F_CHUNKED) &&
1789
+ (parser->flags & F_CONTENTLENGTH)) {
1790
+ SET_ERRNO(HPE_UNEXPECTED_CONTENT_LENGTH);
1791
+ goto error;
1792
+ }
1793
+
1794
+ UPDATE_STATE(s_headers_done);
1795
+
1796
+ /* Set this here so that on_headers_complete() callbacks can see it */
1797
+ parser->upgrade =
1798
+ ((parser->flags & (F_UPGRADE | F_CONNECTION_UPGRADE)) ==
1799
+ (F_UPGRADE | F_CONNECTION_UPGRADE) ||
1800
+ parser->method == HTTP_CONNECT);
1801
+
1802
+ /* Here we call the headers_complete callback. This is somewhat
1803
+ * different than other callbacks because if the user returns 1, we
1804
+ * will interpret that as saying that this message has no body. This
1805
+ * is needed for the annoying case of recieving a response to a HEAD
1806
+ * request.
1807
+ *
1808
+ * We'd like to use CALLBACK_NOTIFY_NOADVANCE() here but we cannot, so
1809
+ * we have to simulate it by handling a change in errno below.
1810
+ */
1811
+ if (settings->on_headers_complete) {
1812
+ switch (settings->on_headers_complete(parser)) {
1813
+ case 0:
1814
+ break;
1815
+
1816
+ case 2:
1817
+ parser->upgrade = 1;
1818
+
1819
+ case 1:
1820
+ parser->flags |= F_SKIPBODY;
1821
+ break;
1822
+
1823
+ default:
1824
+ SET_ERRNO(HPE_CB_headers_complete);
1825
+ RETURN(p - data); /* Error */
1826
+ }
1827
+ }
1828
+
1829
+ if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
1830
+ RETURN(p - data);
1831
+ }
1832
+
1833
+ REEXECUTE();
1834
+ }
1835
+
1836
+ case s_headers_done:
1837
+ {
1838
+ int hasBody;
1839
+ STRICT_CHECK(ch != LF);
1840
+
1841
+ parser->nread = 0;
1842
+
1843
+ hasBody = parser->flags & F_CHUNKED ||
1844
+ (parser->content_length > 0 && parser->content_length != ULLONG_MAX);
1845
+ if (parser->upgrade && (parser->method == HTTP_CONNECT ||
1846
+ (parser->flags & F_SKIPBODY) || !hasBody)) {
1847
+ /* Exit, the rest of the message is in a different protocol. */
1848
+ UPDATE_STATE(NEW_MESSAGE());
1849
+ CALLBACK_NOTIFY(message_complete);
1850
+ RETURN((p - data) + 1);
1851
+ }
1852
+
1853
+ if (parser->flags & F_SKIPBODY) {
1854
+ UPDATE_STATE(NEW_MESSAGE());
1855
+ CALLBACK_NOTIFY(message_complete);
1856
+ } else if (parser->flags & F_CHUNKED) {
1857
+ /* chunked encoding - ignore Content-Length header */
1858
+ UPDATE_STATE(s_chunk_size_start);
1859
+ } else {
1860
+ if (parser->content_length == 0) {
1861
+ /* Content-Length header given but zero: Content-Length: 0\r\n */
1862
+ UPDATE_STATE(NEW_MESSAGE());
1863
+ CALLBACK_NOTIFY(message_complete);
1864
+ } else if (parser->content_length != ULLONG_MAX) {
1865
+ /* Content-Length header given and non-zero */
1866
+ UPDATE_STATE(s_body_identity);
1867
+ } else {
1868
+ if (!http_message_needs_eof(parser)) {
1869
+ /* Assume content-length 0 - read the next */
1870
+ UPDATE_STATE(NEW_MESSAGE());
1871
+ CALLBACK_NOTIFY(message_complete);
1872
+ } else {
1873
+ /* Read body until EOF */
1874
+ UPDATE_STATE(s_body_identity_eof);
1875
+ }
1876
+ }
1877
+ }
1878
+
1879
+ break;
1880
+ }
1881
+
1882
+ case s_body_identity:
1883
+ {
1884
+ uint64_t to_read = MIN(parser->content_length,
1885
+ (uint64_t) ((data + len) - p));
1886
+
1887
+ assert(parser->content_length != 0
1888
+ && parser->content_length != ULLONG_MAX);
1889
+
1890
+ /* The difference between advancing content_length and p is because
1891
+ * the latter will automatically advance on the next loop iteration.
1892
+ * Further, if content_length ends up at 0, we want to see the last
1893
+ * byte again for our message complete callback.
1894
+ */
1895
+ MARK(body);
1896
+ parser->content_length -= to_read;
1897
+ p += to_read - 1;
1898
+
1899
+ if (parser->content_length == 0) {
1900
+ UPDATE_STATE(s_message_done);
1901
+
1902
+ /* Mimic CALLBACK_DATA_NOADVANCE() but with one extra byte.
1903
+ *
1904
+ * The alternative to doing this is to wait for the next byte to
1905
+ * trigger the data callback, just as in every other case. The
1906
+ * problem with this is that this makes it difficult for the test
1907
+ * harness to distinguish between complete-on-EOF and
1908
+ * complete-on-length. It's not clear that this distinction is
1909
+ * important for applications, but let's keep it for now.
1910
+ */
1911
+ CALLBACK_DATA_(body, p - body_mark + 1, p - data);
1912
+ REEXECUTE();
1913
+ }
1914
+
1915
+ break;
1916
+ }
1917
+
1918
+ /* read until EOF */
1919
+ case s_body_identity_eof:
1920
+ MARK(body);
1921
+ p = data + len - 1;
1922
+
1923
+ break;
1924
+
1925
+ case s_message_done:
1926
+ UPDATE_STATE(NEW_MESSAGE());
1927
+ CALLBACK_NOTIFY(message_complete);
1928
+ if (parser->upgrade) {
1929
+ /* Exit, the rest of the message is in a different protocol. */
1930
+ RETURN((p - data) + 1);
1931
+ }
1932
+ break;
1933
+
1934
+ case s_chunk_size_start:
1935
+ {
1936
+ assert(parser->nread == 1);
1937
+ assert(parser->flags & F_CHUNKED);
1938
+
1939
+ unhex_val = unhex[(unsigned char)ch];
1940
+ if (UNLIKELY(unhex_val == -1)) {
1941
+ SET_ERRNO(HPE_INVALID_CHUNK_SIZE);
1942
+ goto error;
1943
+ }
1944
+
1945
+ parser->content_length = unhex_val;
1946
+ UPDATE_STATE(s_chunk_size);
1947
+ break;
1948
+ }
1949
+
1950
+ case s_chunk_size:
1951
+ {
1952
+ uint64_t t;
1953
+
1954
+ assert(parser->flags & F_CHUNKED);
1955
+
1956
+ if (ch == CR) {
1957
+ UPDATE_STATE(s_chunk_size_almost_done);
1958
+ break;
1959
+ }
1960
+
1961
+ unhex_val = unhex[(unsigned char)ch];
1962
+
1963
+ if (unhex_val == -1) {
1964
+ if (ch == ';' || ch == ' ') {
1965
+ UPDATE_STATE(s_chunk_parameters);
1966
+ break;
1967
+ }
1968
+
1969
+ SET_ERRNO(HPE_INVALID_CHUNK_SIZE);
1970
+ goto error;
1971
+ }
1972
+
1973
+ t = parser->content_length;
1974
+ t *= 16;
1975
+ t += unhex_val;
1976
+
1977
+ /* Overflow? Test against a conservative limit for simplicity. */
1978
+ if (UNLIKELY((ULLONG_MAX - 16) / 16 < parser->content_length)) {
1979
+ SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1980
+ goto error;
1981
+ }
1982
+
1983
+ parser->content_length = t;
1984
+ break;
1985
+ }
1986
+
1987
+ case s_chunk_parameters:
1988
+ {
1989
+ assert(parser->flags & F_CHUNKED);
1990
+ /* just ignore this shit. TODO check for overflow */
1991
+ if (ch == CR) {
1992
+ UPDATE_STATE(s_chunk_size_almost_done);
1993
+ break;
1994
+ }
1995
+ break;
1996
+ }
1997
+
1998
+ case s_chunk_size_almost_done:
1999
+ {
2000
+ assert(parser->flags & F_CHUNKED);
2001
+ STRICT_CHECK(ch != LF);
2002
+
2003
+ parser->nread = 0;
2004
+
2005
+ if (parser->content_length == 0) {
2006
+ parser->flags |= F_TRAILING;
2007
+ UPDATE_STATE(s_header_field_start);
2008
+ } else {
2009
+ UPDATE_STATE(s_chunk_data);
2010
+ }
2011
+ CALLBACK_NOTIFY(chunk_header);
2012
+ break;
2013
+ }
2014
+
2015
+ case s_chunk_data:
2016
+ {
2017
+ uint64_t to_read = MIN(parser->content_length,
2018
+ (uint64_t) ((data + len) - p));
2019
+
2020
+ assert(parser->flags & F_CHUNKED);
2021
+ assert(parser->content_length != 0
2022
+ && parser->content_length != ULLONG_MAX);
2023
+
2024
+ /* See the explanation in s_body_identity for why the content
2025
+ * length and data pointers are managed this way.
2026
+ */
2027
+ MARK(body);
2028
+ parser->content_length -= to_read;
2029
+ p += to_read - 1;
2030
+
2031
+ if (parser->content_length == 0) {
2032
+ UPDATE_STATE(s_chunk_data_almost_done);
2033
+ }
2034
+
2035
+ break;
2036
+ }
2037
+
2038
+ case s_chunk_data_almost_done:
2039
+ assert(parser->flags & F_CHUNKED);
2040
+ assert(parser->content_length == 0);
2041
+ STRICT_CHECK(ch != CR);
2042
+ UPDATE_STATE(s_chunk_data_done);
2043
+ CALLBACK_DATA(body);
2044
+ break;
2045
+
2046
+ case s_chunk_data_done:
2047
+ assert(parser->flags & F_CHUNKED);
2048
+ STRICT_CHECK(ch != LF);
2049
+ parser->nread = 0;
2050
+ UPDATE_STATE(s_chunk_size_start);
2051
+ CALLBACK_NOTIFY(chunk_complete);
2052
+ break;
2053
+
2054
+ default:
2055
+ assert(0 && "unhandled state");
2056
+ SET_ERRNO(HPE_INVALID_INTERNAL_STATE);
2057
+ goto error;
2058
+ }
2059
+ }
2060
+
2061
+ /* Run callbacks for any marks that we have leftover after we ran out of
2062
+ * bytes. There should be at most one of these set, so it's OK to invoke
2063
+ * them in series (unset marks will not result in callbacks).
2064
+ *
2065
+ * We use the NOADVANCE() variety of callbacks here because 'p' has already
2066
+ * overflowed 'data' and this allows us to correct for the off-by-one that
2067
+ * we'd otherwise have (since CALLBACK_DATA() is meant to be run with a 'p'
2068
+ * value that's in-bounds).
2069
+ */
2070
+
2071
+ assert(((header_field_mark ? 1 : 0) +
2072
+ (header_value_mark ? 1 : 0) +
2073
+ (url_mark ? 1 : 0) +
2074
+ (body_mark ? 1 : 0) +
2075
+ (status_mark ? 1 : 0)) <= 1);
2076
+
2077
+ CALLBACK_DATA_NOADVANCE(header_field);
2078
+ CALLBACK_DATA_NOADVANCE(header_value);
2079
+ CALLBACK_DATA_NOADVANCE(url);
2080
+ CALLBACK_DATA_NOADVANCE(body);
2081
+ CALLBACK_DATA_NOADVANCE(status);
2082
+
2083
+ RETURN(len);
2084
+
2085
+ error:
2086
+ if (HTTP_PARSER_ERRNO(parser) == HPE_OK) {
2087
+ SET_ERRNO(HPE_UNKNOWN);
2088
+ }
2089
+
2090
+ RETURN(p - data);
2091
+ }
2092
+
2093
+
2094
+ /* Does the parser need to see an EOF to find the end of the message? */
2095
+ int
2096
+ http_message_needs_eof (const http_parser *parser)
2097
+ {
2098
+ if (parser->type == HTTP_REQUEST) {
2099
+ return 0;
2100
+ }
2101
+
2102
+ /* See RFC 2616 section 4.4 */
2103
+ if (parser->status_code / 100 == 1 || /* 1xx e.g. Continue */
2104
+ parser->status_code == 204 || /* No Content */
2105
+ parser->status_code == 304 || /* Not Modified */
2106
+ parser->flags & F_SKIPBODY) { /* response to a HEAD request */
2107
+ return 0;
2108
+ }
2109
+
2110
+ if ((parser->flags & F_CHUNKED) || parser->content_length != ULLONG_MAX) {
2111
+ return 0;
2112
+ }
2113
+
2114
+ return 1;
2115
+ }
2116
+
2117
+
2118
+ int
2119
+ http_should_keep_alive (const http_parser *parser)
2120
+ {
2121
+ if (parser->http_major > 0 && parser->http_minor > 0) {
2122
+ /* HTTP/1.1 */
2123
+ if (parser->flags & F_CONNECTION_CLOSE) {
2124
+ return 0;
2125
+ }
2126
+ } else {
2127
+ /* HTTP/1.0 or earlier */
2128
+ if (!(parser->flags & F_CONNECTION_KEEP_ALIVE)) {
2129
+ return 0;
2130
+ }
2131
+ }
2132
+
2133
+ return !http_message_needs_eof(parser);
2134
+ }
2135
+
2136
+
2137
+ const char *
2138
+ http_method_str (enum http_method m)
2139
+ {
2140
+ return ELEM_AT(method_strings, m, "<unknown>");
2141
+ }
2142
+
2143
+
2144
+ void
2145
+ http_parser_init (http_parser *parser, enum http_parser_type t)
2146
+ {
2147
+ void *data = parser->data; /* preserve application data */
2148
+ memset(parser, 0, sizeof(*parser));
2149
+ parser->data = data;
2150
+ parser->type = t;
2151
+ parser->state = (t == HTTP_REQUEST ? s_start_req : (t == HTTP_RESPONSE ? s_start_res : s_start_req_or_res));
2152
+ parser->http_errno = HPE_OK;
2153
+ }
2154
+
2155
+ void
2156
+ http_parser_settings_init(http_parser_settings *settings)
2157
+ {
2158
+ memset(settings, 0, sizeof(*settings));
2159
+ }
2160
+
2161
+ const char *
2162
+ http_errno_name(enum http_errno err) {
2163
+ assert(((size_t) err) < ARRAY_SIZE(http_strerror_tab));
2164
+ return http_strerror_tab[err].name;
2165
+ }
2166
+
2167
+ const char *
2168
+ http_errno_description(enum http_errno err) {
2169
+ assert(((size_t) err) < ARRAY_SIZE(http_strerror_tab));
2170
+ return http_strerror_tab[err].description;
2171
+ }
2172
+
2173
+ static enum http_host_state
2174
+ http_parse_host_char(enum http_host_state s, const char ch) {
2175
+ switch(s) {
2176
+ case s_http_userinfo:
2177
+ case s_http_userinfo_start:
2178
+ if (ch == '@') {
2179
+ return s_http_host_start;
2180
+ }
2181
+
2182
+ if (IS_USERINFO_CHAR(ch)) {
2183
+ return s_http_userinfo;
2184
+ }
2185
+ break;
2186
+
2187
+ case s_http_host_start:
2188
+ if (ch == '[') {
2189
+ return s_http_host_v6_start;
2190
+ }
2191
+
2192
+ if (IS_HOST_CHAR(ch)) {
2193
+ return s_http_host;
2194
+ }
2195
+
2196
+ break;
2197
+
2198
+ case s_http_host:
2199
+ if (IS_HOST_CHAR(ch)) {
2200
+ return s_http_host;
2201
+ }
2202
+
2203
+ /* FALLTHROUGH */
2204
+ case s_http_host_v6_end:
2205
+ if (ch == ':') {
2206
+ return s_http_host_port_start;
2207
+ }
2208
+
2209
+ break;
2210
+
2211
+ case s_http_host_v6:
2212
+ if (ch == ']') {
2213
+ return s_http_host_v6_end;
2214
+ }
2215
+
2216
+ /* FALLTHROUGH */
2217
+ case s_http_host_v6_start:
2218
+ if (IS_HEX(ch) || ch == ':' || ch == '.') {
2219
+ return s_http_host_v6;
2220
+ }
2221
+
2222
+ if (s == s_http_host_v6 && ch == '%') {
2223
+ return s_http_host_v6_zone_start;
2224
+ }
2225
+ break;
2226
+
2227
+ case s_http_host_v6_zone:
2228
+ if (ch == ']') {
2229
+ return s_http_host_v6_end;
2230
+ }
2231
+
2232
+ /* FALLTHROUGH */
2233
+ case s_http_host_v6_zone_start:
2234
+ /* RFC 6874 Zone ID consists of 1*( unreserved / pct-encoded) */
2235
+ if (IS_ALPHANUM(ch) || ch == '%' || ch == '.' || ch == '-' || ch == '_' ||
2236
+ ch == '~') {
2237
+ return s_http_host_v6_zone;
2238
+ }
2239
+ break;
2240
+
2241
+ case s_http_host_port:
2242
+ case s_http_host_port_start:
2243
+ if (IS_NUM(ch)) {
2244
+ return s_http_host_port;
2245
+ }
2246
+
2247
+ break;
2248
+
2249
+ default:
2250
+ break;
2251
+ }
2252
+ return s_http_host_dead;
2253
+ }
2254
+
2255
+ static int
2256
+ http_parse_host(const char * buf, struct http_parser_url *u, int found_at) {
2257
+ enum http_host_state s;
2258
+
2259
+ const char *p;
2260
+ size_t buflen = u->field_data[UF_HOST].off + u->field_data[UF_HOST].len;
2261
+
2262
+ assert(u->field_set & (1 << UF_HOST));
2263
+
2264
+ u->field_data[UF_HOST].len = 0;
2265
+
2266
+ s = found_at ? s_http_userinfo_start : s_http_host_start;
2267
+
2268
+ for (p = buf + u->field_data[UF_HOST].off; p < buf + buflen; p++) {
2269
+ enum http_host_state new_s = http_parse_host_char(s, *p);
2270
+
2271
+ if (new_s == s_http_host_dead) {
2272
+ return 1;
2273
+ }
2274
+
2275
+ switch(new_s) {
2276
+ case s_http_host:
2277
+ if (s != s_http_host) {
2278
+ u->field_data[UF_HOST].off = p - buf;
2279
+ }
2280
+ u->field_data[UF_HOST].len++;
2281
+ break;
2282
+
2283
+ case s_http_host_v6:
2284
+ if (s != s_http_host_v6) {
2285
+ u->field_data[UF_HOST].off = p - buf;
2286
+ }
2287
+ u->field_data[UF_HOST].len++;
2288
+ break;
2289
+
2290
+ case s_http_host_v6_zone_start:
2291
+ case s_http_host_v6_zone:
2292
+ u->field_data[UF_HOST].len++;
2293
+ break;
2294
+
2295
+ case s_http_host_port:
2296
+ if (s != s_http_host_port) {
2297
+ u->field_data[UF_PORT].off = p - buf;
2298
+ u->field_data[UF_PORT].len = 0;
2299
+ u->field_set |= (1 << UF_PORT);
2300
+ }
2301
+ u->field_data[UF_PORT].len++;
2302
+ break;
2303
+
2304
+ case s_http_userinfo:
2305
+ if (s != s_http_userinfo) {
2306
+ u->field_data[UF_USERINFO].off = p - buf ;
2307
+ u->field_data[UF_USERINFO].len = 0;
2308
+ u->field_set |= (1 << UF_USERINFO);
2309
+ }
2310
+ u->field_data[UF_USERINFO].len++;
2311
+ break;
2312
+
2313
+ default:
2314
+ break;
2315
+ }
2316
+ s = new_s;
2317
+ }
2318
+
2319
+ /* Make sure we don't end somewhere unexpected */
2320
+ switch (s) {
2321
+ case s_http_host_start:
2322
+ case s_http_host_v6_start:
2323
+ case s_http_host_v6:
2324
+ case s_http_host_v6_zone_start:
2325
+ case s_http_host_v6_zone:
2326
+ case s_http_host_port_start:
2327
+ case s_http_userinfo:
2328
+ case s_http_userinfo_start:
2329
+ return 1;
2330
+ default:
2331
+ break;
2332
+ }
2333
+
2334
+ return 0;
2335
+ }
2336
+
2337
+ void
2338
+ http_parser_url_init(struct http_parser_url *u) {
2339
+ memset(u, 0, sizeof(*u));
2340
+ }
2341
+
2342
+ int
2343
+ http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
2344
+ struct http_parser_url *u)
2345
+ {
2346
+ enum state s;
2347
+ const char *p;
2348
+ enum http_parser_url_fields uf, old_uf;
2349
+ int found_at = 0;
2350
+
2351
+ u->port = u->field_set = 0;
2352
+ s = is_connect ? s_req_server_start : s_req_spaces_before_url;
2353
+ old_uf = UF_MAX;
2354
+
2355
+ for (p = buf; p < buf + buflen; p++) {
2356
+ s = parse_url_char(s, *p);
2357
+
2358
+ /* Figure out the next field that we're operating on */
2359
+ switch (s) {
2360
+ case s_dead:
2361
+ return 1;
2362
+
2363
+ /* Skip delimeters */
2364
+ case s_req_schema_slash:
2365
+ case s_req_schema_slash_slash:
2366
+ case s_req_server_start:
2367
+ case s_req_query_string_start:
2368
+ case s_req_fragment_start:
2369
+ continue;
2370
+
2371
+ case s_req_schema:
2372
+ uf = UF_SCHEMA;
2373
+ break;
2374
+
2375
+ case s_req_server_with_at:
2376
+ found_at = 1;
2377
+
2378
+ /* FALLTROUGH */
2379
+ case s_req_server:
2380
+ uf = UF_HOST;
2381
+ break;
2382
+
2383
+ case s_req_path:
2384
+ uf = UF_PATH;
2385
+ break;
2386
+
2387
+ case s_req_query_string:
2388
+ uf = UF_QUERY;
2389
+ break;
2390
+
2391
+ case s_req_fragment:
2392
+ uf = UF_FRAGMENT;
2393
+ break;
2394
+
2395
+ default:
2396
+ assert(!"Unexpected state");
2397
+ return 1;
2398
+ }
2399
+
2400
+ /* Nothing's changed; soldier on */
2401
+ if (uf == old_uf) {
2402
+ u->field_data[uf].len++;
2403
+ continue;
2404
+ }
2405
+
2406
+ u->field_data[uf].off = p - buf;
2407
+ u->field_data[uf].len = 1;
2408
+
2409
+ u->field_set |= (1 << uf);
2410
+ old_uf = uf;
2411
+ }
2412
+
2413
+ /* host must be present if there is a schema */
2414
+ /* parsing http:///toto will fail */
2415
+ if ((u->field_set & (1 << UF_SCHEMA)) &&
2416
+ (u->field_set & (1 << UF_HOST)) == 0) {
2417
+ return 1;
2418
+ }
2419
+
2420
+ if (u->field_set & (1 << UF_HOST)) {
2421
+ if (http_parse_host(buf, u, found_at) != 0) {
2422
+ return 1;
2423
+ }
2424
+ }
2425
+
2426
+ /* CONNECT requests can only contain "hostname:port" */
2427
+ if (is_connect && u->field_set != ((1 << UF_HOST)|(1 << UF_PORT))) {
2428
+ return 1;
2429
+ }
2430
+
2431
+ if (u->field_set & (1 << UF_PORT)) {
2432
+ /* Don't bother with endp; we've already validated the string */
2433
+ unsigned long v = strtoul(buf + u->field_data[UF_PORT].off, NULL, 10);
2434
+
2435
+ /* Ports have a max value of 2^16 */
2436
+ if (v > 0xffff) {
2437
+ return 1;
2438
+ }
2439
+
2440
+ u->port = (uint16_t) v;
2441
+ }
2442
+
2443
+ return 0;
2444
+ }
2445
+
2446
+ void
2447
+ http_parser_pause(http_parser *parser, int paused) {
2448
+ /* Users should only be pausing/unpausing a parser that is not in an error
2449
+ * state. In non-debug builds, there's not much that we can do about this
2450
+ * other than ignore it.
2451
+ */
2452
+ if (HTTP_PARSER_ERRNO(parser) == HPE_OK ||
2453
+ HTTP_PARSER_ERRNO(parser) == HPE_PAUSED) {
2454
+ SET_ERRNO((paused) ? HPE_PAUSED : HPE_OK);
2455
+ } else {
2456
+ assert(0 && "Attempting to pause parser in error state");
2457
+ }
2458
+ }
2459
+
2460
+ int
2461
+ http_body_is_final(const struct http_parser *parser) {
2462
+ return parser->state == s_message_done;
2463
+ }
2464
+
2465
+ unsigned long
2466
+ http_parser_version(void) {
2467
+ return HTTP_PARSER_VERSION_MAJOR * 0x10000 |
2468
+ HTTP_PARSER_VERSION_MINOR * 0x00100 |
2469
+ HTTP_PARSER_VERSION_PATCH * 0x00001;
2470
+ }