http_parser.rb 0.6.0.beta.1 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.github/workflows/linux.yml +23 -0
- data/.github/workflows/windows.yml +23 -0
- data/.gitignore +5 -4
- data/.gitmodules +2 -2
- data/README.md +2 -2
- data/Rakefile +4 -2
- data/ext/ruby_http_parser/extconf.rb +1 -1
- data/ext/ruby_http_parser/org/ruby_http_parser/RubyHttpParser.java +86 -52
- data/ext/ruby_http_parser/ruby_http_parser.c +53 -7
- data/ext/ruby_http_parser/vendor/http-parser/AUTHORS +37 -1
- data/ext/ruby_http_parser/vendor/http-parser/LICENSE-MIT +1 -5
- data/ext/ruby_http_parser/vendor/http-parser/Makefile +110 -8
- data/ext/ruby_http_parser/vendor/http-parser/README.md +105 -37
- data/ext/ruby_http_parser/vendor/http-parser/bench.c +128 -0
- data/ext/ruby_http_parser/vendor/http-parser/contrib/parsertrace.c +157 -0
- data/ext/ruby_http_parser/vendor/http-parser/contrib/url_parser.c +47 -0
- data/ext/ruby_http_parser/vendor/http-parser/http_parser.c +892 -510
- data/ext/ruby_http_parser/vendor/http-parser/http_parser.gyp +34 -2
- data/ext/ruby_http_parser/vendor/http-parser/http_parser.h +198 -77
- data/ext/ruby_http_parser/vendor/http-parser/test.c +1781 -201
- data/ext/ruby_http_parser/vendor/http-parser-java/http_parser.c +271 -154
- data/ext/ruby_http_parser/vendor/http-parser-java/http_parser.h +48 -61
- data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/HTTPMethod.java +5 -3
- data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/ParserSettings.java +37 -104
- data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/lolevel/HTTPParser.java +116 -101
- data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/lolevel/ParserSettings.java +9 -5
- data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/Message.java +1 -1
- data/ext/ruby_http_parser/vendor/http-parser-java/test.c +579 -153
- data/http_parser.rb.gemspec +14 -9
- data/spec/parser_spec.rb +177 -99
- data/spec/support/requests.json +2 -2
- data/spec/support/responses.json +20 -0
- data/tasks/spec.rake +1 -1
- metadata +131 -162
- data/Gemfile.lock +0 -39
- data/ext/ruby_http_parser/vendor/http-parser/CONTRIBUTIONS +0 -4
- data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/HTTPHeadersCompleteCallback.java +0 -13
- data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/lolevel/HTTPHeadersCompleteCallback.java +0 -12
@@ -37,19 +37,24 @@
|
|
37
37
|
# define MIN(a,b) ((a) < (b) ? (a) : (b))
|
38
38
|
#endif
|
39
39
|
|
40
|
+
#ifndef ARRAY_SIZE
|
41
|
+
# define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
|
42
|
+
#endif
|
43
|
+
|
44
|
+
#ifndef BIT_AT
|
45
|
+
# define BIT_AT(a, i) \
|
46
|
+
(!!((unsigned int) (a)[(unsigned int) (i) >> 3] & \
|
47
|
+
(1 << ((unsigned int) (i) & 7))))
|
48
|
+
#endif
|
49
|
+
|
50
|
+
#ifndef ELEM_AT
|
51
|
+
# define ELEM_AT(a, i, v) ((unsigned int) (i) < ARRAY_SIZE(a) ? (a)[(i)] : (v))
|
52
|
+
#endif
|
40
53
|
|
41
|
-
#if HTTP_PARSER_DEBUG
|
42
|
-
#define SET_ERRNO(e) \
|
43
|
-
do { \
|
44
|
-
parser->http_errno = (e); \
|
45
|
-
parser->error_lineno = __LINE__; \
|
46
|
-
} while (0)
|
47
|
-
#else
|
48
54
|
#define SET_ERRNO(e) \
|
49
55
|
do { \
|
50
56
|
parser->http_errno = (e); \
|
51
57
|
} while(0)
|
52
|
-
#endif
|
53
58
|
|
54
59
|
|
55
60
|
/* Run the notify callback FOR, returning ER if it fails */
|
@@ -94,7 +99,7 @@ do { \
|
|
94
99
|
FOR##_mark = NULL; \
|
95
100
|
} \
|
96
101
|
} while (0)
|
97
|
-
|
102
|
+
|
98
103
|
/* Run the data callback FOR and consume the current byte */
|
99
104
|
#define CALLBACK_DATA(FOR) \
|
100
105
|
CALLBACK_DATA_(FOR, p - FOR##_mark, p - data + 1)
|
@@ -123,31 +128,10 @@ do { \
|
|
123
128
|
|
124
129
|
|
125
130
|
static const char *method_strings[] =
|
126
|
-
{
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
, "PUT"
|
131
|
-
, "CONNECT"
|
132
|
-
, "OPTIONS"
|
133
|
-
, "TRACE"
|
134
|
-
, "COPY"
|
135
|
-
, "LOCK"
|
136
|
-
, "MKCOL"
|
137
|
-
, "MOVE"
|
138
|
-
, "PROPFIND"
|
139
|
-
, "PROPPATCH"
|
140
|
-
, "UNLOCK"
|
141
|
-
, "REPORT"
|
142
|
-
, "MKACTIVITY"
|
143
|
-
, "CHECKOUT"
|
144
|
-
, "MERGE"
|
145
|
-
, "M-SEARCH"
|
146
|
-
, "NOTIFY"
|
147
|
-
, "SUBSCRIBE"
|
148
|
-
, "UNSUBSCRIBE"
|
149
|
-
, "PATCH"
|
150
|
-
, "PURGE"
|
131
|
+
{
|
132
|
+
#define XX(num, name, string) #string,
|
133
|
+
HTTP_METHOD_MAP(XX)
|
134
|
+
#undef XX
|
151
135
|
};
|
152
136
|
|
153
137
|
|
@@ -205,40 +189,48 @@ static const int8_t unhex[256] =
|
|
205
189
|
};
|
206
190
|
|
207
191
|
|
208
|
-
|
192
|
+
#if HTTP_PARSER_STRICT
|
193
|
+
# define T(v) 0
|
194
|
+
#else
|
195
|
+
# define T(v) v
|
196
|
+
#endif
|
197
|
+
|
198
|
+
|
199
|
+
static const uint8_t normal_url_char[32] = {
|
209
200
|
/* 0 nul 1 soh 2 stx 3 etx 4 eot 5 enq 6 ack 7 bel */
|
210
|
-
0
|
201
|
+
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0,
|
211
202
|
/* 8 bs 9 ht 10 nl 11 vt 12 np 13 cr 14 so 15 si */
|
212
|
-
0
|
203
|
+
0 | T(2) | 0 | 0 | T(16) | 0 | 0 | 0,
|
213
204
|
/* 16 dle 17 dc1 18 dc2 19 dc3 20 dc4 21 nak 22 syn 23 etb */
|
214
|
-
0
|
205
|
+
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0,
|
215
206
|
/* 24 can 25 em 26 sub 27 esc 28 fs 29 gs 30 rs 31 us */
|
216
|
-
0
|
207
|
+
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0,
|
217
208
|
/* 32 sp 33 ! 34 " 35 # 36 $ 37 % 38 & 39 ' */
|
218
|
-
0
|
209
|
+
0 | 2 | 4 | 0 | 16 | 32 | 64 | 128,
|
219
210
|
/* 40 ( 41 ) 42 * 43 + 44 , 45 - 46 . 47 / */
|
220
|
-
1,
|
211
|
+
1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
|
221
212
|
/* 48 0 49 1 50 2 51 3 52 4 53 5 54 6 55 7 */
|
222
|
-
1,
|
213
|
+
1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
|
223
214
|
/* 56 8 57 9 58 : 59 ; 60 < 61 = 62 > 63 ? */
|
224
|
-
1
|
215
|
+
1 | 2 | 4 | 8 | 16 | 32 | 64 | 0,
|
225
216
|
/* 64 @ 65 A 66 B 67 C 68 D 69 E 70 F 71 G */
|
226
|
-
1,
|
217
|
+
1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
|
227
218
|
/* 72 H 73 I 74 J 75 K 76 L 77 M 78 N 79 O */
|
228
|
-
1,
|
219
|
+
1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
|
229
220
|
/* 80 P 81 Q 82 R 83 S 84 T 85 U 86 V 87 W */
|
230
|
-
1,
|
221
|
+
1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
|
231
222
|
/* 88 X 89 Y 90 Z 91 [ 92 \ 93 ] 94 ^ 95 _ */
|
232
|
-
1,
|
223
|
+
1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
|
233
224
|
/* 96 ` 97 a 98 b 99 c 100 d 101 e 102 f 103 g */
|
234
|
-
1,
|
225
|
+
1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
|
235
226
|
/* 104 h 105 i 106 j 107 k 108 l 109 m 110 n 111 o */
|
236
|
-
1,
|
227
|
+
1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
|
237
228
|
/* 112 p 113 q 114 r 115 s 116 t 117 u 118 v 119 w */
|
238
|
-
1,
|
229
|
+
1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
|
239
230
|
/* 120 x 121 y 122 z 123 { 124 | 125 } 126 ~ 127 del */
|
240
|
-
1
|
231
|
+
1 | 2 | 4 | 8 | 16 | 32 | 64 | 0, };
|
241
232
|
|
233
|
+
#undef T
|
242
234
|
|
243
235
|
enum state
|
244
236
|
{ s_dead = 1 /* important that this is > 0 */
|
@@ -266,13 +258,9 @@ enum state
|
|
266
258
|
, s_req_schema
|
267
259
|
, s_req_schema_slash
|
268
260
|
, s_req_schema_slash_slash
|
269
|
-
,
|
270
|
-
,
|
271
|
-
,
|
272
|
-
, s_req_host_v6_end
|
273
|
-
, s_req_host
|
274
|
-
, s_req_port_start
|
275
|
-
, s_req_port
|
261
|
+
, s_req_server_start
|
262
|
+
, s_req_server
|
263
|
+
, s_req_server_with_at
|
276
264
|
, s_req_path
|
277
265
|
, s_req_query_string_start
|
278
266
|
, s_req_query_string
|
@@ -350,6 +338,19 @@ enum header_states
|
|
350
338
|
, h_connection_close
|
351
339
|
};
|
352
340
|
|
341
|
+
enum http_host_state
|
342
|
+
{
|
343
|
+
s_http_host_dead = 1
|
344
|
+
, s_http_userinfo_start
|
345
|
+
, s_http_userinfo
|
346
|
+
, s_http_host_start
|
347
|
+
, s_http_host_v6_start
|
348
|
+
, s_http_host
|
349
|
+
, s_http_host_v6
|
350
|
+
, s_http_host_v6_end
|
351
|
+
, s_http_host_port_start
|
352
|
+
, s_http_host_port
|
353
|
+
};
|
353
354
|
|
354
355
|
/* Macros for character classes; depends on strict-mode */
|
355
356
|
#define CR '\r'
|
@@ -359,15 +360,21 @@ enum header_states
|
|
359
360
|
#define IS_NUM(c) ((c) >= '0' && (c) <= '9')
|
360
361
|
#define IS_ALPHANUM(c) (IS_ALPHA(c) || IS_NUM(c))
|
361
362
|
#define IS_HEX(c) (IS_NUM(c) || (LOWER(c) >= 'a' && LOWER(c) <= 'f'))
|
363
|
+
#define IS_MARK(c) ((c) == '-' || (c) == '_' || (c) == '.' || \
|
364
|
+
(c) == '!' || (c) == '~' || (c) == '*' || (c) == '\'' || (c) == '(' || \
|
365
|
+
(c) == ')')
|
366
|
+
#define IS_USERINFO_CHAR(c) (IS_ALPHANUM(c) || IS_MARK(c) || (c) == '%' || \
|
367
|
+
(c) == ';' || (c) == ':' || (c) == '&' || (c) == '=' || (c) == '+' || \
|
368
|
+
(c) == '$' || (c) == ',')
|
362
369
|
|
363
370
|
#if HTTP_PARSER_STRICT
|
364
371
|
#define TOKEN(c) (tokens[(unsigned char)c])
|
365
|
-
#define IS_URL_CHAR(c) (normal_url_char
|
372
|
+
#define IS_URL_CHAR(c) (BIT_AT(normal_url_char, (unsigned char)c))
|
366
373
|
#define IS_HOST_CHAR(c) (IS_ALPHANUM(c) || (c) == '.' || (c) == '-')
|
367
374
|
#else
|
368
375
|
#define TOKEN(c) ((c == ' ') ? ' ' : tokens[(unsigned char)c])
|
369
376
|
#define IS_URL_CHAR(c) \
|
370
|
-
(normal_url_char
|
377
|
+
(BIT_AT(normal_url_char, (unsigned char)c) || ((c) & 0x80))
|
371
378
|
#define IS_HOST_CHAR(c) \
|
372
379
|
(IS_ALPHANUM(c) || (c) == '.' || (c) == '-' || (c) == '_')
|
373
380
|
#endif
|
@@ -401,7 +408,7 @@ static struct {
|
|
401
408
|
};
|
402
409
|
#undef HTTP_STRERROR_GEN
|
403
410
|
|
404
|
-
int http_message_needs_eof(http_parser *parser);
|
411
|
+
int http_message_needs_eof(const http_parser *parser);
|
405
412
|
|
406
413
|
/* Our URL parser.
|
407
414
|
*
|
@@ -417,7 +424,15 @@ int http_message_needs_eof(http_parser *parser);
|
|
417
424
|
static enum state
|
418
425
|
parse_url_char(enum state s, const char ch)
|
419
426
|
{
|
420
|
-
|
427
|
+
if (ch == ' ' || ch == '\r' || ch == '\n') {
|
428
|
+
return s_dead;
|
429
|
+
}
|
430
|
+
|
431
|
+
#if HTTP_PARSER_STRICT
|
432
|
+
if (ch == '\t' || ch == '\f') {
|
433
|
+
return s_dead;
|
434
|
+
}
|
435
|
+
#endif
|
421
436
|
|
422
437
|
switch (s) {
|
423
438
|
case s_req_spaces_before_url:
|
@@ -455,67 +470,33 @@ parse_url_char(enum state s, const char ch)
|
|
455
470
|
|
456
471
|
case s_req_schema_slash_slash:
|
457
472
|
if (ch == '/') {
|
458
|
-
return
|
459
|
-
}
|
460
|
-
|
461
|
-
break;
|
462
|
-
|
463
|
-
case s_req_host_start:
|
464
|
-
if (ch == '[') {
|
465
|
-
return s_req_host_v6_start;
|
466
|
-
}
|
467
|
-
|
468
|
-
if (IS_HOST_CHAR(ch)) {
|
469
|
-
return s_req_host;
|
473
|
+
return s_req_server_start;
|
470
474
|
}
|
471
475
|
|
472
476
|
break;
|
473
477
|
|
474
|
-
case
|
475
|
-
if (
|
476
|
-
return
|
477
|
-
}
|
478
|
-
|
479
|
-
/* FALLTHROUGH */
|
480
|
-
case s_req_host_v6_end:
|
481
|
-
switch (ch) {
|
482
|
-
case ':':
|
483
|
-
return s_req_port_start;
|
484
|
-
|
485
|
-
case '/':
|
486
|
-
return s_req_path;
|
487
|
-
|
488
|
-
case '?':
|
489
|
-
return s_req_query_string_start;
|
478
|
+
case s_req_server_with_at:
|
479
|
+
if (ch == '@') {
|
480
|
+
return s_dead;
|
490
481
|
}
|
491
482
|
|
492
|
-
|
493
|
-
|
494
|
-
case
|
495
|
-
if (ch == '
|
496
|
-
return
|
483
|
+
/* FALLTHROUGH */
|
484
|
+
case s_req_server_start:
|
485
|
+
case s_req_server:
|
486
|
+
if (ch == '/') {
|
487
|
+
return s_req_path;
|
497
488
|
}
|
498
489
|
|
499
|
-
|
500
|
-
|
501
|
-
if (IS_HEX(ch) || ch == ':') {
|
502
|
-
return s_req_host_v6;
|
490
|
+
if (ch == '?') {
|
491
|
+
return s_req_query_string_start;
|
503
492
|
}
|
504
|
-
break;
|
505
493
|
|
506
|
-
|
507
|
-
|
508
|
-
case '/':
|
509
|
-
return s_req_path;
|
510
|
-
|
511
|
-
case '?':
|
512
|
-
return s_req_query_string_start;
|
494
|
+
if (ch == '@') {
|
495
|
+
return s_req_server_with_at;
|
513
496
|
}
|
514
497
|
|
515
|
-
|
516
|
-
|
517
|
-
if (IS_NUM(ch)) {
|
518
|
-
return s_req_port;
|
498
|
+
if (IS_USERINFO_CHAR(ch) || ch == '[' || ch == ']') {
|
499
|
+
return s_req_server;
|
519
500
|
}
|
520
501
|
|
521
502
|
break;
|
@@ -637,13 +618,9 @@ size_t http_parser_execute (http_parser *parser,
|
|
637
618
|
case s_req_schema:
|
638
619
|
case s_req_schema_slash:
|
639
620
|
case s_req_schema_slash_slash:
|
640
|
-
case
|
641
|
-
case
|
642
|
-
case
|
643
|
-
case s_req_host_v6_end:
|
644
|
-
case s_req_host:
|
645
|
-
case s_req_port_start:
|
646
|
-
case s_req_port:
|
621
|
+
case s_req_server_start:
|
622
|
+
case s_req_server:
|
623
|
+
case s_req_server_with_at:
|
647
624
|
case s_req_query_string_start:
|
648
625
|
case s_req_query_string:
|
649
626
|
case s_req_fragment_start:
|
@@ -889,6 +866,7 @@ size_t http_parser_execute (http_parser *parser,
|
|
889
866
|
case s_res_line_almost_done:
|
890
867
|
STRICT_CHECK(ch != LF);
|
891
868
|
parser->state = s_header_field_start;
|
869
|
+
CALLBACK_NOTIFY(status_complete);
|
892
870
|
break;
|
893
871
|
|
894
872
|
case s_start_req:
|
@@ -918,7 +896,7 @@ size_t http_parser_execute (http_parser *parser,
|
|
918
896
|
/* or PROPFIND|PROPPATCH|PUT|PATCH|PURGE */
|
919
897
|
break;
|
920
898
|
case 'R': parser->method = HTTP_REPORT; break;
|
921
|
-
case 'S': parser->method = HTTP_SUBSCRIBE; break;
|
899
|
+
case 'S': parser->method = HTTP_SUBSCRIBE; /* or SEARCH */ break;
|
922
900
|
case 'T': parser->method = HTTP_TRACE; break;
|
923
901
|
case 'U': parser->method = HTTP_UNLOCK; /* or UNSUBSCRIBE */ break;
|
924
902
|
default:
|
@@ -965,6 +943,12 @@ size_t http_parser_execute (http_parser *parser,
|
|
965
943
|
} else {
|
966
944
|
goto error;
|
967
945
|
}
|
946
|
+
} else if (parser->method == HTTP_SUBSCRIBE) {
|
947
|
+
if (parser->index == 1 && ch == 'E') {
|
948
|
+
parser->method = HTTP_SEARCH;
|
949
|
+
} else {
|
950
|
+
goto error;
|
951
|
+
}
|
968
952
|
} else if (parser->index == 1 && parser->method == HTTP_POST) {
|
969
953
|
if (ch == 'R') {
|
970
954
|
parser->method = HTTP_PROPFIND; /* or HTTP_PROPPATCH */
|
@@ -998,7 +982,7 @@ size_t http_parser_execute (http_parser *parser,
|
|
998
982
|
|
999
983
|
MARK(url);
|
1000
984
|
if (parser->method == HTTP_CONNECT) {
|
1001
|
-
parser->state =
|
985
|
+
parser->state = s_req_server_start;
|
1002
986
|
}
|
1003
987
|
|
1004
988
|
parser->state = parse_url_char((enum state)parser->state, ch);
|
@@ -1013,10 +997,7 @@ size_t http_parser_execute (http_parser *parser,
|
|
1013
997
|
case s_req_schema:
|
1014
998
|
case s_req_schema_slash:
|
1015
999
|
case s_req_schema_slash_slash:
|
1016
|
-
case
|
1017
|
-
case s_req_host_v6_start:
|
1018
|
-
case s_req_host_v6:
|
1019
|
-
case s_req_port_start:
|
1000
|
+
case s_req_server_start:
|
1020
1001
|
{
|
1021
1002
|
switch (ch) {
|
1022
1003
|
/* No whitespace allowed here */
|
@@ -1036,9 +1017,8 @@ size_t http_parser_execute (http_parser *parser,
|
|
1036
1017
|
break;
|
1037
1018
|
}
|
1038
1019
|
|
1039
|
-
case
|
1040
|
-
case
|
1041
|
-
case s_req_port:
|
1020
|
+
case s_req_server:
|
1021
|
+
case s_req_server_with_at:
|
1042
1022
|
case s_req_path:
|
1043
1023
|
case s_req_query_string_start:
|
1044
1024
|
case s_req_query_string:
|
@@ -1867,7 +1847,7 @@ error:
|
|
1867
1847
|
|
1868
1848
|
/* Does the parser need to see an EOF to find the end of the message? */
|
1869
1849
|
int
|
1870
|
-
http_message_needs_eof (http_parser *parser)
|
1850
|
+
http_message_needs_eof (const http_parser *parser)
|
1871
1851
|
{
|
1872
1852
|
if (parser->type == HTTP_REQUEST) {
|
1873
1853
|
return 0;
|
@@ -1890,7 +1870,7 @@ http_message_needs_eof (http_parser *parser)
|
|
1890
1870
|
|
1891
1871
|
|
1892
1872
|
int
|
1893
|
-
http_should_keep_alive (http_parser *parser)
|
1873
|
+
http_should_keep_alive (const http_parser *parser)
|
1894
1874
|
{
|
1895
1875
|
if (parser->http_major > 0 && parser->http_minor > 0) {
|
1896
1876
|
/* HTTP/1.1 */
|
@@ -1908,9 +1888,10 @@ http_should_keep_alive (http_parser *parser)
|
|
1908
1888
|
}
|
1909
1889
|
|
1910
1890
|
|
1911
|
-
const char *
|
1891
|
+
const char *
|
1892
|
+
http_method_str (enum http_method m)
|
1912
1893
|
{
|
1913
|
-
return method_strings
|
1894
|
+
return ELEM_AT(method_strings, m, "<unknown>");
|
1914
1895
|
}
|
1915
1896
|
|
1916
1897
|
|
@@ -1937,6 +1918,144 @@ http_errno_description(enum http_errno err) {
|
|
1937
1918
|
return http_strerror_tab[err].description;
|
1938
1919
|
}
|
1939
1920
|
|
1921
|
+
static enum http_host_state
|
1922
|
+
http_parse_host_char(enum http_host_state s, const char ch) {
|
1923
|
+
switch(s) {
|
1924
|
+
case s_http_userinfo:
|
1925
|
+
case s_http_userinfo_start:
|
1926
|
+
if (ch == '@') {
|
1927
|
+
return s_http_host_start;
|
1928
|
+
}
|
1929
|
+
|
1930
|
+
if (IS_USERINFO_CHAR(ch)) {
|
1931
|
+
return s_http_userinfo;
|
1932
|
+
}
|
1933
|
+
break;
|
1934
|
+
|
1935
|
+
case s_http_host_start:
|
1936
|
+
if (ch == '[') {
|
1937
|
+
return s_http_host_v6_start;
|
1938
|
+
}
|
1939
|
+
|
1940
|
+
if (IS_HOST_CHAR(ch)) {
|
1941
|
+
return s_http_host;
|
1942
|
+
}
|
1943
|
+
|
1944
|
+
break;
|
1945
|
+
|
1946
|
+
case s_http_host:
|
1947
|
+
if (IS_HOST_CHAR(ch)) {
|
1948
|
+
return s_http_host;
|
1949
|
+
}
|
1950
|
+
|
1951
|
+
/* FALLTHROUGH */
|
1952
|
+
case s_http_host_v6_end:
|
1953
|
+
if (ch == ':') {
|
1954
|
+
return s_http_host_port_start;
|
1955
|
+
}
|
1956
|
+
|
1957
|
+
break;
|
1958
|
+
|
1959
|
+
case s_http_host_v6:
|
1960
|
+
if (ch == ']') {
|
1961
|
+
return s_http_host_v6_end;
|
1962
|
+
}
|
1963
|
+
|
1964
|
+
/* FALLTHROUGH */
|
1965
|
+
case s_http_host_v6_start:
|
1966
|
+
if (IS_HEX(ch) || ch == ':' || ch == '.') {
|
1967
|
+
return s_http_host_v6;
|
1968
|
+
}
|
1969
|
+
|
1970
|
+
break;
|
1971
|
+
|
1972
|
+
case s_http_host_port:
|
1973
|
+
case s_http_host_port_start:
|
1974
|
+
if (IS_NUM(ch)) {
|
1975
|
+
return s_http_host_port;
|
1976
|
+
}
|
1977
|
+
|
1978
|
+
break;
|
1979
|
+
|
1980
|
+
default:
|
1981
|
+
break;
|
1982
|
+
}
|
1983
|
+
return s_http_host_dead;
|
1984
|
+
}
|
1985
|
+
|
1986
|
+
static int
|
1987
|
+
http_parse_host(const char * buf, struct http_parser_url *u, int found_at) {
|
1988
|
+
enum http_host_state s;
|
1989
|
+
|
1990
|
+
const char *p;
|
1991
|
+
size_t buflen = u->field_data[UF_HOST].off + u->field_data[UF_HOST].len;
|
1992
|
+
|
1993
|
+
u->field_data[UF_HOST].len = 0;
|
1994
|
+
|
1995
|
+
s = found_at ? s_http_userinfo_start : s_http_host_start;
|
1996
|
+
|
1997
|
+
for (p = buf + u->field_data[UF_HOST].off; p < buf + buflen; p++) {
|
1998
|
+
enum http_host_state new_s = http_parse_host_char(s, *p);
|
1999
|
+
|
2000
|
+
if (new_s == s_http_host_dead) {
|
2001
|
+
return 1;
|
2002
|
+
}
|
2003
|
+
|
2004
|
+
switch(new_s) {
|
2005
|
+
case s_http_host:
|
2006
|
+
if (s != s_http_host) {
|
2007
|
+
u->field_data[UF_HOST].off = p - buf;
|
2008
|
+
}
|
2009
|
+
u->field_data[UF_HOST].len++;
|
2010
|
+
break;
|
2011
|
+
|
2012
|
+
case s_http_host_v6:
|
2013
|
+
if (s != s_http_host_v6) {
|
2014
|
+
u->field_data[UF_HOST].off = p - buf;
|
2015
|
+
}
|
2016
|
+
u->field_data[UF_HOST].len++;
|
2017
|
+
break;
|
2018
|
+
|
2019
|
+
case s_http_host_port:
|
2020
|
+
if (s != s_http_host_port) {
|
2021
|
+
u->field_data[UF_PORT].off = p - buf;
|
2022
|
+
u->field_data[UF_PORT].len = 0;
|
2023
|
+
u->field_set |= (1 << UF_PORT);
|
2024
|
+
}
|
2025
|
+
u->field_data[UF_PORT].len++;
|
2026
|
+
break;
|
2027
|
+
|
2028
|
+
case s_http_userinfo:
|
2029
|
+
if (s != s_http_userinfo) {
|
2030
|
+
u->field_data[UF_USERINFO].off = p - buf ;
|
2031
|
+
u->field_data[UF_USERINFO].len = 0;
|
2032
|
+
u->field_set |= (1 << UF_USERINFO);
|
2033
|
+
}
|
2034
|
+
u->field_data[UF_USERINFO].len++;
|
2035
|
+
break;
|
2036
|
+
|
2037
|
+
default:
|
2038
|
+
break;
|
2039
|
+
}
|
2040
|
+
s = new_s;
|
2041
|
+
}
|
2042
|
+
|
2043
|
+
/* Make sure we don't end somewhere unexpected */
|
2044
|
+
switch (s) {
|
2045
|
+
case s_http_host_start:
|
2046
|
+
case s_http_host_v6_start:
|
2047
|
+
case s_http_host_v6:
|
2048
|
+
case s_http_host_port_start:
|
2049
|
+
case s_http_userinfo:
|
2050
|
+
case s_http_userinfo_start:
|
2051
|
+
return 1;
|
2052
|
+
default:
|
2053
|
+
break;
|
2054
|
+
}
|
2055
|
+
|
2056
|
+
return 0;
|
2057
|
+
}
|
2058
|
+
|
1940
2059
|
int
|
1941
2060
|
http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
|
1942
2061
|
struct http_parser_url *u)
|
@@ -1944,9 +2063,10 @@ http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
|
|
1944
2063
|
enum state s;
|
1945
2064
|
const char *p;
|
1946
2065
|
enum http_parser_url_fields uf, old_uf;
|
2066
|
+
int found_at = 0;
|
1947
2067
|
|
1948
2068
|
u->port = u->field_set = 0;
|
1949
|
-
s = is_connect ?
|
2069
|
+
s = is_connect ? s_req_server_start : s_req_spaces_before_url;
|
1950
2070
|
uf = old_uf = UF_MAX;
|
1951
2071
|
|
1952
2072
|
for (p = buf; p < buf + buflen; p++) {
|
@@ -1960,10 +2080,7 @@ http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
|
|
1960
2080
|
/* Skip delimeters */
|
1961
2081
|
case s_req_schema_slash:
|
1962
2082
|
case s_req_schema_slash_slash:
|
1963
|
-
case
|
1964
|
-
case s_req_host_v6_start:
|
1965
|
-
case s_req_host_v6_end:
|
1966
|
-
case s_req_port_start:
|
2083
|
+
case s_req_server_start:
|
1967
2084
|
case s_req_query_string_start:
|
1968
2085
|
case s_req_fragment_start:
|
1969
2086
|
continue;
|
@@ -1972,13 +2089,12 @@ http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
|
|
1972
2089
|
uf = UF_SCHEMA;
|
1973
2090
|
break;
|
1974
2091
|
|
1975
|
-
case
|
1976
|
-
|
1977
|
-
uf = UF_HOST;
|
1978
|
-
break;
|
2092
|
+
case s_req_server_with_at:
|
2093
|
+
found_at = 1;
|
1979
2094
|
|
1980
|
-
|
1981
|
-
|
2095
|
+
/* FALLTROUGH */
|
2096
|
+
case s_req_server:
|
2097
|
+
uf = UF_HOST;
|
1982
2098
|
break;
|
1983
2099
|
|
1984
2100
|
case s_req_path:
|
@@ -2011,21 +2127,17 @@ http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
|
|
2011
2127
|
old_uf = uf;
|
2012
2128
|
}
|
2013
2129
|
|
2014
|
-
/*
|
2015
|
-
|
2016
|
-
|
2130
|
+
/* host must be present if there is a schema */
|
2131
|
+
/* parsing http:///toto will fail */
|
2132
|
+
if ((u->field_set & ((1 << UF_SCHEMA) | (1 << UF_HOST))) != 0) {
|
2133
|
+
if (http_parse_host(buf, u, found_at) != 0) {
|
2134
|
+
return 1;
|
2135
|
+
}
|
2017
2136
|
}
|
2018
2137
|
|
2019
|
-
/*
|
2020
|
-
|
2021
|
-
case s_req_host_v6_start:
|
2022
|
-
case s_req_host_v6:
|
2023
|
-
case s_req_host_v6_end:
|
2024
|
-
case s_req_host:
|
2025
|
-
case s_req_port_start:
|
2138
|
+
/* CONNECT requests can only contain "hostname:port" */
|
2139
|
+
if (is_connect && u->field_set != ((1 << UF_HOST)|(1 << UF_PORT))) {
|
2026
2140
|
return 1;
|
2027
|
-
default:
|
2028
|
-
break;
|
2029
2141
|
}
|
2030
2142
|
|
2031
2143
|
if (u->field_set & (1 << UF_PORT)) {
|
@@ -2056,3 +2168,8 @@ http_parser_pause(http_parser *parser, int paused) {
|
|
2056
2168
|
assert(0 && "Attempting to pause parser in error state");
|
2057
2169
|
}
|
2058
2170
|
}
|
2171
|
+
|
2172
|
+
int
|
2173
|
+
http_body_is_final(const struct http_parser *parser) {
|
2174
|
+
return parser->state == s_message_done;
|
2175
|
+
}
|