http_parser.rb 0.6.0.beta.1 → 0.8.0

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. checksums.yaml +7 -0
  2. data/.github/workflows/linux.yml +23 -0
  3. data/.github/workflows/windows.yml +23 -0
  4. data/.gitignore +5 -4
  5. data/.gitmodules +2 -2
  6. data/README.md +2 -2
  7. data/Rakefile +4 -2
  8. data/ext/ruby_http_parser/extconf.rb +1 -1
  9. data/ext/ruby_http_parser/org/ruby_http_parser/RubyHttpParser.java +86 -52
  10. data/ext/ruby_http_parser/ruby_http_parser.c +53 -7
  11. data/ext/ruby_http_parser/vendor/http-parser/AUTHORS +37 -1
  12. data/ext/ruby_http_parser/vendor/http-parser/LICENSE-MIT +1 -5
  13. data/ext/ruby_http_parser/vendor/http-parser/Makefile +110 -8
  14. data/ext/ruby_http_parser/vendor/http-parser/README.md +105 -37
  15. data/ext/ruby_http_parser/vendor/http-parser/bench.c +128 -0
  16. data/ext/ruby_http_parser/vendor/http-parser/contrib/parsertrace.c +157 -0
  17. data/ext/ruby_http_parser/vendor/http-parser/contrib/url_parser.c +47 -0
  18. data/ext/ruby_http_parser/vendor/http-parser/http_parser.c +892 -510
  19. data/ext/ruby_http_parser/vendor/http-parser/http_parser.gyp +34 -2
  20. data/ext/ruby_http_parser/vendor/http-parser/http_parser.h +198 -77
  21. data/ext/ruby_http_parser/vendor/http-parser/test.c +1781 -201
  22. data/ext/ruby_http_parser/vendor/http-parser-java/http_parser.c +271 -154
  23. data/ext/ruby_http_parser/vendor/http-parser-java/http_parser.h +48 -61
  24. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/HTTPMethod.java +5 -3
  25. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/ParserSettings.java +37 -104
  26. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/lolevel/HTTPParser.java +116 -101
  27. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/lolevel/ParserSettings.java +9 -5
  28. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/Message.java +1 -1
  29. data/ext/ruby_http_parser/vendor/http-parser-java/test.c +579 -153
  30. data/http_parser.rb.gemspec +14 -9
  31. data/spec/parser_spec.rb +177 -99
  32. data/spec/support/requests.json +2 -2
  33. data/spec/support/responses.json +20 -0
  34. data/tasks/spec.rake +1 -1
  35. metadata +131 -162
  36. data/Gemfile.lock +0 -39
  37. data/ext/ruby_http_parser/vendor/http-parser/CONTRIBUTIONS +0 -4
  38. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/HTTPHeadersCompleteCallback.java +0 -13
  39. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/lolevel/HTTPHeadersCompleteCallback.java +0 -12
@@ -37,19 +37,24 @@
37
37
  # define MIN(a,b) ((a) < (b) ? (a) : (b))
38
38
  #endif
39
39
 
40
+ #ifndef ARRAY_SIZE
41
+ # define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
42
+ #endif
43
+
44
+ #ifndef BIT_AT
45
+ # define BIT_AT(a, i) \
46
+ (!!((unsigned int) (a)[(unsigned int) (i) >> 3] & \
47
+ (1 << ((unsigned int) (i) & 7))))
48
+ #endif
49
+
50
+ #ifndef ELEM_AT
51
+ # define ELEM_AT(a, i, v) ((unsigned int) (i) < ARRAY_SIZE(a) ? (a)[(i)] : (v))
52
+ #endif
40
53
 
41
- #if HTTP_PARSER_DEBUG
42
- #define SET_ERRNO(e) \
43
- do { \
44
- parser->http_errno = (e); \
45
- parser->error_lineno = __LINE__; \
46
- } while (0)
47
- #else
48
54
  #define SET_ERRNO(e) \
49
55
  do { \
50
56
  parser->http_errno = (e); \
51
57
  } while(0)
52
- #endif
53
58
 
54
59
 
55
60
  /* Run the notify callback FOR, returning ER if it fails */
@@ -94,7 +99,7 @@ do { \
94
99
  FOR##_mark = NULL; \
95
100
  } \
96
101
  } while (0)
97
-
102
+
98
103
  /* Run the data callback FOR and consume the current byte */
99
104
  #define CALLBACK_DATA(FOR) \
100
105
  CALLBACK_DATA_(FOR, p - FOR##_mark, p - data + 1)
@@ -123,31 +128,10 @@ do { \
123
128
 
124
129
 
125
130
  static const char *method_strings[] =
126
- { "DELETE"
127
- , "GET"
128
- , "HEAD"
129
- , "POST"
130
- , "PUT"
131
- , "CONNECT"
132
- , "OPTIONS"
133
- , "TRACE"
134
- , "COPY"
135
- , "LOCK"
136
- , "MKCOL"
137
- , "MOVE"
138
- , "PROPFIND"
139
- , "PROPPATCH"
140
- , "UNLOCK"
141
- , "REPORT"
142
- , "MKACTIVITY"
143
- , "CHECKOUT"
144
- , "MERGE"
145
- , "M-SEARCH"
146
- , "NOTIFY"
147
- , "SUBSCRIBE"
148
- , "UNSUBSCRIBE"
149
- , "PATCH"
150
- , "PURGE"
131
+ {
132
+ #define XX(num, name, string) #string,
133
+ HTTP_METHOD_MAP(XX)
134
+ #undef XX
151
135
  };
152
136
 
153
137
 
@@ -205,40 +189,48 @@ static const int8_t unhex[256] =
205
189
  };
206
190
 
207
191
 
208
- static const uint8_t normal_url_char[256] = {
192
+ #if HTTP_PARSER_STRICT
193
+ # define T(v) 0
194
+ #else
195
+ # define T(v) v
196
+ #endif
197
+
198
+
199
+ static const uint8_t normal_url_char[32] = {
209
200
  /* 0 nul 1 soh 2 stx 3 etx 4 eot 5 enq 6 ack 7 bel */
210
- 0, 0, 0, 0, 0, 0, 0, 0,
201
+ 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0,
211
202
  /* 8 bs 9 ht 10 nl 11 vt 12 np 13 cr 14 so 15 si */
212
- 0, 0, 0, 0, 0, 0, 0, 0,
203
+ 0 | T(2) | 0 | 0 | T(16) | 0 | 0 | 0,
213
204
  /* 16 dle 17 dc1 18 dc2 19 dc3 20 dc4 21 nak 22 syn 23 etb */
214
- 0, 0, 0, 0, 0, 0, 0, 0,
205
+ 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0,
215
206
  /* 24 can 25 em 26 sub 27 esc 28 fs 29 gs 30 rs 31 us */
216
- 0, 0, 0, 0, 0, 0, 0, 0,
207
+ 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0,
217
208
  /* 32 sp 33 ! 34 " 35 # 36 $ 37 % 38 & 39 ' */
218
- 0, 1, 1, 0, 1, 1, 1, 1,
209
+ 0 | 2 | 4 | 0 | 16 | 32 | 64 | 128,
219
210
  /* 40 ( 41 ) 42 * 43 + 44 , 45 - 46 . 47 / */
220
- 1, 1, 1, 1, 1, 1, 1, 1,
211
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
221
212
  /* 48 0 49 1 50 2 51 3 52 4 53 5 54 6 55 7 */
222
- 1, 1, 1, 1, 1, 1, 1, 1,
213
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
223
214
  /* 56 8 57 9 58 : 59 ; 60 < 61 = 62 > 63 ? */
224
- 1, 1, 1, 1, 1, 1, 1, 0,
215
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 0,
225
216
  /* 64 @ 65 A 66 B 67 C 68 D 69 E 70 F 71 G */
226
- 1, 1, 1, 1, 1, 1, 1, 1,
217
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
227
218
  /* 72 H 73 I 74 J 75 K 76 L 77 M 78 N 79 O */
228
- 1, 1, 1, 1, 1, 1, 1, 1,
219
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
229
220
  /* 80 P 81 Q 82 R 83 S 84 T 85 U 86 V 87 W */
230
- 1, 1, 1, 1, 1, 1, 1, 1,
221
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
231
222
  /* 88 X 89 Y 90 Z 91 [ 92 \ 93 ] 94 ^ 95 _ */
232
- 1, 1, 1, 1, 1, 1, 1, 1,
223
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
233
224
  /* 96 ` 97 a 98 b 99 c 100 d 101 e 102 f 103 g */
234
- 1, 1, 1, 1, 1, 1, 1, 1,
225
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
235
226
  /* 104 h 105 i 106 j 107 k 108 l 109 m 110 n 111 o */
236
- 1, 1, 1, 1, 1, 1, 1, 1,
227
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
237
228
  /* 112 p 113 q 114 r 115 s 116 t 117 u 118 v 119 w */
238
- 1, 1, 1, 1, 1, 1, 1, 1,
229
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
239
230
  /* 120 x 121 y 122 z 123 { 124 | 125 } 126 ~ 127 del */
240
- 1, 1, 1, 1, 1, 1, 1, 0, };
231
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 0, };
241
232
 
233
+ #undef T
242
234
 
243
235
  enum state
244
236
  { s_dead = 1 /* important that this is > 0 */
@@ -266,13 +258,9 @@ enum state
266
258
  , s_req_schema
267
259
  , s_req_schema_slash
268
260
  , s_req_schema_slash_slash
269
- , s_req_host_start
270
- , s_req_host_v6_start
271
- , s_req_host_v6
272
- , s_req_host_v6_end
273
- , s_req_host
274
- , s_req_port_start
275
- , s_req_port
261
+ , s_req_server_start
262
+ , s_req_server
263
+ , s_req_server_with_at
276
264
  , s_req_path
277
265
  , s_req_query_string_start
278
266
  , s_req_query_string
@@ -350,6 +338,19 @@ enum header_states
350
338
  , h_connection_close
351
339
  };
352
340
 
341
+ enum http_host_state
342
+ {
343
+ s_http_host_dead = 1
344
+ , s_http_userinfo_start
345
+ , s_http_userinfo
346
+ , s_http_host_start
347
+ , s_http_host_v6_start
348
+ , s_http_host
349
+ , s_http_host_v6
350
+ , s_http_host_v6_end
351
+ , s_http_host_port_start
352
+ , s_http_host_port
353
+ };
353
354
 
354
355
  /* Macros for character classes; depends on strict-mode */
355
356
  #define CR '\r'
@@ -359,15 +360,21 @@ enum header_states
359
360
  #define IS_NUM(c) ((c) >= '0' && (c) <= '9')
360
361
  #define IS_ALPHANUM(c) (IS_ALPHA(c) || IS_NUM(c))
361
362
  #define IS_HEX(c) (IS_NUM(c) || (LOWER(c) >= 'a' && LOWER(c) <= 'f'))
363
+ #define IS_MARK(c) ((c) == '-' || (c) == '_' || (c) == '.' || \
364
+ (c) == '!' || (c) == '~' || (c) == '*' || (c) == '\'' || (c) == '(' || \
365
+ (c) == ')')
366
+ #define IS_USERINFO_CHAR(c) (IS_ALPHANUM(c) || IS_MARK(c) || (c) == '%' || \
367
+ (c) == ';' || (c) == ':' || (c) == '&' || (c) == '=' || (c) == '+' || \
368
+ (c) == '$' || (c) == ',')
362
369
 
363
370
  #if HTTP_PARSER_STRICT
364
371
  #define TOKEN(c) (tokens[(unsigned char)c])
365
- #define IS_URL_CHAR(c) (normal_url_char[(unsigned char) (c)])
372
+ #define IS_URL_CHAR(c) (BIT_AT(normal_url_char, (unsigned char)c))
366
373
  #define IS_HOST_CHAR(c) (IS_ALPHANUM(c) || (c) == '.' || (c) == '-')
367
374
  #else
368
375
  #define TOKEN(c) ((c == ' ') ? ' ' : tokens[(unsigned char)c])
369
376
  #define IS_URL_CHAR(c) \
370
- (normal_url_char[(unsigned char) (c)] || ((c) & 0x80))
377
+ (BIT_AT(normal_url_char, (unsigned char)c) || ((c) & 0x80))
371
378
  #define IS_HOST_CHAR(c) \
372
379
  (IS_ALPHANUM(c) || (c) == '.' || (c) == '-' || (c) == '_')
373
380
  #endif
@@ -401,7 +408,7 @@ static struct {
401
408
  };
402
409
  #undef HTTP_STRERROR_GEN
403
410
 
404
- int http_message_needs_eof(http_parser *parser);
411
+ int http_message_needs_eof(const http_parser *parser);
405
412
 
406
413
  /* Our URL parser.
407
414
  *
@@ -417,7 +424,15 @@ int http_message_needs_eof(http_parser *parser);
417
424
  static enum state
418
425
  parse_url_char(enum state s, const char ch)
419
426
  {
420
- assert(!isspace(ch));
427
+ if (ch == ' ' || ch == '\r' || ch == '\n') {
428
+ return s_dead;
429
+ }
430
+
431
+ #if HTTP_PARSER_STRICT
432
+ if (ch == '\t' || ch == '\f') {
433
+ return s_dead;
434
+ }
435
+ #endif
421
436
 
422
437
  switch (s) {
423
438
  case s_req_spaces_before_url:
@@ -455,67 +470,33 @@ parse_url_char(enum state s, const char ch)
455
470
 
456
471
  case s_req_schema_slash_slash:
457
472
  if (ch == '/') {
458
- return s_req_host_start;
459
- }
460
-
461
- break;
462
-
463
- case s_req_host_start:
464
- if (ch == '[') {
465
- return s_req_host_v6_start;
466
- }
467
-
468
- if (IS_HOST_CHAR(ch)) {
469
- return s_req_host;
473
+ return s_req_server_start;
470
474
  }
471
475
 
472
476
  break;
473
477
 
474
- case s_req_host:
475
- if (IS_HOST_CHAR(ch)) {
476
- return s_req_host;
477
- }
478
-
479
- /* FALLTHROUGH */
480
- case s_req_host_v6_end:
481
- switch (ch) {
482
- case ':':
483
- return s_req_port_start;
484
-
485
- case '/':
486
- return s_req_path;
487
-
488
- case '?':
489
- return s_req_query_string_start;
478
+ case s_req_server_with_at:
479
+ if (ch == '@') {
480
+ return s_dead;
490
481
  }
491
482
 
492
- break;
493
-
494
- case s_req_host_v6:
495
- if (ch == ']') {
496
- return s_req_host_v6_end;
483
+ /* FALLTHROUGH */
484
+ case s_req_server_start:
485
+ case s_req_server:
486
+ if (ch == '/') {
487
+ return s_req_path;
497
488
  }
498
489
 
499
- /* FALLTHROUGH */
500
- case s_req_host_v6_start:
501
- if (IS_HEX(ch) || ch == ':') {
502
- return s_req_host_v6;
490
+ if (ch == '?') {
491
+ return s_req_query_string_start;
503
492
  }
504
- break;
505
493
 
506
- case s_req_port:
507
- switch (ch) {
508
- case '/':
509
- return s_req_path;
510
-
511
- case '?':
512
- return s_req_query_string_start;
494
+ if (ch == '@') {
495
+ return s_req_server_with_at;
513
496
  }
514
497
 
515
- /* FALLTHROUGH */
516
- case s_req_port_start:
517
- if (IS_NUM(ch)) {
518
- return s_req_port;
498
+ if (IS_USERINFO_CHAR(ch) || ch == '[' || ch == ']') {
499
+ return s_req_server;
519
500
  }
520
501
 
521
502
  break;
@@ -637,13 +618,9 @@ size_t http_parser_execute (http_parser *parser,
637
618
  case s_req_schema:
638
619
  case s_req_schema_slash:
639
620
  case s_req_schema_slash_slash:
640
- case s_req_host_start:
641
- case s_req_host_v6_start:
642
- case s_req_host_v6:
643
- case s_req_host_v6_end:
644
- case s_req_host:
645
- case s_req_port_start:
646
- case s_req_port:
621
+ case s_req_server_start:
622
+ case s_req_server:
623
+ case s_req_server_with_at:
647
624
  case s_req_query_string_start:
648
625
  case s_req_query_string:
649
626
  case s_req_fragment_start:
@@ -889,6 +866,7 @@ size_t http_parser_execute (http_parser *parser,
889
866
  case s_res_line_almost_done:
890
867
  STRICT_CHECK(ch != LF);
891
868
  parser->state = s_header_field_start;
869
+ CALLBACK_NOTIFY(status_complete);
892
870
  break;
893
871
 
894
872
  case s_start_req:
@@ -918,7 +896,7 @@ size_t http_parser_execute (http_parser *parser,
918
896
  /* or PROPFIND|PROPPATCH|PUT|PATCH|PURGE */
919
897
  break;
920
898
  case 'R': parser->method = HTTP_REPORT; break;
921
- case 'S': parser->method = HTTP_SUBSCRIBE; break;
899
+ case 'S': parser->method = HTTP_SUBSCRIBE; /* or SEARCH */ break;
922
900
  case 'T': parser->method = HTTP_TRACE; break;
923
901
  case 'U': parser->method = HTTP_UNLOCK; /* or UNSUBSCRIBE */ break;
924
902
  default:
@@ -965,6 +943,12 @@ size_t http_parser_execute (http_parser *parser,
965
943
  } else {
966
944
  goto error;
967
945
  }
946
+ } else if (parser->method == HTTP_SUBSCRIBE) {
947
+ if (parser->index == 1 && ch == 'E') {
948
+ parser->method = HTTP_SEARCH;
949
+ } else {
950
+ goto error;
951
+ }
968
952
  } else if (parser->index == 1 && parser->method == HTTP_POST) {
969
953
  if (ch == 'R') {
970
954
  parser->method = HTTP_PROPFIND; /* or HTTP_PROPPATCH */
@@ -998,7 +982,7 @@ size_t http_parser_execute (http_parser *parser,
998
982
 
999
983
  MARK(url);
1000
984
  if (parser->method == HTTP_CONNECT) {
1001
- parser->state = s_req_host_start;
985
+ parser->state = s_req_server_start;
1002
986
  }
1003
987
 
1004
988
  parser->state = parse_url_char((enum state)parser->state, ch);
@@ -1013,10 +997,7 @@ size_t http_parser_execute (http_parser *parser,
1013
997
  case s_req_schema:
1014
998
  case s_req_schema_slash:
1015
999
  case s_req_schema_slash_slash:
1016
- case s_req_host_start:
1017
- case s_req_host_v6_start:
1018
- case s_req_host_v6:
1019
- case s_req_port_start:
1000
+ case s_req_server_start:
1020
1001
  {
1021
1002
  switch (ch) {
1022
1003
  /* No whitespace allowed here */
@@ -1036,9 +1017,8 @@ size_t http_parser_execute (http_parser *parser,
1036
1017
  break;
1037
1018
  }
1038
1019
 
1039
- case s_req_host:
1040
- case s_req_host_v6_end:
1041
- case s_req_port:
1020
+ case s_req_server:
1021
+ case s_req_server_with_at:
1042
1022
  case s_req_path:
1043
1023
  case s_req_query_string_start:
1044
1024
  case s_req_query_string:
@@ -1867,7 +1847,7 @@ error:
1867
1847
 
1868
1848
  /* Does the parser need to see an EOF to find the end of the message? */
1869
1849
  int
1870
- http_message_needs_eof (http_parser *parser)
1850
+ http_message_needs_eof (const http_parser *parser)
1871
1851
  {
1872
1852
  if (parser->type == HTTP_REQUEST) {
1873
1853
  return 0;
@@ -1890,7 +1870,7 @@ http_message_needs_eof (http_parser *parser)
1890
1870
 
1891
1871
 
1892
1872
  int
1893
- http_should_keep_alive (http_parser *parser)
1873
+ http_should_keep_alive (const http_parser *parser)
1894
1874
  {
1895
1875
  if (parser->http_major > 0 && parser->http_minor > 0) {
1896
1876
  /* HTTP/1.1 */
@@ -1908,9 +1888,10 @@ http_should_keep_alive (http_parser *parser)
1908
1888
  }
1909
1889
 
1910
1890
 
1911
- const char * http_method_str (enum http_method m)
1891
+ const char *
1892
+ http_method_str (enum http_method m)
1912
1893
  {
1913
- return method_strings[m];
1894
+ return ELEM_AT(method_strings, m, "<unknown>");
1914
1895
  }
1915
1896
 
1916
1897
 
@@ -1937,6 +1918,144 @@ http_errno_description(enum http_errno err) {
1937
1918
  return http_strerror_tab[err].description;
1938
1919
  }
1939
1920
 
1921
+ static enum http_host_state
1922
+ http_parse_host_char(enum http_host_state s, const char ch) {
1923
+ switch(s) {
1924
+ case s_http_userinfo:
1925
+ case s_http_userinfo_start:
1926
+ if (ch == '@') {
1927
+ return s_http_host_start;
1928
+ }
1929
+
1930
+ if (IS_USERINFO_CHAR(ch)) {
1931
+ return s_http_userinfo;
1932
+ }
1933
+ break;
1934
+
1935
+ case s_http_host_start:
1936
+ if (ch == '[') {
1937
+ return s_http_host_v6_start;
1938
+ }
1939
+
1940
+ if (IS_HOST_CHAR(ch)) {
1941
+ return s_http_host;
1942
+ }
1943
+
1944
+ break;
1945
+
1946
+ case s_http_host:
1947
+ if (IS_HOST_CHAR(ch)) {
1948
+ return s_http_host;
1949
+ }
1950
+
1951
+ /* FALLTHROUGH */
1952
+ case s_http_host_v6_end:
1953
+ if (ch == ':') {
1954
+ return s_http_host_port_start;
1955
+ }
1956
+
1957
+ break;
1958
+
1959
+ case s_http_host_v6:
1960
+ if (ch == ']') {
1961
+ return s_http_host_v6_end;
1962
+ }
1963
+
1964
+ /* FALLTHROUGH */
1965
+ case s_http_host_v6_start:
1966
+ if (IS_HEX(ch) || ch == ':' || ch == '.') {
1967
+ return s_http_host_v6;
1968
+ }
1969
+
1970
+ break;
1971
+
1972
+ case s_http_host_port:
1973
+ case s_http_host_port_start:
1974
+ if (IS_NUM(ch)) {
1975
+ return s_http_host_port;
1976
+ }
1977
+
1978
+ break;
1979
+
1980
+ default:
1981
+ break;
1982
+ }
1983
+ return s_http_host_dead;
1984
+ }
1985
+
1986
+ static int
1987
+ http_parse_host(const char * buf, struct http_parser_url *u, int found_at) {
1988
+ enum http_host_state s;
1989
+
1990
+ const char *p;
1991
+ size_t buflen = u->field_data[UF_HOST].off + u->field_data[UF_HOST].len;
1992
+
1993
+ u->field_data[UF_HOST].len = 0;
1994
+
1995
+ s = found_at ? s_http_userinfo_start : s_http_host_start;
1996
+
1997
+ for (p = buf + u->field_data[UF_HOST].off; p < buf + buflen; p++) {
1998
+ enum http_host_state new_s = http_parse_host_char(s, *p);
1999
+
2000
+ if (new_s == s_http_host_dead) {
2001
+ return 1;
2002
+ }
2003
+
2004
+ switch(new_s) {
2005
+ case s_http_host:
2006
+ if (s != s_http_host) {
2007
+ u->field_data[UF_HOST].off = p - buf;
2008
+ }
2009
+ u->field_data[UF_HOST].len++;
2010
+ break;
2011
+
2012
+ case s_http_host_v6:
2013
+ if (s != s_http_host_v6) {
2014
+ u->field_data[UF_HOST].off = p - buf;
2015
+ }
2016
+ u->field_data[UF_HOST].len++;
2017
+ break;
2018
+
2019
+ case s_http_host_port:
2020
+ if (s != s_http_host_port) {
2021
+ u->field_data[UF_PORT].off = p - buf;
2022
+ u->field_data[UF_PORT].len = 0;
2023
+ u->field_set |= (1 << UF_PORT);
2024
+ }
2025
+ u->field_data[UF_PORT].len++;
2026
+ break;
2027
+
2028
+ case s_http_userinfo:
2029
+ if (s != s_http_userinfo) {
2030
+ u->field_data[UF_USERINFO].off = p - buf ;
2031
+ u->field_data[UF_USERINFO].len = 0;
2032
+ u->field_set |= (1 << UF_USERINFO);
2033
+ }
2034
+ u->field_data[UF_USERINFO].len++;
2035
+ break;
2036
+
2037
+ default:
2038
+ break;
2039
+ }
2040
+ s = new_s;
2041
+ }
2042
+
2043
+ /* Make sure we don't end somewhere unexpected */
2044
+ switch (s) {
2045
+ case s_http_host_start:
2046
+ case s_http_host_v6_start:
2047
+ case s_http_host_v6:
2048
+ case s_http_host_port_start:
2049
+ case s_http_userinfo:
2050
+ case s_http_userinfo_start:
2051
+ return 1;
2052
+ default:
2053
+ break;
2054
+ }
2055
+
2056
+ return 0;
2057
+ }
2058
+
1940
2059
  int
1941
2060
  http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
1942
2061
  struct http_parser_url *u)
@@ -1944,9 +2063,10 @@ http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
1944
2063
  enum state s;
1945
2064
  const char *p;
1946
2065
  enum http_parser_url_fields uf, old_uf;
2066
+ int found_at = 0;
1947
2067
 
1948
2068
  u->port = u->field_set = 0;
1949
- s = is_connect ? s_req_host_start : s_req_spaces_before_url;
2069
+ s = is_connect ? s_req_server_start : s_req_spaces_before_url;
1950
2070
  uf = old_uf = UF_MAX;
1951
2071
 
1952
2072
  for (p = buf; p < buf + buflen; p++) {
@@ -1960,10 +2080,7 @@ http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
1960
2080
  /* Skip delimiters */
1961
2081
  case s_req_schema_slash:
1962
2082
  case s_req_schema_slash_slash:
1963
- case s_req_host_start:
1964
- case s_req_host_v6_start:
1965
- case s_req_host_v6_end:
1966
- case s_req_port_start:
2083
+ case s_req_server_start:
1967
2084
  case s_req_query_string_start:
1968
2085
  case s_req_fragment_start:
1969
2086
  continue;
@@ -1972,13 +2089,12 @@ http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
1972
2089
  uf = UF_SCHEMA;
1973
2090
  break;
1974
2091
 
1975
- case s_req_host:
1976
- case s_req_host_v6:
1977
- uf = UF_HOST;
1978
- break;
2092
+ case s_req_server_with_at:
2093
+ found_at = 1;
1979
2094
 
1980
- case s_req_port:
1981
- uf = UF_PORT;
2095
+ /* FALLTHROUGH */
2096
+ case s_req_server:
2097
+ uf = UF_HOST;
1982
2098
  break;
1983
2099
 
1984
2100
  case s_req_path:
@@ -2011,21 +2127,17 @@ http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
2011
2127
  old_uf = uf;
2012
2128
  }
2013
2129
 
2014
- /* CONNECT requests can only contain "hostname:port" */
2015
- if (is_connect && u->field_set != ((1 << UF_HOST)|(1 << UF_PORT))) {
2016
- return 1;
2130
+ /* host must be present if there is a schema */
2131
+ /* parsing http:///toto will fail */
2132
+ if ((u->field_set & ((1 << UF_SCHEMA) | (1 << UF_HOST))) != 0) {
2133
+ if (http_parse_host(buf, u, found_at) != 0) {
2134
+ return 1;
2135
+ }
2017
2136
  }
2018
2137
 
2019
- /* Make sure we don't end somewhere unexpected */
2020
- switch (s) {
2021
- case s_req_host_v6_start:
2022
- case s_req_host_v6:
2023
- case s_req_host_v6_end:
2024
- case s_req_host:
2025
- case s_req_port_start:
2138
+ /* CONNECT requests can only contain "hostname:port" */
2139
+ if (is_connect && u->field_set != ((1 << UF_HOST)|(1 << UF_PORT))) {
2026
2140
  return 1;
2027
- default:
2028
- break;
2029
2141
  }
2030
2142
 
2031
2143
  if (u->field_set & (1 << UF_PORT)) {
@@ -2056,3 +2168,8 @@ http_parser_pause(http_parser *parser, int paused) {
2056
2168
  assert(0 && "Attempting to pause parser in error state");
2057
2169
  }
2058
2170
  }
2171
+
2172
+ int
2173
+ http_body_is_final(const struct http_parser *parser) {
2174
+ return parser->state == s_message_done;
2175
+ }