http-parser 1.0.4 → 1.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: c7c15574adec9aefd70f9ad96c542b70ce3b3727
4
- data.tar.gz: ebcfbcfbe13fa5e1155e4c0a41f242ea96b1e8a7
2
+ SHA256:
3
+ metadata.gz: c32dcf71a6dae540e33a54338404f6ed9a7c51eef0ad0913162c75152189c041
4
+ data.tar.gz: 3bd270c5e3362c3c3ec4274e623f028d1fd5f3d5d30c24c87fa7babba9e555ab
5
5
  SHA512:
6
- metadata.gz: 0e3a987b169359afe6229340fb4c28e74c411beb302f767bb45f836cc8579598c1d9b89286e9aeb4bf3c114d4fed9b87b5952510c36da676b063e4a60007ee4c
7
- data.tar.gz: 6a531fd9df9c4f011a7163eb1e87dc260fa0a9ee6c201c9e8156f7b86db0b636f579ca3e094535d16a1bc0541c909f276026811de2ee9f5176177c17cb19f05d
6
+ metadata.gz: 9aef9b8b55d191a662cb831a51c726ed7eecaab049884523db8fe204326daa7aeb6c0ecf2e026e81eb8138274d20d44aaa6d6cfea8b7611d38e214982f860e38
7
+ data.tar.gz: 0d9bfde682dafa7e341eda1554cc7e9dc94c89de5dcff5df164a6c0bc5cde47deaa727eca04d706b138eea3e6d4a6351d81498f7c719240fd7244621c4502113
@@ -3,6 +3,4 @@ require 'ffi-compiler/compile_task'
3
3
  FFI::Compiler::CompileTask.new('http-parser-ext') do |t|
4
4
  t.cflags << "-Wall -Wextra -O3"
5
5
  t.cflags << "-D_GNU_SOURCE=1" if RbConfig::CONFIG["host_os"].downcase =~ /mingw/
6
- t.cflags << "-arch x86_64 -arch i386" if t.platform.mac?
7
- t.ldflags << "-arch x86_64 -arch i386" if t.platform.mac?
8
6
  end
@@ -1,7 +1,4 @@
1
- /* Based on src/http/ngx_http_parse.c from NGINX copyright Igor Sysoev
2
- *
3
- * Additional changes are licensed under the same terms as NGINX and
4
- * copyright Joyent, Inc. and other Node contributors. All rights reserved.
1
+ /* Copyright Joyent, Inc. and other Node contributors.
5
2
  *
6
3
  * Permission is hereby granted, free of charge, to any person obtaining a copy
7
4
  * of this software and associated documentation files (the "Software"), to
@@ -25,7 +22,6 @@
25
22
  #include <assert.h>
26
23
  #include <stddef.h>
27
24
  #include <ctype.h>
28
- #include <stdlib.h>
29
25
  #include <string.h>
30
26
  #include <limits.h>
31
27
 
@@ -53,22 +49,45 @@
53
49
 
54
50
  #define SET_ERRNO(e) \
55
51
  do { \
52
+ parser->nread = nread; \
56
53
  parser->http_errno = (e); \
57
54
  } while(0)
58
55
 
56
+ #define CURRENT_STATE() p_state
57
+ #define UPDATE_STATE(V) p_state = (enum state) (V);
58
+ #define RETURN(V) \
59
+ do { \
60
+ parser->nread = nread; \
61
+ parser->state = CURRENT_STATE(); \
62
+ return (V); \
63
+ } while (0);
64
+ #define REEXECUTE() \
65
+ goto reexecute; \
66
+
67
+
68
+ #ifdef __GNUC__
69
+ # define LIKELY(X) __builtin_expect(!!(X), 1)
70
+ # define UNLIKELY(X) __builtin_expect(!!(X), 0)
71
+ #else
72
+ # define LIKELY(X) (X)
73
+ # define UNLIKELY(X) (X)
74
+ #endif
75
+
59
76
 
60
77
  /* Run the notify callback FOR, returning ER if it fails */
61
78
  #define CALLBACK_NOTIFY_(FOR, ER) \
62
79
  do { \
63
80
  assert(HTTP_PARSER_ERRNO(parser) == HPE_OK); \
64
81
  \
65
- if (settings->on_##FOR) { \
66
- if (0 != settings->on_##FOR(parser)) { \
82
+ if (LIKELY(settings->on_##FOR)) { \
83
+ parser->state = CURRENT_STATE(); \
84
+ if (UNLIKELY(0 != settings->on_##FOR(parser))) { \
67
85
  SET_ERRNO(HPE_CB_##FOR); \
68
86
  } \
87
+ UPDATE_STATE(parser->state); \
69
88
  \
70
89
  /* We either errored above or got paused; get out */ \
71
- if (HTTP_PARSER_ERRNO(parser) != HPE_OK) { \
90
+ if (UNLIKELY(HTTP_PARSER_ERRNO(parser) != HPE_OK)) { \
72
91
  return (ER); \
73
92
  } \
74
93
  } \
@@ -86,20 +105,23 @@ do { \
86
105
  assert(HTTP_PARSER_ERRNO(parser) == HPE_OK); \
87
106
  \
88
107
  if (FOR##_mark) { \
89
- if (settings->on_##FOR) { \
90
- if (0 != settings->on_##FOR(parser, FOR##_mark, (LEN))) { \
108
+ if (LIKELY(settings->on_##FOR)) { \
109
+ parser->state = CURRENT_STATE(); \
110
+ if (UNLIKELY(0 != \
111
+ settings->on_##FOR(parser, FOR##_mark, (LEN)))) { \
91
112
  SET_ERRNO(HPE_CB_##FOR); \
92
113
  } \
114
+ UPDATE_STATE(parser->state); \
93
115
  \
94
116
  /* We either errored above or got paused; get out */ \
95
- if (HTTP_PARSER_ERRNO(parser) != HPE_OK) { \
117
+ if (UNLIKELY(HTTP_PARSER_ERRNO(parser) != HPE_OK)) { \
96
118
  return (ER); \
97
119
  } \
98
120
  } \
99
121
  FOR##_mark = NULL; \
100
122
  } \
101
123
  } while (0)
102
-
124
+
103
125
  /* Run the data callback FOR and consume the current byte */
104
126
  #define CALLBACK_DATA(FOR) \
105
127
  CALLBACK_DATA_(FOR, p - FOR##_mark, p - data + 1)
@@ -116,6 +138,26 @@ do { \
116
138
  } \
117
139
  } while (0)
118
140
 
141
+ /* Don't allow the total size of the HTTP headers (including the status
142
+ * line) to exceed HTTP_MAX_HEADER_SIZE. This check is here to protect
143
+ * embedders against denial-of-service attacks where the attacker feeds
144
+ * us a never-ending header that the embedder keeps buffering.
145
+ *
146
+ * This check is arguably the responsibility of embedders but we're doing
147
+ * it on the embedder's behalf because most won't bother and this way we
148
+ * make the web a little safer. HTTP_MAX_HEADER_SIZE is still far bigger
149
+ * than any reasonable request or response so this should never affect
150
+ * day-to-day operation.
151
+ */
152
+ #define COUNT_HEADER_SIZE(V) \
153
+ do { \
154
+ nread += (V); \
155
+ if (UNLIKELY(nread > (HTTP_MAX_HEADER_SIZE))) { \
156
+ SET_ERRNO(HPE_HEADER_OVERFLOW); \
157
+ goto error; \
158
+ } \
159
+ } while (0)
160
+
119
161
 
120
162
  #define PROXY_CONNECTION "proxy-connection"
121
163
  #define CONNECTION "connection"
@@ -152,7 +194,7 @@ static const char tokens[256] = {
152
194
  /* 24 can 25 em 26 sub 27 esc 28 fs 29 gs 30 rs 31 us */
153
195
  0, 0, 0, 0, 0, 0, 0, 0,
154
196
  /* 32 sp 33 ! 34 " 35 # 36 $ 37 % 38 & 39 ' */
155
- 0, '!', 0, '#', '$', '%', '&', '\'',
197
+ ' ', '!', 0, '#', '$', '%', '&', '\'',
156
198
  /* 40 ( 41 ) 42 * 43 + 44 , 45 - 46 . 47 / */
157
199
  0, 0, '*', '+', 0, '-', '.', 0,
158
200
  /* 48 0 49 1 50 2 51 3 52 4 53 5 54 6 55 7 */
@@ -242,10 +284,10 @@ enum state
242
284
  , s_res_HT
243
285
  , s_res_HTT
244
286
  , s_res_HTTP
245
- , s_res_first_http_major
246
287
  , s_res_http_major
247
- , s_res_first_http_minor
288
+ , s_res_http_dot
248
289
  , s_res_http_minor
290
+ , s_res_http_end
249
291
  , s_res_first_status_code
250
292
  , s_res_status_code
251
293
  , s_res_status_start
@@ -272,14 +314,17 @@ enum state
272
314
  , s_req_http_HT
273
315
  , s_req_http_HTT
274
316
  , s_req_http_HTTP
275
- , s_req_first_http_major
276
317
  , s_req_http_major
277
- , s_req_first_http_minor
318
+ , s_req_http_dot
278
319
  , s_req_http_minor
320
+ , s_req_http_end
279
321
  , s_req_line_almost_done
280
322
 
281
323
  , s_header_field_start
282
324
  , s_header_field
325
+ , s_header_value_discard_ws
326
+ , s_header_value_discard_ws_almost_done
327
+ , s_header_value_discard_lws
283
328
  , s_header_value_start
284
329
  , s_header_value
285
330
  , s_header_value_lws
@@ -327,16 +372,22 @@ enum header_states
327
372
 
328
373
  , h_connection
329
374
  , h_content_length
375
+ , h_content_length_num
376
+ , h_content_length_ws
330
377
  , h_transfer_encoding
331
378
  , h_upgrade
332
379
 
333
380
  , h_matching_transfer_encoding_chunked
381
+ , h_matching_connection_token_start
334
382
  , h_matching_connection_keep_alive
335
383
  , h_matching_connection_close
384
+ , h_matching_connection_upgrade
385
+ , h_matching_connection_token
336
386
 
337
387
  , h_transfer_encoding_chunked
338
388
  , h_connection_keep_alive
339
389
  , h_connection_close
390
+ , h_connection_upgrade
340
391
  };
341
392
 
342
393
  enum http_host_state
@@ -349,6 +400,8 @@ enum http_host_state
349
400
  , s_http_host
350
401
  , s_http_host_v6
351
402
  , s_http_host_v6_end
403
+ , s_http_host_v6_zone_start
404
+ , s_http_host_v6_zone
352
405
  , s_http_host_port_start
353
406
  , s_http_host_port
354
407
  };
@@ -368,18 +421,26 @@ enum http_host_state
368
421
  (c) == ';' || (c) == ':' || (c) == '&' || (c) == '=' || (c) == '+' || \
369
422
  (c) == '$' || (c) == ',')
370
423
 
424
+ #define STRICT_TOKEN(c) ((c == ' ') ? 0 : tokens[(unsigned char)c])
425
+
371
426
  #if HTTP_PARSER_STRICT
372
- #define TOKEN(c) (tokens[(unsigned char)c])
427
+ #define TOKEN(c) STRICT_TOKEN(c)
373
428
  #define IS_URL_CHAR(c) (BIT_AT(normal_url_char, (unsigned char)c))
374
429
  #define IS_HOST_CHAR(c) (IS_ALPHANUM(c) || (c) == '.' || (c) == '-')
375
430
  #else
376
- #define TOKEN(c) ((c == ' ') ? ' ' : tokens[(unsigned char)c])
431
+ #define TOKEN(c) tokens[(unsigned char)c]
377
432
  #define IS_URL_CHAR(c) \
378
433
  (BIT_AT(normal_url_char, (unsigned char)c) || ((c) & 0x80))
379
434
  #define IS_HOST_CHAR(c) \
380
435
  (IS_ALPHANUM(c) || (c) == '.' || (c) == '-' || (c) == '_')
381
436
  #endif
382
437
 
438
+ /**
439
+ * Verify that a char is a valid visible (printable) US-ASCII
440
+ * character or %x80-FF
441
+ **/
442
+ #define IS_HEADER_CHAR(ch) \
443
+ (ch == CR || ch == LF || ch == 9 || ((unsigned char)ch > 31 && ch != 127))
383
444
 
384
445
  #define start_state (parser->type == HTTP_REQUEST ? s_start_req : s_start_res)
385
446
 
@@ -481,7 +542,7 @@ parse_url_char(enum state s, const char ch)
481
542
  return s_dead;
482
543
  }
483
544
 
484
- /* FALLTHROUGH */
545
+ /* fall through */
485
546
  case s_req_server_start:
486
547
  case s_req_server:
487
548
  if (ch == '/') {
@@ -583,6 +644,9 @@ size_t http_parser_execute (http_parser *parser,
583
644
  const char *url_mark = 0;
584
645
  const char *body_mark = 0;
585
646
  const char *status_mark = 0;
647
+ enum state p_state = (enum state) parser->state;
648
+ const unsigned int lenient = parser->lenient_http_headers;
649
+ uint32_t nread = parser->nread;
586
650
 
587
651
  /* We're in an error state. Don't bother doing anything. */
588
652
  if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
@@ -590,7 +654,7 @@ size_t http_parser_execute (http_parser *parser,
590
654
  }
591
655
 
592
656
  if (len == 0) {
593
- switch (parser->state) {
657
+ switch (CURRENT_STATE()) {
594
658
  case s_body_identity_eof:
595
659
  /* Use of CALLBACK_NOTIFY() here would erroneously return 1 byte read if
596
660
  * we got paused.
@@ -611,11 +675,11 @@ size_t http_parser_execute (http_parser *parser,
611
675
  }
612
676
 
613
677
 
614
- if (parser->state == s_header_field)
678
+ if (CURRENT_STATE() == s_header_field)
615
679
  header_field_mark = data;
616
- if (parser->state == s_header_value)
680
+ if (CURRENT_STATE() == s_header_value)
617
681
  header_value_mark = data;
618
- switch (parser->state) {
682
+ switch (CURRENT_STATE()) {
619
683
  case s_req_path:
620
684
  case s_req_schema:
621
685
  case s_req_schema_slash:
@@ -632,38 +696,24 @@ size_t http_parser_execute (http_parser *parser,
632
696
  case s_res_status:
633
697
  status_mark = data;
634
698
  break;
699
+ default:
700
+ break;
635
701
  }
636
702
 
637
703
  for (p=data; p != data + len; p++) {
638
704
  ch = *p;
639
705
 
640
- if (PARSING_HEADER(parser->state)) {
641
- ++parser->nread;
642
- /* Don't allow the total size of the HTTP headers (including the status
643
- * line) to exceed HTTP_MAX_HEADER_SIZE. This check is here to protect
644
- * embedders against denial-of-service attacks where the attacker feeds
645
- * us a never-ending header that the embedder keeps buffering.
646
- *
647
- * This check is arguably the responsibility of embedders but we're doing
648
- * it on the embedder's behalf because most won't bother and this way we
649
- * make the web a little safer. HTTP_MAX_HEADER_SIZE is still far bigger
650
- * than any reasonable request or response so this should never affect
651
- * day-to-day operation.
652
- */
653
- if (parser->nread > HTTP_MAX_HEADER_SIZE) {
654
- SET_ERRNO(HPE_HEADER_OVERFLOW);
655
- goto error;
656
- }
657
- }
706
+ if (PARSING_HEADER(CURRENT_STATE()))
707
+ COUNT_HEADER_SIZE(1);
658
708
 
659
- reexecute_byte:
660
- switch (parser->state) {
709
+ reexecute:
710
+ switch (CURRENT_STATE()) {
661
711
 
662
712
  case s_dead:
663
713
  /* this state is used after a 'Connection: close' message
664
714
  * the parser will error out if it reads another message
665
715
  */
666
- if (ch == CR || ch == LF)
716
+ if (LIKELY(ch == CR || ch == LF))
667
717
  break;
668
718
 
669
719
  SET_ERRNO(HPE_CLOSED_CONNECTION);
@@ -677,13 +727,13 @@ size_t http_parser_execute (http_parser *parser,
677
727
  parser->content_length = ULLONG_MAX;
678
728
 
679
729
  if (ch == 'H') {
680
- parser->state = s_res_or_resp_H;
730
+ UPDATE_STATE(s_res_or_resp_H);
681
731
 
682
732
  CALLBACK_NOTIFY(message_begin);
683
733
  } else {
684
734
  parser->type = HTTP_REQUEST;
685
- parser->state = s_start_req;
686
- goto reexecute_byte;
735
+ UPDATE_STATE(s_start_req);
736
+ REEXECUTE();
687
737
  }
688
738
 
689
739
  break;
@@ -692,9 +742,9 @@ size_t http_parser_execute (http_parser *parser,
692
742
  case s_res_or_resp_H:
693
743
  if (ch == 'T') {
694
744
  parser->type = HTTP_RESPONSE;
695
- parser->state = s_res_HT;
745
+ UPDATE_STATE(s_res_HT);
696
746
  } else {
697
- if (ch != 'E') {
747
+ if (UNLIKELY(ch != 'E')) {
698
748
  SET_ERRNO(HPE_INVALID_CONSTANT);
699
749
  goto error;
700
750
  }
@@ -702,27 +752,22 @@ size_t http_parser_execute (http_parser *parser,
702
752
  parser->type = HTTP_REQUEST;
703
753
  parser->method = HTTP_HEAD;
704
754
  parser->index = 2;
705
- parser->state = s_req_method;
755
+ UPDATE_STATE(s_req_method);
706
756
  }
707
757
  break;
708
758
 
709
759
  case s_start_res:
710
760
  {
761
+ if (ch == CR || ch == LF)
762
+ break;
711
763
  parser->flags = 0;
712
764
  parser->content_length = ULLONG_MAX;
713
765
 
714
- switch (ch) {
715
- case 'H':
716
- parser->state = s_res_H;
717
- break;
718
-
719
- case CR:
720
- case LF:
721
- break;
722
-
723
- default:
724
- SET_ERRNO(HPE_INVALID_CONSTANT);
725
- goto error;
766
+ if (ch == 'H') {
767
+ UPDATE_STATE(s_res_H);
768
+ } else {
769
+ SET_ERRNO(HPE_INVALID_CONSTANT);
770
+ goto error;
726
771
  }
727
772
 
728
773
  CALLBACK_NOTIFY(message_begin);
@@ -731,90 +776,63 @@ size_t http_parser_execute (http_parser *parser,
731
776
 
732
777
  case s_res_H:
733
778
  STRICT_CHECK(ch != 'T');
734
- parser->state = s_res_HT;
779
+ UPDATE_STATE(s_res_HT);
735
780
  break;
736
781
 
737
782
  case s_res_HT:
738
783
  STRICT_CHECK(ch != 'T');
739
- parser->state = s_res_HTT;
784
+ UPDATE_STATE(s_res_HTT);
740
785
  break;
741
786
 
742
787
  case s_res_HTT:
743
788
  STRICT_CHECK(ch != 'P');
744
- parser->state = s_res_HTTP;
789
+ UPDATE_STATE(s_res_HTTP);
745
790
  break;
746
791
 
747
792
  case s_res_HTTP:
748
793
  STRICT_CHECK(ch != '/');
749
- parser->state = s_res_first_http_major;
794
+ UPDATE_STATE(s_res_http_major);
750
795
  break;
751
796
 
752
- case s_res_first_http_major:
753
- if (ch < '0' || ch > '9') {
797
+ case s_res_http_major:
798
+ if (UNLIKELY(!IS_NUM(ch))) {
754
799
  SET_ERRNO(HPE_INVALID_VERSION);
755
800
  goto error;
756
801
  }
757
802
 
758
803
  parser->http_major = ch - '0';
759
- parser->state = s_res_http_major;
804
+ UPDATE_STATE(s_res_http_dot);
760
805
  break;
761
806
 
762
- /* major HTTP version or dot */
763
- case s_res_http_major:
807
+ case s_res_http_dot:
764
808
  {
765
- if (ch == '.') {
766
- parser->state = s_res_first_http_minor;
767
- break;
768
- }
769
-
770
- if (!IS_NUM(ch)) {
771
- SET_ERRNO(HPE_INVALID_VERSION);
772
- goto error;
773
- }
774
-
775
- parser->http_major *= 10;
776
- parser->http_major += ch - '0';
777
-
778
- if (parser->http_major > 999) {
809
+ if (UNLIKELY(ch != '.')) {
779
810
  SET_ERRNO(HPE_INVALID_VERSION);
780
811
  goto error;
781
812
  }
782
813
 
814
+ UPDATE_STATE(s_res_http_minor);
783
815
  break;
784
816
  }
785
817
 
786
- /* first digit of minor HTTP version */
787
- case s_res_first_http_minor:
788
- if (!IS_NUM(ch)) {
818
+ case s_res_http_minor:
819
+ if (UNLIKELY(!IS_NUM(ch))) {
789
820
  SET_ERRNO(HPE_INVALID_VERSION);
790
821
  goto error;
791
822
  }
792
823
 
793
824
  parser->http_minor = ch - '0';
794
- parser->state = s_res_http_minor;
825
+ UPDATE_STATE(s_res_http_end);
795
826
  break;
796
827
 
797
- /* minor HTTP version or end of request line */
798
- case s_res_http_minor:
828
+ case s_res_http_end:
799
829
  {
800
- if (ch == ' ') {
801
- parser->state = s_res_first_status_code;
802
- break;
803
- }
804
-
805
- if (!IS_NUM(ch)) {
806
- SET_ERRNO(HPE_INVALID_VERSION);
807
- goto error;
808
- }
809
-
810
- parser->http_minor *= 10;
811
- parser->http_minor += ch - '0';
812
-
813
- if (parser->http_minor > 999) {
830
+ if (UNLIKELY(ch != ' ')) {
814
831
  SET_ERRNO(HPE_INVALID_VERSION);
815
832
  goto error;
816
833
  }
817
834
 
835
+ UPDATE_STATE(s_res_first_status_code);
818
836
  break;
819
837
  }
820
838
 
@@ -829,7 +847,7 @@ size_t http_parser_execute (http_parser *parser,
829
847
  goto error;
830
848
  }
831
849
  parser->status_code = ch - '0';
832
- parser->state = s_res_status_code;
850
+ UPDATE_STATE(s_res_status_code);
833
851
  break;
834
852
  }
835
853
 
@@ -838,13 +856,12 @@ size_t http_parser_execute (http_parser *parser,
838
856
  if (!IS_NUM(ch)) {
839
857
  switch (ch) {
840
858
  case ' ':
841
- parser->state = s_res_status_start;
859
+ UPDATE_STATE(s_res_status_start);
842
860
  break;
843
861
  case CR:
844
- parser->state = s_res_line_almost_done;
845
- break;
846
862
  case LF:
847
- parser->state = s_header_field_start;
863
+ UPDATE_STATE(s_res_status_start);
864
+ REEXECUTE();
848
865
  break;
849
866
  default:
850
867
  SET_ERRNO(HPE_INVALID_STATUS);
@@ -856,7 +873,7 @@ size_t http_parser_execute (http_parser *parser,
856
873
  parser->status_code *= 10;
857
874
  parser->status_code += ch - '0';
858
875
 
859
- if (parser->status_code > 999) {
876
+ if (UNLIKELY(parser->status_code > 999)) {
860
877
  SET_ERRNO(HPE_INVALID_STATUS);
861
878
  goto error;
862
879
  }
@@ -866,31 +883,25 @@ size_t http_parser_execute (http_parser *parser,
866
883
 
867
884
  case s_res_status_start:
868
885
  {
869
- if (ch == CR) {
870
- parser->state = s_res_line_almost_done;
871
- break;
872
- }
873
-
874
- if (ch == LF) {
875
- parser->state = s_header_field_start;
876
- break;
877
- }
878
-
879
886
  MARK(status);
880
- parser->state = s_res_status;
887
+ UPDATE_STATE(s_res_status);
881
888
  parser->index = 0;
889
+
890
+ if (ch == CR || ch == LF)
891
+ REEXECUTE();
892
+
882
893
  break;
883
894
  }
884
895
 
885
896
  case s_res_status:
886
897
  if (ch == CR) {
887
- parser->state = s_res_line_almost_done;
898
+ UPDATE_STATE(s_res_line_almost_done);
888
899
  CALLBACK_DATA(status);
889
900
  break;
890
901
  }
891
902
 
892
903
  if (ch == LF) {
893
- parser->state = s_header_field_start;
904
+ UPDATE_STATE(s_header_field_start);
894
905
  CALLBACK_DATA(status);
895
906
  break;
896
907
  }
@@ -899,7 +910,7 @@ size_t http_parser_execute (http_parser *parser,
899
910
 
900
911
  case s_res_line_almost_done:
901
912
  STRICT_CHECK(ch != LF);
902
- parser->state = s_header_field_start;
913
+ UPDATE_STATE(s_header_field_start);
903
914
  break;
904
915
 
905
916
  case s_start_req:
@@ -909,7 +920,7 @@ size_t http_parser_execute (http_parser *parser,
909
920
  parser->flags = 0;
910
921
  parser->content_length = ULLONG_MAX;
911
922
 
912
- if (!IS_ALPHA(ch)) {
923
+ if (UNLIKELY(!IS_ALPHA(ch))) {
913
924
  SET_ERRNO(HPE_INVALID_METHOD);
914
925
  goto error;
915
926
  }
@@ -917,26 +928,28 @@ size_t http_parser_execute (http_parser *parser,
917
928
  parser->method = (enum http_method) 0;
918
929
  parser->index = 1;
919
930
  switch (ch) {
931
+ case 'A': parser->method = HTTP_ACL; break;
932
+ case 'B': parser->method = HTTP_BIND; break;
920
933
  case 'C': parser->method = HTTP_CONNECT; /* or COPY, CHECKOUT */ break;
921
934
  case 'D': parser->method = HTTP_DELETE; break;
922
935
  case 'G': parser->method = HTTP_GET; break;
923
936
  case 'H': parser->method = HTTP_HEAD; break;
924
- case 'L': parser->method = HTTP_LOCK; break;
925
- case 'M': parser->method = HTTP_MKCOL; /* or MOVE, MKACTIVITY, MERGE, M-SEARCH */ break;
937
+ case 'L': parser->method = HTTP_LOCK; /* or LINK */ break;
938
+ case 'M': parser->method = HTTP_MKCOL; /* or MOVE, MKACTIVITY, MERGE, M-SEARCH, MKCALENDAR */ break;
926
939
  case 'N': parser->method = HTTP_NOTIFY; break;
927
940
  case 'O': parser->method = HTTP_OPTIONS; break;
928
941
  case 'P': parser->method = HTTP_POST;
929
942
  /* or PROPFIND|PROPPATCH|PUT|PATCH|PURGE */
930
943
  break;
931
- case 'R': parser->method = HTTP_REPORT; break;
932
- case 'S': parser->method = HTTP_SUBSCRIBE; /* or SEARCH */ break;
944
+ case 'R': parser->method = HTTP_REPORT; /* or REBIND */ break;
945
+ case 'S': parser->method = HTTP_SUBSCRIBE; /* or SEARCH, SOURCE */ break;
933
946
  case 'T': parser->method = HTTP_TRACE; break;
934
- case 'U': parser->method = HTTP_UNLOCK; /* or UNSUBSCRIBE */ break;
947
+ case 'U': parser->method = HTTP_UNLOCK; /* or UNSUBSCRIBE, UNBIND, UNLINK */ break;
935
948
  default:
936
949
  SET_ERRNO(HPE_INVALID_METHOD);
937
950
  goto error;
938
951
  }
939
- parser->state = s_req_method;
952
+ UPDATE_STATE(s_req_method);
940
953
 
941
954
  CALLBACK_NOTIFY(message_begin);
942
955
 
@@ -946,77 +959,47 @@ size_t http_parser_execute (http_parser *parser,
946
959
  case s_req_method:
947
960
  {
948
961
  const char *matcher;
949
- if (ch == '\0') {
962
+ if (UNLIKELY(ch == '\0')) {
950
963
  SET_ERRNO(HPE_INVALID_METHOD);
951
964
  goto error;
952
965
  }
953
966
 
954
967
  matcher = method_strings[parser->method];
955
968
  if (ch == ' ' && matcher[parser->index] == '\0') {
956
- parser->state = s_req_spaces_before_url;
969
+ UPDATE_STATE(s_req_spaces_before_url);
957
970
  } else if (ch == matcher[parser->index]) {
958
971
  ; /* nada */
959
- } else if (parser->method == HTTP_CONNECT) {
960
- if (parser->index == 1 && ch == 'H') {
961
- parser->method = HTTP_CHECKOUT;
962
- } else if (parser->index == 2 && ch == 'P') {
963
- parser->method = HTTP_COPY;
964
- } else {
965
- SET_ERRNO(HPE_INVALID_METHOD);
966
- goto error;
967
- }
968
- } else if (parser->method == HTTP_MKCOL) {
969
- if (parser->index == 1 && ch == 'O') {
970
- parser->method = HTTP_MOVE;
971
- } else if (parser->index == 1 && ch == 'E') {
972
- parser->method = HTTP_MERGE;
973
- } else if (parser->index == 1 && ch == '-') {
974
- parser->method = HTTP_MSEARCH;
975
- } else if (parser->index == 2 && ch == 'A') {
976
- parser->method = HTTP_MKACTIVITY;
977
- } else {
978
- SET_ERRNO(HPE_INVALID_METHOD);
979
- goto error;
980
- }
981
- } else if (parser->method == HTTP_SUBSCRIBE) {
982
- if (parser->index == 1 && ch == 'E') {
983
- parser->method = HTTP_SEARCH;
984
- } else {
985
- SET_ERRNO(HPE_INVALID_METHOD);
986
- goto error;
987
- }
988
- } else if (parser->index == 1 && parser->method == HTTP_POST) {
989
- if (ch == 'R') {
990
- parser->method = HTTP_PROPFIND; /* or HTTP_PROPPATCH */
991
- } else if (ch == 'U') {
992
- parser->method = HTTP_PUT; /* or HTTP_PURGE */
993
- } else if (ch == 'A') {
994
- parser->method = HTTP_PATCH;
995
- } else {
996
- SET_ERRNO(HPE_INVALID_METHOD);
997
- goto error;
998
- }
999
- } else if (parser->index == 2) {
1000
- if (parser->method == HTTP_PUT) {
1001
- if (ch == 'R') {
1002
- parser->method = HTTP_PURGE;
1003
- } else {
1004
- SET_ERRNO(HPE_INVALID_METHOD);
1005
- goto error;
1006
- }
1007
- } else if (parser->method == HTTP_UNLOCK) {
1008
- if (ch == 'S') {
1009
- parser->method = HTTP_UNSUBSCRIBE;
1010
- } else {
972
+ } else if ((ch >= 'A' && ch <= 'Z') || ch == '-') {
973
+
974
+ switch (parser->method << 16 | parser->index << 8 | ch) {
975
+ #define XX(meth, pos, ch, new_meth) \
976
+ case (HTTP_##meth << 16 | pos << 8 | ch): \
977
+ parser->method = HTTP_##new_meth; break;
978
+
979
+ XX(POST, 1, 'U', PUT)
980
+ XX(POST, 1, 'A', PATCH)
981
+ XX(POST, 1, 'R', PROPFIND)
982
+ XX(PUT, 2, 'R', PURGE)
983
+ XX(CONNECT, 1, 'H', CHECKOUT)
984
+ XX(CONNECT, 2, 'P', COPY)
985
+ XX(MKCOL, 1, 'O', MOVE)
986
+ XX(MKCOL, 1, 'E', MERGE)
987
+ XX(MKCOL, 1, '-', MSEARCH)
988
+ XX(MKCOL, 2, 'A', MKACTIVITY)
989
+ XX(MKCOL, 3, 'A', MKCALENDAR)
990
+ XX(SUBSCRIBE, 1, 'E', SEARCH)
991
+ XX(SUBSCRIBE, 1, 'O', SOURCE)
992
+ XX(REPORT, 2, 'B', REBIND)
993
+ XX(PROPFIND, 4, 'P', PROPPATCH)
994
+ XX(LOCK, 1, 'I', LINK)
995
+ XX(UNLOCK, 2, 'S', UNSUBSCRIBE)
996
+ XX(UNLOCK, 2, 'B', UNBIND)
997
+ XX(UNLOCK, 3, 'I', UNLINK)
998
+ #undef XX
999
+ default:
1011
1000
  SET_ERRNO(HPE_INVALID_METHOD);
1012
1001
  goto error;
1013
- }
1014
- } else {
1015
- SET_ERRNO(HPE_INVALID_METHOD);
1016
- goto error;
1017
1002
  }
1018
- } else if (parser->index == 4 && parser->method == HTTP_PROPFIND && ch == 'P') {
1019
- parser->method = HTTP_PROPPATCH;
1020
1003
  } else {
1021
1004
  SET_ERRNO(HPE_INVALID_METHOD);
1022
1005
  goto error;
@@ -1032,11 +1015,11 @@ size_t http_parser_execute (http_parser *parser,
1032
1015
 
1033
1016
  MARK(url);
1034
1017
  if (parser->method == HTTP_CONNECT) {
1035
- parser->state = s_req_server_start;
1018
+ UPDATE_STATE(s_req_server_start);
1036
1019
  }
1037
1020
 
1038
- parser->state = parse_url_char((enum state)parser->state, ch);
1039
- if (parser->state == s_dead) {
1021
+ UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch));
1022
+ if (UNLIKELY(CURRENT_STATE() == s_dead)) {
1040
1023
  SET_ERRNO(HPE_INVALID_URL);
1041
1024
  goto error;
1042
1025
  }
@@ -1057,8 +1040,8 @@ size_t http_parser_execute (http_parser *parser,
1057
1040
  SET_ERRNO(HPE_INVALID_URL);
1058
1041
  goto error;
1059
1042
  default:
1060
- parser->state = parse_url_char((enum state)parser->state, ch);
1061
- if (parser->state == s_dead) {
1043
+ UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch));
1044
+ if (UNLIKELY(CURRENT_STATE() == s_dead)) {
1062
1045
  SET_ERRNO(HPE_INVALID_URL);
1063
1046
  goto error;
1064
1047
  }
@@ -1077,21 +1060,21 @@ size_t http_parser_execute (http_parser *parser,
1077
1060
  {
1078
1061
  switch (ch) {
1079
1062
  case ' ':
1080
- parser->state = s_req_http_start;
1063
+ UPDATE_STATE(s_req_http_start);
1081
1064
  CALLBACK_DATA(url);
1082
1065
  break;
1083
1066
  case CR:
1084
1067
  case LF:
1085
1068
  parser->http_major = 0;
1086
1069
  parser->http_minor = 9;
1087
- parser->state = (ch == CR) ?
1070
+ UPDATE_STATE((ch == CR) ?
1088
1071
  s_req_line_almost_done :
1089
- s_header_field_start;
1072
+ s_header_field_start);
1090
1073
  CALLBACK_DATA(url);
1091
1074
  break;
1092
1075
  default:
1093
- parser->state = parse_url_char((enum state)parser->state, ch);
1094
- if (parser->state == s_dead) {
1076
+ UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch));
1077
+ if (UNLIKELY(CURRENT_STATE() == s_dead)) {
1095
1078
  SET_ERRNO(HPE_INVALID_URL);
1096
1079
  goto error;
1097
1080
  }
@@ -1102,7 +1085,7 @@ size_t http_parser_execute (http_parser *parser,
1102
1085
  case s_req_http_start:
1103
1086
  switch (ch) {
1104
1087
  case 'H':
1105
- parser->state = s_req_http_H;
1088
+ UPDATE_STATE(s_req_http_H);
1106
1089
  break;
1107
1090
  case ' ':
1108
1091
  break;
@@ -1114,130 +1097,101 @@ size_t http_parser_execute (http_parser *parser,
1114
1097
 
1115
1098
  case s_req_http_H:
1116
1099
  STRICT_CHECK(ch != 'T');
1117
- parser->state = s_req_http_HT;
1100
+ UPDATE_STATE(s_req_http_HT);
1118
1101
  break;
1119
1102
 
1120
1103
  case s_req_http_HT:
1121
1104
  STRICT_CHECK(ch != 'T');
1122
- parser->state = s_req_http_HTT;
1105
+ UPDATE_STATE(s_req_http_HTT);
1123
1106
  break;
1124
1107
 
1125
1108
  case s_req_http_HTT:
1126
1109
  STRICT_CHECK(ch != 'P');
1127
- parser->state = s_req_http_HTTP;
1110
+ UPDATE_STATE(s_req_http_HTTP);
1128
1111
  break;
1129
1112
 
1130
1113
  case s_req_http_HTTP:
1131
1114
  STRICT_CHECK(ch != '/');
1132
- parser->state = s_req_first_http_major;
1115
+ UPDATE_STATE(s_req_http_major);
1133
1116
  break;
1134
1117
 
1135
- /* first digit of major HTTP version */
1136
- case s_req_first_http_major:
1137
- if (ch < '1' || ch > '9') {
1118
+ case s_req_http_major:
1119
+ if (UNLIKELY(!IS_NUM(ch))) {
1138
1120
  SET_ERRNO(HPE_INVALID_VERSION);
1139
1121
  goto error;
1140
1122
  }
1141
1123
 
1142
1124
  parser->http_major = ch - '0';
1143
- parser->state = s_req_http_major;
1125
+ UPDATE_STATE(s_req_http_dot);
1144
1126
  break;
1145
1127
 
1146
- /* major HTTP version or dot */
1147
- case s_req_http_major:
1128
+ case s_req_http_dot:
1148
1129
  {
1149
- if (ch == '.') {
1150
- parser->state = s_req_first_http_minor;
1151
- break;
1152
- }
1153
-
1154
- if (!IS_NUM(ch)) {
1155
- SET_ERRNO(HPE_INVALID_VERSION);
1156
- goto error;
1157
- }
1158
-
1159
- parser->http_major *= 10;
1160
- parser->http_major += ch - '0';
1161
-
1162
- if (parser->http_major > 999) {
1130
+ if (UNLIKELY(ch != '.')) {
1163
1131
  SET_ERRNO(HPE_INVALID_VERSION);
1164
1132
  goto error;
1165
1133
  }
1166
1134
 
1135
+ UPDATE_STATE(s_req_http_minor);
1167
1136
  break;
1168
1137
  }
1169
1138
 
1170
- /* first digit of minor HTTP version */
1171
- case s_req_first_http_minor:
1172
- if (!IS_NUM(ch)) {
1139
+ case s_req_http_minor:
1140
+ if (UNLIKELY(!IS_NUM(ch))) {
1173
1141
  SET_ERRNO(HPE_INVALID_VERSION);
1174
1142
  goto error;
1175
1143
  }
1176
1144
 
1177
1145
  parser->http_minor = ch - '0';
1178
- parser->state = s_req_http_minor;
1146
+ UPDATE_STATE(s_req_http_end);
1179
1147
  break;
1180
1148
 
1181
- /* minor HTTP version or end of request line */
1182
- case s_req_http_minor:
1149
+ case s_req_http_end:
1183
1150
  {
1184
1151
  if (ch == CR) {
1185
- parser->state = s_req_line_almost_done;
1152
+ UPDATE_STATE(s_req_line_almost_done);
1186
1153
  break;
1187
1154
  }
1188
1155
 
1189
1156
  if (ch == LF) {
1190
- parser->state = s_header_field_start;
1157
+ UPDATE_STATE(s_header_field_start);
1191
1158
  break;
1192
1159
  }
1193
1160
 
1194
- /* XXX allow spaces after digit? */
1195
-
1196
- if (!IS_NUM(ch)) {
1197
- SET_ERRNO(HPE_INVALID_VERSION);
1198
- goto error;
1199
- }
1200
-
1201
- parser->http_minor *= 10;
1202
- parser->http_minor += ch - '0';
1203
-
1204
- if (parser->http_minor > 999) {
1205
- SET_ERRNO(HPE_INVALID_VERSION);
1206
- goto error;
1207
- }
1208
-
1161
+ SET_ERRNO(HPE_INVALID_VERSION);
1162
+ goto error;
1209
1163
  break;
1210
1164
  }
1211
1165
 
1212
1166
  /* end of request line */
1213
1167
  case s_req_line_almost_done:
1214
1168
  {
1215
- if (ch != LF) {
1169
+ if (UNLIKELY(ch != LF)) {
1216
1170
  SET_ERRNO(HPE_LF_EXPECTED);
1217
1171
  goto error;
1218
1172
  }
1219
1173
 
1220
- parser->state = s_header_field_start;
1174
+ UPDATE_STATE(s_header_field_start);
1221
1175
  break;
1222
1176
  }
1223
1177
 
1224
1178
  case s_header_field_start:
1225
1179
  {
1226
1180
  if (ch == CR) {
1227
- parser->state = s_headers_almost_done;
1181
+ UPDATE_STATE(s_headers_almost_done);
1228
1182
  break;
1229
1183
  }
1230
1184
 
1231
1185
  if (ch == LF) {
1232
1186
  /* they might be just sending \n instead of \r\n so this would be
1233
1187
  * the second \n to denote the end of headers*/
1234
- parser->state = s_headers_almost_done;
1235
- goto reexecute_byte;
1188
+ UPDATE_STATE(s_headers_almost_done);
1189
+ REEXECUTE();
1236
1190
  }
1237
1191
 
1238
1192
  c = TOKEN(ch);
1239
1193
 
1240
- if (!c) {
1194
+ if (UNLIKELY(!c)) {
1241
1195
  SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1242
1196
  goto error;
1243
1197
  }
@@ -1245,7 +1199,7 @@ size_t http_parser_execute (http_parser *parser,
1245
1199
  MARK(header_field);
1246
1200
 
1247
1201
  parser->index = 0;
1248
- parser->state = s_header_field;
1202
+ UPDATE_STATE(s_header_field);
1249
1203
 
1250
1204
  switch (c) {
1251
1205
  case 'c':
@@ -1273,12 +1227,23 @@ size_t http_parser_execute (http_parser *parser,
1273
1227
 
1274
1228
  case s_header_field:
1275
1229
  {
1276
- c = TOKEN(ch);
1230
+ const char* start = p;
1231
+ for (; p != data + len; p++) {
1232
+ ch = *p;
1233
+ c = TOKEN(ch);
1234
+
1235
+ if (!c)
1236
+ break;
1277
1237
 
1278
- if (c) {
1279
1238
  switch (parser->header_state) {
1280
- case h_general:
1239
+ case h_general: {
1240
+ size_t limit = data + len - p;
1241
+ limit = MIN(limit, HTTP_MAX_HEADER_SIZE);
1242
+ while (p+1 < data + limit && TOKEN(p[1])) {
1243
+ p++;
1244
+ }
1281
1245
  break;
1246
+ }
1282
1247
 
1283
1248
  case h_C:
1284
1249
  parser->index++;
@@ -1376,23 +1341,18 @@ size_t http_parser_execute (http_parser *parser,
1376
1341
  assert(0 && "Unknown header_state");
1377
1342
  break;
1378
1343
  }
1379
- break;
1380
1344
  }
1381
1345
 
1382
- if (ch == ':') {
1383
- parser->state = s_header_value_start;
1384
- CALLBACK_DATA(header_field);
1346
+ if (p == data + len) {
1347
+ --p;
1348
+ COUNT_HEADER_SIZE(p - start);
1385
1349
  break;
1386
1350
  }
1387
1351
 
1388
- if (ch == CR) {
1389
- parser->state = s_header_almost_done;
1390
- CALLBACK_DATA(header_field);
1391
- break;
1392
- }
1352
+ COUNT_HEADER_SIZE(p - start);
1393
1353
 
1394
- if (ch == LF) {
1395
- parser->state = s_header_field_start;
1354
+ if (ch == ':') {
1355
+ UPDATE_STATE(s_header_value_discard_ws);
1396
1356
  CALLBACK_DATA(header_field);
1397
1357
  break;
1398
1358
  }
@@ -1401,28 +1361,28 @@ size_t http_parser_execute (http_parser *parser,
1401
1361
  goto error;
1402
1362
  }
1403
1363
 
1404
- case s_header_value_start:
1405
- {
1364
+ case s_header_value_discard_ws:
1406
1365
  if (ch == ' ' || ch == '\t') break;
1407
1366
 
1408
- MARK(header_value);
1409
-
1410
- parser->state = s_header_value;
1411
- parser->index = 0;
1412
-
1413
1367
  if (ch == CR) {
1414
- parser->header_state = h_general;
1415
- parser->state = s_header_almost_done;
1416
- CALLBACK_DATA(header_value);
1368
+ UPDATE_STATE(s_header_value_discard_ws_almost_done);
1417
1369
  break;
1418
1370
  }
1419
1371
 
1420
1372
  if (ch == LF) {
1421
- parser->state = s_header_field_start;
1422
- CALLBACK_DATA(header_value);
1373
+ UPDATE_STATE(s_header_value_discard_lws);
1423
1374
  break;
1424
1375
  }
1425
1376
 
1377
+ /* fall through */
1378
+
1379
+ case s_header_value_start:
1380
+ {
1381
+ MARK(header_value);
1382
+
1383
+ UPDATE_STATE(s_header_value);
1384
+ parser->index = 0;
1385
+
1426
1386
  c = LOWER(ch);
1427
1387
 
1428
1388
  switch (parser->header_state) {
@@ -1441,12 +1401,19 @@ size_t http_parser_execute (http_parser *parser,
1441
1401
  break;
1442
1402
 
1443
1403
  case h_content_length:
1444
- if (!IS_NUM(ch)) {
1404
+ if (UNLIKELY(!IS_NUM(ch))) {
1445
1405
  SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1446
1406
  goto error;
1447
1407
  }
1448
1408
 
1409
+ if (parser->flags & F_CONTENTLENGTH) {
1410
+ SET_ERRNO(HPE_UNEXPECTED_CONTENT_LENGTH);
1411
+ goto error;
1412
+ }
1413
+
1414
+ parser->flags |= F_CONTENTLENGTH;
1449
1415
  parser->content_length = ch - '0';
1416
+ parser->header_state = h_content_length_num;
1450
1417
  break;
1451
1418
 
1452
1419
  case h_connection:
@@ -1456,11 +1423,17 @@ size_t http_parser_execute (http_parser *parser,
1456
1423
  /* looking for 'Connection: close' */
1457
1424
  } else if (c == 'c') {
1458
1425
  parser->header_state = h_matching_connection_close;
1426
+ } else if (c == 'u') {
1427
+ parser->header_state = h_matching_connection_upgrade;
1459
1428
  } else {
1460
- parser->header_state = h_general;
1429
+ parser->header_state = h_matching_connection_token;
1461
1430
  }
1462
1431
  break;
1463
1432
 
1433
+ /* Multi-value `Connection` header */
1434
+ case h_matching_connection_token_start:
1435
+ break;
1436
+
1464
1437
  default:
1465
1438
  parser->header_state = h_general;
1466
1439
  break;
@@ -1470,107 +1443,227 @@ size_t http_parser_execute (http_parser *parser,
1470
1443
 
1471
1444
  case s_header_value:
1472
1445
  {
1446
+ const char* start = p;
1447
+ enum header_states h_state = (enum header_states) parser->header_state;
1448
+ for (; p != data + len; p++) {
1449
+ ch = *p;
1450
+ if (ch == CR) {
1451
+ UPDATE_STATE(s_header_almost_done);
1452
+ parser->header_state = h_state;
1453
+ CALLBACK_DATA(header_value);
1454
+ break;
1455
+ }
1473
1456
 
1474
- if (ch == CR) {
1475
- parser->state = s_header_almost_done;
1476
- CALLBACK_DATA(header_value);
1477
- break;
1478
- }
1457
+ if (ch == LF) {
1458
+ UPDATE_STATE(s_header_almost_done);
1459
+ COUNT_HEADER_SIZE(p - start);
1460
+ parser->header_state = h_state;
1461
+ CALLBACK_DATA_NOADVANCE(header_value);
1462
+ REEXECUTE();
1463
+ }
1479
1464
 
1480
- if (ch == LF) {
1481
- parser->state = s_header_almost_done;
1482
- CALLBACK_DATA_NOADVANCE(header_value);
1483
- goto reexecute_byte;
1484
- }
1465
+ if (!lenient && !IS_HEADER_CHAR(ch)) {
1466
+ SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1467
+ goto error;
1468
+ }
1485
1469
 
1486
- c = LOWER(ch);
1470
+ c = LOWER(ch);
1487
1471
 
1488
- switch (parser->header_state) {
1489
- case h_general:
1490
- break;
1472
+ switch (h_state) {
1473
+ case h_general:
1474
+ {
1475
+ const char* p_cr;
1476
+ const char* p_lf;
1477
+ size_t limit = data + len - p;
1478
+
1479
+ limit = MIN(limit, HTTP_MAX_HEADER_SIZE);
1480
+
1481
+ p_cr = (const char*) memchr(p, CR, limit);
1482
+ p_lf = (const char*) memchr(p, LF, limit);
1483
+ if (p_cr != NULL) {
1484
+ if (p_lf != NULL && p_cr >= p_lf)
1485
+ p = p_lf;
1486
+ else
1487
+ p = p_cr;
1488
+ } else if (UNLIKELY(p_lf != NULL)) {
1489
+ p = p_lf;
1490
+ } else {
1491
+ p = data + len;
1492
+ }
1493
+ --p;
1494
+ break;
1495
+ }
1491
1496
 
1492
- case h_connection:
1493
- case h_transfer_encoding:
1494
- assert(0 && "Shouldn't get here.");
1495
- break;
1497
+ case h_connection:
1498
+ case h_transfer_encoding:
1499
+ assert(0 && "Shouldn't get here.");
1500
+ break;
1496
1501
 
1497
- case h_content_length:
1498
- {
1499
- uint64_t t;
1502
+ case h_content_length:
1503
+ if (ch == ' ') break;
1504
+ h_state = h_content_length_num;
1505
+ /* fall through */
1500
1506
 
1501
- if (ch == ' ') break;
1507
+ case h_content_length_num:
1508
+ {
1509
+ uint64_t t;
1502
1510
 
1503
- if (!IS_NUM(ch)) {
1504
- SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1505
- goto error;
1506
- }
1511
+ if (ch == ' ') {
1512
+ h_state = h_content_length_ws;
1513
+ break;
1514
+ }
1515
+
1516
+ if (UNLIKELY(!IS_NUM(ch))) {
1517
+ SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1518
+ parser->header_state = h_state;
1519
+ goto error;
1520
+ }
1521
+
1522
+ t = parser->content_length;
1523
+ t *= 10;
1524
+ t += ch - '0';
1507
1525
 
1508
- t = parser->content_length;
1509
- t *= 10;
1510
- t += ch - '0';
1526
+ /* Overflow? Test against a conservative limit for simplicity. */
1527
+ if (UNLIKELY((ULLONG_MAX - 10) / 10 < parser->content_length)) {
1528
+ SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1529
+ parser->header_state = h_state;
1530
+ goto error;
1531
+ }
1511
1532
 
1512
- /* Overflow? */
1513
- if (t < parser->content_length || t == ULLONG_MAX) {
1533
+ parser->content_length = t;
1534
+ break;
1535
+ }
1536
+
1537
+ case h_content_length_ws:
1538
+ if (ch == ' ') break;
1514
1539
  SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1540
+ parser->header_state = h_state;
1515
1541
  goto error;
1516
- }
1517
1542
 
1518
- parser->content_length = t;
1519
- break;
1520
- }
1543
+ /* Transfer-Encoding: chunked */
1544
+ case h_matching_transfer_encoding_chunked:
1545
+ parser->index++;
1546
+ if (parser->index > sizeof(CHUNKED)-1
1547
+ || c != CHUNKED[parser->index]) {
1548
+ h_state = h_general;
1549
+ } else if (parser->index == sizeof(CHUNKED)-2) {
1550
+ h_state = h_transfer_encoding_chunked;
1551
+ }
1552
+ break;
1521
1553
 
1522
- /* Transfer-Encoding: chunked */
1523
- case h_matching_transfer_encoding_chunked:
1524
- parser->index++;
1525
- if (parser->index > sizeof(CHUNKED)-1
1526
- || c != CHUNKED[parser->index]) {
1527
- parser->header_state = h_general;
1528
- } else if (parser->index == sizeof(CHUNKED)-2) {
1529
- parser->header_state = h_transfer_encoding_chunked;
1530
- }
1531
- break;
1554
+ case h_matching_connection_token_start:
1555
+ /* looking for 'Connection: keep-alive' */
1556
+ if (c == 'k') {
1557
+ h_state = h_matching_connection_keep_alive;
1558
+ /* looking for 'Connection: close' */
1559
+ } else if (c == 'c') {
1560
+ h_state = h_matching_connection_close;
1561
+ } else if (c == 'u') {
1562
+ h_state = h_matching_connection_upgrade;
1563
+ } else if (STRICT_TOKEN(c)) {
1564
+ h_state = h_matching_connection_token;
1565
+ } else if (c == ' ' || c == '\t') {
1566
+ /* Skip lws */
1567
+ } else {
1568
+ h_state = h_general;
1569
+ }
1570
+ break;
1532
1571
 
1533
- /* looking for 'Connection: keep-alive' */
1534
- case h_matching_connection_keep_alive:
1535
- parser->index++;
1536
- if (parser->index > sizeof(KEEP_ALIVE)-1
1537
- || c != KEEP_ALIVE[parser->index]) {
1538
- parser->header_state = h_general;
1539
- } else if (parser->index == sizeof(KEEP_ALIVE)-2) {
1540
- parser->header_state = h_connection_keep_alive;
1541
- }
1542
- break;
1572
+ /* looking for 'Connection: keep-alive' */
1573
+ case h_matching_connection_keep_alive:
1574
+ parser->index++;
1575
+ if (parser->index > sizeof(KEEP_ALIVE)-1
1576
+ || c != KEEP_ALIVE[parser->index]) {
1577
+ h_state = h_matching_connection_token;
1578
+ } else if (parser->index == sizeof(KEEP_ALIVE)-2) {
1579
+ h_state = h_connection_keep_alive;
1580
+ }
1581
+ break;
1543
1582
 
1544
- /* looking for 'Connection: close' */
1545
- case h_matching_connection_close:
1546
- parser->index++;
1547
- if (parser->index > sizeof(CLOSE)-1 || c != CLOSE[parser->index]) {
1548
- parser->header_state = h_general;
1549
- } else if (parser->index == sizeof(CLOSE)-2) {
1550
- parser->header_state = h_connection_close;
1551
- }
1552
- break;
1583
+ /* looking for 'Connection: close' */
1584
+ case h_matching_connection_close:
1585
+ parser->index++;
1586
+ if (parser->index > sizeof(CLOSE)-1 || c != CLOSE[parser->index]) {
1587
+ h_state = h_matching_connection_token;
1588
+ } else if (parser->index == sizeof(CLOSE)-2) {
1589
+ h_state = h_connection_close;
1590
+ }
1591
+ break;
1553
1592
 
1554
- case h_transfer_encoding_chunked:
1555
- case h_connection_keep_alive:
1556
- case h_connection_close:
1557
- if (ch != ' ') parser->header_state = h_general;
1558
- break;
1593
+ /* looking for 'Connection: upgrade' */
1594
+ case h_matching_connection_upgrade:
1595
+ parser->index++;
1596
+ if (parser->index > sizeof(UPGRADE) - 1 ||
1597
+ c != UPGRADE[parser->index]) {
1598
+ h_state = h_matching_connection_token;
1599
+ } else if (parser->index == sizeof(UPGRADE)-2) {
1600
+ h_state = h_connection_upgrade;
1601
+ }
1602
+ break;
1559
1603
 
1560
- default:
1561
- parser->state = s_header_value;
1562
- parser->header_state = h_general;
1563
- break;
1604
+ case h_matching_connection_token:
1605
+ if (ch == ',') {
1606
+ h_state = h_matching_connection_token_start;
1607
+ parser->index = 0;
1608
+ }
1609
+ break;
1610
+
1611
+ case h_transfer_encoding_chunked:
1612
+ if (ch != ' ') h_state = h_general;
1613
+ break;
1614
+
1615
+ case h_connection_keep_alive:
1616
+ case h_connection_close:
1617
+ case h_connection_upgrade:
1618
+ if (ch == ',') {
1619
+ if (h_state == h_connection_keep_alive) {
1620
+ parser->flags |= F_CONNECTION_KEEP_ALIVE;
1621
+ } else if (h_state == h_connection_close) {
1622
+ parser->flags |= F_CONNECTION_CLOSE;
1623
+ } else if (h_state == h_connection_upgrade) {
1624
+ parser->flags |= F_CONNECTION_UPGRADE;
1625
+ }
1626
+ h_state = h_matching_connection_token_start;
1627
+ parser->index = 0;
1628
+ } else if (ch != ' ') {
1629
+ h_state = h_matching_connection_token;
1630
+ }
1631
+ break;
1632
+
1633
+ default:
1634
+ UPDATE_STATE(s_header_value);
1635
+ h_state = h_general;
1636
+ break;
1637
+ }
1564
1638
  }
1639
+ parser->header_state = h_state;
1640
+
1641
+ if (p == data + len)
1642
+ --p;
1643
+
1644
+ COUNT_HEADER_SIZE(p - start);
1565
1645
  break;
1566
1646
  }
1567
1647
 
1568
1648
  case s_header_almost_done:
1569
1649
  {
1570
- STRICT_CHECK(ch != LF);
1650
+ if (UNLIKELY(ch != LF)) {
1651
+ SET_ERRNO(HPE_LF_EXPECTED);
1652
+ goto error;
1653
+ }
1571
1654
 
1572
- parser->state = s_header_value_lws;
1655
+ UPDATE_STATE(s_header_value_lws);
1656
+ break;
1657
+ }
1573
1658
 
1659
+ case s_header_value_lws:
1660
+ {
1661
+ if (ch == ' ' || ch == '\t') {
1662
+ UPDATE_STATE(s_header_value_start);
1663
+ REEXECUTE();
1664
+ }
1665
+
1666
+ /* finished the header */
1574
1667
  switch (parser->header_state) {
1575
1668
  case h_connection_keep_alive:
1576
1669
  parser->flags |= F_CONNECTION_KEEP_ALIVE;
@@ -1581,23 +1674,53 @@ size_t http_parser_execute (http_parser *parser,
1581
1674
  case h_transfer_encoding_chunked:
1582
1675
  parser->flags |= F_CHUNKED;
1583
1676
  break;
1677
+ case h_connection_upgrade:
1678
+ parser->flags |= F_CONNECTION_UPGRADE;
1679
+ break;
1584
1680
  default:
1585
1681
  break;
1586
1682
  }
1587
1683
 
1684
+ UPDATE_STATE(s_header_field_start);
1685
+ REEXECUTE();
1686
+ }
1687
+
1688
+ case s_header_value_discard_ws_almost_done:
1689
+ {
1690
+ STRICT_CHECK(ch != LF);
1691
+ UPDATE_STATE(s_header_value_discard_lws);
1588
1692
  break;
1589
1693
  }
1590
1694
 
1591
- case s_header_value_lws:
1695
+ case s_header_value_discard_lws:
1592
1696
  {
1593
- if (ch == ' ' || ch == '\t')
1594
- parser->state = s_header_value_start;
1595
- else
1596
- {
1597
- parser->state = s_header_field_start;
1598
- goto reexecute_byte;
1697
+ if (ch == ' ' || ch == '\t') {
1698
+ UPDATE_STATE(s_header_value_discard_ws);
1699
+ break;
1700
+ } else {
1701
+ switch (parser->header_state) {
1702
+ case h_connection_keep_alive:
1703
+ parser->flags |= F_CONNECTION_KEEP_ALIVE;
1704
+ break;
1705
+ case h_connection_close:
1706
+ parser->flags |= F_CONNECTION_CLOSE;
1707
+ break;
1708
+ case h_connection_upgrade:
1709
+ parser->flags |= F_CONNECTION_UPGRADE;
1710
+ break;
1711
+ case h_transfer_encoding_chunked:
1712
+ parser->flags |= F_CHUNKED;
1713
+ break;
1714
+ default:
1715
+ break;
1716
+ }
1717
+
1718
+ /* header value was empty */
1719
+ MARK(header_value);
1720
+ UPDATE_STATE(s_header_field_start);
1721
+ CALLBACK_DATA_NOADVANCE(header_value);
1722
+ REEXECUTE();
1599
1723
  }
1600
- break;
1601
1724
  }
1602
1725
 
1603
1726
  case s_headers_almost_done:
@@ -1606,16 +1729,33 @@ size_t http_parser_execute (http_parser *parser,
1606
1729
 
1607
1730
  if (parser->flags & F_TRAILING) {
1608
1731
  /* End of a chunked request */
1609
- parser->state = NEW_MESSAGE();
1610
- CALLBACK_NOTIFY(message_complete);
1611
- break;
1732
+ UPDATE_STATE(s_message_done);
1733
+ CALLBACK_NOTIFY_NOADVANCE(chunk_complete);
1734
+ REEXECUTE();
1735
+ }
1736
+
1737
+ /* Cannot use chunked encoding and a content-length header together
1738
+ per the HTTP specification. */
1739
+ if ((parser->flags & F_CHUNKED) &&
1740
+ (parser->flags & F_CONTENTLENGTH)) {
1741
+ SET_ERRNO(HPE_UNEXPECTED_CONTENT_LENGTH);
1742
+ goto error;
1612
1743
  }
1613
1744
 
1614
- parser->state = s_headers_done;
1745
+ UPDATE_STATE(s_headers_done);
1615
1746
 
1616
1747
  /* Set this here so that on_headers_complete() callbacks can see it */
1617
- parser->upgrade =
1618
- (parser->flags & F_UPGRADE || parser->method == HTTP_CONNECT);
1748
+ if ((parser->flags & F_UPGRADE) &&
1749
+ (parser->flags & F_CONNECTION_UPGRADE)) {
1750
+ /* For responses, "Upgrade: foo" and "Connection: upgrade" are
1751
+ * mandatory only when it is a 101 Switching Protocols response,
1752
+ * otherwise it is purely informational, to announce support.
1753
+ */
1754
+ parser->upgrade =
1755
+ (parser->type == HTTP_REQUEST || parser->status_code == 101);
1756
+ } else {
1757
+ parser->upgrade = (parser->method == HTTP_CONNECT);
1758
+ }
1619
1759
 
1620
1760
  /* Here we call the headers_complete callback. This is somewhat
1621
1761
  * different than other callbacks because if the user returns 1, we
@@ -1631,59 +1771,67 @@ size_t http_parser_execute (http_parser *parser,
1631
1771
  case 0:
1632
1772
  break;
1633
1773
 
1774
+ case 2:
1775
+ parser->upgrade = 1;
1776
+
1777
+ /* fall through */
1634
1778
  case 1:
1635
1779
  parser->flags |= F_SKIPBODY;
1636
1780
  break;
1637
1781
 
1638
1782
  default:
1639
1783
  SET_ERRNO(HPE_CB_headers_complete);
1640
- return p - data; /* Error */
1784
+ RETURN(p - data); /* Error */
1641
1785
  }
1642
1786
  }
1643
1787
 
1644
1788
  if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
1645
- return p - data;
1789
+ RETURN(p - data);
1646
1790
  }
1647
1791
 
1648
- goto reexecute_byte;
1792
+ REEXECUTE();
1649
1793
  }
1650
1794
 
1651
1795
  case s_headers_done:
1652
1796
  {
1797
+ int hasBody;
1653
1798
  STRICT_CHECK(ch != LF);
1654
1799
 
1655
1800
  parser->nread = 0;
1656
-
1657
- /* Exit, the rest of the connect is in a different protocol. */
1658
- if (parser->upgrade) {
1659
- parser->state = NEW_MESSAGE();
1801
+ nread = 0;
1802
+
1803
+ hasBody = parser->flags & F_CHUNKED ||
1804
+ (parser->content_length > 0 && parser->content_length != ULLONG_MAX);
1805
+ if (parser->upgrade && (parser->method == HTTP_CONNECT ||
1806
+ (parser->flags & F_SKIPBODY) || !hasBody)) {
1807
+ /* Exit, the rest of the message is in a different protocol. */
1808
+ UPDATE_STATE(NEW_MESSAGE());
1660
1809
  CALLBACK_NOTIFY(message_complete);
1661
- return (p - data) + 1;
1810
+ RETURN((p - data) + 1);
1662
1811
  }
1663
1812
 
1664
1813
  if (parser->flags & F_SKIPBODY) {
1665
- parser->state = NEW_MESSAGE();
1814
+ UPDATE_STATE(NEW_MESSAGE());
1666
1815
  CALLBACK_NOTIFY(message_complete);
1667
1816
  } else if (parser->flags & F_CHUNKED) {
1668
1817
  /* chunked encoding - ignore Content-Length header */
1669
- parser->state = s_chunk_size_start;
1818
+ UPDATE_STATE(s_chunk_size_start);
1670
1819
  } else {
1671
1820
  if (parser->content_length == 0) {
1672
1821
  /* Content-Length header given but zero: Content-Length: 0\r\n */
1673
- parser->state = NEW_MESSAGE();
1822
+ UPDATE_STATE(NEW_MESSAGE());
1674
1823
  CALLBACK_NOTIFY(message_complete);
1675
1824
  } else if (parser->content_length != ULLONG_MAX) {
1676
1825
  /* Content-Length header given and non-zero */
1677
- parser->state = s_body_identity;
1826
+ UPDATE_STATE(s_body_identity);
1678
1827
  } else {
1679
- if (parser->type == HTTP_REQUEST ||
1680
- !http_message_needs_eof(parser)) {
1828
+ if (!http_message_needs_eof(parser)) {
1681
1829
  /* Assume content-length 0 - read the next */
1682
- parser->state = NEW_MESSAGE();
1830
+ UPDATE_STATE(NEW_MESSAGE());
1683
1831
  CALLBACK_NOTIFY(message_complete);
1684
1832
  } else {
1685
1833
  /* Read body until EOF */
1686
- parser->state = s_body_identity_eof;
1834
+ UPDATE_STATE(s_body_identity_eof);
1687
1835
  }
1688
1836
  }
1689
1837
  }
@@ -1709,7 +1857,7 @@ size_t http_parser_execute (http_parser *parser,
1709
1857
  p += to_read - 1;
1710
1858
 
1711
1859
  if (parser->content_length == 0) {
1712
- parser->state = s_message_done;
1860
+ UPDATE_STATE(s_message_done);
1713
1861
 
1714
1862
  /* Mimic CALLBACK_DATA_NOADVANCE() but with one extra byte.
1715
1863
  *
@@ -1721,7 +1869,7 @@ size_t http_parser_execute (http_parser *parser,
1721
1869
  * important for applications, but let's keep it for now.
1722
1870
  */
1723
1871
  CALLBACK_DATA_(body, p - body_mark + 1, p - data);
1724
- goto reexecute_byte;
1872
+ REEXECUTE();
1725
1873
  }
1726
1874
 
1727
1875
  break;
@@ -1735,23 +1883,27 @@ size_t http_parser_execute (http_parser *parser,
1735
1883
  break;
1736
1884
 
1737
1885
  case s_message_done:
1738
- parser->state = NEW_MESSAGE();
1886
+ UPDATE_STATE(NEW_MESSAGE());
1739
1887
  CALLBACK_NOTIFY(message_complete);
1888
+ if (parser->upgrade) {
1889
+ /* Exit, the rest of the message is in a different protocol. */
1890
+ RETURN((p - data) + 1);
1891
+ }
1740
1892
  break;
1741
1893
 
1742
1894
  case s_chunk_size_start:
1743
1895
  {
1744
- assert(parser->nread == 1);
1896
+ assert(nread == 1);
1745
1897
  assert(parser->flags & F_CHUNKED);
1746
1898
 
1747
1899
  unhex_val = unhex[(unsigned char)ch];
1748
- if (unhex_val == -1) {
1900
+ if (UNLIKELY(unhex_val == -1)) {
1749
1901
  SET_ERRNO(HPE_INVALID_CHUNK_SIZE);
1750
1902
  goto error;
1751
1903
  }
1752
1904
 
1753
1905
  parser->content_length = unhex_val;
1754
- parser->state = s_chunk_size;
1906
+ UPDATE_STATE(s_chunk_size);
1755
1907
  break;
1756
1908
  }
1757
1909
 
@@ -1762,7 +1914,7 @@ size_t http_parser_execute (http_parser *parser,
1762
1914
  assert(parser->flags & F_CHUNKED);
1763
1915
 
1764
1916
  if (ch == CR) {
1765
- parser->state = s_chunk_size_almost_done;
1917
+ UPDATE_STATE(s_chunk_size_almost_done);
1766
1918
  break;
1767
1919
  }
1768
1920
 
@@ -1770,7 +1922,7 @@ size_t http_parser_execute (http_parser *parser,
1770
1922
 
1771
1923
  if (unhex_val == -1) {
1772
1924
  if (ch == ';' || ch == ' ') {
1773
- parser->state = s_chunk_parameters;
1925
+ UPDATE_STATE(s_chunk_parameters);
1774
1926
  break;
1775
1927
  }
1776
1928
 
@@ -1782,8 +1934,8 @@ size_t http_parser_execute (http_parser *parser,
1782
1934
  t *= 16;
1783
1935
  t += unhex_val;
1784
1936
 
1785
- /* Overflow? */
1786
- if (t < parser->content_length || t == ULLONG_MAX) {
1937
+ /* Overflow? Test against a conservative limit for simplicity. */
1938
+ if (UNLIKELY((ULLONG_MAX - 16) / 16 < parser->content_length)) {
1787
1939
  SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1788
1940
  goto error;
1789
1941
  }
@@ -1797,7 +1949,7 @@ size_t http_parser_execute (http_parser *parser,
1797
1949
  assert(parser->flags & F_CHUNKED);
1798
1950
  /* just ignore this shit. TODO check for overflow */
1799
1951
  if (ch == CR) {
1800
- parser->state = s_chunk_size_almost_done;
1952
+ UPDATE_STATE(s_chunk_size_almost_done);
1801
1953
  break;
1802
1954
  }
1803
1955
  break;
@@ -1809,13 +1961,15 @@ size_t http_parser_execute (http_parser *parser,
1809
1961
  STRICT_CHECK(ch != LF);
1810
1962
 
1811
1963
  parser->nread = 0;
1964
+ nread = 0;
1812
1965
 
1813
1966
  if (parser->content_length == 0) {
1814
1967
  parser->flags |= F_TRAILING;
1815
- parser->state = s_header_field_start;
1968
+ UPDATE_STATE(s_header_field_start);
1816
1969
  } else {
1817
- parser->state = s_chunk_data;
1970
+ UPDATE_STATE(s_chunk_data);
1818
1971
  }
1972
+ CALLBACK_NOTIFY(chunk_header);
1819
1973
  break;
1820
1974
  }
1821
1975
 
@@ -1836,7 +1990,7 @@ size_t http_parser_execute (http_parser *parser,
1836
1990
  p += to_read - 1;
1837
1991
 
1838
1992
  if (parser->content_length == 0) {
1839
- parser->state = s_chunk_data_almost_done;
1993
+ UPDATE_STATE(s_chunk_data_almost_done);
1840
1994
  }
1841
1995
 
1842
1996
  break;
@@ -1846,7 +2000,7 @@ size_t http_parser_execute (http_parser *parser,
1846
2000
  assert(parser->flags & F_CHUNKED);
1847
2001
  assert(parser->content_length == 0);
1848
2002
  STRICT_CHECK(ch != CR);
1849
- parser->state = s_chunk_data_done;
2003
+ UPDATE_STATE(s_chunk_data_done);
1850
2004
  CALLBACK_DATA(body);
1851
2005
  break;
1852
2006
 
@@ -1854,7 +2008,9 @@ size_t http_parser_execute (http_parser *parser,
1854
2008
  assert(parser->flags & F_CHUNKED);
1855
2009
  STRICT_CHECK(ch != LF);
1856
2010
  parser->nread = 0;
1857
- parser->state = s_chunk_size_start;
2011
+ nread = 0;
2012
+ UPDATE_STATE(s_chunk_size_start);
2013
+ CALLBACK_NOTIFY(chunk_complete);
1858
2014
  break;
1859
2015
 
1860
2016
  default:
@@ -1864,7 +2020,7 @@ size_t http_parser_execute (http_parser *parser,
1864
2020
  }
1865
2021
  }
1866
2022
 
1867
- /* Run callbacks for any marks that we have leftover after we ran our of
2023
+ /* Run callbacks for any marks that we have leftover after we ran out of
1868
2024
  * bytes. There should be at most one of these set, so it's OK to invoke
1869
2025
  * them in series (unset marks will not result in callbacks).
1870
2026
  *
@@ -1886,14 +2042,14 @@ size_t http_parser_execute (http_parser *parser,
1886
2042
  CALLBACK_DATA_NOADVANCE(body);
1887
2043
  CALLBACK_DATA_NOADVANCE(status);
1888
2044
 
1889
- return len;
2045
+ RETURN(len);
1890
2046
 
1891
2047
  error:
1892
2048
  if (HTTP_PARSER_ERRNO(parser) == HPE_OK) {
1893
2049
  SET_ERRNO(HPE_UNKNOWN);
1894
2050
  }
1895
2051
 
1896
- return (p - data);
2052
+ RETURN(p - data);
1897
2053
  }
1898
2054
 
1899
2055
 
@@ -1946,6 +2102,16 @@ http_method_str (enum http_method m)
1946
2102
  return ELEM_AT(method_strings, m, "<unknown>");
1947
2103
  }
1948
2104
 
2105
+ const char *
2106
+ http_status_str (enum http_status s)
2107
+ {
2108
+ switch (s) {
2109
+ #define XX(num, name, string) case HTTP_STATUS_##name: return #string;
2110
+ HTTP_STATUS_MAP(XX)
2111
+ #undef XX
2112
+ default: return "<unknown>";
2113
+ }
2114
+ }
1949
2115
 
1950
2116
  void
1951
2117
  http_parser_init (http_parser *parser, enum http_parser_type t)
@@ -1958,15 +2124,21 @@ http_parser_init (http_parser *parser, enum http_parser_type t)
1958
2124
  parser->http_errno = HPE_OK;
1959
2125
  }
1960
2126
 
2127
+ void
2128
+ http_parser_settings_init(http_parser_settings *settings)
2129
+ {
2130
+ memset(settings, 0, sizeof(*settings));
2131
+ }
2132
+
1961
2133
  const char *
1962
2134
  http_errno_name(enum http_errno err) {
1963
- assert(err < (sizeof(http_strerror_tab)/sizeof(http_strerror_tab[0])));
2135
+ assert(((size_t) err) < ARRAY_SIZE(http_strerror_tab));
1964
2136
  return http_strerror_tab[err].name;
1965
2137
  }
1966
2138
 
1967
2139
  const char *
1968
2140
  http_errno_description(enum http_errno err) {
1969
- assert(err < (sizeof(http_strerror_tab)/sizeof(http_strerror_tab[0])));
2141
+ assert(((size_t) err) < ARRAY_SIZE(http_strerror_tab));
1970
2142
  return http_strerror_tab[err].description;
1971
2143
  }
1972
2144
 
@@ -2000,7 +2172,7 @@ http_parse_host_char(enum http_host_state s, const char ch) {
2000
2172
  return s_http_host;
2001
2173
  }
2002
2174
 
2003
- /* FALLTHROUGH */
2175
+ /* fall through */
2004
2176
  case s_http_host_v6_end:
2005
2177
  if (ch == ':') {
2006
2178
  return s_http_host_port_start;
@@ -2013,12 +2185,29 @@ http_parse_host_char(enum http_host_state s, const char ch) {
2013
2185
  return s_http_host_v6_end;
2014
2186
  }
2015
2187
 
2016
- /* FALLTHROUGH */
2188
+ /* fall through */
2017
2189
  case s_http_host_v6_start:
2018
2190
  if (IS_HEX(ch) || ch == ':' || ch == '.') {
2019
2191
  return s_http_host_v6;
2020
2192
  }
2021
2193
 
2194
+ if (s == s_http_host_v6 && ch == '%') {
2195
+ return s_http_host_v6_zone_start;
2196
+ }
2197
+ break;
2198
+
2199
+ case s_http_host_v6_zone:
2200
+ if (ch == ']') {
2201
+ return s_http_host_v6_end;
2202
+ }
2203
+
2204
+ /* fall through */
2205
+ case s_http_host_v6_zone_start:
2206
+ /* RFC 6874 Zone ID consists of 1*( unreserved / pct-encoded) */
2207
+ if (IS_ALPHANUM(ch) || ch == '%' || ch == '.' || ch == '-' || ch == '_' ||
2208
+ ch == '~') {
2209
+ return s_http_host_v6_zone;
2210
+ }
2022
2211
  break;
2023
2212
 
2024
2213
  case s_http_host_port:
@@ -2042,6 +2231,8 @@ http_parse_host(const char * buf, struct http_parser_url *u, int found_at) {
2042
2231
  const char *p;
2043
2232
  size_t buflen = u->field_data[UF_HOST].off + u->field_data[UF_HOST].len;
2044
2233
 
2234
+ assert(u->field_set & (1 << UF_HOST));
2235
+
2045
2236
  u->field_data[UF_HOST].len = 0;
2046
2237
 
2047
2238
  s = found_at ? s_http_userinfo_start : s_http_host_start;
@@ -2068,6 +2259,11 @@ http_parse_host(const char * buf, struct http_parser_url *u, int found_at) {
2068
2259
  u->field_data[UF_HOST].len++;
2069
2260
  break;
2070
2261
 
2262
+ case s_http_host_v6_zone_start:
2263
+ case s_http_host_v6_zone:
2264
+ u->field_data[UF_HOST].len++;
2265
+ break;
2266
+
2071
2267
  case s_http_host_port:
2072
2268
  if (s != s_http_host_port) {
2073
2269
  u->field_data[UF_PORT].off = p - buf;
@@ -2097,6 +2293,8 @@ http_parse_host(const char * buf, struct http_parser_url *u, int found_at) {
2097
2293
  case s_http_host_start:
2098
2294
  case s_http_host_v6_start:
2099
2295
  case s_http_host_v6:
2296
+ case s_http_host_v6_zone_start:
2297
+ case s_http_host_v6_zone:
2100
2298
  case s_http_host_port_start:
2101
2299
  case s_http_userinfo:
2102
2300
  case s_http_userinfo_start:
@@ -2108,6 +2306,11 @@ http_parse_host(const char * buf, struct http_parser_url *u, int found_at) {
2108
2306
  return 0;
2109
2307
  }
2110
2308
 
2309
+ void
2310
+ http_parser_url_init(struct http_parser_url *u) {
2311
+ memset(u, 0, sizeof(*u));
2312
+ }
2313
+
2111
2314
  int
2112
2315
  http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
2113
2316
  struct http_parser_url *u)
@@ -2117,9 +2320,13 @@ http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
2117
2320
  enum http_parser_url_fields uf, old_uf;
2118
2321
  int found_at = 0;
2119
2322
 
2323
+ if (buflen == 0) {
2324
+ return 1;
2325
+ }
2326
+
2120
2327
  u->port = u->field_set = 0;
2121
2328
  s = is_connect ? s_req_server_start : s_req_spaces_before_url;
2122
- uf = old_uf = UF_MAX;
2329
+ old_uf = UF_MAX;
2123
2330
 
2124
2331
  for (p = buf; p < buf + buflen; p++) {
2125
2332
  s = parse_url_char(s, *p);
@@ -2144,7 +2351,7 @@ http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
2144
2351
  case s_req_server_with_at:
2145
2352
  found_at = 1;
2146
2353
 
2147
- /* FALLTROUGH */
2354
+ /* fall through */
2148
2355
  case s_req_server:
2149
2356
  uf = UF_HOST;
2150
2357
  break;
@@ -2181,7 +2388,12 @@ http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
2181
2388
 
2182
2389
  /* host must be present if there is a schema */
2183
2390
  /* parsing http:///toto will fail */
2184
- if ((u->field_set & ((1 << UF_SCHEMA) | (1 << UF_HOST))) != 0) {
2391
+ if ((u->field_set & (1 << UF_SCHEMA)) &&
2392
+ (u->field_set & (1 << UF_HOST)) == 0) {
2393
+ return 1;
2394
+ }
2395
+
2396
+ if (u->field_set & (1 << UF_HOST)) {
2185
2397
  if (http_parse_host(buf, u, found_at) != 0) {
2186
2398
  return 1;
2187
2399
  }
@@ -2193,12 +2405,27 @@ http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
2193
2405
  }
2194
2406
 
2195
2407
  if (u->field_set & (1 << UF_PORT)) {
2196
- /* Don't bother with endp; we've already validated the string */
2197
- unsigned long v = strtoul(buf + u->field_data[UF_PORT].off, NULL, 10);
2198
-
2199
- /* Ports have a max value of 2^16 */
2200
- if (v > 0xffff) {
2201
- return 1;
2408
+ uint16_t off;
2409
+ uint16_t len;
2410
+ const char* p;
2411
+ const char* end;
2412
+ unsigned long v;
2413
+
2414
+ off = u->field_data[UF_PORT].off;
2415
+ len = u->field_data[UF_PORT].len;
2416
+ end = buf + off + len;
2417
+
2418
+ /* NOTE: The characters are already validated and are in the [0-9] range */
2419
+ assert(off + len <= buflen && "Port number overflow");
2420
+ v = 0;
2421
+ for (p = buf + off; p < end; p++) {
2422
+ v *= 10;
2423
+ v += *p - '0';
2424
+
2425
+ /* Ports have a max value of 2^16 */
2426
+ if (v > 0xffff) {
2427
+ return 1;
2428
+ }
2202
2429
  }
2203
2430
 
2204
2431
  u->port = (uint16_t) v;
@@ -2215,6 +2442,7 @@ http_parser_pause(http_parser *parser, int paused) {
2215
2442
  */
2216
2443
  if (HTTP_PARSER_ERRNO(parser) == HPE_OK ||
2217
2444
  HTTP_PARSER_ERRNO(parser) == HPE_PAUSED) {
2445
+ uint32_t nread = parser->nread; /* used by the SET_ERRNO macro */
2218
2446
  SET_ERRNO((paused) ? HPE_PAUSED : HPE_OK);
2219
2447
  } else {
2220
2448
  assert(0 && "Attempting to pause parser in error state");