http-parser 1.0.3 → 1.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 4ee24befc2a0a23054f11edb4379b9b6b1f8d21a
4
- data.tar.gz: 816280693464c8c5daa1c7fb20fe4954c3dd1a26
2
+ SHA256:
3
+ metadata.gz: e23f8fd15e7969bbac10a4e34f3f0fcab492ba503f2f8bdf14a4134fd3c3dd0c
4
+ data.tar.gz: d20b92acb6465ebaaf9bc8a8843241f6730e4a4ecfde5f198d6c5b389bd5ce9b
5
5
  SHA512:
6
- metadata.gz: 1846519857ad32e3f688cf18f28fb3dd2cd33fa4c0a0909d6a2c934218b1ae5df30101b54a8b100cb8caaf28482a964e8e17ef5594abc91fddcb575d67ac5091
7
- data.tar.gz: 82c5dba81e1673ab22497e5e1adda9d6f89ab115795747bc64a0043ced1511880c65d003b379a4f35e89976d92bdf793933bf196b2002876edb431f2c2dcc4c6
6
+ metadata.gz: ff9a8fb170d22563613b4743079d6104fd88bbf7d37b94ba71148567291c086a435026c360e59b53c1513d85a4a8c0942e938c5ca9e68728820bea551e18f20c
7
+ data.tar.gz: 1bb252fcae0be2ab448812f5070b29b67b07839ae19fe091febcd31078fa08712efbce6cfbb76103edd854a8b588960b6d0403c4a591e734c765733284435878
@@ -3,6 +3,6 @@ require 'ffi-compiler/compile_task'
3
3
  FFI::Compiler::CompileTask.new('http-parser-ext') do |t|
4
4
  t.cflags << "-Wall -Wextra -O3"
5
5
  t.cflags << "-D_GNU_SOURCE=1" if RbConfig::CONFIG["host_os"].downcase =~ /mingw/
6
- t.cflags << "-arch x86_64 -arch i386" if t.platform.mac?
7
- t.ldflags << "-arch x86_64 -arch i386" if t.platform.mac?
6
+ t.cflags << "-arch x86_64" if t.platform.mac?
7
+ t.ldflags << "-arch x86_64" if t.platform.mac?
8
8
  end
@@ -1,7 +1,4 @@
1
- /* Based on src/http/ngx_http_parse.c from NGINX copyright Igor Sysoev
2
- *
3
- * Additional changes are licensed under the same terms as NGINX and
4
- * copyright Joyent, Inc. and other Node contributors. All rights reserved.
1
+ /* Copyright Joyent, Inc. and other Node contributors.
5
2
  *
6
3
  * Permission is hereby granted, free of charge, to any person obtaining a copy
7
4
  * of this software and associated documentation files (the "Software"), to
@@ -25,10 +22,11 @@
25
22
  #include <assert.h>
26
23
  #include <stddef.h>
27
24
  #include <ctype.h>
28
- #include <stdlib.h>
29
25
  #include <string.h>
30
26
  #include <limits.h>
31
27
 
28
+ static uint32_t max_header_size = HTTP_MAX_HEADER_SIZE;
29
+
32
30
  #ifndef ULLONG_MAX
33
31
  # define ULLONG_MAX ((uint64_t) -1) /* 2^64-1 */
34
32
  #endif
@@ -53,22 +51,45 @@
53
51
 
54
52
  #define SET_ERRNO(e) \
55
53
  do { \
54
+ parser->nread = nread; \
56
55
  parser->http_errno = (e); \
57
56
  } while(0)
58
57
 
58
+ #define CURRENT_STATE() p_state
59
+ #define UPDATE_STATE(V) p_state = (enum state) (V);
60
+ #define RETURN(V) \
61
+ do { \
62
+ parser->nread = nread; \
63
+ parser->state = CURRENT_STATE(); \
64
+ return (V); \
65
+ } while (0);
66
+ #define REEXECUTE() \
67
+ goto reexecute; \
68
+
69
+
70
+ #ifdef __GNUC__
71
+ # define LIKELY(X) __builtin_expect(!!(X), 1)
72
+ # define UNLIKELY(X) __builtin_expect(!!(X), 0)
73
+ #else
74
+ # define LIKELY(X) (X)
75
+ # define UNLIKELY(X) (X)
76
+ #endif
77
+
59
78
 
60
79
  /* Run the notify callback FOR, returning ER if it fails */
61
80
  #define CALLBACK_NOTIFY_(FOR, ER) \
62
81
  do { \
63
82
  assert(HTTP_PARSER_ERRNO(parser) == HPE_OK); \
64
83
  \
65
- if (settings->on_##FOR) { \
66
- if (0 != settings->on_##FOR(parser)) { \
84
+ if (LIKELY(settings->on_##FOR)) { \
85
+ parser->state = CURRENT_STATE(); \
86
+ if (UNLIKELY(0 != settings->on_##FOR(parser))) { \
67
87
  SET_ERRNO(HPE_CB_##FOR); \
68
88
  } \
89
+ UPDATE_STATE(parser->state); \
69
90
  \
70
91
  /* We either errored above or got paused; get out */ \
71
- if (HTTP_PARSER_ERRNO(parser) != HPE_OK) { \
92
+ if (UNLIKELY(HTTP_PARSER_ERRNO(parser) != HPE_OK)) { \
72
93
  return (ER); \
73
94
  } \
74
95
  } \
@@ -86,20 +107,23 @@ do { \
86
107
  assert(HTTP_PARSER_ERRNO(parser) == HPE_OK); \
87
108
  \
88
109
  if (FOR##_mark) { \
89
- if (settings->on_##FOR) { \
90
- if (0 != settings->on_##FOR(parser, FOR##_mark, (LEN))) { \
110
+ if (LIKELY(settings->on_##FOR)) { \
111
+ parser->state = CURRENT_STATE(); \
112
+ if (UNLIKELY(0 != \
113
+ settings->on_##FOR(parser, FOR##_mark, (LEN)))) { \
91
114
  SET_ERRNO(HPE_CB_##FOR); \
92
115
  } \
116
+ UPDATE_STATE(parser->state); \
93
117
  \
94
118
  /* We either errored above or got paused; get out */ \
95
- if (HTTP_PARSER_ERRNO(parser) != HPE_OK) { \
119
+ if (UNLIKELY(HTTP_PARSER_ERRNO(parser) != HPE_OK)) { \
96
120
  return (ER); \
97
121
  } \
98
122
  } \
99
123
  FOR##_mark = NULL; \
100
124
  } \
101
125
  } while (0)
102
-
126
+
103
127
  /* Run the data callback FOR and consume the current byte */
104
128
  #define CALLBACK_DATA(FOR) \
105
129
  CALLBACK_DATA_(FOR, p - FOR##_mark, p - data + 1)
@@ -116,6 +140,26 @@ do { \
116
140
  } \
117
141
  } while (0)
118
142
 
143
+ /* Don't allow the total size of the HTTP headers (including the status
144
+ * line) to exceed max_header_size. This check is here to protect
145
+ * embedders against denial-of-service attacks where the attacker feeds
146
+ * us a never-ending header that the embedder keeps buffering.
147
+ *
148
+ * This check is arguably the responsibility of embedders but we're doing
149
+ * it on the embedder's behalf because most won't bother and this way we
150
+ * make the web a little safer. max_header_size is still far bigger
151
+ * than any reasonable request or response so this should never affect
152
+ * day-to-day operation.
153
+ */
154
+ #define COUNT_HEADER_SIZE(V) \
155
+ do { \
156
+ nread += (uint32_t)(V); \
157
+ if (UNLIKELY(nread > max_header_size)) { \
158
+ SET_ERRNO(HPE_HEADER_OVERFLOW); \
159
+ goto error; \
160
+ } \
161
+ } while (0)
162
+
119
163
 
120
164
  #define PROXY_CONNECTION "proxy-connection"
121
165
  #define CONNECTION "connection"
@@ -152,7 +196,7 @@ static const char tokens[256] = {
152
196
  /* 24 can 25 em 26 sub 27 esc 28 fs 29 gs 30 rs 31 us */
153
197
  0, 0, 0, 0, 0, 0, 0, 0,
154
198
  /* 32 sp 33 ! 34 " 35 # 36 $ 37 % 38 & 39 ' */
155
- 0, '!', 0, '#', '$', '%', '&', '\'',
199
+ ' ', '!', 0, '#', '$', '%', '&', '\'',
156
200
  /* 40 ( 41 ) 42 * 43 + 44 , 45 - 46 . 47 / */
157
201
  0, 0, '*', '+', 0, '-', '.', 0,
158
202
  /* 48 0 49 1 50 2 51 3 52 4 53 5 54 6 55 7 */
@@ -242,12 +286,13 @@ enum state
242
286
  , s_res_HT
243
287
  , s_res_HTT
244
288
  , s_res_HTTP
245
- , s_res_first_http_major
246
289
  , s_res_http_major
247
- , s_res_first_http_minor
290
+ , s_res_http_dot
248
291
  , s_res_http_minor
292
+ , s_res_http_end
249
293
  , s_res_first_status_code
250
294
  , s_res_status_code
295
+ , s_res_status_start
251
296
  , s_res_status
252
297
  , s_res_line_almost_done
253
298
 
@@ -271,14 +316,19 @@ enum state
271
316
  , s_req_http_HT
272
317
  , s_req_http_HTT
273
318
  , s_req_http_HTTP
274
- , s_req_first_http_major
319
+ , s_req_http_I
320
+ , s_req_http_IC
275
321
  , s_req_http_major
276
- , s_req_first_http_minor
322
+ , s_req_http_dot
277
323
  , s_req_http_minor
324
+ , s_req_http_end
278
325
  , s_req_line_almost_done
279
326
 
280
327
  , s_header_field_start
281
328
  , s_header_field
329
+ , s_header_value_discard_ws
330
+ , s_header_value_discard_ws_almost_done
331
+ , s_header_value_discard_lws
282
332
  , s_header_value_start
283
333
  , s_header_value
284
334
  , s_header_value_lws
@@ -326,16 +376,25 @@ enum header_states
326
376
 
327
377
  , h_connection
328
378
  , h_content_length
379
+ , h_content_length_num
380
+ , h_content_length_ws
329
381
  , h_transfer_encoding
330
382
  , h_upgrade
331
383
 
384
+ , h_matching_transfer_encoding_token_start
332
385
  , h_matching_transfer_encoding_chunked
386
+ , h_matching_transfer_encoding_token
387
+
388
+ , h_matching_connection_token_start
333
389
  , h_matching_connection_keep_alive
334
390
  , h_matching_connection_close
391
+ , h_matching_connection_upgrade
392
+ , h_matching_connection_token
335
393
 
336
394
  , h_transfer_encoding_chunked
337
395
  , h_connection_keep_alive
338
396
  , h_connection_close
397
+ , h_connection_upgrade
339
398
  };
340
399
 
341
400
  enum http_host_state
@@ -348,6 +407,8 @@ enum http_host_state
348
407
  , s_http_host
349
408
  , s_http_host_v6
350
409
  , s_http_host_v6_end
410
+ , s_http_host_v6_zone_start
411
+ , s_http_host_v6_zone
351
412
  , s_http_host_port_start
352
413
  , s_http_host_port
353
414
  };
@@ -367,18 +428,26 @@ enum http_host_state
367
428
  (c) == ';' || (c) == ':' || (c) == '&' || (c) == '=' || (c) == '+' || \
368
429
  (c) == '$' || (c) == ',')
369
430
 
431
+ #define STRICT_TOKEN(c) ((c == ' ') ? 0 : tokens[(unsigned char)c])
432
+
370
433
  #if HTTP_PARSER_STRICT
371
- #define TOKEN(c) (tokens[(unsigned char)c])
434
+ #define TOKEN(c) STRICT_TOKEN(c)
372
435
  #define IS_URL_CHAR(c) (BIT_AT(normal_url_char, (unsigned char)c))
373
436
  #define IS_HOST_CHAR(c) (IS_ALPHANUM(c) || (c) == '.' || (c) == '-')
374
437
  #else
375
- #define TOKEN(c) ((c == ' ') ? ' ' : tokens[(unsigned char)c])
438
+ #define TOKEN(c) tokens[(unsigned char)c]
376
439
  #define IS_URL_CHAR(c) \
377
440
  (BIT_AT(normal_url_char, (unsigned char)c) || ((c) & 0x80))
378
441
  #define IS_HOST_CHAR(c) \
379
442
  (IS_ALPHANUM(c) || (c) == '.' || (c) == '-' || (c) == '_')
380
443
  #endif
381
444
 
445
+ /**
446
+ * Verify that a char is a valid visible (printable) US-ASCII
447
+ * character or %x80-FF
448
+ **/
449
+ #define IS_HEADER_CHAR(ch) \
450
+ (ch == CR || ch == LF || ch == 9 || ((unsigned char)ch > 31 && ch != 127))
382
451
 
383
452
  #define start_state (parser->type == HTTP_REQUEST ? s_start_req : s_start_res)
384
453
 
@@ -480,7 +549,7 @@ parse_url_char(enum state s, const char ch)
480
549
  return s_dead;
481
550
  }
482
551
 
483
- /* FALLTHROUGH */
552
+ /* fall through */
484
553
  case s_req_server_start:
485
554
  case s_req_server:
486
555
  if (ch == '/') {
@@ -581,6 +650,12 @@ size_t http_parser_execute (http_parser *parser,
581
650
  const char *header_value_mark = 0;
582
651
  const char *url_mark = 0;
583
652
  const char *body_mark = 0;
653
+ const char *status_mark = 0;
654
+ enum state p_state = (enum state) parser->state;
655
+ const unsigned int lenient = parser->lenient_http_headers;
656
+ const unsigned int allow_chunked_length = parser->allow_chunked_length;
657
+
658
+ uint32_t nread = parser->nread;
584
659
 
585
660
  /* We're in an error state. Don't bother doing anything. */
586
661
  if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
@@ -588,7 +663,7 @@ size_t http_parser_execute (http_parser *parser,
588
663
  }
589
664
 
590
665
  if (len == 0) {
591
- switch (parser->state) {
666
+ switch (CURRENT_STATE()) {
592
667
  case s_body_identity_eof:
593
668
  /* Use of CALLBACK_NOTIFY() here would erroneously return 1 byte read if
594
669
  * we got paused.
@@ -609,11 +684,11 @@ size_t http_parser_execute (http_parser *parser,
609
684
  }
610
685
 
611
686
 
612
- if (parser->state == s_header_field)
687
+ if (CURRENT_STATE() == s_header_field)
613
688
  header_field_mark = data;
614
- if (parser->state == s_header_value)
689
+ if (CURRENT_STATE() == s_header_value)
615
690
  header_value_mark = data;
616
- switch (parser->state) {
691
+ switch (CURRENT_STATE()) {
617
692
  case s_req_path:
618
693
  case s_req_schema:
619
694
  case s_req_schema_slash:
@@ -627,38 +702,27 @@ size_t http_parser_execute (http_parser *parser,
627
702
  case s_req_fragment:
628
703
  url_mark = data;
629
704
  break;
705
+ case s_res_status:
706
+ status_mark = data;
707
+ break;
708
+ default:
709
+ break;
630
710
  }
631
711
 
632
712
  for (p=data; p != data + len; p++) {
633
713
  ch = *p;
634
714
 
635
- if (PARSING_HEADER(parser->state)) {
636
- ++parser->nread;
637
- /* Don't allow the total size of the HTTP headers (including the status
638
- * line) to exceed HTTP_MAX_HEADER_SIZE. This check is here to protect
639
- * embedders against denial-of-service attacks where the attacker feeds
640
- * us a never-ending header that the embedder keeps buffering.
641
- *
642
- * This check is arguably the responsibility of embedders but we're doing
643
- * it on the embedder's behalf because most won't bother and this way we
644
- * make the web a little safer. HTTP_MAX_HEADER_SIZE is still far bigger
645
- * than any reasonable request or response so this should never affect
646
- * day-to-day operation.
647
- */
648
- if (parser->nread > HTTP_MAX_HEADER_SIZE) {
649
- SET_ERRNO(HPE_HEADER_OVERFLOW);
650
- goto error;
651
- }
652
- }
715
+ if (PARSING_HEADER(CURRENT_STATE()))
716
+ COUNT_HEADER_SIZE(1);
653
717
 
654
- reexecute_byte:
655
- switch (parser->state) {
718
+ reexecute:
719
+ switch (CURRENT_STATE()) {
656
720
 
657
721
  case s_dead:
658
722
  /* this state is used after a 'Connection: close' message
659
723
  * the parser will error out if it reads another message
660
724
  */
661
- if (ch == CR || ch == LF)
725
+ if (LIKELY(ch == CR || ch == LF))
662
726
  break;
663
727
 
664
728
  SET_ERRNO(HPE_CLOSED_CONNECTION);
@@ -669,16 +733,17 @@ size_t http_parser_execute (http_parser *parser,
669
733
  if (ch == CR || ch == LF)
670
734
  break;
671
735
  parser->flags = 0;
736
+ parser->uses_transfer_encoding = 0;
672
737
  parser->content_length = ULLONG_MAX;
673
738
 
674
739
  if (ch == 'H') {
675
- parser->state = s_res_or_resp_H;
740
+ UPDATE_STATE(s_res_or_resp_H);
676
741
 
677
742
  CALLBACK_NOTIFY(message_begin);
678
743
  } else {
679
744
  parser->type = HTTP_REQUEST;
680
- parser->state = s_start_req;
681
- goto reexecute_byte;
745
+ UPDATE_STATE(s_start_req);
746
+ REEXECUTE();
682
747
  }
683
748
 
684
749
  break;
@@ -687,9 +752,9 @@ size_t http_parser_execute (http_parser *parser,
687
752
  case s_res_or_resp_H:
688
753
  if (ch == 'T') {
689
754
  parser->type = HTTP_RESPONSE;
690
- parser->state = s_res_HT;
755
+ UPDATE_STATE(s_res_HT);
691
756
  } else {
692
- if (ch != 'E') {
757
+ if (UNLIKELY(ch != 'E')) {
693
758
  SET_ERRNO(HPE_INVALID_CONSTANT);
694
759
  goto error;
695
760
  }
@@ -697,27 +762,23 @@ size_t http_parser_execute (http_parser *parser,
697
762
  parser->type = HTTP_REQUEST;
698
763
  parser->method = HTTP_HEAD;
699
764
  parser->index = 2;
700
- parser->state = s_req_method;
765
+ UPDATE_STATE(s_req_method);
701
766
  }
702
767
  break;
703
768
 
704
769
  case s_start_res:
705
770
  {
771
+ if (ch == CR || ch == LF)
772
+ break;
706
773
  parser->flags = 0;
774
+ parser->uses_transfer_encoding = 0;
707
775
  parser->content_length = ULLONG_MAX;
708
776
 
709
- switch (ch) {
710
- case 'H':
711
- parser->state = s_res_H;
712
- break;
713
-
714
- case CR:
715
- case LF:
716
- break;
717
-
718
- default:
719
- SET_ERRNO(HPE_INVALID_CONSTANT);
720
- goto error;
777
+ if (ch == 'H') {
778
+ UPDATE_STATE(s_res_H);
779
+ } else {
780
+ SET_ERRNO(HPE_INVALID_CONSTANT);
781
+ goto error;
721
782
  }
722
783
 
723
784
  CALLBACK_NOTIFY(message_begin);
@@ -726,90 +787,63 @@ size_t http_parser_execute (http_parser *parser,
726
787
 
727
788
  case s_res_H:
728
789
  STRICT_CHECK(ch != 'T');
729
- parser->state = s_res_HT;
790
+ UPDATE_STATE(s_res_HT);
730
791
  break;
731
792
 
732
793
  case s_res_HT:
733
794
  STRICT_CHECK(ch != 'T');
734
- parser->state = s_res_HTT;
795
+ UPDATE_STATE(s_res_HTT);
735
796
  break;
736
797
 
737
798
  case s_res_HTT:
738
799
  STRICT_CHECK(ch != 'P');
739
- parser->state = s_res_HTTP;
800
+ UPDATE_STATE(s_res_HTTP);
740
801
  break;
741
802
 
742
803
  case s_res_HTTP:
743
804
  STRICT_CHECK(ch != '/');
744
- parser->state = s_res_first_http_major;
805
+ UPDATE_STATE(s_res_http_major);
745
806
  break;
746
807
 
747
- case s_res_first_http_major:
748
- if (ch < '0' || ch > '9') {
808
+ case s_res_http_major:
809
+ if (UNLIKELY(!IS_NUM(ch))) {
749
810
  SET_ERRNO(HPE_INVALID_VERSION);
750
811
  goto error;
751
812
  }
752
813
 
753
814
  parser->http_major = ch - '0';
754
- parser->state = s_res_http_major;
815
+ UPDATE_STATE(s_res_http_dot);
755
816
  break;
756
817
 
757
- /* major HTTP version or dot */
758
- case s_res_http_major:
818
+ case s_res_http_dot:
759
819
  {
760
- if (ch == '.') {
761
- parser->state = s_res_first_http_minor;
762
- break;
763
- }
764
-
765
- if (!IS_NUM(ch)) {
766
- SET_ERRNO(HPE_INVALID_VERSION);
767
- goto error;
768
- }
769
-
770
- parser->http_major *= 10;
771
- parser->http_major += ch - '0';
772
-
773
- if (parser->http_major > 999) {
820
+ if (UNLIKELY(ch != '.')) {
774
821
  SET_ERRNO(HPE_INVALID_VERSION);
775
822
  goto error;
776
823
  }
777
824
 
825
+ UPDATE_STATE(s_res_http_minor);
778
826
  break;
779
827
  }
780
828
 
781
- /* first digit of minor HTTP version */
782
- case s_res_first_http_minor:
783
- if (!IS_NUM(ch)) {
829
+ case s_res_http_minor:
830
+ if (UNLIKELY(!IS_NUM(ch))) {
784
831
  SET_ERRNO(HPE_INVALID_VERSION);
785
832
  goto error;
786
833
  }
787
834
 
788
835
  parser->http_minor = ch - '0';
789
- parser->state = s_res_http_minor;
836
+ UPDATE_STATE(s_res_http_end);
790
837
  break;
791
838
 
792
- /* minor HTTP version or end of request line */
793
- case s_res_http_minor:
839
+ case s_res_http_end:
794
840
  {
795
- if (ch == ' ') {
796
- parser->state = s_res_first_status_code;
797
- break;
798
- }
799
-
800
- if (!IS_NUM(ch)) {
801
- SET_ERRNO(HPE_INVALID_VERSION);
802
- goto error;
803
- }
804
-
805
- parser->http_minor *= 10;
806
- parser->http_minor += ch - '0';
807
-
808
- if (parser->http_minor > 999) {
841
+ if (UNLIKELY(ch != ' ')) {
809
842
  SET_ERRNO(HPE_INVALID_VERSION);
810
843
  goto error;
811
844
  }
812
845
 
846
+ UPDATE_STATE(s_res_first_status_code);
813
847
  break;
814
848
  }
815
849
 
@@ -824,7 +858,7 @@ size_t http_parser_execute (http_parser *parser,
824
858
  goto error;
825
859
  }
826
860
  parser->status_code = ch - '0';
827
- parser->state = s_res_status_code;
861
+ UPDATE_STATE(s_res_status_code);
828
862
  break;
829
863
  }
830
864
 
@@ -833,13 +867,12 @@ size_t http_parser_execute (http_parser *parser,
833
867
  if (!IS_NUM(ch)) {
834
868
  switch (ch) {
835
869
  case ' ':
836
- parser->state = s_res_status;
870
+ UPDATE_STATE(s_res_status_start);
837
871
  break;
838
872
  case CR:
839
- parser->state = s_res_line_almost_done;
840
- break;
841
873
  case LF:
842
- parser->state = s_header_field_start;
874
+ UPDATE_STATE(s_res_status_start);
875
+ REEXECUTE();
843
876
  break;
844
877
  default:
845
878
  SET_ERRNO(HPE_INVALID_STATUS);
@@ -851,7 +884,7 @@ size_t http_parser_execute (http_parser *parser,
851
884
  parser->status_code *= 10;
852
885
  parser->status_code += ch - '0';
853
886
 
854
- if (parser->status_code > 999) {
887
+ if (UNLIKELY(parser->status_code > 999)) {
855
888
  SET_ERRNO(HPE_INVALID_STATUS);
856
889
  goto error;
857
890
  }
@@ -859,24 +892,36 @@ size_t http_parser_execute (http_parser *parser,
859
892
  break;
860
893
  }
861
894
 
895
+ case s_res_status_start:
896
+ {
897
+ MARK(status);
898
+ UPDATE_STATE(s_res_status);
899
+ parser->index = 0;
900
+
901
+ if (ch == CR || ch == LF)
902
+ REEXECUTE();
903
+
904
+ break;
905
+ }
906
+
862
907
  case s_res_status:
863
- /* the human readable status. e.g. "NOT FOUND"
864
- * we are not humans so just ignore this */
865
908
  if (ch == CR) {
866
- parser->state = s_res_line_almost_done;
909
+ UPDATE_STATE(s_res_line_almost_done);
910
+ CALLBACK_DATA(status);
867
911
  break;
868
912
  }
869
913
 
870
914
  if (ch == LF) {
871
- parser->state = s_header_field_start;
915
+ UPDATE_STATE(s_header_field_start);
916
+ CALLBACK_DATA(status);
872
917
  break;
873
918
  }
919
+
874
920
  break;
875
921
 
876
922
  case s_res_line_almost_done:
877
923
  STRICT_CHECK(ch != LF);
878
- parser->state = s_header_field_start;
879
- CALLBACK_NOTIFY(status_complete);
924
+ UPDATE_STATE(s_header_field_start);
880
925
  break;
881
926
 
882
927
  case s_start_req:
@@ -884,9 +929,10 @@ size_t http_parser_execute (http_parser *parser,
884
929
  if (ch == CR || ch == LF)
885
930
  break;
886
931
  parser->flags = 0;
932
+ parser->uses_transfer_encoding = 0;
887
933
  parser->content_length = ULLONG_MAX;
888
934
 
889
- if (!IS_ALPHA(ch)) {
935
+ if (UNLIKELY(!IS_ALPHA(ch))) {
890
936
  SET_ERRNO(HPE_INVALID_METHOD);
891
937
  goto error;
892
938
  }
@@ -894,26 +940,28 @@ size_t http_parser_execute (http_parser *parser,
894
940
  parser->method = (enum http_method) 0;
895
941
  parser->index = 1;
896
942
  switch (ch) {
943
+ case 'A': parser->method = HTTP_ACL; break;
944
+ case 'B': parser->method = HTTP_BIND; break;
897
945
  case 'C': parser->method = HTTP_CONNECT; /* or COPY, CHECKOUT */ break;
898
946
  case 'D': parser->method = HTTP_DELETE; break;
899
947
  case 'G': parser->method = HTTP_GET; break;
900
948
  case 'H': parser->method = HTTP_HEAD; break;
901
- case 'L': parser->method = HTTP_LOCK; break;
902
- case 'M': parser->method = HTTP_MKCOL; /* or MOVE, MKACTIVITY, MERGE, M-SEARCH */ break;
949
+ case 'L': parser->method = HTTP_LOCK; /* or LINK */ break;
950
+ case 'M': parser->method = HTTP_MKCOL; /* or MOVE, MKACTIVITY, MERGE, M-SEARCH, MKCALENDAR */ break;
903
951
  case 'N': parser->method = HTTP_NOTIFY; break;
904
952
  case 'O': parser->method = HTTP_OPTIONS; break;
905
953
  case 'P': parser->method = HTTP_POST;
906
954
  /* or PROPFIND|PROPPATCH|PUT|PATCH|PURGE */
907
955
  break;
908
- case 'R': parser->method = HTTP_REPORT; break;
909
- case 'S': parser->method = HTTP_SUBSCRIBE; /* or SEARCH */ break;
956
+ case 'R': parser->method = HTTP_REPORT; /* or REBIND */ break;
957
+ case 'S': parser->method = HTTP_SUBSCRIBE; /* or SEARCH, SOURCE */ break;
910
958
  case 'T': parser->method = HTTP_TRACE; break;
911
- case 'U': parser->method = HTTP_UNLOCK; /* or UNSUBSCRIBE */ break;
959
+ case 'U': parser->method = HTTP_UNLOCK; /* or UNSUBSCRIBE, UNBIND, UNLINK */ break;
912
960
  default:
913
961
  SET_ERRNO(HPE_INVALID_METHOD);
914
962
  goto error;
915
963
  }
916
- parser->state = s_req_method;
964
+ UPDATE_STATE(s_req_method);
917
965
 
918
966
  CALLBACK_NOTIFY(message_begin);
919
967
 
@@ -923,77 +971,47 @@ size_t http_parser_execute (http_parser *parser,
923
971
  case s_req_method:
924
972
  {
925
973
  const char *matcher;
926
- if (ch == '\0') {
974
+ if (UNLIKELY(ch == '\0')) {
927
975
  SET_ERRNO(HPE_INVALID_METHOD);
928
976
  goto error;
929
977
  }
930
978
 
931
979
  matcher = method_strings[parser->method];
932
980
  if (ch == ' ' && matcher[parser->index] == '\0') {
933
- parser->state = s_req_spaces_before_url;
981
+ UPDATE_STATE(s_req_spaces_before_url);
934
982
  } else if (ch == matcher[parser->index]) {
935
983
  ; /* nada */
936
- } else if (parser->method == HTTP_CONNECT) {
937
- if (parser->index == 1 && ch == 'H') {
938
- parser->method = HTTP_CHECKOUT;
939
- } else if (parser->index == 2 && ch == 'P') {
940
- parser->method = HTTP_COPY;
941
- } else {
942
- SET_ERRNO(HPE_INVALID_METHOD);
943
- goto error;
944
- }
945
- } else if (parser->method == HTTP_MKCOL) {
946
- if (parser->index == 1 && ch == 'O') {
947
- parser->method = HTTP_MOVE;
948
- } else if (parser->index == 1 && ch == 'E') {
949
- parser->method = HTTP_MERGE;
950
- } else if (parser->index == 1 && ch == '-') {
951
- parser->method = HTTP_MSEARCH;
952
- } else if (parser->index == 2 && ch == 'A') {
953
- parser->method = HTTP_MKACTIVITY;
954
- } else {
955
- SET_ERRNO(HPE_INVALID_METHOD);
956
- goto error;
957
- }
958
- } else if (parser->method == HTTP_SUBSCRIBE) {
959
- if (parser->index == 1 && ch == 'E') {
960
- parser->method = HTTP_SEARCH;
961
- } else {
962
- SET_ERRNO(HPE_INVALID_METHOD);
963
- goto error;
964
- }
965
- } else if (parser->index == 1 && parser->method == HTTP_POST) {
966
- if (ch == 'R') {
967
- parser->method = HTTP_PROPFIND; /* or HTTP_PROPPATCH */
968
- } else if (ch == 'U') {
969
- parser->method = HTTP_PUT; /* or HTTP_PURGE */
970
- } else if (ch == 'A') {
971
- parser->method = HTTP_PATCH;
972
- } else {
973
- SET_ERRNO(HPE_INVALID_METHOD);
974
- goto error;
975
- }
976
- } else if (parser->index == 2) {
977
- if (parser->method == HTTP_PUT) {
978
- if (ch == 'R') {
979
- parser->method = HTTP_PURGE;
980
- } else {
981
- SET_ERRNO(HPE_INVALID_METHOD);
982
- goto error;
983
- }
984
- } else if (parser->method == HTTP_UNLOCK) {
985
- if (ch == 'S') {
986
- parser->method = HTTP_UNSUBSCRIBE;
987
- } else {
984
+ } else if ((ch >= 'A' && ch <= 'Z') || ch == '-') {
985
+
986
+ switch (parser->method << 16 | parser->index << 8 | ch) {
987
+ #define XX(meth, pos, ch, new_meth) \
988
+ case (HTTP_##meth << 16 | pos << 8 | ch): \
989
+ parser->method = HTTP_##new_meth; break;
990
+
991
+ XX(POST, 1, 'U', PUT)
992
+ XX(POST, 1, 'A', PATCH)
993
+ XX(POST, 1, 'R', PROPFIND)
994
+ XX(PUT, 2, 'R', PURGE)
995
+ XX(CONNECT, 1, 'H', CHECKOUT)
996
+ XX(CONNECT, 2, 'P', COPY)
997
+ XX(MKCOL, 1, 'O', MOVE)
998
+ XX(MKCOL, 1, 'E', MERGE)
999
+ XX(MKCOL, 1, '-', MSEARCH)
1000
+ XX(MKCOL, 2, 'A', MKACTIVITY)
1001
+ XX(MKCOL, 3, 'A', MKCALENDAR)
1002
+ XX(SUBSCRIBE, 1, 'E', SEARCH)
1003
+ XX(SUBSCRIBE, 1, 'O', SOURCE)
1004
+ XX(REPORT, 2, 'B', REBIND)
1005
+ XX(PROPFIND, 4, 'P', PROPPATCH)
1006
+ XX(LOCK, 1, 'I', LINK)
1007
+ XX(UNLOCK, 2, 'S', UNSUBSCRIBE)
1008
+ XX(UNLOCK, 2, 'B', UNBIND)
1009
+ XX(UNLOCK, 3, 'I', UNLINK)
1010
+ #undef XX
1011
+ default:
988
1012
  SET_ERRNO(HPE_INVALID_METHOD);
989
1013
  goto error;
990
- }
991
- } else {
992
- SET_ERRNO(HPE_INVALID_METHOD);
993
- goto error;
994
1014
  }
995
- } else if (parser->index == 4 && parser->method == HTTP_PROPFIND && ch == 'P') {
996
- parser->method = HTTP_PROPPATCH;
997
1015
  } else {
998
1016
  SET_ERRNO(HPE_INVALID_METHOD);
999
1017
  goto error;
@@ -1009,11 +1027,11 @@ size_t http_parser_execute (http_parser *parser,
1009
1027
 
1010
1028
  MARK(url);
1011
1029
  if (parser->method == HTTP_CONNECT) {
1012
- parser->state = s_req_server_start;
1030
+ UPDATE_STATE(s_req_server_start);
1013
1031
  }
1014
1032
 
1015
- parser->state = parse_url_char((enum state)parser->state, ch);
1016
- if (parser->state == s_dead) {
1033
+ UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch));
1034
+ if (UNLIKELY(CURRENT_STATE() == s_dead)) {
1017
1035
  SET_ERRNO(HPE_INVALID_URL);
1018
1036
  goto error;
1019
1037
  }
@@ -1034,8 +1052,8 @@ size_t http_parser_execute (http_parser *parser,
1034
1052
  SET_ERRNO(HPE_INVALID_URL);
1035
1053
  goto error;
1036
1054
  default:
1037
- parser->state = parse_url_char((enum state)parser->state, ch);
1038
- if (parser->state == s_dead) {
1055
+ UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch));
1056
+ if (UNLIKELY(CURRENT_STATE() == s_dead)) {
1039
1057
  SET_ERRNO(HPE_INVALID_URL);
1040
1058
  goto error;
1041
1059
  }
@@ -1054,21 +1072,21 @@ size_t http_parser_execute (http_parser *parser,
1054
1072
  {
1055
1073
  switch (ch) {
1056
1074
  case ' ':
1057
- parser->state = s_req_http_start;
1075
+ UPDATE_STATE(s_req_http_start);
1058
1076
  CALLBACK_DATA(url);
1059
1077
  break;
1060
1078
  case CR:
1061
1079
  case LF:
1062
1080
  parser->http_major = 0;
1063
1081
  parser->http_minor = 9;
1064
- parser->state = (ch == CR) ?
1082
+ UPDATE_STATE((ch == CR) ?
1065
1083
  s_req_line_almost_done :
1066
- s_header_field_start;
1084
+ s_header_field_start);
1067
1085
  CALLBACK_DATA(url);
1068
1086
  break;
1069
1087
  default:
1070
- parser->state = parse_url_char((enum state)parser->state, ch);
1071
- if (parser->state == s_dead) {
1088
+ UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch));
1089
+ if (UNLIKELY(CURRENT_STATE() == s_dead)) {
1072
1090
  SET_ERRNO(HPE_INVALID_URL);
1073
1091
  goto error;
1074
1092
  }
@@ -1078,11 +1096,17 @@ size_t http_parser_execute (http_parser *parser,
1078
1096
 
1079
1097
  case s_req_http_start:
1080
1098
  switch (ch) {
1081
- case 'H':
1082
- parser->state = s_req_http_H;
1083
- break;
1084
1099
  case ' ':
1085
1100
  break;
1101
+ case 'H':
1102
+ UPDATE_STATE(s_req_http_H);
1103
+ break;
1104
+ case 'I':
1105
+ if (parser->method == HTTP_SOURCE) {
1106
+ UPDATE_STATE(s_req_http_I);
1107
+ break;
1108
+ }
1109
+ /* fall through */
1086
1110
  default:
1087
1111
  SET_ERRNO(HPE_INVALID_CONSTANT);
1088
1112
  goto error;
@@ -1091,130 +1115,111 @@ size_t http_parser_execute (http_parser *parser,
1091
1115
 
1092
1116
  case s_req_http_H:
1093
1117
  STRICT_CHECK(ch != 'T');
1094
- parser->state = s_req_http_HT;
1118
+ UPDATE_STATE(s_req_http_HT);
1095
1119
  break;
1096
1120
 
1097
1121
  case s_req_http_HT:
1098
1122
  STRICT_CHECK(ch != 'T');
1099
- parser->state = s_req_http_HTT;
1123
+ UPDATE_STATE(s_req_http_HTT);
1100
1124
  break;
1101
1125
 
1102
1126
  case s_req_http_HTT:
1103
1127
  STRICT_CHECK(ch != 'P');
1104
- parser->state = s_req_http_HTTP;
1128
+ UPDATE_STATE(s_req_http_HTTP);
1129
+ break;
1130
+
1131
+ case s_req_http_I:
1132
+ STRICT_CHECK(ch != 'C');
1133
+ UPDATE_STATE(s_req_http_IC);
1134
+ break;
1135
+
1136
+ case s_req_http_IC:
1137
+ STRICT_CHECK(ch != 'E');
1138
+ UPDATE_STATE(s_req_http_HTTP); /* Treat "ICE" as "HTTP". */
1105
1139
  break;
1106
1140
 
1107
1141
  case s_req_http_HTTP:
1108
1142
  STRICT_CHECK(ch != '/');
1109
- parser->state = s_req_first_http_major;
1143
+ UPDATE_STATE(s_req_http_major);
1110
1144
  break;
1111
1145
 
1112
- /* first digit of major HTTP version */
1113
- case s_req_first_http_major:
1114
- if (ch < '1' || ch > '9') {
1146
+ case s_req_http_major:
1147
+ if (UNLIKELY(!IS_NUM(ch))) {
1115
1148
  SET_ERRNO(HPE_INVALID_VERSION);
1116
1149
  goto error;
1117
1150
  }
1118
1151
 
1119
1152
  parser->http_major = ch - '0';
1120
- parser->state = s_req_http_major;
1153
+ UPDATE_STATE(s_req_http_dot);
1121
1154
  break;
1122
1155
 
1123
- /* major HTTP version or dot */
1124
- case s_req_http_major:
1156
+ case s_req_http_dot:
1125
1157
  {
1126
- if (ch == '.') {
1127
- parser->state = s_req_first_http_minor;
1128
- break;
1129
- }
1130
-
1131
- if (!IS_NUM(ch)) {
1132
- SET_ERRNO(HPE_INVALID_VERSION);
1133
- goto error;
1134
- }
1135
-
1136
- parser->http_major *= 10;
1137
- parser->http_major += ch - '0';
1138
-
1139
- if (parser->http_major > 999) {
1158
+ if (UNLIKELY(ch != '.')) {
1140
1159
  SET_ERRNO(HPE_INVALID_VERSION);
1141
1160
  goto error;
1142
1161
  }
1143
1162
 
1163
+ UPDATE_STATE(s_req_http_minor);
1144
1164
  break;
1145
1165
  }
1146
1166
 
1147
- /* first digit of minor HTTP version */
1148
- case s_req_first_http_minor:
1149
- if (!IS_NUM(ch)) {
1167
+ case s_req_http_minor:
1168
+ if (UNLIKELY(!IS_NUM(ch))) {
1150
1169
  SET_ERRNO(HPE_INVALID_VERSION);
1151
1170
  goto error;
1152
1171
  }
1153
1172
 
1154
1173
  parser->http_minor = ch - '0';
1155
- parser->state = s_req_http_minor;
1174
+ UPDATE_STATE(s_req_http_end);
1156
1175
  break;
1157
1176
 
1158
- /* minor HTTP version or end of request line */
1159
- case s_req_http_minor:
1177
+ case s_req_http_end:
1160
1178
  {
1161
1179
  if (ch == CR) {
1162
- parser->state = s_req_line_almost_done;
1180
+ UPDATE_STATE(s_req_line_almost_done);
1163
1181
  break;
1164
1182
  }
1165
1183
 
1166
1184
  if (ch == LF) {
1167
- parser->state = s_header_field_start;
1185
+ UPDATE_STATE(s_header_field_start);
1168
1186
  break;
1169
1187
  }
1170
1188
 
1171
- /* XXX allow spaces after digit? */
1172
-
1173
- if (!IS_NUM(ch)) {
1174
- SET_ERRNO(HPE_INVALID_VERSION);
1175
- goto error;
1176
- }
1177
-
1178
- parser->http_minor *= 10;
1179
- parser->http_minor += ch - '0';
1180
-
1181
- if (parser->http_minor > 999) {
1182
- SET_ERRNO(HPE_INVALID_VERSION);
1183
- goto error;
1184
- }
1185
-
1189
+ SET_ERRNO(HPE_INVALID_VERSION);
1190
+ goto error;
1186
1191
  break;
1187
1192
  }
1188
1193
 
1189
1194
  /* end of request line */
1190
1195
  case s_req_line_almost_done:
1191
1196
  {
1192
- if (ch != LF) {
1197
+ if (UNLIKELY(ch != LF)) {
1193
1198
  SET_ERRNO(HPE_LF_EXPECTED);
1194
1199
  goto error;
1195
1200
  }
1196
1201
 
1197
- parser->state = s_header_field_start;
1202
+ UPDATE_STATE(s_header_field_start);
1198
1203
  break;
1199
1204
  }
1200
1205
 
1201
1206
  case s_header_field_start:
1202
1207
  {
1203
1208
  if (ch == CR) {
1204
- parser->state = s_headers_almost_done;
1209
+ UPDATE_STATE(s_headers_almost_done);
1205
1210
  break;
1206
1211
  }
1207
1212
 
1208
1213
  if (ch == LF) {
1209
1214
  /* they might be just sending \n instead of \r\n so this would be
1210
1215
  * the second \n to denote the end of headers*/
1211
- parser->state = s_headers_almost_done;
1212
- goto reexecute_byte;
1216
+ UPDATE_STATE(s_headers_almost_done);
1217
+ REEXECUTE();
1213
1218
  }
1214
1219
 
1215
1220
  c = TOKEN(ch);
1216
1221
 
1217
- if (!c) {
1222
+ if (UNLIKELY(!c)) {
1218
1223
  SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1219
1224
  goto error;
1220
1225
  }
@@ -1222,7 +1227,7 @@ size_t http_parser_execute (http_parser *parser,
1222
1227
  MARK(header_field);
1223
1228
 
1224
1229
  parser->index = 0;
1225
- parser->state = s_header_field;
1230
+ UPDATE_STATE(s_header_field);
1226
1231
 
1227
1232
  switch (c) {
1228
1233
  case 'c':
@@ -1250,12 +1255,23 @@ size_t http_parser_execute (http_parser *parser,
1250
1255
 
1251
1256
  case s_header_field:
1252
1257
  {
1253
- c = TOKEN(ch);
1258
+ const char* start = p;
1259
+ for (; p != data + len; p++) {
1260
+ ch = *p;
1261
+ c = TOKEN(ch);
1262
+
1263
+ if (!c)
1264
+ break;
1254
1265
 
1255
- if (c) {
1256
1266
  switch (parser->header_state) {
1257
- case h_general:
1267
+ case h_general: {
1268
+ size_t left = data + len - p;
1269
+ const char* pe = p + MIN(left, max_header_size);
1270
+ while (p+1 < pe && TOKEN(p[1])) {
1271
+ p++;
1272
+ }
1258
1273
  break;
1274
+ }
1259
1275
 
1260
1276
  case h_C:
1261
1277
  parser->index++;
@@ -1327,6 +1343,7 @@ size_t http_parser_execute (http_parser *parser,
1327
1343
  parser->header_state = h_general;
1328
1344
  } else if (parser->index == sizeof(TRANSFER_ENCODING)-2) {
1329
1345
  parser->header_state = h_transfer_encoding;
1346
+ parser->uses_transfer_encoding = 1;
1330
1347
  }
1331
1348
  break;
1332
1349
 
@@ -1353,23 +1370,18 @@ size_t http_parser_execute (http_parser *parser,
1353
1370
  assert(0 && "Unknown header_state");
1354
1371
  break;
1355
1372
  }
1356
- break;
1357
1373
  }
1358
1374
 
1359
- if (ch == ':') {
1360
- parser->state = s_header_value_start;
1361
- CALLBACK_DATA(header_field);
1375
+ if (p == data + len) {
1376
+ --p;
1377
+ COUNT_HEADER_SIZE(p - start);
1362
1378
  break;
1363
1379
  }
1364
1380
 
1365
- if (ch == CR) {
1366
- parser->state = s_header_almost_done;
1367
- CALLBACK_DATA(header_field);
1368
- break;
1369
- }
1381
+ COUNT_HEADER_SIZE(p - start);
1370
1382
 
1371
- if (ch == LF) {
1372
- parser->state = s_header_field_start;
1383
+ if (ch == ':') {
1384
+ UPDATE_STATE(s_header_value_discard_ws);
1373
1385
  CALLBACK_DATA(header_field);
1374
1386
  break;
1375
1387
  }
@@ -1378,28 +1390,28 @@ size_t http_parser_execute (http_parser *parser,
1378
1390
  goto error;
1379
1391
  }
1380
1392
 
1381
- case s_header_value_start:
1382
- {
1393
+ case s_header_value_discard_ws:
1383
1394
  if (ch == ' ' || ch == '\t') break;
1384
1395
 
1385
- MARK(header_value);
1386
-
1387
- parser->state = s_header_value;
1388
- parser->index = 0;
1389
-
1390
1396
  if (ch == CR) {
1391
- parser->header_state = h_general;
1392
- parser->state = s_header_almost_done;
1393
- CALLBACK_DATA(header_value);
1397
+ UPDATE_STATE(s_header_value_discard_ws_almost_done);
1394
1398
  break;
1395
1399
  }
1396
1400
 
1397
1401
  if (ch == LF) {
1398
- parser->state = s_header_field_start;
1399
- CALLBACK_DATA(header_value);
1402
+ UPDATE_STATE(s_header_value_discard_lws);
1400
1403
  break;
1401
1404
  }
1402
1405
 
1406
+ /* fall through */
1407
+
1408
+ case s_header_value_start:
1409
+ {
1410
+ MARK(header_value);
1411
+
1412
+ UPDATE_STATE(s_header_value);
1413
+ parser->index = 0;
1414
+
1403
1415
  c = LOWER(ch);
1404
1416
 
1405
1417
  switch (parser->header_state) {
@@ -1413,17 +1425,33 @@ size_t http_parser_execute (http_parser *parser,
1413
1425
  if ('c' == c) {
1414
1426
  parser->header_state = h_matching_transfer_encoding_chunked;
1415
1427
  } else {
1416
- parser->header_state = h_general;
1428
+ parser->header_state = h_matching_transfer_encoding_token;
1417
1429
  }
1418
1430
  break;
1419
1431
 
1432
+ /* Multi-value `Transfer-Encoding` header */
1433
+ case h_matching_transfer_encoding_token_start:
1434
+ break;
1435
+
1420
1436
  case h_content_length:
1421
- if (!IS_NUM(ch)) {
1437
+ if (UNLIKELY(!IS_NUM(ch))) {
1422
1438
  SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1423
1439
  goto error;
1424
1440
  }
1425
1441
 
1442
+ if (parser->flags & F_CONTENTLENGTH) {
1443
+ SET_ERRNO(HPE_UNEXPECTED_CONTENT_LENGTH);
1444
+ goto error;
1445
+ }
1446
+
1447
+ parser->flags |= F_CONTENTLENGTH;
1426
1448
  parser->content_length = ch - '0';
1449
+ parser->header_state = h_content_length_num;
1450
+ break;
1451
+
1452
+ /* when obsolete line folding is encountered for content length
1453
+ * continue to the s_header_value state */
1454
+ case h_content_length_ws:
1427
1455
  break;
1428
1456
 
1429
1457
  case h_connection:
@@ -1433,11 +1461,17 @@ size_t http_parser_execute (http_parser *parser,
1433
1461
  /* looking for 'Connection: close' */
1434
1462
  } else if (c == 'c') {
1435
1463
  parser->header_state = h_matching_connection_close;
1464
+ } else if (c == 'u') {
1465
+ parser->header_state = h_matching_connection_upgrade;
1436
1466
  } else {
1437
- parser->header_state = h_general;
1467
+ parser->header_state = h_matching_connection_token;
1438
1468
  }
1439
1469
  break;
1440
1470
 
1471
+ /* Multi-value `Connection` header */
1472
+ case h_matching_connection_token_start:
1473
+ break;
1474
+
1441
1475
  default:
1442
1476
  parser->header_state = h_general;
1443
1477
  break;
@@ -1447,107 +1481,253 @@ size_t http_parser_execute (http_parser *parser,
1447
1481
 
1448
1482
  case s_header_value:
1449
1483
  {
1484
+ const char* start = p;
1485
+ enum header_states h_state = (enum header_states) parser->header_state;
1486
+ for (; p != data + len; p++) {
1487
+ ch = *p;
1488
+ if (ch == CR) {
1489
+ UPDATE_STATE(s_header_almost_done);
1490
+ parser->header_state = h_state;
1491
+ CALLBACK_DATA(header_value);
1492
+ break;
1493
+ }
1450
1494
 
1451
- if (ch == CR) {
1452
- parser->state = s_header_almost_done;
1453
- CALLBACK_DATA(header_value);
1454
- break;
1455
- }
1495
+ if (ch == LF) {
1496
+ UPDATE_STATE(s_header_almost_done);
1497
+ COUNT_HEADER_SIZE(p - start);
1498
+ parser->header_state = h_state;
1499
+ CALLBACK_DATA_NOADVANCE(header_value);
1500
+ REEXECUTE();
1501
+ }
1456
1502
 
1457
- if (ch == LF) {
1458
- parser->state = s_header_almost_done;
1459
- CALLBACK_DATA_NOADVANCE(header_value);
1460
- goto reexecute_byte;
1461
- }
1503
+ if (!lenient && !IS_HEADER_CHAR(ch)) {
1504
+ SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1505
+ goto error;
1506
+ }
1462
1507
 
1463
- c = LOWER(ch);
1508
+ c = LOWER(ch);
1464
1509
 
1465
- switch (parser->header_state) {
1466
- case h_general:
1467
- break;
1510
+ switch (h_state) {
1511
+ case h_general:
1512
+ {
1513
+ size_t left = data + len - p;
1514
+ const char* pe = p + MIN(left, max_header_size);
1515
+
1516
+ for (; p != pe; p++) {
1517
+ ch = *p;
1518
+ if (ch == CR || ch == LF) {
1519
+ --p;
1520
+ break;
1521
+ }
1522
+ if (!lenient && !IS_HEADER_CHAR(ch)) {
1523
+ SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1524
+ goto error;
1525
+ }
1526
+ }
1527
+ if (p == data + len)
1528
+ --p;
1529
+ break;
1530
+ }
1468
1531
 
1469
- case h_connection:
1470
- case h_transfer_encoding:
1471
- assert(0 && "Shouldn't get here.");
1472
- break;
1532
+ case h_connection:
1533
+ case h_transfer_encoding:
1534
+ assert(0 && "Shouldn't get here.");
1535
+ break;
1473
1536
 
1474
- case h_content_length:
1475
- {
1476
- uint64_t t;
1537
+ case h_content_length:
1538
+ if (ch == ' ') break;
1539
+ h_state = h_content_length_num;
1540
+ /* fall through */
1477
1541
 
1478
- if (ch == ' ') break;
1542
+ case h_content_length_num:
1543
+ {
1544
+ uint64_t t;
1479
1545
 
1480
- if (!IS_NUM(ch)) {
1481
- SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1482
- goto error;
1483
- }
1546
+ if (ch == ' ') {
1547
+ h_state = h_content_length_ws;
1548
+ break;
1549
+ }
1550
+
1551
+ if (UNLIKELY(!IS_NUM(ch))) {
1552
+ SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1553
+ parser->header_state = h_state;
1554
+ goto error;
1555
+ }
1484
1556
 
1485
- t = parser->content_length;
1486
- t *= 10;
1487
- t += ch - '0';
1557
+ t = parser->content_length;
1558
+ t *= 10;
1559
+ t += ch - '0';
1488
1560
 
1489
- /* Overflow? */
1490
- if (t < parser->content_length || t == ULLONG_MAX) {
1561
+ /* Overflow? Test against a conservative limit for simplicity. */
1562
+ if (UNLIKELY((ULLONG_MAX - 10) / 10 < parser->content_length)) {
1563
+ SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1564
+ parser->header_state = h_state;
1565
+ goto error;
1566
+ }
1567
+
1568
+ parser->content_length = t;
1569
+ break;
1570
+ }
1571
+
1572
+ case h_content_length_ws:
1573
+ if (ch == ' ') break;
1491
1574
  SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1575
+ parser->header_state = h_state;
1492
1576
  goto error;
1493
- }
1494
1577
 
1495
- parser->content_length = t;
1496
- break;
1497
- }
1578
+ /* Transfer-Encoding: chunked */
1579
+ case h_matching_transfer_encoding_token_start:
1580
+ /* looking for 'Transfer-Encoding: chunked' */
1581
+ if ('c' == c) {
1582
+ h_state = h_matching_transfer_encoding_chunked;
1583
+ } else if (STRICT_TOKEN(c)) {
1584
+ /* TODO(indutny): similar code below does this, but why?
1585
+ * At the very least it seems to be inconsistent given that
1586
+ * h_matching_transfer_encoding_token does not check for
1587
+ * `STRICT_TOKEN`
1588
+ */
1589
+ h_state = h_matching_transfer_encoding_token;
1590
+ } else if (c == ' ' || c == '\t') {
1591
+ /* Skip lws */
1592
+ } else {
1593
+ h_state = h_general;
1594
+ }
1595
+ break;
1498
1596
 
1499
- /* Transfer-Encoding: chunked */
1500
- case h_matching_transfer_encoding_chunked:
1501
- parser->index++;
1502
- if (parser->index > sizeof(CHUNKED)-1
1503
- || c != CHUNKED[parser->index]) {
1504
- parser->header_state = h_general;
1505
- } else if (parser->index == sizeof(CHUNKED)-2) {
1506
- parser->header_state = h_transfer_encoding_chunked;
1507
- }
1508
- break;
1597
+ case h_matching_transfer_encoding_chunked:
1598
+ parser->index++;
1599
+ if (parser->index > sizeof(CHUNKED)-1
1600
+ || c != CHUNKED[parser->index]) {
1601
+ h_state = h_matching_transfer_encoding_token;
1602
+ } else if (parser->index == sizeof(CHUNKED)-2) {
1603
+ h_state = h_transfer_encoding_chunked;
1604
+ }
1605
+ break;
1509
1606
 
1510
- /* looking for 'Connection: keep-alive' */
1511
- case h_matching_connection_keep_alive:
1512
- parser->index++;
1513
- if (parser->index > sizeof(KEEP_ALIVE)-1
1514
- || c != KEEP_ALIVE[parser->index]) {
1515
- parser->header_state = h_general;
1516
- } else if (parser->index == sizeof(KEEP_ALIVE)-2) {
1517
- parser->header_state = h_connection_keep_alive;
1518
- }
1519
- break;
1607
+ case h_matching_transfer_encoding_token:
1608
+ if (ch == ',') {
1609
+ h_state = h_matching_transfer_encoding_token_start;
1610
+ parser->index = 0;
1611
+ }
1612
+ break;
1520
1613
 
1521
- /* looking for 'Connection: close' */
1522
- case h_matching_connection_close:
1523
- parser->index++;
1524
- if (parser->index > sizeof(CLOSE)-1 || c != CLOSE[parser->index]) {
1525
- parser->header_state = h_general;
1526
- } else if (parser->index == sizeof(CLOSE)-2) {
1527
- parser->header_state = h_connection_close;
1528
- }
1529
- break;
1614
+ case h_matching_connection_token_start:
1615
+ /* looking for 'Connection: keep-alive' */
1616
+ if (c == 'k') {
1617
+ h_state = h_matching_connection_keep_alive;
1618
+ /* looking for 'Connection: close' */
1619
+ } else if (c == 'c') {
1620
+ h_state = h_matching_connection_close;
1621
+ } else if (c == 'u') {
1622
+ h_state = h_matching_connection_upgrade;
1623
+ } else if (STRICT_TOKEN(c)) {
1624
+ h_state = h_matching_connection_token;
1625
+ } else if (c == ' ' || c == '\t') {
1626
+ /* Skip lws */
1627
+ } else {
1628
+ h_state = h_general;
1629
+ }
1630
+ break;
1530
1631
 
1531
- case h_transfer_encoding_chunked:
1532
- case h_connection_keep_alive:
1533
- case h_connection_close:
1534
- if (ch != ' ') parser->header_state = h_general;
1535
- break;
1632
+ /* looking for 'Connection: keep-alive' */
1633
+ case h_matching_connection_keep_alive:
1634
+ parser->index++;
1635
+ if (parser->index > sizeof(KEEP_ALIVE)-1
1636
+ || c != KEEP_ALIVE[parser->index]) {
1637
+ h_state = h_matching_connection_token;
1638
+ } else if (parser->index == sizeof(KEEP_ALIVE)-2) {
1639
+ h_state = h_connection_keep_alive;
1640
+ }
1641
+ break;
1536
1642
 
1537
- default:
1538
- parser->state = s_header_value;
1539
- parser->header_state = h_general;
1540
- break;
1643
+ /* looking for 'Connection: close' */
1644
+ case h_matching_connection_close:
1645
+ parser->index++;
1646
+ if (parser->index > sizeof(CLOSE)-1 || c != CLOSE[parser->index]) {
1647
+ h_state = h_matching_connection_token;
1648
+ } else if (parser->index == sizeof(CLOSE)-2) {
1649
+ h_state = h_connection_close;
1650
+ }
1651
+ break;
1652
+
1653
+ /* looking for 'Connection: upgrade' */
1654
+ case h_matching_connection_upgrade:
1655
+ parser->index++;
1656
+ if (parser->index > sizeof(UPGRADE) - 1 ||
1657
+ c != UPGRADE[parser->index]) {
1658
+ h_state = h_matching_connection_token;
1659
+ } else if (parser->index == sizeof(UPGRADE)-2) {
1660
+ h_state = h_connection_upgrade;
1661
+ }
1662
+ break;
1663
+
1664
+ case h_matching_connection_token:
1665
+ if (ch == ',') {
1666
+ h_state = h_matching_connection_token_start;
1667
+ parser->index = 0;
1668
+ }
1669
+ break;
1670
+
1671
+ case h_transfer_encoding_chunked:
1672
+ if (ch != ' ') h_state = h_matching_transfer_encoding_token;
1673
+ break;
1674
+
1675
+ case h_connection_keep_alive:
1676
+ case h_connection_close:
1677
+ case h_connection_upgrade:
1678
+ if (ch == ',') {
1679
+ if (h_state == h_connection_keep_alive) {
1680
+ parser->flags |= F_CONNECTION_KEEP_ALIVE;
1681
+ } else if (h_state == h_connection_close) {
1682
+ parser->flags |= F_CONNECTION_CLOSE;
1683
+ } else if (h_state == h_connection_upgrade) {
1684
+ parser->flags |= F_CONNECTION_UPGRADE;
1685
+ }
1686
+ h_state = h_matching_connection_token_start;
1687
+ parser->index = 0;
1688
+ } else if (ch != ' ') {
1689
+ h_state = h_matching_connection_token;
1690
+ }
1691
+ break;
1692
+
1693
+ default:
1694
+ UPDATE_STATE(s_header_value);
1695
+ h_state = h_general;
1696
+ break;
1697
+ }
1541
1698
  }
1699
+ parser->header_state = h_state;
1700
+
1701
+ if (p == data + len)
1702
+ --p;
1703
+
1704
+ COUNT_HEADER_SIZE(p - start);
1542
1705
  break;
1543
1706
  }
1544
1707
 
1545
1708
  case s_header_almost_done:
1546
1709
  {
1547
- STRICT_CHECK(ch != LF);
1710
+ if (UNLIKELY(ch != LF)) {
1711
+ SET_ERRNO(HPE_LF_EXPECTED);
1712
+ goto error;
1713
+ }
1714
+
1715
+ UPDATE_STATE(s_header_value_lws);
1716
+ break;
1717
+ }
1548
1718
 
1549
- parser->state = s_header_value_lws;
1719
+ case s_header_value_lws:
1720
+ {
1721
+ if (ch == ' ' || ch == '\t') {
1722
+ if (parser->header_state == h_content_length_num) {
1723
+ /* treat obsolete line folding as space */
1724
+ parser->header_state = h_content_length_ws;
1725
+ }
1726
+ UPDATE_STATE(s_header_value_start);
1727
+ REEXECUTE();
1728
+ }
1550
1729
 
1730
+ /* finished the header */
1551
1731
  switch (parser->header_state) {
1552
1732
  case h_connection_keep_alive:
1553
1733
  parser->flags |= F_CONNECTION_KEEP_ALIVE;
@@ -1558,23 +1738,58 @@ size_t http_parser_execute (http_parser *parser,
1558
1738
  case h_transfer_encoding_chunked:
1559
1739
  parser->flags |= F_CHUNKED;
1560
1740
  break;
1741
+ case h_connection_upgrade:
1742
+ parser->flags |= F_CONNECTION_UPGRADE;
1743
+ break;
1561
1744
  default:
1562
1745
  break;
1563
1746
  }
1564
1747
 
1748
+ UPDATE_STATE(s_header_field_start);
1749
+ REEXECUTE();
1750
+ }
1751
+
1752
+ case s_header_value_discard_ws_almost_done:
1753
+ {
1754
+ STRICT_CHECK(ch != LF);
1755
+ UPDATE_STATE(s_header_value_discard_lws);
1565
1756
  break;
1566
1757
  }
1567
1758
 
1568
- case s_header_value_lws:
1759
+ case s_header_value_discard_lws:
1569
1760
  {
1570
- if (ch == ' ' || ch == '\t')
1571
- parser->state = s_header_value_start;
1572
- else
1573
- {
1574
- parser->state = s_header_field_start;
1575
- goto reexecute_byte;
1761
+ if (ch == ' ' || ch == '\t') {
1762
+ UPDATE_STATE(s_header_value_discard_ws);
1763
+ break;
1764
+ } else {
1765
+ switch (parser->header_state) {
1766
+ case h_connection_keep_alive:
1767
+ parser->flags |= F_CONNECTION_KEEP_ALIVE;
1768
+ break;
1769
+ case h_connection_close:
1770
+ parser->flags |= F_CONNECTION_CLOSE;
1771
+ break;
1772
+ case h_connection_upgrade:
1773
+ parser->flags |= F_CONNECTION_UPGRADE;
1774
+ break;
1775
+ case h_transfer_encoding_chunked:
1776
+ parser->flags |= F_CHUNKED;
1777
+ break;
1778
+ case h_content_length:
1779
+ /* do not allow empty content length */
1780
+ SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1781
+ goto error;
1782
+ break;
1783
+ default:
1784
+ break;
1785
+ }
1786
+
1787
+ /* header value was empty */
1788
+ MARK(header_value);
1789
+ UPDATE_STATE(s_header_field_start);
1790
+ CALLBACK_DATA_NOADVANCE(header_value);
1791
+ REEXECUTE();
1576
1792
  }
1577
- break;
1578
1793
  }
1579
1794
 
1580
1795
  case s_headers_almost_done:
@@ -1583,16 +1798,43 @@ size_t http_parser_execute (http_parser *parser,
1583
1798
 
1584
1799
  if (parser->flags & F_TRAILING) {
1585
1800
  /* End of a chunked request */
1586
- parser->state = NEW_MESSAGE();
1587
- CALLBACK_NOTIFY(message_complete);
1588
- break;
1801
+ UPDATE_STATE(s_message_done);
1802
+ CALLBACK_NOTIFY_NOADVANCE(chunk_complete);
1803
+ REEXECUTE();
1804
+ }
1805
+
1806
+ /* Cannot use transfer-encoding and a content-length header together
1807
+ per the HTTP specification. (RFC 7230 Section 3.3.3) */
1808
+ if ((parser->uses_transfer_encoding == 1) &&
1809
+ (parser->flags & F_CONTENTLENGTH)) {
1810
+ /* Allow it for lenient parsing as long as `Transfer-Encoding` is
1811
+ * not `chunked` or allow_length_with_encoding is set
1812
+ */
1813
+ if (parser->flags & F_CHUNKED) {
1814
+ if (!allow_chunked_length) {
1815
+ SET_ERRNO(HPE_UNEXPECTED_CONTENT_LENGTH);
1816
+ goto error;
1817
+ }
1818
+ } else if (!lenient) {
1819
+ SET_ERRNO(HPE_UNEXPECTED_CONTENT_LENGTH);
1820
+ goto error;
1821
+ }
1589
1822
  }
1590
1823
 
1591
- parser->state = s_headers_done;
1824
+ UPDATE_STATE(s_headers_done);
1592
1825
 
1593
1826
  /* Set this here so that on_headers_complete() callbacks can see it */
1594
- parser->upgrade =
1595
- (parser->flags & F_UPGRADE || parser->method == HTTP_CONNECT);
1827
+ if ((parser->flags & F_UPGRADE) &&
1828
+ (parser->flags & F_CONNECTION_UPGRADE)) {
1829
+ /* For responses, "Upgrade: foo" and "Connection: upgrade" are
1830
+ * mandatory only when it is a 101 Switching Protocols response,
1831
+ * otherwise it is purely informational, to announce support.
1832
+ */
1833
+ parser->upgrade =
1834
+ (parser->type == HTTP_REQUEST || parser->status_code == 101);
1835
+ } else {
1836
+ parser->upgrade = (parser->method == HTTP_CONNECT);
1837
+ }
1596
1838
 
1597
1839
  /* Here we call the headers_complete callback. This is somewhat
1598
1840
  * different than other callbacks because if the user returns 1, we
@@ -1608,59 +1850,90 @@ size_t http_parser_execute (http_parser *parser,
1608
1850
  case 0:
1609
1851
  break;
1610
1852
 
1853
+ case 2:
1854
+ parser->upgrade = 1;
1855
+
1856
+ /* fall through */
1611
1857
  case 1:
1612
1858
  parser->flags |= F_SKIPBODY;
1613
1859
  break;
1614
1860
 
1615
1861
  default:
1616
1862
  SET_ERRNO(HPE_CB_headers_complete);
1617
- return p - data; /* Error */
1863
+ RETURN(p - data); /* Error */
1618
1864
  }
1619
1865
  }
1620
1866
 
1621
1867
  if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
1622
- return p - data;
1868
+ RETURN(p - data);
1623
1869
  }
1624
1870
 
1625
- goto reexecute_byte;
1871
+ REEXECUTE();
1626
1872
  }
1627
1873
 
1628
1874
  case s_headers_done:
1629
1875
  {
1876
+ int hasBody;
1630
1877
  STRICT_CHECK(ch != LF);
1631
1878
 
1632
1879
  parser->nread = 0;
1633
-
1634
- /* Exit, the rest of the connect is in a different protocol. */
1635
- if (parser->upgrade) {
1636
- parser->state = NEW_MESSAGE();
1880
+ nread = 0;
1881
+
1882
+ hasBody = parser->flags & F_CHUNKED ||
1883
+ (parser->content_length > 0 && parser->content_length != ULLONG_MAX);
1884
+ if (parser->upgrade && (parser->method == HTTP_CONNECT ||
1885
+ (parser->flags & F_SKIPBODY) || !hasBody)) {
1886
+ /* Exit, the rest of the message is in a different protocol. */
1887
+ UPDATE_STATE(NEW_MESSAGE());
1637
1888
  CALLBACK_NOTIFY(message_complete);
1638
- return (p - data) + 1;
1889
+ RETURN((p - data) + 1);
1639
1890
  }
1640
1891
 
1641
1892
  if (parser->flags & F_SKIPBODY) {
1642
- parser->state = NEW_MESSAGE();
1893
+ UPDATE_STATE(NEW_MESSAGE());
1643
1894
  CALLBACK_NOTIFY(message_complete);
1644
1895
  } else if (parser->flags & F_CHUNKED) {
1645
- /* chunked encoding - ignore Content-Length header */
1646
- parser->state = s_chunk_size_start;
1896
+ /* chunked encoding - ignore Content-Length header,
1897
+ * prepare for a chunk */
1898
+ UPDATE_STATE(s_chunk_size_start);
1899
+ } else if (parser->uses_transfer_encoding == 1) {
1900
+ if (parser->type == HTTP_REQUEST && !lenient) {
1901
+ /* RFC 7230 3.3.3 */
1902
+
1903
+ /* If a Transfer-Encoding header field
1904
+ * is present in a request and the chunked transfer coding is not
1905
+ * the final encoding, the message body length cannot be determined
1906
+ * reliably; the server MUST respond with the 400 (Bad Request)
1907
+ * status code and then close the connection.
1908
+ */
1909
+ SET_ERRNO(HPE_INVALID_TRANSFER_ENCODING);
1910
+ RETURN(p - data); /* Error */
1911
+ } else {
1912
+ /* RFC 7230 3.3.3 */
1913
+
1914
+ /* If a Transfer-Encoding header field is present in a response and
1915
+ * the chunked transfer coding is not the final encoding, the
1916
+ * message body length is determined by reading the connection until
1917
+ * it is closed by the server.
1918
+ */
1919
+ UPDATE_STATE(s_body_identity_eof);
1920
+ }
1647
1921
  } else {
1648
1922
  if (parser->content_length == 0) {
1649
1923
  /* Content-Length header given but zero: Content-Length: 0\r\n */
1650
- parser->state = NEW_MESSAGE();
1924
+ UPDATE_STATE(NEW_MESSAGE());
1651
1925
  CALLBACK_NOTIFY(message_complete);
1652
1926
  } else if (parser->content_length != ULLONG_MAX) {
1653
1927
  /* Content-Length header given and non-zero */
1654
- parser->state = s_body_identity;
1928
+ UPDATE_STATE(s_body_identity);
1655
1929
  } else {
1656
- if (parser->type == HTTP_REQUEST ||
1657
- !http_message_needs_eof(parser)) {
1930
+ if (!http_message_needs_eof(parser)) {
1658
1931
  /* Assume content-length 0 - read the next */
1659
- parser->state = NEW_MESSAGE();
1932
+ UPDATE_STATE(NEW_MESSAGE());
1660
1933
  CALLBACK_NOTIFY(message_complete);
1661
1934
  } else {
1662
1935
  /* Read body until EOF */
1663
- parser->state = s_body_identity_eof;
1936
+ UPDATE_STATE(s_body_identity_eof);
1664
1937
  }
1665
1938
  }
1666
1939
  }
@@ -1686,7 +1959,7 @@ size_t http_parser_execute (http_parser *parser,
1686
1959
  p += to_read - 1;
1687
1960
 
1688
1961
  if (parser->content_length == 0) {
1689
- parser->state = s_message_done;
1962
+ UPDATE_STATE(s_message_done);
1690
1963
 
1691
1964
  /* Mimic CALLBACK_DATA_NOADVANCE() but with one extra byte.
1692
1965
  *
@@ -1698,7 +1971,7 @@ size_t http_parser_execute (http_parser *parser,
1698
1971
  * important for applications, but let's keep it for now.
1699
1972
  */
1700
1973
  CALLBACK_DATA_(body, p - body_mark + 1, p - data);
1701
- goto reexecute_byte;
1974
+ REEXECUTE();
1702
1975
  }
1703
1976
 
1704
1977
  break;
@@ -1712,23 +1985,27 @@ size_t http_parser_execute (http_parser *parser,
1712
1985
  break;
1713
1986
 
1714
1987
  case s_message_done:
1715
- parser->state = NEW_MESSAGE();
1988
+ UPDATE_STATE(NEW_MESSAGE());
1716
1989
  CALLBACK_NOTIFY(message_complete);
1990
+ if (parser->upgrade) {
1991
+ /* Exit, the rest of the message is in a different protocol. */
1992
+ RETURN((p - data) + 1);
1993
+ }
1717
1994
  break;
1718
1995
 
1719
1996
  case s_chunk_size_start:
1720
1997
  {
1721
- assert(parser->nread == 1);
1998
+ assert(nread == 1);
1722
1999
  assert(parser->flags & F_CHUNKED);
1723
2000
 
1724
2001
  unhex_val = unhex[(unsigned char)ch];
1725
- if (unhex_val == -1) {
2002
+ if (UNLIKELY(unhex_val == -1)) {
1726
2003
  SET_ERRNO(HPE_INVALID_CHUNK_SIZE);
1727
2004
  goto error;
1728
2005
  }
1729
2006
 
1730
2007
  parser->content_length = unhex_val;
1731
- parser->state = s_chunk_size;
2008
+ UPDATE_STATE(s_chunk_size);
1732
2009
  break;
1733
2010
  }
1734
2011
 
@@ -1739,7 +2016,7 @@ size_t http_parser_execute (http_parser *parser,
1739
2016
  assert(parser->flags & F_CHUNKED);
1740
2017
 
1741
2018
  if (ch == CR) {
1742
- parser->state = s_chunk_size_almost_done;
2019
+ UPDATE_STATE(s_chunk_size_almost_done);
1743
2020
  break;
1744
2021
  }
1745
2022
 
@@ -1747,7 +2024,7 @@ size_t http_parser_execute (http_parser *parser,
1747
2024
 
1748
2025
  if (unhex_val == -1) {
1749
2026
  if (ch == ';' || ch == ' ') {
1750
- parser->state = s_chunk_parameters;
2027
+ UPDATE_STATE(s_chunk_parameters);
1751
2028
  break;
1752
2029
  }
1753
2030
 
@@ -1759,8 +2036,8 @@ size_t http_parser_execute (http_parser *parser,
1759
2036
  t *= 16;
1760
2037
  t += unhex_val;
1761
2038
 
1762
- /* Overflow? */
1763
- if (t < parser->content_length || t == ULLONG_MAX) {
2039
+ /* Overflow? Test against a conservative limit for simplicity. */
2040
+ if (UNLIKELY((ULLONG_MAX - 16) / 16 < parser->content_length)) {
1764
2041
  SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1765
2042
  goto error;
1766
2043
  }
@@ -1774,7 +2051,7 @@ size_t http_parser_execute (http_parser *parser,
1774
2051
  assert(parser->flags & F_CHUNKED);
1775
2052
  /* just ignore this shit. TODO check for overflow */
1776
2053
  if (ch == CR) {
1777
- parser->state = s_chunk_size_almost_done;
2054
+ UPDATE_STATE(s_chunk_size_almost_done);
1778
2055
  break;
1779
2056
  }
1780
2057
  break;
@@ -1786,13 +2063,15 @@ size_t http_parser_execute (http_parser *parser,
1786
2063
  STRICT_CHECK(ch != LF);
1787
2064
 
1788
2065
  parser->nread = 0;
2066
+ nread = 0;
1789
2067
 
1790
2068
  if (parser->content_length == 0) {
1791
2069
  parser->flags |= F_TRAILING;
1792
- parser->state = s_header_field_start;
2070
+ UPDATE_STATE(s_header_field_start);
1793
2071
  } else {
1794
- parser->state = s_chunk_data;
2072
+ UPDATE_STATE(s_chunk_data);
1795
2073
  }
2074
+ CALLBACK_NOTIFY(chunk_header);
1796
2075
  break;
1797
2076
  }
1798
2077
 
@@ -1813,7 +2092,7 @@ size_t http_parser_execute (http_parser *parser,
1813
2092
  p += to_read - 1;
1814
2093
 
1815
2094
  if (parser->content_length == 0) {
1816
- parser->state = s_chunk_data_almost_done;
2095
+ UPDATE_STATE(s_chunk_data_almost_done);
1817
2096
  }
1818
2097
 
1819
2098
  break;
@@ -1823,7 +2102,7 @@ size_t http_parser_execute (http_parser *parser,
1823
2102
  assert(parser->flags & F_CHUNKED);
1824
2103
  assert(parser->content_length == 0);
1825
2104
  STRICT_CHECK(ch != CR);
1826
- parser->state = s_chunk_data_done;
2105
+ UPDATE_STATE(s_chunk_data_done);
1827
2106
  CALLBACK_DATA(body);
1828
2107
  break;
1829
2108
 
@@ -1831,7 +2110,9 @@ size_t http_parser_execute (http_parser *parser,
1831
2110
  assert(parser->flags & F_CHUNKED);
1832
2111
  STRICT_CHECK(ch != LF);
1833
2112
  parser->nread = 0;
1834
- parser->state = s_chunk_size_start;
2113
+ nread = 0;
2114
+ UPDATE_STATE(s_chunk_size_start);
2115
+ CALLBACK_NOTIFY(chunk_complete);
1835
2116
  break;
1836
2117
 
1837
2118
  default:
@@ -1841,7 +2122,7 @@ size_t http_parser_execute (http_parser *parser,
1841
2122
  }
1842
2123
  }
1843
2124
 
1844
- /* Run callbacks for any marks that we have leftover after we ran our of
2125
+ /* Run callbacks for any marks that we have leftover after we ran out of
1845
2126
  * bytes. There should be at most one of these set, so it's OK to invoke
1846
2127
  * them in series (unset marks will not result in callbacks).
1847
2128
  *
@@ -1854,21 +2135,23 @@ size_t http_parser_execute (http_parser *parser,
1854
2135
  assert(((header_field_mark ? 1 : 0) +
1855
2136
  (header_value_mark ? 1 : 0) +
1856
2137
  (url_mark ? 1 : 0) +
1857
- (body_mark ? 1 : 0)) <= 1);
2138
+ (body_mark ? 1 : 0) +
2139
+ (status_mark ? 1 : 0)) <= 1);
1858
2140
 
1859
2141
  CALLBACK_DATA_NOADVANCE(header_field);
1860
2142
  CALLBACK_DATA_NOADVANCE(header_value);
1861
2143
  CALLBACK_DATA_NOADVANCE(url);
1862
2144
  CALLBACK_DATA_NOADVANCE(body);
2145
+ CALLBACK_DATA_NOADVANCE(status);
1863
2146
 
1864
- return len;
2147
+ RETURN(len);
1865
2148
 
1866
2149
  error:
1867
2150
  if (HTTP_PARSER_ERRNO(parser) == HPE_OK) {
1868
2151
  SET_ERRNO(HPE_UNKNOWN);
1869
2152
  }
1870
2153
 
1871
- return (p - data);
2154
+ RETURN(p - data);
1872
2155
  }
1873
2156
 
1874
2157
 
@@ -1888,6 +2171,12 @@ http_message_needs_eof (const http_parser *parser)
1888
2171
  return 0;
1889
2172
  }
1890
2173
 
2174
+ /* RFC 7230 3.3.3, see `s_headers_almost_done` */
2175
+ if ((parser->uses_transfer_encoding == 1) &&
2176
+ (parser->flags & F_CHUNKED) == 0) {
2177
+ return 1;
2178
+ }
2179
+
1891
2180
  if ((parser->flags & F_CHUNKED) || parser->content_length != ULLONG_MAX) {
1892
2181
  return 0;
1893
2182
  }
@@ -1921,6 +2210,16 @@ http_method_str (enum http_method m)
1921
2210
  return ELEM_AT(method_strings, m, "<unknown>");
1922
2211
  }
1923
2212
 
2213
+ const char *
2214
+ http_status_str (enum http_status s)
2215
+ {
2216
+ switch (s) {
2217
+ #define XX(num, name, string) case HTTP_STATUS_##name: return #string;
2218
+ HTTP_STATUS_MAP(XX)
2219
+ #undef XX
2220
+ default: return "<unknown>";
2221
+ }
2222
+ }
1924
2223
 
1925
2224
  void
1926
2225
  http_parser_init (http_parser *parser, enum http_parser_type t)
@@ -1933,15 +2232,21 @@ http_parser_init (http_parser *parser, enum http_parser_type t)
1933
2232
  parser->http_errno = HPE_OK;
1934
2233
  }
1935
2234
 
2235
+ void
2236
+ http_parser_settings_init(http_parser_settings *settings)
2237
+ {
2238
+ memset(settings, 0, sizeof(*settings));
2239
+ }
2240
+
1936
2241
  const char *
1937
2242
  http_errno_name(enum http_errno err) {
1938
- assert(err < (sizeof(http_strerror_tab)/sizeof(http_strerror_tab[0])));
2243
+ assert(((size_t) err) < ARRAY_SIZE(http_strerror_tab));
1939
2244
  return http_strerror_tab[err].name;
1940
2245
  }
1941
2246
 
1942
2247
  const char *
1943
2248
  http_errno_description(enum http_errno err) {
1944
- assert(err < (sizeof(http_strerror_tab)/sizeof(http_strerror_tab[0])));
2249
+ assert(((size_t) err) < ARRAY_SIZE(http_strerror_tab));
1945
2250
  return http_strerror_tab[err].description;
1946
2251
  }
1947
2252
 
@@ -1975,7 +2280,7 @@ http_parse_host_char(enum http_host_state s, const char ch) {
1975
2280
  return s_http_host;
1976
2281
  }
1977
2282
 
1978
- /* FALLTHROUGH */
2283
+ /* fall through */
1979
2284
  case s_http_host_v6_end:
1980
2285
  if (ch == ':') {
1981
2286
  return s_http_host_port_start;
@@ -1988,12 +2293,29 @@ http_parse_host_char(enum http_host_state s, const char ch) {
1988
2293
  return s_http_host_v6_end;
1989
2294
  }
1990
2295
 
1991
- /* FALLTHROUGH */
2296
+ /* fall through */
1992
2297
  case s_http_host_v6_start:
1993
2298
  if (IS_HEX(ch) || ch == ':' || ch == '.') {
1994
2299
  return s_http_host_v6;
1995
2300
  }
1996
2301
 
2302
+ if (s == s_http_host_v6 && ch == '%') {
2303
+ return s_http_host_v6_zone_start;
2304
+ }
2305
+ break;
2306
+
2307
+ case s_http_host_v6_zone:
2308
+ if (ch == ']') {
2309
+ return s_http_host_v6_end;
2310
+ }
2311
+
2312
+ /* fall through */
2313
+ case s_http_host_v6_zone_start:
2314
+ /* RFC 6874 Zone ID consists of 1*( unreserved / pct-encoded) */
2315
+ if (IS_ALPHANUM(ch) || ch == '%' || ch == '.' || ch == '-' || ch == '_' ||
2316
+ ch == '~') {
2317
+ return s_http_host_v6_zone;
2318
+ }
1997
2319
  break;
1998
2320
 
1999
2321
  case s_http_host_port:
@@ -2017,6 +2339,8 @@ http_parse_host(const char * buf, struct http_parser_url *u, int found_at) {
2017
2339
  const char *p;
2018
2340
  size_t buflen = u->field_data[UF_HOST].off + u->field_data[UF_HOST].len;
2019
2341
 
2342
+ assert(u->field_set & (1 << UF_HOST));
2343
+
2020
2344
  u->field_data[UF_HOST].len = 0;
2021
2345
 
2022
2346
  s = found_at ? s_http_userinfo_start : s_http_host_start;
@@ -2031,21 +2355,26 @@ http_parse_host(const char * buf, struct http_parser_url *u, int found_at) {
2031
2355
  switch(new_s) {
2032
2356
  case s_http_host:
2033
2357
  if (s != s_http_host) {
2034
- u->field_data[UF_HOST].off = p - buf;
2358
+ u->field_data[UF_HOST].off = (uint16_t)(p - buf);
2035
2359
  }
2036
2360
  u->field_data[UF_HOST].len++;
2037
2361
  break;
2038
2362
 
2039
2363
  case s_http_host_v6:
2040
2364
  if (s != s_http_host_v6) {
2041
- u->field_data[UF_HOST].off = p - buf;
2365
+ u->field_data[UF_HOST].off = (uint16_t)(p - buf);
2042
2366
  }
2043
2367
  u->field_data[UF_HOST].len++;
2044
2368
  break;
2045
2369
 
2370
+ case s_http_host_v6_zone_start:
2371
+ case s_http_host_v6_zone:
2372
+ u->field_data[UF_HOST].len++;
2373
+ break;
2374
+
2046
2375
  case s_http_host_port:
2047
2376
  if (s != s_http_host_port) {
2048
- u->field_data[UF_PORT].off = p - buf;
2377
+ u->field_data[UF_PORT].off = (uint16_t)(p - buf);
2049
2378
  u->field_data[UF_PORT].len = 0;
2050
2379
  u->field_set |= (1 << UF_PORT);
2051
2380
  }
@@ -2054,7 +2383,7 @@ http_parse_host(const char * buf, struct http_parser_url *u, int found_at) {
2054
2383
 
2055
2384
  case s_http_userinfo:
2056
2385
  if (s != s_http_userinfo) {
2057
- u->field_data[UF_USERINFO].off = p - buf ;
2386
+ u->field_data[UF_USERINFO].off = (uint16_t)(p - buf);
2058
2387
  u->field_data[UF_USERINFO].len = 0;
2059
2388
  u->field_set |= (1 << UF_USERINFO);
2060
2389
  }
@@ -2072,6 +2401,8 @@ http_parse_host(const char * buf, struct http_parser_url *u, int found_at) {
2072
2401
  case s_http_host_start:
2073
2402
  case s_http_host_v6_start:
2074
2403
  case s_http_host_v6:
2404
+ case s_http_host_v6_zone_start:
2405
+ case s_http_host_v6_zone:
2075
2406
  case s_http_host_port_start:
2076
2407
  case s_http_userinfo:
2077
2408
  case s_http_userinfo_start:
@@ -2083,6 +2414,11 @@ http_parse_host(const char * buf, struct http_parser_url *u, int found_at) {
2083
2414
  return 0;
2084
2415
  }
2085
2416
 
2417
+ void
2418
+ http_parser_url_init(struct http_parser_url *u) {
2419
+ memset(u, 0, sizeof(*u));
2420
+ }
2421
+
2086
2422
  int
2087
2423
  http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
2088
2424
  struct http_parser_url *u)
@@ -2092,9 +2428,13 @@ http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
2092
2428
  enum http_parser_url_fields uf, old_uf;
2093
2429
  int found_at = 0;
2094
2430
 
2431
+ if (buflen == 0) {
2432
+ return 1;
2433
+ }
2434
+
2095
2435
  u->port = u->field_set = 0;
2096
2436
  s = is_connect ? s_req_server_start : s_req_spaces_before_url;
2097
- uf = old_uf = UF_MAX;
2437
+ old_uf = UF_MAX;
2098
2438
 
2099
2439
  for (p = buf; p < buf + buflen; p++) {
2100
2440
  s = parse_url_char(s, *p);
@@ -2119,7 +2459,7 @@ http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
2119
2459
  case s_req_server_with_at:
2120
2460
  found_at = 1;
2121
2461
 
2122
- /* FALLTROUGH */
2462
+ /* fall through */
2123
2463
  case s_req_server:
2124
2464
  uf = UF_HOST;
2125
2465
  break;
@@ -2147,7 +2487,7 @@ http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
2147
2487
  continue;
2148
2488
  }
2149
2489
 
2150
- u->field_data[uf].off = p - buf;
2490
+ u->field_data[uf].off = (uint16_t)(p - buf);
2151
2491
  u->field_data[uf].len = 1;
2152
2492
 
2153
2493
  u->field_set |= (1 << uf);
@@ -2156,7 +2496,12 @@ http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
2156
2496
 
2157
2497
  /* host must be present if there is a schema */
2158
2498
  /* parsing http:///toto will fail */
2159
- if ((u->field_set & ((1 << UF_SCHEMA) | (1 << UF_HOST))) != 0) {
2499
+ if ((u->field_set & (1 << UF_SCHEMA)) &&
2500
+ (u->field_set & (1 << UF_HOST)) == 0) {
2501
+ return 1;
2502
+ }
2503
+
2504
+ if (u->field_set & (1 << UF_HOST)) {
2160
2505
  if (http_parse_host(buf, u, found_at) != 0) {
2161
2506
  return 1;
2162
2507
  }
@@ -2168,12 +2513,27 @@ http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
2168
2513
  }
2169
2514
 
2170
2515
  if (u->field_set & (1 << UF_PORT)) {
2171
- /* Don't bother with endp; we've already validated the string */
2172
- unsigned long v = strtoul(buf + u->field_data[UF_PORT].off, NULL, 10);
2173
-
2174
- /* Ports have a max value of 2^16 */
2175
- if (v > 0xffff) {
2176
- return 1;
2516
+ uint16_t off;
2517
+ uint16_t len;
2518
+ const char* p;
2519
+ const char* end;
2520
+ unsigned long v;
2521
+
2522
+ off = u->field_data[UF_PORT].off;
2523
+ len = u->field_data[UF_PORT].len;
2524
+ end = buf + off + len;
2525
+
2526
+ /* NOTE: The characters are already validated and are in the [0-9] range */
2527
+ assert((size_t) (off + len) <= buflen && "Port number overflow");
2528
+ v = 0;
2529
+ for (p = buf + off; p < end; p++) {
2530
+ v *= 10;
2531
+ v += *p - '0';
2532
+
2533
+ /* Ports have a max value of 2^16 */
2534
+ if (v > 0xffff) {
2535
+ return 1;
2536
+ }
2177
2537
  }
2178
2538
 
2179
2539
  u->port = (uint16_t) v;
@@ -2190,6 +2550,7 @@ http_parser_pause(http_parser *parser, int paused) {
2190
2550
  */
2191
2551
  if (HTTP_PARSER_ERRNO(parser) == HPE_OK ||
2192
2552
  HTTP_PARSER_ERRNO(parser) == HPE_PAUSED) {
2553
+ uint32_t nread = parser->nread; /* used by the SET_ERRNO macro */
2193
2554
  SET_ERRNO((paused) ? HPE_PAUSED : HPE_OK);
2194
2555
  } else {
2195
2556
  assert(0 && "Attempting to pause parser in error state");
@@ -2207,3 +2568,8 @@ http_parser_version(void) {
2207
2568
  HTTP_PARSER_VERSION_MINOR * 0x00100 |
2208
2569
  HTTP_PARSER_VERSION_PATCH * 0x00001;
2209
2570
  }
2571
+
2572
+ void
2573
+ http_parser_set_max_header_size(uint32_t size) {
2574
+ max_header_size = size;
2575
+ }