http-parser 1.0.3 → 1.2.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 4ee24befc2a0a23054f11edb4379b9b6b1f8d21a
4
- data.tar.gz: 816280693464c8c5daa1c7fb20fe4954c3dd1a26
2
+ SHA256:
3
+ metadata.gz: e23f8fd15e7969bbac10a4e34f3f0fcab492ba503f2f8bdf14a4134fd3c3dd0c
4
+ data.tar.gz: d20b92acb6465ebaaf9bc8a8843241f6730e4a4ecfde5f198d6c5b389bd5ce9b
5
5
  SHA512:
6
- metadata.gz: 1846519857ad32e3f688cf18f28fb3dd2cd33fa4c0a0909d6a2c934218b1ae5df30101b54a8b100cb8caaf28482a964e8e17ef5594abc91fddcb575d67ac5091
7
- data.tar.gz: 82c5dba81e1673ab22497e5e1adda9d6f89ab115795747bc64a0043ced1511880c65d003b379a4f35e89976d92bdf793933bf196b2002876edb431f2c2dcc4c6
6
+ metadata.gz: ff9a8fb170d22563613b4743079d6104fd88bbf7d37b94ba71148567291c086a435026c360e59b53c1513d85a4a8c0942e938c5ca9e68728820bea551e18f20c
7
+ data.tar.gz: 1bb252fcae0be2ab448812f5070b29b67b07839ae19fe091febcd31078fa08712efbce6cfbb76103edd854a8b588960b6d0403c4a591e734c765733284435878
@@ -3,6 +3,6 @@ require 'ffi-compiler/compile_task'
3
3
  FFI::Compiler::CompileTask.new('http-parser-ext') do |t|
4
4
  t.cflags << "-Wall -Wextra -O3"
5
5
  t.cflags << "-D_GNU_SOURCE=1" if RbConfig::CONFIG["host_os"].downcase =~ /mingw/
6
- t.cflags << "-arch x86_64 -arch i386" if t.platform.mac?
7
- t.ldflags << "-arch x86_64 -arch i386" if t.platform.mac?
6
+ t.cflags << "-arch x86_64" if t.platform.mac?
7
+ t.ldflags << "-arch x86_64" if t.platform.mac?
8
8
  end
@@ -1,7 +1,4 @@
1
- /* Based on src/http/ngx_http_parse.c from NGINX copyright Igor Sysoev
2
- *
3
- * Additional changes are licensed under the same terms as NGINX and
4
- * copyright Joyent, Inc. and other Node contributors. All rights reserved.
1
+ /* Copyright Joyent, Inc. and other Node contributors.
5
2
  *
6
3
  * Permission is hereby granted, free of charge, to any person obtaining a copy
7
4
  * of this software and associated documentation files (the "Software"), to
@@ -25,10 +22,11 @@
25
22
  #include <assert.h>
26
23
  #include <stddef.h>
27
24
  #include <ctype.h>
28
- #include <stdlib.h>
29
25
  #include <string.h>
30
26
  #include <limits.h>
31
27
 
28
+ static uint32_t max_header_size = HTTP_MAX_HEADER_SIZE;
29
+
32
30
  #ifndef ULLONG_MAX
33
31
  # define ULLONG_MAX ((uint64_t) -1) /* 2^64-1 */
34
32
  #endif
@@ -53,22 +51,45 @@
53
51
 
54
52
  #define SET_ERRNO(e) \
55
53
  do { \
54
+ parser->nread = nread; \
56
55
  parser->http_errno = (e); \
57
56
  } while(0)
58
57
 
58
+ #define CURRENT_STATE() p_state
59
+ #define UPDATE_STATE(V) p_state = (enum state) (V);
60
+ #define RETURN(V) \
61
+ do { \
62
+ parser->nread = nread; \
63
+ parser->state = CURRENT_STATE(); \
64
+ return (V); \
65
+ } while (0);
66
+ #define REEXECUTE() \
67
+ goto reexecute; \
68
+
69
+
70
+ #ifdef __GNUC__
71
+ # define LIKELY(X) __builtin_expect(!!(X), 1)
72
+ # define UNLIKELY(X) __builtin_expect(!!(X), 0)
73
+ #else
74
+ # define LIKELY(X) (X)
75
+ # define UNLIKELY(X) (X)
76
+ #endif
77
+
59
78
 
60
79
  /* Run the notify callback FOR, returning ER if it fails */
61
80
  #define CALLBACK_NOTIFY_(FOR, ER) \
62
81
  do { \
63
82
  assert(HTTP_PARSER_ERRNO(parser) == HPE_OK); \
64
83
  \
65
- if (settings->on_##FOR) { \
66
- if (0 != settings->on_##FOR(parser)) { \
84
+ if (LIKELY(settings->on_##FOR)) { \
85
+ parser->state = CURRENT_STATE(); \
86
+ if (UNLIKELY(0 != settings->on_##FOR(parser))) { \
67
87
  SET_ERRNO(HPE_CB_##FOR); \
68
88
  } \
89
+ UPDATE_STATE(parser->state); \
69
90
  \
70
91
  /* We either errored above or got paused; get out */ \
71
- if (HTTP_PARSER_ERRNO(parser) != HPE_OK) { \
92
+ if (UNLIKELY(HTTP_PARSER_ERRNO(parser) != HPE_OK)) { \
72
93
  return (ER); \
73
94
  } \
74
95
  } \
@@ -86,20 +107,23 @@ do { \
86
107
  assert(HTTP_PARSER_ERRNO(parser) == HPE_OK); \
87
108
  \
88
109
  if (FOR##_mark) { \
89
- if (settings->on_##FOR) { \
90
- if (0 != settings->on_##FOR(parser, FOR##_mark, (LEN))) { \
110
+ if (LIKELY(settings->on_##FOR)) { \
111
+ parser->state = CURRENT_STATE(); \
112
+ if (UNLIKELY(0 != \
113
+ settings->on_##FOR(parser, FOR##_mark, (LEN)))) { \
91
114
  SET_ERRNO(HPE_CB_##FOR); \
92
115
  } \
116
+ UPDATE_STATE(parser->state); \
93
117
  \
94
118
  /* We either errored above or got paused; get out */ \
95
- if (HTTP_PARSER_ERRNO(parser) != HPE_OK) { \
119
+ if (UNLIKELY(HTTP_PARSER_ERRNO(parser) != HPE_OK)) { \
96
120
  return (ER); \
97
121
  } \
98
122
  } \
99
123
  FOR##_mark = NULL; \
100
124
  } \
101
125
  } while (0)
102
-
126
+
103
127
  /* Run the data callback FOR and consume the current byte */
104
128
  #define CALLBACK_DATA(FOR) \
105
129
  CALLBACK_DATA_(FOR, p - FOR##_mark, p - data + 1)
@@ -116,6 +140,26 @@ do { \
116
140
  } \
117
141
  } while (0)
118
142
 
143
+ /* Don't allow the total size of the HTTP headers (including the status
144
+ * line) to exceed max_header_size. This check is here to protect
145
+ * embedders against denial-of-service attacks where the attacker feeds
146
+ * us a never-ending header that the embedder keeps buffering.
147
+ *
148
+ * This check is arguably the responsibility of embedders but we're doing
149
+ * it on the embedder's behalf because most won't bother and this way we
150
+ * make the web a little safer. max_header_size is still far bigger
151
+ * than any reasonable request or response so this should never affect
152
+ * day-to-day operation.
153
+ */
154
+ #define COUNT_HEADER_SIZE(V) \
155
+ do { \
156
+ nread += (uint32_t)(V); \
157
+ if (UNLIKELY(nread > max_header_size)) { \
158
+ SET_ERRNO(HPE_HEADER_OVERFLOW); \
159
+ goto error; \
160
+ } \
161
+ } while (0)
162
+
119
163
 
120
164
  #define PROXY_CONNECTION "proxy-connection"
121
165
  #define CONNECTION "connection"
@@ -152,7 +196,7 @@ static const char tokens[256] = {
152
196
  /* 24 can 25 em 26 sub 27 esc 28 fs 29 gs 30 rs 31 us */
153
197
  0, 0, 0, 0, 0, 0, 0, 0,
154
198
  /* 32 sp 33 ! 34 " 35 # 36 $ 37 % 38 & 39 ' */
155
- 0, '!', 0, '#', '$', '%', '&', '\'',
199
+ ' ', '!', 0, '#', '$', '%', '&', '\'',
156
200
  /* 40 ( 41 ) 42 * 43 + 44 , 45 - 46 . 47 / */
157
201
  0, 0, '*', '+', 0, '-', '.', 0,
158
202
  /* 48 0 49 1 50 2 51 3 52 4 53 5 54 6 55 7 */
@@ -242,12 +286,13 @@ enum state
242
286
  , s_res_HT
243
287
  , s_res_HTT
244
288
  , s_res_HTTP
245
- , s_res_first_http_major
246
289
  , s_res_http_major
247
- , s_res_first_http_minor
290
+ , s_res_http_dot
248
291
  , s_res_http_minor
292
+ , s_res_http_end
249
293
  , s_res_first_status_code
250
294
  , s_res_status_code
295
+ , s_res_status_start
251
296
  , s_res_status
252
297
  , s_res_line_almost_done
253
298
 
@@ -271,14 +316,19 @@ enum state
271
316
  , s_req_http_HT
272
317
  , s_req_http_HTT
273
318
  , s_req_http_HTTP
274
- , s_req_first_http_major
319
+ , s_req_http_I
320
+ , s_req_http_IC
275
321
  , s_req_http_major
276
- , s_req_first_http_minor
322
+ , s_req_http_dot
277
323
  , s_req_http_minor
324
+ , s_req_http_end
278
325
  , s_req_line_almost_done
279
326
 
280
327
  , s_header_field_start
281
328
  , s_header_field
329
+ , s_header_value_discard_ws
330
+ , s_header_value_discard_ws_almost_done
331
+ , s_header_value_discard_lws
282
332
  , s_header_value_start
283
333
  , s_header_value
284
334
  , s_header_value_lws
@@ -326,16 +376,25 @@ enum header_states
326
376
 
327
377
  , h_connection
328
378
  , h_content_length
379
+ , h_content_length_num
380
+ , h_content_length_ws
329
381
  , h_transfer_encoding
330
382
  , h_upgrade
331
383
 
384
+ , h_matching_transfer_encoding_token_start
332
385
  , h_matching_transfer_encoding_chunked
386
+ , h_matching_transfer_encoding_token
387
+
388
+ , h_matching_connection_token_start
333
389
  , h_matching_connection_keep_alive
334
390
  , h_matching_connection_close
391
+ , h_matching_connection_upgrade
392
+ , h_matching_connection_token
335
393
 
336
394
  , h_transfer_encoding_chunked
337
395
  , h_connection_keep_alive
338
396
  , h_connection_close
397
+ , h_connection_upgrade
339
398
  };
340
399
 
341
400
  enum http_host_state
@@ -348,6 +407,8 @@ enum http_host_state
348
407
  , s_http_host
349
408
  , s_http_host_v6
350
409
  , s_http_host_v6_end
410
+ , s_http_host_v6_zone_start
411
+ , s_http_host_v6_zone
351
412
  , s_http_host_port_start
352
413
  , s_http_host_port
353
414
  };
@@ -367,18 +428,26 @@ enum http_host_state
367
428
  (c) == ';' || (c) == ':' || (c) == '&' || (c) == '=' || (c) == '+' || \
368
429
  (c) == '$' || (c) == ',')
369
430
 
431
+ #define STRICT_TOKEN(c) ((c == ' ') ? 0 : tokens[(unsigned char)c])
432
+
370
433
  #if HTTP_PARSER_STRICT
371
- #define TOKEN(c) (tokens[(unsigned char)c])
434
+ #define TOKEN(c) STRICT_TOKEN(c)
372
435
  #define IS_URL_CHAR(c) (BIT_AT(normal_url_char, (unsigned char)c))
373
436
  #define IS_HOST_CHAR(c) (IS_ALPHANUM(c) || (c) == '.' || (c) == '-')
374
437
  #else
375
- #define TOKEN(c) ((c == ' ') ? ' ' : tokens[(unsigned char)c])
438
+ #define TOKEN(c) tokens[(unsigned char)c]
376
439
  #define IS_URL_CHAR(c) \
377
440
  (BIT_AT(normal_url_char, (unsigned char)c) || ((c) & 0x80))
378
441
  #define IS_HOST_CHAR(c) \
379
442
  (IS_ALPHANUM(c) || (c) == '.' || (c) == '-' || (c) == '_')
380
443
  #endif
381
444
 
445
+ /**
446
+ * Verify that a char is a valid visible (printable) US-ASCII
447
+ * character or %x80-FF
448
+ **/
449
+ #define IS_HEADER_CHAR(ch) \
450
+ (ch == CR || ch == LF || ch == 9 || ((unsigned char)ch > 31 && ch != 127))
382
451
 
383
452
  #define start_state (parser->type == HTTP_REQUEST ? s_start_req : s_start_res)
384
453
 
@@ -480,7 +549,7 @@ parse_url_char(enum state s, const char ch)
480
549
  return s_dead;
481
550
  }
482
551
 
483
- /* FALLTHROUGH */
552
+ /* fall through */
484
553
  case s_req_server_start:
485
554
  case s_req_server:
486
555
  if (ch == '/') {
@@ -581,6 +650,12 @@ size_t http_parser_execute (http_parser *parser,
581
650
  const char *header_value_mark = 0;
582
651
  const char *url_mark = 0;
583
652
  const char *body_mark = 0;
653
+ const char *status_mark = 0;
654
+ enum state p_state = (enum state) parser->state;
655
+ const unsigned int lenient = parser->lenient_http_headers;
656
+ const unsigned int allow_chunked_length = parser->allow_chunked_length;
657
+
658
+ uint32_t nread = parser->nread;
584
659
 
585
660
  /* We're in an error state. Don't bother doing anything. */
586
661
  if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
@@ -588,7 +663,7 @@ size_t http_parser_execute (http_parser *parser,
588
663
  }
589
664
 
590
665
  if (len == 0) {
591
- switch (parser->state) {
666
+ switch (CURRENT_STATE()) {
592
667
  case s_body_identity_eof:
593
668
  /* Use of CALLBACK_NOTIFY() here would erroneously return 1 byte read if
594
669
  * we got paused.
@@ -609,11 +684,11 @@ size_t http_parser_execute (http_parser *parser,
609
684
  }
610
685
 
611
686
 
612
- if (parser->state == s_header_field)
687
+ if (CURRENT_STATE() == s_header_field)
613
688
  header_field_mark = data;
614
- if (parser->state == s_header_value)
689
+ if (CURRENT_STATE() == s_header_value)
615
690
  header_value_mark = data;
616
- switch (parser->state) {
691
+ switch (CURRENT_STATE()) {
617
692
  case s_req_path:
618
693
  case s_req_schema:
619
694
  case s_req_schema_slash:
@@ -627,38 +702,27 @@ size_t http_parser_execute (http_parser *parser,
627
702
  case s_req_fragment:
628
703
  url_mark = data;
629
704
  break;
705
+ case s_res_status:
706
+ status_mark = data;
707
+ break;
708
+ default:
709
+ break;
630
710
  }
631
711
 
632
712
  for (p=data; p != data + len; p++) {
633
713
  ch = *p;
634
714
 
635
- if (PARSING_HEADER(parser->state)) {
636
- ++parser->nread;
637
- /* Don't allow the total size of the HTTP headers (including the status
638
- * line) to exceed HTTP_MAX_HEADER_SIZE. This check is here to protect
639
- * embedders against denial-of-service attacks where the attacker feeds
640
- * us a never-ending header that the embedder keeps buffering.
641
- *
642
- * This check is arguably the responsibility of embedders but we're doing
643
- * it on the embedder's behalf because most won't bother and this way we
644
- * make the web a little safer. HTTP_MAX_HEADER_SIZE is still far bigger
645
- * than any reasonable request or response so this should never affect
646
- * day-to-day operation.
647
- */
648
- if (parser->nread > HTTP_MAX_HEADER_SIZE) {
649
- SET_ERRNO(HPE_HEADER_OVERFLOW);
650
- goto error;
651
- }
652
- }
715
+ if (PARSING_HEADER(CURRENT_STATE()))
716
+ COUNT_HEADER_SIZE(1);
653
717
 
654
- reexecute_byte:
655
- switch (parser->state) {
718
+ reexecute:
719
+ switch (CURRENT_STATE()) {
656
720
 
657
721
  case s_dead:
658
722
  /* this state is used after a 'Connection: close' message
659
723
  * the parser will error out if it reads another message
660
724
  */
661
- if (ch == CR || ch == LF)
725
+ if (LIKELY(ch == CR || ch == LF))
662
726
  break;
663
727
 
664
728
  SET_ERRNO(HPE_CLOSED_CONNECTION);
@@ -669,16 +733,17 @@ size_t http_parser_execute (http_parser *parser,
669
733
  if (ch == CR || ch == LF)
670
734
  break;
671
735
  parser->flags = 0;
736
+ parser->uses_transfer_encoding = 0;
672
737
  parser->content_length = ULLONG_MAX;
673
738
 
674
739
  if (ch == 'H') {
675
- parser->state = s_res_or_resp_H;
740
+ UPDATE_STATE(s_res_or_resp_H);
676
741
 
677
742
  CALLBACK_NOTIFY(message_begin);
678
743
  } else {
679
744
  parser->type = HTTP_REQUEST;
680
- parser->state = s_start_req;
681
- goto reexecute_byte;
745
+ UPDATE_STATE(s_start_req);
746
+ REEXECUTE();
682
747
  }
683
748
 
684
749
  break;
@@ -687,9 +752,9 @@ size_t http_parser_execute (http_parser *parser,
687
752
  case s_res_or_resp_H:
688
753
  if (ch == 'T') {
689
754
  parser->type = HTTP_RESPONSE;
690
- parser->state = s_res_HT;
755
+ UPDATE_STATE(s_res_HT);
691
756
  } else {
692
- if (ch != 'E') {
757
+ if (UNLIKELY(ch != 'E')) {
693
758
  SET_ERRNO(HPE_INVALID_CONSTANT);
694
759
  goto error;
695
760
  }
@@ -697,27 +762,23 @@ size_t http_parser_execute (http_parser *parser,
697
762
  parser->type = HTTP_REQUEST;
698
763
  parser->method = HTTP_HEAD;
699
764
  parser->index = 2;
700
- parser->state = s_req_method;
765
+ UPDATE_STATE(s_req_method);
701
766
  }
702
767
  break;
703
768
 
704
769
  case s_start_res:
705
770
  {
771
+ if (ch == CR || ch == LF)
772
+ break;
706
773
  parser->flags = 0;
774
+ parser->uses_transfer_encoding = 0;
707
775
  parser->content_length = ULLONG_MAX;
708
776
 
709
- switch (ch) {
710
- case 'H':
711
- parser->state = s_res_H;
712
- break;
713
-
714
- case CR:
715
- case LF:
716
- break;
717
-
718
- default:
719
- SET_ERRNO(HPE_INVALID_CONSTANT);
720
- goto error;
777
+ if (ch == 'H') {
778
+ UPDATE_STATE(s_res_H);
779
+ } else {
780
+ SET_ERRNO(HPE_INVALID_CONSTANT);
781
+ goto error;
721
782
  }
722
783
 
723
784
  CALLBACK_NOTIFY(message_begin);
@@ -726,90 +787,63 @@ size_t http_parser_execute (http_parser *parser,
726
787
 
727
788
  case s_res_H:
728
789
  STRICT_CHECK(ch != 'T');
729
- parser->state = s_res_HT;
790
+ UPDATE_STATE(s_res_HT);
730
791
  break;
731
792
 
732
793
  case s_res_HT:
733
794
  STRICT_CHECK(ch != 'T');
734
- parser->state = s_res_HTT;
795
+ UPDATE_STATE(s_res_HTT);
735
796
  break;
736
797
 
737
798
  case s_res_HTT:
738
799
  STRICT_CHECK(ch != 'P');
739
- parser->state = s_res_HTTP;
800
+ UPDATE_STATE(s_res_HTTP);
740
801
  break;
741
802
 
742
803
  case s_res_HTTP:
743
804
  STRICT_CHECK(ch != '/');
744
- parser->state = s_res_first_http_major;
805
+ UPDATE_STATE(s_res_http_major);
745
806
  break;
746
807
 
747
- case s_res_first_http_major:
748
- if (ch < '0' || ch > '9') {
808
+ case s_res_http_major:
809
+ if (UNLIKELY(!IS_NUM(ch))) {
749
810
  SET_ERRNO(HPE_INVALID_VERSION);
750
811
  goto error;
751
812
  }
752
813
 
753
814
  parser->http_major = ch - '0';
754
- parser->state = s_res_http_major;
815
+ UPDATE_STATE(s_res_http_dot);
755
816
  break;
756
817
 
757
- /* major HTTP version or dot */
758
- case s_res_http_major:
818
+ case s_res_http_dot:
759
819
  {
760
- if (ch == '.') {
761
- parser->state = s_res_first_http_minor;
762
- break;
763
- }
764
-
765
- if (!IS_NUM(ch)) {
766
- SET_ERRNO(HPE_INVALID_VERSION);
767
- goto error;
768
- }
769
-
770
- parser->http_major *= 10;
771
- parser->http_major += ch - '0';
772
-
773
- if (parser->http_major > 999) {
820
+ if (UNLIKELY(ch != '.')) {
774
821
  SET_ERRNO(HPE_INVALID_VERSION);
775
822
  goto error;
776
823
  }
777
824
 
825
+ UPDATE_STATE(s_res_http_minor);
778
826
  break;
779
827
  }
780
828
 
781
- /* first digit of minor HTTP version */
782
- case s_res_first_http_minor:
783
- if (!IS_NUM(ch)) {
829
+ case s_res_http_minor:
830
+ if (UNLIKELY(!IS_NUM(ch))) {
784
831
  SET_ERRNO(HPE_INVALID_VERSION);
785
832
  goto error;
786
833
  }
787
834
 
788
835
  parser->http_minor = ch - '0';
789
- parser->state = s_res_http_minor;
836
+ UPDATE_STATE(s_res_http_end);
790
837
  break;
791
838
 
792
- /* minor HTTP version or end of request line */
793
- case s_res_http_minor:
839
+ case s_res_http_end:
794
840
  {
795
- if (ch == ' ') {
796
- parser->state = s_res_first_status_code;
797
- break;
798
- }
799
-
800
- if (!IS_NUM(ch)) {
801
- SET_ERRNO(HPE_INVALID_VERSION);
802
- goto error;
803
- }
804
-
805
- parser->http_minor *= 10;
806
- parser->http_minor += ch - '0';
807
-
808
- if (parser->http_minor > 999) {
841
+ if (UNLIKELY(ch != ' ')) {
809
842
  SET_ERRNO(HPE_INVALID_VERSION);
810
843
  goto error;
811
844
  }
812
845
 
846
+ UPDATE_STATE(s_res_first_status_code);
813
847
  break;
814
848
  }
815
849
 
@@ -824,7 +858,7 @@ size_t http_parser_execute (http_parser *parser,
824
858
  goto error;
825
859
  }
826
860
  parser->status_code = ch - '0';
827
- parser->state = s_res_status_code;
861
+ UPDATE_STATE(s_res_status_code);
828
862
  break;
829
863
  }
830
864
 
@@ -833,13 +867,12 @@ size_t http_parser_execute (http_parser *parser,
833
867
  if (!IS_NUM(ch)) {
834
868
  switch (ch) {
835
869
  case ' ':
836
- parser->state = s_res_status;
870
+ UPDATE_STATE(s_res_status_start);
837
871
  break;
838
872
  case CR:
839
- parser->state = s_res_line_almost_done;
840
- break;
841
873
  case LF:
842
- parser->state = s_header_field_start;
874
+ UPDATE_STATE(s_res_status_start);
875
+ REEXECUTE();
843
876
  break;
844
877
  default:
845
878
  SET_ERRNO(HPE_INVALID_STATUS);
@@ -851,7 +884,7 @@ size_t http_parser_execute (http_parser *parser,
851
884
  parser->status_code *= 10;
852
885
  parser->status_code += ch - '0';
853
886
 
854
- if (parser->status_code > 999) {
887
+ if (UNLIKELY(parser->status_code > 999)) {
855
888
  SET_ERRNO(HPE_INVALID_STATUS);
856
889
  goto error;
857
890
  }
@@ -859,24 +892,36 @@ size_t http_parser_execute (http_parser *parser,
859
892
  break;
860
893
  }
861
894
 
895
+ case s_res_status_start:
896
+ {
897
+ MARK(status);
898
+ UPDATE_STATE(s_res_status);
899
+ parser->index = 0;
900
+
901
+ if (ch == CR || ch == LF)
902
+ REEXECUTE();
903
+
904
+ break;
905
+ }
906
+
862
907
  case s_res_status:
863
- /* the human readable status. e.g. "NOT FOUND"
864
- * we are not humans so just ignore this */
865
908
  if (ch == CR) {
866
- parser->state = s_res_line_almost_done;
909
+ UPDATE_STATE(s_res_line_almost_done);
910
+ CALLBACK_DATA(status);
867
911
  break;
868
912
  }
869
913
 
870
914
  if (ch == LF) {
871
- parser->state = s_header_field_start;
915
+ UPDATE_STATE(s_header_field_start);
916
+ CALLBACK_DATA(status);
872
917
  break;
873
918
  }
919
+
874
920
  break;
875
921
 
876
922
  case s_res_line_almost_done:
877
923
  STRICT_CHECK(ch != LF);
878
- parser->state = s_header_field_start;
879
- CALLBACK_NOTIFY(status_complete);
924
+ UPDATE_STATE(s_header_field_start);
880
925
  break;
881
926
 
882
927
  case s_start_req:
@@ -884,9 +929,10 @@ size_t http_parser_execute (http_parser *parser,
884
929
  if (ch == CR || ch == LF)
885
930
  break;
886
931
  parser->flags = 0;
932
+ parser->uses_transfer_encoding = 0;
887
933
  parser->content_length = ULLONG_MAX;
888
934
 
889
- if (!IS_ALPHA(ch)) {
935
+ if (UNLIKELY(!IS_ALPHA(ch))) {
890
936
  SET_ERRNO(HPE_INVALID_METHOD);
891
937
  goto error;
892
938
  }
@@ -894,26 +940,28 @@ size_t http_parser_execute (http_parser *parser,
894
940
  parser->method = (enum http_method) 0;
895
941
  parser->index = 1;
896
942
  switch (ch) {
943
+ case 'A': parser->method = HTTP_ACL; break;
944
+ case 'B': parser->method = HTTP_BIND; break;
897
945
  case 'C': parser->method = HTTP_CONNECT; /* or COPY, CHECKOUT */ break;
898
946
  case 'D': parser->method = HTTP_DELETE; break;
899
947
  case 'G': parser->method = HTTP_GET; break;
900
948
  case 'H': parser->method = HTTP_HEAD; break;
901
- case 'L': parser->method = HTTP_LOCK; break;
902
- case 'M': parser->method = HTTP_MKCOL; /* or MOVE, MKACTIVITY, MERGE, M-SEARCH */ break;
949
+ case 'L': parser->method = HTTP_LOCK; /* or LINK */ break;
950
+ case 'M': parser->method = HTTP_MKCOL; /* or MOVE, MKACTIVITY, MERGE, M-SEARCH, MKCALENDAR */ break;
903
951
  case 'N': parser->method = HTTP_NOTIFY; break;
904
952
  case 'O': parser->method = HTTP_OPTIONS; break;
905
953
  case 'P': parser->method = HTTP_POST;
906
954
  /* or PROPFIND|PROPPATCH|PUT|PATCH|PURGE */
907
955
  break;
908
- case 'R': parser->method = HTTP_REPORT; break;
909
- case 'S': parser->method = HTTP_SUBSCRIBE; /* or SEARCH */ break;
956
+ case 'R': parser->method = HTTP_REPORT; /* or REBIND */ break;
957
+ case 'S': parser->method = HTTP_SUBSCRIBE; /* or SEARCH, SOURCE */ break;
910
958
  case 'T': parser->method = HTTP_TRACE; break;
911
- case 'U': parser->method = HTTP_UNLOCK; /* or UNSUBSCRIBE */ break;
959
+ case 'U': parser->method = HTTP_UNLOCK; /* or UNSUBSCRIBE, UNBIND, UNLINK */ break;
912
960
  default:
913
961
  SET_ERRNO(HPE_INVALID_METHOD);
914
962
  goto error;
915
963
  }
916
- parser->state = s_req_method;
964
+ UPDATE_STATE(s_req_method);
917
965
 
918
966
  CALLBACK_NOTIFY(message_begin);
919
967
 
@@ -923,77 +971,47 @@ size_t http_parser_execute (http_parser *parser,
923
971
  case s_req_method:
924
972
  {
925
973
  const char *matcher;
926
- if (ch == '\0') {
974
+ if (UNLIKELY(ch == '\0')) {
927
975
  SET_ERRNO(HPE_INVALID_METHOD);
928
976
  goto error;
929
977
  }
930
978
 
931
979
  matcher = method_strings[parser->method];
932
980
  if (ch == ' ' && matcher[parser->index] == '\0') {
933
- parser->state = s_req_spaces_before_url;
981
+ UPDATE_STATE(s_req_spaces_before_url);
934
982
  } else if (ch == matcher[parser->index]) {
935
983
  ; /* nada */
936
- } else if (parser->method == HTTP_CONNECT) {
937
- if (parser->index == 1 && ch == 'H') {
938
- parser->method = HTTP_CHECKOUT;
939
- } else if (parser->index == 2 && ch == 'P') {
940
- parser->method = HTTP_COPY;
941
- } else {
942
- SET_ERRNO(HPE_INVALID_METHOD);
943
- goto error;
944
- }
945
- } else if (parser->method == HTTP_MKCOL) {
946
- if (parser->index == 1 && ch == 'O') {
947
- parser->method = HTTP_MOVE;
948
- } else if (parser->index == 1 && ch == 'E') {
949
- parser->method = HTTP_MERGE;
950
- } else if (parser->index == 1 && ch == '-') {
951
- parser->method = HTTP_MSEARCH;
952
- } else if (parser->index == 2 && ch == 'A') {
953
- parser->method = HTTP_MKACTIVITY;
954
- } else {
955
- SET_ERRNO(HPE_INVALID_METHOD);
956
- goto error;
957
- }
958
- } else if (parser->method == HTTP_SUBSCRIBE) {
959
- if (parser->index == 1 && ch == 'E') {
960
- parser->method = HTTP_SEARCH;
961
- } else {
962
- SET_ERRNO(HPE_INVALID_METHOD);
963
- goto error;
964
- }
965
- } else if (parser->index == 1 && parser->method == HTTP_POST) {
966
- if (ch == 'R') {
967
- parser->method = HTTP_PROPFIND; /* or HTTP_PROPPATCH */
968
- } else if (ch == 'U') {
969
- parser->method = HTTP_PUT; /* or HTTP_PURGE */
970
- } else if (ch == 'A') {
971
- parser->method = HTTP_PATCH;
972
- } else {
973
- SET_ERRNO(HPE_INVALID_METHOD);
974
- goto error;
975
- }
976
- } else if (parser->index == 2) {
977
- if (parser->method == HTTP_PUT) {
978
- if (ch == 'R') {
979
- parser->method = HTTP_PURGE;
980
- } else {
981
- SET_ERRNO(HPE_INVALID_METHOD);
982
- goto error;
983
- }
984
- } else if (parser->method == HTTP_UNLOCK) {
985
- if (ch == 'S') {
986
- parser->method = HTTP_UNSUBSCRIBE;
987
- } else {
984
+ } else if ((ch >= 'A' && ch <= 'Z') || ch == '-') {
985
+
986
+ switch (parser->method << 16 | parser->index << 8 | ch) {
987
+ #define XX(meth, pos, ch, new_meth) \
988
+ case (HTTP_##meth << 16 | pos << 8 | ch): \
989
+ parser->method = HTTP_##new_meth; break;
990
+
991
+ XX(POST, 1, 'U', PUT)
992
+ XX(POST, 1, 'A', PATCH)
993
+ XX(POST, 1, 'R', PROPFIND)
994
+ XX(PUT, 2, 'R', PURGE)
995
+ XX(CONNECT, 1, 'H', CHECKOUT)
996
+ XX(CONNECT, 2, 'P', COPY)
997
+ XX(MKCOL, 1, 'O', MOVE)
998
+ XX(MKCOL, 1, 'E', MERGE)
999
+ XX(MKCOL, 1, '-', MSEARCH)
1000
+ XX(MKCOL, 2, 'A', MKACTIVITY)
1001
+ XX(MKCOL, 3, 'A', MKCALENDAR)
1002
+ XX(SUBSCRIBE, 1, 'E', SEARCH)
1003
+ XX(SUBSCRIBE, 1, 'O', SOURCE)
1004
+ XX(REPORT, 2, 'B', REBIND)
1005
+ XX(PROPFIND, 4, 'P', PROPPATCH)
1006
+ XX(LOCK, 1, 'I', LINK)
1007
+ XX(UNLOCK, 2, 'S', UNSUBSCRIBE)
1008
+ XX(UNLOCK, 2, 'B', UNBIND)
1009
+ XX(UNLOCK, 3, 'I', UNLINK)
1010
+ #undef XX
1011
+ default:
988
1012
  SET_ERRNO(HPE_INVALID_METHOD);
989
1013
  goto error;
990
- }
991
- } else {
992
- SET_ERRNO(HPE_INVALID_METHOD);
993
- goto error;
994
1014
  }
995
- } else if (parser->index == 4 && parser->method == HTTP_PROPFIND && ch == 'P') {
996
- parser->method = HTTP_PROPPATCH;
997
1015
  } else {
998
1016
  SET_ERRNO(HPE_INVALID_METHOD);
999
1017
  goto error;
@@ -1009,11 +1027,11 @@ size_t http_parser_execute (http_parser *parser,
1009
1027
 
1010
1028
  MARK(url);
1011
1029
  if (parser->method == HTTP_CONNECT) {
1012
- parser->state = s_req_server_start;
1030
+ UPDATE_STATE(s_req_server_start);
1013
1031
  }
1014
1032
 
1015
- parser->state = parse_url_char((enum state)parser->state, ch);
1016
- if (parser->state == s_dead) {
1033
+ UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch));
1034
+ if (UNLIKELY(CURRENT_STATE() == s_dead)) {
1017
1035
  SET_ERRNO(HPE_INVALID_URL);
1018
1036
  goto error;
1019
1037
  }
@@ -1034,8 +1052,8 @@ size_t http_parser_execute (http_parser *parser,
1034
1052
  SET_ERRNO(HPE_INVALID_URL);
1035
1053
  goto error;
1036
1054
  default:
1037
- parser->state = parse_url_char((enum state)parser->state, ch);
1038
- if (parser->state == s_dead) {
1055
+ UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch));
1056
+ if (UNLIKELY(CURRENT_STATE() == s_dead)) {
1039
1057
  SET_ERRNO(HPE_INVALID_URL);
1040
1058
  goto error;
1041
1059
  }
@@ -1054,21 +1072,21 @@ size_t http_parser_execute (http_parser *parser,
1054
1072
  {
1055
1073
  switch (ch) {
1056
1074
  case ' ':
1057
- parser->state = s_req_http_start;
1075
+ UPDATE_STATE(s_req_http_start);
1058
1076
  CALLBACK_DATA(url);
1059
1077
  break;
1060
1078
  case CR:
1061
1079
  case LF:
1062
1080
  parser->http_major = 0;
1063
1081
  parser->http_minor = 9;
1064
- parser->state = (ch == CR) ?
1082
+ UPDATE_STATE((ch == CR) ?
1065
1083
  s_req_line_almost_done :
1066
- s_header_field_start;
1084
+ s_header_field_start);
1067
1085
  CALLBACK_DATA(url);
1068
1086
  break;
1069
1087
  default:
1070
- parser->state = parse_url_char((enum state)parser->state, ch);
1071
- if (parser->state == s_dead) {
1088
+ UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch));
1089
+ if (UNLIKELY(CURRENT_STATE() == s_dead)) {
1072
1090
  SET_ERRNO(HPE_INVALID_URL);
1073
1091
  goto error;
1074
1092
  }
@@ -1078,11 +1096,17 @@ size_t http_parser_execute (http_parser *parser,
1078
1096
 
1079
1097
  case s_req_http_start:
1080
1098
  switch (ch) {
1081
- case 'H':
1082
- parser->state = s_req_http_H;
1083
- break;
1084
1099
  case ' ':
1085
1100
  break;
1101
+ case 'H':
1102
+ UPDATE_STATE(s_req_http_H);
1103
+ break;
1104
+ case 'I':
1105
+ if (parser->method == HTTP_SOURCE) {
1106
+ UPDATE_STATE(s_req_http_I);
1107
+ break;
1108
+ }
1109
+ /* fall through */
1086
1110
  default:
1087
1111
  SET_ERRNO(HPE_INVALID_CONSTANT);
1088
1112
  goto error;
@@ -1091,130 +1115,111 @@ size_t http_parser_execute (http_parser *parser,
1091
1115
 
1092
1116
  case s_req_http_H:
1093
1117
  STRICT_CHECK(ch != 'T');
1094
- parser->state = s_req_http_HT;
1118
+ UPDATE_STATE(s_req_http_HT);
1095
1119
  break;
1096
1120
 
1097
1121
  case s_req_http_HT:
1098
1122
  STRICT_CHECK(ch != 'T');
1099
- parser->state = s_req_http_HTT;
1123
+ UPDATE_STATE(s_req_http_HTT);
1100
1124
  break;
1101
1125
 
1102
1126
  case s_req_http_HTT:
1103
1127
  STRICT_CHECK(ch != 'P');
1104
- parser->state = s_req_http_HTTP;
1128
+ UPDATE_STATE(s_req_http_HTTP);
1129
+ break;
1130
+
1131
+ case s_req_http_I:
1132
+ STRICT_CHECK(ch != 'C');
1133
+ UPDATE_STATE(s_req_http_IC);
1134
+ break;
1135
+
1136
+ case s_req_http_IC:
1137
+ STRICT_CHECK(ch != 'E');
1138
+ UPDATE_STATE(s_req_http_HTTP); /* Treat "ICE" as "HTTP". */
1105
1139
  break;
1106
1140
 
1107
1141
  case s_req_http_HTTP:
1108
1142
  STRICT_CHECK(ch != '/');
1109
- parser->state = s_req_first_http_major;
1143
+ UPDATE_STATE(s_req_http_major);
1110
1144
  break;
1111
1145
 
1112
- /* first digit of major HTTP version */
1113
- case s_req_first_http_major:
1114
- if (ch < '1' || ch > '9') {
1146
+ case s_req_http_major:
1147
+ if (UNLIKELY(!IS_NUM(ch))) {
1115
1148
  SET_ERRNO(HPE_INVALID_VERSION);
1116
1149
  goto error;
1117
1150
  }
1118
1151
 
1119
1152
  parser->http_major = ch - '0';
1120
- parser->state = s_req_http_major;
1153
+ UPDATE_STATE(s_req_http_dot);
1121
1154
  break;
1122
1155
 
1123
- /* major HTTP version or dot */
1124
- case s_req_http_major:
1156
+ case s_req_http_dot:
1125
1157
  {
1126
- if (ch == '.') {
1127
- parser->state = s_req_first_http_minor;
1128
- break;
1129
- }
1130
-
1131
- if (!IS_NUM(ch)) {
1132
- SET_ERRNO(HPE_INVALID_VERSION);
1133
- goto error;
1134
- }
1135
-
1136
- parser->http_major *= 10;
1137
- parser->http_major += ch - '0';
1138
-
1139
- if (parser->http_major > 999) {
1158
+ if (UNLIKELY(ch != '.')) {
1140
1159
  SET_ERRNO(HPE_INVALID_VERSION);
1141
1160
  goto error;
1142
1161
  }
1143
1162
 
1163
+ UPDATE_STATE(s_req_http_minor);
1144
1164
  break;
1145
1165
  }
1146
1166
 
1147
- /* first digit of minor HTTP version */
1148
- case s_req_first_http_minor:
1149
- if (!IS_NUM(ch)) {
1167
+ case s_req_http_minor:
1168
+ if (UNLIKELY(!IS_NUM(ch))) {
1150
1169
  SET_ERRNO(HPE_INVALID_VERSION);
1151
1170
  goto error;
1152
1171
  }
1153
1172
 
1154
1173
  parser->http_minor = ch - '0';
1155
- parser->state = s_req_http_minor;
1174
+ UPDATE_STATE(s_req_http_end);
1156
1175
  break;
1157
1176
 
1158
- /* minor HTTP version or end of request line */
1159
- case s_req_http_minor:
1177
+ case s_req_http_end:
1160
1178
  {
1161
1179
  if (ch == CR) {
1162
- parser->state = s_req_line_almost_done;
1180
+ UPDATE_STATE(s_req_line_almost_done);
1163
1181
  break;
1164
1182
  }
1165
1183
 
1166
1184
  if (ch == LF) {
1167
- parser->state = s_header_field_start;
1185
+ UPDATE_STATE(s_header_field_start);
1168
1186
  break;
1169
1187
  }
1170
1188
 
1171
- /* XXX allow spaces after digit? */
1172
-
1173
- if (!IS_NUM(ch)) {
1174
- SET_ERRNO(HPE_INVALID_VERSION);
1175
- goto error;
1176
- }
1177
-
1178
- parser->http_minor *= 10;
1179
- parser->http_minor += ch - '0';
1180
-
1181
- if (parser->http_minor > 999) {
1182
- SET_ERRNO(HPE_INVALID_VERSION);
1183
- goto error;
1184
- }
1185
-
1189
+ SET_ERRNO(HPE_INVALID_VERSION);
1190
+ goto error;
1186
1191
  break;
1187
1192
  }
1188
1193
 
1189
1194
  /* end of request line */
1190
1195
  case s_req_line_almost_done:
1191
1196
  {
1192
- if (ch != LF) {
1197
+ if (UNLIKELY(ch != LF)) {
1193
1198
  SET_ERRNO(HPE_LF_EXPECTED);
1194
1199
  goto error;
1195
1200
  }
1196
1201
 
1197
- parser->state = s_header_field_start;
1202
+ UPDATE_STATE(s_header_field_start);
1198
1203
  break;
1199
1204
  }
1200
1205
 
1201
1206
  case s_header_field_start:
1202
1207
  {
1203
1208
  if (ch == CR) {
1204
- parser->state = s_headers_almost_done;
1209
+ UPDATE_STATE(s_headers_almost_done);
1205
1210
  break;
1206
1211
  }
1207
1212
 
1208
1213
  if (ch == LF) {
1209
1214
  /* they might be just sending \n instead of \r\n so this would be
1210
1215
  * the second \n to denote the end of headers*/
1211
- parser->state = s_headers_almost_done;
1212
- goto reexecute_byte;
1216
+ UPDATE_STATE(s_headers_almost_done);
1217
+ REEXECUTE();
1213
1218
  }
1214
1219
 
1215
1220
  c = TOKEN(ch);
1216
1221
 
1217
- if (!c) {
1222
+ if (UNLIKELY(!c)) {
1218
1223
  SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1219
1224
  goto error;
1220
1225
  }
@@ -1222,7 +1227,7 @@ size_t http_parser_execute (http_parser *parser,
1222
1227
  MARK(header_field);
1223
1228
 
1224
1229
  parser->index = 0;
1225
- parser->state = s_header_field;
1230
+ UPDATE_STATE(s_header_field);
1226
1231
 
1227
1232
  switch (c) {
1228
1233
  case 'c':
@@ -1250,12 +1255,23 @@ size_t http_parser_execute (http_parser *parser,
1250
1255
 
1251
1256
  case s_header_field:
1252
1257
  {
1253
- c = TOKEN(ch);
1258
+ const char* start = p;
1259
+ for (; p != data + len; p++) {
1260
+ ch = *p;
1261
+ c = TOKEN(ch);
1262
+
1263
+ if (!c)
1264
+ break;
1254
1265
 
1255
- if (c) {
1256
1266
  switch (parser->header_state) {
1257
- case h_general:
1267
+ case h_general: {
1268
+ size_t left = data + len - p;
1269
+ const char* pe = p + MIN(left, max_header_size);
1270
+ while (p+1 < pe && TOKEN(p[1])) {
1271
+ p++;
1272
+ }
1258
1273
  break;
1274
+ }
1259
1275
 
1260
1276
  case h_C:
1261
1277
  parser->index++;
@@ -1327,6 +1343,7 @@ size_t http_parser_execute (http_parser *parser,
1327
1343
  parser->header_state = h_general;
1328
1344
  } else if (parser->index == sizeof(TRANSFER_ENCODING)-2) {
1329
1345
  parser->header_state = h_transfer_encoding;
1346
+ parser->uses_transfer_encoding = 1;
1330
1347
  }
1331
1348
  break;
1332
1349
 
@@ -1353,23 +1370,18 @@ size_t http_parser_execute (http_parser *parser,
1353
1370
  assert(0 && "Unknown header_state");
1354
1371
  break;
1355
1372
  }
1356
- break;
1357
1373
  }
1358
1374
 
1359
- if (ch == ':') {
1360
- parser->state = s_header_value_start;
1361
- CALLBACK_DATA(header_field);
1375
+ if (p == data + len) {
1376
+ --p;
1377
+ COUNT_HEADER_SIZE(p - start);
1362
1378
  break;
1363
1379
  }
1364
1380
 
1365
- if (ch == CR) {
1366
- parser->state = s_header_almost_done;
1367
- CALLBACK_DATA(header_field);
1368
- break;
1369
- }
1381
+ COUNT_HEADER_SIZE(p - start);
1370
1382
 
1371
- if (ch == LF) {
1372
- parser->state = s_header_field_start;
1383
+ if (ch == ':') {
1384
+ UPDATE_STATE(s_header_value_discard_ws);
1373
1385
  CALLBACK_DATA(header_field);
1374
1386
  break;
1375
1387
  }
@@ -1378,28 +1390,28 @@ size_t http_parser_execute (http_parser *parser,
1378
1390
  goto error;
1379
1391
  }
1380
1392
 
1381
- case s_header_value_start:
1382
- {
1393
+ case s_header_value_discard_ws:
1383
1394
  if (ch == ' ' || ch == '\t') break;
1384
1395
 
1385
- MARK(header_value);
1386
-
1387
- parser->state = s_header_value;
1388
- parser->index = 0;
1389
-
1390
1396
  if (ch == CR) {
1391
- parser->header_state = h_general;
1392
- parser->state = s_header_almost_done;
1393
- CALLBACK_DATA(header_value);
1397
+ UPDATE_STATE(s_header_value_discard_ws_almost_done);
1394
1398
  break;
1395
1399
  }
1396
1400
 
1397
1401
  if (ch == LF) {
1398
- parser->state = s_header_field_start;
1399
- CALLBACK_DATA(header_value);
1402
+ UPDATE_STATE(s_header_value_discard_lws);
1400
1403
  break;
1401
1404
  }
1402
1405
 
1406
+ /* fall through */
1407
+
1408
+ case s_header_value_start:
1409
+ {
1410
+ MARK(header_value);
1411
+
1412
+ UPDATE_STATE(s_header_value);
1413
+ parser->index = 0;
1414
+
1403
1415
  c = LOWER(ch);
1404
1416
 
1405
1417
  switch (parser->header_state) {
@@ -1413,17 +1425,33 @@ size_t http_parser_execute (http_parser *parser,
1413
1425
  if ('c' == c) {
1414
1426
  parser->header_state = h_matching_transfer_encoding_chunked;
1415
1427
  } else {
1416
- parser->header_state = h_general;
1428
+ parser->header_state = h_matching_transfer_encoding_token;
1417
1429
  }
1418
1430
  break;
1419
1431
 
1432
+ /* Multi-value `Transfer-Encoding` header */
1433
+ case h_matching_transfer_encoding_token_start:
1434
+ break;
1435
+
1420
1436
  case h_content_length:
1421
- if (!IS_NUM(ch)) {
1437
+ if (UNLIKELY(!IS_NUM(ch))) {
1422
1438
  SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1423
1439
  goto error;
1424
1440
  }
1425
1441
 
1442
+ if (parser->flags & F_CONTENTLENGTH) {
1443
+ SET_ERRNO(HPE_UNEXPECTED_CONTENT_LENGTH);
1444
+ goto error;
1445
+ }
1446
+
1447
+ parser->flags |= F_CONTENTLENGTH;
1426
1448
  parser->content_length = ch - '0';
1449
+ parser->header_state = h_content_length_num;
1450
+ break;
1451
+
1452
+ /* when obsolete line folding is encountered for content length
1453
+ * continue to the s_header_value state */
1454
+ case h_content_length_ws:
1427
1455
  break;
1428
1456
 
1429
1457
  case h_connection:
@@ -1433,11 +1461,17 @@ size_t http_parser_execute (http_parser *parser,
1433
1461
  /* looking for 'Connection: close' */
1434
1462
  } else if (c == 'c') {
1435
1463
  parser->header_state = h_matching_connection_close;
1464
+ } else if (c == 'u') {
1465
+ parser->header_state = h_matching_connection_upgrade;
1436
1466
  } else {
1437
- parser->header_state = h_general;
1467
+ parser->header_state = h_matching_connection_token;
1438
1468
  }
1439
1469
  break;
1440
1470
 
1471
+ /* Multi-value `Connection` header */
1472
+ case h_matching_connection_token_start:
1473
+ break;
1474
+
1441
1475
  default:
1442
1476
  parser->header_state = h_general;
1443
1477
  break;
@@ -1447,107 +1481,253 @@ size_t http_parser_execute (http_parser *parser,
1447
1481
 
1448
1482
  case s_header_value:
1449
1483
  {
1484
+ const char* start = p;
1485
+ enum header_states h_state = (enum header_states) parser->header_state;
1486
+ for (; p != data + len; p++) {
1487
+ ch = *p;
1488
+ if (ch == CR) {
1489
+ UPDATE_STATE(s_header_almost_done);
1490
+ parser->header_state = h_state;
1491
+ CALLBACK_DATA(header_value);
1492
+ break;
1493
+ }
1450
1494
 
1451
- if (ch == CR) {
1452
- parser->state = s_header_almost_done;
1453
- CALLBACK_DATA(header_value);
1454
- break;
1455
- }
1495
+ if (ch == LF) {
1496
+ UPDATE_STATE(s_header_almost_done);
1497
+ COUNT_HEADER_SIZE(p - start);
1498
+ parser->header_state = h_state;
1499
+ CALLBACK_DATA_NOADVANCE(header_value);
1500
+ REEXECUTE();
1501
+ }
1456
1502
 
1457
- if (ch == LF) {
1458
- parser->state = s_header_almost_done;
1459
- CALLBACK_DATA_NOADVANCE(header_value);
1460
- goto reexecute_byte;
1461
- }
1503
+ if (!lenient && !IS_HEADER_CHAR(ch)) {
1504
+ SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1505
+ goto error;
1506
+ }
1462
1507
 
1463
- c = LOWER(ch);
1508
+ c = LOWER(ch);
1464
1509
 
1465
- switch (parser->header_state) {
1466
- case h_general:
1467
- break;
1510
+ switch (h_state) {
1511
+ case h_general:
1512
+ {
1513
+ size_t left = data + len - p;
1514
+ const char* pe = p + MIN(left, max_header_size);
1515
+
1516
+ for (; p != pe; p++) {
1517
+ ch = *p;
1518
+ if (ch == CR || ch == LF) {
1519
+ --p;
1520
+ break;
1521
+ }
1522
+ if (!lenient && !IS_HEADER_CHAR(ch)) {
1523
+ SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1524
+ goto error;
1525
+ }
1526
+ }
1527
+ if (p == data + len)
1528
+ --p;
1529
+ break;
1530
+ }
1468
1531
 
1469
- case h_connection:
1470
- case h_transfer_encoding:
1471
- assert(0 && "Shouldn't get here.");
1472
- break;
1532
+ case h_connection:
1533
+ case h_transfer_encoding:
1534
+ assert(0 && "Shouldn't get here.");
1535
+ break;
1473
1536
 
1474
- case h_content_length:
1475
- {
1476
- uint64_t t;
1537
+ case h_content_length:
1538
+ if (ch == ' ') break;
1539
+ h_state = h_content_length_num;
1540
+ /* fall through */
1477
1541
 
1478
- if (ch == ' ') break;
1542
+ case h_content_length_num:
1543
+ {
1544
+ uint64_t t;
1479
1545
 
1480
- if (!IS_NUM(ch)) {
1481
- SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1482
- goto error;
1483
- }
1546
+ if (ch == ' ') {
1547
+ h_state = h_content_length_ws;
1548
+ break;
1549
+ }
1550
+
1551
+ if (UNLIKELY(!IS_NUM(ch))) {
1552
+ SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1553
+ parser->header_state = h_state;
1554
+ goto error;
1555
+ }
1484
1556
 
1485
- t = parser->content_length;
1486
- t *= 10;
1487
- t += ch - '0';
1557
+ t = parser->content_length;
1558
+ t *= 10;
1559
+ t += ch - '0';
1488
1560
 
1489
- /* Overflow? */
1490
- if (t < parser->content_length || t == ULLONG_MAX) {
1561
+ /* Overflow? Test against a conservative limit for simplicity. */
1562
+ if (UNLIKELY((ULLONG_MAX - 10) / 10 < parser->content_length)) {
1563
+ SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1564
+ parser->header_state = h_state;
1565
+ goto error;
1566
+ }
1567
+
1568
+ parser->content_length = t;
1569
+ break;
1570
+ }
1571
+
1572
+ case h_content_length_ws:
1573
+ if (ch == ' ') break;
1491
1574
  SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1575
+ parser->header_state = h_state;
1492
1576
  goto error;
1493
- }
1494
1577
 
1495
- parser->content_length = t;
1496
- break;
1497
- }
1578
+ /* Transfer-Encoding: chunked */
1579
+ case h_matching_transfer_encoding_token_start:
1580
+ /* looking for 'Transfer-Encoding: chunked' */
1581
+ if ('c' == c) {
1582
+ h_state = h_matching_transfer_encoding_chunked;
1583
+ } else if (STRICT_TOKEN(c)) {
1584
+ /* TODO(indutny): similar code below does this, but why?
1585
+ * At the very least it seems to be inconsistent given that
1586
+ * h_matching_transfer_encoding_token does not check for
1587
+ * `STRICT_TOKEN`
1588
+ */
1589
+ h_state = h_matching_transfer_encoding_token;
1590
+ } else if (c == ' ' || c == '\t') {
1591
+ /* Skip lws */
1592
+ } else {
1593
+ h_state = h_general;
1594
+ }
1595
+ break;
1498
1596
 
1499
- /* Transfer-Encoding: chunked */
1500
- case h_matching_transfer_encoding_chunked:
1501
- parser->index++;
1502
- if (parser->index > sizeof(CHUNKED)-1
1503
- || c != CHUNKED[parser->index]) {
1504
- parser->header_state = h_general;
1505
- } else if (parser->index == sizeof(CHUNKED)-2) {
1506
- parser->header_state = h_transfer_encoding_chunked;
1507
- }
1508
- break;
1597
+ case h_matching_transfer_encoding_chunked:
1598
+ parser->index++;
1599
+ if (parser->index > sizeof(CHUNKED)-1
1600
+ || c != CHUNKED[parser->index]) {
1601
+ h_state = h_matching_transfer_encoding_token;
1602
+ } else if (parser->index == sizeof(CHUNKED)-2) {
1603
+ h_state = h_transfer_encoding_chunked;
1604
+ }
1605
+ break;
1509
1606
 
1510
- /* looking for 'Connection: keep-alive' */
1511
- case h_matching_connection_keep_alive:
1512
- parser->index++;
1513
- if (parser->index > sizeof(KEEP_ALIVE)-1
1514
- || c != KEEP_ALIVE[parser->index]) {
1515
- parser->header_state = h_general;
1516
- } else if (parser->index == sizeof(KEEP_ALIVE)-2) {
1517
- parser->header_state = h_connection_keep_alive;
1518
- }
1519
- break;
1607
+ case h_matching_transfer_encoding_token:
1608
+ if (ch == ',') {
1609
+ h_state = h_matching_transfer_encoding_token_start;
1610
+ parser->index = 0;
1611
+ }
1612
+ break;
1520
1613
 
1521
- /* looking for 'Connection: close' */
1522
- case h_matching_connection_close:
1523
- parser->index++;
1524
- if (parser->index > sizeof(CLOSE)-1 || c != CLOSE[parser->index]) {
1525
- parser->header_state = h_general;
1526
- } else if (parser->index == sizeof(CLOSE)-2) {
1527
- parser->header_state = h_connection_close;
1528
- }
1529
- break;
1614
+ case h_matching_connection_token_start:
1615
+ /* looking for 'Connection: keep-alive' */
1616
+ if (c == 'k') {
1617
+ h_state = h_matching_connection_keep_alive;
1618
+ /* looking for 'Connection: close' */
1619
+ } else if (c == 'c') {
1620
+ h_state = h_matching_connection_close;
1621
+ } else if (c == 'u') {
1622
+ h_state = h_matching_connection_upgrade;
1623
+ } else if (STRICT_TOKEN(c)) {
1624
+ h_state = h_matching_connection_token;
1625
+ } else if (c == ' ' || c == '\t') {
1626
+ /* Skip lws */
1627
+ } else {
1628
+ h_state = h_general;
1629
+ }
1630
+ break;
1530
1631
 
1531
- case h_transfer_encoding_chunked:
1532
- case h_connection_keep_alive:
1533
- case h_connection_close:
1534
- if (ch != ' ') parser->header_state = h_general;
1535
- break;
1632
+ /* looking for 'Connection: keep-alive' */
1633
+ case h_matching_connection_keep_alive:
1634
+ parser->index++;
1635
+ if (parser->index > sizeof(KEEP_ALIVE)-1
1636
+ || c != KEEP_ALIVE[parser->index]) {
1637
+ h_state = h_matching_connection_token;
1638
+ } else if (parser->index == sizeof(KEEP_ALIVE)-2) {
1639
+ h_state = h_connection_keep_alive;
1640
+ }
1641
+ break;
1536
1642
 
1537
- default:
1538
- parser->state = s_header_value;
1539
- parser->header_state = h_general;
1540
- break;
1643
+ /* looking for 'Connection: close' */
1644
+ case h_matching_connection_close:
1645
+ parser->index++;
1646
+ if (parser->index > sizeof(CLOSE)-1 || c != CLOSE[parser->index]) {
1647
+ h_state = h_matching_connection_token;
1648
+ } else if (parser->index == sizeof(CLOSE)-2) {
1649
+ h_state = h_connection_close;
1650
+ }
1651
+ break;
1652
+
1653
+ /* looking for 'Connection: upgrade' */
1654
+ case h_matching_connection_upgrade:
1655
+ parser->index++;
1656
+ if (parser->index > sizeof(UPGRADE) - 1 ||
1657
+ c != UPGRADE[parser->index]) {
1658
+ h_state = h_matching_connection_token;
1659
+ } else if (parser->index == sizeof(UPGRADE)-2) {
1660
+ h_state = h_connection_upgrade;
1661
+ }
1662
+ break;
1663
+
1664
+ case h_matching_connection_token:
1665
+ if (ch == ',') {
1666
+ h_state = h_matching_connection_token_start;
1667
+ parser->index = 0;
1668
+ }
1669
+ break;
1670
+
1671
+ case h_transfer_encoding_chunked:
1672
+ if (ch != ' ') h_state = h_matching_transfer_encoding_token;
1673
+ break;
1674
+
1675
+ case h_connection_keep_alive:
1676
+ case h_connection_close:
1677
+ case h_connection_upgrade:
1678
+ if (ch == ',') {
1679
+ if (h_state == h_connection_keep_alive) {
1680
+ parser->flags |= F_CONNECTION_KEEP_ALIVE;
1681
+ } else if (h_state == h_connection_close) {
1682
+ parser->flags |= F_CONNECTION_CLOSE;
1683
+ } else if (h_state == h_connection_upgrade) {
1684
+ parser->flags |= F_CONNECTION_UPGRADE;
1685
+ }
1686
+ h_state = h_matching_connection_token_start;
1687
+ parser->index = 0;
1688
+ } else if (ch != ' ') {
1689
+ h_state = h_matching_connection_token;
1690
+ }
1691
+ break;
1692
+
1693
+ default:
1694
+ UPDATE_STATE(s_header_value);
1695
+ h_state = h_general;
1696
+ break;
1697
+ }
1541
1698
  }
1699
+ parser->header_state = h_state;
1700
+
1701
+ if (p == data + len)
1702
+ --p;
1703
+
1704
+ COUNT_HEADER_SIZE(p - start);
1542
1705
  break;
1543
1706
  }
1544
1707
 
1545
1708
  case s_header_almost_done:
1546
1709
  {
1547
- STRICT_CHECK(ch != LF);
1710
+ if (UNLIKELY(ch != LF)) {
1711
+ SET_ERRNO(HPE_LF_EXPECTED);
1712
+ goto error;
1713
+ }
1714
+
1715
+ UPDATE_STATE(s_header_value_lws);
1716
+ break;
1717
+ }
1548
1718
 
1549
- parser->state = s_header_value_lws;
1719
+ case s_header_value_lws:
1720
+ {
1721
+ if (ch == ' ' || ch == '\t') {
1722
+ if (parser->header_state == h_content_length_num) {
1723
+ /* treat obsolete line folding as space */
1724
+ parser->header_state = h_content_length_ws;
1725
+ }
1726
+ UPDATE_STATE(s_header_value_start);
1727
+ REEXECUTE();
1728
+ }
1550
1729
 
1730
+ /* finished the header */
1551
1731
  switch (parser->header_state) {
1552
1732
  case h_connection_keep_alive:
1553
1733
  parser->flags |= F_CONNECTION_KEEP_ALIVE;
@@ -1558,23 +1738,58 @@ size_t http_parser_execute (http_parser *parser,
1558
1738
  case h_transfer_encoding_chunked:
1559
1739
  parser->flags |= F_CHUNKED;
1560
1740
  break;
1741
+ case h_connection_upgrade:
1742
+ parser->flags |= F_CONNECTION_UPGRADE;
1743
+ break;
1561
1744
  default:
1562
1745
  break;
1563
1746
  }
1564
1747
 
1748
+ UPDATE_STATE(s_header_field_start);
1749
+ REEXECUTE();
1750
+ }
1751
+
1752
+ case s_header_value_discard_ws_almost_done:
1753
+ {
1754
+ STRICT_CHECK(ch != LF);
1755
+ UPDATE_STATE(s_header_value_discard_lws);
1565
1756
  break;
1566
1757
  }
1567
1758
 
1568
- case s_header_value_lws:
1759
+ case s_header_value_discard_lws:
1569
1760
  {
1570
- if (ch == ' ' || ch == '\t')
1571
- parser->state = s_header_value_start;
1572
- else
1573
- {
1574
- parser->state = s_header_field_start;
1575
- goto reexecute_byte;
1761
+ if (ch == ' ' || ch == '\t') {
1762
+ UPDATE_STATE(s_header_value_discard_ws);
1763
+ break;
1764
+ } else {
1765
+ switch (parser->header_state) {
1766
+ case h_connection_keep_alive:
1767
+ parser->flags |= F_CONNECTION_KEEP_ALIVE;
1768
+ break;
1769
+ case h_connection_close:
1770
+ parser->flags |= F_CONNECTION_CLOSE;
1771
+ break;
1772
+ case h_connection_upgrade:
1773
+ parser->flags |= F_CONNECTION_UPGRADE;
1774
+ break;
1775
+ case h_transfer_encoding_chunked:
1776
+ parser->flags |= F_CHUNKED;
1777
+ break;
1778
+ case h_content_length:
1779
+ /* do not allow empty content length */
1780
+ SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1781
+ goto error;
1782
+ break;
1783
+ default:
1784
+ break;
1785
+ }
1786
+
1787
+ /* header value was empty */
1788
+ MARK(header_value);
1789
+ UPDATE_STATE(s_header_field_start);
1790
+ CALLBACK_DATA_NOADVANCE(header_value);
1791
+ REEXECUTE();
1576
1792
  }
1577
- break;
1578
1793
  }
1579
1794
 
1580
1795
  case s_headers_almost_done:
@@ -1583,16 +1798,43 @@ size_t http_parser_execute (http_parser *parser,
1583
1798
 
1584
1799
  if (parser->flags & F_TRAILING) {
1585
1800
  /* End of a chunked request */
1586
- parser->state = NEW_MESSAGE();
1587
- CALLBACK_NOTIFY(message_complete);
1588
- break;
1801
+ UPDATE_STATE(s_message_done);
1802
+ CALLBACK_NOTIFY_NOADVANCE(chunk_complete);
1803
+ REEXECUTE();
1804
+ }
1805
+
1806
+ /* Cannot use transfer-encoding and a content-length header together
1807
+ per the HTTP specification. (RFC 7230 Section 3.3.3) */
1808
+ if ((parser->uses_transfer_encoding == 1) &&
1809
+ (parser->flags & F_CONTENTLENGTH)) {
1810
+ /* Allow it for lenient parsing as long as `Transfer-Encoding` is
1811
+ * not `chunked` or allow_length_with_encoding is set
1812
+ */
1813
+ if (parser->flags & F_CHUNKED) {
1814
+ if (!allow_chunked_length) {
1815
+ SET_ERRNO(HPE_UNEXPECTED_CONTENT_LENGTH);
1816
+ goto error;
1817
+ }
1818
+ } else if (!lenient) {
1819
+ SET_ERRNO(HPE_UNEXPECTED_CONTENT_LENGTH);
1820
+ goto error;
1821
+ }
1589
1822
  }
1590
1823
 
1591
- parser->state = s_headers_done;
1824
+ UPDATE_STATE(s_headers_done);
1592
1825
 
1593
1826
  /* Set this here so that on_headers_complete() callbacks can see it */
1594
- parser->upgrade =
1595
- (parser->flags & F_UPGRADE || parser->method == HTTP_CONNECT);
1827
+ if ((parser->flags & F_UPGRADE) &&
1828
+ (parser->flags & F_CONNECTION_UPGRADE)) {
1829
+ /* For responses, "Upgrade: foo" and "Connection: upgrade" are
1830
+ * mandatory only when it is a 101 Switching Protocols response,
1831
+ * otherwise it is purely informational, to announce support.
1832
+ */
1833
+ parser->upgrade =
1834
+ (parser->type == HTTP_REQUEST || parser->status_code == 101);
1835
+ } else {
1836
+ parser->upgrade = (parser->method == HTTP_CONNECT);
1837
+ }
1596
1838
 
1597
1839
  /* Here we call the headers_complete callback. This is somewhat
1598
1840
  * different than other callbacks because if the user returns 1, we
@@ -1608,59 +1850,90 @@ size_t http_parser_execute (http_parser *parser,
1608
1850
  case 0:
1609
1851
  break;
1610
1852
 
1853
+ case 2:
1854
+ parser->upgrade = 1;
1855
+
1856
+ /* fall through */
1611
1857
  case 1:
1612
1858
  parser->flags |= F_SKIPBODY;
1613
1859
  break;
1614
1860
 
1615
1861
  default:
1616
1862
  SET_ERRNO(HPE_CB_headers_complete);
1617
- return p - data; /* Error */
1863
+ RETURN(p - data); /* Error */
1618
1864
  }
1619
1865
  }
1620
1866
 
1621
1867
  if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
1622
- return p - data;
1868
+ RETURN(p - data);
1623
1869
  }
1624
1870
 
1625
- goto reexecute_byte;
1871
+ REEXECUTE();
1626
1872
  }
1627
1873
 
1628
1874
  case s_headers_done:
1629
1875
  {
1876
+ int hasBody;
1630
1877
  STRICT_CHECK(ch != LF);
1631
1878
 
1632
1879
  parser->nread = 0;
1633
-
1634
- /* Exit, the rest of the connect is in a different protocol. */
1635
- if (parser->upgrade) {
1636
- parser->state = NEW_MESSAGE();
1880
+ nread = 0;
1881
+
1882
+ hasBody = parser->flags & F_CHUNKED ||
1883
+ (parser->content_length > 0 && parser->content_length != ULLONG_MAX);
1884
+ if (parser->upgrade && (parser->method == HTTP_CONNECT ||
1885
+ (parser->flags & F_SKIPBODY) || !hasBody)) {
1886
+ /* Exit, the rest of the message is in a different protocol. */
1887
+ UPDATE_STATE(NEW_MESSAGE());
1637
1888
  CALLBACK_NOTIFY(message_complete);
1638
- return (p - data) + 1;
1889
+ RETURN((p - data) + 1);
1639
1890
  }
1640
1891
 
1641
1892
  if (parser->flags & F_SKIPBODY) {
1642
- parser->state = NEW_MESSAGE();
1893
+ UPDATE_STATE(NEW_MESSAGE());
1643
1894
  CALLBACK_NOTIFY(message_complete);
1644
1895
  } else if (parser->flags & F_CHUNKED) {
1645
- /* chunked encoding - ignore Content-Length header */
1646
- parser->state = s_chunk_size_start;
1896
+ /* chunked encoding - ignore Content-Length header,
1897
+ * prepare for a chunk */
1898
+ UPDATE_STATE(s_chunk_size_start);
1899
+ } else if (parser->uses_transfer_encoding == 1) {
1900
+ if (parser->type == HTTP_REQUEST && !lenient) {
1901
+ /* RFC 7230 3.3.3 */
1902
+
1903
+ /* If a Transfer-Encoding header field
1904
+ * is present in a request and the chunked transfer coding is not
1905
+ * the final encoding, the message body length cannot be determined
1906
+ * reliably; the server MUST respond with the 400 (Bad Request)
1907
+ * status code and then close the connection.
1908
+ */
1909
+ SET_ERRNO(HPE_INVALID_TRANSFER_ENCODING);
1910
+ RETURN(p - data); /* Error */
1911
+ } else {
1912
+ /* RFC 7230 3.3.3 */
1913
+
1914
+ /* If a Transfer-Encoding header field is present in a response and
1915
+ * the chunked transfer coding is not the final encoding, the
1916
+ * message body length is determined by reading the connection until
1917
+ * it is closed by the server.
1918
+ */
1919
+ UPDATE_STATE(s_body_identity_eof);
1920
+ }
1647
1921
  } else {
1648
1922
  if (parser->content_length == 0) {
1649
1923
  /* Content-Length header given but zero: Content-Length: 0\r\n */
1650
- parser->state = NEW_MESSAGE();
1924
+ UPDATE_STATE(NEW_MESSAGE());
1651
1925
  CALLBACK_NOTIFY(message_complete);
1652
1926
  } else if (parser->content_length != ULLONG_MAX) {
1653
1927
  /* Content-Length header given and non-zero */
1654
- parser->state = s_body_identity;
1928
+ UPDATE_STATE(s_body_identity);
1655
1929
  } else {
1656
- if (parser->type == HTTP_REQUEST ||
1657
- !http_message_needs_eof(parser)) {
1930
+ if (!http_message_needs_eof(parser)) {
1658
1931
  /* Assume content-length 0 - read the next */
1659
- parser->state = NEW_MESSAGE();
1932
+ UPDATE_STATE(NEW_MESSAGE());
1660
1933
  CALLBACK_NOTIFY(message_complete);
1661
1934
  } else {
1662
1935
  /* Read body until EOF */
1663
- parser->state = s_body_identity_eof;
1936
+ UPDATE_STATE(s_body_identity_eof);
1664
1937
  }
1665
1938
  }
1666
1939
  }
@@ -1686,7 +1959,7 @@ size_t http_parser_execute (http_parser *parser,
1686
1959
  p += to_read - 1;
1687
1960
 
1688
1961
  if (parser->content_length == 0) {
1689
- parser->state = s_message_done;
1962
+ UPDATE_STATE(s_message_done);
1690
1963
 
1691
1964
  /* Mimic CALLBACK_DATA_NOADVANCE() but with one extra byte.
1692
1965
  *
@@ -1698,7 +1971,7 @@ size_t http_parser_execute (http_parser *parser,
1698
1971
  * important for applications, but let's keep it for now.
1699
1972
  */
1700
1973
  CALLBACK_DATA_(body, p - body_mark + 1, p - data);
1701
- goto reexecute_byte;
1974
+ REEXECUTE();
1702
1975
  }
1703
1976
 
1704
1977
  break;
@@ -1712,23 +1985,27 @@ size_t http_parser_execute (http_parser *parser,
1712
1985
  break;
1713
1986
 
1714
1987
  case s_message_done:
1715
- parser->state = NEW_MESSAGE();
1988
+ UPDATE_STATE(NEW_MESSAGE());
1716
1989
  CALLBACK_NOTIFY(message_complete);
1990
+ if (parser->upgrade) {
1991
+ /* Exit, the rest of the message is in a different protocol. */
1992
+ RETURN((p - data) + 1);
1993
+ }
1717
1994
  break;
1718
1995
 
1719
1996
  case s_chunk_size_start:
1720
1997
  {
1721
- assert(parser->nread == 1);
1998
+ assert(nread == 1);
1722
1999
  assert(parser->flags & F_CHUNKED);
1723
2000
 
1724
2001
  unhex_val = unhex[(unsigned char)ch];
1725
- if (unhex_val == -1) {
2002
+ if (UNLIKELY(unhex_val == -1)) {
1726
2003
  SET_ERRNO(HPE_INVALID_CHUNK_SIZE);
1727
2004
  goto error;
1728
2005
  }
1729
2006
 
1730
2007
  parser->content_length = unhex_val;
1731
- parser->state = s_chunk_size;
2008
+ UPDATE_STATE(s_chunk_size);
1732
2009
  break;
1733
2010
  }
1734
2011
 
@@ -1739,7 +2016,7 @@ size_t http_parser_execute (http_parser *parser,
1739
2016
  assert(parser->flags & F_CHUNKED);
1740
2017
 
1741
2018
  if (ch == CR) {
1742
- parser->state = s_chunk_size_almost_done;
2019
+ UPDATE_STATE(s_chunk_size_almost_done);
1743
2020
  break;
1744
2021
  }
1745
2022
 
@@ -1747,7 +2024,7 @@ size_t http_parser_execute (http_parser *parser,
1747
2024
 
1748
2025
  if (unhex_val == -1) {
1749
2026
  if (ch == ';' || ch == ' ') {
1750
- parser->state = s_chunk_parameters;
2027
+ UPDATE_STATE(s_chunk_parameters);
1751
2028
  break;
1752
2029
  }
1753
2030
 
@@ -1759,8 +2036,8 @@ size_t http_parser_execute (http_parser *parser,
1759
2036
  t *= 16;
1760
2037
  t += unhex_val;
1761
2038
 
1762
- /* Overflow? */
1763
- if (t < parser->content_length || t == ULLONG_MAX) {
2039
+ /* Overflow? Test against a conservative limit for simplicity. */
2040
+ if (UNLIKELY((ULLONG_MAX - 16) / 16 < parser->content_length)) {
1764
2041
  SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1765
2042
  goto error;
1766
2043
  }
@@ -1774,7 +2051,7 @@ size_t http_parser_execute (http_parser *parser,
1774
2051
  assert(parser->flags & F_CHUNKED);
1775
2052
  /* just ignore this shit. TODO check for overflow */
1776
2053
  if (ch == CR) {
1777
- parser->state = s_chunk_size_almost_done;
2054
+ UPDATE_STATE(s_chunk_size_almost_done);
1778
2055
  break;
1779
2056
  }
1780
2057
  break;
@@ -1786,13 +2063,15 @@ size_t http_parser_execute (http_parser *parser,
1786
2063
  STRICT_CHECK(ch != LF);
1787
2064
 
1788
2065
  parser->nread = 0;
2066
+ nread = 0;
1789
2067
 
1790
2068
  if (parser->content_length == 0) {
1791
2069
  parser->flags |= F_TRAILING;
1792
- parser->state = s_header_field_start;
2070
+ UPDATE_STATE(s_header_field_start);
1793
2071
  } else {
1794
- parser->state = s_chunk_data;
2072
+ UPDATE_STATE(s_chunk_data);
1795
2073
  }
2074
+ CALLBACK_NOTIFY(chunk_header);
1796
2075
  break;
1797
2076
  }
1798
2077
 
@@ -1813,7 +2092,7 @@ size_t http_parser_execute (http_parser *parser,
1813
2092
  p += to_read - 1;
1814
2093
 
1815
2094
  if (parser->content_length == 0) {
1816
- parser->state = s_chunk_data_almost_done;
2095
+ UPDATE_STATE(s_chunk_data_almost_done);
1817
2096
  }
1818
2097
 
1819
2098
  break;
@@ -1823,7 +2102,7 @@ size_t http_parser_execute (http_parser *parser,
1823
2102
  assert(parser->flags & F_CHUNKED);
1824
2103
  assert(parser->content_length == 0);
1825
2104
  STRICT_CHECK(ch != CR);
1826
- parser->state = s_chunk_data_done;
2105
+ UPDATE_STATE(s_chunk_data_done);
1827
2106
  CALLBACK_DATA(body);
1828
2107
  break;
1829
2108
 
@@ -1831,7 +2110,9 @@ size_t http_parser_execute (http_parser *parser,
1831
2110
  assert(parser->flags & F_CHUNKED);
1832
2111
  STRICT_CHECK(ch != LF);
1833
2112
  parser->nread = 0;
1834
- parser->state = s_chunk_size_start;
2113
+ nread = 0;
2114
+ UPDATE_STATE(s_chunk_size_start);
2115
+ CALLBACK_NOTIFY(chunk_complete);
1835
2116
  break;
1836
2117
 
1837
2118
  default:
@@ -1841,7 +2122,7 @@ size_t http_parser_execute (http_parser *parser,
1841
2122
  }
1842
2123
  }
1843
2124
 
1844
- /* Run callbacks for any marks that we have leftover after we ran our of
2125
+ /* Run callbacks for any marks that we have leftover after we ran out of
1845
2126
  * bytes. There should be at most one of these set, so it's OK to invoke
1846
2127
  * them in series (unset marks will not result in callbacks).
1847
2128
  *
@@ -1854,21 +2135,23 @@ size_t http_parser_execute (http_parser *parser,
1854
2135
  assert(((header_field_mark ? 1 : 0) +
1855
2136
  (header_value_mark ? 1 : 0) +
1856
2137
  (url_mark ? 1 : 0) +
1857
- (body_mark ? 1 : 0)) <= 1);
2138
+ (body_mark ? 1 : 0) +
2139
+ (status_mark ? 1 : 0)) <= 1);
1858
2140
 
1859
2141
  CALLBACK_DATA_NOADVANCE(header_field);
1860
2142
  CALLBACK_DATA_NOADVANCE(header_value);
1861
2143
  CALLBACK_DATA_NOADVANCE(url);
1862
2144
  CALLBACK_DATA_NOADVANCE(body);
2145
+ CALLBACK_DATA_NOADVANCE(status);
1863
2146
 
1864
- return len;
2147
+ RETURN(len);
1865
2148
 
1866
2149
  error:
1867
2150
  if (HTTP_PARSER_ERRNO(parser) == HPE_OK) {
1868
2151
  SET_ERRNO(HPE_UNKNOWN);
1869
2152
  }
1870
2153
 
1871
- return (p - data);
2154
+ RETURN(p - data);
1872
2155
  }
1873
2156
 
1874
2157
 
@@ -1888,6 +2171,12 @@ http_message_needs_eof (const http_parser *parser)
1888
2171
  return 0;
1889
2172
  }
1890
2173
 
2174
+ /* RFC 7230 3.3.3, see `s_headers_almost_done` */
2175
+ if ((parser->uses_transfer_encoding == 1) &&
2176
+ (parser->flags & F_CHUNKED) == 0) {
2177
+ return 1;
2178
+ }
2179
+
1891
2180
  if ((parser->flags & F_CHUNKED) || parser->content_length != ULLONG_MAX) {
1892
2181
  return 0;
1893
2182
  }
@@ -1921,6 +2210,16 @@ http_method_str (enum http_method m)
1921
2210
  return ELEM_AT(method_strings, m, "<unknown>");
1922
2211
  }
1923
2212
 
2213
+ const char *
2214
+ http_status_str (enum http_status s)
2215
+ {
2216
+ switch (s) {
2217
+ #define XX(num, name, string) case HTTP_STATUS_##name: return #string;
2218
+ HTTP_STATUS_MAP(XX)
2219
+ #undef XX
2220
+ default: return "<unknown>";
2221
+ }
2222
+ }
1924
2223
 
1925
2224
  void
1926
2225
  http_parser_init (http_parser *parser, enum http_parser_type t)
@@ -1933,15 +2232,21 @@ http_parser_init (http_parser *parser, enum http_parser_type t)
1933
2232
  parser->http_errno = HPE_OK;
1934
2233
  }
1935
2234
 
2235
+ void
2236
+ http_parser_settings_init(http_parser_settings *settings)
2237
+ {
2238
+ memset(settings, 0, sizeof(*settings));
2239
+ }
2240
+
1936
2241
  const char *
1937
2242
  http_errno_name(enum http_errno err) {
1938
- assert(err < (sizeof(http_strerror_tab)/sizeof(http_strerror_tab[0])));
2243
+ assert(((size_t) err) < ARRAY_SIZE(http_strerror_tab));
1939
2244
  return http_strerror_tab[err].name;
1940
2245
  }
1941
2246
 
1942
2247
  const char *
1943
2248
  http_errno_description(enum http_errno err) {
1944
- assert(err < (sizeof(http_strerror_tab)/sizeof(http_strerror_tab[0])));
2249
+ assert(((size_t) err) < ARRAY_SIZE(http_strerror_tab));
1945
2250
  return http_strerror_tab[err].description;
1946
2251
  }
1947
2252
 
@@ -1975,7 +2280,7 @@ http_parse_host_char(enum http_host_state s, const char ch) {
1975
2280
  return s_http_host;
1976
2281
  }
1977
2282
 
1978
- /* FALLTHROUGH */
2283
+ /* fall through */
1979
2284
  case s_http_host_v6_end:
1980
2285
  if (ch == ':') {
1981
2286
  return s_http_host_port_start;
@@ -1988,12 +2293,29 @@ http_parse_host_char(enum http_host_state s, const char ch) {
1988
2293
  return s_http_host_v6_end;
1989
2294
  }
1990
2295
 
1991
- /* FALLTHROUGH */
2296
+ /* fall through */
1992
2297
  case s_http_host_v6_start:
1993
2298
  if (IS_HEX(ch) || ch == ':' || ch == '.') {
1994
2299
  return s_http_host_v6;
1995
2300
  }
1996
2301
 
2302
+ if (s == s_http_host_v6 && ch == '%') {
2303
+ return s_http_host_v6_zone_start;
2304
+ }
2305
+ break;
2306
+
2307
+ case s_http_host_v6_zone:
2308
+ if (ch == ']') {
2309
+ return s_http_host_v6_end;
2310
+ }
2311
+
2312
+ /* fall through */
2313
+ case s_http_host_v6_zone_start:
2314
+ /* RFC 6874 Zone ID consists of 1*( unreserved / pct-encoded) */
2315
+ if (IS_ALPHANUM(ch) || ch == '%' || ch == '.' || ch == '-' || ch == '_' ||
2316
+ ch == '~') {
2317
+ return s_http_host_v6_zone;
2318
+ }
1997
2319
  break;
1998
2320
 
1999
2321
  case s_http_host_port:
@@ -2017,6 +2339,8 @@ http_parse_host(const char * buf, struct http_parser_url *u, int found_at) {
2017
2339
  const char *p;
2018
2340
  size_t buflen = u->field_data[UF_HOST].off + u->field_data[UF_HOST].len;
2019
2341
 
2342
+ assert(u->field_set & (1 << UF_HOST));
2343
+
2020
2344
  u->field_data[UF_HOST].len = 0;
2021
2345
 
2022
2346
  s = found_at ? s_http_userinfo_start : s_http_host_start;
@@ -2031,21 +2355,26 @@ http_parse_host(const char * buf, struct http_parser_url *u, int found_at) {
2031
2355
  switch(new_s) {
2032
2356
  case s_http_host:
2033
2357
  if (s != s_http_host) {
2034
- u->field_data[UF_HOST].off = p - buf;
2358
+ u->field_data[UF_HOST].off = (uint16_t)(p - buf);
2035
2359
  }
2036
2360
  u->field_data[UF_HOST].len++;
2037
2361
  break;
2038
2362
 
2039
2363
  case s_http_host_v6:
2040
2364
  if (s != s_http_host_v6) {
2041
- u->field_data[UF_HOST].off = p - buf;
2365
+ u->field_data[UF_HOST].off = (uint16_t)(p - buf);
2042
2366
  }
2043
2367
  u->field_data[UF_HOST].len++;
2044
2368
  break;
2045
2369
 
2370
+ case s_http_host_v6_zone_start:
2371
+ case s_http_host_v6_zone:
2372
+ u->field_data[UF_HOST].len++;
2373
+ break;
2374
+
2046
2375
  case s_http_host_port:
2047
2376
  if (s != s_http_host_port) {
2048
- u->field_data[UF_PORT].off = p - buf;
2377
+ u->field_data[UF_PORT].off = (uint16_t)(p - buf);
2049
2378
  u->field_data[UF_PORT].len = 0;
2050
2379
  u->field_set |= (1 << UF_PORT);
2051
2380
  }
@@ -2054,7 +2383,7 @@ http_parse_host(const char * buf, struct http_parser_url *u, int found_at) {
2054
2383
 
2055
2384
  case s_http_userinfo:
2056
2385
  if (s != s_http_userinfo) {
2057
- u->field_data[UF_USERINFO].off = p - buf ;
2386
+ u->field_data[UF_USERINFO].off = (uint16_t)(p - buf);
2058
2387
  u->field_data[UF_USERINFO].len = 0;
2059
2388
  u->field_set |= (1 << UF_USERINFO);
2060
2389
  }
@@ -2072,6 +2401,8 @@ http_parse_host(const char * buf, struct http_parser_url *u, int found_at) {
2072
2401
  case s_http_host_start:
2073
2402
  case s_http_host_v6_start:
2074
2403
  case s_http_host_v6:
2404
+ case s_http_host_v6_zone_start:
2405
+ case s_http_host_v6_zone:
2075
2406
  case s_http_host_port_start:
2076
2407
  case s_http_userinfo:
2077
2408
  case s_http_userinfo_start:
@@ -2083,6 +2414,11 @@ http_parse_host(const char * buf, struct http_parser_url *u, int found_at) {
2083
2414
  return 0;
2084
2415
  }
2085
2416
 
2417
+ void
2418
+ http_parser_url_init(struct http_parser_url *u) {
2419
+ memset(u, 0, sizeof(*u));
2420
+ }
2421
+
2086
2422
  int
2087
2423
  http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
2088
2424
  struct http_parser_url *u)
@@ -2092,9 +2428,13 @@ http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
2092
2428
  enum http_parser_url_fields uf, old_uf;
2093
2429
  int found_at = 0;
2094
2430
 
2431
+ if (buflen == 0) {
2432
+ return 1;
2433
+ }
2434
+
2095
2435
  u->port = u->field_set = 0;
2096
2436
  s = is_connect ? s_req_server_start : s_req_spaces_before_url;
2097
- uf = old_uf = UF_MAX;
2437
+ old_uf = UF_MAX;
2098
2438
 
2099
2439
  for (p = buf; p < buf + buflen; p++) {
2100
2440
  s = parse_url_char(s, *p);
@@ -2119,7 +2459,7 @@ http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
2119
2459
  case s_req_server_with_at:
2120
2460
  found_at = 1;
2121
2461
 
2122
- /* FALLTROUGH */
2462
+ /* fall through */
2123
2463
  case s_req_server:
2124
2464
  uf = UF_HOST;
2125
2465
  break;
@@ -2147,7 +2487,7 @@ http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
2147
2487
  continue;
2148
2488
  }
2149
2489
 
2150
- u->field_data[uf].off = p - buf;
2490
+ u->field_data[uf].off = (uint16_t)(p - buf);
2151
2491
  u->field_data[uf].len = 1;
2152
2492
 
2153
2493
  u->field_set |= (1 << uf);
@@ -2156,7 +2496,12 @@ http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
2156
2496
 
2157
2497
  /* host must be present if there is a schema */
2158
2498
  /* parsing http:///toto will fail */
2159
- if ((u->field_set & ((1 << UF_SCHEMA) | (1 << UF_HOST))) != 0) {
2499
+ if ((u->field_set & (1 << UF_SCHEMA)) &&
2500
+ (u->field_set & (1 << UF_HOST)) == 0) {
2501
+ return 1;
2502
+ }
2503
+
2504
+ if (u->field_set & (1 << UF_HOST)) {
2160
2505
  if (http_parse_host(buf, u, found_at) != 0) {
2161
2506
  return 1;
2162
2507
  }
@@ -2168,12 +2513,27 @@ http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
2168
2513
  }
2169
2514
 
2170
2515
  if (u->field_set & (1 << UF_PORT)) {
2171
- /* Don't bother with endp; we've already validated the string */
2172
- unsigned long v = strtoul(buf + u->field_data[UF_PORT].off, NULL, 10);
2173
-
2174
- /* Ports have a max value of 2^16 */
2175
- if (v > 0xffff) {
2176
- return 1;
2516
+ uint16_t off;
2517
+ uint16_t len;
2518
+ const char* p;
2519
+ const char* end;
2520
+ unsigned long v;
2521
+
2522
+ off = u->field_data[UF_PORT].off;
2523
+ len = u->field_data[UF_PORT].len;
2524
+ end = buf + off + len;
2525
+
2526
+ /* NOTE: The characters are already validated and are in the [0-9] range */
2527
+ assert((size_t) (off + len) <= buflen && "Port number overflow");
2528
+ v = 0;
2529
+ for (p = buf + off; p < end; p++) {
2530
+ v *= 10;
2531
+ v += *p - '0';
2532
+
2533
+ /* Ports have a max value of 2^16 */
2534
+ if (v > 0xffff) {
2535
+ return 1;
2536
+ }
2177
2537
  }
2178
2538
 
2179
2539
  u->port = (uint16_t) v;
@@ -2190,6 +2550,7 @@ http_parser_pause(http_parser *parser, int paused) {
2190
2550
  */
2191
2551
  if (HTTP_PARSER_ERRNO(parser) == HPE_OK ||
2192
2552
  HTTP_PARSER_ERRNO(parser) == HPE_PAUSED) {
2553
+ uint32_t nread = parser->nread; /* used by the SET_ERRNO macro */
2193
2554
  SET_ERRNO((paused) ? HPE_PAUSED : HPE_OK);
2194
2555
  } else {
2195
2556
  assert(0 && "Attempting to pause parser in error state");
@@ -2207,3 +2568,8 @@ http_parser_version(void) {
2207
2568
  HTTP_PARSER_VERSION_MINOR * 0x00100 |
2208
2569
  HTTP_PARSER_VERSION_PATCH * 0x00001;
2209
2570
  }
2571
+
2572
+ void
2573
+ http_parser_set_max_header_size(uint32_t size) {
2574
+ max_header_size = size;
2575
+ }