http_parser.rb 0.5.0-x86-mingw32

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. data/.gitignore +11 -0
  2. data/.gitmodules +6 -0
  3. data/README.md +45 -0
  4. data/Rakefile +6 -0
  5. data/bench/thin.rb +57 -0
  6. data/ext/ruby_http_parser/.gitignore +1 -0
  7. data/ext/ruby_http_parser/RubyHttpParserService.java +18 -0
  8. data/ext/ruby_http_parser/ext_help.h +18 -0
  9. data/ext/ruby_http_parser/extconf.rb +16 -0
  10. data/ext/ruby_http_parser/org/ruby_http_parser/RubyHttpParser.java +403 -0
  11. data/ext/ruby_http_parser/ruby_http_parser.c +474 -0
  12. data/ext/ruby_http_parser/vendor/.gitkeep +0 -0
  13. data/ext/ruby_http_parser/vendor/http-parser-java/CONTRIBUTIONS +4 -0
  14. data/ext/ruby_http_parser/vendor/http-parser-java/LICENSE-MIT +19 -0
  15. data/ext/ruby_http_parser/vendor/http-parser-java/README.md +171 -0
  16. data/ext/ruby_http_parser/vendor/http-parser-java/TODO +19 -0
  17. data/ext/ruby_http_parser/vendor/http-parser-java/compile +1 -0
  18. data/ext/ruby_http_parser/vendor/http-parser-java/http_parser.c +1590 -0
  19. data/ext/ruby_http_parser/vendor/http-parser-java/http_parser.h +167 -0
  20. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/HTTPException.java +7 -0
  21. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/HTTPMethod.java +90 -0
  22. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/HTTPParser.java +31 -0
  23. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/ParserType.java +13 -0
  24. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/lolevel/HTTPCallback.java +5 -0
  25. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/lolevel/HTTPDataCallback.java +25 -0
  26. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/lolevel/HTTPErrorCallback.java +7 -0
  27. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/lolevel/HTTPParser.java +1894 -0
  28. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/lolevel/ParserSettings.java +78 -0
  29. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/lolevel/Util.java +112 -0
  30. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/TestLoaderNG.java +487 -0
  31. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/UnitTest.java +115 -0
  32. data/ext/ruby_http_parser/vendor/http-parser-java/test.c +1865 -0
  33. data/ext/ruby_http_parser/vendor/http-parser-java/test_permutations +1 -0
  34. data/ext/ruby_http_parser/vendor/http-parser-java/test_unit +1 -0
  35. data/ext/ruby_http_parser/vendor/http-parser-java/tests.dumped +539 -0
  36. data/ext/ruby_http_parser/vendor/http-parser-java/tools/byte_constants.rb +6 -0
  37. data/ext/ruby_http_parser/vendor/http-parser-java/tools/const_char.rb +13 -0
  38. data/ext/ruby_http_parser/vendor/http-parser-java/tools/lowcase.rb +15 -0
  39. data/ext/ruby_http_parser/vendor/http-parser-java/tools/parse_tests.rb +33 -0
  40. data/ext/ruby_http_parser/vendor/http-parser/CONTRIBUTIONS +4 -0
  41. data/ext/ruby_http_parser/vendor/http-parser/LICENSE-MIT +19 -0
  42. data/ext/ruby_http_parser/vendor/http-parser/README.md +171 -0
  43. data/ext/ruby_http_parser/vendor/http-parser/http_parser.c +1590 -0
  44. data/ext/ruby_http_parser/vendor/http-parser/http_parser.h +167 -0
  45. data/ext/ruby_http_parser/vendor/http-parser/test.c +1755 -0
  46. data/http_parser.rb.gemspec +15 -0
  47. data/lib/http/parser.rb +1 -0
  48. data/lib/http_parser.rb +4 -0
  49. data/lib/ruby_http_parser.rb +2 -0
  50. data/spec/parser_spec.rb +187 -0
  51. data/spec/spec_helper.rb +2 -0
  52. data/spec/support/requests.json +381 -0
  53. data/spec/support/responses.json +186 -0
  54. data/tasks/compile.rake +39 -0
  55. data/tasks/spec.rake +5 -0
  56. data/tasks/submodules.rake +7 -0
  57. metadata +124 -0
@@ -0,0 +1,6 @@
1
+
2
+ "A".upto("Z") {|c|
3
+ puts "public static final byte #{c} = 0x#{c[0].to_s(16)};"
4
+ }
5
+
6
+
@@ -0,0 +1,13 @@
1
+
2
+
3
+ def printbytes str
4
+ str.each_byte { |b|
5
+ print "0x#{b.to_s(16)}, "
6
+ }
7
+ end
8
+
9
+ if $0 == __FILE__
10
+ printf "static final byte [] #{ARGV[0]} = {\n"
11
+ printbytes ARGV[0]
12
+ printf "\n};\n"
13
+ end
@@ -0,0 +1,15 @@
1
+
2
+
3
+ 0.upto(255) { |i|
4
+ printf "\n" if i%16 == 0
5
+ printf " " if i%8 == 0
6
+ s = ("" << i)
7
+ if s =~ /[A-Z0-9\-_\/ ]/
8
+ print "0x#{i.to_s(16)},"
9
+ elsif s =~ /[a-z]/
10
+ print "0x#{s.upcase[0].to_s(16)},"
11
+ else
12
+ print "0x00,"
13
+ end
14
+
15
+ }
@@ -0,0 +1,33 @@
1
+
2
+
3
+
4
+
5
+ # name : 200 trailing space on chunked body
6
+ # raw : "HTTP/1.1 200 OK\r\nContent-Type: text/plain\r\nTransfer-Encoding: chunked\r\n\r\n25 \r\nThis is the data in the first chunk\r\n\r\n1C\r\nand this is the second one\r\n\r\n0 \r\n\r\n"
7
+ # type : HTTP_RESPONSE
8
+ # method: HTTP_DELETE
9
+ # status code :200
10
+ # request_path:
11
+ # request_url :
12
+ # fragment :
13
+ # query_string:
14
+ # body :"This is the data in the first chunk\r\nand this is the second one\r\n"
15
+ # body_size :65
16
+ # header_0 :{ "Content-Type": "text/plain"}
17
+ # header_1 :{ "Transfer-Encoding": "chunked"}
18
+ # should_keep_alive :1
19
+ # upgrade :0
20
+ # http_major :1
21
+ # http_minor :1
22
+
23
+
24
+ class ParserTest
25
+ attr_accessor :name
26
+ attr_accessor :raw
27
+ attr_accessor :type
28
+ attr_accessor :method
29
+ attr_accessor :status_code
30
+ attr_accessor :request_path
31
+ attr_accessor :method
32
+ end
33
+
@@ -0,0 +1,4 @@
1
+ Contributors must agree to the Contributor License Agreement before patches
2
+ can be accepted.
3
+
4
+ http://spreadsheets2.google.com/viewform?hl=en&formkey=dDJXOGUwbzlYaWM4cHN1MERwQS1CSnc6MQ
@@ -0,0 +1,19 @@
1
+ Copyright 2009,2010 Ryan Dahl <ry@tinyclouds.org>
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining a copy
4
+ of this software and associated documentation files (the "Software"), to
5
+ deal in the Software without restriction, including without limitation the
6
+ rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
7
+ sell copies of the Software, and to permit persons to whom the Software is
8
+ furnished to do so, subject to the following conditions:
9
+
10
+ The above copyright notice and this permission notice shall be included in
11
+ all copies or substantial portions of the Software.
12
+
13
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
18
+ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
19
+ IN THE SOFTWARE.
@@ -0,0 +1,171 @@
1
+ HTTP Parser
2
+ ===========
3
+
4
+ This is a parser for HTTP messages written in C. It parses both requests and
5
+ responses. The parser is designed to be used in high-performance HTTP
6
+ applications. It does not make any syscalls nor allocations, it does not
7
+ buffer data, it can be interrupted at any time. Depending on your
8
+ architecture, it only requires about 40 bytes of data per message
9
+ stream (in a web server that is per connection).
10
+
11
+ Features:
12
+
13
+ * No dependencies
14
+ * Handles persistent streams (keep-alive).
15
+ * Decodes chunked encoding.
16
+ * Upgrade support
17
+ * Defends against buffer overflow attacks.
18
+
19
+ The parser extracts the following information from HTTP messages:
20
+
21
+ * Header fields and values
22
+ * Content-Length
23
+ * Request method
24
+ * Response status code
25
+ * Transfer-Encoding
26
+ * HTTP version
27
+ * Request path, query string, fragment
28
+ * Message body
29
+
30
+
31
+ Usage
32
+ -----
33
+
34
+ One `http_parser` object is used per TCP connection. Initialize the struct
35
+ using `http_parser_init()` and set the callbacks. That might look something
36
+ like this for a request parser:
37
+
38
+ http_parser_settings settings;
39
+ settings.on_path = my_path_callback;
40
+ settings.on_header_field = my_header_field_callback;
41
+ /* ... */
42
+
43
+ http_parser *parser = malloc(sizeof(http_parser));
44
+ http_parser_init(parser, HTTP_REQUEST);
45
+ parser->data = my_socket;
46
+
47
+ When data is received on the socket execute the parser and check for errors.
48
+
49
+ size_t len = 80*1024, nparsed;
50
+ char buf[len];
51
+ ssize_t recved;
52
+
53
+ recved = recv(fd, buf, len, 0);
54
+
55
+ if (recved < 0) {
56
+ /* Handle error. */
57
+ }
58
+
59
+ /* Start up / continue the parser.
60
+ * Note we pass recved==0 to signal that EOF has been received.
61
+ */
62
+ nparsed = http_parser_execute(parser, &settings, buf, recved);
63
+
64
+ if (parser->upgrade) {
65
+ /* handle new protocol */
66
+ } else if (nparsed != recved) {
67
+ /* Handle error. Usually just close the connection. */
68
+ }
69
+
70
+ HTTP needs to know where the end of the stream is. For example, sometimes
71
+ servers send responses without Content-Length and expect the client to
72
+ consume input (for the body) until EOF. To tell http_parser about EOF, give
73
+ `0` as the fourth parameter to `http_parser_execute()`. Callbacks and errors
74
+ can still be encountered during an EOF, so one must still be prepared
75
+ to receive them.
76
+
77
+ Scalar valued message information such as `status_code`, `method`, and the
78
+ HTTP version are stored in the parser structure. This data is only
79
+ temporarily stored in `http_parser` and gets reset on each new message. If
80
+ this information is needed later, copy it out of the structure during the
81
+ `headers_complete` callback.
82
+
83
+ The parser decodes the transfer-encoding for both requests and responses
84
+ transparently. That is, a chunked encoding is decoded before being sent to
85
+ the on_body callback.
86
+
87
+
88
+ The Special Problem of Upgrade
89
+ ------------------------------
90
+
91
+ HTTP supports upgrading the connection to a different protocol. An
92
+ increasingly common example of this is the Web Socket protocol which sends
93
+ a request like
94
+
95
+ GET /demo HTTP/1.1
96
+ Upgrade: WebSocket
97
+ Connection: Upgrade
98
+ Host: example.com
99
+ Origin: http://example.com
100
+ WebSocket-Protocol: sample
101
+
102
+ followed by non-HTTP data.
103
+
104
+ (See http://tools.ietf.org/html/draft-hixie-thewebsocketprotocol-75 for more
105
+ information on the Web Socket protocol.)
106
+
107
+ To support this, the parser will treat this as a normal HTTP message without a
108
+ body, issuing both on_headers_complete and on_message_complete callbacks. However,
109
+ http_parser_execute() will stop parsing at the end of the headers and return.
110
+
111
+ The user is expected to check if `parser->upgrade` has been set to 1 after
112
+ `http_parser_execute()` returns. Non-HTTP data begins at the buffer supplied
113
+ offset by the return value of `http_parser_execute()`.
114
+
115
+
116
+ Callbacks
117
+ ---------
118
+
119
+ During the `http_parser_execute()` call, the callbacks set in
120
+ `http_parser_settings` will be executed. The parser maintains state and
121
+ never looks behind, so buffering the data is not necessary. If you need to
122
+ save certain data for later usage, you can do that from the callbacks.
123
+
124
+ There are two types of callbacks:
125
+
126
+ * notification `typedef int (*http_cb) (http_parser*);`
127
+ Callbacks: on_message_begin, on_headers_complete, on_message_complete.
128
+ * data `typedef int (*http_data_cb) (http_parser*, const char *at, size_t length);`
129
+ Callbacks: (requests only) on_path, on_query_string, on_uri, on_fragment,
130
+ (common) on_header_field, on_header_value, on_body;
131
+
132
+ Callbacks must return 0 on success. Returning a non-zero value indicates
133
+ error to the parser, making it exit immediately.
134
+
135
+ In case you parse an HTTP message in chunks (e.g. `read()` request line
136
+ from socket, parse, read half headers, parse, etc) your data callbacks
137
+ may be called more than once. Http-parser guarantees that the data pointer is only
138
+ valid for the lifetime of the callback. You can also `read()` into a heap allocated
139
+ buffer to avoid copying memory around if this fits your application.
140
+
141
+ Reading headers may be a tricky task if you read/parse headers partially.
142
+ Basically, you need to remember whether the last header callback was a field or a value
143
+ and apply the following logic:
144
+
145
+ (on_header_field and on_header_value shortened to on_h_*)
146
+ ------------------------ ------------ --------------------------------------------
147
+ | State (prev. callback) | Callback | Description/action |
148
+ ------------------------ ------------ --------------------------------------------
149
+ | nothing (first call) | on_h_field | Allocate new buffer and copy callback data |
150
+ | | | into it |
151
+ ------------------------ ------------ --------------------------------------------
152
+ | value | on_h_field | New header started. |
153
+ | | | Copy current name,value buffers to headers |
154
+ | | | list and allocate new buffer for new name |
155
+ ------------------------ ------------ --------------------------------------------
156
+ | field | on_h_field | Previous name continues. Reallocate name |
157
+ | | | buffer and append callback data to it |
158
+ ------------------------ ------------ --------------------------------------------
159
+ | field | on_h_value | Value for current header started. Allocate |
160
+ | | | new buffer and copy callback data to it |
161
+ ------------------------ ------------ --------------------------------------------
162
+ | value | on_h_value | Value continues. Reallocate value buffer |
163
+ | | | and append callback data to it |
164
+ ------------------------ ------------ --------------------------------------------
165
+
166
+
167
+ See examples of reading in headers:
168
+
169
+ * [partial example](http://gist.github.com/155877) in C
170
+ * [from http-parser tests](http://github.com/ry/http-parser/blob/37a0ff8928fb0d83cec0d0d8909c5a4abcd221af/test.c#L403) in C
171
+ * [from Node library](http://github.com/ry/node/blob/842eaf446d2fdcb33b296c67c911c32a0dabc747/src/http.js#L284) in Javascript
@@ -0,0 +1,1590 @@
1
+ /* Copyright 2009,2010 Ryan Dahl <ry@tinyclouds.org>
2
+ *
3
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
4
+ * of this software and associated documentation files (the "Software"), to
5
+ * deal in the Software without restriction, including without limitation the
6
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
7
+ * sell copies of the Software, and to permit persons to whom the Software is
8
+ * furnished to do so, subject to the following conditions:
9
+ *
10
+ * The above copyright notice and this permission notice shall be included in
11
+ * all copies or substantial portions of the Software.
12
+ *
13
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
18
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
19
+ * IN THE SOFTWARE.
20
+ */
21
+ #include <http_parser.h>
22
+ #if defined(_WIN32) && !defined(__MINGW32__) /* non-MinGW Windows compilers: define the fixed-width names by hand instead of including <stdint.h> */
23
+ typedef __int8 int8_t;
24
+ typedef unsigned __int8 uint8_t;
25
+ typedef __int16 int16_t;
26
+ typedef unsigned __int16 uint16_t;
27
+ typedef __int32 int32_t; /* was __int16, which made int32_t only 16 bits wide */
28
+ typedef unsigned __int32 uint32_t;
29
+ #else
30
+ #include <stdint.h>
31
+ #endif
32
+ #include <assert.h>
33
+ #include <stddef.h>
34
+
35
+
36
+ #ifndef MIN
37
+ # define MIN(a,b) ((a) < (b) ? (a) : (b))
38
+ #endif
39
+
40
+
41
+ #define CALLBACK2(FOR) \
42
+ do { \
43
+ if (settings->on_##FOR) { \
44
+ if (0 != settings->on_##FOR(parser)) return (p - data); \
45
+ } \
46
+ } while (0)
47
+
48
+
49
+ #define MARK(FOR) \
50
+ do { \
51
+ FOR##_mark = p; \
52
+ } while (0)
53
+
54
+ #define CALLBACK_NOCLEAR(FOR) \
55
+ do { \
56
+ if (FOR##_mark) { \
57
+ if (settings->on_##FOR) { \
58
+ if (0 != settings->on_##FOR(parser, \
59
+ FOR##_mark, \
60
+ p - FOR##_mark)) \
61
+ { \
62
+ return (p - data); \
63
+ } \
64
+ } \
65
+ } \
66
+ } while (0)
67
+
68
+
69
+ #define CALLBACK(FOR) \
70
+ do { \
71
+ CALLBACK_NOCLEAR(FOR); \
72
+ FOR##_mark = NULL; \
73
+ } while (0)
74
+
75
+
76
+ #define PROXY_CONNECTION "proxy-connection"
77
+ #define CONNECTION "connection"
78
+ #define CONTENT_LENGTH "content-length"
79
+ #define TRANSFER_ENCODING "transfer-encoding"
80
+ #define UPGRADE "upgrade"
81
+ #define CHUNKED "chunked"
82
+ #define KEEP_ALIVE "keep-alive"
83
+ #define CLOSE "close"
84
+
85
+
86
+ static const char *method_strings[] =
87
+ { "DELETE"
88
+ , "GET"
89
+ , "HEAD"
90
+ , "POST"
91
+ , "PUT"
92
+ , "CONNECT"
93
+ , "OPTIONS"
94
+ , "TRACE"
95
+ , "COPY"
96
+ , "LOCK"
97
+ , "MKCOL"
98
+ , "MOVE"
99
+ , "PROPFIND"
100
+ , "PROPPATCH"
101
+ , "UNLOCK"
102
+ , "REPORT"
103
+ , "MKACTIVITY"
104
+ , "CHECKOUT"
105
+ , "MERGE"
106
+ };
107
+
108
+
109
+ /* ' ', '_', '-' and all alpha-numeric ascii characters are accepted by acceptable_header.
110
+ The 'A'-'Z' are lower-cased. */
111
+ static const char acceptable_header[256] = {
112
+ /* 0 nul 1 soh 2 stx 3 etx 4 eot 5 enq 6 ack 7 bel */
113
+ 0, 0, 0, 0, 0, 0, 0, 0,
114
+ /* 8 bs 9 ht 10 nl 11 vt 12 np 13 cr 14 so 15 si */
115
+ 0, 0, 0, 0, 0, 0, 0, 0,
116
+ /* 16 dle 17 dc1 18 dc2 19 dc3 20 dc4 21 nak 22 syn 23 etb */
117
+ 0, 0, 0, 0, 0, 0, 0, 0,
118
+ /* 24 can 25 em 26 sub 27 esc 28 fs 29 gs 30 rs 31 us */
119
+ 0, 0, 0, 0, 0, 0, 0, 0,
120
+ /* 32 sp 33 ! 34 " 35 # 36 $ 37 % 38 & 39 ' */
121
+ ' ', 0, 0, 0, 0, 0, 0, 0,
122
+ /* 40 ( 41 ) 42 * 43 + 44 , 45 - 46 . 47 / */
123
+ 0, 0, 0, 0, 0, '-', 0, 0,
124
+ /* 48 0 49 1 50 2 51 3 52 4 53 5 54 6 55 7 */
125
+ '0', '1', '2', '3', '4', '5', '6', '7',
126
+ /* 56 8 57 9 58 : 59 ; 60 < 61 = 62 > 63 ? */
127
+ '8', '9', 0, 0, 0, 0, 0, 0,
128
+ /* 64 @ 65 A 66 B 67 C 68 D 69 E 70 F 71 G */
129
+ 0, 'a', 'b', 'c', 'd', 'e', 'f', 'g',
130
+ /* 72 H 73 I 74 J 75 K 76 L 77 M 78 N 79 O */
131
+ 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
132
+ /* 80 P 81 Q 82 R 83 S 84 T 85 U 86 V 87 W */
133
+ 'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
134
+ /* 88 X 89 Y 90 Z 91 [ 92 \ 93 ] 94 ^ 95 _ */
135
+ 'x', 'y', 'z', 0, 0, 0, 0, '_',
136
+ /* 96 ` 97 a 98 b 99 c 100 d 101 e 102 f 103 g */
137
+ 0, 'a', 'b', 'c', 'd', 'e', 'f', 'g',
138
+ /* 104 h 105 i 106 j 107 k 108 l 109 m 110 n 111 o */
139
+ 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
140
+ /* 112 p 113 q 114 r 115 s 116 t 117 u 118 v 119 w */
141
+ 'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
142
+ /* 120 x 121 y 122 z 123 { 124 | 125 } 126 ~ 127 del */
143
+ 'x', 'y', 'z', 0, 0, 0, 0, 0 };
144
+
145
+
146
+ static const int8_t unhex[256] =
147
+ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
148
+ ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
149
+ ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
150
+ , 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1
151
+ ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1
152
+ ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
153
+ ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1
154
+ ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
155
+ };
156
+
157
+
158
+ static const uint8_t normal_url_char[256] = {
159
+ /* 0 nul 1 soh 2 stx 3 etx 4 eot 5 enq 6 ack 7 bel */
160
+ 0, 0, 0, 0, 0, 0, 0, 0,
161
+ /* 8 bs 9 ht 10 nl 11 vt 12 np 13 cr 14 so 15 si */
162
+ 0, 0, 0, 0, 0, 0, 0, 0,
163
+ /* 16 dle 17 dc1 18 dc2 19 dc3 20 dc4 21 nak 22 syn 23 etb */
164
+ 0, 0, 0, 0, 0, 0, 0, 0,
165
+ /* 24 can 25 em 26 sub 27 esc 28 fs 29 gs 30 rs 31 us */
166
+ 0, 0, 0, 0, 0, 0, 0, 0,
167
+ /* 32 sp 33 ! 34 " 35 # 36 $ 37 % 38 & 39 ' */
168
+ 0, 1, 1, 0, 1, 1, 1, 1,
169
+ /* 40 ( 41 ) 42 * 43 + 44 , 45 - 46 . 47 / */
170
+ 1, 1, 1, 1, 1, 1, 1, 1,
171
+ /* 48 0 49 1 50 2 51 3 52 4 53 5 54 6 55 7 */
172
+ 1, 1, 1, 1, 1, 1, 1, 1,
173
+ /* 56 8 57 9 58 : 59 ; 60 < 61 = 62 > 63 ? */
174
+ 1, 1, 1, 1, 1, 1, 1, 0,
175
+ /* 64 @ 65 A 66 B 67 C 68 D 69 E 70 F 71 G */
176
+ 1, 1, 1, 1, 1, 1, 1, 1,
177
+ /* 72 H 73 I 74 J 75 K 76 L 77 M 78 N 79 O */
178
+ 1, 1, 1, 1, 1, 1, 1, 1,
179
+ /* 80 P 81 Q 82 R 83 S 84 T 85 U 86 V 87 W */
180
+ 1, 1, 1, 1, 1, 1, 1, 1,
181
+ /* 88 X 89 Y 90 Z 91 [ 92 \ 93 ] 94 ^ 95 _ */
182
+ 1, 1, 1, 1, 1, 1, 1, 1,
183
+ /* 96 ` 97 a 98 b 99 c 100 d 101 e 102 f 103 g */
184
+ 1, 1, 1, 1, 1, 1, 1, 1,
185
+ /* 104 h 105 i 106 j 107 k 108 l 109 m 110 n 111 o */
186
+ 1, 1, 1, 1, 1, 1, 1, 1,
187
+ /* 112 p 113 q 114 r 115 s 116 t 117 u 118 v 119 w */
188
+ 1, 1, 1, 1, 1, 1, 1, 1,
189
+ /* 120 x 121 y 122 z 123 { 124 | 125 } 126 ~ 127 del */
190
+ 1, 1, 1, 1, 1, 1, 1, 0 };
191
+
192
+
193
+ enum state
194
+ { s_dead = 1 /* important that this is > 0 */
195
+
196
+ , s_start_req_or_res
197
+ , s_res_or_resp_H
198
+ , s_start_res
199
+ , s_res_H
200
+ , s_res_HT
201
+ , s_res_HTT
202
+ , s_res_HTTP
203
+ , s_res_first_http_major
204
+ , s_res_http_major
205
+ , s_res_first_http_minor
206
+ , s_res_http_minor
207
+ , s_res_first_status_code
208
+ , s_res_status_code
209
+ , s_res_status
210
+ , s_res_line_almost_done
211
+
212
+ , s_start_req
213
+
214
+ , s_req_method
215
+ , s_req_spaces_before_url
216
+ , s_req_schema
217
+ , s_req_schema_slash
218
+ , s_req_schema_slash_slash
219
+ , s_req_host
220
+ , s_req_port
221
+ , s_req_path
222
+ , s_req_query_string_start
223
+ , s_req_query_string
224
+ , s_req_fragment_start
225
+ , s_req_fragment
226
+ , s_req_http_start
227
+ , s_req_http_H
228
+ , s_req_http_HT
229
+ , s_req_http_HTT
230
+ , s_req_http_HTTP
231
+ , s_req_first_http_major
232
+ , s_req_http_major
233
+ , s_req_first_http_minor
234
+ , s_req_http_minor
235
+ , s_req_line_almost_done
236
+
237
+ , s_header_field_start
238
+ , s_header_field
239
+ , s_header_value_start
240
+ , s_header_value
241
+
242
+ , s_header_almost_done
243
+
244
+ , s_headers_almost_done
245
+ /* Important: 's_headers_almost_done' must be the last 'header' state. All
246
+ * states beyond this must be 'body' states. It is used for overflow
247
+ * checking. See the PARSING_HEADER() macro.
248
+ */
249
+ , s_chunk_size_start
250
+ , s_chunk_size
251
+ , s_chunk_size_almost_done
252
+ , s_chunk_parameters
253
+ , s_chunk_data
254
+ , s_chunk_data_almost_done
255
+ , s_chunk_data_done
256
+
257
+ , s_body_identity
258
+ , s_body_identity_eof
259
+ };
260
+
261
+
262
+ #define PARSING_HEADER(state) (state <= s_headers_almost_done && 0 == (parser->flags & F_TRAILING))
263
+
264
+
265
+ enum header_states
266
+ { h_general = 0
267
+ , h_C
268
+ , h_CO
269
+ , h_CON
270
+
271
+ , h_matching_connection
272
+ , h_matching_proxy_connection
273
+ , h_matching_content_length
274
+ , h_matching_transfer_encoding
275
+ , h_matching_upgrade
276
+
277
+ , h_connection
278
+ , h_content_length
279
+ , h_transfer_encoding
280
+ , h_upgrade
281
+
282
+ , h_matching_transfer_encoding_chunked
283
+ , h_matching_connection_keep_alive
284
+ , h_matching_connection_close
285
+
286
+ , h_transfer_encoding_chunked
287
+ , h_connection_keep_alive
288
+ , h_connection_close
289
+ };
290
+
291
+
292
+ enum flags
293
+ { F_CHUNKED = 1 << 0
294
+ , F_CONNECTION_KEEP_ALIVE = 1 << 1
295
+ , F_CONNECTION_CLOSE = 1 << 2
296
+ , F_TRAILING = 1 << 3
297
+ , F_UPGRADE = 1 << 4
298
+ , F_SKIPBODY = 1 << 5
299
+ };
300
+
301
+
302
+ #define CR '\r'
303
+ #define LF '\n'
304
+ #define LOWER(c) (unsigned char)((c) | 0x20) /* sets bit 0x20, mapping ASCII 'A'-'Z' to 'a'-'z'; argument parenthesized so compound expressions expand correctly */
305
+
306
+
307
+ #define start_state (parser->type == HTTP_REQUEST ? s_start_req : s_start_res)
308
+
309
+
310
+ #if HTTP_PARSER_STRICT
311
+ # define STRICT_CHECK(cond) if (cond) goto error
312
+ # define NEW_MESSAGE() (http_should_keep_alive(parser) ? start_state : s_dead)
313
+ #else
314
+ # define STRICT_CHECK(cond)
315
+ # define NEW_MESSAGE() start_state
316
+ #endif
317
+
318
+
319
+ size_t http_parser_execute (http_parser *parser,
320
+ const http_parser_settings *settings,
321
+ const char *data,
322
+ size_t len)
323
+ {
324
+ char c, ch;
325
+ const char *p = data, *pe;
326
+ int64_t to_read;
327
+
328
+ enum state state = (enum state) parser->state;
329
+ enum header_states header_state = (enum header_states) parser->header_state;
330
+ uint64_t index = parser->index;
331
+ uint64_t nread = parser->nread;
332
+
333
+ if (len == 0) {
334
+ if (state == s_body_identity_eof) {
335
+ CALLBACK2(message_complete);
336
+ }
337
+ return 0;
338
+ }
339
+
340
+ /* technically we could combine all of these (except for url_mark) into one
341
+ variable, saving stack space, but it seems more clear to have them
342
+ separated. */
343
+ const char *header_field_mark = 0;
344
+ const char *header_value_mark = 0;
345
+ const char *fragment_mark = 0;
346
+ const char *query_string_mark = 0;
347
+ const char *path_mark = 0;
348
+ const char *url_mark = 0;
349
+
350
+ if (state == s_header_field)
351
+ header_field_mark = data;
352
+ if (state == s_header_value)
353
+ header_value_mark = data;
354
+ if (state == s_req_fragment)
355
+ fragment_mark = data;
356
+ if (state == s_req_query_string)
357
+ query_string_mark = data;
358
+ if (state == s_req_path)
359
+ path_mark = data;
360
+ if (state == s_req_path || state == s_req_schema || state == s_req_schema_slash
361
+ || state == s_req_schema_slash_slash || state == s_req_port
362
+ || state == s_req_query_string_start || state == s_req_query_string
363
+ || state == s_req_host
364
+ || state == s_req_fragment_start || state == s_req_fragment)
365
+ url_mark = data;
366
+
367
+ for (p=data, pe=data+len; p != pe; p++) {
368
+ ch = *p;
369
+
370
+ if (PARSING_HEADER(state)) {
371
+ ++nread;
372
+ /* Buffer overflow attack */
373
+ if (nread > HTTP_MAX_HEADER_SIZE) goto error;
374
+ }
375
+
376
+ switch (state) {
377
+
378
+ case s_dead:
379
+ /* this state is used after a 'Connection: close' message
380
+ * the parser will error out if it reads another message
381
+ */
382
+ goto error;
383
+
384
+ case s_start_req_or_res:
385
+ {
386
+ if (ch == CR || ch == LF)
387
+ break;
388
+ parser->flags = 0;
389
+ parser->content_length = -1;
390
+
391
+ CALLBACK2(message_begin);
392
+
393
+ if (ch == 'H')
394
+ state = s_res_or_resp_H;
395
+ else {
396
+ parser->type = HTTP_REQUEST;
397
+ goto start_req_method_assign;
398
+ }
399
+ break;
400
+ }
401
+
402
+ case s_res_or_resp_H:
403
+ if (ch == 'T') {
404
+ parser->type = HTTP_RESPONSE;
405
+ state = s_res_HT;
406
+ } else {
407
+ if (ch != 'E') goto error;
408
+ parser->type = HTTP_REQUEST;
409
+ parser->method = HTTP_HEAD;
410
+ index = 2;
411
+ state = s_req_method;
412
+ }
413
+ break;
414
+
415
+ case s_start_res:
416
+ {
417
+ parser->flags = 0;
418
+ parser->content_length = -1;
419
+
420
+ CALLBACK2(message_begin);
421
+
422
+ switch (ch) {
423
+ case 'H':
424
+ state = s_res_H;
425
+ break;
426
+
427
+ case CR:
428
+ case LF:
429
+ break;
430
+
431
+ default:
432
+ goto error;
433
+ }
434
+ break;
435
+ }
436
+
437
+ case s_res_H:
438
+ STRICT_CHECK(ch != 'T');
439
+ state = s_res_HT;
440
+ break;
441
+
442
+ case s_res_HT:
443
+ STRICT_CHECK(ch != 'T');
444
+ state = s_res_HTT;
445
+ break;
446
+
447
+ case s_res_HTT:
448
+ STRICT_CHECK(ch != 'P');
449
+ state = s_res_HTTP;
450
+ break;
451
+
452
+ case s_res_HTTP:
453
+ STRICT_CHECK(ch != '/');
454
+ state = s_res_first_http_major;
455
+ break;
456
+
457
+ case s_res_first_http_major:
458
+ if (ch < '1' || ch > '9') goto error;
459
+ parser->http_major = ch - '0';
460
+ state = s_res_http_major;
461
+ break;
462
+
463
+ /* major HTTP version or dot */
464
+ case s_res_http_major:
465
+ {
466
+ if (ch == '.') {
467
+ state = s_res_first_http_minor;
468
+ break;
469
+ }
470
+
471
+ if (ch < '0' || ch > '9') goto error;
472
+
473
+ parser->http_major *= 10;
474
+ parser->http_major += ch - '0';
475
+
476
+ if (parser->http_major > 999) goto error;
477
+ break;
478
+ }
479
+
480
+ /* first digit of minor HTTP version */
481
+ case s_res_first_http_minor:
482
+ if (ch < '0' || ch > '9') goto error;
483
+ parser->http_minor = ch - '0';
484
+ state = s_res_http_minor;
485
+ break;
486
+
487
+ /* minor HTTP version or end of request line */
488
+ case s_res_http_minor:
489
+ {
490
+ if (ch == ' ') {
491
+ state = s_res_first_status_code;
492
+ break;
493
+ }
494
+
495
+ if (ch < '0' || ch > '9') goto error;
496
+
497
+ parser->http_minor *= 10;
498
+ parser->http_minor += ch - '0';
499
+
500
+ if (parser->http_minor > 999) goto error;
501
+ break;
502
+ }
503
+
504
+ case s_res_first_status_code:
505
+ {
506
+ if (ch < '0' || ch > '9') {
507
+ if (ch == ' ') {
508
+ break;
509
+ }
510
+ goto error;
511
+ }
512
+ parser->status_code = ch - '0';
513
+ state = s_res_status_code;
514
+ break;
515
+ }
516
+
517
+ case s_res_status_code:
518
+ {
519
+ if (ch < '0' || ch > '9') {
520
+ switch (ch) {
521
+ case ' ':
522
+ state = s_res_status;
523
+ break;
524
+ case CR:
525
+ state = s_res_line_almost_done;
526
+ break;
527
+ case LF:
528
+ state = s_header_field_start;
529
+ break;
530
+ default:
531
+ goto error;
532
+ }
533
+ break;
534
+ }
535
+
536
+ parser->status_code *= 10;
537
+ parser->status_code += ch - '0';
538
+
539
+ if (parser->status_code > 999) goto error;
540
+ break;
541
+ }
542
+
543
+ case s_res_status:
544
+ /* the human readable status. e.g. "NOT FOUND"
545
+ * we are not humans so just ignore this */
546
+ if (ch == CR) {
547
+ state = s_res_line_almost_done;
548
+ break;
549
+ }
550
+
551
+ if (ch == LF) {
552
+ state = s_header_field_start;
553
+ break;
554
+ }
555
+ break;
556
+
557
+ case s_res_line_almost_done:
558
+ STRICT_CHECK(ch != LF);
559
+ state = s_header_field_start;
560
+ break;
561
+
562
+ case s_start_req:
563
+ {
564
+ if (ch == CR || ch == LF)
565
+ break;
566
+ parser->flags = 0;
567
+ parser->content_length = -1;
568
+
569
+ CALLBACK2(message_begin);
570
+
571
+ if (ch < 'A' || 'Z' < ch) goto error;
572
+
573
+ start_req_method_assign:
574
+ parser->method = (enum http_method) 0;
575
+ index = 1;
576
+ switch (ch) {
577
+ case 'C': parser->method = HTTP_CONNECT; /* or COPY, CHECKOUT */ break;
578
+ case 'D': parser->method = HTTP_DELETE; break;
579
+ case 'G': parser->method = HTTP_GET; break;
580
+ case 'H': parser->method = HTTP_HEAD; break;
581
+ case 'L': parser->method = HTTP_LOCK; break;
582
+ case 'M': parser->method = HTTP_MKCOL; /* or MOVE, MKACTIVITY, MERGE */ break;
583
+ case 'O': parser->method = HTTP_OPTIONS; break;
584
+ case 'P': parser->method = HTTP_POST; /* or PROPFIND or PROPPATCH or PUT */ break;
585
+ case 'R': parser->method = HTTP_REPORT; break;
586
+ case 'T': parser->method = HTTP_TRACE; break;
587
+ case 'U': parser->method = HTTP_UNLOCK; break;
588
+ default: goto error;
589
+ }
590
+ state = s_req_method;
591
+ break;
592
+ }
593
+
594
+ case s_req_method:
595
+ {
596
+ if (ch == '\0')
597
+ goto error;
598
+
599
+ const char *matcher = method_strings[parser->method];
600
+ if (ch == ' ' && matcher[index] == '\0') {
601
+ state = s_req_spaces_before_url;
602
+ } else if (ch == matcher[index]) {
603
+ ; /* nada */
604
+ } else if (parser->method == HTTP_CONNECT) {
605
+ if (index == 1 && ch == 'H') {
606
+ parser->method = HTTP_CHECKOUT;
607
+ } else if (index == 2 && ch == 'P') {
608
+ parser->method = HTTP_COPY;
609
+ }
610
+ } else if (parser->method == HTTP_MKCOL) {
611
+ if (index == 1 && ch == 'O') {
612
+ parser->method = HTTP_MOVE;
613
+ } else if (index == 1 && ch == 'E') {
614
+ parser->method = HTTP_MERGE;
615
+ } else if (index == 2 && ch == 'A') {
616
+ parser->method = HTTP_MKACTIVITY;
617
+ }
618
+ } else if (index == 1 && parser->method == HTTP_POST && ch == 'R') {
619
+ parser->method = HTTP_PROPFIND; /* or HTTP_PROPPATCH */
620
+ } else if (index == 1 && parser->method == HTTP_POST && ch == 'U') {
621
+ parser->method = HTTP_PUT;
622
+ } else if (index == 4 && parser->method == HTTP_PROPFIND && ch == 'P') {
623
+ parser->method = HTTP_PROPPATCH;
624
+ } else {
625
+ goto error;
626
+ }
627
+
628
+ ++index;
629
+ break;
630
+ }
631
+ case s_req_spaces_before_url:
632
+ {
633
+ if (ch == ' ') break;
634
+
635
+ if (ch == '/') {
636
+ MARK(url);
637
+ MARK(path);
638
+ state = s_req_path;
639
+ break;
640
+ }
641
+
642
+ c = LOWER(ch);
643
+
644
+ if (c >= 'a' && c <= 'z') {
645
+ MARK(url);
646
+ state = s_req_schema;
647
+ break;
648
+ }
649
+
650
+ goto error;
651
+ }
652
+
653
+ case s_req_schema:
654
+ {
655
+ c = LOWER(ch);
656
+
657
+ if (c >= 'a' && c <= 'z') break;
658
+
659
+ if (ch == ':') {
660
+ state = s_req_schema_slash;
661
+ break;
662
+ } else if (ch == '.') {
663
+ state = s_req_host;
664
+ break;
665
+ }
666
+
667
+ goto error;
668
+ }
669
+
670
+ case s_req_schema_slash:
671
+ STRICT_CHECK(ch != '/');
672
+ state = s_req_schema_slash_slash;
673
+ break;
674
+
675
+ case s_req_schema_slash_slash:
676
+ STRICT_CHECK(ch != '/');
677
+ state = s_req_host;
678
+ break;
679
+
680
+ case s_req_host:
681
+ {
682
+ c = LOWER(ch);
683
+ if (c >= 'a' && c <= 'z') break;
684
+ if ((ch >= '0' && ch <= '9') || ch == '.' || ch == '-') break;
685
+ switch (ch) {
686
+ case ':':
687
+ state = s_req_port;
688
+ break;
689
+ case '/':
690
+ MARK(path);
691
+ state = s_req_path;
692
+ break;
693
+ case ' ':
694
+ /* The request line looks like:
695
+ * "GET http://foo.bar.com HTTP/1.1"
696
+ * That is, there is no path.
697
+ */
698
+ CALLBACK(url);
699
+ state = s_req_http_start;
700
+ break;
701
+ default:
702
+ goto error;
703
+ }
704
+ break;
705
+ }
706
+
707
+ case s_req_port:
708
+ {
709
+ if (ch >= '0' && ch <= '9') break;
710
+ switch (ch) {
711
+ case '/':
712
+ MARK(path);
713
+ state = s_req_path;
714
+ break;
715
+ case ' ':
716
+ /* The request line looks like:
717
+ * "GET http://foo.bar.com:1234 HTTP/1.1"
718
+ * That is, there is no path.
719
+ */
720
+ CALLBACK(url);
721
+ state = s_req_http_start;
722
+ break;
723
+ default:
724
+ goto error;
725
+ }
726
+ break;
727
+ }
728
+
729
+ case s_req_path:
730
+ {
731
+ if (normal_url_char[(unsigned char)ch]) break;
732
+
733
+ switch (ch) {
734
+ case ' ':
735
+ CALLBACK(url);
736
+ CALLBACK(path);
737
+ state = s_req_http_start;
738
+ break;
739
+ case CR:
740
+ CALLBACK(url);
741
+ CALLBACK(path);
742
+ parser->http_minor = 9;
743
+ state = s_req_line_almost_done;
744
+ break;
745
+ case LF:
746
+ CALLBACK(url);
747
+ CALLBACK(path);
748
+ parser->http_minor = 9;
749
+ state = s_header_field_start;
750
+ break;
751
+ case '?':
752
+ CALLBACK(path);
753
+ state = s_req_query_string_start;
754
+ break;
755
+ case '#':
756
+ CALLBACK(path);
757
+ state = s_req_fragment_start;
758
+ break;
759
+ default:
760
+ goto error;
761
+ }
762
+ break;
763
+ }
764
+
765
+ case s_req_query_string_start:
766
+ {
767
+ if (normal_url_char[(unsigned char)ch]) {
768
+ MARK(query_string);
769
+ state = s_req_query_string;
770
+ break;
771
+ }
772
+
773
+ switch (ch) {
774
+ case '?':
775
+ break; /* XXX ignore extra '?' ... is this right? */
776
+ case ' ':
777
+ CALLBACK(url);
778
+ state = s_req_http_start;
779
+ break;
780
+ case CR:
781
+ CALLBACK(url);
782
+ parser->http_minor = 9;
783
+ state = s_req_line_almost_done;
784
+ break;
785
+ case LF:
786
+ CALLBACK(url);
787
+ parser->http_minor = 9;
788
+ state = s_header_field_start;
789
+ break;
790
+ case '#':
791
+ state = s_req_fragment_start;
792
+ break;
793
+ default:
794
+ goto error;
795
+ }
796
+ break;
797
+ }
798
+
799
+ case s_req_query_string:
800
+ {
801
+ if (normal_url_char[(unsigned char)ch]) break;
802
+
803
+ switch (ch) {
804
+ case '?':
805
+ /* allow extra '?' in query string */
806
+ break;
807
+ case ' ':
808
+ CALLBACK(url);
809
+ CALLBACK(query_string);
810
+ state = s_req_http_start;
811
+ break;
812
+ case CR:
813
+ CALLBACK(url);
814
+ CALLBACK(query_string);
815
+ parser->http_minor = 9;
816
+ state = s_req_line_almost_done;
817
+ break;
818
+ case LF:
819
+ CALLBACK(url);
820
+ CALLBACK(query_string);
821
+ parser->http_minor = 9;
822
+ state = s_header_field_start;
823
+ break;
824
+ case '#':
825
+ CALLBACK(query_string);
826
+ state = s_req_fragment_start;
827
+ break;
828
+ default:
829
+ goto error;
830
+ }
831
+ break;
832
+ }
833
+
834
+ case s_req_fragment_start:
835
+ {
836
+ if (normal_url_char[(unsigned char)ch]) {
837
+ MARK(fragment);
838
+ state = s_req_fragment;
839
+ break;
840
+ }
841
+
842
+ switch (ch) {
843
+ case ' ':
844
+ CALLBACK(url);
845
+ state = s_req_http_start;
846
+ break;
847
+ case CR:
848
+ CALLBACK(url);
849
+ parser->http_minor = 9;
850
+ state = s_req_line_almost_done;
851
+ break;
852
+ case LF:
853
+ CALLBACK(url);
854
+ parser->http_minor = 9;
855
+ state = s_header_field_start;
856
+ break;
857
+ case '?':
858
+ MARK(fragment);
859
+ state = s_req_fragment;
860
+ break;
861
+ case '#':
862
+ break;
863
+ default:
864
+ goto error;
865
+ }
866
+ break;
867
+ }
868
+
869
+ case s_req_fragment:
870
+ {
871
+ if (normal_url_char[(unsigned char)ch]) break;
872
+
873
+ switch (ch) {
874
+ case ' ':
875
+ CALLBACK(url);
876
+ CALLBACK(fragment);
877
+ state = s_req_http_start;
878
+ break;
879
+ case CR:
880
+ CALLBACK(url);
881
+ CALLBACK(fragment);
882
+ parser->http_minor = 9;
883
+ state = s_req_line_almost_done;
884
+ break;
885
+ case LF:
886
+ CALLBACK(url);
887
+ CALLBACK(fragment);
888
+ parser->http_minor = 9;
889
+ state = s_header_field_start;
890
+ break;
891
+ case '?':
892
+ case '#':
893
+ break;
894
+ default:
895
+ goto error;
896
+ }
897
+ break;
898
+ }
899
+
900
+ case s_req_http_start:
901
+ switch (ch) {
902
+ case 'H':
903
+ state = s_req_http_H;
904
+ break;
905
+ case ' ':
906
+ break;
907
+ default:
908
+ goto error;
909
+ }
910
+ break;
911
+
912
+ case s_req_http_H:
913
+ STRICT_CHECK(ch != 'T');
914
+ state = s_req_http_HT;
915
+ break;
916
+
917
+ case s_req_http_HT:
918
+ STRICT_CHECK(ch != 'T');
919
+ state = s_req_http_HTT;
920
+ break;
921
+
922
+ case s_req_http_HTT:
923
+ STRICT_CHECK(ch != 'P');
924
+ state = s_req_http_HTTP;
925
+ break;
926
+
927
+ case s_req_http_HTTP:
928
+ STRICT_CHECK(ch != '/');
929
+ state = s_req_first_http_major;
930
+ break;
931
+
932
+ /* first digit of major HTTP version */
933
+ case s_req_first_http_major:
934
+ if (ch < '1' || ch > '9') goto error;
935
+ parser->http_major = ch - '0';
936
+ state = s_req_http_major;
937
+ break;
938
+
939
+ /* major HTTP version or dot */
940
+ case s_req_http_major:
941
+ {
942
+ if (ch == '.') {
943
+ state = s_req_first_http_minor;
944
+ break;
945
+ }
946
+
947
+ if (ch < '0' || ch > '9') goto error;
948
+
949
+ parser->http_major *= 10;
950
+ parser->http_major += ch - '0';
951
+
952
+ if (parser->http_major > 999) goto error;
953
+ break;
954
+ }
955
+
956
+ /* first digit of minor HTTP version */
957
+ case s_req_first_http_minor:
958
+ if (ch < '0' || ch > '9') goto error;
959
+ parser->http_minor = ch - '0';
960
+ state = s_req_http_minor;
961
+ break;
962
+
963
+ /* minor HTTP version or end of request line */
964
+ case s_req_http_minor:
965
+ {
966
+ if (ch == CR) {
967
+ state = s_req_line_almost_done;
968
+ break;
969
+ }
970
+
971
+ if (ch == LF) {
972
+ state = s_header_field_start;
973
+ break;
974
+ }
975
+
976
+ /* XXX allow spaces after digit? */
977
+
978
+ if (ch < '0' || ch > '9') goto error;
979
+
980
+ parser->http_minor *= 10;
981
+ parser->http_minor += ch - '0';
982
+
983
+ if (parser->http_minor > 999) goto error;
984
+ break;
985
+ }
986
+
987
+ /* end of request line */
988
+ case s_req_line_almost_done:
989
+ {
990
+ if (ch != LF) goto error;
991
+ state = s_header_field_start;
992
+ break;
993
+ }
994
+
995
+ case s_header_field_start:
996
+ {
997
+ if (ch == CR) {
998
+ state = s_headers_almost_done;
999
+ break;
1000
+ }
1001
+
1002
+ if (ch == LF) {
1003
+ /* they might be just sending \n instead of \r\n so this would be
1004
+ * the second \n to denote the end of headers*/
1005
+ state = s_headers_almost_done;
1006
+ goto headers_almost_done;
1007
+ }
1008
+
1009
+ c = LOWER(ch);
1010
+
1011
+ if (c < 'a' || 'z' < c) goto error;
1012
+
1013
+ MARK(header_field);
1014
+
1015
+ index = 0;
1016
+ state = s_header_field;
1017
+
1018
+ switch (c) {
1019
+ case 'c':
1020
+ header_state = h_C;
1021
+ break;
1022
+
1023
+ case 'p':
1024
+ header_state = h_matching_proxy_connection;
1025
+ break;
1026
+
1027
+ case 't':
1028
+ header_state = h_matching_transfer_encoding;
1029
+ break;
1030
+
1031
+ case 'u':
1032
+ header_state = h_matching_upgrade;
1033
+ break;
1034
+
1035
+ default:
1036
+ header_state = h_general;
1037
+ break;
1038
+ }
1039
+ break;
1040
+ }
1041
+
1042
+ case s_header_field:
1043
+ {
1044
+ c = acceptable_header[(unsigned char)ch];
1045
+
1046
+ if (c) {
1047
+ switch (header_state) {
1048
+ case h_general:
1049
+ break;
1050
+
1051
+ case h_C:
1052
+ index++;
1053
+ header_state = (c == 'o' ? h_CO : h_general);
1054
+ break;
1055
+
1056
+ case h_CO:
1057
+ index++;
1058
+ header_state = (c == 'n' ? h_CON : h_general);
1059
+ break;
1060
+
1061
+ case h_CON:
1062
+ index++;
1063
+ switch (c) {
1064
+ case 'n':
1065
+ header_state = h_matching_connection;
1066
+ break;
1067
+ case 't':
1068
+ header_state = h_matching_content_length;
1069
+ break;
1070
+ default:
1071
+ header_state = h_general;
1072
+ break;
1073
+ }
1074
+ break;
1075
+
1076
+ /* connection */
1077
+
1078
+ case h_matching_connection:
1079
+ index++;
1080
+ if (index > sizeof(CONNECTION)-1
1081
+ || c != CONNECTION[index]) {
1082
+ header_state = h_general;
1083
+ } else if (index == sizeof(CONNECTION)-2) {
1084
+ header_state = h_connection;
1085
+ }
1086
+ break;
1087
+
1088
+ /* proxy-connection */
1089
+
1090
+ case h_matching_proxy_connection:
1091
+ index++;
1092
+ if (index > sizeof(PROXY_CONNECTION)-1
1093
+ || c != PROXY_CONNECTION[index]) {
1094
+ header_state = h_general;
1095
+ } else if (index == sizeof(PROXY_CONNECTION)-2) {
1096
+ header_state = h_connection;
1097
+ }
1098
+ break;
1099
+
1100
+ /* content-length */
1101
+
1102
+ case h_matching_content_length:
1103
+ index++;
1104
+ if (index > sizeof(CONTENT_LENGTH)-1
1105
+ || c != CONTENT_LENGTH[index]) {
1106
+ header_state = h_general;
1107
+ } else if (index == sizeof(CONTENT_LENGTH)-2) {
1108
+ header_state = h_content_length;
1109
+ }
1110
+ break;
1111
+
1112
+ /* transfer-encoding */
1113
+
1114
+ case h_matching_transfer_encoding:
1115
+ index++;
1116
+ if (index > sizeof(TRANSFER_ENCODING)-1
1117
+ || c != TRANSFER_ENCODING[index]) {
1118
+ header_state = h_general;
1119
+ } else if (index == sizeof(TRANSFER_ENCODING)-2) {
1120
+ header_state = h_transfer_encoding;
1121
+ }
1122
+ break;
1123
+
1124
+ /* upgrade */
1125
+
1126
+ case h_matching_upgrade:
1127
+ index++;
1128
+ if (index > sizeof(UPGRADE)-1
1129
+ || c != UPGRADE[index]) {
1130
+ header_state = h_general;
1131
+ } else if (index == sizeof(UPGRADE)-2) {
1132
+ header_state = h_upgrade;
1133
+ }
1134
+ break;
1135
+
1136
+ case h_connection:
1137
+ case h_content_length:
1138
+ case h_transfer_encoding:
1139
+ case h_upgrade:
1140
+ if (ch != ' ') header_state = h_general;
1141
+ break;
1142
+
1143
+ default:
1144
+ assert(0 && "Unknown header_state");
1145
+ break;
1146
+ }
1147
+ break;
1148
+ }
1149
+
1150
+ if (ch == ':') {
1151
+ CALLBACK(header_field);
1152
+ state = s_header_value_start;
1153
+ break;
1154
+ }
1155
+
1156
+ if (ch == CR) {
1157
+ state = s_header_almost_done;
1158
+ CALLBACK(header_field);
1159
+ break;
1160
+ }
1161
+
1162
+ if (ch == LF) {
1163
+ CALLBACK(header_field);
1164
+ state = s_header_field_start;
1165
+ break;
1166
+ }
1167
+
1168
+ goto error;
1169
+ }
1170
+
1171
+ case s_header_value_start:
1172
+ {
1173
+ if (ch == ' ') break;
1174
+
1175
+ MARK(header_value);
1176
+
1177
+ state = s_header_value;
1178
+ index = 0;
1179
+
1180
+ c = acceptable_header[(unsigned char)ch];
1181
+
1182
+ if (!c) {
1183
+ if (ch == CR) {
1184
+ CALLBACK(header_value);
1185
+ header_state = h_general;
1186
+ state = s_header_almost_done;
1187
+ break;
1188
+ }
1189
+
1190
+ if (ch == LF) {
1191
+ CALLBACK(header_value);
1192
+ state = s_header_field_start;
1193
+ break;
1194
+ }
1195
+
1196
+ header_state = h_general;
1197
+ break;
1198
+ }
1199
+
1200
+ switch (header_state) {
1201
+ case h_upgrade:
1202
+ parser->flags |= F_UPGRADE;
1203
+ header_state = h_general;
1204
+ break;
1205
+
1206
+ case h_transfer_encoding:
1207
+ /* looking for 'Transfer-Encoding: chunked' */
1208
+ if ('c' == c) {
1209
+ header_state = h_matching_transfer_encoding_chunked;
1210
+ } else {
1211
+ header_state = h_general;
1212
+ }
1213
+ break;
1214
+
1215
+ case h_content_length:
1216
+ if (ch < '0' || ch > '9') goto error;
1217
+ parser->content_length = ch - '0';
1218
+ break;
1219
+
1220
+ case h_connection:
1221
+ /* looking for 'Connection: keep-alive' */
1222
+ if (c == 'k') {
1223
+ header_state = h_matching_connection_keep_alive;
1224
+ /* looking for 'Connection: close' */
1225
+ } else if (c == 'c') {
1226
+ header_state = h_matching_connection_close;
1227
+ } else {
1228
+ header_state = h_general;
1229
+ }
1230
+ break;
1231
+
1232
+ default:
1233
+ header_state = h_general;
1234
+ break;
1235
+ }
1236
+ break;
1237
+ }
1238
+
1239
+ case s_header_value:
1240
+ {
1241
+ c = acceptable_header[(unsigned char)ch];
1242
+
1243
+ if (!c) {
1244
+ if (ch == CR) {
1245
+ CALLBACK(header_value);
1246
+ state = s_header_almost_done;
1247
+ break;
1248
+ }
1249
+
1250
+ if (ch == LF) {
1251
+ CALLBACK(header_value);
1252
+ goto header_almost_done;
1253
+ }
1254
+ break;
1255
+ }
1256
+
1257
+ switch (header_state) {
1258
+ case h_general:
1259
+ break;
1260
+
1261
+ case h_connection:
1262
+ case h_transfer_encoding:
1263
+ assert(0 && "Shouldn't get here.");
1264
+ break;
1265
+
1266
+ case h_content_length:
1267
+ if (ch == ' ') break;
1268
+ if (ch < '0' || ch > '9') goto error;
1269
+ parser->content_length *= 10;
1270
+ parser->content_length += ch - '0';
1271
+ break;
1272
+
1273
+ /* Transfer-Encoding: chunked */
1274
+ case h_matching_transfer_encoding_chunked:
1275
+ index++;
1276
+ if (index > sizeof(CHUNKED)-1
1277
+ || c != CHUNKED[index]) {
1278
+ header_state = h_general;
1279
+ } else if (index == sizeof(CHUNKED)-2) {
1280
+ header_state = h_transfer_encoding_chunked;
1281
+ }
1282
+ break;
1283
+
1284
+ /* looking for 'Connection: keep-alive' */
1285
+ case h_matching_connection_keep_alive:
1286
+ index++;
1287
+ if (index > sizeof(KEEP_ALIVE)-1
1288
+ || c != KEEP_ALIVE[index]) {
1289
+ header_state = h_general;
1290
+ } else if (index == sizeof(KEEP_ALIVE)-2) {
1291
+ header_state = h_connection_keep_alive;
1292
+ }
1293
+ break;
1294
+
1295
+ /* looking for 'Connection: close' */
1296
+ case h_matching_connection_close:
1297
+ index++;
1298
+ if (index > sizeof(CLOSE)-1 || c != CLOSE[index]) {
1299
+ header_state = h_general;
1300
+ } else if (index == sizeof(CLOSE)-2) {
1301
+ header_state = h_connection_close;
1302
+ }
1303
+ break;
1304
+
1305
+ case h_transfer_encoding_chunked:
1306
+ case h_connection_keep_alive:
1307
+ case h_connection_close:
1308
+ if (ch != ' ') header_state = h_general;
1309
+ break;
1310
+
1311
+ default:
1312
+ state = s_header_value;
1313
+ header_state = h_general;
1314
+ break;
1315
+ }
1316
+ break;
1317
+ }
1318
+
1319
+ case s_header_almost_done:
1320
+ header_almost_done:
1321
+ {
1322
+ STRICT_CHECK(ch != LF);
1323
+
1324
+ state = s_header_field_start;
1325
+
1326
+ switch (header_state) {
1327
+ case h_connection_keep_alive:
1328
+ parser->flags |= F_CONNECTION_KEEP_ALIVE;
1329
+ break;
1330
+ case h_connection_close:
1331
+ parser->flags |= F_CONNECTION_CLOSE;
1332
+ break;
1333
+ case h_transfer_encoding_chunked:
1334
+ parser->flags |= F_CHUNKED;
1335
+ break;
1336
+ default:
1337
+ break;
1338
+ }
1339
+ break;
1340
+ }
1341
+
1342
+ case s_headers_almost_done:
1343
+ headers_almost_done:
1344
+ {
1345
+ STRICT_CHECK(ch != LF);
1346
+
1347
+ if (parser->flags & F_TRAILING) {
1348
+ /* End of a chunked request */
1349
+ CALLBACK2(message_complete);
1350
+ state = NEW_MESSAGE();
1351
+ break;
1352
+ }
1353
+
1354
+ nread = 0;
1355
+
1356
+ if (parser->flags & F_UPGRADE || parser->method == HTTP_CONNECT) {
1357
+ parser->upgrade = 1;
1358
+ }
1359
+
1360
+ /* Here we call the headers_complete callback. This is somewhat
1361
+ * different than other callbacks because if the user returns 1, we
1362
+ * will interpret that as saying that this message has no body. This
1363
+ * is needed for the annoying case of receiving a response to a HEAD
1364
+ * request.
1365
+ */
1366
+ if (settings->on_headers_complete) {
1367
+ switch (settings->on_headers_complete(parser)) {
1368
+ case 0:
1369
+ break;
1370
+
1371
+ case 1:
1372
+ parser->flags |= F_SKIPBODY;
1373
+ break;
1374
+
1375
+ default:
1376
+ return p - data; /* Error */
1377
+ }
1378
+ }
1379
+
1380
+ /* Exit, the rest of the connect is in a different protocol. */
1381
+ if (parser->upgrade) {
1382
+ CALLBACK2(message_complete);
1383
+ return (p - data);
1384
+ }
1385
+
1386
+ if (parser->flags & F_SKIPBODY) {
1387
+ CALLBACK2(message_complete);
1388
+ state = NEW_MESSAGE();
1389
+ } else if (parser->flags & F_CHUNKED) {
1390
+ /* chunked encoding - ignore Content-Length header */
1391
+ state = s_chunk_size_start;
1392
+ } else {
1393
+ if (parser->content_length == 0) {
1394
+ /* Content-Length header given but zero: Content-Length: 0\r\n */
1395
+ CALLBACK2(message_complete);
1396
+ state = NEW_MESSAGE();
1397
+ } else if (parser->content_length > 0) {
1398
+ /* Content-Length header given and non-zero */
1399
+ state = s_body_identity;
1400
+ } else {
1401
+ if (parser->type == HTTP_REQUEST || http_should_keep_alive(parser)) {
1402
+ /* Assume content-length 0 - read the next */
1403
+ CALLBACK2(message_complete);
1404
+ state = NEW_MESSAGE();
1405
+ } else {
1406
+ /* Read body until EOF */
1407
+ state = s_body_identity_eof;
1408
+ }
1409
+ }
1410
+ }
1411
+
1412
+ break;
1413
+ }
1414
+
1415
+ case s_body_identity:
1416
+ to_read = MIN(pe - p, (int64_t)parser->content_length);
1417
+ if (to_read > 0) {
1418
+ if (settings->on_body) settings->on_body(parser, p, to_read);
1419
+ p += to_read - 1;
1420
+ parser->content_length -= to_read;
1421
+ if (parser->content_length == 0) {
1422
+ CALLBACK2(message_complete);
1423
+ state = NEW_MESSAGE();
1424
+ }
1425
+ }
1426
+ break;
1427
+
1428
+ /* read until EOF */
1429
+ case s_body_identity_eof:
1430
+ to_read = pe - p;
1431
+ if (to_read > 0) {
1432
+ if (settings->on_body) settings->on_body(parser, p, to_read);
1433
+ p += to_read - 1;
1434
+ }
1435
+ break;
1436
+
1437
+ case s_chunk_size_start:
1438
+ {
1439
+ assert(parser->flags & F_CHUNKED);
1440
+
1441
+ c = unhex[(unsigned char)ch];
1442
+ if (c == -1) goto error;
1443
+ parser->content_length = c;
1444
+ state = s_chunk_size;
1445
+ break;
1446
+ }
1447
+
1448
+ case s_chunk_size:
1449
+ {
1450
+ assert(parser->flags & F_CHUNKED);
1451
+
1452
+ if (ch == CR) {
1453
+ state = s_chunk_size_almost_done;
1454
+ break;
1455
+ }
1456
+
1457
+ c = unhex[(unsigned char)ch];
1458
+
1459
+ if (c == -1) {
1460
+ if (ch == ';' || ch == ' ') {
1461
+ state = s_chunk_parameters;
1462
+ break;
1463
+ }
1464
+ goto error;
1465
+ }
1466
+
1467
+ parser->content_length *= 16;
1468
+ parser->content_length += c;
1469
+ break;
1470
+ }
1471
+
1472
+ case s_chunk_parameters:
1473
+ {
1474
+ assert(parser->flags & F_CHUNKED);
1475
+ /* just ignore this shit. TODO check for overflow */
1476
+ if (ch == CR) {
1477
+ state = s_chunk_size_almost_done;
1478
+ break;
1479
+ }
1480
+ break;
1481
+ }
1482
+
1483
+ case s_chunk_size_almost_done:
1484
+ {
1485
+ assert(parser->flags & F_CHUNKED);
1486
+ STRICT_CHECK(ch != LF);
1487
+
1488
+ if (parser->content_length == 0) {
1489
+ parser->flags |= F_TRAILING;
1490
+ state = s_header_field_start;
1491
+ } else {
1492
+ state = s_chunk_data;
1493
+ }
1494
+ break;
1495
+ }
1496
+
1497
+ case s_chunk_data:
1498
+ {
1499
+ assert(parser->flags & F_CHUNKED);
1500
+
1501
+ to_read = MIN(pe - p, (int64_t)(parser->content_length));
1502
+
1503
+ if (to_read > 0) {
1504
+ if (settings->on_body) settings->on_body(parser, p, to_read);
1505
+ p += to_read - 1;
1506
+ }
1507
+
1508
+ if (to_read == parser->content_length) {
1509
+ state = s_chunk_data_almost_done;
1510
+ }
1511
+
1512
+ parser->content_length -= to_read;
1513
+ break;
1514
+ }
1515
+
1516
+ case s_chunk_data_almost_done:
1517
+ assert(parser->flags & F_CHUNKED);
1518
+ STRICT_CHECK(ch != CR);
1519
+ state = s_chunk_data_done;
1520
+ break;
1521
+
1522
+ case s_chunk_data_done:
1523
+ assert(parser->flags & F_CHUNKED);
1524
+ STRICT_CHECK(ch != LF);
1525
+ state = s_chunk_size_start;
1526
+ break;
1527
+
1528
+ default:
1529
+ assert(0 && "unhandled state");
1530
+ goto error;
1531
+ }
1532
+ }
1533
+
1534
+ CALLBACK_NOCLEAR(header_field);
1535
+ CALLBACK_NOCLEAR(header_value);
1536
+ CALLBACK_NOCLEAR(fragment);
1537
+ CALLBACK_NOCLEAR(query_string);
1538
+ CALLBACK_NOCLEAR(path);
1539
+ CALLBACK_NOCLEAR(url);
1540
+
1541
+ parser->state = state;
1542
+ parser->header_state = header_state;
1543
+ parser->index = index;
1544
+ parser->nread = nread;
1545
+
1546
+ return len;
1547
+
1548
+ error:
1549
+ parser->state = s_dead;
1550
+ return (p - data);
1551
+ }
1552
+
1553
+
1554
+ int
1555
+ http_should_keep_alive (http_parser *parser)
1556
+ {
1557
+ if (parser->http_major > 0 && parser->http_minor > 0) {
1558
+ /* HTTP/1.1 */
1559
+ if (parser->flags & F_CONNECTION_CLOSE) {
1560
+ return 0;
1561
+ } else {
1562
+ return 1;
1563
+ }
1564
+ } else {
1565
+ /* HTTP/1.0 or earlier */
1566
+ if (parser->flags & F_CONNECTION_KEEP_ALIVE) {
1567
+ return 1;
1568
+ } else {
1569
+ return 0;
1570
+ }
1571
+ }
1572
+ }
1573
+
1574
+
1575
+ const char * http_method_str (enum http_method m)
1576
+ {
1577
+ return method_strings[m];
1578
+ }
1579
+
1580
+
1581
+ void
1582
+ http_parser_init (http_parser *parser, enum http_parser_type t)
1583
+ {
1584
+ parser->type = t;
1585
+ parser->state = (t == HTTP_REQUEST ? s_start_req : (t == HTTP_RESPONSE ? s_start_res : s_start_req_or_res));
1586
+ parser->nread = 0;
1587
+ parser->upgrade = 0;
1588
+ parser->flags = 0;
1589
+ parser->method = 0;
1590
+ }