http_parser.rb 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (56) hide show
  1. data/.gitignore +11 -0
  2. data/.gitmodules +6 -0
  3. data/README.md +45 -0
  4. data/Rakefile +6 -0
  5. data/bench/thin.rb +57 -0
  6. data/ext/ruby_http_parser/.gitignore +1 -0
  7. data/ext/ruby_http_parser/RubyHttpParserService.java +18 -0
  8. data/ext/ruby_http_parser/ext_help.h +18 -0
  9. data/ext/ruby_http_parser/extconf.rb +16 -0
  10. data/ext/ruby_http_parser/org/ruby_http_parser/RubyHttpParser.java +403 -0
  11. data/ext/ruby_http_parser/ruby_http_parser.c +474 -0
  12. data/ext/ruby_http_parser/vendor/.gitkeep +0 -0
  13. data/ext/ruby_http_parser/vendor/http-parser-java/CONTRIBUTIONS +4 -0
  14. data/ext/ruby_http_parser/vendor/http-parser-java/LICENSE-MIT +19 -0
  15. data/ext/ruby_http_parser/vendor/http-parser-java/README.md +171 -0
  16. data/ext/ruby_http_parser/vendor/http-parser-java/TODO +19 -0
  17. data/ext/ruby_http_parser/vendor/http-parser-java/compile +1 -0
  18. data/ext/ruby_http_parser/vendor/http-parser-java/http_parser.c +1590 -0
  19. data/ext/ruby_http_parser/vendor/http-parser-java/http_parser.h +167 -0
  20. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/HTTPException.java +7 -0
  21. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/HTTPMethod.java +90 -0
  22. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/HTTPParser.java +31 -0
  23. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/ParserType.java +13 -0
  24. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/lolevel/HTTPCallback.java +5 -0
  25. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/lolevel/HTTPDataCallback.java +25 -0
  26. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/lolevel/HTTPErrorCallback.java +7 -0
  27. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/lolevel/HTTPParser.java +1894 -0
  28. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/lolevel/ParserSettings.java +78 -0
  29. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/lolevel/Util.java +112 -0
  30. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/TestLoaderNG.java +487 -0
  31. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/UnitTest.java +115 -0
  32. data/ext/ruby_http_parser/vendor/http-parser-java/test.c +1865 -0
  33. data/ext/ruby_http_parser/vendor/http-parser-java/test_permutations +1 -0
  34. data/ext/ruby_http_parser/vendor/http-parser-java/test_unit +1 -0
  35. data/ext/ruby_http_parser/vendor/http-parser-java/tests.dumped +539 -0
  36. data/ext/ruby_http_parser/vendor/http-parser-java/tools/byte_constants.rb +6 -0
  37. data/ext/ruby_http_parser/vendor/http-parser-java/tools/const_char.rb +13 -0
  38. data/ext/ruby_http_parser/vendor/http-parser-java/tools/lowcase.rb +15 -0
  39. data/ext/ruby_http_parser/vendor/http-parser-java/tools/parse_tests.rb +33 -0
  40. data/ext/ruby_http_parser/vendor/http-parser/CONTRIBUTIONS +4 -0
  41. data/ext/ruby_http_parser/vendor/http-parser/LICENSE-MIT +19 -0
  42. data/ext/ruby_http_parser/vendor/http-parser/README.md +171 -0
  43. data/ext/ruby_http_parser/vendor/http-parser/http_parser.c +1590 -0
  44. data/ext/ruby_http_parser/vendor/http-parser/http_parser.h +167 -0
  45. data/ext/ruby_http_parser/vendor/http-parser/test.c +1755 -0
  46. data/http_parser.rb.gemspec +15 -0
  47. data/lib/http/parser.rb +1 -0
  48. data/lib/http_parser.rb +4 -0
  49. data/spec/parser_spec.rb +187 -0
  50. data/spec/spec_helper.rb +2 -0
  51. data/spec/support/requests.json +381 -0
  52. data/spec/support/responses.json +186 -0
  53. data/tasks/compile.rake +39 -0
  54. data/tasks/spec.rake +5 -0
  55. data/tasks/submodules.rake +7 -0
  56. metadata +121 -0
File without changes
@@ -0,0 +1,4 @@
1
+ Contributors must agree to the Contributor License Agreement before patches
2
+ can be accepted.
3
+
4
+ http://spreadsheets2.google.com/viewform?hl=en&formkey=dDJXOGUwbzlYaWM4cHN1MERwQS1CSnc6MQ
@@ -0,0 +1,19 @@
1
+ Copyright 2009,2010 Ryan Dahl <ry@tinyclouds.org>
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining a copy
4
+ of this software and associated documentation files (the "Software"), to
5
+ deal in the Software without restriction, including without limitation the
6
+ rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
7
+ sell copies of the Software, and to permit persons to whom the Software is
8
+ furnished to do so, subject to the following conditions:
9
+
10
+ The above copyright notice and this permission notice shall be included in
11
+ all copies or substantial portions of the Software.
12
+
13
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
18
+ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
19
+ IN THE SOFTWARE.
@@ -0,0 +1,171 @@
1
+ HTTP Parser
2
+ ===========
3
+
4
+ This is a parser for HTTP messages written in C. It parses both requests and
5
+ responses. The parser is designed to be used in high-performance HTTP
6
+ applications. It does not make any syscalls nor allocations, it does not
7
+ buffer data, and it can be interrupted at any time. Depending on your
8
+ architecture, it only requires about 40 bytes of data per message
9
+ stream (in a web server that is per connection).
10
+
11
+ Features:
12
+
13
+ * No dependencies
14
+ * Handles persistent streams (keep-alive).
15
+ * Decodes chunked encoding.
16
+ * Upgrade support
17
+ * Defends against buffer overflow attacks.
18
+
19
+ The parser extracts the following information from HTTP messages:
20
+
21
+ * Header fields and values
22
+ * Content-Length
23
+ * Request method
24
+ * Response status code
25
+ * Transfer-Encoding
26
+ * HTTP version
27
+ * Request path, query string, fragment
28
+ * Message body
29
+
30
+
31
+ Usage
32
+ -----
33
+
34
+ One `http_parser` object is used per TCP connection. Initialize the struct
35
+ using `http_parser_init()` and set the callbacks. That might look something
36
+ like this for a request parser:
37
+
38
+ http_parser_settings settings;
39
+ settings.on_path = my_path_callback;
40
+ settings.on_header_field = my_header_field_callback;
41
+ /* ... */
42
+ settings.data = my_socket;
43
+
44
+ http_parser *parser = malloc(sizeof(http_parser));
45
+ http_parser_init(parser, HTTP_REQUEST);
46
+
47
+ When data is received on the socket execute the parser and check for errors.
48
+
49
+ size_t len = 80*1024, nparsed;
50
+ char buf[len];
51
+ ssize_t recved;
52
+
53
+ recved = recv(fd, buf, len, 0);
54
+
55
+ if (recved < 0) {
56
+ /* Handle error. */
57
+ }
58
+
59
+ /* Start up / continue the parser.
60
+ * Note we pass recved==0 to signal that EOF has been received.
61
+ */
62
+ nparsed = http_parser_execute(parser, &settings, buf, recved);
63
+
64
+ if (parser->upgrade) {
65
+ /* handle new protocol */
66
+ } else if (nparsed != recved) {
67
+ /* Handle error. Usually just close the connection. */
68
+ }
69
+
70
+ HTTP needs to know where the end of the stream is. For example, sometimes
71
+ servers send responses without Content-Length and expect the client to
72
+ consume input (for the body) until EOF. To tell http_parser about EOF, give
73
+ `0` as the fourth parameter to `http_parser_execute()`. Callbacks and errors
74
+ can still be encountered during an EOF, so one must still be prepared
75
+ to receive them.
76
+
77
+ Scalar valued message information such as `status_code`, `method`, and the
78
+ HTTP version are stored in the parser structure. This data is only
79
+ temporarily stored in `http_parser` and gets reset on each new message. If
80
+ this information is needed later, copy it out of the structure during the
81
+ `headers_complete` callback.
82
+
83
+ The parser decodes the transfer-encoding for both requests and responses
84
+ transparently. That is, a chunked encoding is decoded before being sent to
85
+ the on_body callback.
86
+
87
+
88
+ The Special Problem of Upgrade
89
+ ------------------------------
90
+
91
+ HTTP supports upgrading the connection to a different protocol. An
92
+ increasingly common example of this is the Web Socket protocol which sends
93
+ a request like
94
+
95
+ GET /demo HTTP/1.1
96
+ Upgrade: WebSocket
97
+ Connection: Upgrade
98
+ Host: example.com
99
+ Origin: http://example.com
100
+ WebSocket-Protocol: sample
101
+
102
+ followed by non-HTTP data.
103
+
104
+ (See http://tools.ietf.org/html/draft-hixie-thewebsocketprotocol-75 for more
105
+ information on the Web Socket protocol.)
106
+
107
+ To support this, the parser will treat this as a normal HTTP message without a
108
+ body, issuing both on_headers_complete and on_message_complete callbacks. However,
109
+ http_parser_execute() will stop parsing at the end of the headers and return.
110
+
111
+ The user is expected to check if `parser->upgrade` has been set to 1 after
112
+ `http_parser_execute()` returns. Non-HTTP data begins at the buffer supplied
113
+ offset by the return value of `http_parser_execute()`.
114
+
115
+
116
+ Callbacks
117
+ ---------
118
+
119
+ During the `http_parser_execute()` call, the callbacks set in
120
+ `http_parser_settings` will be executed. The parser maintains state and
121
+ never looks behind, so buffering the data is not necessary. If you need to
122
+ save certain data for later usage, you can do that from the callbacks.
123
+
124
+ There are two types of callbacks:
125
+
126
+ * notification `typedef int (*http_cb) (http_parser*);`
127
+ Callbacks: on_message_begin, on_headers_complete, on_message_complete.
128
+ * data `typedef int (*http_data_cb) (http_parser*, const char *at, size_t length);`
129
+ Callbacks: (requests only) on_path, on_query_string, on_uri, on_fragment,
130
+ (common) on_header_field, on_header_value, on_body;
131
+
132
+ Callbacks must return 0 on success. Returning a non-zero value indicates
133
+ error to the parser, making it exit immediately.
134
+
135
+ If you parse an HTTP message in chunks (e.g. `read()` the request line
136
+ from socket, parse, read half headers, parse, etc) your data callbacks
137
+ may be called more than once. Http-parser guarantees that the data pointer is only
138
+ valid for the lifetime of the callback. You can also `read()` into a heap allocated
139
+ buffer to avoid copying memory around if this fits your application.
140
+
141
+ Reading headers may be a tricky task if you read/parse headers partially.
142
+ Basically, you need to remember whether the last header callback was a field or a value
143
+ and apply the following logic:
144
+
145
+ (on_header_field and on_header_value shortened to on_h_*)
146
+ ------------------------ ------------ --------------------------------------------
147
+ | State (prev. callback) | Callback | Description/action |
148
+ ------------------------ ------------ --------------------------------------------
149
+ | nothing (first call) | on_h_field | Allocate new buffer and copy callback data |
150
+ | | | into it |
151
+ ------------------------ ------------ --------------------------------------------
152
+ | value | on_h_field | New header started. |
153
+ | | | Copy current name,value buffers to headers |
154
+ | | | list and allocate new buffer for new name |
155
+ ------------------------ ------------ --------------------------------------------
156
+ | field | on_h_field | Previous name continues. Reallocate name |
157
+ | | | buffer and append callback data to it |
158
+ ------------------------ ------------ --------------------------------------------
159
+ | field | on_h_value | Value for current header started. Allocate |
160
+ | | | new buffer and copy callback data to it |
161
+ ------------------------ ------------ --------------------------------------------
162
+ | value | on_h_value | Value continues. Reallocate value buffer |
163
+ | | | and append callback data to it |
164
+ ------------------------ ------------ --------------------------------------------
165
+
166
+
167
+ See examples of reading in headers:
168
+
169
+ * [partial example](http://gist.github.com/155877) in C
170
+ * [from http-parser tests](http://github.com/ry/http-parser/blob/37a0ff8928fb0d83cec0d0d8909c5a4abcd221af/test.c#L403) in C
171
+ * [from Node library](http://github.com/ry/node/blob/842eaf446d2fdcb33b296c67c911c32a0dabc747/src/http.js#L284) in Javascript
@@ -0,0 +1,19 @@
1
+ hi level callback interface
2
+ eventloop
3
+ state() as a function (?)
4
+ - perhaps, the idea being to be able to log/debug better...
5
+ more tests
6
+ - in particular, port available c tests
7
+ impl bits of servlet api.
8
+
9
+ DONE
10
+
11
+ Sun Jul 18 12:19:18 CEST 2010
12
+
13
+ error handling
14
+ - consider callback based error handling and the current highlevel
15
+ "nice" logging moved to high level http impl.
16
+ - use Exceptions "ProtocolException"?
17
+
18
+ better testing
19
+ - no junit to avoid dependencies
@@ -0,0 +1 @@
1
+ javac -d classes/ `find src -name '*.java'` && java -cp classes http_parser.lolevel.TestLoaderNG tests.dumped
@@ -0,0 +1,1590 @@
1
+ /* Copyright 2009,2010 Ryan Dahl <ry@tinyclouds.org>
2
+ *
3
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
4
+ * of this software and associated documentation files (the "Software"), to
5
+ * deal in the Software without restriction, including without limitation the
6
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
7
+ * sell copies of the Software, and to permit persons to whom the Software is
8
+ * furnished to do so, subject to the following conditions:
9
+ *
10
+ * The above copyright notice and this permission notice shall be included in
11
+ * all copies or substantial portions of the Software.
12
+ *
13
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
18
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
19
+ * IN THE SOFTWARE.
20
+ */
21
+ #include <http_parser.h>
22
+ #ifdef _WIN32 /* Windows build: define the fixed-width names from the compiler's sized __intN types instead of including <stdint.h> */
23
+ typedef __int8 int8_t;
24
+ typedef unsigned __int8 uint8_t;
25
+ typedef __int16 int16_t;
26
+ typedef unsigned __int16 uint16_t;
27
+ typedef __int32 int32_t; /* must be the 32-bit type; this was __int16, which made int32_t only 16 bits wide */
28
+ typedef unsigned __int32 uint32_t;
29
+ #else
30
+ #include <stdint.h>
31
+ #endif
32
+ #include <assert.h>
33
+ #include <stddef.h>
34
+
35
+
36
+ #ifndef MIN /* only defined here when no earlier header already provides MIN */
37
+ # define MIN(a,b) ((a) < (b) ? (a) : (b)) /* smaller of a and b; each argument may be evaluated twice, so avoid side effects */
38
+ #endif
39
+
40
+
41
+ #define CALLBACK2(FOR) /* notification callback: calls settings->on_<FOR>(parser) when that pointer is set; expects settings, parser, p and data in scope */ \
42
+ do { \
43
+ if (settings->on_##FOR) { \
44
+ if (0 != settings->on_##FOR(parser)) return (p - data); /* non-zero result: return p - data from the enclosing function */ \
45
+ } \
46
+ } while (0)
47
+
48
+
49
+ #define MARK(FOR) /* records the current position p in <FOR>_mark as the start of a data span */ \
50
+ do { \
51
+ FOR##_mark = p; \
52
+ } while (0)
53
+
54
+ #define CALLBACK_NOCLEAR(FOR) /* data callback: reports the span [<FOR>_mark, p) to settings->on_<FOR> and leaves the mark set */ \
55
+ do { \
56
+ if (FOR##_mark) { /* nothing to report unless a mark is set */ \
57
+ if (settings->on_##FOR) { \
58
+ if (0 != settings->on_##FOR(parser, \
59
+ FOR##_mark, \
60
+ p - FOR##_mark)) /* length = bytes from the mark up to, not including, p */ \
61
+ { \
62
+ return (p - data); /* non-zero result: return p - data from the enclosing function */ \
63
+ } \
64
+ } \
65
+ } \
66
+ } while (0)
67
+
68
+
69
+ #define CALLBACK(FOR) /* same as CALLBACK_NOCLEAR(FOR), then resets <FOR>_mark to NULL */ \
70
+ do { \
71
+ CALLBACK_NOCLEAR(FOR); \
72
+ FOR##_mark = NULL; /* not reached when CALLBACK_NOCLEAR returned early */ \
73
+ } while (0)
74
+
75
+
76
+ #define PROXY_CONNECTION "proxy-connection" /* lower-case header names and values; NOTE(review): presumably compared against lower-cased input by the header matcher (not visible here) - confirm */
77
+ #define CONNECTION "connection"
78
+ #define CONTENT_LENGTH "content-length"
79
+ #define TRANSFER_ENCODING "transfer-encoding"
80
+ #define UPGRADE "upgrade"
81
+ #define CHUNKED "chunked" /* header values start here */
82
+ #define KEEP_ALIVE "keep-alive"
83
+ #define CLOSE "close"
84
+
85
+
86
+ static const char *method_strings[] = /* request method names; the s_req_method state indexes this with parser->method. NOTE(review): order presumably mirrors enum http_method in http_parser.h - confirm */
87
+ { "DELETE"
88
+ , "GET"
89
+ , "HEAD"
90
+ , "POST"
91
+ , "PUT"
92
+ , "CONNECT"
93
+ , "OPTIONS"
94
+ , "TRACE"
95
+ , "COPY"
96
+ , "LOCK"
97
+ , "MKCOL"
98
+ , "MOVE"
99
+ , "PROPFIND"
100
+ , "PROPPATCH"
101
+ , "UNLOCK"
102
+ , "REPORT"
103
+ , "MKACTIVITY"
104
+ , "CHECKOUT"
105
+ , "MERGE"
106
+ };
107
+
108
+
109
+ /* Lookup table indexed by byte value. ' ', '_', '-' and all alpha-numeric ascii characters map to a non-zero entry; every other listed byte maps to 0.
110
+ 'A'-'Z' map to their lower-case letter. Only entries 0-127 are listed, so entries 128-255 are zero-initialized. */
111
+ static const char acceptable_header[256] = {
112
+ /* 0 nul 1 soh 2 stx 3 etx 4 eot 5 enq 6 ack 7 bel */
113
+ 0, 0, 0, 0, 0, 0, 0, 0,
114
+ /* 8 bs 9 ht 10 nl 11 vt 12 np 13 cr 14 so 15 si */
115
+ 0, 0, 0, 0, 0, 0, 0, 0,
116
+ /* 16 dle 17 dc1 18 dc2 19 dc3 20 dc4 21 nak 22 syn 23 etb */
117
+ 0, 0, 0, 0, 0, 0, 0, 0,
118
+ /* 24 can 25 em 26 sub 27 esc 28 fs 29 gs 30 rs 31 us */
119
+ 0, 0, 0, 0, 0, 0, 0, 0,
120
+ /* 32 sp 33 ! 34 " 35 # 36 $ 37 % 38 & 39 ' */
121
+ ' ', 0, 0, 0, 0, 0, 0, 0,
122
+ /* 40 ( 41 ) 42 * 43 + 44 , 45 - 46 . 47 / */
123
+ 0, 0, 0, 0, 0, '-', 0, 0,
124
+ /* 48 0 49 1 50 2 51 3 52 4 53 5 54 6 55 7 */
125
+ '0', '1', '2', '3', '4', '5', '6', '7',
126
+ /* 56 8 57 9 58 : 59 ; 60 < 61 = 62 > 63 ? */
127
+ '8', '9', 0, 0, 0, 0, 0, 0,
128
+ /* 64 @ 65 A 66 B 67 C 68 D 69 E 70 F 71 G */
129
+ 0, 'a', 'b', 'c', 'd', 'e', 'f', 'g',
130
+ /* 72 H 73 I 74 J 75 K 76 L 77 M 78 N 79 O */
131
+ 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
132
+ /* 80 P 81 Q 82 R 83 S 84 T 85 U 86 V 87 W */
133
+ 'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
134
+ /* 88 X 89 Y 90 Z 91 [ 92 \ 93 ] 94 ^ 95 _ */
135
+ 'x', 'y', 'z', 0, 0, 0, 0, '_',
136
+ /* 96 ` 97 a 98 b 99 c 100 d 101 e 102 f 103 g */
137
+ 0, 'a', 'b', 'c', 'd', 'e', 'f', 'g',
138
+ /* 104 h 105 i 106 j 107 k 108 l 109 m 110 n 111 o */
139
+ 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
140
+ /* 112 p 113 q 114 r 115 s 116 t 117 u 118 v 119 w */
141
+ 'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
142
+ /* 120 x 121 y 122 z 123 { 124 | 125 } 126 ~ 127 del */
143
+ 'x', 'y', 'z', 0, 0, 0, 0, 0 };
144
+
145
+
146
+ static const int8_t unhex[256] = /* byte value -> hex digit value: '0'-'9' give 0-9, 'A'-'F' and 'a'-'f' give 10-15, other listed bytes give -1. NOTE(review): only entries 0-127 are listed, so bytes 128-255 read as 0 rather than -1 - confirm callers reject them */
147
+ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
148
+ ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
149
+ ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
150
+ , 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1
151
+ ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1
152
+ ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
153
+ ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1
154
+ ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
155
+ };
156
+
157
+
158
+ static const uint8_t normal_url_char[256] = { /* 1 = byte the s_req_path state consumes without further dispatch; control characters, space, '#', '?' and DEL are 0. Only entries 0-127 are listed, so 128-255 are 0 */
159
+ /* 0 nul 1 soh 2 stx 3 etx 4 eot 5 enq 6 ack 7 bel */
160
+ 0, 0, 0, 0, 0, 0, 0, 0,
161
+ /* 8 bs 9 ht 10 nl 11 vt 12 np 13 cr 14 so 15 si */
162
+ 0, 0, 0, 0, 0, 0, 0, 0,
163
+ /* 16 dle 17 dc1 18 dc2 19 dc3 20 dc4 21 nak 22 syn 23 etb */
164
+ 0, 0, 0, 0, 0, 0, 0, 0,
165
+ /* 24 can 25 em 26 sub 27 esc 28 fs 29 gs 30 rs 31 us */
166
+ 0, 0, 0, 0, 0, 0, 0, 0,
167
+ /* 32 sp 33 ! 34 " 35 # 36 $ 37 % 38 & 39 ' */
168
+ 0, 1, 1, 0, 1, 1, 1, 1,
169
+ /* 40 ( 41 ) 42 * 43 + 44 , 45 - 46 . 47 / */
170
+ 1, 1, 1, 1, 1, 1, 1, 1,
171
+ /* 48 0 49 1 50 2 51 3 52 4 53 5 54 6 55 7 */
172
+ 1, 1, 1, 1, 1, 1, 1, 1,
173
+ /* 56 8 57 9 58 : 59 ; 60 < 61 = 62 > 63 ? */
174
+ 1, 1, 1, 1, 1, 1, 1, 0,
175
+ /* 64 @ 65 A 66 B 67 C 68 D 69 E 70 F 71 G */
176
+ 1, 1, 1, 1, 1, 1, 1, 1,
177
+ /* 72 H 73 I 74 J 75 K 76 L 77 M 78 N 79 O */
178
+ 1, 1, 1, 1, 1, 1, 1, 1,
179
+ /* 80 P 81 Q 82 R 83 S 84 T 85 U 86 V 87 W */
180
+ 1, 1, 1, 1, 1, 1, 1, 1,
181
+ /* 88 X 89 Y 90 Z 91 [ 92 \ 93 ] 94 ^ 95 _ */
182
+ 1, 1, 1, 1, 1, 1, 1, 1,
183
+ /* 96 ` 97 a 98 b 99 c 100 d 101 e 102 f 103 g */
184
+ 1, 1, 1, 1, 1, 1, 1, 1,
185
+ /* 104 h 105 i 106 j 107 k 108 l 109 m 110 n 111 o */
186
+ 1, 1, 1, 1, 1, 1, 1, 1,
187
+ /* 112 p 113 q 114 r 115 s 116 t 117 u 118 v 119 w */
188
+ 1, 1, 1, 1, 1, 1, 1, 1,
189
+ /* 120 x 121 y 122 z 123 { 124 | 125 } 126 ~ 127 del */
190
+ 1, 1, 1, 1, 1, 1, 1, 0 };
191
+
192
+
193
+ enum state /* positions of the parser state machine; http_parser_execute loads one from parser->state and switches on it for every input byte */
194
+ { s_dead = 1 /* important that this is > 0 */
195
+
196
+ , s_start_req_or_res /* first byte decides between request and response: 'H' goes to s_res_or_resp_H, anything else is treated as a request */
197
+ , s_res_or_resp_H /* after a leading 'H': 'T' means a response, 'E' means a HEAD request */
198
+ , s_start_res /* response status line states follow */
199
+ , s_res_H
200
+ , s_res_HT
201
+ , s_res_HTT
202
+ , s_res_HTTP
203
+ , s_res_first_http_major
204
+ , s_res_http_major
205
+ , s_res_first_http_minor
206
+ , s_res_http_minor
207
+ , s_res_first_status_code
208
+ , s_res_status_code
209
+ , s_res_status
210
+ , s_res_line_almost_done
211
+
212
+ , s_start_req /* request line states follow */
213
+
214
+ , s_req_method
215
+ , s_req_spaces_before_url
216
+ , s_req_schema
217
+ , s_req_schema_slash
218
+ , s_req_schema_slash_slash
219
+ , s_req_host
220
+ , s_req_port
221
+ , s_req_path
222
+ , s_req_query_string_start
223
+ , s_req_query_string
224
+ , s_req_fragment_start
225
+ , s_req_fragment
226
+ , s_req_http_start
227
+ , s_req_http_H
228
+ , s_req_http_HT
229
+ , s_req_http_HTT
230
+ , s_req_http_HTTP
231
+ , s_req_first_http_major
232
+ , s_req_http_major
233
+ , s_req_first_http_minor
234
+ , s_req_http_minor
235
+ , s_req_line_almost_done
236
+
237
+ , s_header_field_start
238
+ , s_header_field
239
+ , s_header_value_start
240
+ , s_header_value
241
+
242
+ , s_header_almost_done
243
+
244
+ , s_headers_almost_done
245
+ /* Important: 's_headers_almost_done' must be the last 'header' state. All
246
+ * states beyond this must be 'body' states. It is used for overflow
247
+ * checking. See the PARSING_HEADER() macro.
248
+ */
249
+ , s_chunk_size_start
250
+ , s_chunk_size
251
+ , s_chunk_size_almost_done
252
+ , s_chunk_parameters
253
+ , s_chunk_data
254
+ , s_chunk_data_almost_done
255
+ , s_chunk_data_done
256
+
257
+ , s_body_identity
258
+ , s_body_identity_eof /* when http_parser_execute is called with len == 0 in this state it fires on_message_complete */
259
+ };
260
+
261
+
262
+ #define PARSING_HEADER(state) ((state) <= s_headers_almost_done && 0 == (parser->flags & F_TRAILING)) /* true while in a pre-body state with F_TRAILING clear; gates the HTTP_MAX_HEADER_SIZE check. Needs `parser` in scope. Argument is parenthesized so any expression can be passed. */
263
+
264
+
265
+ enum header_states /* header-matching sub-state; http_parser_execute loads it from parser->header_state. NOTE(review): the transitions are outside this excerpt - the per-entry notes below are read off the names only */
266
+ { h_general = 0
267
+ , h_C
268
+ , h_CO
269
+ , h_CON
270
+
271
+ , h_matching_connection /* h_matching_*: presumably part-way through matching that header name - confirm */
272
+ , h_matching_proxy_connection
273
+ , h_matching_content_length
274
+ , h_matching_transfer_encoding
275
+ , h_matching_upgrade
276
+
277
+ , h_connection /* presumably the header name matched completely - confirm */
278
+ , h_content_length
279
+ , h_transfer_encoding
280
+ , h_upgrade
281
+
282
+ , h_matching_transfer_encoding_chunked /* presumably part-way through matching a header value - confirm */
283
+ , h_matching_connection_keep_alive
284
+ , h_matching_connection_close
285
+
286
+ , h_transfer_encoding_chunked /* presumably the header value matched completely - confirm */
287
+ , h_connection_keep_alive
288
+ , h_connection_close
289
+ };
290
+
291
+
292
+ enum flags /* single-bit masks combined in parser->flags; http_parser_execute zeroes parser->flags at the start of each message */
293
+ { F_CHUNKED = 1 << 0
294
+ , F_CONNECTION_KEEP_ALIVE = 1 << 1
295
+ , F_CONNECTION_CLOSE = 1 << 2
296
+ , F_TRAILING = 1 << 3 /* while set, PARSING_HEADER() is false, so the header-size check is skipped */
297
+ , F_UPGRADE = 1 << 4
298
+ , F_SKIPBODY = 1 << 5
299
+ };
300
+
301
+
302
+ #define CR '\r' /* carriage return */
303
+ #define LF '\n' /* line feed */
304
+ #define LOWER(c) (unsigned char)((c) | 0x20) /* sets bit 0x20, mapping 'A'-'Z' to 'a'-'z' (other bytes change too, so range-check the result); argument is parenthesized so lower-precedence expressions expand correctly */
305
+
306
+
307
+ #define start_state (parser->type == HTTP_REQUEST ? s_start_req : s_start_res) /* first state for a new message: s_start_req when parser->type is HTTP_REQUEST, otherwise s_start_res; needs `parser` in scope */
308
+
309
+
310
+ #if HTTP_PARSER_STRICT
311
+ # define STRICT_CHECK(cond) if (cond) goto error
312
+ # define NEW_MESSAGE() (http_should_keep_alive(parser) ? start_state : s_dead)
313
+ #else
314
+ # define STRICT_CHECK(cond)
315
+ # define NEW_MESSAGE() start_state
316
+ #endif
317
+
318
+
319
+ size_t http_parser_execute (http_parser *parser,
320
+ const http_parser_settings *settings,
321
+ const char *data,
322
+ size_t len)
323
+ {
324
+ char c, ch;
325
+ const char *p = data, *pe;
326
+ int64_t to_read;
327
+
328
+ enum state state = (enum state) parser->state;
329
+ enum header_states header_state = (enum header_states) parser->header_state;
330
+ uint64_t index = parser->index;
331
+ uint64_t nread = parser->nread;
332
+
333
+ if (len == 0) {
334
+ if (state == s_body_identity_eof) {
335
+ CALLBACK2(message_complete);
336
+ }
337
+ return 0;
338
+ }
339
+
340
+ /* technically we could combine all of these (except for url_mark) into one
341
+ variable, saving stack space, but it seems more clear to have them
342
+ separated. */
343
+ const char *header_field_mark = 0;
344
+ const char *header_value_mark = 0;
345
+ const char *fragment_mark = 0;
346
+ const char *query_string_mark = 0;
347
+ const char *path_mark = 0;
348
+ const char *url_mark = 0;
349
+
350
+ if (state == s_header_field)
351
+ header_field_mark = data;
352
+ if (state == s_header_value)
353
+ header_value_mark = data;
354
+ if (state == s_req_fragment)
355
+ fragment_mark = data;
356
+ if (state == s_req_query_string)
357
+ query_string_mark = data;
358
+ if (state == s_req_path)
359
+ path_mark = data;
360
+ if (state == s_req_path || state == s_req_schema || state == s_req_schema_slash
361
+ || state == s_req_schema_slash_slash || state == s_req_port
362
+ || state == s_req_query_string_start || state == s_req_query_string
363
+ || state == s_req_host
364
+ || state == s_req_fragment_start || state == s_req_fragment)
365
+ url_mark = data;
366
+
367
+ for (p=data, pe=data+len; p != pe; p++) {
368
+ ch = *p;
369
+
370
+ if (PARSING_HEADER(state)) {
371
+ ++nread;
372
+ /* Buffer overflow attack */
373
+ if (nread > HTTP_MAX_HEADER_SIZE) goto error;
374
+ }
375
+
376
+ switch (state) {
377
+
378
+ case s_dead:
379
+ /* this state is used after a 'Connection: close' message
380
+ * the parser will error out if it reads another message
381
+ */
382
+ goto error;
383
+
384
+ case s_start_req_or_res:
385
+ {
386
+ if (ch == CR || ch == LF)
387
+ break;
388
+ parser->flags = 0;
389
+ parser->content_length = -1;
390
+
391
+ CALLBACK2(message_begin);
392
+
393
+ if (ch == 'H')
394
+ state = s_res_or_resp_H;
395
+ else {
396
+ parser->type = HTTP_REQUEST;
397
+ goto start_req_method_assign;
398
+ }
399
+ break;
400
+ }
401
+
402
+ case s_res_or_resp_H:
403
+ if (ch == 'T') {
404
+ parser->type = HTTP_RESPONSE;
405
+ state = s_res_HT;
406
+ } else {
407
+ if (ch != 'E') goto error;
408
+ parser->type = HTTP_REQUEST;
409
+ parser->method = HTTP_HEAD;
410
+ index = 2;
411
+ state = s_req_method;
412
+ }
413
+ break;
414
+
415
+ case s_start_res:
416
+ {
417
+ parser->flags = 0;
418
+ parser->content_length = -1;
419
+
420
+ CALLBACK2(message_begin);
421
+
422
+ switch (ch) {
423
+ case 'H':
424
+ state = s_res_H;
425
+ break;
426
+
427
+ case CR:
428
+ case LF:
429
+ break;
430
+
431
+ default:
432
+ goto error;
433
+ }
434
+ break;
435
+ }
436
+
437
+ case s_res_H:
438
+ STRICT_CHECK(ch != 'T');
439
+ state = s_res_HT;
440
+ break;
441
+
442
+ case s_res_HT:
443
+ STRICT_CHECK(ch != 'T');
444
+ state = s_res_HTT;
445
+ break;
446
+
447
+ case s_res_HTT:
448
+ STRICT_CHECK(ch != 'P');
449
+ state = s_res_HTTP;
450
+ break;
451
+
452
+ case s_res_HTTP:
453
+ STRICT_CHECK(ch != '/');
454
+ state = s_res_first_http_major;
455
+ break;
456
+
457
+ case s_res_first_http_major:
458
+ if (ch < '1' || ch > '9') goto error;
459
+ parser->http_major = ch - '0';
460
+ state = s_res_http_major;
461
+ break;
462
+
463
+ /* major HTTP version or dot */
464
+ case s_res_http_major:
465
+ {
466
+ if (ch == '.') {
467
+ state = s_res_first_http_minor;
468
+ break;
469
+ }
470
+
471
+ if (ch < '0' || ch > '9') goto error;
472
+
473
+ parser->http_major *= 10;
474
+ parser->http_major += ch - '0';
475
+
476
+ if (parser->http_major > 999) goto error;
477
+ break;
478
+ }
479
+
480
+ /* first digit of minor HTTP version */
481
+ case s_res_first_http_minor:
482
+ if (ch < '0' || ch > '9') goto error;
483
+ parser->http_minor = ch - '0';
484
+ state = s_res_http_minor;
485
+ break;
486
+
487
+ /* minor HTTP version or end of request line */
488
+ case s_res_http_minor:
489
+ {
490
+ if (ch == ' ') {
491
+ state = s_res_first_status_code;
492
+ break;
493
+ }
494
+
495
+ if (ch < '0' || ch > '9') goto error;
496
+
497
+ parser->http_minor *= 10;
498
+ parser->http_minor += ch - '0';
499
+
500
+ if (parser->http_minor > 999) goto error;
501
+ break;
502
+ }
503
+
504
+ case s_res_first_status_code:
505
+ {
506
+ if (ch < '0' || ch > '9') {
507
+ if (ch == ' ') {
508
+ break;
509
+ }
510
+ goto error;
511
+ }
512
+ parser->status_code = ch - '0';
513
+ state = s_res_status_code;
514
+ break;
515
+ }
516
+
517
+ case s_res_status_code:
518
+ {
519
+ if (ch < '0' || ch > '9') {
520
+ switch (ch) {
521
+ case ' ':
522
+ state = s_res_status;
523
+ break;
524
+ case CR:
525
+ state = s_res_line_almost_done;
526
+ break;
527
+ case LF:
528
+ state = s_header_field_start;
529
+ break;
530
+ default:
531
+ goto error;
532
+ }
533
+ break;
534
+ }
535
+
536
+ parser->status_code *= 10;
537
+ parser->status_code += ch - '0';
538
+
539
+ if (parser->status_code > 999) goto error;
540
+ break;
541
+ }
542
+
543
+ case s_res_status:
544
+ /* the human readable status. e.g. "NOT FOUND"
545
+ * we are not humans so just ignore this */
546
+ if (ch == CR) {
547
+ state = s_res_line_almost_done;
548
+ break;
549
+ }
550
+
551
+ if (ch == LF) {
552
+ state = s_header_field_start;
553
+ break;
554
+ }
555
+ break;
556
+
557
+ case s_res_line_almost_done:
558
+ STRICT_CHECK(ch != LF);
559
+ state = s_header_field_start;
560
+ break;
561
+
562
+ case s_start_req:
563
+ {
564
+ if (ch == CR || ch == LF)
565
+ break;
566
+ parser->flags = 0;
567
+ parser->content_length = -1;
568
+
569
+ CALLBACK2(message_begin);
570
+
571
+ if (ch < 'A' || 'Z' < ch) goto error;
572
+
573
+ start_req_method_assign:
574
+ parser->method = (enum http_method) 0;
575
+ index = 1;
576
+ switch (ch) {
577
+ case 'C': parser->method = HTTP_CONNECT; /* or COPY, CHECKOUT */ break;
578
+ case 'D': parser->method = HTTP_DELETE; break;
579
+ case 'G': parser->method = HTTP_GET; break;
580
+ case 'H': parser->method = HTTP_HEAD; break;
581
+ case 'L': parser->method = HTTP_LOCK; break;
582
+ case 'M': parser->method = HTTP_MKCOL; /* or MOVE, MKACTIVITY, MERGE */ break;
583
+ case 'O': parser->method = HTTP_OPTIONS; break;
584
+ case 'P': parser->method = HTTP_POST; /* or PROPFIND or PROPPATCH or PUT */ break;
585
+ case 'R': parser->method = HTTP_REPORT; break;
586
+ case 'T': parser->method = HTTP_TRACE; break;
587
+ case 'U': parser->method = HTTP_UNLOCK; break;
588
+ default: goto error;
589
+ }
590
+ state = s_req_method;
591
+ break;
592
+ }
593
+
594
+ case s_req_method:
595
+ {
596
+ if (ch == '\0')
597
+ goto error;
598
+
599
+ const char *matcher = method_strings[parser->method];
600
+ if (ch == ' ' && matcher[index] == '\0') {
601
+ state = s_req_spaces_before_url;
602
+ } else if (ch == matcher[index]) {
603
+ ; /* nada */
604
+ } else if (parser->method == HTTP_CONNECT) {
605
+ if (index == 1 && ch == 'H') {
606
+ parser->method = HTTP_CHECKOUT;
607
+ } else if (index == 2 && ch == 'P') {
608
+ parser->method = HTTP_COPY;
609
+ }
610
+ } else if (parser->method == HTTP_MKCOL) {
611
+ if (index == 1 && ch == 'O') {
612
+ parser->method = HTTP_MOVE;
613
+ } else if (index == 1 && ch == 'E') {
614
+ parser->method = HTTP_MERGE;
615
+ } else if (index == 2 && ch == 'A') {
616
+ parser->method = HTTP_MKACTIVITY;
617
+ }
618
+ } else if (index == 1 && parser->method == HTTP_POST && ch == 'R') {
619
+ parser->method = HTTP_PROPFIND; /* or HTTP_PROPPATCH */
620
+ } else if (index == 1 && parser->method == HTTP_POST && ch == 'U') {
621
+ parser->method = HTTP_PUT;
622
+ } else if (index == 4 && parser->method == HTTP_PROPFIND && ch == 'P') {
623
+ parser->method = HTTP_PROPPATCH;
624
+ } else {
625
+ goto error;
626
+ }
627
+
628
+ ++index;
629
+ break;
630
+ }
631
+ case s_req_spaces_before_url:
632
+ {
633
+ if (ch == ' ') break;
634
+
635
+ if (ch == '/') {
636
+ MARK(url);
637
+ MARK(path);
638
+ state = s_req_path;
639
+ break;
640
+ }
641
+
642
+ c = LOWER(ch);
643
+
644
+ if (c >= 'a' && c <= 'z') {
645
+ MARK(url);
646
+ state = s_req_schema;
647
+ break;
648
+ }
649
+
650
+ goto error;
651
+ }
652
+
653
+ case s_req_schema:
654
+ {
655
+ c = LOWER(ch);
656
+
657
+ if (c >= 'a' && c <= 'z') break;
658
+
659
+ if (ch == ':') {
660
+ state = s_req_schema_slash;
661
+ break;
662
+ } else if (ch == '.') {
663
+ state = s_req_host;
664
+ break;
665
+ }
666
+
667
+ goto error;
668
+ }
669
+
670
+ case s_req_schema_slash:
671
+ STRICT_CHECK(ch != '/');
672
+ state = s_req_schema_slash_slash;
673
+ break;
674
+
675
+ case s_req_schema_slash_slash:
676
+ STRICT_CHECK(ch != '/');
677
+ state = s_req_host;
678
+ break;
679
+
680
+ case s_req_host:
681
+ {
682
+ c = LOWER(ch);
683
+ if (c >= 'a' && c <= 'z') break;
684
+ if ((ch >= '0' && ch <= '9') || ch == '.' || ch == '-') break;
685
+ switch (ch) {
686
+ case ':':
687
+ state = s_req_port;
688
+ break;
689
+ case '/':
690
+ MARK(path);
691
+ state = s_req_path;
692
+ break;
693
+ case ' ':
694
+ /* The request line looks like:
695
+ * "GET http://foo.bar.com HTTP/1.1"
696
+ * That is, there is no path.
697
+ */
698
+ CALLBACK(url);
699
+ state = s_req_http_start;
700
+ break;
701
+ default:
702
+ goto error;
703
+ }
704
+ break;
705
+ }
706
+
707
+ case s_req_port:
708
+ {
709
+ if (ch >= '0' && ch <= '9') break;
710
+ switch (ch) {
711
+ case '/':
712
+ MARK(path);
713
+ state = s_req_path;
714
+ break;
715
+ case ' ':
716
+ /* The request line looks like:
717
+ * "GET http://foo.bar.com:1234 HTTP/1.1"
718
+ * That is, there is no path.
719
+ */
720
+ CALLBACK(url);
721
+ state = s_req_http_start;
722
+ break;
723
+ default:
724
+ goto error;
725
+ }
726
+ break;
727
+ }
728
+
729
+ case s_req_path:
730
+ {
731
+ if (normal_url_char[(unsigned char)ch]) break;
732
+
733
+ switch (ch) {
734
+ case ' ':
735
+ CALLBACK(url);
736
+ CALLBACK(path);
737
+ state = s_req_http_start;
738
+ break;
739
+ case CR:
740
+ CALLBACK(url);
741
+ CALLBACK(path);
742
+ parser->http_minor = 9;
743
+ state = s_req_line_almost_done;
744
+ break;
745
+ case LF:
746
+ CALLBACK(url);
747
+ CALLBACK(path);
748
+ parser->http_minor = 9;
749
+ state = s_header_field_start;
750
+ break;
751
+ case '?':
752
+ CALLBACK(path);
753
+ state = s_req_query_string_start;
754
+ break;
755
+ case '#':
756
+ CALLBACK(path);
757
+ state = s_req_fragment_start;
758
+ break;
759
+ default:
760
+ goto error;
761
+ }
762
+ break;
763
+ }
764
+
765
+ case s_req_query_string_start:
766
+ {
767
+ if (normal_url_char[(unsigned char)ch]) {
768
+ MARK(query_string);
769
+ state = s_req_query_string;
770
+ break;
771
+ }
772
+
773
+ switch (ch) {
774
+ case '?':
775
+ break; /* XXX ignore extra '?' ... is this right? */
776
+ case ' ':
777
+ CALLBACK(url);
778
+ state = s_req_http_start;
779
+ break;
780
+ case CR:
781
+ CALLBACK(url);
782
+ parser->http_minor = 9;
783
+ state = s_req_line_almost_done;
784
+ break;
785
+ case LF:
786
+ CALLBACK(url);
787
+ parser->http_minor = 9;
788
+ state = s_header_field_start;
789
+ break;
790
+ case '#':
791
+ state = s_req_fragment_start;
792
+ break;
793
+ default:
794
+ goto error;
795
+ }
796
+ break;
797
+ }
798
+
799
+ case s_req_query_string:
800
+ {
801
+ if (normal_url_char[(unsigned char)ch]) break;
802
+
803
+ switch (ch) {
804
+ case '?':
805
+ /* allow extra '?' in query string */
806
+ break;
807
+ case ' ':
808
+ CALLBACK(url);
809
+ CALLBACK(query_string);
810
+ state = s_req_http_start;
811
+ break;
812
+ case CR:
813
+ CALLBACK(url);
814
+ CALLBACK(query_string);
815
+ parser->http_minor = 9;
816
+ state = s_req_line_almost_done;
817
+ break;
818
+ case LF:
819
+ CALLBACK(url);
820
+ CALLBACK(query_string);
821
+ parser->http_minor = 9;
822
+ state = s_header_field_start;
823
+ break;
824
+ case '#':
825
+ CALLBACK(query_string);
826
+ state = s_req_fragment_start;
827
+ break;
828
+ default:
829
+ goto error;
830
+ }
831
+ break;
832
+ }
833
+
834
+ case s_req_fragment_start:
835
+ {
836
+ if (normal_url_char[(unsigned char)ch]) {
837
+ MARK(fragment);
838
+ state = s_req_fragment;
839
+ break;
840
+ }
841
+
842
+ switch (ch) {
843
+ case ' ':
844
+ CALLBACK(url);
845
+ state = s_req_http_start;
846
+ break;
847
+ case CR:
848
+ CALLBACK(url);
849
+ parser->http_minor = 9;
850
+ state = s_req_line_almost_done;
851
+ break;
852
+ case LF:
853
+ CALLBACK(url);
854
+ parser->http_minor = 9;
855
+ state = s_header_field_start;
856
+ break;
857
+ case '?':
858
+ MARK(fragment);
859
+ state = s_req_fragment;
860
+ break;
861
+ case '#':
862
+ break;
863
+ default:
864
+ goto error;
865
+ }
866
+ break;
867
+ }
868
+
869
+ case s_req_fragment:
870
+ {
871
+ if (normal_url_char[(unsigned char)ch]) break;
872
+
873
+ switch (ch) {
874
+ case ' ':
875
+ CALLBACK(url);
876
+ CALLBACK(fragment);
877
+ state = s_req_http_start;
878
+ break;
879
+ case CR:
880
+ CALLBACK(url);
881
+ CALLBACK(fragment);
882
+ parser->http_minor = 9;
883
+ state = s_req_line_almost_done;
884
+ break;
885
+ case LF:
886
+ CALLBACK(url);
887
+ CALLBACK(fragment);
888
+ parser->http_minor = 9;
889
+ state = s_header_field_start;
890
+ break;
891
+ case '?':
892
+ case '#':
893
+ break;
894
+ default:
895
+ goto error;
896
+ }
897
+ break;
898
+ }
899
+
900
+ case s_req_http_start:
901
+ switch (ch) {
902
+ case 'H':
903
+ state = s_req_http_H;
904
+ break;
905
+ case ' ':
906
+ break;
907
+ default:
908
+ goto error;
909
+ }
910
+ break;
911
+
912
+ case s_req_http_H:
913
+ STRICT_CHECK(ch != 'T');
914
+ state = s_req_http_HT;
915
+ break;
916
+
917
+ case s_req_http_HT:
918
+ STRICT_CHECK(ch != 'T');
919
+ state = s_req_http_HTT;
920
+ break;
921
+
922
+ case s_req_http_HTT:
923
+ STRICT_CHECK(ch != 'P');
924
+ state = s_req_http_HTTP;
925
+ break;
926
+
927
+ case s_req_http_HTTP:
928
+ STRICT_CHECK(ch != '/');
929
+ state = s_req_first_http_major;
930
+ break;
931
+
932
+ /* first digit of major HTTP version */
933
+ case s_req_first_http_major:
934
+ if (ch < '1' || ch > '9') goto error;
935
+ parser->http_major = ch - '0';
936
+ state = s_req_http_major;
937
+ break;
938
+
939
+ /* major HTTP version or dot */
940
+ case s_req_http_major:
941
+ {
942
+ if (ch == '.') {
943
+ state = s_req_first_http_minor;
944
+ break;
945
+ }
946
+
947
+ if (ch < '0' || ch > '9') goto error;
948
+
949
+ parser->http_major *= 10;
950
+ parser->http_major += ch - '0';
951
+
952
+ if (parser->http_major > 999) goto error;
953
+ break;
954
+ }
955
+
956
+ /* first digit of minor HTTP version */
957
+ case s_req_first_http_minor:
958
+ if (ch < '0' || ch > '9') goto error;
959
+ parser->http_minor = ch - '0';
960
+ state = s_req_http_minor;
961
+ break;
962
+
963
+ /* minor HTTP version or end of request line */
964
+ case s_req_http_minor:
965
+ {
966
+ if (ch == CR) {
967
+ state = s_req_line_almost_done;
968
+ break;
969
+ }
970
+
971
+ if (ch == LF) {
972
+ state = s_header_field_start;
973
+ break;
974
+ }
975
+
976
+ /* XXX allow spaces after digit? */
977
+
978
+ if (ch < '0' || ch > '9') goto error;
979
+
980
+ parser->http_minor *= 10;
981
+ parser->http_minor += ch - '0';
982
+
983
+ if (parser->http_minor > 999) goto error;
984
+ break;
985
+ }
986
+
987
+ /* end of request line */
988
+ case s_req_line_almost_done:
989
+ {
990
+ if (ch != LF) goto error;
991
+ state = s_header_field_start;
992
+ break;
993
+ }
994
+
995
+ case s_header_field_start:
996
+ {
997
+ if (ch == CR) {
998
+ state = s_headers_almost_done;
999
+ break;
1000
+ }
1001
+
1002
+ if (ch == LF) {
1003
+ /* they might be just sending \n instead of \r\n so this would be
1004
+ * the second \n to denote the end of headers*/
1005
+ state = s_headers_almost_done;
1006
+ goto headers_almost_done;
1007
+ }
1008
+
1009
+ c = LOWER(ch);
1010
+
1011
+ if (c < 'a' || 'z' < c) goto error;
1012
+
1013
+ MARK(header_field);
1014
+
1015
+ index = 0;
1016
+ state = s_header_field;
1017
+
1018
+ switch (c) {
1019
+ case 'c':
1020
+ header_state = h_C;
1021
+ break;
1022
+
1023
+ case 'p':
1024
+ header_state = h_matching_proxy_connection;
1025
+ break;
1026
+
1027
+ case 't':
1028
+ header_state = h_matching_transfer_encoding;
1029
+ break;
1030
+
1031
+ case 'u':
1032
+ header_state = h_matching_upgrade;
1033
+ break;
1034
+
1035
+ default:
1036
+ header_state = h_general;
1037
+ break;
1038
+ }
1039
+ break;
1040
+ }
1041
+
1042
+ case s_header_field:
1043
+ {
1044
+ c = acceptable_header[(unsigned char)ch];
1045
+
1046
+ if (c) {
1047
+ switch (header_state) {
1048
+ case h_general:
1049
+ break;
1050
+
1051
+ case h_C:
1052
+ index++;
1053
+ header_state = (c == 'o' ? h_CO : h_general);
1054
+ break;
1055
+
1056
+ case h_CO:
1057
+ index++;
1058
+ header_state = (c == 'n' ? h_CON : h_general);
1059
+ break;
1060
+
1061
+ case h_CON:
1062
+ index++;
1063
+ switch (c) {
1064
+ case 'n':
1065
+ header_state = h_matching_connection;
1066
+ break;
1067
+ case 't':
1068
+ header_state = h_matching_content_length;
1069
+ break;
1070
+ default:
1071
+ header_state = h_general;
1072
+ break;
1073
+ }
1074
+ break;
1075
+
1076
+ /* connection */
1077
+
1078
+ case h_matching_connection:
1079
+ index++;
1080
+ if (index > sizeof(CONNECTION)-1
1081
+ || c != CONNECTION[index]) {
1082
+ header_state = h_general;
1083
+ } else if (index == sizeof(CONNECTION)-2) {
1084
+ header_state = h_connection;
1085
+ }
1086
+ break;
1087
+
1088
+ /* proxy-connection */
1089
+
1090
+ case h_matching_proxy_connection:
1091
+ index++;
1092
+ if (index > sizeof(PROXY_CONNECTION)-1
1093
+ || c != PROXY_CONNECTION[index]) {
1094
+ header_state = h_general;
1095
+ } else if (index == sizeof(PROXY_CONNECTION)-2) {
1096
+ header_state = h_connection;
1097
+ }
1098
+ break;
1099
+
1100
+ /* content-length */
1101
+
1102
+ case h_matching_content_length:
1103
+ index++;
1104
+ if (index > sizeof(CONTENT_LENGTH)-1
1105
+ || c != CONTENT_LENGTH[index]) {
1106
+ header_state = h_general;
1107
+ } else if (index == sizeof(CONTENT_LENGTH)-2) {
1108
+ header_state = h_content_length;
1109
+ }
1110
+ break;
1111
+
1112
+ /* transfer-encoding */
1113
+
1114
+ case h_matching_transfer_encoding:
1115
+ index++;
1116
+ if (index > sizeof(TRANSFER_ENCODING)-1
1117
+ || c != TRANSFER_ENCODING[index]) {
1118
+ header_state = h_general;
1119
+ } else if (index == sizeof(TRANSFER_ENCODING)-2) {
1120
+ header_state = h_transfer_encoding;
1121
+ }
1122
+ break;
1123
+
1124
+ /* upgrade */
1125
+
1126
+ case h_matching_upgrade:
1127
+ index++;
1128
+ if (index > sizeof(UPGRADE)-1
1129
+ || c != UPGRADE[index]) {
1130
+ header_state = h_general;
1131
+ } else if (index == sizeof(UPGRADE)-2) {
1132
+ header_state = h_upgrade;
1133
+ }
1134
+ break;
1135
+
1136
+ case h_connection:
1137
+ case h_content_length:
1138
+ case h_transfer_encoding:
1139
+ case h_upgrade:
1140
+ if (ch != ' ') header_state = h_general;
1141
+ break;
1142
+
1143
+ default:
1144
+ assert(0 && "Unknown header_state");
1145
+ break;
1146
+ }
1147
+ break;
1148
+ }
1149
+
1150
+ if (ch == ':') {
1151
+ CALLBACK(header_field);
1152
+ state = s_header_value_start;
1153
+ break;
1154
+ }
1155
+
1156
+ if (ch == CR) {
1157
+ state = s_header_almost_done;
1158
+ CALLBACK(header_field);
1159
+ break;
1160
+ }
1161
+
1162
+ if (ch == LF) {
1163
+ CALLBACK(header_field);
1164
+ state = s_header_field_start;
1165
+ break;
1166
+ }
1167
+
1168
+ goto error;
1169
+ }
1170
+
1171
+ case s_header_value_start:
1172
+ {
1173
+ if (ch == ' ') break;
1174
+
1175
+ MARK(header_value);
1176
+
1177
+ state = s_header_value;
1178
+ index = 0;
1179
+
1180
+ c = acceptable_header[(unsigned char)ch];
1181
+
1182
+ if (!c) {
1183
+ if (ch == CR) {
1184
+ CALLBACK(header_value);
1185
+ header_state = h_general;
1186
+ state = s_header_almost_done;
1187
+ break;
1188
+ }
1189
+
1190
+ if (ch == LF) {
1191
+ CALLBACK(header_value);
1192
+ state = s_header_field_start;
1193
+ break;
1194
+ }
1195
+
1196
+ header_state = h_general;
1197
+ break;
1198
+ }
1199
+
1200
+ switch (header_state) {
1201
+ case h_upgrade:
1202
+ parser->flags |= F_UPGRADE;
1203
+ header_state = h_general;
1204
+ break;
1205
+
1206
+ case h_transfer_encoding:
1207
+ /* looking for 'Transfer-Encoding: chunked' */
1208
+ if ('c' == c) {
1209
+ header_state = h_matching_transfer_encoding_chunked;
1210
+ } else {
1211
+ header_state = h_general;
1212
+ }
1213
+ break;
1214
+
1215
+ case h_content_length:
1216
+ if (ch < '0' || ch > '9') goto error;
1217
+ parser->content_length = ch - '0';
1218
+ break;
1219
+
1220
+ case h_connection:
1221
+ /* looking for 'Connection: keep-alive' */
1222
+ if (c == 'k') {
1223
+ header_state = h_matching_connection_keep_alive;
1224
+ /* looking for 'Connection: close' */
1225
+ } else if (c == 'c') {
1226
+ header_state = h_matching_connection_close;
1227
+ } else {
1228
+ header_state = h_general;
1229
+ }
1230
+ break;
1231
+
1232
+ default:
1233
+ header_state = h_general;
1234
+ break;
1235
+ }
1236
+ break;
1237
+ }
1238
+
1239
+ case s_header_value:
1240
+ {
1241
+ c = acceptable_header[(unsigned char)ch];
1242
+
1243
+ if (!c) {
1244
+ if (ch == CR) {
1245
+ CALLBACK(header_value);
1246
+ state = s_header_almost_done;
1247
+ break;
1248
+ }
1249
+
1250
+ if (ch == LF) {
1251
+ CALLBACK(header_value);
1252
+ goto header_almost_done;
1253
+ }
1254
+ break;
1255
+ }
1256
+
1257
+ switch (header_state) {
1258
+ case h_general:
1259
+ break;
1260
+
1261
+ case h_connection:
1262
+ case h_transfer_encoding:
1263
+ assert(0 && "Shouldn't get here.");
1264
+ break;
1265
+
1266
+ case h_content_length:
1267
+ if (ch == ' ') break;
1268
+ if (ch < '0' || ch > '9') goto error;
1269
+ parser->content_length *= 10;
1270
+ parser->content_length += ch - '0';
1271
+ break;
1272
+
1273
+ /* Transfer-Encoding: chunked */
1274
+ case h_matching_transfer_encoding_chunked:
1275
+ index++;
1276
+ if (index > sizeof(CHUNKED)-1
1277
+ || c != CHUNKED[index]) {
1278
+ header_state = h_general;
1279
+ } else if (index == sizeof(CHUNKED)-2) {
1280
+ header_state = h_transfer_encoding_chunked;
1281
+ }
1282
+ break;
1283
+
1284
+ /* looking for 'Connection: keep-alive' */
1285
+ case h_matching_connection_keep_alive:
1286
+ index++;
1287
+ if (index > sizeof(KEEP_ALIVE)-1
1288
+ || c != KEEP_ALIVE[index]) {
1289
+ header_state = h_general;
1290
+ } else if (index == sizeof(KEEP_ALIVE)-2) {
1291
+ header_state = h_connection_keep_alive;
1292
+ }
1293
+ break;
1294
+
1295
+ /* looking for 'Connection: close' */
1296
+ case h_matching_connection_close:
1297
+ index++;
1298
+ if (index > sizeof(CLOSE)-1 || c != CLOSE[index]) {
1299
+ header_state = h_general;
1300
+ } else if (index == sizeof(CLOSE)-2) {
1301
+ header_state = h_connection_close;
1302
+ }
1303
+ break;
1304
+
1305
+ case h_transfer_encoding_chunked:
1306
+ case h_connection_keep_alive:
1307
+ case h_connection_close:
1308
+ if (ch != ' ') header_state = h_general;
1309
+ break;
1310
+
1311
+ default:
1312
+ state = s_header_value;
1313
+ header_state = h_general;
1314
+ break;
1315
+ }
1316
+ break;
1317
+ }
1318
+
1319
+ case s_header_almost_done:
1320
+ header_almost_done:
1321
+ {
1322
+ STRICT_CHECK(ch != LF);
1323
+
1324
+ state = s_header_field_start;
1325
+
1326
+ switch (header_state) {
1327
+ case h_connection_keep_alive:
1328
+ parser->flags |= F_CONNECTION_KEEP_ALIVE;
1329
+ break;
1330
+ case h_connection_close:
1331
+ parser->flags |= F_CONNECTION_CLOSE;
1332
+ break;
1333
+ case h_transfer_encoding_chunked:
1334
+ parser->flags |= F_CHUNKED;
1335
+ break;
1336
+ default:
1337
+ break;
1338
+ }
1339
+ break;
1340
+ }
1341
+
1342
+ case s_headers_almost_done:
1343
+ headers_almost_done:
1344
+ {
1345
+ STRICT_CHECK(ch != LF);
1346
+
1347
+ if (parser->flags & F_TRAILING) {
1348
+ /* End of a chunked request */
1349
+ CALLBACK2(message_complete);
1350
+ state = NEW_MESSAGE();
1351
+ break;
1352
+ }
1353
+
1354
+ nread = 0;
1355
+
1356
+ if (parser->flags & F_UPGRADE || parser->method == HTTP_CONNECT) {
1357
+ parser->upgrade = 1;
1358
+ }
1359
+
1360
+ /* Here we call the headers_complete callback. This is somewhat
1361
+ * different than other callbacks because if the user returns 1, we
1362
+ * will interpret that as saying that this message has no body. This
1363
+ * is needed for the annoying case of receiving a response to a HEAD
1364
+ * request.
1365
+ */
1366
+ if (settings->on_headers_complete) {
1367
+ switch (settings->on_headers_complete(parser)) {
1368
+ case 0:
1369
+ break;
1370
+
1371
+ case 1:
1372
+ parser->flags |= F_SKIPBODY;
1373
+ break;
1374
+
1375
+ default:
1376
+ return p - data; /* Error */
1377
+ }
1378
+ }
1379
+
1380
+ /* Exit, the rest of the connect is in a different protocol. */
1381
+ if (parser->upgrade) {
1382
+ CALLBACK2(message_complete);
1383
+ return (p - data);
1384
+ }
1385
+
1386
+ if (parser->flags & F_SKIPBODY) {
1387
+ CALLBACK2(message_complete);
1388
+ state = NEW_MESSAGE();
1389
+ } else if (parser->flags & F_CHUNKED) {
1390
+ /* chunked encoding - ignore Content-Length header */
1391
+ state = s_chunk_size_start;
1392
+ } else {
1393
+ if (parser->content_length == 0) {
1394
+ /* Content-Length header given but zero: Content-Length: 0\r\n */
1395
+ CALLBACK2(message_complete);
1396
+ state = NEW_MESSAGE();
1397
+ } else if (parser->content_length > 0) {
1398
+ /* Content-Length header given and non-zero */
1399
+ state = s_body_identity;
1400
+ } else {
1401
+ if (parser->type == HTTP_REQUEST || http_should_keep_alive(parser)) {
1402
+ /* Assume content-length 0 - read the next */
1403
+ CALLBACK2(message_complete);
1404
+ state = NEW_MESSAGE();
1405
+ } else {
1406
+ /* Read body until EOF */
1407
+ state = s_body_identity_eof;
1408
+ }
1409
+ }
1410
+ }
1411
+
1412
+ break;
1413
+ }
1414
+
1415
+ case s_body_identity:
1416
+ to_read = MIN(pe - p, (int64_t)parser->content_length);
1417
+ if (to_read > 0) {
1418
+ if (settings->on_body) settings->on_body(parser, p, to_read);
1419
+ p += to_read - 1;
1420
+ parser->content_length -= to_read;
1421
+ if (parser->content_length == 0) {
1422
+ CALLBACK2(message_complete);
1423
+ state = NEW_MESSAGE();
1424
+ }
1425
+ }
1426
+ break;
1427
+
1428
+ /* read until EOF */
1429
+ case s_body_identity_eof:
1430
+ to_read = pe - p;
1431
+ if (to_read > 0) {
1432
+ if (settings->on_body) settings->on_body(parser, p, to_read);
1433
+ p += to_read - 1;
1434
+ }
1435
+ break;
1436
+
1437
+ case s_chunk_size_start:
1438
+ {
1439
+ assert(parser->flags & F_CHUNKED);
1440
+
1441
+ c = unhex[(unsigned char)ch];
1442
+ if (c == -1) goto error;
1443
+ parser->content_length = c;
1444
+ state = s_chunk_size;
1445
+ break;
1446
+ }
1447
+
1448
+ case s_chunk_size:
1449
+ {
1450
+ assert(parser->flags & F_CHUNKED);
1451
+
1452
+ if (ch == CR) {
1453
+ state = s_chunk_size_almost_done;
1454
+ break;
1455
+ }
1456
+
1457
+ c = unhex[(unsigned char)ch];
1458
+
1459
+ if (c == -1) {
1460
+ if (ch == ';' || ch == ' ') {
1461
+ state = s_chunk_parameters;
1462
+ break;
1463
+ }
1464
+ goto error;
1465
+ }
1466
+
1467
+ parser->content_length *= 16;
1468
+ parser->content_length += c;
1469
+ break;
1470
+ }
1471
+
1472
+ case s_chunk_parameters:
1473
+ {
1474
+ assert(parser->flags & F_CHUNKED);
1475
+ /* just ignore this shit. TODO check for overflow */
1476
+ if (ch == CR) {
1477
+ state = s_chunk_size_almost_done;
1478
+ break;
1479
+ }
1480
+ break;
1481
+ }
1482
+
1483
+ case s_chunk_size_almost_done:
1484
+ {
1485
+ assert(parser->flags & F_CHUNKED);
1486
+ STRICT_CHECK(ch != LF);
1487
+
1488
+ if (parser->content_length == 0) {
1489
+ parser->flags |= F_TRAILING;
1490
+ state = s_header_field_start;
1491
+ } else {
1492
+ state = s_chunk_data;
1493
+ }
1494
+ break;
1495
+ }
1496
+
1497
+ case s_chunk_data:
1498
+ {
1499
+ assert(parser->flags & F_CHUNKED);
1500
+
1501
+ to_read = MIN(pe - p, (int64_t)(parser->content_length));
1502
+
1503
+ if (to_read > 0) {
1504
+ if (settings->on_body) settings->on_body(parser, p, to_read);
1505
+ p += to_read - 1;
1506
+ }
1507
+
1508
+ if (to_read == parser->content_length) {
1509
+ state = s_chunk_data_almost_done;
1510
+ }
1511
+
1512
+ parser->content_length -= to_read;
1513
+ break;
1514
+ }
1515
+
1516
+ case s_chunk_data_almost_done:
1517
+ assert(parser->flags & F_CHUNKED);
1518
+ STRICT_CHECK(ch != CR);
1519
+ state = s_chunk_data_done;
1520
+ break;
1521
+
1522
+ case s_chunk_data_done:
1523
+ assert(parser->flags & F_CHUNKED);
1524
+ STRICT_CHECK(ch != LF);
1525
+ state = s_chunk_size_start;
1526
+ break;
1527
+
1528
+ default:
1529
+ assert(0 && "unhandled state");
1530
+ goto error;
1531
+ }
1532
+ }
1533
+
1534
+ CALLBACK_NOCLEAR(header_field);
1535
+ CALLBACK_NOCLEAR(header_value);
1536
+ CALLBACK_NOCLEAR(fragment);
1537
+ CALLBACK_NOCLEAR(query_string);
1538
+ CALLBACK_NOCLEAR(path);
1539
+ CALLBACK_NOCLEAR(url);
1540
+
1541
+ parser->state = state;
1542
+ parser->header_state = header_state;
1543
+ parser->index = index;
1544
+ parser->nread = nread;
1545
+
1546
+ return len;
1547
+
1548
+ error:
1549
+ parser->state = s_dead;
1550
+ return (p - data);
1551
+ }
1552
+
1553
+
1554
+ int
1555
+ http_should_keep_alive (http_parser *parser)
1556
+ {
1557
+ if (parser->http_major > 0 && parser->http_minor > 0) {
1558
+ /* HTTP/1.1 */
1559
+ if (parser->flags & F_CONNECTION_CLOSE) {
1560
+ return 0;
1561
+ } else {
1562
+ return 1;
1563
+ }
1564
+ } else {
1565
+ /* HTTP/1.0 or earlier */
1566
+ if (parser->flags & F_CONNECTION_KEEP_ALIVE) {
1567
+ return 1;
1568
+ } else {
1569
+ return 0;
1570
+ }
1571
+ }
1572
+ }
1573
+
1574
+
1575
+ const char * http_method_str (enum http_method m)
1576
+ {
1577
+ return method_strings[m];
1578
+ }
1579
+
1580
+
1581
+ void
1582
+ http_parser_init (http_parser *parser, enum http_parser_type t)
1583
+ {
1584
+ parser->type = t;
1585
+ parser->state = (t == HTTP_REQUEST ? s_start_req : (t == HTTP_RESPONSE ? s_start_res : s_start_req_or_res));
1586
+ parser->nread = 0;
1587
+ parser->upgrade = 0;
1588
+ parser->flags = 0;
1589
+ parser->method = 0;
1590
+ }