opal-up 0.0.2 → 0.0.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/LICENSE +209 -0
- data/README.md +97 -29
- data/bin/up_ruby +4 -0
- data/bin/up_ruby_cluster +4 -0
- data/ext/up_ext/App.h +606 -0
- data/ext/up_ext/AsyncSocket.h +355 -0
- data/ext/up_ext/AsyncSocketData.h +87 -0
- data/ext/up_ext/BloomFilter.h +83 -0
- data/ext/up_ext/ChunkedEncoding.h +236 -0
- data/ext/up_ext/ClientApp.h +36 -0
- data/ext/up_ext/HttpContext.h +502 -0
- data/ext/up_ext/HttpContextData.h +56 -0
- data/ext/up_ext/HttpErrors.h +53 -0
- data/ext/up_ext/HttpParser.h +680 -0
- data/ext/up_ext/HttpResponse.h +578 -0
- data/ext/up_ext/HttpResponseData.h +95 -0
- data/ext/up_ext/HttpRouter.h +380 -0
- data/ext/up_ext/Loop.h +204 -0
- data/ext/up_ext/LoopData.h +112 -0
- data/ext/up_ext/MoveOnlyFunction.h +377 -0
- data/ext/up_ext/PerMessageDeflate.h +315 -0
- data/ext/up_ext/ProxyParser.h +163 -0
- data/ext/up_ext/QueryParser.h +120 -0
- data/ext/up_ext/TopicTree.h +363 -0
- data/ext/up_ext/Utilities.h +66 -0
- data/ext/up_ext/WebSocket.h +381 -0
- data/ext/up_ext/WebSocketContext.h +434 -0
- data/ext/up_ext/WebSocketContextData.h +109 -0
- data/ext/up_ext/WebSocketData.h +86 -0
- data/ext/up_ext/WebSocketExtensions.h +256 -0
- data/ext/up_ext/WebSocketHandshake.h +145 -0
- data/ext/up_ext/WebSocketProtocol.h +506 -0
- data/ext/up_ext/bsd.c +767 -0
- data/ext/up_ext/bsd.h +109 -0
- data/ext/up_ext/context.c +524 -0
- data/ext/up_ext/epoll_kqueue.c +458 -0
- data/ext/up_ext/epoll_kqueue.h +67 -0
- data/ext/up_ext/extconf.rb +5 -0
- data/ext/up_ext/internal.h +224 -0
- data/ext/up_ext/libusockets.h +350 -0
- data/ext/up_ext/libuwebsockets.cpp +1344 -0
- data/ext/up_ext/libuwebsockets.h +396 -0
- data/ext/up_ext/loop.c +386 -0
- data/ext/up_ext/loop_data.h +38 -0
- data/ext/up_ext/socket.c +231 -0
- data/ext/up_ext/up_ext.c +930 -0
- data/lib/up/bun/rack_env.rb +1 -13
- data/lib/up/bun/server.rb +93 -19
- data/lib/up/cli.rb +3 -0
- data/lib/up/client.rb +68 -0
- data/lib/up/ruby/cluster.rb +39 -0
- data/lib/up/ruby/cluster_cli.rb +10 -0
- data/lib/up/{node → ruby}/rack_cluster.rb +5 -4
- data/lib/up/{node → ruby}/rack_server.rb +4 -4
- data/lib/up/ruby/server_cli.rb +10 -0
- data/lib/up/u_web_socket/cluster.rb +18 -3
- data/lib/up/u_web_socket/server.rb +108 -15
- data/lib/up/version.rb +1 -1
- metadata +72 -30
- data/.gitignore +0 -5
- data/Gemfile +0 -2
- data/bin/up_node +0 -12
- data/bin/up_node_cluster +0 -12
- data/example_rack_app/Gemfile +0 -3
- data/example_rack_app/config.ru +0 -6
- data/example_rack_app/rack_app.rb +0 -5
- data/example_roda_app/Gemfile +0 -6
- data/example_roda_app/config.ru +0 -6
- data/example_roda_app/roda_app.rb +0 -37
- data/example_sinatra_app/Gemfile +0 -6
- data/example_sinatra_app/config.ru +0 -6
- data/example_sinatra_app/sinatra_app.rb +0 -7
- data/lib/up/node/cluster.rb +0 -39
- data/lib/up/node/cluster_cli.rb +0 -15
- data/lib/up/node/rack_env.rb +0 -106
- data/lib/up/node/server.rb +0 -84
- data/lib/up/node/server_cli.rb +0 -15
- data/lib/up/u_web_socket/rack_env.rb +0 -101
- data/opal-up.gemspec +0 -27
- data/up_logo.svg +0 -256
@@ -0,0 +1,680 @@
|
|
1
|
+
/*
|
2
|
+
* Authored by Alex Hultman, 2018-2020.
|
3
|
+
* Intellectual property of third-party.
|
4
|
+
|
5
|
+
* Licensed under the Apache License, Version 2.0 (the "License");
|
6
|
+
* you may not use this file except in compliance with the License.
|
7
|
+
* You may obtain a copy of the License at
|
8
|
+
|
9
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
|
11
|
+
* Unless required by applicable law or agreed to in writing, software
|
12
|
+
* distributed under the License is distributed on an "AS IS" BASIS,
|
13
|
+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
14
|
+
* See the License for the specific language governing permissions and
|
15
|
+
* limitations under the License.
|
16
|
+
*/
|
17
|
+
|
18
|
+
#ifndef UWS_HTTPPARSER_H
|
19
|
+
#define UWS_HTTPPARSER_H
|
20
|
+
|
21
|
+
// todo: HttpParser is in need of a few clean-ups and refactorings
|
22
|
+
|
23
|
+
/* The HTTP parser is an independent module subject to unit testing / fuzz testing */
|
24
|
+
|
25
|
+
#include <string>
|
26
|
+
#include <cstring>
|
27
|
+
#include <algorithm>
|
28
|
+
#include <climits>
|
29
|
+
#include <string_view>
|
30
|
+
#include <map>
|
31
|
+
#include "MoveOnlyFunction.h"
|
32
|
+
#include "ChunkedEncoding.h"
|
33
|
+
|
34
|
+
#include "BloomFilter.h"
|
35
|
+
#include "ProxyParser.h"
|
36
|
+
#include "QueryParser.h"
|
37
|
+
#include "HttpErrors.h"
|
38
|
+
|
39
|
+
namespace uWS {
|
40
|
+
|
41
|
+
/* We require at least this much post padding */
static const unsigned int MINIMUM_HTTP_POST_PADDING = 32;

/* Sentinel "user" pointer meaning "parser error"; distinct from nullptr and any real socket */
static void *FULLPTR = (void *)~(uintptr_t)0;

/* Wraps a raw pointer in std::optional, mapping nullptr to std::nullopt (STL needs one of these) */
template <typename T>
std::optional<T *> optional_ptr(T *ptr) {
    if (ptr) {
        return std::optional<T *>(ptr);
    }
    return std::nullopt;
}

/* Cap on buffered (incomplete) request headers; overridable via the UWS_HTTP_MAX_HEADERS_SIZE
 * environment variable, defaulting to 4096 bytes when unset */
static const size_t MAX_FALLBACK_SIZE = (size_t) atoi(optional_ptr(getenv("UWS_HTTP_MAX_HEADERS_SIZE")).value_or((char *) "4096"));
#ifndef UWS_HTTP_MAX_HEADERS_COUNT
#define UWS_HTTP_MAX_HEADERS_COUNT 100
#endif
|
55
|
+
|
56
|
+
struct HttpRequest {
|
57
|
+
|
58
|
+
friend struct HttpParser;
|
59
|
+
|
60
|
+
private:
|
61
|
+
struct Header {
|
62
|
+
std::string_view key, value;
|
63
|
+
} headers[UWS_HTTP_MAX_HEADERS_COUNT];
|
64
|
+
bool ancientHttp;
|
65
|
+
unsigned int querySeparator;
|
66
|
+
bool didYield;
|
67
|
+
BloomFilter bf;
|
68
|
+
std::pair<int, std::string_view *> currentParameters;
|
69
|
+
std::map<std::string, unsigned short, std::less<>> *currentParameterOffsets = nullptr;
|
70
|
+
|
71
|
+
public:
|
72
|
+
bool isAncient() {
|
73
|
+
return ancientHttp;
|
74
|
+
}
|
75
|
+
|
76
|
+
bool getYield() {
|
77
|
+
return didYield;
|
78
|
+
}
|
79
|
+
|
80
|
+
/* Iteration over headers (key, value) */
|
81
|
+
struct HeaderIterator {
|
82
|
+
Header *ptr;
|
83
|
+
|
84
|
+
bool operator!=(const HeaderIterator &other) const {
|
85
|
+
/* Comparison with end is a special case */
|
86
|
+
if (ptr != other.ptr) {
|
87
|
+
return other.ptr || ptr->key.length();
|
88
|
+
}
|
89
|
+
return false;
|
90
|
+
}
|
91
|
+
|
92
|
+
HeaderIterator &operator++() {
|
93
|
+
ptr++;
|
94
|
+
return *this;
|
95
|
+
}
|
96
|
+
|
97
|
+
std::pair<std::string_view, std::string_view> operator*() const {
|
98
|
+
return {ptr->key, ptr->value};
|
99
|
+
}
|
100
|
+
};
|
101
|
+
|
102
|
+
HeaderIterator begin() {
|
103
|
+
return {headers + 1};
|
104
|
+
}
|
105
|
+
|
106
|
+
HeaderIterator end() {
|
107
|
+
return {nullptr};
|
108
|
+
}
|
109
|
+
|
110
|
+
/* If you do not want to handle this route */
|
111
|
+
void setYield(bool yield) {
|
112
|
+
didYield = yield;
|
113
|
+
}
|
114
|
+
|
115
|
+
std::string_view getHeader(std::string_view lowerCasedHeader) {
|
116
|
+
if (bf.mightHave(lowerCasedHeader)) {
|
117
|
+
for (Header *h = headers; (++h)->key.length(); ) {
|
118
|
+
if (h->key.length() == lowerCasedHeader.length() && !strncmp(h->key.data(), lowerCasedHeader.data(), lowerCasedHeader.length())) {
|
119
|
+
return h->value;
|
120
|
+
}
|
121
|
+
}
|
122
|
+
}
|
123
|
+
return std::string_view(nullptr, 0);
|
124
|
+
}
|
125
|
+
|
126
|
+
std::string_view getUrl() {
|
127
|
+
return std::string_view(headers->value.data(), querySeparator);
|
128
|
+
}
|
129
|
+
|
130
|
+
std::string_view getFullUrl() {
|
131
|
+
return std::string_view(headers->value.data(), headers->value.length());
|
132
|
+
}
|
133
|
+
|
134
|
+
/* Hack: this should be getMethod */
|
135
|
+
std::string_view getCaseSensitiveMethod() {
|
136
|
+
return std::string_view(headers->key.data(), headers->key.length());
|
137
|
+
}
|
138
|
+
|
139
|
+
std::string_view getMethod() {
|
140
|
+
/* Compatibility hack: lower case method (todo: remove when major version bumps) */
|
141
|
+
for (unsigned int i = 0; i < headers->key.length(); i++) {
|
142
|
+
((char *) headers->key.data())[i] |= 32;
|
143
|
+
}
|
144
|
+
|
145
|
+
return std::string_view(headers->key.data(), headers->key.length());
|
146
|
+
}
|
147
|
+
|
148
|
+
/* Returns the raw querystring as a whole, still encoded */
|
149
|
+
std::string_view getQuery() {
|
150
|
+
if (querySeparator < headers->value.length()) {
|
151
|
+
/* Strip the initial ? */
|
152
|
+
return std::string_view(headers->value.data() + querySeparator + 1, headers->value.length() - querySeparator - 1);
|
153
|
+
} else {
|
154
|
+
return std::string_view(nullptr, 0);
|
155
|
+
}
|
156
|
+
}
|
157
|
+
|
158
|
+
/* Finds and decodes the URI component. */
|
159
|
+
std::string_view getQuery(std::string_view key) {
|
160
|
+
/* Raw querystring including initial '?' sign */
|
161
|
+
std::string_view queryString = std::string_view(headers->value.data() + querySeparator, headers->value.length() - querySeparator);
|
162
|
+
|
163
|
+
return getDecodedQueryValue(key, queryString);
|
164
|
+
}
|
165
|
+
|
166
|
+
void setParameters(std::pair<int, std::string_view *> parameters) {
|
167
|
+
currentParameters = parameters;
|
168
|
+
}
|
169
|
+
|
170
|
+
void setParameterOffsets(std::map<std::string, unsigned short, std::less<>> *offsets) {
|
171
|
+
currentParameterOffsets = offsets;
|
172
|
+
}
|
173
|
+
|
174
|
+
std::string_view getParameter(std::string_view name) {
|
175
|
+
if (!currentParameterOffsets) {
|
176
|
+
return {nullptr, 0};
|
177
|
+
}
|
178
|
+
auto it = currentParameterOffsets->find(name);
|
179
|
+
if (it == currentParameterOffsets->end()) {
|
180
|
+
return {nullptr, 0};
|
181
|
+
}
|
182
|
+
return getParameter(it->second);
|
183
|
+
}
|
184
|
+
|
185
|
+
std::string_view getParameter(unsigned short index) {
|
186
|
+
if (currentParameters.first < (int) index) {
|
187
|
+
return {};
|
188
|
+
} else {
|
189
|
+
return currentParameters.second[index];
|
190
|
+
}
|
191
|
+
}
|
192
|
+
|
193
|
+
};
|
194
|
+
|
195
|
+
struct HttpParser {
|
196
|
+
|
197
|
+
private:
|
198
|
+
std::string fallback;
|
199
|
+
/* This guy really has only 30 bits since we reserve two highest bits to chunked encoding parsing state */
|
200
|
+
uint64_t remainingStreamingBytes = 0;
|
201
|
+
|
202
|
+
/* Returns UINT_MAX on error. Maximum 999999999 is allowed. */
|
203
|
+
static uint64_t toUnsignedInteger(std::string_view str) {
|
204
|
+
/* We assume at least 64-bit integer giving us safely 999999999999999999 (18 number of 9s) */
|
205
|
+
if (str.length() > 18) {
|
206
|
+
return UINT_MAX;
|
207
|
+
}
|
208
|
+
|
209
|
+
uint64_t unsignedIntegerValue = 0;
|
210
|
+
for (char c : str) {
|
211
|
+
/* As long as the letter is 0-9 we cannot overflow. */
|
212
|
+
if (c < '0' || c > '9') {
|
213
|
+
return UINT_MAX;
|
214
|
+
}
|
215
|
+
unsignedIntegerValue = unsignedIntegerValue * 10ull + ((unsigned int) c - (unsigned int) '0');
|
216
|
+
}
|
217
|
+
return unsignedIntegerValue;
|
218
|
+
}
|
219
|
+
|
220
|
+
/* RFC 9110 16.3.1 Field Name Registry (TLDR; alnum + hyphen is allowed)
|
221
|
+
* [...] It MUST conform to the field-name syntax defined in Section 5.1,
|
222
|
+
* and it SHOULD be restricted to just letters, digits,
|
223
|
+
* and hyphen ('-') characters, with the first character being a letter. */
|
224
|
+
static inline bool isFieldNameByte(unsigned char x) {
|
225
|
+
return (x == '-') |
|
226
|
+
((x > '/') & (x < ':')) |
|
227
|
+
((x > '@') & (x < '[')) |
|
228
|
+
((x > 96) & (x < '{'));
|
229
|
+
}
|
230
|
+
|
231
|
+
static inline uint64_t hasLess(uint64_t x, uint64_t n) {
|
232
|
+
return (((x)-~0ULL/255*(n))&~(x)&~0ULL/255*128);
|
233
|
+
}
|
234
|
+
|
235
|
+
static inline uint64_t hasMore(uint64_t x, uint64_t n) {
|
236
|
+
return (( ((x)+~0ULL/255*(127-(n))) |(x))&~0ULL/255*128);
|
237
|
+
}
|
238
|
+
|
239
|
+
static inline uint64_t hasBetween(uint64_t x, uint64_t m, uint64_t n) {
|
240
|
+
return (( (~0ULL/255*(127+(n))-((x)&~0ULL/255*127)) &~(x)& (((x)&~0ULL/255*127)+~0ULL/255*(127-(m))) )&~0ULL/255*128);
|
241
|
+
}
|
242
|
+
|
243
|
+
static inline bool notFieldNameWord(uint64_t x) {
|
244
|
+
return hasLess(x, '-') |
|
245
|
+
hasBetween(x, '-', '0') |
|
246
|
+
hasBetween(x, '9', 'A') |
|
247
|
+
hasBetween(x, 'Z', 'a') |
|
248
|
+
hasMore(x, 'z');
|
249
|
+
}
|
250
|
+
|
251
|
+
static inline void *consumeFieldName(char *p) {
|
252
|
+
for (; true; p += 8) {
|
253
|
+
uint64_t word;
|
254
|
+
memcpy(&word, p, sizeof(uint64_t));
|
255
|
+
if (notFieldNameWord(word)) {
|
256
|
+
while (isFieldNameByte(*(unsigned char *)p)) {
|
257
|
+
*(p++) |= 0x20;
|
258
|
+
}
|
259
|
+
return (void *)p;
|
260
|
+
}
|
261
|
+
word |= 0x2020202020202020ull;
|
262
|
+
memcpy(p, &word, sizeof(uint64_t));
|
263
|
+
}
|
264
|
+
}
|
265
|
+
|
266
|
+
/* Puts method as key, target as value and returns non-null (or nullptr on error). */
|
267
|
+
static inline char *consumeRequestLine(char *data, HttpRequest::Header &header) {
|
268
|
+
/* Scan until single SP, assume next is / (origin request) */
|
269
|
+
char *start = data;
|
270
|
+
/* This catches the post padded CR and fails */
|
271
|
+
while (data[0] > 32) data++;
|
272
|
+
if (data[0] == 32 && data[1] == '/') {
|
273
|
+
header.key = {start, (size_t) (data - start)};
|
274
|
+
data++;
|
275
|
+
/* Scan for less than 33 (catches post padded CR and fails) */
|
276
|
+
start = data;
|
277
|
+
for (; true; data += 8) {
|
278
|
+
uint64_t word;
|
279
|
+
memcpy(&word, data, sizeof(uint64_t));
|
280
|
+
if (hasLess(word, 33)) {
|
281
|
+
while (*(unsigned char *)data > 32) data++;
|
282
|
+
/* Now we stand on space */
|
283
|
+
header.value = {start, (size_t) (data - start)};
|
284
|
+
/* Check that the following is http 1.1 */
|
285
|
+
if (memcmp(" HTTP/1.1\r\n", data, 11) == 0) {
|
286
|
+
return data + 11;
|
287
|
+
}
|
288
|
+
return nullptr;
|
289
|
+
}
|
290
|
+
}
|
291
|
+
}
|
292
|
+
return nullptr;
|
293
|
+
}
|
294
|
+
|
295
|
+
/* RFC 9110: 5.5 Field Values (TLDR; anything above 31 is allowed; htab (9) is also allowed)
|
296
|
+
* Field values are usually constrained to the range of US-ASCII characters [...]
|
297
|
+
* Field values containing CR, LF, or NUL characters are invalid and dangerous [...]
|
298
|
+
* Field values containing other CTL characters are also invalid. */
|
299
|
+
static inline void *tryConsumeFieldValue(char *p) {
|
300
|
+
for (; true; p += 8) {
|
301
|
+
uint64_t word;
|
302
|
+
memcpy(&word, p, sizeof(uint64_t));
|
303
|
+
if (hasLess(word, 32)) {
|
304
|
+
while (*(unsigned char *)p > 31) p++;
|
305
|
+
return (void *)p;
|
306
|
+
}
|
307
|
+
}
|
308
|
+
}
|
309
|
+
|
310
|
+
/* End is only used for the proxy parser. The HTTP parser recognizes "\ra" as invalid "\r\n" scan and breaks. */
|
311
|
+
static unsigned int getHeaders(char *postPaddedBuffer, char *end, struct HttpRequest::Header *headers, void *reserved, unsigned int &err) {
|
312
|
+
char *preliminaryKey, *preliminaryValue, *start = postPaddedBuffer;
|
313
|
+
|
314
|
+
#ifdef UWS_WITH_PROXY
|
315
|
+
/* ProxyParser is passed as reserved parameter */
|
316
|
+
ProxyParser *pp = (ProxyParser *) reserved;
|
317
|
+
|
318
|
+
/* Parse PROXY protocol */
|
319
|
+
auto [done, offset] = pp->parse({postPaddedBuffer, (size_t) (end - postPaddedBuffer)});
|
320
|
+
if (!done) {
|
321
|
+
/* We do not reset the ProxyParser (on filure) since it is tied to this
|
322
|
+
* connection, which is really only supposed to ever get one PROXY frame
|
323
|
+
* anyways. We do however allow multiple PROXY frames to be sent (overwrites former). */
|
324
|
+
return 0;
|
325
|
+
} else {
|
326
|
+
/* We have consumed this data so skip it */
|
327
|
+
postPaddedBuffer += offset;
|
328
|
+
}
|
329
|
+
#else
|
330
|
+
/* This one is unused */
|
331
|
+
(void) reserved;
|
332
|
+
(void) end;
|
333
|
+
#endif
|
334
|
+
|
335
|
+
/* It is critical for fallback buffering logic that we only return with success
|
336
|
+
* if we managed to parse a complete HTTP request (minus data). Returning success
|
337
|
+
* for PROXY means we can end up succeeding, yet leaving bytes in the fallback buffer
|
338
|
+
* which is then removed, and our counters to flip due to overflow and we end up with a crash */
|
339
|
+
|
340
|
+
/* The request line is different from the field names / field values */
|
341
|
+
if (!(postPaddedBuffer = consumeRequestLine(postPaddedBuffer, headers[0]))) {
|
342
|
+
/* Error - invalid request line */
|
343
|
+
/* Assuming it is 505 HTTP Version Not Supported */
|
344
|
+
err = HTTP_ERROR_505_HTTP_VERSION_NOT_SUPPORTED;
|
345
|
+
return 0;
|
346
|
+
}
|
347
|
+
headers++;
|
348
|
+
|
349
|
+
for (unsigned int i = 1; i < UWS_HTTP_MAX_HEADERS_COUNT - 1; i++) {
|
350
|
+
/* Lower case and consume the field name */
|
351
|
+
preliminaryKey = postPaddedBuffer;
|
352
|
+
postPaddedBuffer = (char *) consumeFieldName(postPaddedBuffer);
|
353
|
+
headers->key = std::string_view(preliminaryKey, (size_t) (postPaddedBuffer - preliminaryKey));
|
354
|
+
|
355
|
+
/* We should not accept whitespace between key and colon, so colon must foloow immediately */
|
356
|
+
if (postPaddedBuffer[0] != ':') {
|
357
|
+
/* Error: invalid chars in field name */
|
358
|
+
return 0;
|
359
|
+
}
|
360
|
+
postPaddedBuffer++;
|
361
|
+
|
362
|
+
preliminaryValue = postPaddedBuffer;
|
363
|
+
/* The goal of this call is to find next "\r\n", or any invalid field value chars, fast */
|
364
|
+
while (true) {
|
365
|
+
postPaddedBuffer = (char *) tryConsumeFieldValue(postPaddedBuffer);
|
366
|
+
/* If this is not CR then we caught some stinky invalid char on the way */
|
367
|
+
if (postPaddedBuffer[0] != '\r') {
|
368
|
+
/* If TAB then keep searching */
|
369
|
+
if (postPaddedBuffer[0] == '\t') {
|
370
|
+
postPaddedBuffer++;
|
371
|
+
continue;
|
372
|
+
}
|
373
|
+
/* Error - invalid chars in field value */
|
374
|
+
return 0;
|
375
|
+
}
|
376
|
+
break;
|
377
|
+
}
|
378
|
+
/* We fence end[0] with \r, followed by end[1] being something that is "not \n", to signify "not found".
|
379
|
+
* This way we can have this one single check to see if we found \r\n WITHIN our allowed search space. */
|
380
|
+
if (postPaddedBuffer[1] == '\n') {
|
381
|
+
/* Store this header, it is valid */
|
382
|
+
headers->value = std::string_view(preliminaryValue, (size_t) (postPaddedBuffer - preliminaryValue));
|
383
|
+
postPaddedBuffer += 2;
|
384
|
+
|
385
|
+
/* Trim trailing whitespace (SP, HTAB) */
|
386
|
+
while (headers->value.length() && headers->value.back() < 33) {
|
387
|
+
headers->value.remove_suffix(1);
|
388
|
+
}
|
389
|
+
|
390
|
+
/* Trim initial whitespace (SP, HTAB) */
|
391
|
+
while (headers->value.length() && headers->value.front() < 33) {
|
392
|
+
headers->value.remove_prefix(1);
|
393
|
+
}
|
394
|
+
|
395
|
+
headers++;
|
396
|
+
|
397
|
+
/* We definitely have at least one header (or request line), so check if we are done */
|
398
|
+
if (*postPaddedBuffer == '\r') {
|
399
|
+
if (postPaddedBuffer[1] == '\n') {
|
400
|
+
/* This cann take the very last header space */
|
401
|
+
headers->key = std::string_view(nullptr, 0);
|
402
|
+
return (unsigned int) ((postPaddedBuffer + 2) - start);
|
403
|
+
} else {
|
404
|
+
/* \r\n\r plus non-\n letter is malformed request, or simply out of search space */
|
405
|
+
return 0;
|
406
|
+
}
|
407
|
+
}
|
408
|
+
} else {
|
409
|
+
/* We are either out of search space or this is a malformed request */
|
410
|
+
return 0;
|
411
|
+
}
|
412
|
+
}
|
413
|
+
/* We ran out of header space, too large request */
|
414
|
+
return 0;
|
415
|
+
}
|
416
|
+
|
417
|
+
/* This is the only caller of getHeaders and is thus the deepest part of the parser.
|
418
|
+
* From here we return either [consumed, user] for "keep going",
|
419
|
+
* or [consumed, nullptr] for "break; I am closed or upgraded to websocket"
|
420
|
+
* or [whatever, fullptr] for "break and close me, I am a parser error!" */
|
421
|
+
template <int CONSUME_MINIMALLY>
|
422
|
+
std::pair<unsigned int, void *> fenceAndConsumePostPadded(char *data, unsigned int length, void *user, void *reserved, HttpRequest *req, MoveOnlyFunction<void *(void *, HttpRequest *)> &requestHandler, MoveOnlyFunction<void *(void *, std::string_view, bool)> &dataHandler) {
|
423
|
+
|
424
|
+
/* How much data we CONSUMED (to throw away) */
|
425
|
+
unsigned int consumedTotal = 0;
|
426
|
+
unsigned int err = 0;
|
427
|
+
|
428
|
+
/* Fence two bytes past end of our buffer (buffer has post padded margins).
|
429
|
+
* This is to always catch scan for \r but not for \r\n. */
|
430
|
+
data[length] = '\r';
|
431
|
+
data[length + 1] = 'a'; /* Anything that is not \n, to trigger "invalid request" */
|
432
|
+
|
433
|
+
for (unsigned int consumed; length && (consumed = getHeaders(data, data + length, req->headers, reserved, err)); ) {
|
434
|
+
data += consumed;
|
435
|
+
length -= consumed;
|
436
|
+
consumedTotal += consumed;
|
437
|
+
|
438
|
+
/* Store HTTP version (ancient 1.0 or 1.1) */
|
439
|
+
req->ancientHttp = false;
|
440
|
+
|
441
|
+
/* Add all headers to bloom filter */
|
442
|
+
req->bf.reset();
|
443
|
+
for (HttpRequest::Header *h = req->headers; (++h)->key.length(); ) {
|
444
|
+
req->bf.add(h->key);
|
445
|
+
}
|
446
|
+
|
447
|
+
/* Break if no host header (but we can have empty string which is different from nullptr) */
|
448
|
+
if (!req->getHeader("host").data()) {
|
449
|
+
return {HTTP_ERROR_400_BAD_REQUEST, FULLPTR};
|
450
|
+
}
|
451
|
+
|
452
|
+
/* RFC 9112 6.3
|
453
|
+
* If a message is received with both a Transfer-Encoding and a Content-Length header field,
|
454
|
+
* the Transfer-Encoding overrides the Content-Length. Such a message might indicate an attempt
|
455
|
+
* to perform request smuggling (Section 11.2) or response splitting (Section 11.1) and
|
456
|
+
* ought to be handled as an error. */
|
457
|
+
std::string_view transferEncodingString = req->getHeader("transfer-encoding");
|
458
|
+
std::string_view contentLengthString = req->getHeader("content-length");
|
459
|
+
if (transferEncodingString.length() && contentLengthString.length()) {
|
460
|
+
/* Returning fullptr is the same as calling the errorHandler */
|
461
|
+
/* We could be smart and set an error in the context along with this, to indicate what
|
462
|
+
* http error response we might want to return */
|
463
|
+
return {HTTP_ERROR_400_BAD_REQUEST, FULLPTR};
|
464
|
+
}
|
465
|
+
|
466
|
+
/* Parse query */
|
467
|
+
const char *querySeparatorPtr = (const char *) memchr(req->headers->value.data(), '?', req->headers->value.length());
|
468
|
+
req->querySeparator = (unsigned int) ((querySeparatorPtr ? querySeparatorPtr : req->headers->value.data() + req->headers->value.length()) - req->headers->value.data());
|
469
|
+
|
470
|
+
/* If returned socket is not what we put in we need
|
471
|
+
* to break here as we either have upgraded to
|
472
|
+
* WebSockets or otherwise closed the socket. */
|
473
|
+
void *returnedUser = requestHandler(user, req);
|
474
|
+
if (returnedUser != user) {
|
475
|
+
/* We are upgraded to WebSocket or otherwise broken */
|
476
|
+
return {consumedTotal, returnedUser};
|
477
|
+
}
|
478
|
+
|
479
|
+
/* The rules at play here according to RFC 9112 for requests are essentially:
|
480
|
+
* If both content-length and transfer-encoding then invalid message; must break.
|
481
|
+
* If has transfer-encoding then must be chunked regardless of value.
|
482
|
+
* If content-length then fixed length even if 0.
|
483
|
+
* If none of the above then fixed length is 0. */
|
484
|
+
|
485
|
+
/* RFC 9112 6.3
|
486
|
+
* If a message is received with both a Transfer-Encoding and a Content-Length header field,
|
487
|
+
* the Transfer-Encoding overrides the Content-Length. */
|
488
|
+
if (transferEncodingString.length()) {
|
489
|
+
|
490
|
+
/* If a proxy sent us the transfer-encoding header that 100% means it must be chunked or else the proxy is
|
491
|
+
* not RFC 9112 compliant. Therefore it is always better to assume this is the case, since that entirely eliminates
|
492
|
+
* all forms of transfer-encoding obfuscation tricks. We just rely on the header. */
|
493
|
+
|
494
|
+
/* RFC 9112 6.3
|
495
|
+
* If a Transfer-Encoding header field is present in a request and the chunked transfer coding is not the
|
496
|
+
* final encoding, the message body length cannot be determined reliably; the server MUST respond with the
|
497
|
+
* 400 (Bad Request) status code and then close the connection. */
|
498
|
+
|
499
|
+
/* In this case we fail later by having the wrong interpretation (assuming chunked).
|
500
|
+
* This could be made stricter but makes no difference either way, unless forwarding the identical message as a proxy. */
|
501
|
+
|
502
|
+
remainingStreamingBytes = STATE_IS_CHUNKED;
|
503
|
+
/* If consume minimally, we do not want to consume anything but we want to mark this as being chunked */
|
504
|
+
if (!CONSUME_MINIMALLY) {
|
505
|
+
/* Go ahead and parse it (todo: better heuristics for emitting FIN to the app level) */
|
506
|
+
std::string_view dataToConsume(data, length);
|
507
|
+
for (auto chunk : uWS::ChunkIterator(&dataToConsume, &remainingStreamingBytes)) {
|
508
|
+
dataHandler(user, chunk, chunk.length() == 0);
|
509
|
+
}
|
510
|
+
if (isParsingInvalidChunkedEncoding(remainingStreamingBytes)) {
|
511
|
+
return {HTTP_ERROR_400_BAD_REQUEST, FULLPTR};
|
512
|
+
}
|
513
|
+
unsigned int consumed = (length - (unsigned int) dataToConsume.length());
|
514
|
+
data = (char *) dataToConsume.data();
|
515
|
+
length = (unsigned int) dataToConsume.length();
|
516
|
+
consumedTotal += consumed;
|
517
|
+
}
|
518
|
+
} else if (contentLengthString.length()) {
|
519
|
+
remainingStreamingBytes = toUnsignedInteger(contentLengthString);
|
520
|
+
if (remainingStreamingBytes == UINT_MAX) {
|
521
|
+
/* Parser error */
|
522
|
+
return {HTTP_ERROR_400_BAD_REQUEST, FULLPTR};
|
523
|
+
}
|
524
|
+
|
525
|
+
if (!CONSUME_MINIMALLY) {
|
526
|
+
unsigned int emittable = (unsigned int) std::min<uint64_t>(remainingStreamingBytes, length);
|
527
|
+
dataHandler(user, std::string_view(data, emittable), emittable == remainingStreamingBytes);
|
528
|
+
remainingStreamingBytes -= emittable;
|
529
|
+
|
530
|
+
data += emittable;
|
531
|
+
length -= emittable;
|
532
|
+
consumedTotal += emittable;
|
533
|
+
}
|
534
|
+
} else {
|
535
|
+
/* If we came here without a body; emit an empty data chunk to signal no data */
|
536
|
+
dataHandler(user, {}, true);
|
537
|
+
}
|
538
|
+
|
539
|
+
/* Consume minimally should break as easrly as possible */
|
540
|
+
if (CONSUME_MINIMALLY) {
|
541
|
+
break;
|
542
|
+
}
|
543
|
+
}
|
544
|
+
/* Whenever we return FULLPTR, the interpretation of "consumed" should be the HttpError enum. */
|
545
|
+
if (err) {
|
546
|
+
return {err, FULLPTR};
|
547
|
+
}
|
548
|
+
return {consumedTotal, user};
|
549
|
+
}
|
550
|
+
|
551
|
+
public:
|
552
|
+
std::pair<unsigned int, void *> consumePostPadded(char *data, unsigned int length, void *user, void *reserved, MoveOnlyFunction<void *(void *, HttpRequest *)> &&requestHandler, MoveOnlyFunction<void *(void *, std::string_view, bool)> &&dataHandler) {
|
553
|
+
|
554
|
+
/* This resets BloomFilter by construction, but later we also reset it again.
|
555
|
+
* Optimize this to skip resetting twice (req could be made global) */
|
556
|
+
HttpRequest req;
|
557
|
+
|
558
|
+
if (remainingStreamingBytes) {
|
559
|
+
|
560
|
+
/* It's either chunked or with a content-length */
|
561
|
+
if (isParsingChunkedEncoding(remainingStreamingBytes)) {
|
562
|
+
std::string_view dataToConsume(data, length);
|
563
|
+
for (auto chunk : uWS::ChunkIterator(&dataToConsume, &remainingStreamingBytes)) {
|
564
|
+
dataHandler(user, chunk, chunk.length() == 0);
|
565
|
+
}
|
566
|
+
if (isParsingInvalidChunkedEncoding(remainingStreamingBytes)) {
|
567
|
+
return {HTTP_ERROR_400_BAD_REQUEST, FULLPTR};
|
568
|
+
}
|
569
|
+
data = (char *) dataToConsume.data();
|
570
|
+
length = (unsigned int) dataToConsume.length();
|
571
|
+
} else {
|
572
|
+
// this is exactly the same as below!
|
573
|
+
// todo: refactor this
|
574
|
+
if (remainingStreamingBytes >= length) {
|
575
|
+
void *returnedUser = dataHandler(user, std::string_view(data, length), remainingStreamingBytes == length);
|
576
|
+
remainingStreamingBytes -= length;
|
577
|
+
return {0, returnedUser};
|
578
|
+
} else {
|
579
|
+
void *returnedUser = dataHandler(user, std::string_view(data, remainingStreamingBytes), true);
|
580
|
+
|
581
|
+
data += (unsigned int) remainingStreamingBytes;
|
582
|
+
length -= (unsigned int) remainingStreamingBytes;
|
583
|
+
|
584
|
+
remainingStreamingBytes = 0;
|
585
|
+
|
586
|
+
if (returnedUser != user) {
|
587
|
+
return {0, returnedUser};
|
588
|
+
}
|
589
|
+
}
|
590
|
+
}
|
591
|
+
|
592
|
+
} else if (fallback.length()) {
|
593
|
+
unsigned int had = (unsigned int) fallback.length();
|
594
|
+
|
595
|
+
size_t maxCopyDistance = std::min<size_t>(MAX_FALLBACK_SIZE - fallback.length(), (size_t) length);
|
596
|
+
|
597
|
+
/* We don't want fallback to be short string optimized, since we want to move it */
|
598
|
+
fallback.reserve(fallback.length() + maxCopyDistance + std::max<unsigned int>(MINIMUM_HTTP_POST_PADDING, sizeof(std::string)));
|
599
|
+
fallback.append(data, maxCopyDistance);
|
600
|
+
|
601
|
+
// break here on break
|
602
|
+
std::pair<unsigned int, void *> consumed = fenceAndConsumePostPadded<true>(fallback.data(), (unsigned int) fallback.length(), user, reserved, &req, requestHandler, dataHandler);
|
603
|
+
if (consumed.second != user) {
|
604
|
+
return consumed;
|
605
|
+
}
|
606
|
+
|
607
|
+
if (consumed.first) {
|
608
|
+
|
609
|
+
/* This logic assumes that we consumed everything in fallback buffer.
|
610
|
+
* This is critically important, as we will get an integer overflow in case
|
611
|
+
* of "had" being larger than what we consumed, and that we would drop data */
|
612
|
+
fallback.clear();
|
613
|
+
data += consumed.first - had;
|
614
|
+
length -= consumed.first - had;
|
615
|
+
|
616
|
+
if (remainingStreamingBytes) {
|
617
|
+
/* It's either chunked or with a content-length */
|
618
|
+
if (isParsingChunkedEncoding(remainingStreamingBytes)) {
|
619
|
+
std::string_view dataToConsume(data, length);
|
620
|
+
for (auto chunk : uWS::ChunkIterator(&dataToConsume, &remainingStreamingBytes)) {
|
621
|
+
dataHandler(user, chunk, chunk.length() == 0);
|
622
|
+
}
|
623
|
+
if (isParsingInvalidChunkedEncoding(remainingStreamingBytes)) {
|
624
|
+
return {HTTP_ERROR_400_BAD_REQUEST, FULLPTR};
|
625
|
+
}
|
626
|
+
data = (char *) dataToConsume.data();
|
627
|
+
length = (unsigned int) dataToConsume.length();
|
628
|
+
} else {
|
629
|
+
// this is exactly the same as above!
|
630
|
+
if (remainingStreamingBytes >= (unsigned int) length) {
|
631
|
+
void *returnedUser = dataHandler(user, std::string_view(data, length), remainingStreamingBytes == (unsigned int) length);
|
632
|
+
remainingStreamingBytes -= length;
|
633
|
+
return {0, returnedUser};
|
634
|
+
} else {
|
635
|
+
void *returnedUser = dataHandler(user, std::string_view(data, remainingStreamingBytes), true);
|
636
|
+
|
637
|
+
data += (unsigned int) remainingStreamingBytes;
|
638
|
+
length -= (unsigned int) remainingStreamingBytes;
|
639
|
+
|
640
|
+
remainingStreamingBytes = 0;
|
641
|
+
|
642
|
+
if (returnedUser != user) {
|
643
|
+
return {0, returnedUser};
|
644
|
+
}
|
645
|
+
}
|
646
|
+
}
|
647
|
+
}
|
648
|
+
|
649
|
+
} else {
|
650
|
+
if (fallback.length() == MAX_FALLBACK_SIZE) {
|
651
|
+
return {HTTP_ERROR_431_REQUEST_HEADER_FIELDS_TOO_LARGE, FULLPTR};
|
652
|
+
}
|
653
|
+
return {0, user};
|
654
|
+
}
|
655
|
+
}
|
656
|
+
|
657
|
+
std::pair<unsigned int, void *> consumed = fenceAndConsumePostPadded<false>(data, length, user, reserved, &req, requestHandler, dataHandler);
|
658
|
+
if (consumed.second != user) {
|
659
|
+
return consumed;
|
660
|
+
}
|
661
|
+
|
662
|
+
data += consumed.first;
|
663
|
+
length -= consumed.first;
|
664
|
+
|
665
|
+
if (length) {
|
666
|
+
if (length < MAX_FALLBACK_SIZE) {
|
667
|
+
fallback.append(data, length);
|
668
|
+
} else {
|
669
|
+
return {HTTP_ERROR_431_REQUEST_HEADER_FIELDS_TOO_LARGE, FULLPTR};
|
670
|
+
}
|
671
|
+
}
|
672
|
+
|
673
|
+
// added for now
|
674
|
+
return {0, user};
|
675
|
+
}
|
676
|
+
};
|
677
|
+
|
678
|
+
}
|
679
|
+
|
680
|
+
#endif // UWS_HTTPPARSER_H
|