opal-up 0.0.2 → 0.0.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (69) hide show
  1. checksums.yaml +4 -4
  2. data/LICENSE +209 -0
  3. data/README.md +81 -28
  4. data/bin/up_ruby +4 -0
  5. data/bin/up_ruby_cluster +4 -0
  6. data/ext/up_ext/App.h +606 -0
  7. data/ext/up_ext/AsyncSocket.h +355 -0
  8. data/ext/up_ext/AsyncSocketData.h +87 -0
  9. data/ext/up_ext/BloomFilter.h +83 -0
  10. data/ext/up_ext/ChunkedEncoding.h +236 -0
  11. data/ext/up_ext/ClientApp.h +36 -0
  12. data/ext/up_ext/HttpContext.h +502 -0
  13. data/ext/up_ext/HttpContextData.h +56 -0
  14. data/ext/up_ext/HttpErrors.h +53 -0
  15. data/ext/up_ext/HttpParser.h +680 -0
  16. data/ext/up_ext/HttpResponse.h +578 -0
  17. data/ext/up_ext/HttpResponseData.h +95 -0
  18. data/ext/up_ext/HttpRouter.h +380 -0
  19. data/ext/up_ext/Loop.h +204 -0
  20. data/ext/up_ext/LoopData.h +112 -0
  21. data/ext/up_ext/MoveOnlyFunction.h +377 -0
  22. data/ext/up_ext/PerMessageDeflate.h +315 -0
  23. data/ext/up_ext/ProxyParser.h +163 -0
  24. data/ext/up_ext/QueryParser.h +120 -0
  25. data/ext/up_ext/TopicTree.h +363 -0
  26. data/ext/up_ext/Utilities.h +66 -0
  27. data/ext/up_ext/WebSocket.h +381 -0
  28. data/ext/up_ext/WebSocketContext.h +434 -0
  29. data/ext/up_ext/WebSocketContextData.h +109 -0
  30. data/ext/up_ext/WebSocketData.h +86 -0
  31. data/ext/up_ext/WebSocketExtensions.h +256 -0
  32. data/ext/up_ext/WebSocketHandshake.h +145 -0
  33. data/ext/up_ext/WebSocketProtocol.h +506 -0
  34. data/ext/up_ext/bsd.c +767 -0
  35. data/ext/up_ext/bsd.h +109 -0
  36. data/ext/up_ext/context.c +524 -0
  37. data/ext/up_ext/epoll_kqueue.c +458 -0
  38. data/ext/up_ext/epoll_kqueue.h +67 -0
  39. data/ext/up_ext/extconf.rb +5 -0
  40. data/ext/up_ext/internal.h +224 -0
  41. data/ext/up_ext/libusockets.h +350 -0
  42. data/ext/up_ext/libuwebsockets.cpp +1374 -0
  43. data/ext/up_ext/libuwebsockets.h +260 -0
  44. data/ext/up_ext/loop.c +386 -0
  45. data/ext/up_ext/loop_data.h +38 -0
  46. data/ext/up_ext/socket.c +231 -0
  47. data/ext/up_ext/up_ext.c +278 -0
  48. data/lib/up/node/rack_env.rb +2 -2
  49. data/lib/up/ruby/cluster_cli.rb +10 -0
  50. data/lib/up/ruby/rack_cluster.rb +26 -0
  51. data/lib/up/ruby/rack_env.rb +97 -0
  52. data/lib/up/ruby/rack_server.rb +26 -0
  53. data/lib/up/ruby/server_cli.rb +10 -0
  54. data/lib/up/u_web_socket/rack_env.rb +1 -1
  55. data/lib/up/version.rb +1 -1
  56. metadata +71 -18
  57. data/.gitignore +0 -5
  58. data/Gemfile +0 -2
  59. data/example_rack_app/Gemfile +0 -3
  60. data/example_rack_app/config.ru +0 -6
  61. data/example_rack_app/rack_app.rb +0 -5
  62. data/example_roda_app/Gemfile +0 -6
  63. data/example_roda_app/config.ru +0 -6
  64. data/example_roda_app/roda_app.rb +0 -37
  65. data/example_sinatra_app/Gemfile +0 -6
  66. data/example_sinatra_app/config.ru +0 -6
  67. data/example_sinatra_app/sinatra_app.rb +0 -7
  68. data/opal-up.gemspec +0 -27
  69. data/up_logo.svg +0 -256
@@ -0,0 +1,680 @@
1
+ /*
2
+ * Authored by Alex Hultman, 2018-2020.
3
+ * Intellectual property of third-party.
4
+
5
+ * Licensed under the Apache License, Version 2.0 (the "License");
6
+ * you may not use this file except in compliance with the License.
7
+ * You may obtain a copy of the License at
8
+
9
+ * http://www.apache.org/licenses/LICENSE-2.0
10
+
11
+ * Unless required by applicable law or agreed to in writing, software
12
+ * distributed under the License is distributed on an "AS IS" BASIS,
13
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ * See the License for the specific language governing permissions and
15
+ * limitations under the License.
16
+ */
17
+
18
+ #ifndef UWS_HTTPPARSER_H
19
+ #define UWS_HTTPPARSER_H
20
+
21
+ // todo: HttpParser is in need of a few clean-ups and refactorings
22
+
23
+ /* The HTTP parser is an independent module subject to unit testing / fuzz testing */
24
+
25
#include <algorithm>
#include <climits>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <map>
#include <optional>
#include <string>
#include <string_view>
#include <utility>

#include "MoveOnlyFunction.h"
#include "ChunkedEncoding.h"

#include "BloomFilter.h"
#include "ProxyParser.h"
#include "QueryParser.h"
#include "HttpErrors.h"
38
+
39
+ namespace uWS {
40
+
41
/* We require at least this much post padding */
static const unsigned int MINIMUM_HTTP_POST_PADDING = 32;

/* All-ones pointer: the parser-error sentinel ("fullptr") returned alongside an HttpError code */
static void *FULLPTR = (void *)~(uintptr_t)0;

/* STL needs one of these: wraps a possibly-null pointer in std::optional */
template <typename T>
std::optional<T *> optional_ptr(T *ptr) {
    if (ptr) {
        return std::optional<T *>(ptr);
    }
    return std::nullopt;
}

/* Maximum size of the header fallback buffer; overridable at process start via the
 * UWS_HTTP_MAX_HEADERS_SIZE environment variable (defaults to 4096 bytes). */
static const size_t MAX_FALLBACK_SIZE = (size_t) atoi(optional_ptr(getenv("UWS_HTTP_MAX_HEADERS_SIZE")).value_or((char *) "4096"));

#ifndef UWS_HTTP_MAX_HEADERS_COUNT
#define UWS_HTTP_MAX_HEADERS_COUNT 100
#endif
55
+
56
+ struct HttpRequest {
57
+
58
+ friend struct HttpParser;
59
+
60
+ private:
61
+ struct Header {
62
+ std::string_view key, value;
63
+ } headers[UWS_HTTP_MAX_HEADERS_COUNT];
64
+ bool ancientHttp;
65
+ unsigned int querySeparator;
66
+ bool didYield;
67
+ BloomFilter bf;
68
+ std::pair<int, std::string_view *> currentParameters;
69
+ std::map<std::string, unsigned short, std::less<>> *currentParameterOffsets = nullptr;
70
+
71
+ public:
72
+ bool isAncient() {
73
+ return ancientHttp;
74
+ }
75
+
76
+ bool getYield() {
77
+ return didYield;
78
+ }
79
+
80
+ /* Iteration over headers (key, value) */
81
+ struct HeaderIterator {
82
+ Header *ptr;
83
+
84
+ bool operator!=(const HeaderIterator &other) const {
85
+ /* Comparison with end is a special case */
86
+ if (ptr != other.ptr) {
87
+ return other.ptr || ptr->key.length();
88
+ }
89
+ return false;
90
+ }
91
+
92
+ HeaderIterator &operator++() {
93
+ ptr++;
94
+ return *this;
95
+ }
96
+
97
+ std::pair<std::string_view, std::string_view> operator*() const {
98
+ return {ptr->key, ptr->value};
99
+ }
100
+ };
101
+
102
+ HeaderIterator begin() {
103
+ return {headers + 1};
104
+ }
105
+
106
+ HeaderIterator end() {
107
+ return {nullptr};
108
+ }
109
+
110
+ /* If you do not want to handle this route */
111
+ void setYield(bool yield) {
112
+ didYield = yield;
113
+ }
114
+
115
+ std::string_view getHeader(std::string_view lowerCasedHeader) {
116
+ if (bf.mightHave(lowerCasedHeader)) {
117
+ for (Header *h = headers; (++h)->key.length(); ) {
118
+ if (h->key.length() == lowerCasedHeader.length() && !strncmp(h->key.data(), lowerCasedHeader.data(), lowerCasedHeader.length())) {
119
+ return h->value;
120
+ }
121
+ }
122
+ }
123
+ return std::string_view(nullptr, 0);
124
+ }
125
+
126
+ std::string_view getUrl() {
127
+ return std::string_view(headers->value.data(), querySeparator);
128
+ }
129
+
130
+ std::string_view getFullUrl() {
131
+ return std::string_view(headers->value.data(), headers->value.length());
132
+ }
133
+
134
+ /* Hack: this should be getMethod */
135
+ std::string_view getCaseSensitiveMethod() {
136
+ return std::string_view(headers->key.data(), headers->key.length());
137
+ }
138
+
139
+ std::string_view getMethod() {
140
+ /* Compatibility hack: lower case method (todo: remove when major version bumps) */
141
+ for (unsigned int i = 0; i < headers->key.length(); i++) {
142
+ ((char *) headers->key.data())[i] |= 32;
143
+ }
144
+
145
+ return std::string_view(headers->key.data(), headers->key.length());
146
+ }
147
+
148
+ /* Returns the raw querystring as a whole, still encoded */
149
+ std::string_view getQuery() {
150
+ if (querySeparator < headers->value.length()) {
151
+ /* Strip the initial ? */
152
+ return std::string_view(headers->value.data() + querySeparator + 1, headers->value.length() - querySeparator - 1);
153
+ } else {
154
+ return std::string_view(nullptr, 0);
155
+ }
156
+ }
157
+
158
+ /* Finds and decodes the URI component. */
159
+ std::string_view getQuery(std::string_view key) {
160
+ /* Raw querystring including initial '?' sign */
161
+ std::string_view queryString = std::string_view(headers->value.data() + querySeparator, headers->value.length() - querySeparator);
162
+
163
+ return getDecodedQueryValue(key, queryString);
164
+ }
165
+
166
+ void setParameters(std::pair<int, std::string_view *> parameters) {
167
+ currentParameters = parameters;
168
+ }
169
+
170
+ void setParameterOffsets(std::map<std::string, unsigned short, std::less<>> *offsets) {
171
+ currentParameterOffsets = offsets;
172
+ }
173
+
174
+ std::string_view getParameter(std::string_view name) {
175
+ if (!currentParameterOffsets) {
176
+ return {nullptr, 0};
177
+ }
178
+ auto it = currentParameterOffsets->find(name);
179
+ if (it == currentParameterOffsets->end()) {
180
+ return {nullptr, 0};
181
+ }
182
+ return getParameter(it->second);
183
+ }
184
+
185
+ std::string_view getParameter(unsigned short index) {
186
+ if (currentParameters.first < (int) index) {
187
+ return {};
188
+ } else {
189
+ return currentParameters.second[index];
190
+ }
191
+ }
192
+
193
+ };
194
+
195
+ struct HttpParser {
196
+
197
+ private:
198
+ std::string fallback;
199
+ /* This guy really has only 30 bits since we reserve two highest bits to chunked encoding parsing state */
200
+ uint64_t remainingStreamingBytes = 0;
201
+
202
/* Parses a non-negative decimal integer. Returns UINT_MAX as the error sentinel
 * (non-digit character, or more than 18 digits — 18 nines still fit safely in 64 bits).
 * NOTE(review): a legitimate value equal to UINT_MAX collides with the sentinel;
 * callers in this file compare against UINT_MAX, so the sentinel is kept as-is. */
static uint64_t toUnsignedInteger(std::string_view str) {
    /* We assume at least 64-bit integer giving us safely 999999999999999999 (18 number of 9s) */
    if (str.length() > 18) {
        return UINT_MAX;
    }

    uint64_t value = 0;
    for (char digit : str) {
        /* As long as the letter is 0-9 we cannot overflow. */
        if (digit < '0' || digit > '9') {
            return UINT_MAX;
        }
        value = value * 10ull + ((unsigned int) digit - (unsigned int) '0');
    }
    return value;
}
219
+
220
/* RFC 9110 16.3.1 Field Name Registry (TLDR; alnum + hyphen is allowed)
 * [...] It MUST conform to the field-name syntax defined in Section 5.1,
 * and it SHOULD be restricted to just letters, digits,
 * and hyphen ('-') characters, with the first character being a letter. */
static inline bool isFieldNameByte(unsigned char x) {
    /* Branchless membership test: hyphen, digit, upper-case letter, lower-case letter */
    bool isHyphen = (x == '-');
    bool isDigit = (x > '/') & (x < ':');
    bool isUpper = (x > '@') & (x < '[');
    bool isLower = (x > 96) & (x < '{');
    return isHyphen | isDigit | isUpper | isLower;
}
230
+
231
/* SWAR helpers in the style of the classic "Bit Twiddling Hacks" word tricks:
 * each treats a uint64_t as eight packed bytes and yields a non-zero mask
 * (high bit set in each matching byte) when the predicate holds for ANY byte.
 * The threshold arguments must be in 0..127 for the trick to be exact. */

/* Non-zero iff some byte of v is strictly less than n */
static inline uint64_t hasLess(uint64_t v, uint64_t n) {
    return (((v) - ~0ULL / 255 * (n)) & ~(v) & ~0ULL / 255 * 128);
}

/* Non-zero iff some byte of v is strictly greater than n */
static inline uint64_t hasMore(uint64_t v, uint64_t n) {
    return ((((v) + ~0ULL / 255 * (127 - (n))) | (v)) & ~0ULL / 255 * 128);
}

/* Non-zero iff some byte of v is strictly between m and n */
static inline uint64_t hasBetween(uint64_t v, uint64_t m, uint64_t n) {
    return (((~0ULL / 255 * (127 + (n)) - ((v) & ~0ULL / 255 * 127)) & ~(v) & (((v) & ~0ULL / 255 * 127) + ~0ULL / 255 * (127 - (m)))) & ~0ULL / 255 * 128);
}

/* True when the eight bytes packed into w contain anything outside the
 * field-name alphabet [-0-9A-Za-z] (see isFieldNameByte) */
static inline bool notFieldNameWord(uint64_t w) {
    return hasLess(w, '-')
        | hasBetween(w, '-', '0')
        | hasBetween(w, '9', 'A')
        | hasBetween(w, 'Z', 'a')
        | hasMore(w, 'z');
}
250
+
251
/* Lower-cases a header field name in place, eight bytes at a time, and returns a
 * pointer just past its last byte (expected to land on ':'). Relies on the caller's
 * post padded buffer: the 8-byte loads may read past the name's terminator, but the
 * bulk write only happens when all eight bytes are field-name characters, so bytes
 * beyond the name are never modified. */
static inline void *consumeFieldName(char *p) {
    for (; true; p += 8) {
        uint64_t word;
        memcpy(&word, p, sizeof(uint64_t));
        if (notFieldNameWord(word)) {
            /* Slow path: this chunk holds the terminator — lower-case byte by byte
             * up to the first non-field-name character and stop there */
            while (isFieldNameByte(*(unsigned char *)p)) {
                *(p++) |= 0x20;
            }
            return (void *)p;
        }
        /* Fast path: all eight bytes belong to the name; OR 0x20 into every byte
         * to lower-case ASCII letters ('-' and digits already have bit 0x20 set
         * or are unaffected in the field-name alphabet) */
        word |= 0x2020202020202020ull;
        memcpy(p, &word, sizeof(uint64_t));
    }
}
265
+
266
/* Puts method as key, target as value and returns non-null (or nullptr on error).
 * On success the returned pointer sits just past the terminating "\r\n". Only
 * origin-form targets ("/...") followed by exactly " HTTP/1.1\r\n" are accepted;
 * everything else is rejected. Safe only on a fenced, post padded buffer. */
static inline char *consumeRequestLine(char *data, HttpRequest::Header &header) {
    /* Scan until single SP, assume next is / (origin request) */
    char *start = data;
    /* This catches the post padded CR and fails */
    while (data[0] > 32) data++;
    if (data[0] == 32 && data[1] == '/') {
        header.key = {start, (size_t) (data - start)};
        data++;
        /* Scan for less than 33 (catches post padded CR and fails) */
        start = data;
        for (; true; data += 8) {
            uint64_t word;
            memcpy(&word, data, sizeof(uint64_t));
            if (hasLess(word, 33)) {
                while (*(unsigned char *)data > 32) data++;
                /* Now we stand on space */
                header.value = {start, (size_t) (data - start)};
                /* Check that the following is http 1.1 */
                if (memcmp(" HTTP/1.1\r\n", data, 11) == 0) {
                    return data + 11;
                }
                return nullptr;
            }
        }
    }
    return nullptr;
}
294
+
295
/* RFC 9110: 5.5 Field Values (TLDR; anything above 31 is allowed; htab (9) is also allowed)
 * Field values are usually constrained to the range of US-ASCII characters [...]
 * Field values containing CR, LF, or NUL characters are invalid and dangerous [...]
 * Field values containing other CTL characters are also invalid.
 *
 * Scans eight bytes at a time for the first byte below 32 (CR, LF, NUL, other CTLs
 * — but note HTAB is also below 32, so the caller re-checks and may resume) and
 * returns a pointer to it. Termination is guaranteed by the caller's \r fence. */
static inline void *tryConsumeFieldValue(char *p) {
    for (; true; p += 8) {
        uint64_t word;
        memcpy(&word, p, sizeof(uint64_t));
        if (hasLess(word, 32)) {
            while (*(unsigned char *)p > 31) p++;
            return (void *)p;
        }
    }
}
309
+
310
/* End is only used for the proxy parser. The HTTP parser recognizes "\ra" as invalid "\r\n" scan and breaks.
 *
 * Parses one complete request head (request line + headers) out of postPaddedBuffer
 * into the headers array (headers[0] = request line, terminated by an empty key).
 * Returns the number of bytes consumed on success, or 0 when the data is incomplete
 * or malformed (err is additionally set for request-line failures). The buffer MUST
 * be post padded and \r-fenced by the caller — that fence is what makes the
 * unchecked scans below safe. */
static unsigned int getHeaders(char *postPaddedBuffer, char *end, struct HttpRequest::Header *headers, void *reserved, unsigned int &err) {
    char *preliminaryKey, *preliminaryValue, *start = postPaddedBuffer;

#ifdef UWS_WITH_PROXY
    /* ProxyParser is passed as reserved parameter */
    ProxyParser *pp = (ProxyParser *) reserved;

    /* Parse PROXY protocol */
    auto [done, offset] = pp->parse({postPaddedBuffer, (size_t) (end - postPaddedBuffer)});
    if (!done) {
        /* We do not reset the ProxyParser (on failure) since it is tied to this
         * connection, which is really only supposed to ever get one PROXY frame
         * anyways. We do however allow multiple PROXY frames to be sent (overwrites former). */
        return 0;
    } else {
        /* We have consumed this data so skip it */
        postPaddedBuffer += offset;
    }
#else
    /* This one is unused */
    (void) reserved;
    (void) end;
#endif

    /* It is critical for fallback buffering logic that we only return with success
     * if we managed to parse a complete HTTP request (minus data). Returning success
     * for PROXY means we can end up succeeding, yet leaving bytes in the fallback buffer
     * which is then removed, and our counters to flip due to overflow and we end up with a crash */

    /* The request line is different from the field names / field values */
    if (!(postPaddedBuffer = consumeRequestLine(postPaddedBuffer, headers[0]))) {
        /* Error - invalid request line */
        /* Assuming it is 505 HTTP Version Not Supported */
        err = HTTP_ERROR_505_HTTP_VERSION_NOT_SUPPORTED;
        return 0;
    }
    headers++;

    for (unsigned int i = 1; i < UWS_HTTP_MAX_HEADERS_COUNT - 1; i++) {
        /* Lower case and consume the field name */
        preliminaryKey = postPaddedBuffer;
        postPaddedBuffer = (char *) consumeFieldName(postPaddedBuffer);
        headers->key = std::string_view(preliminaryKey, (size_t) (postPaddedBuffer - preliminaryKey));

        /* We should not accept whitespace between key and colon, so colon must follow immediately */
        if (postPaddedBuffer[0] != ':') {
            /* Error: invalid chars in field name */
            return 0;
        }
        postPaddedBuffer++;

        preliminaryValue = postPaddedBuffer;
        /* The goal of this call is to find next "\r\n", or any invalid field value chars, fast */
        while (true) {
            postPaddedBuffer = (char *) tryConsumeFieldValue(postPaddedBuffer);
            /* If this is not CR then we caught some stinky invalid char on the way */
            if (postPaddedBuffer[0] != '\r') {
                /* If TAB then keep searching */
                if (postPaddedBuffer[0] == '\t') {
                    postPaddedBuffer++;
                    continue;
                }
                /* Error - invalid chars in field value */
                return 0;
            }
            break;
        }
        /* We fence end[0] with \r, followed by end[1] being something that is "not \n", to signify "not found".
         * This way we can have this one single check to see if we found \r\n WITHIN our allowed search space. */
        if (postPaddedBuffer[1] == '\n') {
            /* Store this header, it is valid */
            headers->value = std::string_view(preliminaryValue, (size_t) (postPaddedBuffer - preliminaryValue));
            postPaddedBuffer += 2;

            /* Trim trailing whitespace (SP, HTAB) */
            while (headers->value.length() && headers->value.back() < 33) {
                headers->value.remove_suffix(1);
            }

            /* Trim initial whitespace (SP, HTAB) */
            while (headers->value.length() && headers->value.front() < 33) {
                headers->value.remove_prefix(1);
            }

            headers++;

            /* We definitely have at least one header (or request line), so check if we are done */
            if (*postPaddedBuffer == '\r') {
                if (postPaddedBuffer[1] == '\n') {
                    /* This can take the very last header space */
                    headers->key = std::string_view(nullptr, 0);
                    return (unsigned int) ((postPaddedBuffer + 2) - start);
                } else {
                    /* \r\n\r plus non-\n letter is malformed request, or simply out of search space */
                    return 0;
                }
            }
        } else {
            /* We are either out of search space or this is a malformed request */
            return 0;
        }
    }
    /* We ran out of header space, too large request */
    return 0;
}
416
+
417
/* This is the only caller of getHeaders and is thus the deepest part of the parser.
 * From here we return either [consumed, user] for "keep going",
 * or [consumed, nullptr] for "break; I am closed or upgraded to websocket"
 * or [whatever, fullptr] for "break and close me, I am a parser error!"
 *
 * CONSUME_MINIMALLY (non-zero = true) makes the loop stop after the first complete
 * request; used when parsing out of the fallback buffer. data MUST have at least
 * two writable bytes past length (post padding) for the fence below. */
template <int CONSUME_MINIMALLY>
std::pair<unsigned int, void *> fenceAndConsumePostPadded(char *data, unsigned int length, void *user, void *reserved, HttpRequest *req, MoveOnlyFunction<void *(void *, HttpRequest *)> &requestHandler, MoveOnlyFunction<void *(void *, std::string_view, bool)> &dataHandler) {

    /* How much data we CONSUMED (to throw away) */
    unsigned int consumedTotal = 0;
    unsigned int err = 0;

    /* Fence two bytes past end of our buffer (buffer has post padded margins).
     * This is to always catch scan for \r but not for \r\n. */
    data[length] = '\r';
    data[length + 1] = 'a'; /* Anything that is not \n, to trigger "invalid request" */

    for (unsigned int consumed; length && (consumed = getHeaders(data, data + length, req->headers, reserved, err)); ) {
        data += consumed;
        length -= consumed;
        consumedTotal += consumed;

        /* Store HTTP version (ancient 1.0 or 1.1) */
        req->ancientHttp = false;

        /* Add all headers to bloom filter */
        req->bf.reset();
        for (HttpRequest::Header *h = req->headers; (++h)->key.length(); ) {
            req->bf.add(h->key);
        }

        /* Break if no host header (but we can have empty string which is different from nullptr) */
        if (!req->getHeader("host").data()) {
            return {HTTP_ERROR_400_BAD_REQUEST, FULLPTR};
        }

        /* RFC 9112 6.3
         * If a message is received with both a Transfer-Encoding and a Content-Length header field,
         * the Transfer-Encoding overrides the Content-Length. Such a message might indicate an attempt
         * to perform request smuggling (Section 11.2) or response splitting (Section 11.1) and
         * ought to be handled as an error. */
        std::string_view transferEncodingString = req->getHeader("transfer-encoding");
        std::string_view contentLengthString = req->getHeader("content-length");
        if (transferEncodingString.length() && contentLengthString.length()) {
            /* Returning fullptr is the same as calling the errorHandler */
            /* We could be smart and set an error in the context along with this, to indicate what
             * http error response we might want to return */
            return {HTTP_ERROR_400_BAD_REQUEST, FULLPTR};
        }

        /* Parse query */
        const char *querySeparatorPtr = (const char *) memchr(req->headers->value.data(), '?', req->headers->value.length());
        req->querySeparator = (unsigned int) ((querySeparatorPtr ? querySeparatorPtr : req->headers->value.data() + req->headers->value.length()) - req->headers->value.data());

        /* If returned socket is not what we put in we need
         * to break here as we either have upgraded to
         * WebSockets or otherwise closed the socket. */
        void *returnedUser = requestHandler(user, req);
        if (returnedUser != user) {
            /* We are upgraded to WebSocket or otherwise broken */
            return {consumedTotal, returnedUser};
        }

        /* The rules at play here according to RFC 9112 for requests are essentially:
         * If both content-length and transfer-encoding then invalid message; must break.
         * If has transfer-encoding then must be chunked regardless of value.
         * If content-length then fixed length even if 0.
         * If none of the above then fixed length is 0. */

        /* RFC 9112 6.3
         * If a message is received with both a Transfer-Encoding and a Content-Length header field,
         * the Transfer-Encoding overrides the Content-Length. */
        if (transferEncodingString.length()) {

            /* If a proxy sent us the transfer-encoding header that 100% means it must be chunked or else the proxy is
             * not RFC 9112 compliant. Therefore it is always better to assume this is the case, since that entirely eliminates
             * all forms of transfer-encoding obfuscation tricks. We just rely on the header. */

            /* RFC 9112 6.3
             * If a Transfer-Encoding header field is present in a request and the chunked transfer coding is not the
             * final encoding, the message body length cannot be determined reliably; the server MUST respond with the
             * 400 (Bad Request) status code and then close the connection. */

            /* In this case we fail later by having the wrong interpretation (assuming chunked).
             * This could be made stricter but makes no difference either way, unless forwarding the identical message as a proxy. */

            remainingStreamingBytes = STATE_IS_CHUNKED;
            /* If consume minimally, we do not want to consume anything but we want to mark this as being chunked */
            if (!CONSUME_MINIMALLY) {
                /* Go ahead and parse it (todo: better heuristics for emitting FIN to the app level) */
                std::string_view dataToConsume(data, length);
                for (auto chunk : uWS::ChunkIterator(&dataToConsume, &remainingStreamingBytes)) {
                    dataHandler(user, chunk, chunk.length() == 0);
                }
                if (isParsingInvalidChunkedEncoding(remainingStreamingBytes)) {
                    return {HTTP_ERROR_400_BAD_REQUEST, FULLPTR};
                }
                /* NOTE(review): this local shadows the loop's 'consumed'; it counts only
                 * the chunked-body bytes eaten by the ChunkIterator above */
                unsigned int consumed = (length - (unsigned int) dataToConsume.length());
                data = (char *) dataToConsume.data();
                length = (unsigned int) dataToConsume.length();
                consumedTotal += consumed;
            }
        } else if (contentLengthString.length()) {
            remainingStreamingBytes = toUnsignedInteger(contentLengthString);
            if (remainingStreamingBytes == UINT_MAX) {
                /* Parser error */
                return {HTTP_ERROR_400_BAD_REQUEST, FULLPTR};
            }

            if (!CONSUME_MINIMALLY) {
                unsigned int emittable = (unsigned int) std::min<uint64_t>(remainingStreamingBytes, length);
                dataHandler(user, std::string_view(data, emittable), emittable == remainingStreamingBytes);
                remainingStreamingBytes -= emittable;

                data += emittable;
                length -= emittable;
                consumedTotal += emittable;
            }
        } else {
            /* If we came here without a body; emit an empty data chunk to signal no data */
            dataHandler(user, {}, true);
        }

        /* Consume minimally should break as early as possible */
        if (CONSUME_MINIMALLY) {
            break;
        }
    }
    /* Whenever we return FULLPTR, the interpretation of "consumed" should be the HttpError enum. */
    if (err) {
        return {err, FULLPTR};
    }
    return {consumedTotal, user};
}
550
+
551
+ public:
552
/* Public entry point of the parser: consumes a post padded buffer (data is assumed to
 * have writable padding past length — see fenceAndConsumePostPadded, which writes the
 * two fence bytes) and drives the request/data handlers. Returns {consumed, user} to
 * keep going, {_, otherUser} when the socket was upgraded or closed by a handler, or
 * {HttpError, FULLPTR} on a parser error. Incomplete heads are buffered in 'fallback'
 * across calls, bounded by MAX_FALLBACK_SIZE. */
std::pair<unsigned int, void *> consumePostPadded(char *data, unsigned int length, void *user, void *reserved, MoveOnlyFunction<void *(void *, HttpRequest *)> &&requestHandler, MoveOnlyFunction<void *(void *, std::string_view, bool)> &&dataHandler) {

    /* This resets BloomFilter by construction, but later we also reset it again.
     * Optimize this to skip resetting twice (req could be made global) */
    HttpRequest req;

    if (remainingStreamingBytes) {

        /* It's either chunked or with a content-length */
        if (isParsingChunkedEncoding(remainingStreamingBytes)) {
            std::string_view dataToConsume(data, length);
            for (auto chunk : uWS::ChunkIterator(&dataToConsume, &remainingStreamingBytes)) {
                dataHandler(user, chunk, chunk.length() == 0);
            }
            if (isParsingInvalidChunkedEncoding(remainingStreamingBytes)) {
                return {HTTP_ERROR_400_BAD_REQUEST, FULLPTR};
            }
            data = (char *) dataToConsume.data();
            length = (unsigned int) dataToConsume.length();
        } else {
            // this is exactly the same as below!
            // todo: refactor this
            if (remainingStreamingBytes >= length) {
                /* Whole buffer is body; emit it and signal FIN when it exactly completes the body */
                void *returnedUser = dataHandler(user, std::string_view(data, length), remainingStreamingBytes == length);
                remainingStreamingBytes -= length;
                return {0, returnedUser};
            } else {
                /* Body ends inside this buffer; emit the remainder with FIN, then fall through
                 * to parse the next pipelined request below */
                void *returnedUser = dataHandler(user, std::string_view(data, remainingStreamingBytes), true);

                data += (unsigned int) remainingStreamingBytes;
                length -= (unsigned int) remainingStreamingBytes;

                remainingStreamingBytes = 0;

                if (returnedUser != user) {
                    return {0, returnedUser};
                }
            }
        }

    } else if (fallback.length()) {
        /* A previous call left an incomplete request head in 'fallback'; append new data
         * (capped at MAX_FALLBACK_SIZE) and retry parsing one request from it */
        unsigned int had = (unsigned int) fallback.length();

        size_t maxCopyDistance = std::min<size_t>(MAX_FALLBACK_SIZE - fallback.length(), (size_t) length);

        /* We don't want fallback to be short string optimized, since we want to move it */
        fallback.reserve(fallback.length() + maxCopyDistance + std::max<unsigned int>(MINIMUM_HTTP_POST_PADDING, sizeof(std::string)));
        fallback.append(data, maxCopyDistance);

        // break here on break
        std::pair<unsigned int, void *> consumed = fenceAndConsumePostPadded<true>(fallback.data(), (unsigned int) fallback.length(), user, reserved, &req, requestHandler, dataHandler);
        if (consumed.second != user) {
            return consumed;
        }

        if (consumed.first) {

            /* This logic assumes that we consumed everything in fallback buffer.
             * This is critically important, as we will get an integer overflow in case
             * of "had" being larger than what we consumed, and that we would drop data */
            fallback.clear();
            data += consumed.first - had;
            length -= consumed.first - had;

            if (remainingStreamingBytes) {
                /* It's either chunked or with a content-length */
                if (isParsingChunkedEncoding(remainingStreamingBytes)) {
                    std::string_view dataToConsume(data, length);
                    for (auto chunk : uWS::ChunkIterator(&dataToConsume, &remainingStreamingBytes)) {
                        dataHandler(user, chunk, chunk.length() == 0);
                    }
                    if (isParsingInvalidChunkedEncoding(remainingStreamingBytes)) {
                        return {HTTP_ERROR_400_BAD_REQUEST, FULLPTR};
                    }
                    data = (char *) dataToConsume.data();
                    length = (unsigned int) dataToConsume.length();
                } else {
                    // this is exactly the same as above!
                    if (remainingStreamingBytes >= (unsigned int) length) {
                        void *returnedUser = dataHandler(user, std::string_view(data, length), remainingStreamingBytes == (unsigned int) length);
                        remainingStreamingBytes -= length;
                        return {0, returnedUser};
                    } else {
                        void *returnedUser = dataHandler(user, std::string_view(data, remainingStreamingBytes), true);

                        data += (unsigned int) remainingStreamingBytes;
                        length -= (unsigned int) remainingStreamingBytes;

                        remainingStreamingBytes = 0;

                        if (returnedUser != user) {
                            return {0, returnedUser};
                        }
                    }
                }
            }

        } else {
            /* Still no complete request head; if the fallback is full we must give up */
            if (fallback.length() == MAX_FALLBACK_SIZE) {
                return {HTTP_ERROR_431_REQUEST_HEADER_FIELDS_TOO_LARGE, FULLPTR};
            }
            return {0, user};
        }
    }

    /* Parse as many pipelined requests as possible directly from the caller's buffer */
    std::pair<unsigned int, void *> consumed = fenceAndConsumePostPadded<false>(data, length, user, reserved, &req, requestHandler, dataHandler);
    if (consumed.second != user) {
        return consumed;
    }

    data += consumed.first;
    length -= consumed.first;

    /* Anything left over is an incomplete request head; stash it for the next call */
    if (length) {
        if (length < MAX_FALLBACK_SIZE) {
            fallback.append(data, length);
        } else {
            return {HTTP_ERROR_431_REQUEST_HEADER_FIELDS_TOO_LARGE, FULLPTR};
        }
    }

    // added for now
    return {0, user};
}
676
+ };
677
+
678
+ }
679
+
680
+ #endif // UWS_HTTPPARSER_H