opal-up 0.0.2 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. checksums.yaml +4 -4
  2. data/LICENSE +209 -0
  3. data/README.md +81 -28
  4. data/bin/up_ruby +4 -0
  5. data/bin/up_ruby_cluster +4 -0
  6. data/ext/up_ext/App.h +606 -0
  7. data/ext/up_ext/AsyncSocket.h +355 -0
  8. data/ext/up_ext/AsyncSocketData.h +87 -0
  9. data/ext/up_ext/BloomFilter.h +83 -0
  10. data/ext/up_ext/ChunkedEncoding.h +236 -0
  11. data/ext/up_ext/ClientApp.h +36 -0
  12. data/ext/up_ext/HttpContext.h +502 -0
  13. data/ext/up_ext/HttpContextData.h +56 -0
  14. data/ext/up_ext/HttpErrors.h +53 -0
  15. data/ext/up_ext/HttpParser.h +680 -0
  16. data/ext/up_ext/HttpResponse.h +578 -0
  17. data/ext/up_ext/HttpResponseData.h +95 -0
  18. data/ext/up_ext/HttpRouter.h +380 -0
  19. data/ext/up_ext/Loop.h +204 -0
  20. data/ext/up_ext/LoopData.h +112 -0
  21. data/ext/up_ext/MoveOnlyFunction.h +377 -0
  22. data/ext/up_ext/PerMessageDeflate.h +315 -0
  23. data/ext/up_ext/ProxyParser.h +163 -0
  24. data/ext/up_ext/QueryParser.h +120 -0
  25. data/ext/up_ext/TopicTree.h +363 -0
  26. data/ext/up_ext/Utilities.h +66 -0
  27. data/ext/up_ext/WebSocket.h +381 -0
  28. data/ext/up_ext/WebSocketContext.h +434 -0
  29. data/ext/up_ext/WebSocketContextData.h +109 -0
  30. data/ext/up_ext/WebSocketData.h +86 -0
  31. data/ext/up_ext/WebSocketExtensions.h +256 -0
  32. data/ext/up_ext/WebSocketHandshake.h +145 -0
  33. data/ext/up_ext/WebSocketProtocol.h +506 -0
  34. data/ext/up_ext/bsd.c +767 -0
  35. data/ext/up_ext/bsd.h +109 -0
  36. data/ext/up_ext/context.c +524 -0
  37. data/ext/up_ext/epoll_kqueue.c +458 -0
  38. data/ext/up_ext/epoll_kqueue.h +67 -0
  39. data/ext/up_ext/extconf.rb +5 -0
  40. data/ext/up_ext/internal.h +224 -0
  41. data/ext/up_ext/libusockets.h +350 -0
  42. data/ext/up_ext/libuwebsockets.cpp +1374 -0
  43. data/ext/up_ext/libuwebsockets.h +260 -0
  44. data/ext/up_ext/loop.c +386 -0
  45. data/ext/up_ext/loop_data.h +38 -0
  46. data/ext/up_ext/socket.c +231 -0
  47. data/ext/up_ext/up_ext.c +278 -0
  48. data/lib/up/node/rack_env.rb +2 -2
  49. data/lib/up/ruby/cluster_cli.rb +10 -0
  50. data/lib/up/ruby/rack_cluster.rb +26 -0
  51. data/lib/up/ruby/rack_env.rb +97 -0
  52. data/lib/up/ruby/rack_server.rb +26 -0
  53. data/lib/up/ruby/server_cli.rb +10 -0
  54. data/lib/up/u_web_socket/rack_env.rb +1 -1
  55. data/lib/up/version.rb +1 -1
  56. metadata +71 -18
  57. data/.gitignore +0 -5
  58. data/Gemfile +0 -2
  59. data/example_rack_app/Gemfile +0 -3
  60. data/example_rack_app/config.ru +0 -6
  61. data/example_rack_app/rack_app.rb +0 -5
  62. data/example_roda_app/Gemfile +0 -6
  63. data/example_roda_app/config.ru +0 -6
  64. data/example_roda_app/roda_app.rb +0 -37
  65. data/example_sinatra_app/Gemfile +0 -6
  66. data/example_sinatra_app/config.ru +0 -6
  67. data/example_sinatra_app/sinatra_app.rb +0 -7
  68. data/opal-up.gemspec +0 -27
  69. data/up_logo.svg +0 -256
@@ -0,0 +1,680 @@
1
+ /*
2
+ * Authored by Alex Hultman, 2018-2020.
3
+ * Intellectual property of third-party.
4
+
5
+ * Licensed under the Apache License, Version 2.0 (the "License");
6
+ * you may not use this file except in compliance with the License.
7
+ * You may obtain a copy of the License at
8
+
9
+ * http://www.apache.org/licenses/LICENSE-2.0
10
+
11
+ * Unless required by applicable law or agreed to in writing, software
12
+ * distributed under the License is distributed on an "AS IS" BASIS,
13
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ * See the License for the specific language governing permissions and
15
+ * limitations under the License.
16
+ */
17
+
18
+ #ifndef UWS_HTTPPARSER_H
19
+ #define UWS_HTTPPARSER_H
20
+
21
+ // todo: HttpParser is in need of a few clean-ups and refactorings
22
+
23
+ /* The HTTP parser is an independent module subject to unit testing / fuzz testing */
24
+
25
+ #include <string>
26
+ #include <cstring>
27
+ #include <algorithm>
28
+ #include <climits>
29
+ #include <string_view>
30
+ #include <map>
31
+ #include "MoveOnlyFunction.h"
32
+ #include "ChunkedEncoding.h"
33
+
34
+ #include "BloomFilter.h"
35
+ #include "ProxyParser.h"
36
+ #include "QueryParser.h"
37
+ #include "HttpErrors.h"
38
+
39
+ namespace uWS {
40
+
41
/* We require at least this much post padding: the parser writes a two byte fence past the
 * end of the data and scans in 8 byte words, so buffers must be writable beyond their length. */
static const unsigned int MINIMUM_HTTP_POST_PADDING = 32;

/* All-bits-set pointer. The parser returns it in place of the user pointer to signal a parser error. */
static void *FULLPTR = (void *)~(uintptr_t)0;

/* STL needs one of these: wraps a possibly null pointer so that value_or() can supply a default. */
template <typename T>
std::optional<T *> optional_ptr(T *ptr) {
    return ptr ? std::optional<T *>(ptr) : std::nullopt;
}

/* Interprets text as the maximum number of bytes of fragmented request head we are willing to buffer.
 * Only a plain, non-zero decimal number that fits in size_t is accepted. A null pointer, an empty
 * string, a sign, any non-digit, an overflowing value or zero all yield the default of 4096 bytes.
 * (atoi was used here before; it has undefined behavior on out-of-range input, turns garbage into 0
 * and lets a negative value wrap around to a practically unlimited size.) */
static inline size_t parseMaxFallbackSize(const char *text) {
    const size_t defaultSize = 4096;
    const size_t maxSize = ~(size_t) 0;

    if (!text || !*text) {
        return defaultSize;
    }

    size_t value = 0;
    for (const char *p = text; *p; p++) {
        if (*p < '0' || *p > '9') {
            return defaultSize;
        }
        const size_t digit = (size_t) (*p - '0');
        /* value * 10 + digit must not exceed maxSize */
        if (value > (maxSize - digit) / 10) {
            return defaultSize;
        }
        value = value * 10 + digit;
    }

    return value ? value : defaultSize;
}

/* Upper bound for the fallback buffer, configurable through the environment at start-up */
static const size_t MAX_FALLBACK_SIZE = parseMaxFallbackSize(getenv("UWS_HTTP_MAX_HEADERS_SIZE"));

/* Number of slots in HttpRequest::headers (request line, field lines and terminator included) */
#ifndef UWS_HTTP_MAX_HEADERS_COUNT
#define UWS_HTTP_MAX_HEADERS_COUNT 100
#endif
55
+
56
+ struct HttpRequest {
57
+
58
+ friend struct HttpParser;
59
+
60
+ private:
61
+ struct Header {
62
+ std::string_view key, value;
63
+ } headers[UWS_HTTP_MAX_HEADERS_COUNT];
64
+ bool ancientHttp;
65
+ unsigned int querySeparator;
66
+ bool didYield;
67
+ BloomFilter bf;
68
+ std::pair<int, std::string_view *> currentParameters;
69
+ std::map<std::string, unsigned short, std::less<>> *currentParameterOffsets = nullptr;
70
+
71
+ public:
72
+ bool isAncient() {
73
+ return ancientHttp;
74
+ }
75
+
76
+ bool getYield() {
77
+ return didYield;
78
+ }
79
+
80
+ /* Iteration over headers (key, value) */
81
+ struct HeaderIterator {
82
+ Header *ptr;
83
+
84
+ bool operator!=(const HeaderIterator &other) const {
85
+ /* Comparison with end is a special case */
86
+ if (ptr != other.ptr) {
87
+ return other.ptr || ptr->key.length();
88
+ }
89
+ return false;
90
+ }
91
+
92
+ HeaderIterator &operator++() {
93
+ ptr++;
94
+ return *this;
95
+ }
96
+
97
+ std::pair<std::string_view, std::string_view> operator*() const {
98
+ return {ptr->key, ptr->value};
99
+ }
100
+ };
101
+
102
+ HeaderIterator begin() {
103
+ return {headers + 1};
104
+ }
105
+
106
+ HeaderIterator end() {
107
+ return {nullptr};
108
+ }
109
+
110
+ /* If you do not want to handle this route */
111
+ void setYield(bool yield) {
112
+ didYield = yield;
113
+ }
114
+
115
+ std::string_view getHeader(std::string_view lowerCasedHeader) {
116
+ if (bf.mightHave(lowerCasedHeader)) {
117
+ for (Header *h = headers; (++h)->key.length(); ) {
118
+ if (h->key.length() == lowerCasedHeader.length() && !strncmp(h->key.data(), lowerCasedHeader.data(), lowerCasedHeader.length())) {
119
+ return h->value;
120
+ }
121
+ }
122
+ }
123
+ return std::string_view(nullptr, 0);
124
+ }
125
+
126
+ std::string_view getUrl() {
127
+ return std::string_view(headers->value.data(), querySeparator);
128
+ }
129
+
130
+ std::string_view getFullUrl() {
131
+ return std::string_view(headers->value.data(), headers->value.length());
132
+ }
133
+
134
+ /* Hack: this should be getMethod */
135
+ std::string_view getCaseSensitiveMethod() {
136
+ return std::string_view(headers->key.data(), headers->key.length());
137
+ }
138
+
139
+ std::string_view getMethod() {
140
+ /* Compatibility hack: lower case method (todo: remove when major version bumps) */
141
+ for (unsigned int i = 0; i < headers->key.length(); i++) {
142
+ ((char *) headers->key.data())[i] |= 32;
143
+ }
144
+
145
+ return std::string_view(headers->key.data(), headers->key.length());
146
+ }
147
+
148
+ /* Returns the raw querystring as a whole, still encoded */
149
+ std::string_view getQuery() {
150
+ if (querySeparator < headers->value.length()) {
151
+ /* Strip the initial ? */
152
+ return std::string_view(headers->value.data() + querySeparator + 1, headers->value.length() - querySeparator - 1);
153
+ } else {
154
+ return std::string_view(nullptr, 0);
155
+ }
156
+ }
157
+
158
+ /* Finds and decodes the URI component. */
159
+ std::string_view getQuery(std::string_view key) {
160
+ /* Raw querystring including initial '?' sign */
161
+ std::string_view queryString = std::string_view(headers->value.data() + querySeparator, headers->value.length() - querySeparator);
162
+
163
+ return getDecodedQueryValue(key, queryString);
164
+ }
165
+
166
+ void setParameters(std::pair<int, std::string_view *> parameters) {
167
+ currentParameters = parameters;
168
+ }
169
+
170
+ void setParameterOffsets(std::map<std::string, unsigned short, std::less<>> *offsets) {
171
+ currentParameterOffsets = offsets;
172
+ }
173
+
174
+ std::string_view getParameter(std::string_view name) {
175
+ if (!currentParameterOffsets) {
176
+ return {nullptr, 0};
177
+ }
178
+ auto it = currentParameterOffsets->find(name);
179
+ if (it == currentParameterOffsets->end()) {
180
+ return {nullptr, 0};
181
+ }
182
+ return getParameter(it->second);
183
+ }
184
+
185
+ std::string_view getParameter(unsigned short index) {
186
+ if (currentParameters.first < (int) index) {
187
+ return {};
188
+ } else {
189
+ return currentParameters.second[index];
190
+ }
191
+ }
192
+
193
+ };
194
+
195
+ struct HttpParser {
196
+
197
+ private:
198
+ std::string fallback;
199
+ /* Only 62 bits are usable as a byte count here, since the two highest bits are reserved for the chunked encoding parsing state */
200
+ uint64_t remainingStreamingBytes = 0;
201
+
202
/* Parses a string made up of decimal digits only. Returns UINT_MAX on error, that is when the
 * string is longer than 18 characters or holds anything but 0-9. An empty string gives 0.
 * The largest value that can be returned is 999999999999999999 (18 nines).
 * NOTE(review): a digit string whose value happens to equal UINT_MAX cannot be told apart
 * from the error return by the caller. */
static uint64_t toUnsignedInteger(std::string_view str) {
    /* 18 digits always fit in 64 bits, so the accumulation below cannot overflow */
    const size_t maxDigits = 18;
    if (str.length() > maxDigits) {
        return UINT_MAX;
    }

    uint64_t parsed = 0;
    for (size_t i = 0; i < str.length(); i++) {
        const char digit = str[i];
        const bool isDigit = (digit >= '0') && (digit <= '9');
        if (!isDigit) {
            return UINT_MAX;
        }
        parsed = parsed * 10ull + ((unsigned int) digit - (unsigned int) '0');
    }
    return parsed;
}
219
+
220
/* RFC 9110 16.3.1 Field Name Registry (TLDR; alnum + hyphen is allowed)
 * [...] It MUST conform to the field-name syntax defined in Section 5.1,
 * and it SHOULD be restricted to just letters, digits,
 * and hyphen ('-') characters, with the first character being a letter.
 *
 * Returns true if x is '-', 0-9, A-Z or a-z. Non-short-circuiting operators are used
 * so that the test evaluates without branches. */
static inline bool isFieldNameByte(unsigned char x) {
    const bool hyphen = (x == '-');
    const bool digit = (x >= '0') & (x <= '9');
    const bool upper = (x >= 'A') & (x <= 'Z');
    const bool lower = (x >= 'a') & (x <= 'z');
    return hyphen | digit | upper | lower;
}
230
+
231
/* Word-at-a-time test: the result is nonzero if some byte of x is less than n.
 * A set bit 7 in a result byte marks a candidate byte; bytes of x that are 128 or above are never marked. */
static inline uint64_t hasLess(uint64_t x, uint64_t n) {
    const uint64_t everyByteOne = ~0ULL / 255;          /* 0x0101010101010101 */
    const uint64_t everyHighBit = everyByteOne * 128;   /* 0x8080808080808080 */
    return (x - everyByteOne * n) & ~x & everyHighBit;
}
234
+
235
/* Word-at-a-time test: the result is nonzero if some byte of x is greater than n.
 * Bytes of x that already have bit 7 set are always reported. */
static inline uint64_t hasMore(uint64_t x, uint64_t n) {
    const uint64_t everyByteOne = ~0ULL / 255;          /* 0x0101010101010101 */
    const uint64_t everyHighBit = everyByteOne * 128;   /* 0x8080808080808080 */
    const uint64_t lifted = x + everyByteOne * (127 - n);
    return (lifted | x) & everyHighBit;
}
238
+
239
/* Word-at-a-time test: the result is nonzero if some byte of x lies strictly between m and n.
 * Bytes of x that have bit 7 set are never reported. */
static inline uint64_t hasBetween(uint64_t x, uint64_t m, uint64_t n) {
    const uint64_t everyByteOne = ~0ULL / 255;          /* 0x0101010101010101 */
    const uint64_t everyHighBit = everyByteOne * 128;   /* 0x8080808080808080 */
    const uint64_t lowSevenBits = x & (everyByteOne * 127);
    const uint64_t belowUpper = everyByteOne * (127 + n) - lowSevenBits;
    const uint64_t aboveLower = lowSevenBits + everyByteOne * (127 - m);
    return (belowUpper & ~x & aboveLower) & everyHighBit;
}
242
+
243
+ static inline bool notFieldNameWord(uint64_t x) {
244
+ return hasLess(x, '-') |
245
+ hasBetween(x, '-', '0') |
246
+ hasBetween(x, '9', 'A') |
247
+ hasBetween(x, 'Z', 'a') |
248
+ hasMore(x, 'z');
249
+ }
250
+
251
+ static inline void *consumeFieldName(char *p) {
252
+ for (; true; p += 8) {
253
+ uint64_t word;
254
+ memcpy(&word, p, sizeof(uint64_t));
255
+ if (notFieldNameWord(word)) {
256
+ while (isFieldNameByte(*(unsigned char *)p)) {
257
+ *(p++) |= 0x20;
258
+ }
259
+ return (void *)p;
260
+ }
261
+ word |= 0x2020202020202020ull;
262
+ memcpy(p, &word, sizeof(uint64_t));
263
+ }
264
+ }
265
+
266
+ /* Puts method as key, target as value and returns non-null (or nullptr on error). */
267
+ static inline char *consumeRequestLine(char *data, HttpRequest::Header &header) {
268
+ /* Scan until single SP, assume next is / (origin request) */
269
+ char *start = data;
270
+ /* This catches the post padded CR and fails */
271
+ while (data[0] > 32) data++;
272
+ if (data[0] == 32 && data[1] == '/') {
273
+ header.key = {start, (size_t) (data - start)};
274
+ data++;
275
+ /* Scan for less than 33 (catches post padded CR and fails) */
276
+ start = data;
277
+ for (; true; data += 8) {
278
+ uint64_t word;
279
+ memcpy(&word, data, sizeof(uint64_t));
280
+ if (hasLess(word, 33)) {
281
+ while (*(unsigned char *)data > 32) data++;
282
+ /* Now we stand on space */
283
+ header.value = {start, (size_t) (data - start)};
284
+ /* Check that the following is http 1.1 */
285
+ if (memcmp(" HTTP/1.1\r\n", data, 11) == 0) {
286
+ return data + 11;
287
+ }
288
+ return nullptr;
289
+ }
290
+ }
291
+ }
292
+ return nullptr;
293
+ }
294
+
295
+ /* RFC 9110: 5.5 Field Values (TLDR; anything above 31 is allowed; htab (9) is also allowed)
296
+ * Field values are usually constrained to the range of US-ASCII characters [...]
297
+ * Field values containing CR, LF, or NUL characters are invalid and dangerous [...]
298
+ * Field values containing other CTL characters are also invalid. */
299
+ static inline void *tryConsumeFieldValue(char *p) {
300
+ for (; true; p += 8) {
301
+ uint64_t word;
302
+ memcpy(&word, p, sizeof(uint64_t));
303
+ if (hasLess(word, 32)) {
304
+ while (*(unsigned char *)p > 31) p++;
305
+ return (void *)p;
306
+ }
307
+ }
308
+ }
309
+
310
+ /* End is only used for the proxy parser. The HTTP parser recognizes "\ra" as invalid "\r\n" scan and breaks. */
311
+ static unsigned int getHeaders(char *postPaddedBuffer, char *end, struct HttpRequest::Header *headers, void *reserved, unsigned int &err) {
312
+ char *preliminaryKey, *preliminaryValue, *start = postPaddedBuffer;
313
+
314
+ #ifdef UWS_WITH_PROXY
315
+ /* ProxyParser is passed as reserved parameter */
316
+ ProxyParser *pp = (ProxyParser *) reserved;
317
+
318
+ /* Parse PROXY protocol */
319
+ auto [done, offset] = pp->parse({postPaddedBuffer, (size_t) (end - postPaddedBuffer)});
320
+ if (!done) {
321
+ /* We do not reset the ProxyParser (on filure) since it is tied to this
322
+ * connection, which is really only supposed to ever get one PROXY frame
323
+ * anyways. We do however allow multiple PROXY frames to be sent (overwrites former). */
324
+ return 0;
325
+ } else {
326
+ /* We have consumed this data so skip it */
327
+ postPaddedBuffer += offset;
328
+ }
329
+ #else
330
+ /* This one is unused */
331
+ (void) reserved;
332
+ (void) end;
333
+ #endif
334
+
335
+ /* It is critical for fallback buffering logic that we only return with success
336
+ * if we managed to parse a complete HTTP request (minus data). Returning success
337
+ * for PROXY means we can end up succeeding, yet leaving bytes in the fallback buffer
338
+ * which is then removed, and our counters to flip due to overflow and we end up with a crash */
339
+
340
+ /* The request line is different from the field names / field values */
341
+ if (!(postPaddedBuffer = consumeRequestLine(postPaddedBuffer, headers[0]))) {
342
+ /* Error - invalid request line */
343
+ /* Assuming it is 505 HTTP Version Not Supported */
344
+ err = HTTP_ERROR_505_HTTP_VERSION_NOT_SUPPORTED;
345
+ return 0;
346
+ }
347
+ headers++;
348
+
349
+ for (unsigned int i = 1; i < UWS_HTTP_MAX_HEADERS_COUNT - 1; i++) {
350
+ /* Lower case and consume the field name */
351
+ preliminaryKey = postPaddedBuffer;
352
+ postPaddedBuffer = (char *) consumeFieldName(postPaddedBuffer);
353
+ headers->key = std::string_view(preliminaryKey, (size_t) (postPaddedBuffer - preliminaryKey));
354
+
355
+ /* We should not accept whitespace between key and colon, so colon must foloow immediately */
356
+ if (postPaddedBuffer[0] != ':') {
357
+ /* Error: invalid chars in field name */
358
+ return 0;
359
+ }
360
+ postPaddedBuffer++;
361
+
362
+ preliminaryValue = postPaddedBuffer;
363
+ /* The goal of this call is to find next "\r\n", or any invalid field value chars, fast */
364
+ while (true) {
365
+ postPaddedBuffer = (char *) tryConsumeFieldValue(postPaddedBuffer);
366
+ /* If this is not CR then we caught some stinky invalid char on the way */
367
+ if (postPaddedBuffer[0] != '\r') {
368
+ /* If TAB then keep searching */
369
+ if (postPaddedBuffer[0] == '\t') {
370
+ postPaddedBuffer++;
371
+ continue;
372
+ }
373
+ /* Error - invalid chars in field value */
374
+ return 0;
375
+ }
376
+ break;
377
+ }
378
+ /* We fence end[0] with \r, followed by end[1] being something that is "not \n", to signify "not found".
379
+ * This way we can have this one single check to see if we found \r\n WITHIN our allowed search space. */
380
+ if (postPaddedBuffer[1] == '\n') {
381
+ /* Store this header, it is valid */
382
+ headers->value = std::string_view(preliminaryValue, (size_t) (postPaddedBuffer - preliminaryValue));
383
+ postPaddedBuffer += 2;
384
+
385
+ /* Trim trailing whitespace (SP, HTAB) */
386
+ while (headers->value.length() && headers->value.back() < 33) {
387
+ headers->value.remove_suffix(1);
388
+ }
389
+
390
+ /* Trim initial whitespace (SP, HTAB) */
391
+ while (headers->value.length() && headers->value.front() < 33) {
392
+ headers->value.remove_prefix(1);
393
+ }
394
+
395
+ headers++;
396
+
397
+ /* We definitely have at least one header (or request line), so check if we are done */
398
+ if (*postPaddedBuffer == '\r') {
399
+ if (postPaddedBuffer[1] == '\n') {
400
+ /* This cann take the very last header space */
401
+ headers->key = std::string_view(nullptr, 0);
402
+ return (unsigned int) ((postPaddedBuffer + 2) - start);
403
+ } else {
404
+ /* \r\n\r plus non-\n letter is malformed request, or simply out of search space */
405
+ return 0;
406
+ }
407
+ }
408
+ } else {
409
+ /* We are either out of search space or this is a malformed request */
410
+ return 0;
411
+ }
412
+ }
413
+ /* We ran out of header space, too large request */
414
+ return 0;
415
+ }
416
+
417
+ /* This is the only caller of getHeaders and is thus the deepest part of the parser.
418
+ * From here we return either [consumed, user] for "keep going",
419
+ * or [consumed, nullptr] for "break; I am closed or upgraded to websocket"
420
+ * or [whatever, fullptr] for "break and close me, I am a parser error!" */
421
+ template <int CONSUME_MINIMALLY>
422
+ std::pair<unsigned int, void *> fenceAndConsumePostPadded(char *data, unsigned int length, void *user, void *reserved, HttpRequest *req, MoveOnlyFunction<void *(void *, HttpRequest *)> &requestHandler, MoveOnlyFunction<void *(void *, std::string_view, bool)> &dataHandler) {
423
+
424
+ /* How much data we CONSUMED (to throw away) */
425
+ unsigned int consumedTotal = 0;
426
+ unsigned int err = 0;
427
+
428
+ /* Fence two bytes past end of our buffer (buffer has post padded margins).
429
+ * This is to always catch scan for \r but not for \r\n. */
430
+ data[length] = '\r';
431
+ data[length + 1] = 'a'; /* Anything that is not \n, to trigger "invalid request" */
432
+
433
+ for (unsigned int consumed; length && (consumed = getHeaders(data, data + length, req->headers, reserved, err)); ) {
434
+ data += consumed;
435
+ length -= consumed;
436
+ consumedTotal += consumed;
437
+
438
+ /* Store HTTP version (ancient 1.0 or 1.1) */
439
+ req->ancientHttp = false;
440
+
441
+ /* Add all headers to bloom filter */
442
+ req->bf.reset();
443
+ for (HttpRequest::Header *h = req->headers; (++h)->key.length(); ) {
444
+ req->bf.add(h->key);
445
+ }
446
+
447
+ /* Break if no host header (but we can have empty string which is different from nullptr) */
448
+ if (!req->getHeader("host").data()) {
449
+ return {HTTP_ERROR_400_BAD_REQUEST, FULLPTR};
450
+ }
451
+
452
+ /* RFC 9112 6.3
453
+ * If a message is received with both a Transfer-Encoding and a Content-Length header field,
454
+ * the Transfer-Encoding overrides the Content-Length. Such a message might indicate an attempt
455
+ * to perform request smuggling (Section 11.2) or response splitting (Section 11.1) and
456
+ * ought to be handled as an error. */
457
+ std::string_view transferEncodingString = req->getHeader("transfer-encoding");
458
+ std::string_view contentLengthString = req->getHeader("content-length");
459
+ if (transferEncodingString.length() && contentLengthString.length()) {
460
+ /* Returning fullptr is the same as calling the errorHandler */
461
+ /* We could be smart and set an error in the context along with this, to indicate what
462
+ * http error response we might want to return */
463
+ return {HTTP_ERROR_400_BAD_REQUEST, FULLPTR};
464
+ }
465
+
466
+ /* Parse query */
467
+ const char *querySeparatorPtr = (const char *) memchr(req->headers->value.data(), '?', req->headers->value.length());
468
+ req->querySeparator = (unsigned int) ((querySeparatorPtr ? querySeparatorPtr : req->headers->value.data() + req->headers->value.length()) - req->headers->value.data());
469
+
470
+ /* If returned socket is not what we put in we need
471
+ * to break here as we either have upgraded to
472
+ * WebSockets or otherwise closed the socket. */
473
+ void *returnedUser = requestHandler(user, req);
474
+ if (returnedUser != user) {
475
+ /* We are upgraded to WebSocket or otherwise broken */
476
+ return {consumedTotal, returnedUser};
477
+ }
478
+
479
+ /* The rules at play here according to RFC 9112 for requests are essentially:
480
+ * If both content-length and transfer-encoding then invalid message; must break.
481
+ * If has transfer-encoding then must be chunked regardless of value.
482
+ * If content-length then fixed length even if 0.
483
+ * If none of the above then fixed length is 0. */
484
+
485
+ /* RFC 9112 6.3
486
+ * If a message is received with both a Transfer-Encoding and a Content-Length header field,
487
+ * the Transfer-Encoding overrides the Content-Length. */
488
+ if (transferEncodingString.length()) {
489
+
490
+ /* If a proxy sent us the transfer-encoding header that 100% means it must be chunked or else the proxy is
491
+ * not RFC 9112 compliant. Therefore it is always better to assume this is the case, since that entirely eliminates
492
+ * all forms of transfer-encoding obfuscation tricks. We just rely on the header. */
493
+
494
+ /* RFC 9112 6.3
495
+ * If a Transfer-Encoding header field is present in a request and the chunked transfer coding is not the
496
+ * final encoding, the message body length cannot be determined reliably; the server MUST respond with the
497
+ * 400 (Bad Request) status code and then close the connection. */
498
+
499
+ /* In this case we fail later by having the wrong interpretation (assuming chunked).
500
+ * This could be made stricter but makes no difference either way, unless forwarding the identical message as a proxy. */
501
+
502
+ remainingStreamingBytes = STATE_IS_CHUNKED;
503
+ /* If consume minimally, we do not want to consume anything but we want to mark this as being chunked */
504
+ if (!CONSUME_MINIMALLY) {
505
+ /* Go ahead and parse it (todo: better heuristics for emitting FIN to the app level) */
506
+ std::string_view dataToConsume(data, length);
507
+ for (auto chunk : uWS::ChunkIterator(&dataToConsume, &remainingStreamingBytes)) {
508
+ dataHandler(user, chunk, chunk.length() == 0);
509
+ }
510
+ if (isParsingInvalidChunkedEncoding(remainingStreamingBytes)) {
511
+ return {HTTP_ERROR_400_BAD_REQUEST, FULLPTR};
512
+ }
513
+ unsigned int consumed = (length - (unsigned int) dataToConsume.length());
514
+ data = (char *) dataToConsume.data();
515
+ length = (unsigned int) dataToConsume.length();
516
+ consumedTotal += consumed;
517
+ }
518
+ } else if (contentLengthString.length()) {
519
+ remainingStreamingBytes = toUnsignedInteger(contentLengthString);
520
+ if (remainingStreamingBytes == UINT_MAX) {
521
+ /* Parser error */
522
+ return {HTTP_ERROR_400_BAD_REQUEST, FULLPTR};
523
+ }
524
+
525
+ if (!CONSUME_MINIMALLY) {
526
+ unsigned int emittable = (unsigned int) std::min<uint64_t>(remainingStreamingBytes, length);
527
+ dataHandler(user, std::string_view(data, emittable), emittable == remainingStreamingBytes);
528
+ remainingStreamingBytes -= emittable;
529
+
530
+ data += emittable;
531
+ length -= emittable;
532
+ consumedTotal += emittable;
533
+ }
534
+ } else {
535
+ /* If we came here without a body; emit an empty data chunk to signal no data */
536
+ dataHandler(user, {}, true);
537
+ }
538
+
539
+ /* Consume minimally should break as easrly as possible */
540
+ if (CONSUME_MINIMALLY) {
541
+ break;
542
+ }
543
+ }
544
+ /* Whenever we return FULLPTR, the interpretation of "consumed" should be the HttpError enum. */
545
+ if (err) {
546
+ return {err, FULLPTR};
547
+ }
548
+ return {consumedTotal, user};
549
+ }
550
+
551
+ public:
552
+ std::pair<unsigned int, void *> consumePostPadded(char *data, unsigned int length, void *user, void *reserved, MoveOnlyFunction<void *(void *, HttpRequest *)> &&requestHandler, MoveOnlyFunction<void *(void *, std::string_view, bool)> &&dataHandler) {
553
+
554
+ /* This resets BloomFilter by construction, but later we also reset it again.
555
+ * Optimize this to skip resetting twice (req could be made global) */
556
+ HttpRequest req;
557
+
558
+ if (remainingStreamingBytes) {
559
+
560
+ /* It's either chunked or with a content-length */
561
+ if (isParsingChunkedEncoding(remainingStreamingBytes)) {
562
+ std::string_view dataToConsume(data, length);
563
+ for (auto chunk : uWS::ChunkIterator(&dataToConsume, &remainingStreamingBytes)) {
564
+ dataHandler(user, chunk, chunk.length() == 0);
565
+ }
566
+ if (isParsingInvalidChunkedEncoding(remainingStreamingBytes)) {
567
+ return {HTTP_ERROR_400_BAD_REQUEST, FULLPTR};
568
+ }
569
+ data = (char *) dataToConsume.data();
570
+ length = (unsigned int) dataToConsume.length();
571
+ } else {
572
+ // this is exactly the same as below!
573
+ // todo: refactor this
574
+ if (remainingStreamingBytes >= length) {
575
+ void *returnedUser = dataHandler(user, std::string_view(data, length), remainingStreamingBytes == length);
576
+ remainingStreamingBytes -= length;
577
+ return {0, returnedUser};
578
+ } else {
579
+ void *returnedUser = dataHandler(user, std::string_view(data, remainingStreamingBytes), true);
580
+
581
+ data += (unsigned int) remainingStreamingBytes;
582
+ length -= (unsigned int) remainingStreamingBytes;
583
+
584
+ remainingStreamingBytes = 0;
585
+
586
+ if (returnedUser != user) {
587
+ return {0, returnedUser};
588
+ }
589
+ }
590
+ }
591
+
592
+ } else if (fallback.length()) {
593
+ unsigned int had = (unsigned int) fallback.length();
594
+
595
+ size_t maxCopyDistance = std::min<size_t>(MAX_FALLBACK_SIZE - fallback.length(), (size_t) length);
596
+
597
+ /* We don't want fallback to be short string optimized, since we want to move it */
598
+ fallback.reserve(fallback.length() + maxCopyDistance + std::max<unsigned int>(MINIMUM_HTTP_POST_PADDING, sizeof(std::string)));
599
+ fallback.append(data, maxCopyDistance);
600
+
601
+ // break here on break
602
+ std::pair<unsigned int, void *> consumed = fenceAndConsumePostPadded<true>(fallback.data(), (unsigned int) fallback.length(), user, reserved, &req, requestHandler, dataHandler);
603
+ if (consumed.second != user) {
604
+ return consumed;
605
+ }
606
+
607
+ if (consumed.first) {
608
+
609
+ /* This logic assumes that we consumed everything in fallback buffer.
610
+ * This is critically important, as we will get an integer overflow in case
611
+ * of "had" being larger than what we consumed, and that we would drop data */
612
+ fallback.clear();
613
+ data += consumed.first - had;
614
+ length -= consumed.first - had;
615
+
616
+ if (remainingStreamingBytes) {
617
+ /* It's either chunked or with a content-length */
618
+ if (isParsingChunkedEncoding(remainingStreamingBytes)) {
619
+ std::string_view dataToConsume(data, length);
620
+ for (auto chunk : uWS::ChunkIterator(&dataToConsume, &remainingStreamingBytes)) {
621
+ dataHandler(user, chunk, chunk.length() == 0);
622
+ }
623
+ if (isParsingInvalidChunkedEncoding(remainingStreamingBytes)) {
624
+ return {HTTP_ERROR_400_BAD_REQUEST, FULLPTR};
625
+ }
626
+ data = (char *) dataToConsume.data();
627
+ length = (unsigned int) dataToConsume.length();
628
+ } else {
629
+ // this is exactly the same as above!
630
+ if (remainingStreamingBytes >= (unsigned int) length) {
631
+ void *returnedUser = dataHandler(user, std::string_view(data, length), remainingStreamingBytes == (unsigned int) length);
632
+ remainingStreamingBytes -= length;
633
+ return {0, returnedUser};
634
+ } else {
635
+ void *returnedUser = dataHandler(user, std::string_view(data, remainingStreamingBytes), true);
636
+
637
+ data += (unsigned int) remainingStreamingBytes;
638
+ length -= (unsigned int) remainingStreamingBytes;
639
+
640
+ remainingStreamingBytes = 0;
641
+
642
+ if (returnedUser != user) {
643
+ return {0, returnedUser};
644
+ }
645
+ }
646
+ }
647
+ }
648
+
649
+ } else {
650
+ if (fallback.length() == MAX_FALLBACK_SIZE) {
651
+ return {HTTP_ERROR_431_REQUEST_HEADER_FIELDS_TOO_LARGE, FULLPTR};
652
+ }
653
+ return {0, user};
654
+ }
655
+ }
656
+
657
+ std::pair<unsigned int, void *> consumed = fenceAndConsumePostPadded<false>(data, length, user, reserved, &req, requestHandler, dataHandler);
658
+ if (consumed.second != user) {
659
+ return consumed;
660
+ }
661
+
662
+ data += consumed.first;
663
+ length -= consumed.first;
664
+
665
+ if (length) {
666
+ if (length < MAX_FALLBACK_SIZE) {
667
+ fallback.append(data, length);
668
+ } else {
669
+ return {HTTP_ERROR_431_REQUEST_HEADER_FIELDS_TOO_LARGE, FULLPTR};
670
+ }
671
+ }
672
+
673
+ // added for now
674
+ return {0, user};
675
+ }
676
+ };
677
+
678
+ }
679
+
680
+ #endif // UWS_HTTPPARSER_H