isomorfeus-iodine 0.7.48 → 0.7.49

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,873 +1,873 @@
1
- #ifndef H_HTTP1_PARSER_H
2
- /*
3
- Copyright: Boaz Segev, 2017-2020
4
- License: MIT
5
-
6
- Feel free to copy, use and enjoy according to the license provided.
7
- */
8
-
9
- /**
10
- This is a callback based parser. It parses the skeleton of the HTTP/1.x protocol
11
- and leaves most of the work (validation, error checks, etc.) to the callbacks.
12
- */
13
- #define H_HTTP1_PARSER_H
14
- #include <stddef.h>
15
- #include <stdint.h>
16
- #include <stdio.h>
17
- #include <stdlib.h>
18
- #include <string.h>
19
- #include <sys/types.h>
20
-
21
- /* *****************************************************************************
22
- Parser Settings
23
- ***************************************************************************** */
24
-
25
- #ifndef HTTP_HEADERS_LOWERCASE
26
- /**
27
- * When set to a non-zero value, HTTP headers will be converted to lowercase and header
28
- * searches will be case sensitive.
29
- *
30
- * This is highly recommended, required by facil.io and helps with HTTP/2
31
- * compatibility.
32
- */
33
- #define HTTP_HEADERS_LOWERCASE 1
34
- #endif
35
-
36
- #ifndef HTTP_ADD_CONTENT_LENGTH_HEADER_IF_MISSING
37
- #define HTTP_ADD_CONTENT_LENGTH_HEADER_IF_MISSING 1
38
- #endif
39
-
40
- #ifndef FIO_MEMCHAR
41
- /** Prefer a custom memchr implementation. Usually memchr is better. */
42
- #define FIO_MEMCHAR 0
43
- #endif
44
-
45
- #ifndef ALLOW_UNALIGNED_MEMORY_ACCESS
46
- /** Performs some optimizations assuming unaligned memory access is okay. */
47
- #define ALLOW_UNALIGNED_MEMORY_ACCESS 0
48
- #endif
49
-
50
- #ifndef HTTP1_PARSER_CONVERT_EOL2NUL
51
- #define HTTP1_PARSER_CONVERT_EOL2NUL 0
52
- #endif
53
-
54
- /* *****************************************************************************
55
- Parser API
56
- ***************************************************************************** */
57
-
58
- /** this struct contains the state of the parser. */
59
- typedef struct http1_parser_s {
60
- struct http1_parser_protected_read_only_state_s {
61
- long long content_length; /* negative values indicate chuncked data state */
62
- ssize_t read; /* total number of bytes read so far (body only) */
63
- uint8_t *next; /* the known position for the end of request/response */
64
- uint8_t reserved; /* for internal use */
65
- } state;
66
- } http1_parser_s;
67
-
68
- #define HTTP1_PARSER_INIT \
69
- { \
70
- { 0 } \
71
- }
72
-
73
- /**
74
- * Returns the amount of data actually consumed by the parser.
75
- *
76
- * The value 0 indicates there wasn't enough data to be parsed and the same
77
- * buffer (with more data) should be resubmitted.
78
- *
79
- * A value smaller than the buffer size indicates that EITHER a request /
80
- * response was detected OR that the leftover could not be consumed because more
81
- * data was required.
82
- *
83
- * Simply resubmit the remainder of the data to continue parsing.
84
- *
85
- * A request / response callback automatically stops the parsing process,
86
- * allowing the user to adjust or refresh the state of the data.
87
- */
88
- static size_t http1_parse(http1_parser_s *parser, void *buffer, size_t length);
89
-
90
- /* *****************************************************************************
91
- Required Callbacks (MUST be implemented by including file)
92
- ***************************************************************************** */
93
-
94
- /** called when a request was received. */
95
- static int http1_on_request(http1_parser_s *parser);
96
- /** called when a response was received. */
97
- static int http1_on_response(http1_parser_s *parser);
98
- /** called when a request method is parsed. */
99
- static int http1_on_method(http1_parser_s *parser, char *method,
100
- size_t method_len);
101
- /** called when a response status is parsed. the status_str is the string
102
- * without the prefixed numerical status indicator.*/
103
- static int http1_on_status(http1_parser_s *parser, size_t status,
104
- char *status_str, size_t len);
105
- /** called when a request path (excluding query) is parsed. */
106
- static int http1_on_path(http1_parser_s *parser, char *path, size_t path_len);
107
- /** called when a request query (the part following the '?') is parsed. */
108
- static int http1_on_query(http1_parser_s *parser, char *query,
109
- size_t query_len);
110
- /** called when the HTTP/1.x version is parsed. */
111
- static int http1_on_version(http1_parser_s *parser, char *version, size_t len);
112
- /** called when a header is parsed. */
113
- static int http1_on_header(http1_parser_s *parser, char *name, size_t name_len,
114
- char *data, size_t data_len);
115
- /** called when a body chunk is parsed. */
116
- static int http1_on_body_chunk(http1_parser_s *parser, char *data,
117
- size_t data_len);
118
- /** called when a protocol error occurred. */
119
- static int http1_on_error(http1_parser_s *parser);
120
-
121
- /* *****************************************************************************
122
-
123
-
124
-
125
-
126
-
127
-
128
-
129
-
130
-
131
-
132
-
133
-
134
-
135
-
136
-
137
-
138
-
139
- Implementation Details
140
-
141
-
142
-
143
-
144
-
145
-
146
-
147
-
148
-
149
-
150
-
151
-
152
-
153
-
154
-
155
-
156
-
157
- ***************************************************************************** */
158
-
159
- #if HTTP_HEADERS_LOWERCASE
160
- #define HEADER_NAME_IS_EQ(var_name, const_name, len) \
161
- (!memcmp((var_name), (const_name), (len)))
162
- #else
163
- #define HEADER_NAME_IS_EQ(var_name, const_name, len) \
164
- (!strncasecmp((var_name), (const_name), (len)))
165
- #endif
166
-
167
- #define HTTP1_P_FLAG_STATUS_LINE 1
168
- #define HTTP1_P_FLAG_HEADER_COMPLETE 2
169
- #define HTTP1_P_FLAG_COMPLETE 4
170
- #define HTTP1_P_FLAG_CLENGTH 8
171
- #define HTTP1_PARSER_BIT_16 16
172
- #define HTTP1_PARSER_BIT_32 32
173
- #define HTTP1_P_FLAG_CHUNKED 64
174
- #define HTTP1_P_FLAG_RESPONSE 128
175
-
176
- /* *****************************************************************************
177
- Seeking for characters in a string
178
- ***************************************************************************** */
179
-
180
- #if FIO_MEMCHAR
181
-
182
- /**
183
- * This seems to be faster on some systems, especially for smaller distances.
184
- *
185
- * On newer systems, `memchr` should be faster.
186
- */
187
- static int seek2ch(uint8_t **buffer, register uint8_t *const limit,
188
- const uint8_t c) {
189
- if (*buffer >= limit)
190
- return 0;
191
- if (**buffer == c) {
192
- return 1;
193
- }
194
-
195
- #if !HTTP1_UNALIGNED_MEMORY_ACCESS_ENABLED
196
- /* too short for this mess */
197
- if ((uintptr_t)limit <= 16 + ((uintptr_t)*buffer & (~(uintptr_t)7)))
198
- goto finish;
199
-
200
- /* align memory */
201
- {
202
- const uint8_t *alignment =
203
- (uint8_t *)(((uintptr_t)(*buffer) & (~(uintptr_t)7)) + 8);
204
- if (*buffer < alignment)
205
- *buffer += 1; /* we already tested this char */
206
- if (limit >= alignment) {
207
- while (*buffer < alignment) {
208
- if (**buffer == c) {
209
- return 1;
210
- }
211
- *buffer += 1;
212
- }
213
- }
214
- }
215
- const uint8_t *limit64 = (uint8_t *)((uintptr_t)limit & (~(uintptr_t)7));
216
- #else
217
- const uint8_t *limit64 = (uint8_t *)limit - 7;
218
- #endif
219
- uint64_t wanted1 = 0x0101010101010101ULL * c;
220
- for (; *buffer < limit64; *buffer += 8) {
221
- const uint64_t eq1 = ~((*((uint64_t *)*buffer)) ^ wanted1);
222
- const uint64_t t0 = (eq1 & 0x7f7f7f7f7f7f7f7fllu) + 0x0101010101010101llu;
223
- const uint64_t t1 = (eq1 & 0x8080808080808080llu);
224
- if ((t0 & t1)) {
225
- break;
226
- }
227
- }
228
- #if !HTTP1_UNALIGNED_MEMORY_ACCESS_ENABLED
229
- finish:
230
- #endif
231
- while (*buffer < limit) {
232
- if (**buffer == c) {
233
- return 1;
234
- }
235
- (*buffer)++;
236
- }
237
- return 0;
238
- }
239
-
240
- #else
241
-
242
- /* a helper that seeks a char, moving *pos to it (or to limit); returns 1 if found, 0 otherwise. */
243
- inline static uint8_t seek2ch(uint8_t **pos, uint8_t *const limit, uint8_t ch) {
244
- /* This is library based alternative that is sometimes slower */
245
- if (*pos >= limit)
246
- return 0;
247
- if (**pos == ch) {
248
- return 1;
249
- }
250
- uint8_t *tmp = memchr(*pos, ch, limit - (*pos));
251
- if (tmp) {
252
- *pos = tmp;
253
- return 1;
254
- }
255
- *pos = limit;
256
- return 0;
257
- }
258
-
259
- #endif
260
-
261
- /* seeks the EOL and returns its length (2 for CRLF, 1 for LF, 0 if none); converts it to NUL only if HTTP1_PARSER_CONVERT_EOL2NUL */
262
- inline static uint8_t seek2eol(uint8_t **pos, uint8_t *const limit) {
263
- /* single char lookup using memchr might be better when target is far... */
264
- if (!seek2ch(pos, limit, '\n'))
265
- return 0;
266
- if ((*pos)[-1] == '\r') {
267
- #if HTTP1_PARSER_CONVERT_EOL2NUL
268
- (*pos)[-1] = (*pos)[0] = 0;
269
- #endif
270
- return 2;
271
- }
272
- #if HTTP1_PARSER_CONVERT_EOL2NUL
273
- (*pos)[0] = 0;
274
- #endif
275
- return 1;
276
- }
277
-
278
- /* *****************************************************************************
279
- Change a letter to lower case (latin only)
280
- ***************************************************************************** */
281
-
282
- static uint8_t http_tolower(uint8_t c) {
283
- if (c >= 'A' && c <= 'Z')
284
- c |= 32;
285
- return c;
286
- }
287
-
288
- /* *****************************************************************************
289
- String to Number
290
- ***************************************************************************** */
291
-
292
- /** Converts a String to a number using base 10 */
293
- static long long http1_atol(const uint8_t *buf, const uint8_t **end) {
294
- register unsigned long long i = 0;
295
- uint8_t inv = 0;
296
- while (*buf == ' ' || *buf == '\t' || *buf == '\f')
297
- ++buf;
298
- while (*buf == '-' || *buf == '+')
299
- inv ^= (*(buf++) == '-');
300
- while (i <= ((((~0ULL) >> 1) / 10)) && *buf >= '0' && *buf <= '9') {
301
- i = i * 10;
302
- i += *buf - '0';
303
- ++buf;
304
- }
305
- /* test for overflow */
306
- if (i >= (~((~0ULL) >> 1)) || (*buf >= '0' && *buf <= '9'))
307
- i = (~0ULL >> 1);
308
- if (inv)
309
- i = 0ULL - i;
310
- if (end)
311
- *end = buf;
312
- return i;
313
- }
314
-
315
- /** Converts a String to a number using base 16, overflow limited to 113 bytes */
316
- static long long http1_atol16(const uint8_t *buf, const uint8_t **end) {
317
- register unsigned long long i = 0;
318
- uint8_t inv = 0;
319
- for (int limit_ = 0;
320
- (*buf == ' ' || *buf == '\t' || *buf == '\f') && limit_ < 32; ++limit_)
321
- ++buf;
322
- for (int limit_ = 0; (*buf == '-' || *buf == '+') && limit_ < 32; ++limit_)
323
- inv ^= (*(buf++) == '-');
324
- if (*buf == '0')
325
- ++buf;
326
- if ((*buf | 32) == 'x')
327
- ++buf;
328
- for (int limit_ = 0; (*buf == '0') && limit_ < 32; ++limit_)
329
- ++buf;
330
- while (!(i & (~((~(0ULL)) >> 4)))) {
331
- if (*buf >= '0' && *buf <= '9') {
332
- i <<= 4;
333
- i |= *buf - '0';
334
- } else if ((*buf | 32) >= 'a' && (*buf | 32) <= 'f') {
335
- i <<= 4;
336
- i |= (*buf | 32) - ('a' - 10);
337
- } else
338
- break;
339
- ++buf;
340
- }
341
- if (inv)
342
- i = 0ULL - i;
343
- if (end)
344
- *end = buf;
345
- return i;
346
- }
347
-
348
- /* *****************************************************************************
349
- HTTP/1.1 parser stages
350
- ***************************************************************************** */
351
-
352
- inline static int http1_consume_response_line(http1_parser_s *parser,
353
- uint8_t *start, uint8_t *end) {
354
- parser->state.reserved |= HTTP1_P_FLAG_RESPONSE;
355
- uint8_t *tmp = start;
356
- if (!seek2ch(&tmp, end, ' '))
357
- return -1;
358
- if (http1_on_version(parser, (char *)start, tmp - start))
359
- return -1;
360
- tmp = start = tmp + 1;
361
- if (!seek2ch(&tmp, end, ' '))
362
- return -1;
363
- if (http1_on_status(parser, http1_atol(start, NULL), (char *)(tmp + 1),
364
- end - tmp))
365
- return -1;
366
- return 0;
367
- }
368
-
369
- inline static int http1_consume_request_line(http1_parser_s *parser,
370
- uint8_t *start, uint8_t *end) {
371
- uint8_t *tmp = start;
372
- uint8_t *host_start = NULL;
373
- uint8_t *host_end = NULL;
374
- if (!seek2ch(&tmp, end, ' '))
375
- return -1;
376
- if (http1_on_method(parser, (char *)start, tmp - start))
377
- return -1;
378
- tmp = start = tmp + 1;
379
- if (start[0] == 'h' && start[1] == 't' && start[2] == 't' &&
380
- start[3] == 'p') {
381
- if (start[4] == ':' && start[5] == '/' && start[6] == '/') {
382
- /* Request URI is in long form... emulate Host header instead. */
383
- tmp = host_end = host_start = (start += 7);
384
- } else if (start[4] == 's' && start[5] == ':' && start[6] == '/' &&
385
- start[7] == '/') {
386
- /* Secure request is in long form... emulate Host header instead. */
387
- tmp = host_end = host_start = (start += 8);
388
- } else
389
- goto review_path;
390
- if (!seek2ch(&tmp, end, ' '))
391
- return -1;
392
- *tmp = ' ';
393
- if (!seek2ch(&host_end, tmp, '/')) {
394
- if (http1_on_path(parser, (char *)"/", 1))
395
- return -1;
396
- goto start_version;
397
- }
398
- host_end[0] = '/';
399
- start = host_end;
400
- }
401
- review_path:
402
- tmp = start;
403
- if (seek2ch(&tmp, end, '?')) {
404
- if (http1_on_path(parser, (char *)start, tmp - start))
405
- return -1;
406
- tmp = start = tmp + 1;
407
- if (!seek2ch(&tmp, end, ' '))
408
- return -1;
409
- if (tmp - start > 0 && http1_on_query(parser, (char *)start, tmp - start))
410
- return -1;
411
- } else {
412
- tmp = start;
413
- if (!seek2ch(&tmp, end, ' '))
414
- return -1;
415
- if (http1_on_path(parser, (char *)start, tmp - start))
416
- return -1;
417
- }
418
- start_version:
419
- start = tmp + 1;
420
- if (start + 5 >= end) /* require "HTTP/" */
421
- return -1;
422
- if (http1_on_version(parser, (char *)start, end - start))
423
- return -1;
424
- /* */
425
- if (host_start && http1_on_header(parser, (char *)"host", 4,
426
- (char *)host_start, host_end - host_start))
427
- return -1;
428
- return 0;
429
- }
430
-
431
- #ifndef HTTP1_ALLOW_CHUNKED_IN_MIDDLE_OF_HEADER
432
- inline /* inline the function of it's short enough */
433
- #endif
434
- static int
435
- http1_consume_header_transfer_encoding(http1_parser_s *parser,
436
- uint8_t *start, uint8_t *end_name,
437
- uint8_t *start_value, uint8_t *end) {
438
- /* this removes the `chunked` marker and prepares to "unchunk" the data */
439
- while (start_value < end && (end[-1] == ',' || end[-1] == ' '))
440
- --end;
441
- if ((end - start_value) == 7 &&
442
- #if HTTP1_UNALIGNED_MEMORY_ACCESS_ENABLED
443
- (((uint32_t *)(start_value))[0] | 0x20202020) ==
444
- ((uint32_t *)"chun")[0] &&
445
- (((uint32_t *)(start_value + 3))[0] | 0x20202020) ==
446
- ((uint32_t *)"nked")[0]
447
- #else
448
- ((start_value[0] | 32) == 'c' && (start_value[1] | 32) == 'h' &&
449
- (start_value[2] | 32) == 'u' && (start_value[3] | 32) == 'n' &&
450
- (start_value[4] | 32) == 'k' && (start_value[5] | 32) == 'e' &&
451
- (start_value[6] | 32) == 'd')
452
- #endif
453
- ) {
454
- /* simple case, only `chunked` as a value */
455
- parser->state.reserved |= HTTP1_P_FLAG_CHUNKED;
456
- parser->state.content_length = 0;
457
- start_value += 7;
458
- while (start_value < end && (*start_value == ',' || *start_value == ' '))
459
- ++start_value;
460
- if (!(end - start_value))
461
- return 0;
462
- } else if ((end - start_value) > 7 &&
463
- ((end[(-7 + 0)] | 32) == 'c' && (end[(-7 + 1)] | 32) == 'h' &&
464
- (end[(-7 + 2)] | 32) == 'u' && (end[(-7 + 3)] | 32) == 'n' &&
465
- (end[(-7 + 4)] | 32) == 'k' && (end[(-7 + 5)] | 32) == 'e' &&
466
- (end[(-7 + 6)] | 32) == 'd')) {
467
- /* simple case, `chunked` at the end of list (RFC required) */
468
- parser->state.reserved |= HTTP1_P_FLAG_CHUNKED;
469
- parser->state.content_length = 0;
470
- end -= 7;
471
- while (start_value < end && (end[-1] == ',' || end[-1] == ' '))
472
- --end;
473
- if (!(end - start_value))
474
- return 0;
475
- }
476
- #ifdef HTTP1_ALLOW_CHUNKED_IN_MIDDLE_OF_HEADER /* RFC diisallows this */
477
- else if ((end - start_value) > 7 && (end - start_value) < 256) {
478
- /* complex case, the `chunked` marker is in the middle of the list */
479
- uint8_t val[256];
480
- size_t val_len = 0;
481
- while (start_value < end && val_len < 256) {
482
- if ((end - start_value) >= 7) {
483
- if (
484
- #if HTTP1_UNALIGNED_MEMORY_ACCESS_ENABLED
485
- (((uint32_t *)(start_value))[0] | 0x20202020) ==
486
- ((uint32_t *)"chun")[0] &&
487
- (((uint32_t *)(start_value + 3))[0] | 0x20202020) ==
488
- ((uint32_t *)"nked")[0]
489
- #else
490
- ((start_value[0] | 32) == 'c' && (start_value[1] | 32) == 'h' &&
491
- (start_value[2] | 32) == 'u' && (start_value[3] | 32) == 'n' &&
492
- (start_value[4] | 32) == 'k' && (start_value[5] | 32) == 'e' &&
493
- (start_value[6] | 32) == 'd')
494
- #endif
495
-
496
- ) {
497
- parser->state.reserved |= HTTP1_P_FLAG_CHUNKED;
498
- parser->state.content_length = 0;
499
- start_value += 7;
500
- /* skip comma / white space */
501
- while (start_value < end &&
502
- (*start_value == ',' || *start_value == ' '))
503
- ++start_value;
504
- continue;
505
- }
506
- }
507
- /* copy value */
508
- while (start_value < end && val_len < 256 && start_value[0] != ',') {
509
- val[val_len++] = *start_value;
510
- ++start_value;
511
- }
512
- /* copy comma */
513
- if (start_value[0] == ',' && val_len < 256) {
514
- val[val_len++] = *start_value;
515
- ++start_value;
516
- }
517
- /* skip spaces */
518
- while (start_value < end && start_value[0] == ' ') {
519
- ++start_value;
520
- }
521
- }
522
- if (val_len < 256) {
523
- while (start_value < end && val_len < 256) {
524
- val[val_len++] = *start_value;
525
- ++start_value;
526
- }
527
- val[val_len] = 0;
528
- }
529
- /* perform callback with `val` or indicate error */
530
- if (val_len == 256 ||
531
- (val_len && http1_on_header(parser, (char *)start, (end_name - start),
532
- (char *)val, val_len)))
533
- return -1;
534
- return 0;
535
- }
536
- #endif /* HTTP1_ALLOW_CHUNKED_IN_MIDDLE_OF_HEADER */
537
- /* perform callback */
538
- if (http1_on_header(parser, (char *)start, (end_name - start),
539
- (char *)start_value, end - start_value))
540
- return -1;
541
- return 0;
542
- }
543
- inline static int http1_consume_header_top(http1_parser_s *parser,
544
- uint8_t *start, uint8_t *end_name,
545
- uint8_t *start_value, uint8_t *end) {
546
- if ((end_name - start) == 14 &&
547
- #if HTTP1_UNALIGNED_MEMORY_ACCESS_ENABLED && HTTP_HEADERS_LOWERCASE
548
- *((uint64_t *)start) == *((uint64_t *)"content-") &&
549
- *((uint64_t *)(start + 6)) == *((uint64_t *)"t-length")
550
- #else
551
- HEADER_NAME_IS_EQ((char *)start, "content-length", 14)
552
- #endif
553
- ) {
554
- /* handle the special `content-length` header */
555
- if ((parser->state.reserved & HTTP1_P_FLAG_CHUNKED))
556
- return 0; /* ignore if `chunked` */
557
- long long old_clen = parser->state.content_length;
558
- parser->state.content_length = http1_atol(start_value, NULL);
559
- if ((parser->state.reserved & HTTP1_P_FLAG_CLENGTH) &&
560
- old_clen != parser->state.content_length) {
561
- /* content-length header repeated with conflict */
562
- return -1;
563
- }
564
- parser->state.reserved |= HTTP1_P_FLAG_CLENGTH;
565
- } else if ((end_name - start) == 17 && (end - start_value) >= 7 &&
566
- !parser->state.content_length &&
567
- #if HTTP1_UNALIGNED_MEMORY_ACCESS_ENABLED && HTTP_HEADERS_LOWERCASE
568
- *((uint64_t *)start) == *((uint64_t *)"transfer") &&
569
- *((uint64_t *)(start + 8)) == *((uint64_t *)"-encodin")
570
- #else
571
- HEADER_NAME_IS_EQ((char *)start, "transfer-encoding", 17)
572
- #endif
573
- ) {
574
- /* handle the special `transfer-encoding: chunked` header */
575
- return http1_consume_header_transfer_encoding(parser, start, end_name,
576
- start_value, end);
577
- }
578
- /* perform callback */
579
- if (http1_on_header(parser, (char *)start, (end_name - start),
580
- (char *)start_value, end - start_value))
581
- return -1;
582
- return 0;
583
- }
584
-
585
- inline static int http1_consume_header_trailer(http1_parser_s *parser,
586
- uint8_t *start,
587
- uint8_t *end_name,
588
- uint8_t *start_value,
589
- uint8_t *end) {
590
- if ((end_name - start) > 1 && start[0] == 'x') {
591
- /* X- headers are allowed (NOTE: only the leading 'x' is tested, not the '-') */
592
- goto white_listed;
593
- }
594
-
595
- /* white listed trailer names */
596
- const struct {
597
- char *name;
598
- long len;
599
- } http1_trailer_white_list[] = {
600
- {"server-timing", 13}, /* specific for client data... */
601
- {NULL, 0}, /* end of list marker */
602
- };
603
- for (size_t i = 0; http1_trailer_white_list[i].name; ++i) {
604
- if ((long)(end_name - start) == http1_trailer_white_list[i].len &&
605
- HEADER_NAME_IS_EQ((char *)start, http1_trailer_white_list[i].name,
606
- http1_trailer_white_list[i].len)) {
607
- /* header is white listed (allowed here) */
608
- goto white_listed;
609
- }
610
- }
611
- return 0;
612
- white_listed:
613
- /* perform callback */
614
- if (http1_on_header(parser, (char *)start, (end_name - start),
615
- (char *)start_value, end - start_value))
616
- return -1;
617
- return 0;
618
- }
619
-
620
- inline static int http1_consume_header(http1_parser_s *parser, uint8_t *start,
621
- uint8_t *end) {
622
- uint8_t *end_name = start;
623
- /* divide header name from data */
624
- if (!seek2ch(&end_name, end, ':'))
625
- return -1;
626
- if (end_name[-1] == ' ' || end_name[-1] == '\t')
627
- return -1;
628
- #if HTTP_HEADERS_LOWERCASE
629
- for (uint8_t *t = start; t < end_name; t++) {
630
- *t = http_tolower(*t);
631
- }
632
- #endif
633
- uint8_t *start_value = end_name + 1;
634
- // clear away leading white space from value.
635
- while (start_value < end &&
636
- (start_value[0] == ' ' || start_value[0] == '\t')) {
637
- start_value++;
638
- };
639
- return (parser->state.read ? http1_consume_header_trailer
640
- : http1_consume_header_top)(
641
- parser, start, end_name, start_value, end);
642
- }
643
-
644
- /* *****************************************************************************
645
- HTTP/1.1 Body handling
646
- ***************************************************************************** */
647
-
648
- inline static int http1_consume_body_streamed(http1_parser_s *parser,
649
- void *buffer, size_t length,
650
- uint8_t **start) {
651
- uint8_t *end = *start + parser->state.content_length - parser->state.read;
652
- uint8_t *const stop = ((uint8_t *)buffer) + length;
653
- if (end > stop)
654
- end = stop;
655
- if (end > *start &&
656
- http1_on_body_chunk(parser, (char *)(*start), end - *start))
657
- return -1;
658
- parser->state.read += (end - *start);
659
- *start = end;
660
- if (parser->state.content_length <= parser->state.read)
661
- parser->state.reserved |= HTTP1_P_FLAG_COMPLETE;
662
- return 0;
663
- }
664
-
665
- inline static int http1_consume_body_chunked(http1_parser_s *parser,
666
- void *buffer, size_t length,
667
- uint8_t **start) {
668
- uint8_t *const stop = ((uint8_t *)buffer) + length;
669
- uint8_t *end = *start;
670
- while (*start < stop) {
671
- if (parser->state.content_length == 0) {
672
- if (end + 2 >= stop)
673
- return 0;
674
- if ((end[0] == '\r' && end[1] == '\n')) {
675
- /* remove trailing EOL that wasn't processed and retest */
676
- end += 2;
677
- *start = end;
678
- if (end + 2 >= stop)
679
- return 0;
680
- }
681
- long long chunk_len = http1_atol16(end, (const uint8_t **)&end);
682
- if (end + 2 > stop) /* overflowed? */
683
- return 0;
684
- if ((end[0] != '\r' || end[1] != '\n'))
685
- return -1; /* required EOL after content length */
686
- end += 2;
687
-
688
- parser->state.content_length = 0 - chunk_len;
689
- *start = end;
690
- if (parser->state.content_length == 0) {
691
- /* all chunked data was parsed */
692
- /* update content-length */
693
- parser->state.content_length = parser->state.read;
694
- #ifdef HTTP_ADD_CONTENT_LENGTH_HEADER_IF_MISSING
695
- { /* add virtual header ... ? */
696
- char buf[512];
697
- size_t buf_len = 512;
698
- size_t tmp_len = parser->state.read;
699
- buf[--buf_len] = 0;
700
- while (tmp_len) {
701
- size_t mod = tmp_len / 10;
702
- buf[--buf_len] = '0' + (tmp_len - (mod * 10));
703
- tmp_len = mod;
704
- }
705
- if (!(parser->state.reserved & HTTP1_P_FLAG_CLENGTH) &&
706
- http1_on_header(parser, "content-length", 14,
707
- (char *)buf + buf_len, 511 - buf_len)) {
708
- return -1;
709
- }
710
- }
711
- #endif
712
- /* FIXME: consume trailing EOL */
713
- if (*start + 2 <= stop && (start[0][0] == '\r' || start[0][0] == '\n'))
714
- *start += 1 + (start[0][1] == '\r' || start[0][1] == '\n');
715
- else {
716
- /* remove the "headers complete" and "trailer" flags */
717
- parser->state.reserved =
718
- HTTP1_P_FLAG_STATUS_LINE | HTTP1_P_FLAG_CLENGTH;
719
- return -2;
720
- }
721
- /* the parsing complete flag */
722
- parser->state.reserved |= HTTP1_P_FLAG_COMPLETE;
723
- return 0;
724
- }
725
- }
726
- end = *start + (0 - parser->state.content_length);
727
- if (end > stop)
728
- end = stop;
729
- if (end > *start &&
730
- http1_on_body_chunk(parser, (char *)(*start), end - *start)) {
731
- return -1;
732
- }
733
- parser->state.read += (end - *start);
734
- parser->state.content_length += (end - *start);
735
- *start = end;
736
- }
737
- return 0;
738
- }
739
-
740
- inline static int http1_consume_body(http1_parser_s *parser, void *buffer,
741
- size_t length, uint8_t **start) {
742
- if (parser->state.content_length > 0 &&
743
- parser->state.content_length > parser->state.read) {
744
- /* normal, streamed data */
745
- return http1_consume_body_streamed(parser, buffer, length, start);
746
- } else if (parser->state.content_length <= 0 &&
747
- (parser->state.reserved & HTTP1_P_FLAG_CHUNKED)) {
748
- /* chunked encoding */
749
- return http1_consume_body_chunked(parser, buffer, length, start);
750
- } else {
751
- /* nothing to do - parsing complete */
752
- parser->state.reserved |= HTTP1_P_FLAG_COMPLETE;
753
- }
754
- return 0;
755
- }
756
-
757
- /* *****************************************************************************
758
- HTTP/1.1 parser function
759
- ***************************************************************************** */
760
- #if DEBUG
761
- #include <assert.h>
762
- #define HTTP1_ASSERT assert
763
- #else
764
- #define HTTP1_ASSERT(...)
765
- #endif
766
-
767
- /**
768
- * Returns the amount of data actually consumed by the parser.
769
- *
770
- * The value 0 indicates there wasn't enough data to be parsed and the same
771
- * buffer (with more data) should be resubmitted.
772
- *
773
- * A value smaller than the buffer size indicates that EITHER a request /
774
- * response was detected OR that the leftover could not be consumed because more
775
- * data was required.
776
- *
777
- * Simply resubmit the remainder of the data to continue parsing.
778
- *
779
- * A request / response callback automatically stops the parsing process,
780
- * allowing the user to adjust or refresh the state of the data.
781
- */
782
- static size_t http1_parse(http1_parser_s *parser, void *buffer, size_t length) {
783
- if (!length)
784
- return 0;
785
- HTTP1_ASSERT(parser && buffer);
786
- parser->state.next = NULL;
787
- uint8_t *start = (uint8_t *)buffer;
788
- uint8_t *end = start;
789
- uint8_t *const stop = start + length;
790
- uint8_t eol_len = 0;
791
- #define HTTP1_CONSUMED ((size_t)((uintptr_t)start - (uintptr_t)buffer))
792
-
793
- re_eval:
794
- switch ((parser->state.reserved & 7)) {
795
-
796
- case 0: /* request / response line */
797
- /* clear out any leading white space */
798
- while ((start < stop) &&
799
- (*start == '\r' || *start == '\n' || *start == ' ' || *start == 0)) {
800
- ++start;
801
- }
802
- end = start;
803
- /* make sure the whole line is available*/
804
- if (!(eol_len = seek2eol(&end, stop)))
805
- return HTTP1_CONSUMED;
806
-
807
- if (start[0] == 'H' && start[1] == 'T' && start[2] == 'T' &&
808
- start[3] == 'P') {
809
- /* HTTP response */
810
- if (http1_consume_response_line(parser, start, end - eol_len + 1))
811
- goto error;
812
- } else if (http_tolower(start[0]) >= 'a' && http_tolower(start[0]) <= 'z') {
813
- /* HTTP request */
814
- if (http1_consume_request_line(parser, start, end - eol_len + 1))
815
- goto error;
816
- } else
817
- goto error;
818
- end = start = end + 1;
819
- parser->state.reserved |= HTTP1_P_FLAG_STATUS_LINE;
820
-
821
- /* fallthrough */
822
- case 1: /* headers */
823
- do {
824
- if (start >= stop)
825
- return HTTP1_CONSUMED; /* buffer ended on header line */
826
- if (*start == '\r' || *start == '\n') {
827
- goto finished_headers; /* empty line, end of headers */
828
- }
829
- end = start;
830
- if (!(eol_len = seek2eol(&end, stop)))
831
- return HTTP1_CONSUMED;
832
- if (http1_consume_header(parser, start, end - eol_len + 1))
833
- goto error;
834
- end = start = end + 1;
835
- } while ((parser->state.reserved & HTTP1_P_FLAG_HEADER_COMPLETE) == 0);
836
- finished_headers:
837
- ++start;
838
- if (*start == '\n')
839
- ++start;
840
- end = start;
841
- parser->state.reserved |= HTTP1_P_FLAG_HEADER_COMPLETE;
842
- /* fallthrough */
843
- case (HTTP1_P_FLAG_HEADER_COMPLETE | HTTP1_P_FLAG_STATUS_LINE):
844
- /* request body */
845
- {
846
- int t3 = http1_consume_body(parser, buffer, length, &start);
847
- switch (t3) {
848
- case -1:
849
- goto error;
850
- case -2:
851
- goto re_eval;
852
- }
853
- break;
854
- }
855
- }
856
- /* are we done ? */
857
- if (parser->state.reserved & HTTP1_P_FLAG_COMPLETE) {
858
- parser->state.next = start;
859
- if (((parser->state.reserved & HTTP1_P_FLAG_RESPONSE)
860
- ? http1_on_response
861
- : http1_on_request)(parser))
862
- goto error;
863
- parser->state = (struct http1_parser_protected_read_only_state_s){0};
864
- }
865
- return HTTP1_CONSUMED;
866
- error:
867
- http1_on_error(parser);
868
- parser->state = (struct http1_parser_protected_read_only_state_s){0};
869
- return length;
870
- #undef HTTP1_CONSUMED
871
- }
872
-
873
- #endif
1
+ #ifndef H_HTTP1_PARSER_H
2
+ /*
3
+ Copyright: Boaz Segev, 2017-2020
4
+ License: MIT
5
+
6
+ Feel free to copy, use and enjoy according to the license provided.
7
+ */
8
+
9
+ /**
10
+ This is a callback based parser. It parses the skeleton of the HTTP/1.x protocol
11
+ and leaves most of the work (validation, error checks, etc.) to the callbacks.
12
+ */
13
+ #define H_HTTP1_PARSER_H
14
+ #include <stddef.h>
15
+ #include <stdint.h>
16
+ #include <stdio.h>
17
+ #include <stdlib.h>
18
+ #include <string.h>
19
+ #include <sys/types.h>
20
+
21
+ /* *****************************************************************************
22
+ Parser Settings
23
+ ***************************************************************************** */
24
+
25
+ #ifndef HTTP_HEADERS_LOWERCASE
26
+ /**
27
+ * When set to a non-zero value, HTTP headers will be converted to lowercase and header
28
+ * searches will be case sensitive.
29
+ *
30
+ * This is highly recommended, required by facil.io and helps with HTTP/2
31
+ * compatibility.
32
+ */
33
+ #define HTTP_HEADERS_LOWERCASE 1
34
+ #endif
35
+
36
+ #ifndef HTTP_ADD_CONTENT_LENGTH_HEADER_IF_MISSING
37
+ #define HTTP_ADD_CONTENT_LENGTH_HEADER_IF_MISSING 1
38
+ #endif
39
+
40
+ #ifndef FIO_MEMCHAR
41
+ /** Prefer a custom memchr implementation. Usually memchr is better. */
42
+ #define FIO_MEMCHAR 0
43
+ #endif
44
+
45
+ #ifndef ALLOW_UNALIGNED_MEMORY_ACCESS
46
+ /** Performs some optimizations assuming unaligned memory access is okay. */
47
+ #define ALLOW_UNALIGNED_MEMORY_ACCESS 0
48
+ #endif
49
+
50
+ #ifndef HTTP1_PARSER_CONVERT_EOL2NUL
51
+ #define HTTP1_PARSER_CONVERT_EOL2NUL 0
52
+ #endif
53
+
54
+ /* *****************************************************************************
55
+ Parser API
56
+ ***************************************************************************** */
57
+
58
+ /** this struct contains the state of the parser. */
59
+ typedef struct http1_parser_s {
60
+ struct http1_parser_protected_read_only_state_s {
61
+ long long content_length; /* body length; negative values indicate chunked data state */
62
+ ssize_t read; /* total number of bytes read so far (body only) */
63
+ uint8_t *next; /* the known position for the end of request/response */
64
+ uint8_t reserved; /* for internal use (holds the HTTP1_P_FLAG_* bits) */
65
+ } state;
66
+ } http1_parser_s;
67
+
68
+ #define HTTP1_PARSER_INIT \
69
+ { \
70
+ { 0 } \
71
+ }
72
+
73
+ /**
74
+ * Returns the amount of data actually consumed by the parser.
75
+ *
76
+ * The value 0 indicates there wasn't enough data to be parsed and the same
77
+ * buffer (with more data) should be resubmitted.
78
+ *
79
+ * A value smaller than the buffer size indicates that EITHER a request /
80
+ * response was detected OR that the leftover could not be consumed because more
81
+ * data was required.
82
+ *
83
+ * Simply resubmit the remainder of the data to continue parsing.
84
+ *
85
+ * A request / response callback automatically stops the parsing process,
86
+ * allowing the user to adjust or refresh the state of the data.
87
+ */
88
+ static size_t http1_parse(http1_parser_s *parser, void *buffer, size_t length);
89
+
90
+ /* *****************************************************************************
91
+ Required Callbacks (MUST be implemented by including file)
92
+ ***************************************************************************** */
93
+
94
+ /** called when a request was received. */
95
+ static int http1_on_request(http1_parser_s *parser);
96
+ /** called when a response was received. */
97
+ static int http1_on_response(http1_parser_s *parser);
98
+ /** called when a request method is parsed. */
99
+ static int http1_on_method(http1_parser_s *parser, char *method,
100
+ size_t method_len);
101
+ /** called when a response status is parsed. the status_str is the string
102
+ * without the prefixed numerical status indicator.*/
103
+ static int http1_on_status(http1_parser_s *parser, size_t status,
104
+ char *status_str, size_t len);
105
+ /** called when a request path (excluding query) is parsed. */
106
+ static int http1_on_path(http1_parser_s *parser, char *path, size_t path_len);
107
+ /** called when a request query (the part after the `?`) is parsed. */
108
+ static int http1_on_query(http1_parser_s *parser, char *query,
109
+ size_t query_len);
110
+ /** called when the HTTP/1.x version is parsed. */
111
+ static int http1_on_version(http1_parser_s *parser, char *version, size_t len);
112
+ /** called when a header is parsed. */
113
+ static int http1_on_header(http1_parser_s *parser, char *name, size_t name_len,
114
+ char *data, size_t data_len);
115
+ /** called when a body chunk is parsed. */
116
+ static int http1_on_body_chunk(http1_parser_s *parser, char *data,
117
+ size_t data_len);
118
+ /** called when a protocol error occurred. */
119
+ static int http1_on_error(http1_parser_s *parser);
120
+
121
+ /* *****************************************************************************
122
+
123
+
124
+
125
+
126
+
127
+
128
+
129
+
130
+
131
+
132
+
133
+
134
+
135
+
136
+
137
+
138
+
139
+ Implementation Details
140
+
141
+
142
+
143
+
144
+
145
+
146
+
147
+
148
+
149
+
150
+
151
+
152
+
153
+
154
+
155
+
156
+
157
+ ***************************************************************************** */
158
+
159
+ #if HTTP_HEADERS_LOWERCASE
160
+ #define HEADER_NAME_IS_EQ(var_name, const_name, len) \
161
+ (!memcmp((var_name), (const_name), (len)))
162
+ #else
163
+ #define HEADER_NAME_IS_EQ(var_name, const_name, len) \
164
+ (!strncasecmp((var_name), (const_name), (len)))
165
+ #endif
166
+
167
+ #define HTTP1_P_FLAG_STATUS_LINE 1
168
+ #define HTTP1_P_FLAG_HEADER_COMPLETE 2
169
+ #define HTTP1_P_FLAG_COMPLETE 4
170
+ #define HTTP1_P_FLAG_CLENGTH 8
171
+ #define HTTP1_PARSER_BIT_16 16
172
+ #define HTTP1_PARSER_BIT_32 32
173
+ #define HTTP1_P_FLAG_CHUNKED 64
174
+ #define HTTP1_P_FLAG_RESPONSE 128
175
+
176
+ /* *****************************************************************************
177
+ Seeking for characters in a string
178
+ ***************************************************************************** */
179
+
180
+ #if FIO_MEMCHAR
181
+
182
/**
 * Scans `[*buffer, limit)` for the byte `c`, testing eight bytes at a time
 * where possible.
 *
 * Returns 1 with `*buffer` pointing at the first match. Returns 0 with
 * `*buffer` advanced to `limit` when the byte is absent. An empty range
 * returns 0 and leaves `*buffer` untouched.
 *
 * This seems to be faster on some systems, especially for smaller distances.
 * On newer systems, `memchr` should be faster.
 *
 * NOTE(review): HTTP1_UNALIGNED_MEMORY_ACCESS_ENABLED is not defined in the
 * visible part of this header, so the aligned path is the one compiled unless
 * it is defined elsewhere - confirm.
 */
static int seek2ch(uint8_t **buffer, register uint8_t *const limit,
                   const uint8_t c) {
  if (*buffer >= limit)
    return 0;
  if (**buffer == c)
    return 1;

#if !HTTP1_UNALIGNED_MEMORY_ACCESS_ENABLED
  /* short ranges are not worth the alignment work: scan them byte by byte */
  if ((uintptr_t)limit <= 16 + ((uintptr_t)*buffer & (~(uintptr_t)7)))
    goto finish;

  /* walk byte by byte up to the next 8 byte boundary */
  {
    const uint8_t *boundary =
        (uint8_t *)(((uintptr_t)(*buffer) & (~(uintptr_t)7)) + 8);
    if (*buffer < boundary)
      ++(*buffer); /* the first byte was already tested above */
    if (limit >= boundary) {
      for (; *buffer < boundary; ++(*buffer)) {
        if (**buffer == c)
          return 1;
      }
    }
  }
  const uint8_t *word_limit = (uint8_t *)((uintptr_t)limit & (~(uintptr_t)7));
#else
  const uint8_t *word_limit = (uint8_t *)limit - 7;
#endif
  /* `pattern` holds `c` replicated into every byte of a 64 bit word */
  const uint64_t pattern = 0x0101010101010101ULL * c;
  while (*buffer < word_limit) {
    /* bytes equal to `c` become 0xFF in `same` */
    const uint64_t same = ~((*((uint64_t *)*buffer)) ^ pattern);
    const uint64_t low = (same & 0x7f7f7f7f7f7f7f7fllu) + 0x0101010101010101llu;
    const uint64_t high = (same & 0x8080808080808080llu);
    if ((low & high))
      break; /* this word may hold a match; the byte loop below pins it down */
    *buffer += 8;
  }
#if !HTTP1_UNALIGNED_MEMORY_ACCESS_ENABLED
finish:
#endif
  for (; *buffer < limit; ++(*buffer)) {
    if (**buffer == c)
      return 1;
  }
  return 0;
}
239
+
240
+ #else
241
+
242
/*
 * A `memchr` based helper that locates the first `ch` in `[*pos, limit)`.
 *
 * On success `*pos` is moved onto the match and 1 is returned. When the byte
 * is absent `*pos` is moved to `limit` and 0 is returned. An empty range
 * returns 0 without touching `*pos`. The buffer itself is never modified.
 */
inline static uint8_t seek2ch(uint8_t **pos, uint8_t *const limit, uint8_t ch) {
  uint8_t *const from = *pos;
  if (from >= limit)
    return 0;
  if (*from != ch) {
    /* library based lookup (sometimes slower than the custom scanner) */
    uint8_t *const hit = memchr(from, ch, limit - from);
    *pos = hit ? hit : limit;
    return hit != NULL;
  }
  return 1;
}
258
+
259
+ #endif
260
+
261
/*
 * Advances `*pos` to the next '\n' inside `[*pos, limit)` and returns the
 * length of the EOL marker: 2 for "\r\n", 1 for a bare "\n" and 0 when no
 * '\n' was found.
 *
 * When HTTP1_PARSER_CONVERT_EOL2NUL is set, the EOL bytes are overwritten
 * with NUL.
 */
inline static uint8_t seek2eol(uint8_t **pos, uint8_t *const limit) {
  uint8_t *const line_start = *pos;
  /* single char lookup using memchr might be better when target is far... */
  if (!seek2ch(pos, limit, '\n'))
    return 0;
  /* Only look back for '\r' when the '\n' is not the first byte of the
   * searched range; otherwise `(*pos)[-1]` lies outside of it. */
  if (*pos > line_start && (*pos)[-1] == '\r') {
#if HTTP1_PARSER_CONVERT_EOL2NUL
    (*pos)[-1] = (*pos)[0] = 0;
#endif
    return 2;
  }
#if HTTP1_PARSER_CONVERT_EOL2NUL
  (*pos)[0] = 0;
#endif
  return 1;
}
277
+
278
+ /* *****************************************************************************
279
+ Change a letter to lower case (latin only)
280
+ ***************************************************************************** */
281
+
282
/* Maps 'A'-'Z' onto 'a'-'z'; every other byte value is returned unchanged. */
static uint8_t http_tolower(uint8_t c) {
  return (uint8_t)((c >= 'A' && c <= 'Z') ? (c | 32) : c);
}
287
+
288
+ /* *****************************************************************************
289
+ String to Number
290
+ ***************************************************************************** */
291
+
292
/**
 * Converts a String to a number using base 10.
 *
 * Leading spaces, tabs and form feeds are skipped, followed by any run of
 * '+' / '-' signs (every '-' flips the sign). Digits are consumed until a non
 * digit is met or the value would no longer fit; an overflowing value is
 * saturated to the largest `long long`.
 *
 * When `end` is not NULL it receives the position of the first byte that was
 * not consumed.
 */
static long long http1_atol(const uint8_t *buf, const uint8_t **end) {
  const unsigned long long signed_max = (~0ULL) >> 1;
  unsigned long long value = 0;
  uint8_t negative = 0;

  for (; *buf == ' ' || *buf == '\t' || *buf == '\f'; ++buf) {
  }
  for (; *buf == '-' || *buf == '+'; ++buf)
    negative ^= (*buf == '-');
  for (; value <= (signed_max / 10) && *buf >= '0' && *buf <= '9'; ++buf)
    value = (value * 10) + (unsigned long long)(*buf - '0');

  /* saturate if the sign bit was reached or unread digits remain */
  if (value >= (~signed_max) || (*buf >= '0' && *buf <= '9'))
    value = signed_max;
  if (negative)
    value = 0ULL - value;
  if (end)
    *end = buf;
  return value;
}
314
+
315
/**
 * Converts a String to a number using base 16.
 *
 * Skips, in order: up to 32 blanks (space, tab, form feed), up to 32 sign
 * characters (every '-' flips the sign), one optional '0', one optional
 * 'x' / 'X' and up to 32 further '0' characters. Hex digits are then consumed
 * until a non hex byte is met or the top four bits of the value are in use.
 *
 * When `end` is not NULL it receives the position of the first byte that was
 * not consumed.
 */
static long long http1_atol16(const uint8_t *buf, const uint8_t **end) {
  unsigned long long value = 0;
  uint8_t negative = 0;

  for (int skipped = 0;
       skipped < 32 && (*buf == ' ' || *buf == '\t' || *buf == '\f'); ++skipped)
    ++buf;
  for (int skipped = 0; skipped < 32 && (*buf == '-' || *buf == '+');
       ++skipped) {
    negative ^= (*buf == '-');
    ++buf;
  }
  if (*buf == '0')
    ++buf;
  if ((*buf | 32) == 'x')
    ++buf;
  for (int skipped = 0; skipped < 32 && (*buf == '0'); ++skipped)
    ++buf;

  /* stop as soon as another shift would push bits out of the value */
  while (!(value & (~((~(0ULL)) >> 4)))) {
    const uint8_t ch = *buf;
    const uint8_t lower = (uint8_t)(ch | 32);
    unsigned long long nibble;
    if (ch >= '0' && ch <= '9')
      nibble = (unsigned long long)(ch - '0');
    else if (lower >= 'a' && lower <= 'f')
      nibble = (unsigned long long)(lower - ('a' - 10));
    else
      break;
    value = (value << 4) | nibble;
    ++buf;
  }
  if (negative)
    value = 0ULL - value;
  if (end)
    *end = buf;
  return value;
}
347
+
348
+ /* *****************************************************************************
349
+ HTTP/1.1 parser stages
350
+ ***************************************************************************** */
351
+
352
+ inline static int http1_consume_response_line(http1_parser_s *parser,
353
+ uint8_t *start, uint8_t *end) {
354
+ parser->state.reserved |= HTTP1_P_FLAG_RESPONSE;
355
+ uint8_t *tmp = start;
356
+ if (!seek2ch(&tmp, end, ' '))
357
+ return -1;
358
+ if (http1_on_version(parser, (char *)start, tmp - start))
359
+ return -1;
360
+ tmp = start = tmp + 1;
361
+ if (!seek2ch(&tmp, end, ' '))
362
+ return -1;
363
+ if (http1_on_status(parser, http1_atol(start, NULL), (char *)(tmp + 1),
364
+ end - tmp))
365
+ return -1;
366
+ return 0;
367
+ }
368
+
369
+ inline static int http1_consume_request_line(http1_parser_s *parser,
370
+ uint8_t *start, uint8_t *end) {
371
+ uint8_t *tmp = start;
372
+ uint8_t *host_start = NULL;
373
+ uint8_t *host_end = NULL;
374
+ if (!seek2ch(&tmp, end, ' '))
375
+ return -1;
376
+ if (http1_on_method(parser, (char *)start, tmp - start))
377
+ return -1;
378
+ tmp = start = tmp + 1;
379
+ if (start[0] == 'h' && start[1] == 't' && start[2] == 't' &&
380
+ start[3] == 'p') {
381
+ if (start[4] == ':' && start[5] == '/' && start[6] == '/') {
382
+ /* Request URI is in long form... emulate Host header instead. */
383
+ tmp = host_end = host_start = (start += 7);
384
+ } else if (start[4] == 's' && start[5] == ':' && start[6] == '/' &&
385
+ start[7] == '/') {
386
+ /* Secure request is in long form... emulate Host header instead. */
387
+ tmp = host_end = host_start = (start += 8);
388
+ } else
389
+ goto review_path;
390
+ if (!seek2ch(&tmp, end, ' '))
391
+ return -1;
392
+ *tmp = ' ';
393
+ if (!seek2ch(&host_end, tmp, '/')) {
394
+ if (http1_on_path(parser, (char *)"/", 1))
395
+ return -1;
396
+ goto start_version;
397
+ }
398
+ host_end[0] = '/';
399
+ start = host_end;
400
+ }
401
+ review_path:
402
+ tmp = start;
403
+ if (seek2ch(&tmp, end, '?')) {
404
+ if (http1_on_path(parser, (char *)start, tmp - start))
405
+ return -1;
406
+ tmp = start = tmp + 1;
407
+ if (!seek2ch(&tmp, end, ' '))
408
+ return -1;
409
+ if (tmp - start > 0 && http1_on_query(parser, (char *)start, tmp - start))
410
+ return -1;
411
+ } else {
412
+ tmp = start;
413
+ if (!seek2ch(&tmp, end, ' '))
414
+ return -1;
415
+ if (http1_on_path(parser, (char *)start, tmp - start))
416
+ return -1;
417
+ }
418
+ start_version:
419
+ start = tmp + 1;
420
+ if (start + 5 >= end) /* require "HTTP/" */
421
+ return -1;
422
+ if (http1_on_version(parser, (char *)start, end - start))
423
+ return -1;
424
+ /* */
425
+ if (host_start && http1_on_header(parser, (char *)"host", 4,
426
+ (char *)host_start, host_end - host_start))
427
+ return -1;
428
+ return 0;
429
+ }
430
+
431
+ #ifndef HTTP1_ALLOW_CHUNKED_IN_MIDDLE_OF_HEADER
432
+ inline /* inline the function of it's short enough */
433
+ #endif
434
+ static int
435
+ http1_consume_header_transfer_encoding(http1_parser_s *parser,
436
+ uint8_t *start, uint8_t *end_name,
437
+ uint8_t *start_value, uint8_t *end) {
438
+ /* this removes the `chunked` marker and prepares to "unchunk" the data */
439
+ while (start_value < end && (end[-1] == ',' || end[-1] == ' '))
440
+ --end;
441
+ if ((end - start_value) == 7 &&
442
+ #if HTTP1_UNALIGNED_MEMORY_ACCESS_ENABLED
443
+ (((uint32_t *)(start_value))[0] | 0x20202020) ==
444
+ ((uint32_t *)"chun")[0] &&
445
+ (((uint32_t *)(start_value + 3))[0] | 0x20202020) ==
446
+ ((uint32_t *)"nked")[0]
447
+ #else
448
+ ((start_value[0] | 32) == 'c' && (start_value[1] | 32) == 'h' &&
449
+ (start_value[2] | 32) == 'u' && (start_value[3] | 32) == 'n' &&
450
+ (start_value[4] | 32) == 'k' && (start_value[5] | 32) == 'e' &&
451
+ (start_value[6] | 32) == 'd')
452
+ #endif
453
+ ) {
454
+ /* simple case,only `chunked` as a value */
455
+ parser->state.reserved |= HTTP1_P_FLAG_CHUNKED;
456
+ parser->state.content_length = 0;
457
+ start_value += 7;
458
+ while (start_value < end && (*start_value == ',' || *start_value == ' '))
459
+ ++start_value;
460
+ if (!(end - start_value))
461
+ return 0;
462
+ } else if ((end - start_value) > 7 &&
463
+ ((end[(-7 + 0)] | 32) == 'c' && (end[(-7 + 1)] | 32) == 'h' &&
464
+ (end[(-7 + 2)] | 32) == 'u' && (end[(-7 + 3)] | 32) == 'n' &&
465
+ (end[(-7 + 4)] | 32) == 'k' && (end[(-7 + 5)] | 32) == 'e' &&
466
+ (end[(-7 + 6)] | 32) == 'd')) {
467
+ /* simple case,`chunked` at the end of list (RFC required) */
468
+ parser->state.reserved |= HTTP1_P_FLAG_CHUNKED;
469
+ parser->state.content_length = 0;
470
+ end -= 7;
471
+ while (start_value < end && (end[-1] == ',' || end[-1] == ' '))
472
+ --end;
473
+ if (!(end - start_value))
474
+ return 0;
475
+ }
476
+ #ifdef HTTP1_ALLOW_CHUNKED_IN_MIDDLE_OF_HEADER /* RFC diisallows this */
477
+ else if ((end - start_value) > 7 && (end - start_value) < 256) {
478
+ /* complex case, `the, chunked, marker, is in the middle of list */
479
+ uint8_t val[256];
480
+ size_t val_len = 0;
481
+ while (start_value < end && val_len < 256) {
482
+ if ((end - start_value) >= 7) {
483
+ if (
484
+ #if HTTP1_UNALIGNED_MEMORY_ACCESS_ENABLED
485
+ (((uint32_t *)(start_value))[0] | 0x20202020) ==
486
+ ((uint32_t *)"chun")[0] &&
487
+ (((uint32_t *)(start_value + 3))[0] | 0x20202020) ==
488
+ ((uint32_t *)"nked")[0]
489
+ #else
490
+ ((start_value[0] | 32) == 'c' && (start_value[1] | 32) == 'h' &&
491
+ (start_value[2] | 32) == 'u' && (start_value[3] | 32) == 'n' &&
492
+ (start_value[4] | 32) == 'k' && (start_value[5] | 32) == 'e' &&
493
+ (start_value[6] | 32) == 'd')
494
+ #endif
495
+
496
+ ) {
497
+ parser->state.reserved |= HTTP1_P_FLAG_CHUNKED;
498
+ parser->state.content_length = 0;
499
+ start_value += 7;
500
+ /* skip comma / white space */
501
+ while (start_value < end &&
502
+ (*start_value == ',' || *start_value == ' '))
503
+ ++start_value;
504
+ continue;
505
+ }
506
+ }
507
+ /* copy value */
508
+ while (start_value < end && val_len < 256 && start_value[0] != ',') {
509
+ val[val_len++] = *start_value;
510
+ ++start_value;
511
+ }
512
+ /* copy comma */
513
+ if (start_value[0] == ',' && val_len < 256) {
514
+ val[val_len++] = *start_value;
515
+ ++start_value;
516
+ }
517
+ /* skip spaces */
518
+ while (start_value < end && start_value[0] == ' ') {
519
+ ++start_value;
520
+ }
521
+ }
522
+ if (val_len < 256) {
523
+ while (start_value < end && val_len < 256) {
524
+ val[val_len++] = *start_value;
525
+ ++start_value;
526
+ }
527
+ val[val_len] = 0;
528
+ }
529
+ /* perform callback with `val` or indicate error */
530
+ if (val_len == 256 ||
531
+ (val_len && http1_on_header(parser, (char *)start, (end_name - start),
532
+ (char *)val, val_len)))
533
+ return -1;
534
+ return 0;
535
+ }
536
+ #endif /* HTTP1_ALLOW_CHUNKED_IN_MIDDLE_OF_HEADER */
537
+ /* perform callback */
538
+ if (http1_on_header(parser, (char *)start, (end_name - start),
539
+ (char *)start_value, end - start_value))
540
+ return -1;
541
+ return 0;
542
+ }
543
+ inline static int http1_consume_header_top(http1_parser_s *parser,
544
+ uint8_t *start, uint8_t *end_name,
545
+ uint8_t *start_value, uint8_t *end) {
546
+ if ((end_name - start) == 14 &&
547
+ #if HTTP1_UNALIGNED_MEMORY_ACCESS_ENABLED && HTTP_HEADERS_LOWERCASE
548
+ *((uint64_t *)start) == *((uint64_t *)"content-") &&
549
+ *((uint64_t *)(start + 6)) == *((uint64_t *)"t-length")
550
+ #else
551
+ HEADER_NAME_IS_EQ((char *)start, "content-length", 14)
552
+ #endif
553
+ ) {
554
+ /* handle the special `content-length` header */
555
+ if ((parser->state.reserved & HTTP1_P_FLAG_CHUNKED))
556
+ return 0; /* ignore if `chunked` */
557
+ long long old_clen = parser->state.content_length;
558
+ parser->state.content_length = http1_atol(start_value, NULL);
559
+ if ((parser->state.reserved & HTTP1_P_FLAG_CLENGTH) &&
560
+ old_clen != parser->state.content_length) {
561
+ /* content-length header repeated with conflict */
562
+ return -1;
563
+ }
564
+ parser->state.reserved |= HTTP1_P_FLAG_CLENGTH;
565
+ } else if ((end_name - start) == 17 && (end - start_value) >= 7 &&
566
+ !parser->state.content_length &&
567
+ #if HTTP1_UNALIGNED_MEMORY_ACCESS_ENABLED && HTTP_HEADERS_LOWERCASE
568
+ *((uint64_t *)start) == *((uint64_t *)"transfer") &&
569
+ *((uint64_t *)(start + 8)) == *((uint64_t *)"-encodin")
570
+ #else
571
+ HEADER_NAME_IS_EQ((char *)start, "transfer-encoding", 17)
572
+ #endif
573
+ ) {
574
+ /* handle the special `transfer-encoding: chunked` header */
575
+ return http1_consume_header_transfer_encoding(parser, start, end_name,
576
+ start_value, end);
577
+ }
578
+ /* perform callback */
579
+ if (http1_on_header(parser, (char *)start, (end_name - start),
580
+ (char *)start_value, end - start_value))
581
+ return -1;
582
+ return 0;
583
+ }
584
+
585
+ inline static int http1_consume_header_trailer(http1_parser_s *parser,
586
+ uint8_t *start,
587
+ uint8_t *end_name,
588
+ uint8_t *start_value,
589
+ uint8_t *end) {
590
+ if ((end_name - start) > 1 && start[0] == 'x') {
591
+ /* X- headers are allowed */
592
+ goto white_listed;
593
+ }
594
+
595
+ /* white listed trailer names */
596
+ const struct {
597
+ char *name;
598
+ long len;
599
+ } http1_trailer_white_list[] = {
600
+ {"server-timing", 13}, /* specific for client data... */
601
+ {NULL, 0}, /* end of list marker */
602
+ };
603
+ for (size_t i = 0; http1_trailer_white_list[i].name; ++i) {
604
+ if ((long)(end_name - start) == http1_trailer_white_list[i].len &&
605
+ HEADER_NAME_IS_EQ((char *)start, http1_trailer_white_list[i].name,
606
+ http1_trailer_white_list[i].len)) {
607
+ /* header disallowed here */
608
+ goto white_listed;
609
+ }
610
+ }
611
+ return 0;
612
+ white_listed:
613
+ /* perform callback */
614
+ if (http1_on_header(parser, (char *)start, (end_name - start),
615
+ (char *)start_value, end - start_value))
616
+ return -1;
617
+ return 0;
618
+ }
619
+
620
+ inline static int http1_consume_header(http1_parser_s *parser, uint8_t *start,
621
+ uint8_t *end) {
622
+ uint8_t *end_name = start;
623
+ /* divide header name from data */
624
+ if (!seek2ch(&end_name, end, ':'))
625
+ return -1;
626
+ if (end_name[-1] == ' ' || end_name[-1] == '\t')
627
+ return -1;
628
+ #if HTTP_HEADERS_LOWERCASE
629
+ for (uint8_t *t = start; t < end_name; t++) {
630
+ *t = http_tolower(*t);
631
+ }
632
+ #endif
633
+ uint8_t *start_value = end_name + 1;
634
+ // clear away leading white space from value.
635
+ while (start_value < end &&
636
+ (start_value[0] == ' ' || start_value[0] == '\t')) {
637
+ start_value++;
638
+ };
639
+ return (parser->state.read ? http1_consume_header_trailer
640
+ : http1_consume_header_top)(
641
+ parser, start, end_name, start_value, end);
642
+ }
643
+
644
+ /* *****************************************************************************
645
+ HTTP/1.1 Body handling
646
+ ***************************************************************************** */
647
+
648
+ inline static int http1_consume_body_streamed(http1_parser_s *parser,
649
+ void *buffer, size_t length,
650
+ uint8_t **start) {
651
+ uint8_t *end = *start + parser->state.content_length - parser->state.read;
652
+ uint8_t *const stop = ((uint8_t *)buffer) + length;
653
+ if (end > stop)
654
+ end = stop;
655
+ if (end > *start &&
656
+ http1_on_body_chunk(parser, (char *)(*start), end - *start))
657
+ return -1;
658
+ parser->state.read += (end - *start);
659
+ *start = end;
660
+ if (parser->state.content_length <= parser->state.read)
661
+ parser->state.reserved |= HTTP1_P_FLAG_COMPLETE;
662
+ return 0;
663
+ }
664
+
665
+ inline static int http1_consume_body_chunked(http1_parser_s *parser,
666
+ void *buffer, size_t length,
667
+ uint8_t **start) {
668
+ uint8_t *const stop = ((uint8_t *)buffer) + length;
669
+ uint8_t *end = *start;
670
+ while (*start < stop) {
671
+ if (parser->state.content_length == 0) {
672
+ if (end + 2 >= stop)
673
+ return 0;
674
+ if ((end[0] == '\r' && end[1] == '\n')) {
675
+ /* remove tailing EOL that wasn't processed and retest */
676
+ end += 2;
677
+ *start = end;
678
+ if (end + 2 >= stop)
679
+ return 0;
680
+ }
681
+ long long chunk_len = http1_atol16(end, (const uint8_t **)&end);
682
+ if (end + 2 > stop) /* overflowed? */
683
+ return 0;
684
+ if ((end[0] != '\r' || end[1] != '\n'))
685
+ return -1; /* required EOL after content length */
686
+ end += 2;
687
+
688
+ parser->state.content_length = 0 - chunk_len;
689
+ *start = end;
690
+ if (parser->state.content_length == 0) {
691
+ /* all chunked data was parsed */
692
+ /* update content-length */
693
+ parser->state.content_length = parser->state.read;
694
+ #ifdef HTTP_ADD_CONTENT_LENGTH_HEADER_IF_MISSING
695
+ { /* add virtual header ... ? */
696
+ char buf[512];
697
+ size_t buf_len = 512;
698
+ size_t tmp_len = parser->state.read;
699
+ buf[--buf_len] = 0;
700
+ while (tmp_len) {
701
+ size_t mod = tmp_len / 10;
702
+ buf[--buf_len] = '0' + (tmp_len - (mod * 10));
703
+ tmp_len = mod;
704
+ }
705
+ if (!(parser->state.reserved & HTTP1_P_FLAG_CLENGTH) &&
706
+ http1_on_header(parser, "content-length", 14,
707
+ (char *)buf + buf_len, 511 - buf_len)) {
708
+ return -1;
709
+ }
710
+ }
711
+ #endif
712
+ /* FIXME: consume trailing EOL */
713
+ if (*start + 2 <= stop && (start[0][0] == '\r' || start[0][0] == '\n'))
714
+ *start += 1 + (start[0][1] == '\r' || start[0][1] == '\n');
715
+ else {
716
+ /* remove the "headers complete" and "trailer" flags */
717
+ parser->state.reserved =
718
+ HTTP1_P_FLAG_STATUS_LINE | HTTP1_P_FLAG_CLENGTH;
719
+ return -2;
720
+ }
721
+ /* the parsing complete flag */
722
+ parser->state.reserved |= HTTP1_P_FLAG_COMPLETE;
723
+ return 0;
724
+ }
725
+ }
726
+ end = *start + (0 - parser->state.content_length);
727
+ if (end > stop)
728
+ end = stop;
729
+ if (end > *start &&
730
+ http1_on_body_chunk(parser, (char *)(*start), end - *start)) {
731
+ return -1;
732
+ }
733
+ parser->state.read += (end - *start);
734
+ parser->state.content_length += (end - *start);
735
+ *start = end;
736
+ }
737
+ return 0;
738
+ }
739
+
740
+ inline static int http1_consume_body(http1_parser_s *parser, void *buffer,
741
+ size_t length, uint8_t **start) {
742
+ if (parser->state.content_length > 0 &&
743
+ parser->state.content_length > parser->state.read) {
744
+ /* normal, streamed data */
745
+ return http1_consume_body_streamed(parser, buffer, length, start);
746
+ } else if (parser->state.content_length <= 0 &&
747
+ (parser->state.reserved & HTTP1_P_FLAG_CHUNKED)) {
748
+ /* chuncked encoding */
749
+ return http1_consume_body_chunked(parser, buffer, length, start);
750
+ } else {
751
+ /* nothing to do - parsing complete */
752
+ parser->state.reserved |= HTTP1_P_FLAG_COMPLETE;
753
+ }
754
+ return 0;
755
+ }
756
+
757
+ /* *****************************************************************************
758
+ HTTP/1.1 parser function
759
+ ***************************************************************************** */
760
+ #if DEBUG
761
+ #include <assert.h>
762
+ #define HTTP1_ASSERT assert
763
+ #else
764
+ #define HTTP1_ASSERT(...)
765
+ #endif
766
+
767
+ /**
768
+ * Returns the amount of data actually consumed by the parser.
769
+ *
770
+ * The value 0 indicates there wasn't enough data to be parsed and the same
771
+ * buffer (with more data) should be resubmitted.
772
+ *
773
+ * A value smaller than the buffer size indicates that EITHER a request /
774
+ * response was detected OR that the leftover could not be consumed because more
775
+ * data was required.
776
+ *
777
+ * Simply resubmit the remainder of the data to continue parsing.
778
+ *
779
+ * A request / response callback automatically stops the parsing process,
780
+ * allowing the user to adjust or refresh the state of the data.
781
+ */
782
+ static size_t http1_parse(http1_parser_s *parser, void *buffer, size_t length) {
783
+ if (!length)
784
+ return 0;
785
+ HTTP1_ASSERT(parser && buffer);
786
+ parser->state.next = NULL;
787
+ uint8_t *start = (uint8_t *)buffer;
788
+ uint8_t *end = start;
789
+ uint8_t *const stop = start + length;
790
+ uint8_t eol_len = 0;
791
+ #define HTTP1_CONSUMED ((size_t)((uintptr_t)start - (uintptr_t)buffer))
792
+
793
+ re_eval:
794
+ switch ((parser->state.reserved & 7)) {
795
+
796
+ case 0: /* request / response line */
797
+ /* clear out any leading white space */
798
+ while ((start < stop) &&
799
+ (*start == '\r' || *start == '\n' || *start == ' ' || *start == 0)) {
800
+ ++start;
801
+ }
802
+ end = start;
803
+ /* make sure the whole line is available*/
804
+ if (!(eol_len = seek2eol(&end, stop)))
805
+ return HTTP1_CONSUMED;
806
+
807
+ if (start[0] == 'H' && start[1] == 'T' && start[2] == 'T' &&
808
+ start[3] == 'P') {
809
+ /* HTTP response */
810
+ if (http1_consume_response_line(parser, start, end - eol_len + 1))
811
+ goto error;
812
+ } else if (http_tolower(start[0]) >= 'a' && http_tolower(start[0]) <= 'z') {
813
+ /* HTTP request */
814
+ if (http1_consume_request_line(parser, start, end - eol_len + 1))
815
+ goto error;
816
+ } else
817
+ goto error;
818
+ end = start = end + 1;
819
+ parser->state.reserved |= HTTP1_P_FLAG_STATUS_LINE;
820
+
821
+ /* fallthrough */
822
+ case 1: /* headers */
823
+ do {
824
+ if (start >= stop)
825
+ return HTTP1_CONSUMED; /* buffer ended on header line */
826
+ if (*start == '\r' || *start == '\n') {
827
+ goto finished_headers; /* empty line, end of headers */
828
+ }
829
+ end = start;
830
+ if (!(eol_len = seek2eol(&end, stop)))
831
+ return HTTP1_CONSUMED;
832
+ if (http1_consume_header(parser, start, end - eol_len + 1))
833
+ goto error;
834
+ end = start = end + 1;
835
+ } while ((parser->state.reserved & HTTP1_P_FLAG_HEADER_COMPLETE) == 0);
836
+ finished_headers:
837
+ ++start;
838
+ if (*start == '\n')
839
+ ++start;
840
+ end = start;
841
+ parser->state.reserved |= HTTP1_P_FLAG_HEADER_COMPLETE;
842
+ /* fallthrough */
843
+ case (HTTP1_P_FLAG_HEADER_COMPLETE | HTTP1_P_FLAG_STATUS_LINE):
844
+ /* request body */
845
+ {
846
+ int t3 = http1_consume_body(parser, buffer, length, &start);
847
+ switch (t3) {
848
+ case -1:
849
+ goto error;
850
+ case -2:
851
+ goto re_eval;
852
+ }
853
+ break;
854
+ }
855
+ }
856
+ /* are we done ? */
857
+ if (parser->state.reserved & HTTP1_P_FLAG_COMPLETE) {
858
+ parser->state.next = start;
859
+ if (((parser->state.reserved & HTTP1_P_FLAG_RESPONSE)
860
+ ? http1_on_response
861
+ : http1_on_request)(parser))
862
+ goto error;
863
+ parser->state = (struct http1_parser_protected_read_only_state_s){0};
864
+ }
865
+ return HTTP1_CONSUMED;
866
+ error:
867
+ http1_on_error(parser);
868
+ parser->state = (struct http1_parser_protected_read_only_state_s){0};
869
+ return length;
870
+ #undef HTTP1_CONSUMED
871
+ }
872
+
873
+ #endif