corecdtl 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,474 @@
1
+ #include <string>
2
+ #include <cstdint>
3
+ #include <napi.h>
4
+ #include <iostream>
5
+
6
+ #include "http_core.h"
7
+
8
+ FlagBits scanHeaders(
9
+ Napi::Env env, const char* buf, size_t total, uint32_t* offset,
10
+ uint32_t maxContentLength, uint32_t maxHeaderNameSize, uint32_t maxHeaderValueSize,
11
+ uint32_t currentHeaderSize, MethodType method, Napi::Object* outHeaders
12
+ );
13
+
14
// Compile-time SIMD backend selection: defines exactly one of SIMD_NEON or
// SIMD_SSE2 to 1 and rejects targets that advertise neither.
#if defined(__ARM_NEON) || defined(__ARM_NEON__)
#define SIMD_NEON 1
#elif defined(__SSE2__)
#define SIMD_SSE2 1
#else
#error "No SIMD backend"
#endif

#if SIMD_SSE2
#include <immintrin.h>
using uint128_t = __m128i;

/// Returns a 128-bit vector whose first `n` bytes are 0xFF and whose
/// remaining bytes are 0x00.
///
/// @param n Number of leading bytes to set. Values above 16 are clamped to 16
///          (all bytes set) instead of indexing past the lookup data.
static inline __m128i mask128_sse(unsigned n) {
    // 16 set bytes followed by 16 clear bytes. A 16-byte load that starts at
    // offset (16 - n) therefore sees exactly n set bytes, then zeros.
    static const uint8_t window[32] = {
        0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
        0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
    };
    const unsigned count = n > 16 ? 16u : n;
    return _mm_loadu_si128(
        reinterpret_cast<const __m128i*>(window + (16 - count)));
}
#elif SIMD_NEON
#include <arm_neon.h>
using uint128_t = uint8x16_t;

/// Returns a 128-bit vector whose first `n` bytes are 0xFF and whose
/// remaining bytes are 0x00.
///
/// @param n Number of leading bytes to set. Values above 16 are clamped to 16
///          (all bytes set) instead of indexing past the lookup data.
static inline uint8x16_t mask128_neon(unsigned n) {
    // 16 set bytes followed by 16 clear bytes. A 16-byte load that starts at
    // offset (16 - n) therefore sees exactly n set bytes, then zeros.
    static const uint8_t window[32] = {
        0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
        0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
    };
    const unsigned count = n > 16 ? 16u : n;
    return vld1q_u8(window + (16 - count));
}
#endif
73
+
74
+ namespace HttpScanner {
75
+
76
/// 128-bit value held as two 64-bit halves. The packing helpers below store
/// bytes 0-7 in `lo` and bytes 8-15 in `hi`, least-significant byte first.
struct alignas(16) Pack128 {
    uint64_t lo;
    uint64_t hi;
};

/// Places `c`, taken as an unsigned byte, at byte position `index` (0-7) of a
/// 64-bit word. Going through uint8_t keeps characters with the high bit set
/// from sign-extending into the upper bytes on signed-char platforms.
constexpr uint64_t pack_byte_at(char c, unsigned index) {
    return static_cast<uint64_t>(static_cast<uint8_t>(c)) << (index * 8);
}

/// Packs up to eight bytes into one 64-bit word, first argument in the
/// least-significant byte. Omitted trailing bytes are zero.
constexpr uint64_t pack_word_le(
    char b0, char b1 = 0, char b2 = 0, char b3 = 0,
    char b4 = 0, char b5 = 0, char b6 = 0, char b7 = 0
) {
    return pack_byte_at(b0, 0) | pack_byte_at(b1, 1) |
           pack_byte_at(b2, 2) | pack_byte_at(b3, 3) |
           pack_byte_at(b4, 4) | pack_byte_at(b5, 5) |
           pack_byte_at(b6, 6) | pack_byte_at(b7, 7);
}

/// Packs at most the first eight characters of `s` into a 64-bit word,
/// least-significant byte first. Stops early at the NUL terminator, leaving
/// the remaining bytes zero.
constexpr uint64_t PACK8(const char* s) {
    uint64_t packed = 0;
    for (unsigned i = 0; i < 8 && s[i] != '\0'; ++i) {
        packed |= pack_byte_at(s[i], i);
    }
    return packed;
}

// PACK16_N: pack N characters (9 <= N <= 16) into a Pack128. c0..c7 fill `lo`,
// c8.. fill `hi`; unused high bytes of `hi` are zero. Every variant goes
// through pack_word_le so the byte positions cannot drift apart.

/// Packs 9 characters: c0-c7 into `lo`, c8 into `hi`.
constexpr Pack128 PACK16_9(
    char c0, char c1, char c2, char c3,
    char c4, char c5, char c6, char c7,
    char c8
) {
    return Pack128{
        pack_word_le(c0, c1, c2, c3, c4, c5, c6, c7),
        pack_word_le(c8)
    };
}

/// Packs 10 characters: c0-c7 into `lo`, c8-c9 into `hi`.
constexpr Pack128 PACK16_10(
    char c0, char c1, char c2, char c3,
    char c4, char c5, char c6, char c7,
    char c8, char c9
) {
    return Pack128{
        pack_word_le(c0, c1, c2, c3, c4, c5, c6, c7),
        pack_word_le(c8, c9)
    };
}

/// Packs 11 characters: c0-c7 into `lo`, c8-c10 into `hi`.
constexpr Pack128 PACK16_11(
    char c0, char c1, char c2, char c3,
    char c4, char c5, char c6, char c7,
    char c8, char c9, char c10
) {
    return Pack128{
        pack_word_le(c0, c1, c2, c3, c4, c5, c6, c7),
        pack_word_le(c8, c9, c10)
    };
}

/// Packs 12 characters: c0-c7 into `lo`, c8-c11 into `hi`.
constexpr Pack128 PACK16_12(
    char c0, char c1, char c2, char c3,
    char c4, char c5, char c6, char c7,
    char c8, char c9, char c10, char c11
) {
    return Pack128{
        pack_word_le(c0, c1, c2, c3, c4, c5, c6, c7),
        pack_word_le(c8, c9, c10, c11)
    };
}

/// Packs 13 characters: c0-c7 into `lo`, c8-c12 into `hi`.
constexpr Pack128 PACK16_13(
    char c0, char c1, char c2, char c3,
    char c4, char c5, char c6, char c7,
    char c8, char c9, char c10, char c11,
    char c12
) {
    return Pack128{
        pack_word_le(c0, c1, c2, c3, c4, c5, c6, c7),
        pack_word_le(c8, c9, c10, c11, c12)
    };
}

/// Packs 14 characters: c0-c7 into `lo`, c8-c13 into `hi`.
constexpr Pack128 PACK16_14(
    char c0, char c1, char c2, char c3,
    char c4, char c5, char c6, char c7,
    char c8, char c9, char c10, char c11,
    char c12, char c13
) {
    return Pack128{
        pack_word_le(c0, c1, c2, c3, c4, c5, c6, c7),
        pack_word_le(c8, c9, c10, c11, c12, c13)
    };
}

/// Packs 15 characters: c0-c7 into `lo`, c8-c14 into `hi`.
constexpr Pack128 PACK16_15(
    char c0, char c1, char c2, char c3,
    char c4, char c5, char c6, char c7,
    char c8, char c9, char c10, char c11,
    char c12, char c13, char c14
) {
    return Pack128{
        pack_word_le(c0, c1, c2, c3, c4, c5, c6, c7),
        pack_word_le(c8, c9, c10, c11, c12, c13, c14)
    };
}

/// Packs 16 characters: c0-c7 into `lo`, c8-c15 into `hi`.
/// c15 occupies the top byte of `hi` (bit offset 56); the previous shift of
/// 54 smeared it across bytes 6 and 7.
constexpr Pack128 PACK16_16(
    char c0, char c1, char c2, char c3,
    char c4, char c5, char c6, char c7,
    char c8, char c9, char c10, char c11,
    char c12, char c13, char c14, char c15
) {
    return Pack128{
        pack_word_le(c0, c1, c2, c3, c4, c5, c6, c7),
        pack_word_le(c8, c9, c10, c11, c12, c13, c14, c15)
    };
}
274
+
275
/// ASCII-only lowercase: maps 'A'..'Z' to 'a'..'z' and returns every other
/// byte value unchanged. Locale-independent.
constexpr uint8_t tolower_c(uint8_t c) {
    if (c < 'A' || c > 'Z') {
        return c;
    }
    // Upper- and lowercase ASCII letters differ by exactly 0x20.
    return static_cast<uint8_t>(c + ('a' - 'A'));
}
278
+
279
+ constexpr Pack128 PACK16_LOWER(const char* s) {
280
+ Pack128 p{0,0};
281
+
282
+ for (int i = 0; i < 8; ++i)
283
+ p.lo |= uint64_t(tolower_c(s[i])) << (i * 8);
284
+
285
+ for (int i = 0; i < 8; ++i)
286
+ p.hi |= uint64_t(tolower_c(s[i + 8])) << (i * 8);
287
+
288
+ return p;
289
+ }
290
+
291
/// Length of the string at `s`, capped at 16.
///
/// The bound is checked before each read, so at most bytes s[0]..s[15] are
/// touched. A 16-byte buffer without a NUL terminator is therefore safe and
/// yields 16.
constexpr unsigned lit_len(const char* s) {
    unsigned n = 0;
    while (n < 16 && s[n] != '\0') ++n;
    return n;
}
296
+
297
// Masks that keep the N least-significant bytes of a 64-bit word
// (all-ones shifted right by the 8 - N bytes that are dropped).
constexpr uint64_t MASK_U64_2BYTE = ~uint64_t{0} >> (8 * (8 - 2));
constexpr uint64_t MASK_U64_3BYTE = ~uint64_t{0} >> (8 * (8 - 3));
constexpr uint64_t MASK_U64_4BYTE = ~uint64_t{0} >> (8 * (8 - 4));
constexpr uint64_t MASK_U64_5BYTE = ~uint64_t{0} >> (8 * (8 - 5));
constexpr uint64_t MASK_U64_6BYTE = ~uint64_t{0} >> (8 * (8 - 6));
constexpr uint64_t MASK_U64_7BYTE = ~uint64_t{0} >> (8 * (8 - 7));
303
+ ///
304
+
305
/// Identifiers for the HTTP headers this scanner knows by name.
///
/// Values are assigned implicitly and consecutively from HDR_UNKNOWN = 0, so
/// the order of the enumerators is significant: inserting or reordering
/// entries renumbers everything that follows.
enum HeaderId : uint16_t {
    HDR_UNKNOWN = 0,

    // --- Singleton headers (policy critical) ---
    HDR_HOST,
    HDR_CONTENT_LENGTH,
    HDR_TRANSFER_ENCODING,
    HDR_CONTENT_TYPE,
    HDR_CONTENT_RANGE,
    HDR_AUTHORIZATION,
    HDR_PROXY_AUTHORIZATION,
    HDR_USER_AGENT,
    HDR_RANGE,
    HDR_EXPECT,
    HDR_IF_MATCH,
    HDR_IF_NONE_MATCH,
    HDR_IF_MODIFIED_SINCE,
    HDR_IF_UNMODIFIED_SINCE,
    HDR_REFERER,
    HDR_ORIGIN,
    HDR_DATE,

    // --- Multi-valued headers (never merged, order matters) ---
    HDR_SET_COOKIE,
    HDR_WARNING,
    HDR_WWW_AUTHENTICATE,
    HDR_PROXY_AUTHENTICATE,
    HDR_LINK,
    HDR_VIA,

    // --- Mergeable headers (comma-separated) ---
    HDR_ACCEPT,
    HDR_ACCEPT_LANGUAGE,
    HDR_ACCEPT_ENCODING,
    HDR_ACCEPT_RANGES,
    HDR_ALLOW,
    HDR_CACHE_CONTROL,
    HDR_CONNECTION,
    HDR_PRAGMA,
    HDR_UPGRADE,
    HDR_TRAILER,
    HDR_TE,
    HDR_VARY,

    // --- Other known headers (no strict policy) ---
    HDR_COOKIE,
    HDR_ETAG,
    HDR_LAST_MODIFIED,
    HDR_EXPIRES,
    HDR_SERVER,
    HDR_LOCATION,

    // --- Security / Fetch / browser headers ---
    HDR_REFERER_POLICY,
    HDR_SEC_FETCH_SITE,
    HDR_SEC_FETCH_MODE,
    HDR_SEC_FETCH_DEST,
    HDR_SEC_FETCH_USER,
    HDR_DNT,

    // --- Proxy / forwarding headers (de facto) ---
    HDR_X_FORWARDED_FOR,
    HDR_X_FORWARDED_PROTO,
    HDR_X_FORWARDED_HOST,
    HDR_X_REAL_IP
};
379
+
380
+ typedef FlagBits (*hv_value_parser_fn)(
381
+ const char* __restrict buf,
382
+ uint32_t* __restrict __offset,
383
+ size_t total,
384
+ uint32_t maxHeaderValueSize,
385
+ std::unique_ptr<std::string>& hv
386
+ );
387
+
388
+ typedef struct {
389
+ const char* name; // lowercase header name
390
+ hv_value_parser_fn value_parser; // value parsing strategy
391
+ } HeaderDesc;
392
+
393
+ FlagBits hv_get_value_number(
394
+ const char* __restrict buf,
395
+ uint32_t* __restrict __offset,
396
+ size_t total,
397
+ uint32_t maxHeaderValueSize,
398
+ std::unique_ptr<std::string>& hv
399
+ );
400
+ FlagBits hv_get_value_any(
401
+ const char* __restrict buf,
402
+ uint32_t* __restrict __offset,
403
+ size_t total,
404
+ uint32_t maxHeaderValueSize,
405
+ std::unique_ptr<std::string>& hv
406
+ );
407
+
408
+ const HeaderDesc HEADERS[] = {
409
+ { "unknown", hv_get_value_any },
410
+
411
+ // SINGLETON
412
+ { "host", hv_get_value_any },
413
+ { "content-length", hv_get_value_number },
414
+ { "transfer-encoding", hv_get_value_any },
415
+ { "content-type", hv_get_value_any },
416
+ { "content-range", hv_get_value_any },
417
+ { "authorization", hv_get_value_any },
418
+ { "proxy-authorization", hv_get_value_any },
419
+ { "user-agent", hv_get_value_any },
420
+ { "range", hv_get_value_any },
421
+ { "expect", hv_get_value_any },
422
+ { "if-match", hv_get_value_any },
423
+ { "if-none-match", hv_get_value_any },
424
+ { "if-modified-since", hv_get_value_any },
425
+ { "if-unmodified-since", hv_get_value_any },
426
+ { "referer", hv_get_value_any },
427
+ { "origin", hv_get_value_any },
428
+ { "date", hv_get_value_any },
429
+
430
+ // MULTI
431
+ { "set-cookie", hv_get_value_any },
432
+ { "warning", hv_get_value_any },
433
+ { "www-authenticate", hv_get_value_any },
434
+ { "proxy-authenticate", hv_get_value_any },
435
+ { "link", hv_get_value_any },
436
+ { "via", hv_get_value_any },
437
+
438
+ // MERGEABLE
439
+ { "accept", hv_get_value_any },
440
+ { "accept-language", hv_get_value_any },
441
+ { "accept-encoding", hv_get_value_any },
442
+ { "accept-ranges", hv_get_value_any },
443
+ { "allow", hv_get_value_any },
444
+ { "cache-control", hv_get_value_any },
445
+ { "connection", hv_get_value_any },
446
+ { "pragma", hv_get_value_any },
447
+ { "upgrade", hv_get_value_any },
448
+ { "trailer", hv_get_value_any },
449
+ { "te", hv_get_value_any },
450
+ { "vary", hv_get_value_any },
451
+
452
+ // NORMAL / KNOWN
453
+ { "cookie", hv_get_value_any },
454
+ { "etag", hv_get_value_any },
455
+ { "last-modified", hv_get_value_any },
456
+ { "expires", hv_get_value_any },
457
+ { "server", hv_get_value_any },
458
+ { "location", hv_get_value_any },
459
+
460
+ // Security / Fetch
461
+ { "referer-policy", hv_get_value_any },
462
+ { "sec-fetch-site", hv_get_value_any },
463
+ { "sec-fetch-mode", hv_get_value_any },
464
+ { "sec-fetch-dest", hv_get_value_any },
465
+ { "sec-fetch-user", hv_get_value_any },
466
+ { "dnt", hv_get_value_number },
467
+
468
+ // Proxy / Forwarding
469
+ { "x-forwarded-for", hv_get_value_any },
470
+ { "x-forwarded-proto", hv_get_value_any },
471
+ { "x-forwarded-host", hv_get_value_any },
472
+ { "x-real-ip", hv_get_value_any }
473
+ };
474
+ }