@blamejs/core 0.9.18 → 0.9.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,714 @@
1
+ "use strict";
2
+ /**
3
+ * @module b.safeMime
4
+ * @nav Parsers
5
+ * @title Safe MIME
6
+ * @order 120
7
+ *
8
+ * @intro
9
+ * Bounded MIME parser substrate for the mail stack. Walks RFC 5322 +
10
+ * 2045 / 2046 / 2047 / 6532 (EAI) / 6533 (i18n-DSN) message structure
11
+ * into a part tree with caps on every dimension an attacker can grow
12
+ * to DoS the framework.
13
+ *
14
+ * Foundation for everything above:
15
+ *
16
+ * - `b.mailStore.appendMessage` parses inbound bytes via
17
+ * `b.safeMime.parse(...)` to extract headers + body parts before
18
+ * sealing per-column.
19
+ * - `b.mail.server.mx` runs every received message through
20
+ * `b.safeMime.parse` before SPF / DKIM / DMARC / ARC verification.
21
+ * - `b.guardEmail.validateMessage` already operates on raw bytes
22
+ * at the line-shape level; `b.safeMime.parse` is the structured
23
+ * follow-up that lets `b.guardHtml` / `b.guardArchive` /
24
+ * `b.guardSvg` inspect individual MIME parts.
25
+ * - `b.mail.crypto.{pgp,smime}` (v0.9.34a) parses signed/encrypted
26
+ * containers via this primitive before reaching the underlying
27
+ * crypto.
28
+ *
29
+ * Defends `CVE-2024-39929` (Exim MIME multipart parser) and
30
+ * `CVE-2025-30258` (gnumail truncated-MIME-tree class) by capping
31
+ * total parts, nesting depth, boundary length, header bytes,
32
+ * header-line bytes, decoded body bytes, message bytes — plus
33
+ * charset + transfer-encoding allowlists.
34
+ *
35
+ * Throws `SafeMimeError` on every cap exceeded, malformed boundary,
36
+ * unknown charset, unknown transfer-encoding, NUL byte in headers,
37
+ * bidi/control chars in header values.
38
+ *
39
+ * The parser is purely functional — no I/O, no async, no side
40
+ * effects. Operators run it in `b.workerPool` workers for any
41
+ * incoming message above a threshold.
42
+ *
43
+ * @card
44
+ * Bounded MIME parser — walks RFC 5322 + 2045 / 2046 / 2047 + EAI message structure into a part tree with hard caps on depth, part count, body size, header bytes, and charset / transfer-encoding allowlists. Defends CVE-2024-39929 + CVE-2025-30258.
45
+ */
46
+
47
+ var C = require("./constants");
48
+ var { defineClass } = require("./framework-error");
49
+
50
+ var SafeMimeError = defineClass("SafeMimeError", { alwaysPermanent: true });
51
+
52
// ---- Default caps -----------------------------------------------------------

// Structural limits: these count parts / levels / characters, not payload size.
var DEFAULT_MAX_PARTS         = 64;  // allow:raw-byte-literal — total parts cap, not bytes
var DEFAULT_MAX_NESTING_DEPTH = 16;
var DEFAULT_MAX_BOUNDARY      = 70;  // RFC 2046 §5.1.1
var DEFAULT_MAX_HEADER_LINE   = 998; // allow:raw-byte-literal — RFC 5322 §2.1.1 line cap

// Size limits, expressed through the shared byte-size helpers.
var DEFAULT_MAX_HEADER_BYTES  = C.BYTES.kib(64);
var DEFAULT_MAX_BODY_BYTES    = C.BYTES.mib(25);
var DEFAULT_MAX_MESSAGE_BYTES = C.BYTES.mib(50);

// Charset labels accepted by default. Entries are lower-case; both canonical
// names and their common aliases are listed.
var DEFAULT_CHARSETS = Object.freeze([
  "utf-8",
  "us-ascii",
  "ascii",
  "iso-8859-1",
  "latin1",
  "windows-1252",
  "cp1252",
  "iso-8859-2",
  "iso-8859-15",
  "utf-16",
  "utf-16le",
  "utf-16be",
  "gb2312",
  "gbk",
  "big5",
  "shift_jis",
  "shift-jis",
  "iso-2022-jp",
  "euc-kr",
  "euc-jp",
]);

// Content-Transfer-Encoding values accepted by default.
var DEFAULT_TRANSFER_ENCODINGS = Object.freeze([
  "7bit",
  "8bit",
  "binary",
  "quoted-printable",
  "base64",
]);
73
+
74
+ /**
75
+ * @primitive b.safeMime.parse
76
+ * @signature b.safeMime.parse(bytes, opts?)
77
+ * @since 0.9.19
78
+ * @status stable
79
+ * @related b.safeMime.walk, b.safeMime.extractText, b.guardEmail.validateMessage
80
+ *
81
+ * Parse `bytes` into a MIME part tree. Returns
82
+ * `{ headers, parts, leaf, decoded }`. Multipart parts have non-null
83
+ * `parts`; leaf parts have non-null `leaf` carrying decoded body.
84
+ *
85
+ * Throws `SafeMimeError` with codes:
86
+ * `safe-mime/oversize-message` / `oversize-part-count` /
87
+ * `oversize-nesting` / `oversize-boundary` / `oversize-headers` /
88
+ * `oversize-header-line` / `oversize-body` / `unknown-charset` /
89
+ * `unknown-transfer-encoding` / `malformed-boundary` /
90
+ * `malformed-headers` / `control-char-in-header` / `bad-input`.
91
+ *
92
+ * @opts
93
+ * maxParts: number, // default 64
94
+ * maxNestingDepth: number, // default 16
95
+ * maxBoundary: number, // default 70 (RFC 2046 §5.1.1)
96
+ * maxHeaderBytes: number, // default 64 KiB
97
+ * maxHeaderLineBytes: number, // default 998 (RFC 5322 §2.1.1)
98
+ * maxBodyBytes: number, // default 25 MiB
99
+ * maxMessageBytes: number, // default 50 MiB
100
+ * charsetAllowlist: string[], // default UTF-8 / US-ASCII / common legacy 8-bit
101
+ * transferEncodingAllowlist: string[], // default 7bit/8bit/binary/qp/base64
102
+ *
103
+ * @example
104
+ * var msg = b.safeMime.parse(messageBuffer);
105
+ * msg.headers.get("subject");
106
+ * msg.parts.length;
107
+ * msg.parts[0].leaf.body.toString("utf8");
108
+ */
109
// Entry point: validate options, enforce the whole-message size cap, then
// hand the buffer to the recursive part parser.
//
// bytes — Buffer, Uint8Array, or string holding the raw message.
// opts  — optional caps / allowlists (see the option list in the JSDoc).
// Returns the root part `{ headers, parts, leaf, decoded }`.
// Throws SafeMimeError for bad options, bad input, or any exceeded cap.
function parse(bytes, opts) {
  opts = opts || {};
  var maxParts = _intOpt(opts, "maxParts", DEFAULT_MAX_PARTS);
  var maxNestingDepth = _intOpt(opts, "maxNestingDepth", DEFAULT_MAX_NESTING_DEPTH);
  var maxBoundary = _intOpt(opts, "maxBoundary", DEFAULT_MAX_BOUNDARY);
  var maxHeaderBytes = _intOpt(opts, "maxHeaderBytes", DEFAULT_MAX_HEADER_BYTES);
  var maxHeaderLine = _intOpt(opts, "maxHeaderLineBytes", DEFAULT_MAX_HEADER_LINE);
  var maxBodyBytes = _intOpt(opts, "maxBodyBytes", DEFAULT_MAX_BODY_BYTES);
  var maxMessageBytes = _intOpt(opts, "maxMessageBytes", DEFAULT_MAX_MESSAGE_BYTES);

  // The per-part charset check looks names up in canonical form
  // (`_normalizeCharsetName`), so the allowlist must be stored in the same
  // form. Otherwise an operator allowlist such as ["utf8"] or ["latin1"]
  // would never match and every message would be refused.
  var charsetList = opts.charsetAllowlist || DEFAULT_CHARSETS;
  var charsets = Object.create(null);
  for (var i = 0; i < charsetList.length; i += 1) {
    charsets[_normalizeCharsetName(charsetList[i])] = true;
  }
  var encodings = _normalizeStringSet(opts.transferEncodingAllowlist || DEFAULT_TRANSFER_ENCODINGS);

  var buf = _toBuffer(bytes);
  if (buf.length > maxMessageBytes) {
    throw new SafeMimeError("safe-mime/oversize-message",
      "safeMime.parse: message size " + buf.length + " exceeds maxMessageBytes " + maxMessageBytes);
  }

  // Shared state for the recursive descent; partCount is incremented by
  // every part so the maxParts cap applies to the whole tree.
  var ctx = {
    maxParts: maxParts,
    maxNestingDepth: maxNestingDepth,
    maxBoundary: maxBoundary,
    maxHeaderBytes: maxHeaderBytes,
    maxHeaderLine: maxHeaderLine,
    maxBodyBytes: maxBodyBytes,
    charsets: charsets,
    encodings: encodings,
    partCount: 0,
  };

  return _parsePart(buf, ctx, 0);
}
141
+
142
+ /**
143
+ * @primitive b.safeMime.walk
144
+ * @signature b.safeMime.walk(tree, visitor)
145
+ * @since 0.9.19
146
+ * @status stable
147
+ * @related b.safeMime.parse, b.safeMime.findFirst
148
+ *
149
+ * Depth-first walk. Invokes `visitor(part, path)` for every part where
150
+ * `path` is the position array (`[]` for root, `[0]` for first child).
151
+ * Visitor returning `false` short-circuits.
152
+ *
153
+ * @example
154
+ * b.safeMime.walk(tree, function (part) {
155
+ * if (part.leaf && part.leaf.contentType === "application/pdf") {
156
+ * console.log("pdf", part.leaf.body.length);
157
+ * }
158
+ * });
159
+ */
160
// Public depth-first traversal. A falsy tree is a no-op (and is checked
// before the visitor is validated); otherwise the visitor must be callable.
function walk(tree, visitor) {
  if (!tree) return undefined;
  if (typeof visitor === "function") {
    return _walkRec(tree, visitor, []);
  }
  throw new TypeError("safeMime.walk: visitor must be a function");
}
167
+
168
// Recursive worker for `walk`. Visits `part` first, then each child in
// order. Returns false as soon as any visitor call returns exactly `false`,
// true otherwise. The visitor receives a private copy of the path array.
function _walkRec(part, visitor, path) {
  if (visitor(part, path.slice()) === false) return false;
  if (!part.parts) return true;
  var childIndex = 0;
  while (childIndex < part.parts.length) {
    var keepGoing = _walkRec(part.parts[childIndex], visitor, path.concat([childIndex]));
    if (keepGoing === false) return false;
    childIndex += 1;
  }
  return true;
}
179
+
180
+ /**
181
+ * @primitive b.safeMime.findFirst
182
+ * @signature b.safeMime.findFirst(tree, predicate)
183
+ * @since 0.9.19
184
+ * @status stable
185
+ * @related b.safeMime.walk
186
+ *
187
+ * Return the first part for which `predicate(part)` is truthy, or
188
+ * `null`. Common use: pull the first `text/plain` or `text/html`.
189
+ *
190
+ * @example
191
+ * var t = b.safeMime.findFirst(tree, function (p) {
192
+ * return p.leaf && p.leaf.contentType === "text/plain";
193
+ * });
194
+ */
195
// Depth-first search: returns the first part the predicate accepts, or null.
function findFirst(tree, predicate) {
  if (typeof predicate !== "function") {
    throw new TypeError("safeMime.findFirst: predicate must be a function");
  }
  var match = null;
  walk(tree, function (candidate) {
    if (!predicate(candidate)) return undefined;
    match = candidate;
    return false; // stop the walk at the first hit
  });
  return match;
}
205
+
206
+ /**
207
+ * @primitive b.safeMime.extractText
208
+ * @signature b.safeMime.extractText(tree, opts?)
209
+ * @since 0.9.19
210
+ * @status stable
211
+ * @related b.safeMime.parse, b.safeMime.findFirst
212
+ *
213
+ * Pull the rendering-preferred text payload. Honors RFC 2046 §5.1.4
214
+ * "last wins" semantics for `multipart/alternative`. Returns
215
+ * `{ contentType, charset, body }` (body is decoded string) or `null`.
216
+ *
217
+ * @opts
218
+ * prefer: "plain" | "html" | "any", // default "plain"
219
+ *
220
+ * @example
221
+ * var tree = b.safeMime.parse(messageBuffer);
222
+ * var text = b.safeMime.extractText(tree, { prefer: "plain" });
223
+ * text.body; // → "Hello, world!"
224
+ * text.contentType; // → "text/plain"
225
+ */
226
// Pick the text payload to render.
//
// tree — part tree returned by `parse`.
// opts.prefer — "plain" (default), "html", or "any".
// Returns `{ contentType, charset, body }` or null when no text part exists.
//
// For a `multipart/alternative` root, alternatives are tried last-to-first
// (RFC 2046 §5.1.4). Each alternative is searched as a whole subtree, so a
// text part wrapped in a nested container (for example text/html inside
// multipart/related) is found rather than skipped.
function extractText(tree, opts) {
  opts = opts || {};
  var prefer = opts.prefer || "plain";

  function isAnyText(p) {
    return p.leaf && /^text\//.test(p.leaf.contentType);
  }

  if (tree && /^multipart\/alternative/i.test(_ct(tree))) {
    var parts = tree.parts || [];
    var isPreferred = function (p) {
      if (!p.leaf) return false;
      var ct = p.leaf.contentType;
      if (prefer === "plain") return ct === "text/plain";
      if (prefer === "html") return ct === "text/html";
      if (prefer === "any") return /^text\//.test(ct);
      return false;
    };
    // Pass 1: the preferred type, last alternative wins.
    for (var i = parts.length - 1; i >= 0; i -= 1) {
      var hit = findFirst(parts[i], isPreferred);
      if (hit) return _materializeText(hit);
    }
    // Pass 2: any text at all, first alternative wins.
    for (var j = 0; j < parts.length; j += 1) {
      var fallback = findFirst(parts[j], isAnyText);
      if (fallback) return _materializeText(fallback);
    }
    return null;
  }

  // Non-alternative trees: first part of the preferred type anywhere in the
  // tree ("any" is treated like "plain" here), then any text part.
  var preferred = findFirst(tree, function (p) {
    return p.leaf && p.leaf.contentType === (prefer === "html" ? "text/html" : "text/plain");
  });
  if (preferred) return _materializeText(preferred);
  var anyText = findFirst(tree, isAnyText);
  return anyText ? _materializeText(anyText) : null;
}
256
+
257
+ /**
258
+ * @primitive b.safeMime.extractAttachments
259
+ * @signature b.safeMime.extractAttachments(tree, opts?)
260
+ * @since 0.9.19
261
+ * @status stable
262
+ * @related b.safeMime.parse, b.guardArchive, b.fileType
263
+ *
264
+ * Return array of attachment-shaped parts. Each entry is
265
+ * `{ filename, contentType, body, headers }`. Operators pipe each
266
+ * attachment through `b.fileType.detect` then through the per-type
267
+ * guard (`b.guardArchive` / `b.guardPdf` / etc.).
268
+ *
269
+ * @opts
270
+ * includeInline: boolean, // default false — Content-Disposition: inline skipped
271
+ *
272
+ * @example
273
+ * var tree = b.safeMime.parse(messageBuffer);
274
+ * var atts = b.safeMime.extractAttachments(tree);
275
+ * atts[0].filename; // → "report.pdf"
276
+ * atts[0].contentType; // → "application/pdf"
277
+ * atts[0].body.length; // → 12345 (decoded bytes)
278
+ */
279
// Collect attachment-shaped leaf parts from the tree.
//
// By default only leaves whose Content-Disposition starts with "attachment"
// are returned. With `opts.includeInline === true` every leaf part is
// returned, whatever its disposition.
function extractAttachments(tree, opts) {
  var includeInline = (opts || {}).includeInline === true;
  var collected = [];
  walk(tree, function (part) {
    if (!part.leaf) return;
    var disposition = (part.headers.get("content-disposition") || "").toLowerCase();
    var isAttachment = disposition.indexOf("attachment") === 0;
    if (isAttachment || includeInline) {
      collected.push({
        filename: _filenameFromHeaders(part.headers),
        contentType: part.leaf.contentType,
        body: part.leaf.body,
        headers: part.headers,
      });
    }
  });
  return collected;
}
299
+
300
+ // ---- Internal --------------------------------------------------------------
301
+
302
// Parse one MIME entity: the root message (depth 0) or a multipart child.
//
// buf   — bytes of this entity (headers + body).
// ctx   — shared caps / allowlists plus the running `partCount`.
// depth — nesting level of this entity.
// Returns `{ headers, parts, leaf, decoded, _contentType }`; multipart
// entities carry `parts`, everything else carries `leaf` + `decoded`.
// Throws SafeMimeError when a cap is exceeded or the structure is refused.
function _parsePart(buf, ctx, depth) {
  if (depth > ctx.maxNestingDepth) {
    throw new SafeMimeError("safe-mime/oversize-nesting",
      "safeMime.parse: nesting depth exceeded maxNestingDepth=" + ctx.maxNestingDepth +
      " (CVE-2024-39929-class defense)");
  }
  ctx.partCount += 1;
  if (ctx.partCount > ctx.maxParts) {
    throw new SafeMimeError("safe-mime/oversize-part-count",
      "safeMime.parse: total parts exceeded maxParts=" + ctx.maxParts +
      " (CVE-2024-39929-class defense)");
  }

  var sep;
  var bodyStart;
  var startsWithLf = buf[0] === 0x0A;
  var startsWithCrlf = buf[0] === 0x0D && buf[1] === 0x0A;
  if (depth > 0 && (startsWithLf || startsWithCrlf)) {
    // RFC 2046 §5.1.1: a body part may have no headers at all, in which case
    // it begins with the blank line. Everything after that line is body and
    // must not be scanned for header fields. Restricted to nested parts so
    // root-message handling is unchanged.
    sep = 0;
    bodyStart = startsWithLf ? 1 : 2;
  } else {
    sep = _findHeaderBodySep(buf);
    if (sep < 0) sep = buf.length; // no blank line: the entity is all headers
    // `sep` points at the first line terminator of the blank-line pair; the
    // body starts after both terminators (each CRLF or bare LF).
    bodyStart = sep;
    for (var skipped = 0; skipped < 2; skipped += 1) {
      if (buf[bodyStart] === 0x0D && buf[bodyStart + 1] === 0x0A) bodyStart += 2;
      else if (buf[bodyStart] === 0x0A) bodyStart += 1;
    }
  }
  if (sep > ctx.maxHeaderBytes) {
    throw new SafeMimeError("safe-mime/oversize-headers",
      "safeMime.parse: header section " + sep + " bytes exceeds maxHeaderBytes=" + ctx.maxHeaderBytes);
  }
  var headerBytes = buf.subarray(0, sep);
  var bodyBytes = buf.subarray(bodyStart);

  var headers = _parseHeaders(headerBytes, ctx);
  var contentTypeHeader = headers.get("content-type") || "text/plain";
  var ctInfo = _parseContentType(contentTypeHeader);
  var contentType = ctInfo.type;
  var params = ctInfo.params;

  if (contentType.indexOf("multipart/") === 0) {
    var boundary = params.boundary;
    if (typeof boundary !== "string" || boundary.length === 0) {
      throw new SafeMimeError("safe-mime/malformed-boundary",
        "safeMime.parse: multipart content-type lacks boundary param");
    }
    if (boundary.length > ctx.maxBoundary) {
      throw new SafeMimeError("safe-mime/oversize-boundary",
        "safeMime.parse: boundary length " + boundary.length + " exceeds maxBoundary=" + ctx.maxBoundary +
        " (RFC 2046 §5.1.1)");
    }
    var partBuffers = _splitMultipart(bodyBytes, boundary);
    var parts = [];
    for (var i = 0; i < partBuffers.length; i += 1) {
      parts.push(_parsePart(partBuffers[i], ctx, depth + 1));
    }
    return {
      headers: headers,
      parts: parts,
      leaf: null,
      decoded: null,
      _contentType: contentType,
    };
  }

  // Leaf entity: cap the encoded size first so an oversized payload is
  // refused before any decoding work is done.
  if (bodyBytes.length > ctx.maxBodyBytes) {
    throw new SafeMimeError("safe-mime/oversize-body",
      "safeMime.parse: body " + bodyBytes.length + " bytes exceeds maxBodyBytes=" + ctx.maxBodyBytes);
  }
  var encoding = String(headers.get("content-transfer-encoding") || "7bit").toLowerCase().trim();
  if (!ctx.encodings[encoding]) {
    throw new SafeMimeError("safe-mime/unknown-transfer-encoding",
      "safeMime.parse: content-transfer-encoding '" + encoding + "' not in allowlist; refused");
  }
  var charset = String(params.charset || "us-ascii").toLowerCase();
  if (!ctx.charsets[_normalizeCharsetName(charset)]) {
    throw new SafeMimeError("safe-mime/unknown-charset",
      "safeMime.parse: charset '" + charset + "' not in allowlist; refused");
  }
  var decodedBody = _decodeBody(bodyBytes, encoding);
  if (decodedBody.length > ctx.maxBodyBytes) {
    throw new SafeMimeError("safe-mime/oversize-body",
      "safeMime.parse: decoded body " + decodedBody.length +
      " bytes exceeds maxBodyBytes=" + ctx.maxBodyBytes);
  }
  return {
    headers: headers,
    parts: null,
    leaf: {
      contentType: contentType,
      charset: charset,
      encoding: encoding,
      body: decodedBody,
    },
    decoded: _materializeTextValue(decodedBody, charset),
    _contentType: contentType,
  };
}
396
+
397
// Locate the blank line that separates headers from body.
//
// Returns the index of the first line terminator of the blank-line pair, or
// -1 when there is none. Recognised pairs: CRLF CRLF, LF LF (which also
// covers CRLF LF, reported at the LF), and the mixed form LF CRLF that
// appears when only part of a message had its line endings rewritten.
function _findHeaderBodySep(buf) {
  for (var i = 0; i < buf.length - 1; i += 1) {
    if (buf[i] === 0x0D && buf[i + 1] === 0x0A &&
        buf[i + 2] === 0x0D && buf[i + 3] === 0x0A) {
      return i;
    }
    if (buf[i] === 0x0A) {
      if (buf[i + 1] === 0x0A) return i;
      if (buf[i + 1] === 0x0D && buf[i + 2] === 0x0A) return i;
    }
  }
  return -1;
}
409
+
410
// Parse a header section into a case-insensitive multi-map.
//
// buf — bytes of the header section (no trailing blank line).
// ctx — supplies `maxHeaderLine` for the line splitter.
// Returns `{ get, getAll, names, raw }`; names are lower-cased and values
// are trimmed with RFC 2047 encoded-words decoded.
// Throws SafeMimeError `malformed-headers` for a line without a colon and
// `control-char-in-header` for C0 control characters or DEL in a value.
function _parseHeaders(buf, ctx) {
  var lines = _splitHeaderLines(buf, ctx);
  var headerMap = Object.create(null);

  // Refuse NUL, CR, LF, and other C0 control chars in header values.
  // Tab (0x09) is allowed (header folding). C1 control range
  // (0x80-0x9F) NOT refused — legitimate non-ASCII via EAI/RFC 2047
  // decoded-words can produce bytes in that range.
  function refuseControlChars(name, text) {
    for (var hci = 0; hci < text.length; hci += 1) {
      var hcc = text.charCodeAt(hci);
      if ((hcc < 0x20 && hcc !== 0x09) || hcc === 0x7F) { // allow:raw-byte-literal — C0 control char + DEL refusal
        throw new SafeMimeError("safe-mime/control-char-in-header",
          "safeMime.parse: header '" + name + "' contains control char 0x" + hcc.toString(16));
      }
    }
  }

  for (var i = 0; i < lines.length; i += 1) {
    var line = lines[i];
    var colon = line.indexOf(":");
    if (colon < 0) {
      throw new SafeMimeError("safe-mime/malformed-headers",
        "safeMime.parse: header line missing colon: " + _previewBytes(line));
    }
    var name = line.slice(0, colon).toLowerCase().trim();
    var value = line.slice(colon + 1).trim();
    refuseControlChars(name, value);
    value = _decodeRfc2047Words(value);
    // Check again after decoding: an encoded-word such as =?utf-8?Q?=0D=0A?=
    // is plain ASCII on the wire but decodes to CR/LF (or NUL), which would
    // otherwise reach callers inside a supposedly vetted header value.
    refuseControlChars(name, value);
    if (name === "__proto__" || name === "constructor" || name === "prototype") continue;
    if (!headerMap[name]) headerMap[name] = [];
    headerMap[name].push(value);
  }
  return {
    // First value for the header, or null when absent.
    get: function (n) {
      var arr = headerMap[String(n).toLowerCase()];
      return arr && arr.length > 0 ? arr[0] : null;
    },
    // Copy of every value for the header, in order of appearance.
    getAll: function (n) { return (headerMap[String(n).toLowerCase()] || []).slice(); },
    names: function () { return Object.keys(headerMap); },
    raw: headerMap,
  };
}
448
+
449
// Split a header section into logical (unfolded) header lines.
//
// buf — bytes of the header section.
// ctx — supplies `maxHeaderLine`, the per-physical-line cap in BYTES.
// Returns an array of strings decoded as UTF-8; continuation lines (leading
// space or tab) are joined to the previous line with a single space.
// Throws SafeMimeError `oversize-header-line` when a physical line exceeds
// the cap.
function _splitHeaderLines(buf, ctx) {
  // Split on a latin1 view so that one character equals one byte: the cap is
  // a byte cap, and a UTF-8 decoded string would under-count multi-byte
  // (EAI) text. UTF-8 sequences never contain CR or LF bytes, so splitting
  // before decoding yields the same lines as decoding first.
  var rawLines = buf.toString("latin1").split(/\r?\n/);
  var unfolded = [];
  for (var i = 0; i < rawLines.length; i += 1) {
    var raw = rawLines[i];
    if (raw.length === 0) continue;
    if (raw.length > ctx.maxHeaderLine) {
      throw new SafeMimeError("safe-mime/oversize-header-line",
        "safeMime.parse: header line " + raw.length +
        " bytes exceeds maxHeaderLineBytes=" + ctx.maxHeaderLine +
        " (RFC 5322 §2.1.1)");
    }
    var line = Buffer.from(raw, "latin1").toString("utf8");
    if ((line.charCodeAt(0) === 0x20 || line.charCodeAt(0) === 0x09) &&
        unfolded.length > 0) {
      unfolded[unfolded.length - 1] += " " + line.replace(/^[\s]+/, "");
    } else {
      unfolded.push(line);
    }
  }
  return unfolded;
}
471
+
472
// Parse a Content-Type style header value into its media type and params.
//
// value — header value, e.g. `multipart/mixed; boundary="a;b"`.
// Returns `{ type, params }`: `type` is lower-cased and trimmed; `params`
// is a prototype-less map of lower-cased names to values, with surrounding
// double quotes removed and quoted-pairs unescaped.
function _parseContentType(value) {
  var text = String(value);

  // Split on ";" only outside quoted strings — a quoted parameter value
  // (notably `boundary`) may legally contain semicolons.
  var segments = [];
  var current = "";
  var inQuotes = false;
  for (var at = 0; at < text.length; at += 1) {
    var ch = text.charAt(at);
    if (inQuotes && ch === "\\" && at + 1 < text.length) {
      // Quoted-pair: keep both characters so an escaped quote does not
      // end the quoted string; unescaping happens below.
      current += ch + text.charAt(at + 1);
      at += 1;
      continue;
    }
    if (ch === '"') inQuotes = !inQuotes;
    if (ch === ";" && !inQuotes) {
      segments.push(current);
      current = "";
      continue;
    }
    current += ch;
  }
  segments.push(current);
  // Unbalanced quote: the quote-aware split cannot be trusted, so fall back
  // to a plain split rather than swallowing every later parameter.
  if (inQuotes) segments = text.split(";");

  var type = segments[0].toLowerCase().trim();
  var params = Object.create(null);
  for (var i = 1; i < segments.length; i += 1) {
    var p = segments[i].trim();
    if (p.length === 0) continue;
    var eq = p.indexOf("=");
    if (eq < 0) continue;
    var k = p.slice(0, eq).toLowerCase().trim();
    var v = p.slice(eq + 1).trim();
    if (v.length >= 2 && v.charAt(0) === '"' && v.charAt(v.length - 1) === '"') {
      v = v.slice(1, -1).replace(/\\(.)/g, "$1");
    }
    if (k === "__proto__" || k === "constructor" || k === "prototype") continue;
    params[k] = v;
  }
  return { type: type, params: params };
}
491
+
492
// Cut a multipart body into the raw buffers of its parts.
//
// Per RFC 2046 §5.1.1 a boundary delimiter is `--<value>` preceded by CRLF
// (or LF) — or at the very start of the body; boundary-shaped text elsewhere
// in a part is not a delimiter. Text before the first delimiter (preamble)
// and after the close delimiter (epilogue) is dropped. A part that is never
// terminated by a later delimiter runs to the end of the buffer.
function _splitMultipart(buf, boundary) {
  var delimiter = Buffer.from("--" + boundary);
  var starts = []; // start offset of each part, in order
  var ends = [];   // end offset per part; left unset while a part is open

  // Close the most recent part just before the delimiter at `delimiterAt`,
  // excluding the line terminator that belongs to the delimiter line.
  function closeOpenPart(delimiterAt) {
    if (starts.length === 0) return;
    var end = delimiterAt;
    if (end >= 2 && buf[end - 2] === 0x0D && buf[end - 1] === 0x0A) end -= 2;
    else if (end >= 1 && buf[end - 1] === 0x0A) end -= 1;
    ends[starts.length - 1] = end;
  }

  var cursor = 0;
  while (cursor < buf.length) {
    var hit = _findBoundaryAtLineStart(buf, delimiter, cursor);
    if (hit < 0) break;
    var tail = hit + delimiter.length;
    var isCloseDelimiter = buf[tail] === 0x2D && buf[tail + 1] === 0x2D;
    if (isCloseDelimiter) {
      // Final delimiter — close out preceding part.
      closeOpenPart(hit);
      break;
    }
    var nextLine = _indexOfLineEnd(buf, hit);
    if (nextLine < 0) break; // delimiter line never ends: leave the part open
    closeOpenPart(hit);
    starts.push(nextLine);
    cursor = nextLine;
  }

  return starts.map(function (start, n) {
    return ends[n] === undefined ? buf.subarray(start) : buf.subarray(start, ends[n]);
  });
}
531
+
532
// True when the bytes at `at` (immediately after `--<boundary>`) finish a
// delimiter line: an optional `--` (close delimiter), optional space / tab
// transport padding, then CRLF, LF, or end of buffer.
function _isDelimiterLineTail(buf, at) {
  var i = at;
  if (buf[i] === 0x2D && buf[i + 1] === 0x2D) i += 2;
  while (buf[i] === 0x20 || buf[i] === 0x09) i += 1;
  return i >= buf.length ||
    buf[i] === 0x0A ||
    (buf[i] === 0x0D && buf[i + 1] === 0x0A);
}

// Find `--<boundary>` occupying a whole delimiter line at or after `from`.
//
// A hit must start at the beginning of the buffer or right after a line
// feed, AND must be followed only by a delimiter-line tail. Without the
// second condition a boundary that is a prefix of another one (outer "abc",
// inner "abcd") would make the inner part's delimiters split the outer body.
// Returns the index of the delimiter, or -1 when there is none.
function _findBoundaryAtLineStart(buf, delimiter, from) {
  var pos = from;
  while (pos < buf.length) {
    var idx = buf.indexOf(delimiter, pos);
    if (idx < 0) return -1;
    // A preceding LF covers both bare-LF and CRLF line endings.
    var atLineStart = idx === 0 || buf[idx - 1] === 0x0A;
    if (atLineStart && _isDelimiterLineTail(buf, idx + delimiter.length)) return idx;
    pos = idx + 1;
  }
  return -1;
}
549
+
550
// Index of the first byte after the line that contains `fromIndex`, or -1
// when no line feed follows. A CRLF terminator ends at its LF, so locating
// the next LF covers both CRLF and bare-LF line endings.
function _indexOfLineEnd(buf, fromIndex) {
  for (var at = fromIndex; at < buf.length; at += 1) {
    if (buf[at] === 0x0A) return at + 1;
  }
  return -1;
}
557
+
558
// Undo a Content-Transfer-Encoding. `encoding` must already be lower-cased.
// Identity encodings return the input buffer itself, not a copy.
function _decodeBody(buf, encoding) {
  if (encoding === "7bit" || encoding === "8bit" || encoding === "binary") {
    return buf;
  }
  if (encoding === "base64") {
    // Strip line breaks and other whitespace before decoding.
    var compact = buf.toString("ascii").replace(/[\s]+/g, "");
    return Buffer.from(compact, "base64");
  }
  if (encoding === "quoted-printable") {
    return _decodeQuotedPrintable(buf);
  }
  /* istanbul ignore next */
  throw new SafeMimeError("safe-mime/unknown-transfer-encoding",
    "safeMime.parse: unknown encoding '" + encoding + "'");
}
575
+
576
// Decode quoted-printable bytes. Soft line breaks ("=" at end of line) are
// removed first, then every "=HH" escape becomes the byte 0xHH. The work is
// done on a one-char-per-byte string so arbitrary byte values survive.
function _decodeQuotedPrintable(buf) {
  var decoded = buf.toString("binary")
    .replace(/=\r?\n/g, "")
    .replace(/=([0-9A-Fa-f]{2})/g, function (escape, pair) {
      return String.fromCharCode(parseInt(pair, 16)); // allow:raw-byte-literal — parseInt radix 16, not bytes
    });
  return Buffer.from(decoded, "binary");
}
584
+
585
// Decode RFC 2047 encoded-words (`=?charset?B|Q?text?=`) inside a header
// value; text outside encoded-words is returned untouched.
//
// Two details of the spec are honoured:
//  - RFC 2047 §6.2: whitespace BETWEEN two adjacent encoded-words is ignored,
//    so a long subject split across several encoded-words does not gain a
//    stray space at each join.
//  - RFC 2231 §5: the charset may carry a language suffix ("utf-8*en"); the
//    suffix is dropped before the charset is used.
function _decodeRfc2047Words(value) {
  var joined = value.replace(
    /(=\?[^?]+\?[QqBb]\?[^?]*\?=)[ \t]+(?==\?[^?]+\?[QqBb]\?[^?]*\?=)/g,
    "$1"
  );
  return joined.replace(
    /=\?([^?]+)\?([QqBb])\?([^?]*)\?=/g,
    function (_, charset, mode, text) {
      var star = charset.indexOf("*");
      if (star >= 0) charset = charset.slice(0, star);
      var raw;
      if (mode === "B" || mode === "b") {
        raw = Buffer.from(text, "base64");
      } else {
        // "Q" encoding: "_" stands for a space and "=HH" for the byte 0xHH.
        raw = Buffer.from(text.replace(/_/g, " ").replace(/=([0-9A-Fa-f]{2})/g,
          function (__, hex) { return String.fromCharCode(parseInt(hex, 16)); }), "binary"); // allow:raw-byte-literal — parseInt radix 16, not bytes
      }
      return _decodeBufferAs(raw, charset);
    }
  );
}
600
+
601
// Decode bytes to a string using the named charset.
//
// buf     — Buffer to decode.
// charset — charset label; falsy means "us-ascii". Case-insensitive.
// Returns the decoded string. UTF-8, ASCII, latin1 and UTF-16LE use Buffer's
// built-in decoders. Any other label (windows-1252, utf-16be, shift_jis, …)
// is handed to the runtime's TextDecoder so those bodies are not misread as
// UTF-8. When the runtime does not know the label, the result falls back to
// UTF-8 decoding.
function _decodeBufferAs(buf, charset) {
  var c = String(charset || "us-ascii").toLowerCase().trim();
  if (c === "utf-8" || c === "utf8") return buf.toString("utf8");
  if (c === "us-ascii" || c === "ascii") return buf.toString("ascii");
  if (c === "iso-8859-1" || c === "latin1") return buf.toString("latin1");
  if (c === "utf-16le") return buf.toString("utf16le");
  if (typeof TextDecoder === "function") {
    try {
      return new TextDecoder(c).decode(buf);
    } catch (e) {
      // The TextDecoder constructor throws for labels this runtime / ICU
      // build does not support; deliberately fall through to UTF-8.
    }
  }
  return buf.toString("utf8");
}
609
+
610
// Build the `{ contentType, charset, body }` result for a leaf part, with
// `body` decoded to a string using the leaf's charset.
function _materializeText(part) {
  var leaf = part.leaf;
  var text = _materializeTextValue(leaf.body, leaf.charset);
  return { contentType: leaf.contentType, charset: leaf.charset, body: text };
}
617
+
618
// Decode a body buffer to a string; delegates to the charset decoder.
function _materializeTextValue(buf, charset) {
  var text = _decodeBufferAs(buf, charset);
  return text;
}
621
+
622
// Content type recorded on a parsed part, or "" when the part is missing or
// has none.
function _ct(part) {
  if (!part || !part._contentType) return "";
  return part._contentType;
}
625
+
626
// Work out an attachment's filename from its headers.
//
// headers — object exposing `get(name)` (as returned by the header parser).
// Looks at Content-Disposition `filename` / `filename*` first, then at the
// Content-Type `name` parameter. Returns the filename string, or null when
// neither header supplies one. The value is NOT sanitised (it may contain
// path separators); callers must treat it as untrusted.
function _filenameFromHeaders(headers) {
  // A parameter value is either a quoted string (which may contain ";") or
  // a run of characters up to the next ";".
  var PARAM_VALUE = "(\"(?:[^\"\\\\]|\\\\.)*\"|[^;]+)";

  function stripQuotes(text) {
    if (text.length >= 2 && text.charAt(0) === '"' && text.charAt(text.length - 1) === '"') {
      return text.slice(1, -1);
    }
    return text;
  }

  var cd = headers.get("content-disposition");
  if (cd) {
    var m = new RegExp("filename\\*?=" + PARAM_VALUE, "i").exec(cd);
    if (m) {
      var raw = stripQuotes(m[1].trim());
      // Extended form: charset'language'percent-encoded-value.
      if (/^[A-Za-z0-9_-]+'[A-Za-z0-9_-]*'/.test(raw)) {
        // Take everything after the SECOND apostrophe so an apostrophe in
        // the value itself does not truncate the name.
        var encoded = raw.slice(raw.indexOf("'", raw.indexOf("'") + 1) + 1);
        try {
          return decodeURIComponent(encoded);
        } catch (e) {
          // Malformed or non-UTF-8 percent-encoding in untrusted input must
          // not escape as a URIError; hand back the undecoded value instead.
          return encoded;
        }
      }
      return raw;
    }
  }
  var ct = headers.get("content-type");
  if (ct) {
    var m2 = new RegExp("name=" + PARAM_VALUE, "i").exec(ct);
    if (m2) {
      return stripQuotes(m2[1].trim());
    }
  }
  return null;
}
655
+
656
// Coerce parser input to a Buffer. A Buffer is returned as-is (same
// instance); a string is encoded as UTF-8; any other Uint8Array is copied.
// Every other input type is refused.
function _toBuffer(input) {
  if (Buffer.isBuffer(input)) return input;
  var isText = typeof input === "string";
  if (isText || input instanceof Uint8Array) {
    return isText ? Buffer.from(input, "utf8") : Buffer.from(input);
  }
  throw new SafeMimeError("safe-mime/bad-input",
    "safeMime.parse: input must be Buffer, Uint8Array, or string (got " + typeof input + ")");
}
663
+
664
// Read a numeric option. Missing (undefined / null) yields `fallback`; a
// supplied value must be a positive, finite integer of type number.
function _intOpt(opts, key, fallback) {
  var supplied = opts[key];
  if (supplied === undefined || supplied === null) return fallback;
  var acceptable = typeof supplied === "number" && Number.isInteger(supplied) && supplied > 0;
  if (acceptable) return supplied;
  throw new SafeMimeError("safe-mime/bad-opt",
    "safeMime.parse: opts." + key + " must be a positive finite integer (got " + supplied + ")");
}
672
+
673
// Turn a list of names into a prototype-less lookup table keyed by the
// lower-cased string form of each entry (value is always `true`).
function _normalizeStringSet(arr) {
  var lookup = Object.create(null);
  var index = 0;
  while (index < arr.length) {
    var key = String(arr[index]).toLowerCase();
    lookup[key] = true;
    index += 1;
  }
  return lookup;
}
680
+
681
// Map a charset label to the canonical spelling used for allowlist lookups.
// The label is lower-cased and trimmed; known aliases are rewritten and any
// other label is returned in that cleaned form.
function _normalizeCharsetName(c) {
  var label = String(c).toLowerCase().trim();
  switch (label) {
    case "utf8": return "utf-8";
    case "ascii": return "us-ascii";
    case "latin1": return "iso-8859-1";
    case "cp1252": return "windows-1252";
    case "shift-jis": return "shift_jis";
    default: return label;
  }
}
690
+
691
// Shorten a value for use in an error message: non-strings are stringified,
// and anything longer than the preview cap is cut and suffixed with "...".
function _previewBytes(line) {
  var text = typeof line === "string" ? line : String(line);
  if (text.length <= 64) return text; // allow:raw-byte-literal — log-preview length cap
  return text.slice(0, 64) + "..."; // allow:raw-byte-literal — log-preview length cap
}
695
+
696
+ module.exports = {
697
+ parse: parse,
698
+ walk: walk,
699
+ findFirst: findFirst,
700
+ extractText: extractText,
701
+ extractAttachments: extractAttachments,
702
+ SafeMimeError: SafeMimeError,
703
+ DEFAULTS: Object.freeze({
704
+ maxParts: DEFAULT_MAX_PARTS,
705
+ maxNestingDepth: DEFAULT_MAX_NESTING_DEPTH,
706
+ maxBoundary: DEFAULT_MAX_BOUNDARY,
707
+ maxHeaderBytes: DEFAULT_MAX_HEADER_BYTES,
708
+ maxHeaderLineBytes: DEFAULT_MAX_HEADER_LINE,
709
+ maxBodyBytes: DEFAULT_MAX_BODY_BYTES,
710
+ maxMessageBytes: DEFAULT_MAX_MESSAGE_BYTES,
711
+ charsetAllowlist: DEFAULT_CHARSETS,
712
+ transferEncodingAllowlist: DEFAULT_TRANSFER_ENCODINGS,
713
+ }),
714
+ };