@bobfrankston/rmfmail 1.1.123 → 1.1.125
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/client/app.bundle.js +6 -2
- package/client/app.bundle.js.map +2 -2
- package/client/app.js +7 -1
- package/client/app.js.map +1 -1
- package/client/app.ts +10 -1
- package/client/components/message-viewer.js +7 -1
- package/client/components/message-viewer.js.map +1 -1
- package/client/components/message-viewer.ts +7 -1
- package/package.json +3 -3
- package/packages/mailx-imap/index.d.ts.map +1 -1
- package/packages/mailx-imap/index.js +43 -4
- package/packages/mailx-imap/index.js.map +1 -1
- package/packages/mailx-imap/index.ts +41 -4
- package/packages/mailx-imap/package-lock.json +2 -2
- package/packages/mailx-imap/package.json +1 -1
- package/packages/mailx-store/charset.d.ts +23 -9
- package/packages/mailx-store/charset.d.ts.map +1 -1
- package/packages/mailx-store/charset.js +47 -25
- package/packages/mailx-store/charset.js.map +1 -1
- package/packages/mailx-store/charset.ts +51 -23
- package/packages/mailx-store/index.d.ts +1 -0
- package/packages/mailx-store/index.d.ts.map +1 -1
- package/packages/mailx-store/index.js +1 -0
- package/packages/mailx-store/index.js.map +1 -1
- package/packages/mailx-store/index.ts +1 -0
- package/packages/mailx-store/package.json +1 -1
- /package/packages/mailx-imap/{node_modules.npmglobalize-stash-65548 → node_modules.npmglobalize-stash-30208}/.package-lock.json +0 -0
|
@@ -7,7 +7,7 @@
|
|
|
7
7
|
import { createAutoImapConfig, CompatImapClient } from "@bobfrankston/iflow-direct";
|
|
8
8
|
import type { TransportFactory } from "@bobfrankston/tcp-transport";
|
|
9
9
|
import { authenticateOAuth } from "@bobfrankston/oauthsupport";
|
|
10
|
-
import { MailxDB, FileMessageStore, parseSerial, storeBus, Store } from "@bobfrankston/mailx-store";
|
|
10
|
+
import { MailxDB, FileMessageStore, parseSerial, fixCharsetDeclString, storeBus, Store } from "@bobfrankston/mailx-store";
|
|
11
11
|
import { loadSettings, getStorePath, getConfigDir, getHistoryDays, getPrefetch } from "@bobfrankston/mailx-settings";
|
|
12
12
|
import type { AccountConfig, MessageEnvelope, EmailAddress, Folder } from "@bobfrankston/mailx-types";
|
|
13
13
|
import { EventEmitter } from "node:events";
|
|
@@ -175,7 +175,11 @@ function decodeEntities(text: string): string {
|
|
|
175
175
|
/** Extract a plain-text preview from message source */
|
|
176
176
|
async function extractPreview(source: string): Promise<{ bodyHtml: string; bodyText: string; preview: string; hasAttachments: boolean }> {
|
|
177
177
|
try {
|
|
178
|
-
|
|
178
|
+
// Normalize iso-8859-1 → windows-1252 in the part charset declaration
|
|
179
|
+
// before parsing, so the preview decodes 0x80-0x9F (smart quotes,
|
|
180
|
+
// em-dash) correctly — same correction the viewer path gets via
|
|
181
|
+
// sniffAndFixCharset (Bob 2026-05-22).
|
|
182
|
+
const parsed = await parseSerial(fixCharsetDeclString(source), "background");
|
|
179
183
|
const bodyText = parsed.text || "";
|
|
180
184
|
const bodyHtml = parsed.html || "";
|
|
181
185
|
// Use text part; fall back to stripping HTML tags if text is empty
|
|
@@ -3201,6 +3205,15 @@ export class ImapManager extends EventEmitter {
|
|
|
3201
3205
|
}
|
|
3202
3206
|
continue;
|
|
3203
3207
|
}
|
|
3208
|
+
// Success — clear the in-flight delete/move
|
|
3209
|
+
// suppression (see the IMAP branch below for the
|
|
3210
|
+
// full rationale). After a move the local row lives
|
|
3211
|
+
// at the TARGET folder, so the (uid, folder) lookup
|
|
3212
|
+
// uses targetFolderId.
|
|
3213
|
+
if (action.action === "move") {
|
|
3214
|
+
this.db.clearTombstoneForUid(accountId, action.uid, action.targetFolderId);
|
|
3215
|
+
this.db.clearMessagePendingDelete(accountId, action.uid, action.targetFolderId);
|
|
3216
|
+
}
|
|
3204
3217
|
this.db.completeSyncAction(action.id);
|
|
3205
3218
|
} catch (e: any) {
|
|
3206
3219
|
console.error(` [api] ${accountId}: flag sync failed UID ${action.uid}: ${e.message}`);
|
|
@@ -3209,7 +3222,10 @@ export class ImapManager extends EventEmitter {
|
|
|
3209
3222
|
// Terminal failure on delete/move → clear tombstone
|
|
3210
3223
|
// so the row reappears on next sync (server still
|
|
3211
3224
|
// has it). Same rationale as the IMAP branch below.
|
|
3212
|
-
if (action.action === "
|
|
3225
|
+
if (action.action === "move") {
|
|
3226
|
+
this.db.clearTombstoneForUid(accountId, action.uid, action.targetFolderId);
|
|
3227
|
+
this.db.clearMessagePendingDelete(accountId, action.uid, action.targetFolderId);
|
|
3228
|
+
} else if (action.action === "delete") {
|
|
3213
3229
|
this.db.clearTombstoneForUid(accountId, action.uid, action.folderId);
|
|
3214
3230
|
}
|
|
3215
3231
|
this.db.completeSyncAction(action.id);
|
|
@@ -3283,6 +3299,22 @@ export class ImapManager extends EventEmitter {
|
|
|
3283
3299
|
break;
|
|
3284
3300
|
}
|
|
3285
3301
|
}
|
|
3302
|
+
// Success: the local action reached the server. Lift the
|
|
3303
|
+
// in-flight delete/move suppression so the destination
|
|
3304
|
+
// folder syncs the moved message under its real
|
|
3305
|
+
// post-move UID. hasTombstone's documented contract is
|
|
3306
|
+
// "cleared on successful action complete OR permanent
|
|
3307
|
+
// failure" — only the failure half had been wired, so a
|
|
3308
|
+
// successful move left a Message-ID tombstone alive for
|
|
3309
|
+
// 30 days, blocking the Trash folder from ever storing
|
|
3310
|
+
// the message (and resurrecting it once the tombstone
|
|
3311
|
+
// finally aged out). After a move the local row lives at
|
|
3312
|
+
// the TARGET folder, so the (uid, folder) lookup uses
|
|
3313
|
+
// targetFolderId.
|
|
3314
|
+
if (action.action === "move") {
|
|
3315
|
+
this.db.clearTombstoneForUid(accountId, action.uid, action.targetFolderId);
|
|
3316
|
+
this.db.clearMessagePendingDelete(accountId, action.uid, action.targetFolderId);
|
|
3317
|
+
}
|
|
3286
3318
|
this.db.completeSyncAction(action.id);
|
|
3287
3319
|
} catch (e: any) {
|
|
3288
3320
|
console.error(` [sync] Failed action ${action.action} UID ${action.uid}: ${e.message}`);
|
|
@@ -3298,7 +3330,12 @@ export class ImapManager extends EventEmitter {
|
|
|
3298
3330
|
// reflecting "your action didn't take, here it is
|
|
3299
3331
|
// again." Applies to delete + move; flags/append
|
|
3300
3332
|
// never tombstone.
|
|
3301
|
-
if (action.action === "
|
|
3333
|
+
if (action.action === "move") {
|
|
3334
|
+
// Local row is at the TARGET folder (the local
|
|
3335
|
+
// move committed; only the server move failed).
|
|
3336
|
+
this.db.clearTombstoneForUid(accountId, action.uid, action.targetFolderId);
|
|
3337
|
+
this.db.clearMessagePendingDelete(accountId, action.uid, action.targetFolderId);
|
|
3338
|
+
} else if (action.action === "delete") {
|
|
3302
3339
|
this.db.clearTombstoneForUid(accountId, action.uid, action.folderId);
|
|
3303
3340
|
}
|
|
3304
3341
|
this.db.completeSyncAction(action.id);
|
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@bobfrankston/mailx-imap",
|
|
3
|
-
"version": "0.1.
|
|
3
|
+
"version": "0.1.57",
|
|
4
4
|
"lockfileVersion": 3,
|
|
5
5
|
"requires": true,
|
|
6
6
|
"packages": {
|
|
7
7
|
"": {
|
|
8
8
|
"name": "@bobfrankston/mailx-imap",
|
|
9
|
-
"version": "0.1.
|
|
9
|
+
"version": "0.1.57",
|
|
10
10
|
"license": "ISC",
|
|
11
11
|
"dependencies": {
|
|
12
12
|
"@bobfrankston/iflow-direct": "^0.1.27",
|
|
@@ -1,15 +1,29 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Charset normalization for incoming email bodies.
|
|
2
|
+
* Charset normalization for incoming email bodies. Two corrections, both
|
|
3
|
+
* applied by rewriting the part's `charset=` declaration before the parser
|
|
4
|
+
* decodes it:
|
|
3
5
|
*
|
|
4
|
-
*
|
|
5
|
-
*
|
|
6
|
-
*
|
|
7
|
-
*
|
|
8
|
-
*
|
|
9
|
-
*
|
|
10
|
-
*
|
|
6
|
+
* 1. Mis-declared UTF-8: PHPMailer-style senders declare `iso-8859-1`
|
|
7
|
+
* (the PHP default) but emit UTF-8 bytes. When the raw body really is
|
|
8
|
+
* valid UTF-8, rewrite the declaration to utf-8.
|
|
9
|
+
*
|
|
10
|
+
* 2. iso-8859-1 → windows-1252: senders that genuinely mean a legacy
|
|
11
|
+
* 8-bit charset overwhelmingly emit Windows-1252, not strict
|
|
12
|
+
* ISO-8859-1 — smart quotes / em-dash / euro live in 0x80-0x9F, which
|
|
13
|
+
* ISO-8859-1 leaves as unusable C1 control codes. The WHATWG / browser
|
|
14
|
+
* standard is to decode `iso-8859-1` AS `windows-1252`; iconv-lite
|
|
15
|
+
* does not, so we rewrite the declaration. windows-1252 is a strict
|
|
16
|
+
* superset of printable ISO-8859-1, so a genuine ISO-8859-1 char is
|
|
17
|
+
* never lost.
|
|
11
18
|
*/
|
|
12
19
|
/** Returns either the original buffer (no change needed) or a copy with
|
|
13
|
-
* the leading charset declaration
|
|
20
|
+
* the leading charset declaration corrected. */
|
|
14
21
|
export declare function sniffAndFixCharset(raw: Buffer): Buffer;
|
|
22
|
+
/** String form of sniffAndFixCharset — for the preview path, which parses
|
|
23
|
+
* the message as a string (extractPreview). The rewrite is applied
|
|
24
|
+
* to the charset declaration in the text. The UTF-8-misdeclare correction
|
|
25
|
+
* can't be done on a string (the bytes are already decoded), so this only
|
|
26
|
+
* does correction 2 (iso-8859-1 → windows-1252), which is a pure
|
|
27
|
+
* declaration rewrite and the common case for previews. */
|
|
28
|
+
export declare function fixCharsetDeclString(s: string): string;
|
|
15
29
|
//# sourceMappingURL=charset.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"charset.d.ts","sourceRoot":"","sources":["charset.ts"],"names":[],"mappings":"AAAA
|
|
1
|
+
{"version":3,"file":"charset.d.ts","sourceRoot":"","sources":["charset.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;GAiBG;AAEH;iDACiD;AACjD,wBAAgB,kBAAkB,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM,CA4BtD;AAED;;;;;4DAK4D;AAC5D,wBAAgB,oBAAoB,CAAC,CAAC,EAAE,MAAM,GAAG,MAAM,CAEtD"}
|
|
@@ -1,36 +1,58 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Charset normalization for incoming email bodies.
|
|
2
|
+
* Charset normalization for incoming email bodies. Two corrections, both
|
|
3
|
+
* applied by rewriting the part's `charset=` declaration before the parser
|
|
4
|
+
* decodes it:
|
|
3
5
|
*
|
|
4
|
-
*
|
|
5
|
-
*
|
|
6
|
-
*
|
|
7
|
-
*
|
|
8
|
-
*
|
|
9
|
-
*
|
|
10
|
-
*
|
|
6
|
+
* 1. Mis-declared UTF-8: PHPMailer-style senders declare `iso-8859-1`
|
|
7
|
+
* (the PHP default) but emit UTF-8 bytes. When the raw body really is
|
|
8
|
+
* valid UTF-8, rewrite the declaration to utf-8.
|
|
9
|
+
*
|
|
10
|
+
* 2. iso-8859-1 → windows-1252: senders that genuinely mean a legacy
|
|
11
|
+
* 8-bit charset overwhelmingly emit Windows-1252, not strict
|
|
12
|
+
* ISO-8859-1 — smart quotes / em-dash / euro live in 0x80-0x9F, which
|
|
13
|
+
* ISO-8859-1 leaves as unusable C1 control codes. The WHATWG / browser
|
|
14
|
+
* standard is to decode `iso-8859-1` AS `windows-1252`; iconv-lite
|
|
15
|
+
* does not, so we rewrite the declaration. windows-1252 is a strict
|
|
16
|
+
* superset of printable ISO-8859-1, so a genuine ISO-8859-1 char is
|
|
17
|
+
* never lost.
|
|
11
18
|
*/
|
|
12
19
|
/**
 * Returns either the original buffer (no change needed) or a copy with the
 * leading charset declaration corrected.
 *
 * Only the first HEAD_LIMIT bytes are scanned for `charset=` declarations;
 * the remainder of the buffer is appended unchanged.
 *
 * @param raw Raw message bytes.
 * @returns `raw` itself when nothing was rewritten, otherwise a new Buffer.
 */
export function sniffAndFixCharset(raw) {
    const HEAD_LIMIT = 16384;
    // latin1 maps one byte to one char, so head.length is also the byte count.
    const head = raw.subarray(0, Math.min(HEAD_LIMIT, raw.length)).toString("latin1");
    // (?![\w-]) makes the charset name end where the match ends. Without it
    // "iso-8859-15" matched as "iso-8859-1" and was rewritten to the invalid
    // "utf-85" / "windows-12525".
    const legacyRe = /charset\s*=\s*"?(iso-8859-1|us-ascii|windows-1252|latin1)(?![\w-])"?/gi;
    if (!legacyRe.test(head))
        return raw;
    // Correction 1 — mis-declared UTF-8. Only sound when the raw bytes ARE
    // the body bytes (an 8bit part): for a quoted-printable / base64 part
    // the raw .eml is pure ASCII, so isValidUtf8 passes vacuously and we'd
    // wrongly relabel a real Windows-1252 part as utf-8 (Bob 2026-05-21).
    // Requiring a real non-ASCII byte gates the UTF-8 sniff to where it
    // means something.
    if (hasNonAscii(raw) && isValidUtf8(raw)) {
        const fixed = head.replace(legacyRe, "charset=utf-8");
        return Buffer.concat([Buffer.from(fixed, "latin1"), raw.subarray(head.length)]);
    }
    // Correction 2 — iso-8859-1 / latin1 → windows-1252. Applies whether
    // the part is QP or 8bit; the rewrite is purely on the declaration.
    // (Bob 2026-05-22: an Intuit mail declared ISO-8859-1, QP-encoded a
    // 0x92 apostrophe → latin1 decode produced a U+0092 control char.)
    const isoRe = /charset\s*=\s*"?(iso-8859-1|latin1)(?![\w-])"?/gi;
    if (isoRe.test(head)) {
        const fixed = head.replace(isoRe, "charset=windows-1252");
        return Buffer.concat([Buffer.from(fixed, "latin1"), raw.subarray(head.length)]);
    }
    return raw;
}
|
|
48
|
+
/**
 * String form of sniffAndFixCharset, for the preview path, which parses the
 * message as a string (extractPreview).
 *
 * Only the iso-8859-1 / latin1 → windows-1252 declaration rewrite is applied.
 * The mis-declared-UTF-8 correction can't be done on a string (the bytes are
 * already decoded).
 *
 * The `(?![\w-])` lookahead requires the charset name to end where the match
 * ends, so longer labels sharing the prefix (e.g. iso-8859-15) are left
 * untouched instead of becoming the invalid "windows-12525".
 *
 * @param s Message source as text.
 * @returns `s` with every matching `charset=` declaration replaced by
 *          `charset=windows-1252` (surrounding quotes, if any, are dropped).
 */
export function fixCharsetDeclString(s) {
    return s.replace(/charset\s*=\s*"?(iso-8859-1|latin1)(?![\w-])"?/gi, "charset=windows-1252");
}
|
|
35
57
|
/** True if the buffer contains at least one byte >= 0x80. A pure-ASCII
|
|
36
58
|
* buffer is trivially valid UTF-8, so the isValidUtf8 sniff tells us
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"charset.js","sourceRoot":"","sources":["charset.ts"],"names":[],"mappings":"AAAA
|
|
1
|
+
{"version":3,"file":"charset.js","sourceRoot":"","sources":["charset.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;GAiBG;AAEH;iDACiD;AACjD,MAAM,UAAU,kBAAkB,CAAC,GAAW;IAC1C,MAAM,UAAU,GAAG,KAAK,CAAC;IACzB,MAAM,IAAI,GAAG,GAAG,CAAC,QAAQ,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,UAAU,EAAE,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;IAClF,MAAM,QAAQ,GAAG,+DAA+D,CAAC;IACjF,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC;QAAE,OAAO,GAAG,CAAC;IAErC,uEAAuE;IACvE,sEAAsE;IACtE,uEAAuE;IACvE,sEAAsE;IACtE,oEAAoE;IACpE,mBAAmB;IACnB,IAAI,WAAW,CAAC,GAAG,CAAC,IAAI,WAAW,CAAC,GAAG,CAAC,EAAE,CAAC;QACvC,MAAM,KAAK,GAAG,IAAI,CAAC,OAAO,CAAC,QAAQ,EAAE,eAAe,CAAC,CAAC;QACtD,OAAO,MAAM,CAAC,MAAM,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,KAAK,EAAE,QAAQ,CAAC,EAAE,GAAG,CAAC,QAAQ,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;IACpF,CAAC;IAED,qEAAqE;IACrE,oEAAoE;IACpE,oEAAoE;IACpE,mEAAmE;IACnE,MAAM,KAAK,GAAG,yCAAyC,CAAC;IACxD,IAAI,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;QACnB,MAAM,KAAK,GAAG,IAAI,CAAC,OAAO,CAAC,KAAK,EAAE,sBAAsB,CAAC,CAAC;QAC1D,OAAO,MAAM,CAAC,MAAM,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,KAAK,EAAE,QAAQ,CAAC,EAAE,GAAG,CAAC,QAAQ,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;IACpF,CAAC;IAED,OAAO,GAAG,CAAC;AACf,CAAC;AAED;;;;;4DAK4D;AAC5D,MAAM,UAAU,oBAAoB,CAAC,CAAS;IAC1C,OAAO,CAAC,CAAC,OAAO,CAAC,yCAAyC,EAAE,sBAAsB,CAAC,CAAC;AACxF,CAAC;AAED;;2EAE2E;AAC3E,SAAS,WAAW,CAAC,GAAW;IAC5B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAClC,IAAI,GAAG,CAAC,CAAC,CAAC,IAAI,IAAI;YAAE,OAAO,IAAI,CAAC;IACpC,CAAC;IACD,OAAO,KAAK,CAAC;AACjB,CAAC;AAED;;qDAEqD;AACrD,SAAS,WAAW,CAAC,GAAW;IAC5B,IAAI,CAAC,GAAG,CAAC,CAAC;IACV,OAAO,CAAC,GAAG,GAAG,CAAC,MAAM,EAAE,CAAC;QACpB,MAAM,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC;QACjB,IAAI,CAAC,GAAG,IAAI,EAAE,CAAC;YAAC,CAAC,EAAE,CAAC;YAAC,SAAS;QAAC,CAAC;QAChC,IAAI,IAAY,CAAC;QACjB,IAAI,CAAC,CAAC,GAAG,IAAI,CAAC,KAAK,IAAI,EAAE,CAAC;YAAC,IAAI,CAAC,GAAG,IAAI;gBAAE,OAAO,KAAK,CAAC;YAAC,IAAI,GAAG,CAAC,CAAC;QAAC,CAAC;aAC7D,IAAI,CAAC,CAAC,GAAG,IAAI,CAAC,KAAK,IAAI;YAAE,IAAI,GAAG,CAAC,CAAC;aAClC,IAAI,CAA
C,CAAC,GAAG,IAAI,CAAC,KAAK,IAAI,EAAE,CAAC;YAAC,IAAI,CAAC,GAAG,IAAI;gBAAE,OAAO,KAAK,CAAC;YAAC,IAAI,GAAG,CAAC,CAAC;QAAC,CAAC;;YAClE,OAAO,KAAK,CAAC;QAClB,IAAI,CAAC,GAAG,IAAI,IAAI,GAAG,CAAC,MAAM;YAAE,OAAO,KAAK,CAAC;QACzC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,IAAI,IAAI,EAAE,CAAC,EAAE,EAAE,CAAC;YAC7B,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,IAAI,CAAC,KAAK,IAAI;gBAAE,OAAO,KAAK,CAAC;QACnD,CAAC;QACD,CAAC,IAAI,IAAI,GAAG,CAAC,CAAC;IAClB,CAAC;IACD,OAAO,IAAI,CAAC;AAChB,CAAC"}
|
|
@@ -1,34 +1,62 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Charset normalization for incoming email bodies.
|
|
2
|
+
* Charset normalization for incoming email bodies. Two corrections, both
|
|
3
|
+
* applied by rewriting the part's `charset=` declaration before the parser
|
|
4
|
+
* decodes it:
|
|
3
5
|
*
|
|
4
|
-
*
|
|
5
|
-
*
|
|
6
|
-
*
|
|
7
|
-
*
|
|
8
|
-
*
|
|
9
|
-
*
|
|
10
|
-
*
|
|
6
|
+
* 1. Mis-declared UTF-8: PHPMailer-style senders declare `iso-8859-1`
|
|
7
|
+
* (the PHP default) but emit UTF-8 bytes. When the raw body really is
|
|
8
|
+
* valid UTF-8, rewrite the declaration to utf-8.
|
|
9
|
+
*
|
|
10
|
+
* 2. iso-8859-1 → windows-1252: senders that genuinely mean a legacy
|
|
11
|
+
* 8-bit charset overwhelmingly emit Windows-1252, not strict
|
|
12
|
+
* ISO-8859-1 — smart quotes / em-dash / euro live in 0x80-0x9F, which
|
|
13
|
+
* ISO-8859-1 leaves as unusable C1 control codes. The WHATWG / browser
|
|
14
|
+
* standard is to decode `iso-8859-1` AS `windows-1252`; iconv-lite
|
|
15
|
+
* does not, so we rewrite the declaration. windows-1252 is a strict
|
|
16
|
+
* superset of printable ISO-8859-1, so a genuine ISO-8859-1 char is
|
|
17
|
+
* never lost.
|
|
11
18
|
*/
|
|
12
19
|
|
|
13
20
|
/**
 * Returns either the original buffer (no change needed) or a copy with the
 * leading charset declaration corrected.
 *
 * Only the first HEAD_LIMIT bytes are scanned for `charset=` declarations;
 * the remainder of the buffer is appended unchanged.
 *
 * @param raw Raw message bytes.
 * @returns `raw` itself when nothing was rewritten, otherwise a new Buffer.
 */
export function sniffAndFixCharset(raw: Buffer): Buffer {
    const HEAD_LIMIT = 16384;
    // latin1 maps one byte to one char, so head.length is also the byte count.
    const head = raw.subarray(0, Math.min(HEAD_LIMIT, raw.length)).toString("latin1");
    // (?![\w-]) makes the charset name end where the match ends. Without it
    // "iso-8859-15" matched as "iso-8859-1" and was rewritten to the invalid
    // "utf-85" / "windows-12525".
    const legacyRe = /charset\s*=\s*"?(iso-8859-1|us-ascii|windows-1252|latin1)(?![\w-])"?/gi;
    if (!legacyRe.test(head)) return raw;

    // Correction 1 — mis-declared UTF-8. Only sound when the raw bytes ARE
    // the body bytes (an 8bit part): for a quoted-printable / base64 part
    // the raw .eml is pure ASCII, so isValidUtf8 passes vacuously and we'd
    // wrongly relabel a real Windows-1252 part as utf-8 (Bob 2026-05-21).
    // Requiring a real non-ASCII byte gates the UTF-8 sniff to where it
    // means something.
    if (hasNonAscii(raw) && isValidUtf8(raw)) {
        const fixed = head.replace(legacyRe, "charset=utf-8");
        return Buffer.concat([Buffer.from(fixed, "latin1"), raw.subarray(head.length)]);
    }

    // Correction 2 — iso-8859-1 / latin1 → windows-1252. Applies whether
    // the part is QP or 8bit; the rewrite is purely on the declaration.
    // (Bob 2026-05-22: an Intuit mail declared ISO-8859-1, QP-encoded a
    // 0x92 apostrophe → latin1 decode produced a U+0092 control char.)
    const isoRe = /charset\s*=\s*"?(iso-8859-1|latin1)(?![\w-])"?/gi;
    if (isoRe.test(head)) {
        const fixed = head.replace(isoRe, "charset=windows-1252");
        return Buffer.concat([Buffer.from(fixed, "latin1"), raw.subarray(head.length)]);
    }

    return raw;
}
|
|
51
|
+
|
|
52
|
+
/**
 * String form of sniffAndFixCharset, for the preview path, which parses the
 * message as a string (extractPreview).
 *
 * Only the iso-8859-1 / latin1 → windows-1252 declaration rewrite is applied.
 * The mis-declared-UTF-8 correction can't be done on a string (the bytes are
 * already decoded).
 *
 * The `(?![\w-])` lookahead requires the charset name to end where the match
 * ends, so longer labels sharing the prefix (e.g. iso-8859-15) are left
 * untouched instead of becoming the invalid "windows-12525".
 *
 * @param s Message source as text.
 * @returns `s` with every matching `charset=` declaration replaced by
 *          `charset=windows-1252` (surrounding quotes, if any, are dropped).
 */
export function fixCharsetDeclString(s: string): string {
    return s.replace(/charset\s*=\s*"?(iso-8859-1|latin1)(?![\w-])"?/gi, "charset=windows-1252");
}
|
|
33
61
|
|
|
34
62
|
/** True if the buffer contains at least one byte >= 0x80. A pure-ASCII
|
|
@@ -5,6 +5,7 @@
|
|
|
5
5
|
export { MailxDB } from "./db.js";
|
|
6
6
|
export { FileMessageStore } from "./file-store.js";
|
|
7
7
|
export { parseSerial, prewarmParseWorker } from "./parse-serial.js";
|
|
8
|
+
export { sniffAndFixCharset, fixCharsetDeclString } from "./charset.js";
|
|
8
9
|
export { Store } from "./store.js";
|
|
9
10
|
export type { StoreMessage } from "./store.js";
|
|
10
11
|
export { StoreBus, storeBus } from "@bobfrankston/mailx-bus";
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["index.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EAAE,OAAO,EAAE,MAAM,SAAS,CAAC;AAClC,OAAO,EAAE,gBAAgB,EAAE,MAAM,iBAAiB,CAAC;AACnD,OAAO,EAAE,WAAW,EAAE,kBAAkB,EAAE,MAAM,mBAAmB,CAAC;
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["index.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EAAE,OAAO,EAAE,MAAM,SAAS,CAAC;AAClC,OAAO,EAAE,gBAAgB,EAAE,MAAM,iBAAiB,CAAC;AACnD,OAAO,EAAE,WAAW,EAAE,kBAAkB,EAAE,MAAM,mBAAmB,CAAC;AACpE,OAAO,EAAE,kBAAkB,EAAE,oBAAoB,EAAE,MAAM,cAAc,CAAC;AAGxE,OAAO,EAAE,KAAK,EAAE,MAAM,YAAY,CAAC;AACnC,YAAY,EAAE,YAAY,EAAE,MAAM,YAAY,CAAC;AAM/C,OAAO,EAAE,QAAQ,EAAE,QAAQ,EAAE,MAAM,yBAAyB,CAAC;AAC7D,YAAY,EAAE,UAAU,EAAE,cAAc,EAAE,iBAAiB,EAAE,MAAM,yBAAyB,CAAC"}
|
|
@@ -5,6 +5,7 @@
|
|
|
5
5
|
export { MailxDB } from "./db.js";
|
|
6
6
|
export { FileMessageStore } from "./file-store.js";
|
|
7
7
|
export { parseSerial, prewarmParseWorker } from "./parse-serial.js";
|
|
8
|
+
export { sniffAndFixCharset, fixCharsetDeclString } from "./charset.js";
|
|
8
9
|
// Store — the nexus. Owns DB + .eml files + operations + bus.
|
|
9
10
|
export { Store } from "./store.js";
|
|
10
11
|
// Store-event bus lives in `@bobfrankston/mailx-bus` so the browser-side
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["index.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EAAE,OAAO,EAAE,MAAM,SAAS,CAAC;AAClC,OAAO,EAAE,gBAAgB,EAAE,MAAM,iBAAiB,CAAC;AACnD,OAAO,EAAE,WAAW,EAAE,kBAAkB,EAAE,MAAM,mBAAmB,CAAC;
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["index.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EAAE,OAAO,EAAE,MAAM,SAAS,CAAC;AAClC,OAAO,EAAE,gBAAgB,EAAE,MAAM,iBAAiB,CAAC;AACnD,OAAO,EAAE,WAAW,EAAE,kBAAkB,EAAE,MAAM,mBAAmB,CAAC;AACpE,OAAO,EAAE,kBAAkB,EAAE,oBAAoB,EAAE,MAAM,cAAc,CAAC;AAExE,8DAA8D;AAC9D,OAAO,EAAE,KAAK,EAAE,MAAM,YAAY,CAAC;AAGnC,yEAAyE;AACzE,0EAA0E;AAC1E,yEAAyE;AACzE,uBAAuB;AACvB,OAAO,EAAE,QAAQ,EAAE,QAAQ,EAAE,MAAM,yBAAyB,CAAC"}
|
|
@@ -6,6 +6,7 @@
|
|
|
6
6
|
export { MailxDB } from "./db.js";
|
|
7
7
|
export { FileMessageStore } from "./file-store.js";
|
|
8
8
|
export { parseSerial, prewarmParseWorker } from "./parse-serial.js";
|
|
9
|
+
export { sniffAndFixCharset, fixCharsetDeclString } from "./charset.js";
|
|
9
10
|
|
|
10
11
|
// Store — the nexus. Owns DB + .eml files + operations + bus.
|
|
11
12
|
export { Store } from "./store.js";
|