email-origin-chain 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +425 -0
- package/dist/detectors/crisp-detector.d.ts +11 -0
- package/dist/detectors/crisp-detector.js +46 -0
- package/dist/detectors/index.d.ts +5 -0
- package/dist/detectors/index.js +11 -0
- package/dist/detectors/new-outlook-detector.d.ts +10 -0
- package/dist/detectors/new-outlook-detector.js +112 -0
- package/dist/detectors/outlook-empty-header-detector.d.ts +16 -0
- package/dist/detectors/outlook-empty-header-detector.js +64 -0
- package/dist/detectors/outlook-fr-detector.d.ts +10 -0
- package/dist/detectors/outlook-fr-detector.js +119 -0
- package/dist/detectors/outlook-reverse-fr-detector.d.ts +13 -0
- package/dist/detectors/outlook-reverse-fr-detector.js +86 -0
- package/dist/detectors/registry.d.ts +25 -0
- package/dist/detectors/registry.js +81 -0
- package/dist/detectors/reply-detector.d.ts +11 -0
- package/dist/detectors/reply-detector.js +82 -0
- package/dist/detectors/types.d.ts +38 -0
- package/dist/detectors/types.js +2 -0
- package/dist/index.d.ts +6 -0
- package/dist/index.js +132 -0
- package/dist/inline-layer.d.ts +7 -0
- package/dist/inline-layer.js +116 -0
- package/dist/mime-layer.d.ts +15 -0
- package/dist/mime-layer.js +70 -0
- package/dist/types.d.ts +63 -0
- package/dist/types.js +2 -0
- package/dist/utils/cleaner.d.ts +16 -0
- package/dist/utils/cleaner.js +51 -0
- package/dist/utils.d.ts +17 -0
- package/dist/utils.js +221 -0
- package/docs/TEST_COVERAGE.md +54 -0
- package/docs/architecture/README.md +27 -0
- package/docs/architecture/phase1_cc_fix.md +223 -0
- package/docs/architecture/phase2_plugin_foundation.md +185 -0
- package/docs/architecture/phase3_fallbacks.md +62 -0
- package/docs/architecture/plugin_plan.md +318 -0
- package/docs/architecture/refactor_report.md +98 -0
- package/docs/detectors_usage.md +42 -0
- package/docs/walkthrough_address_fix.md +58 -0
- package/docs/walkthrough_deep_forward_fix.md +35 -0
- package/package.json +48 -0
package/dist/index.js
ADDED
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// Compiler-emitted interop helper: exposes property `k` of module object `m`
// on `o` under the name `k2` (which defaults to `k`). A helper already present
// on `this` is reused instead of defining a new one.
var __createBinding = (this && this.__createBinding) || (function () {
    if (!Object.create) {
        // Without Object.create, fall back to copying the current value.
        return function (o, m, k, k2) {
            var alias = k2 === undefined ? k : k2;
            o[alias] = m[k];
        };
    }
    return function (o, m, k, k2) {
        var alias = k2 === undefined ? k : k2;
        var desc = Object.getOwnPropertyDescriptor(m, k);
        // Decide whether the source descriptor can be reused as-is or has to
        // be replaced by a forwarding getter that reads m[k] on every access.
        var needsGetter;
        if (!desc) {
            needsGetter = true;
        }
        else if ("get" in desc) {
            needsGetter = !m.__esModule;
        }
        else {
            needsGetter = desc.writable || desc.configurable;
        }
        if (needsGetter) {
            desc = { enumerable: true, get: function () { return m[k]; } };
        }
        Object.defineProperty(o, alias, desc);
    };
})();
|
|
13
|
+
// Compiler-emitted helper behind `export * from ...`: binds every enumerable
// key of `m` onto `exports`, except "default" and keys `exports` already owns.
var __exportStar = (this && this.__exportStar) || function (m, exports) {
    var owns = Object.prototype.hasOwnProperty;
    for (var key in m) {
        if (key === "default") {
            continue;
        }
        if (owns.call(exports, key)) {
            continue;
        }
        __createBinding(exports, m, key);
    }
};
|
|
16
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
17
|
+
exports.extractDeepestHybrid = extractDeepestHybrid;
|
|
18
|
+
const mime_layer_1 = require("./mime-layer");
|
|
19
|
+
const inline_layer_1 = require("./inline-layer");
|
|
20
|
+
const utils_1 = require("./utils");
|
|
21
|
+
/**
 * Main entry point: extract the deepest forwarded email using the hybrid
 * strategy (MIME layer first, then the inline layer on the resulting body).
 *
 * @param raw     Raw input string. With `skipMimeLayer` it is handed straight
 *                to the inline layer; otherwise it goes through the MIME layer.
 * @param options Optional settings. Defaults applied here: maxDepth 15,
 *                timeoutMs 10000, skipMimeLayer false, customDetectors [].
 *                `maxDepth` is forwarded to the MIME layer only.
 * @returns The merged result for the deepest message. Any error raised inside
 *          the MIME + inline pipeline (including the MIME timeout) is turned
 *          into a 'fallback' result whose `diagnostics.warnings` holds
 *          "Fatal error: <reason>".
 * @throws Error('Input must be a string') when `raw` is not a string. In
 *         `skipMimeLayer` mode the inline call sits outside the try/catch, so
 *         whatever it throws propagates to the caller.
 */
async function extractDeepestHybrid(raw, options) {
    // Validation
    if (typeof raw !== 'string') {
        throw new Error('Input must be a string');
    }
    const opts = {
        maxDepth: options?.maxDepth ?? 15,
        timeoutMs: options?.timeoutMs ?? 10000,
        skipMimeLayer: options?.skipMimeLayer ?? false,
        customDetectors: options?.customDetectors ?? []
    };
    // If skipMimeLayer is true, parse only inline forwards (text-only mode).
    // The inline result is returned untouched (no MIME enrichment step).
    if (opts.skipMimeLayer) {
        return await (0, inline_layer_1.processInline)(raw, 0, [], opts.customDetectors);
    }
    try {
        // Step 1: MIME layer, raced against a timeout. The timer is always
        // cleared afterwards so it cannot keep the event loop alive.
        let timer;
        const mimeResult = await Promise.race([
            (0, mime_layer_1.processMime)(raw, opts),
            new Promise((_, reject) => {
                timer = setTimeout(() => reject(new Error('MIME parsing timeout')), opts.timeoutMs);
            })
        ]).finally(() => {
            if (timer)
                clearTimeout(timer);
        });
        // Step 2: inline layer, continuing from the MIME depth and history.
        const inlineResult = await (0, inline_layer_1.processInline)(mimeResult.rawBody, mimeResult.depth, mimeResult.history, opts.customDetectors);
        // Step 3: align results
        let from = (0, utils_1.normalizeFrom)(inlineResult.from);
        let subject = inlineResult.subject;
        let date_raw = inlineResult.date_raw;
        let date_iso = inlineResult.date_iso;
        let text = inlineResult.text;
        // When the inline layer found no forward, fill the gaps from the
        // headers of the message the MIME layer parsed last.
        if (inlineResult.diagnostics.method === 'fallback' && mimeResult.metadata) {
            const m = mimeResult.metadata;
            if (!from && m.from?.value?.[0]) {
                from = (0, utils_1.normalizeFrom)({ name: m.from.value[0].name, address: m.from.value[0].address });
            }
            if (!subject && m.subject)
                subject = m.subject;
            if (!date_iso && m.date)
                date_iso = m.date.toISOString();
            if (!date_raw && m.date)
                date_raw = m.date.toString();
            if (!text)
                text = mimeResult.rawBody;
        }
        // Align the root entry of history. The inline layer returns history
        // reversed, so the root is the last element.
        if (inlineResult.history.length > 0) {
            const rootInHistory = inlineResult.history[inlineResult.history.length - 1];
            if (!rootInHistory.from && mimeResult.metadata) {
                const m = mimeResult.metadata;
                if (m.from?.value?.[0]) {
                    rootInHistory.from = (0, utils_1.normalizeFrom)({ name: m.from.value[0].name, address: m.from.value[0].address });
                }
                if (m.subject)
                    rootInHistory.subject = m.subject;
            }
        }
        // Step 4: final enrichment
        const attachments = mimeResult.lastAttachments.map(att => ({
            filename: att.filename,
            contentType: att.contentType || 'application/octet-stream',
            size: att.size || 0
        }));
        date_iso = date_iso || (0, utils_1.normalizeDateToISO)(date_raw);
        // Destructure to exclude 'from' since we have our own normalized version
        const { from: _unusedFrom, ...restInlineResult } = inlineResult;
        const result = {
            ...restInlineResult,
            // Use our normalized/enriched values
            from,
            subject,
            date_raw,
            date_iso,
            text: (0, utils_1.cleanText)(text),
            attachments: [...attachments, ...inlineResult.attachments],
            diagnostics: {
                ...inlineResult.diagnostics,
                // Total depth = MIME levels descended + inline levels found.
                depth: mimeResult.depth + inlineResult.diagnostics.depth,
                // No inline forward but an attached message was unwrapped: report 'rfc822'.
                method: (inlineResult.diagnostics.method === 'fallback' && mimeResult.isRfc822) ? 'rfc822' : inlineResult.diagnostics.method,
                parsedOk: !!(from && subject) || !!(from && inlineResult.diagnostics.method !== 'fallback'),
                warnings: [...inlineResult.diagnostics.warnings]
            }
        };
        return result;
    }
    catch (error) {
        // A thrown value is not guaranteed to be an Error (custom detectors may
        // throw anything, including null). Reading `.message` unguarded would
        // either throw inside this handler or report "undefined".
        const reason = error?.message ?? String(error);
        return {
            from: null,
            subject: null,
            date_raw: null,
            date_iso: null,
            text: (0, utils_1.cleanText)(raw),
            attachments: [],
            history: [],
            diagnostics: {
                method: 'fallback',
                depth: 0,
                parsedOk: false,
                warnings: [`Fatal error: ${reason}`]
            }
        };
    }
}
|
|
132
|
+
__exportStar(require("./types"), exports);
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
import { ResultObject, HistoryEntry } from './types';
|
|
2
|
+
import { ForwardDetector } from './detectors/types';
|
|
3
|
+
/**
|
|
4
|
+
* Process inline forwarded content recursively.
|
|
5
|
+
* Uses a manual loop with DetectorRegistry to allow multiple strategies (lib, custom regexes, etc.)
|
|
6
|
+
*/
|
|
7
|
+
export declare function processInline(text: string, depth: number, baseHistory?: HistoryEntry[], customDetectors?: ForwardDetector[]): Promise<ResultObject>;
|
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.processInline = processInline;
|
|
4
|
+
const detectors_1 = require("./detectors");
|
|
5
|
+
const utils_1 = require("./utils");
|
|
6
|
+
/**
 * Process inline forwarded content, one nested level per loop iteration.
 * Uses a manual loop with DetectorRegistry to allow multiple strategies
 * (lib, custom regexes, etc.).
 *
 * @param text            Body text to scan for forwarded messages.
 * @param depth           Depth of the starting level. The loop stops once the
 *                        running depth reaches 15, so a non-zero start leaves
 *                        fewer inline levels available.
 * @param baseHistory     History entries gathered so far. The array is copied,
 *                        but its entries are shared objects: the last one has
 *                        its `text` (and possibly `flags`) updated here.
 * @param customDetectors Extra detectors handed to the DetectorRegistry.
 * @returns Fields of the deepest level found, plus `history` ordered deepest
 *          first (root last). When nothing is detected, `diagnostics.method`
 *          is 'fallback' and `parsedOk` is false.
 */
async function processInline(text, depth, baseHistory = [], customDetectors = []) {
    const warnings = [];
    const registry = new detectors_1.DetectorRegistry(customDetectors);
    const history = [...baseHistory];
    let currentText = text.trim();
    const startingDepth = depth;
    let currentDepth = depth;
    const maxRecursiveDepth = 15; // Increased for deep chains
    // Ensure we have at least one entry representing the "current" starting point
    if (history.length === 0) {
        history.push({
            from: null,
            subject: null,
            date_raw: null,
            date_iso: null,
            text: '',
            depth: currentDepth,
            flags: ['level:root', 'trust:medium_inline']
        });
    }
    // Detection loop: This allows combining the library (CrispDetector)
    // with custom local detectors (OutlookFRDetector, etc.)
    while (currentDepth < maxRecursiveDepth) {
        const result = registry.detect(currentText);
        if (!result.found || !result.email) {
            // No more forwards detected: what is left is the text of the last level.
            const lastIdx = history.length - 1;
            history[lastIdx].text = (0, utils_1.cleanText)(currentText);
            break;
        }
        const email = result.email;
        // Update previous level's exclusive text (what was written above the forward)
        const previousIdx = history.length - 1;
        history[previousIdx].text = (0, utils_1.cleanText)(result.message || '');
        if (!history[previousIdx].text && !history[previousIdx].flags.includes('content:silent_forward')) {
            history[previousIdx].flags.push('content:silent_forward');
        }
        // Build flags
        const flags = [`method:${result.detector || 'unknown'}`, 'trust:medium_inline'];
        if (!email.body || email.body.trim() === '') {
            flags.push('content:silent_forward');
        }
        // Normalize date
        const dateIso = (0, utils_1.normalizeDateToISO)(email.date);
        if (email.date && !dateIso) {
            warnings.push(`Could not normalize date: "${email.date}"`);
            flags.push('date:unparseable');
        }
        // Normalize from address (fix patterns like "email [email]").
        // `typeof null` is 'object', so null must be excluded explicitly;
        // otherwise a detector returning `from: null` would crash on `.name`.
        let fromNormalized;
        if (email.from !== null && typeof email.from === 'object') {
            fromNormalized = { name: email.from.name, address: email.from.address };
        }
        else if (email.from) {
            fromNormalized = { address: email.from };
        }
        else {
            fromNormalized = null;
        }
        fromNormalized = (0, utils_1.normalizeFrom)(fromNormalized);
        // Add this forward level to history
        history.push({
            from: fromNormalized,
            subject: email.subject || null,
            date_raw: email.date || null,
            date_iso: dateIso,
            text: (0, utils_1.cleanText)(email.body || ''),
            depth: currentDepth + 1,
            flags: flags
        });
        // Continue with the body for next iteration
        currentText = (email.body || '').trim();
        currentDepth++;
    }
    // At least one forward was found: report the deepest entry
    if (currentDepth > startingDepth) {
        const deepestEntry = history[history.length - 1];
        if (!deepestEntry.flags.includes('level:deepest')) {
            deepestEntry.flags.push('level:deepest');
        }
        return {
            from: deepestEntry.from,
            subject: deepestEntry.subject,
            date_raw: deepestEntry.date_raw,
            date_iso: deepestEntry.date_iso,
            text: deepestEntry.text,
            attachments: [],
            history: history.slice().reverse(),
            diagnostics: {
                // Entries pushed by the loop always carry a 'method:<detector>'
                // flag, so this yields that flag string verbatim.
                // NOTE(review): the declared Diagnostics.method union is
                // 'rfc822' | 'inline' | 'fallback' - confirm callers expect
                // the 'method:...' form before changing it.
                method: (deepestEntry.flags.find(f => f.startsWith('method:')) || 'inline'),
                depth: currentDepth - startingDepth,
                parsedOk: true,
                warnings: warnings
            }
        };
    }
    // No forwards found
    const currentEntry = history[history.length - 1];
    return {
        from: currentEntry.from,
        subject: currentEntry.subject,
        date_raw: currentEntry.date_raw,
        date_iso: currentEntry.date_iso,
        text: currentEntry.text || (0, utils_1.cleanText)(currentText),
        attachments: [],
        history: history.slice().reverse(),
        diagnostics: {
            method: 'fallback',
            depth: 0,
            parsedOk: false,
            warnings: warnings.length > 0 ? warnings : ['No forwarded content detected']
        }
    };
}
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
import { Attachment as MailparserAttachment } from 'mailparser';
|
|
2
|
+
import { Options, HistoryEntry } from './types';
|
|
3
|
+
export interface MimeResult {
|
|
4
|
+
rawBody: string;
|
|
5
|
+
depth: number;
|
|
6
|
+
lastAttachments: MailparserAttachment[];
|
|
7
|
+
isRfc822: boolean;
|
|
8
|
+
history: HistoryEntry[];
|
|
9
|
+
metadata?: {
|
|
10
|
+
from?: any;
|
|
11
|
+
subject?: string;
|
|
12
|
+
date?: Date;
|
|
13
|
+
};
|
|
14
|
+
}
|
|
15
|
+
export declare function processMime(raw: string, options: Options): Promise<MimeResult>;
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.processMime = processMime;
|
|
4
|
+
const mailparser_1 = require("mailparser");
|
|
5
|
+
/**
 * Descends through attached `message/rfc822` parts, parsing one level per
 * iteration, and records every parsed level in `history`.
 *
 * @param raw     Raw message source; must be a string.
 * @param options Settings object; `maxDepth` bounds the descent (5 when falsy).
 * @returns The body text, depth reached, attachments and header metadata of
 *          the last message parsed. If a parse step throws, or the depth limit
 *          is reached after unwrapping an attachment, the unparsed source of
 *          the current level is returned as `rawBody`, without `metadata`.
 * @throws Error when `raw` is not a string.
 */
async function processMime(raw, options) {
    const depthLimit = options.maxDepth || 5;
    // Safety check
    if (typeof raw !== 'string') {
        throw new Error("MIME parser input must be a string");
    }
    const history = [];
    let source = raw;
    let depth = 0;
    let isRfc822 = false;
    // Iterative descent: each pass parses `source`, then either dives into
    // the last attached message or returns the parsed result.
    while (depth < depthLimit) {
        try {
            const parsed = await (0, mailparser_1.simpleParser)(source);
            const sender = parsed.from?.value?.[0];
            // Record current level in history
            history.push({
                from: sender ? { name: sender.name, address: sender.address } : null,
                subject: parsed.subject || null,
                date_raw: parsed.date?.toString() || null,
                date_iso: parsed.date ? parsed.date.toISOString() : null,
                text: parsed.text || null,
                depth,
                flags: ['trust:high_mime']
            });
            // Attached messages: only the last one is followed.
            const embedded = parsed.attachments.filter(part => part.contentType === 'message/rfc822');
            const innermost = embedded.length > 0 ? embedded[embedded.length - 1] : undefined;
            if (innermost && innermost.content) {
                source = innermost.content.toString('utf8');
                depth += 1;
                isRfc822 = true;
                continue;
            }
            return {
                rawBody: parsed.text || source,
                depth,
                lastAttachments: parsed.attachments,
                isRfc822,
                history,
                metadata: {
                    from: parsed.from,
                    subject: parsed.subject,
                    date: parsed.date
                }
            };
        }
        catch (error) {
            // Any failure in this pass ends the descent; the fallback below applies.
            break;
        }
    }
    // Reached on parse failure or depth limit: no attachments or metadata are reported.
    return {
        rawBody: source,
        depth,
        lastAttachments: [],
        isRfc822,
        history
    };
}
|
package/dist/types.d.ts
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
import { ForwardDetector, DetectionResult } from './detectors/types';
|
|
2
|
+
export { ForwardDetector, DetectionResult };
|
|
3
|
+
export interface EmailAddress {
|
|
4
|
+
name?: string;
|
|
5
|
+
address?: string;
|
|
6
|
+
}
|
|
7
|
+
export interface Attachment {
|
|
8
|
+
filename?: string;
|
|
9
|
+
contentType: string;
|
|
10
|
+
size: number;
|
|
11
|
+
content?: any;
|
|
12
|
+
}
|
|
13
|
+
export interface Diagnostics {
|
|
14
|
+
method: 'rfc822' | 'inline' | 'fallback';
|
|
15
|
+
depth: number;
|
|
16
|
+
parsedOk: boolean;
|
|
17
|
+
warnings: string[];
|
|
18
|
+
}
|
|
19
|
+
export interface HistoryEntry {
|
|
20
|
+
from: EmailAddress | null;
|
|
21
|
+
subject: string | null;
|
|
22
|
+
date_raw: string | null;
|
|
23
|
+
date_iso: string | null;
|
|
24
|
+
text: string | null;
|
|
25
|
+
depth: number;
|
|
26
|
+
flags: string[];
|
|
27
|
+
}
|
|
28
|
+
export interface ResultObject {
|
|
29
|
+
from: EmailAddress | null;
|
|
30
|
+
subject: string | null;
|
|
31
|
+
date_raw: string | null;
|
|
32
|
+
date_iso: string | null;
|
|
33
|
+
text: string | null;
|
|
34
|
+
attachments: Attachment[];
|
|
35
|
+
history: HistoryEntry[];
|
|
36
|
+
diagnostics: Diagnostics;
|
|
37
|
+
}
|
|
38
|
+
/**
|
|
39
|
+
* Options for extraction behavior
|
|
40
|
+
*/
|
|
41
|
+
export interface Options {
|
|
42
|
+
/**
|
|
43
|
+
* Maximum depth to descend through MIME attachments.
|
|
44
|
+
* Default: 15
|
|
45
|
+
*/
|
|
46
|
+
maxDepth?: number;
|
|
47
|
+
/**
|
|
48
|
+
* Maximum time in milliseconds to wait for MIME parsing before timeout.
|
|
49
|
+
* Default: 10000ms
|
|
50
|
+
*/
|
|
51
|
+
timeoutMs?: number;
|
|
52
|
+
/**
|
|
53
|
+
* Skip MIME layer processing and parse only inline forwards.
|
|
54
|
+
* Use this when input is plain text body (not a full email with headers).
|
|
55
|
+
* Default: false
|
|
56
|
+
*/
|
|
57
|
+
skipMimeLayer?: boolean;
|
|
58
|
+
/**
|
|
59
|
+
* Custom forward detectors to register.
|
|
60
|
+
* These will be added to the registry and used for detection.
|
|
61
|
+
*/
|
|
62
|
+
customDetectors?: ForwardDetector[];
|
|
63
|
+
}
|
package/dist/types.js
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
export declare class Cleaner {
|
|
2
|
+
private static normalizationCache;
|
|
3
|
+
/**
|
|
4
|
+
* Normalizes whitespace artifacts (BOM, non-breaking spaces, CRLF line breaks)
|
|
5
|
+
*/
|
|
6
|
+
static normalize(text: string): string;
|
|
7
|
+
/**
|
|
8
|
+
* Consistently strips quotes (>) and common Outlook leading indentation (4 spaces)
|
|
9
|
+
*/
|
|
10
|
+
static stripQuotes(text: string): string;
|
|
11
|
+
/**
|
|
12
|
+
* Robustly identifies the body after a header block by finding the
|
|
13
|
+
* first non-blank line after the last known header line and returning everything from there, trimmed.
|
|
14
|
+
*/
|
|
15
|
+
static extractBody(lines: string[], lastHeaderIndex: number): string;
|
|
16
|
+
}
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.Cleaner = void 0;
|
|
4
|
+
class Cleaner {
    /**
     * Normalizes whitespace artifacts: CRLF becomes LF, BOM characters are
     * removed, a non-breaking space at the end of a line is dropped, remaining
     * non-breaking spaces become regular spaces, and the result is trimmed.
     * Results are memoized per input string; the cache is emptied once it
     * holds more than 200 entries. Falsy input yields ''.
     */
    static normalize(text) {
        if (!text) {
            return '';
        }
        const memo = this.normalizationCache;
        const hit = memo.get(text);
        if (hit !== undefined) {
            return hit;
        }
        let cleaned = text.replace(/\r\n/gm, '\n');
        cleaned = cleaned.replace(/\uFEFF/gm, '');
        cleaned = cleaned.replace(/\u00A0$/gm, '');
        cleaned = cleaned.replace(/\u00A0/gm, ' ');
        cleaned = cleaned.trim();
        if (memo.size > 200) {
            memo.clear();
        }
        memo.set(text, cleaned);
        return cleaned;
    }
    /**
     * Removes leading quote markers (>) and a leading four-space indentation
     * from every line. Quote-only lines are handled first, then quote markers
     * followed by content, then the indentation.
     */
    static stripQuotes(text) {
        const withoutEmptyQuotes = text.replace(/^(>+)\s?$/gm, '');
        const withoutMarkers = withoutEmptyQuotes.replace(/^(>+)\s?/gm, '');
        return withoutMarkers.replace(/^(\ {4})\s?/gm, '');
    }
    /**
     * Returns the body that follows a header block: skips the blank lines
     * right after `lastHeaderIndex`, then joins the remaining lines with '\n'
     * and trims the result.
     */
    static extractBody(lines, lastHeaderIndex) {
        let start = lastHeaderIndex + 1;
        for (; start < lines.length; start++) {
            if (lines[start].trim() !== '') {
                break;
            }
        }
        return lines.slice(start).join('\n').trim();
    }
}
|
|
50
|
+
exports.Cleaner = Cleaner;
|
|
51
|
+
Cleaner.normalizationCache = new Map();
|
package/dist/utils.d.ts
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
import { ResultObject, EmailAddress } from './types';
|
|
2
|
+
export declare function normalizeDateToISO(dateRaw: string | Date | null | undefined): string | null;
|
|
3
|
+
export declare function cleanText(text: string | null | undefined): string | null;
|
|
4
|
+
/**
|
|
5
|
+
* Normalizes EmailAddress to fix edge cases like "email [email]" pattern
|
|
6
|
+
*
|
|
7
|
+
* Issue: Some email clients (Gmail, Outlook) produce formats like:
|
|
8
|
+
* "john.doe@example.com [john.doe@example.com]"
|
|
9
|
+
*
|
|
10
|
+
* email-forward-parser may parse this as:
|
|
11
|
+
* { name: "john.doe@example.com [john.doe@example.com]", address: "" }
|
|
12
|
+
*
|
|
13
|
+
* This function detects and fixes this pattern to:
|
|
14
|
+
* { name: null, address: "john.doe@example.com" }
|
|
15
|
+
*/
|
|
16
|
+
export declare function normalizeFrom(from: EmailAddress | null | undefined): EmailAddress | null;
|
|
17
|
+
export declare function normalizeParserResult(parsed: any, method: 'inline' | 'fallback', depth: number, warnings?: string[]): ResultObject;
|