arn-rawmime 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +5 -1
- package/src/index.d.ts +1 -0
- package/src/index.js +2 -2
- package/src/utility/mailParser.d.ts +93 -0
- package/src/utility/mailParser.js +217 -0
package/package.json
CHANGED
|
@@ -1,11 +1,14 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "arn-rawmime",
|
|
3
|
-
"version": "0.0.
|
|
3
|
+
"version": "0.0.2",
|
|
4
4
|
"description": "A lightweight, dependency-free raw MIME email builder with DKIM support.",
|
|
5
5
|
"author": "ARNDESK",
|
|
6
6
|
"type": "module",
|
|
7
7
|
"main": "src/index.js",
|
|
8
8
|
"types": "src/index.d.ts",
|
|
9
|
+
"files": [
|
|
10
|
+
"src"
|
|
11
|
+
],
|
|
9
12
|
"directories": {
|
|
10
13
|
"test": "test"
|
|
11
14
|
},
|
|
@@ -20,6 +23,7 @@
|
|
|
20
23
|
"html-to-text": "^9.0.5",
|
|
21
24
|
"htmlparser2": "^8.0.2",
|
|
22
25
|
"leac": "^0.6.0",
|
|
26
|
+
"mailparser": "^3.9.0",
|
|
23
27
|
"marked": "^17.0.1",
|
|
24
28
|
"mime-types": "^3.0.2",
|
|
25
29
|
"parseley": "^0.12.1",
|
package/src/index.d.ts
CHANGED
package/src/index.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { DKIMSign } from "./dkim-signer.js";
|
|
2
2
|
import { processMarkDown } from "./processMarkDown.js";
|
|
3
3
|
import { MimeMessage } from "./rawmimeBuilder.js";
|
|
4
|
+
import { parseMail, randomizeDoublelistPostID } from "./utility/mailParser.js";
|
|
4
5
|
|
|
5
|
-
|
|
6
|
-
export { processMarkDown, MimeMessage, DKIMSign };
|
|
6
|
+
export { processMarkDown, MimeMessage, DKIMSign, parseMail, randomizeDoublelistPostID };
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
// src/utility/mailParser.d.ts
|
|
2
|
+
import type { ProcessMarkDownResult } from "../processMarkDown";
|
|
3
|
+
|
|
4
|
+
/**
|
|
5
|
+
* Generates a random numeric ID string for Doublelist posts.
|
|
6
|
+
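* @param length - Nominal length (default: 11). The implementation stops one digit early, so the returned ID has `length - 1` digits (10 by default) and never fewer than 1.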
|
|
7
|
+
* @returns A random numeric string starting with 1-9
|
|
8
|
+
*/
|
|
9
|
+
export function randomizeDoublelistPostID(length?: number): string;
|
|
10
|
+
|
|
11
|
+
/**
|
|
12
|
+
* Input options for the parseMail function.
|
|
13
|
+
*/
|
|
14
|
+
export interface ParseMailOptions {
|
|
15
|
+
/** Raw email data (string or Buffer), can be Base64 encoded */
|
|
16
|
+
rawData: string | Buffer;
|
|
17
|
+
}
|
|
18
|
+
|
|
19
|
+
/**
|
|
20
|
+
* Parsed email data returned on successful parsing.
|
|
21
|
+
*/
|
|
22
|
+
export interface ParsedMailData {
|
|
23
|
+
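/** Sender email address (lowercase). Replaced by the bounced recipient when a bounce is detected, or by the Reply-To address for official Doublelist mailer messages. */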
|
|
24
|
+
fromEmail: string | null;
|
|
25
|
+
/** Sender display name */
|
|
26
|
+
fromName: string;
|
|
27
|
+
/** Primary recipient email address (lowercase) */
|
|
28
|
+
toEmail: string | null;
|
|
29
|
+
/** Array of all recipient email addresses (deduplicated, lowercase) */
|
|
30
|
+
toEmailArray: string[];
|
|
31
|
+
/** Primary recipient display name */
|
|
32
|
+
toName: string;
|
|
33
|
+
/** Reply-to email address (lowercase) */
|
|
34
|
+
replyToEmail: string | null;
|
|
35
|
+
/** Reply-to display name */
|
|
36
|
+
replyToName: string;
|
|
37
|
+
/** Email subject */
|
|
38
|
+
subject: string;
|
|
39
|
+
/** Email date */
|
|
40
|
+
date: Date | null;
|
|
41
|
+
/** Message ID header */
|
|
42
|
+
messageId: string | null;
|
|
43
|
+
/** In-Reply-To header */
|
|
44
|
+
inReplyTo: string | null;
|
|
45
|
+
/** References header */
|
|
46
|
+
references: string | string[] | null;
|
|
47
|
+
/** Return-Path header (lowercase) */
|
|
48
|
+
returnPath: string | null;
|
|
49
|
+
|
|
50
|
+
/** HTML body with tracking images removed */
|
|
51
|
+
html: string;
|
|
52
|
+
/** Processed markdown result from processMarkDown */
|
|
53
|
+
markdownResult: ProcessMarkDownResult;
|
|
54
|
+
/** Plain text body */
|
|
55
|
+
text: string;
|
|
56
|
+
/** Number of attachments */
|
|
57
|
+
attachmentCount: number;
|
|
58
|
+
|
|
59
|
+
/** Post ID taken from a doublelist.com post URL in the HTML body, else from the subject (`#` + 11 digits), else randomly generated */
|
|
60
|
+
originalPostId: string;
|
|
61
|
+
/** Randomly generated post ID */
|
|
62
|
+
randomPostId: string;
|
|
63
|
+
/** Post ID extracted from subject (if present) */
|
|
64
|
+
subjectPostId: string | null;
|
|
65
|
+
|
|
66
|
+
/** Envelope-to header (lowercase) */
|
|
67
|
+
envelopeTo: string;
|
|
68
|
+
/** Domain from envelope-to header */
|
|
69
|
+
envelopeToDomain: string | null;
|
|
70
|
+
/** Detected traffic source: 'dbr-w4m' | 'dbr-m4w' | 'other' */
|
|
71
|
+
trafficSource: "dbr-w4m" | "dbr-m4w" | "other";
|
|
72
|
+
/** Status: 'bounce' only when a bounce is detected AND the bounced recipient address could be extracted; otherwise undefined */
|
|
73
|
+
status: "bounce" | undefined;
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
/**
|
|
77
|
+
* Result of the parseMail function.
|
|
78
|
+
*/
|
|
79
|
+
export interface ParseMailResult {
|
|
80
|
+
/** Parsed email data, null on error */
|
|
81
|
+
data: ParsedMailData | null;
|
|
82
|
+
/** Error object if parsing failed, null on success */
|
|
83
|
+
error: Error | null;
|
|
84
|
+
}
|
|
85
|
+
|
|
86
|
+
/**
|
|
87
|
+
* Parses raw email data (MIME format) and extracts structured information.
|
|
88
|
+
* Handles Base64 encoded input, bounce detection, and markdown processing.
|
|
89
|
+
*
|
|
90
|
+
* @param options - Parsing options containing the raw email data
|
|
91
|
+
* @returns Promise resolving to parsed mail data or error
|
|
92
|
+
*/
|
|
93
|
+
export function parseMail(options: ParseMailOptions): Promise<ParseMailResult>;
|
|
@@ -0,0 +1,217 @@
|
|
|
1
|
+
// src/utility/mailParser.js
|
|
2
|
+
import { processMarkDown } from "../processMarkDown.js";
|
|
3
|
+
import { simpleParser } from "mailparser";
|
|
4
|
+
|
|
5
|
+
// ============================================================================
|
|
6
|
+
// HELPER: Generate Random ID
|
|
7
|
+
// ============================================================================
|
|
8
|
+
/**
 * Builds a random, purely numeric ID string.
 *
 * The first digit is drawn from 1-9 (never zero); every following digit is
 * drawn from 0-9. Generation intentionally stops one digit short of `length`,
 * so the default call (`length = 11`) yields a 10-digit string. At least one
 * digit is always produced, even for `length <= 2`.
 *
 * @param length Nominal length; the result contains `length - 1` digits.
 * @returns Random digit string whose first character is 1-9.
 */
export const randomizeDoublelistPostID = (length = 11) => {
  // Draw one uniformly random character out of the given digit pool.
  const pick = (pool) => pool.charAt(Math.floor(Math.random() * pool.length));

  // Leading digit must not be zero.
  const digits = [pick("123456789")];

  // Keep appending until we hold `length - 1` digits in total.
  while (digits.length < length - 1) {
    digits.push(pick("0123456789"));
  }

  return digits.join("");
};
|
|
20
|
+
|
|
21
|
+
// ============================================================================
|
|
22
|
+
// HELPER: Detect Bounce Emails
|
|
23
|
+
// ============================================================================
|
|
24
|
+
/**
 * Decides whether a message looks like a bounce notification and, if so,
 * tries to extract the address that bounced.
 *
 * A message is treated as a bounce candidate when its subject contains
 * "undelivered mail returned to sender" / "undeliverable", or when the sender
 * address contains "mailer-daemon@", "microsoftoutlook" or "microsoftexchange"
 * (all case-insensitive).
 *
 * For candidates, the address is looked up in this order:
 *   1. the first entry of `dsn.recipient` (finalRecipient, then originalRecipient);
 *   2. well-known phrases in the text/HTML body.
 *
 * @param subject   Message subject (may be empty).
 * @param fromEmail Sender address (may be null).
 * @param text      Plain-text body (may be empty).
 * @param html      HTML body (may be empty).
 * @param dsn       Optional delivery-status object; assumed to look like
 *                  `{ recipient: [{ finalRecipient: { value }, originalRecipient: { value } }] }`
 *                  — TODO confirm against the parser that produces it.
 * @returns The bounced address in lowercase, or `null` when the message is not
 *          a bounce candidate or no address could be found.
 */
const detectBounce = (subject, fromEmail, text, html, dsn) => {
  // 1. Keyword screen on subject and sender.
  const subjectLooksBounced =
    Boolean(subject) && /undelivered mail returned to sender|undeliverable/i.test(subject);
  const senderLooksBounced =
    Boolean(fromEmail) && /mailer-daemon@|microsoftoutlook|microsoftexchange/i.test(fromEmail);
  if (!subjectLooksBounced && !senderLooksBounced) return null;

  // 2. Structured DSN data (preferred). If the first recipient carries no
  //    usable value we fall through to the body search instead of giving up.
  const dsnRecipient = dsn?.recipient?.[0];
  const dsnAddress = dsnRecipient?.finalRecipient?.value || dsnRecipient?.originalRecipient?.value;
  if (typeof dsnAddress === "string" && dsnAddress.length > 0) {
    return dsnAddress.toLowerCase();
  }

  // 3. Regex fallback over the combined bodies.
  const patterns = [
    // DSN fields are normally written "Final-Recipient: rfc822; user@host",
    // so whitespace around the address-type token must be tolerated.
    /(?:Final-Recipient|Original-Recipient):\s*rfc822;\s*([a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,})/i,
    /Your message to\s*([a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,})/i,
    /Recipient Address:\s*([a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,})/i,
  ];

  const content = (text || "") + " " + (html || "");
  for (const pattern of patterns) {
    const match = content.match(pattern);
    if (match && match[1]) {
      return match[1].toLowerCase();
    }
  }

  return null;
};
|
|
60
|
+
|
|
61
|
+
// ============================================================================
|
|
62
|
+
// MAIN FUNCTION
|
|
63
|
+
// ============================================================================
|
|
64
|
+
/**
 * Parses a raw MIME message and returns a flat summary of it.
 *
 * Steps, in order:
 *   1. If `rawData` is a string that consists solely of Base64 characters
 *      (ignoring whitespace) it is decoded to a Buffer; anything else is handed
 *      to the parser unchanged.
 *   2. The message is parsed with `simpleParser`.
 *   3. Envelope-To, From, To and Reply-To are extracted (addresses lowercased).
 *   4. Display names are cleaned (text before the first "@", max 100 chars).
 *   5. `<img>` tags are stripped from the HTML body.
 *   6. Bounce detection: when `detectBounce` yields an address, `status` becomes
 *      "bounce" and `fromEmail` is replaced by that address. A bounce whose
 *      recipient cannot be extracted leaves `status` undefined.
 *   7. Post IDs are taken from a doublelist.com post URL in the HTML, else from
 *      "#<11 digits>" in the subject, else generated randomly.
 *   8. The traffic source is classified.
 *   9. The HTML is passed through `processMarkDown`.
 *
 * @param options.rawData Raw message as a string (optionally Base64) or Buffer.
 * @returns `{ data, error }` — `data` is null on failure and `error` is null on
 *          success. Failures are reported through the result, not thrown.
 */
export const parseMail = async ({ rawData }) => {
  try {
    // --- 1. Encoding & Buffer Handling ---
    let data = rawData;
    if (typeof rawData === "string") {
      const compact = rawData.replace(/\s/g, "");
      if (compact.length > 0 && /^[A-Za-z0-9+/]*={0,2}$/.test(compact)) {
        // Keep the decoded bytes as a Buffer (no .toString()) so the parser
        // can detect the message charset itself.
        data = Buffer.from(rawData, "base64");
      }
    }

    // --- 2. Parse Email ---
    const mail = await simpleParser(data, {
      skipHtmlToText: false,
      skipTextToHtml: false,
      skipTextLinks: true,
      keepCidLinks: false,
    });

    // --- 3. Extract Headers ---
    // An address field may be a single address object or, when the header is
    // repeated, an array of them; flatten both shapes into one entry list.
    const addressEntries = (field) =>
      (Array.isArray(field) ? field : [field]).flatMap((obj) => (Array.isArray(obj?.value) ? obj.value : []));
    const lowerAddress = (entry) => (entry?.address ? entry.address.toLowerCase() : null);

    // A repeated header is presumably delivered as an array; use its first
    // string value rather than crashing on `.toLowerCase()` — TODO confirm.
    const envelopeHeader = mail.headers.get("envelope-to");
    const envelopeValue = Array.isArray(envelopeHeader) ? envelopeHeader[0] : envelopeHeader;
    const envelopeTo = (typeof envelopeValue === "string" ? envelopeValue : "").toLowerCase();
    const envelopeToDomain = envelopeTo.includes("@") ? envelopeTo.split("@")[1].toLowerCase() : null;

    const fromEntry = addressEntries(mail.from)[0];
    const toEntries = addressEntries(mail.to);
    const toEntry = toEntries[0];
    const replyToEntry = addressEntries(mail.replyTo)[0];

    let fromEmail = lowerAddress(fromEntry);
    const toEmail = lowerAddress(toEntry);
    const replyToEmail = lowerAddress(replyToEntry);

    // All recipients: lowercase, whitespace removed, de-duplicated in order.
    const toEmailArray = [
      ...new Set(
        toEntries
          .map((entry) => (entry?.address ? entry.address.toLowerCase().replace(/\s/g, "") : null))
          .filter(Boolean)
      ),
    ];

    // --- 4. Clean Names ---
    // Use the display name up to the first "@"; if that is empty, fall back to
    // the local part of the e-mail address. Result is trimmed and capped at 100.
    const cleanName = (displayName, email) => {
      let name = displayName?.split("@")[0] || null;
      if (!name) {
        name = email ? email.split("@")[0] : null;
      }
      return name ? name.trim().substring(0, 100) : "";
    };

    let fromName = cleanName(fromEntry?.name, fromEmail);
    const toName = cleanName(toEntry?.name, toEmail);
    const replyToName = cleanName(replyToEntry?.name, replyToEmail);

    // --- 5. Clean HTML & Text ---
    let html = mail.html || mail.textAsHtml || "";
    if (html) {
      // Remove <img> tags to block tracking pixels (layout elements are kept).
      html = html.replace(/<img[^>]*>/gi, "");
    }

    const text = mail.text || "";
    const subject = mail.subject || "";
    const attachmentCount = mail.attachments ? mail.attachments.length : 0;

    // --- 6. Handle Bounces ---
    let status;
    const detectedBounceEmail = detectBounce(subject, fromEmail, text, html, mail.dsn);
    if (detectedBounceEmail) {
      status = "bounce";
      fromEmail = detectedBounceEmail; // report the address that bounced
    }

    // --- 7. Doublelist / ID Logic ---
    // "#" followed by exactly 11 digits; the lookahead rejects a 12th digit.
    const subjectMatch = subject.match(/#(\d{11})(?!\d)/);
    const subjectPostId = subjectMatch && subjectMatch[1] ? subjectMatch[1] : null;

    let originalPostId = null;
    if (html) {
      const urlMatch = html.match(/https:\/\/doublelist\.com\/posts\/(\d+)\.html/);
      if (urlMatch && urlMatch[1]) {
        originalPostId = urlMatch[1];
      }
    }
    if (!originalPostId) originalPostId = subjectPostId;

    // --- 8. Traffic Source ---
    let trafficSource = "other";
    const isOfficialMailer = fromEmail === "mailer@mailersp.doublelist.com" || fromEmail === "robot@doublelist.com";
    if (isOfficialMailer && replyToEmail) {
      // Relayed by the official mailer: the real sender is the Reply-To address.
      fromEmail = replyToEmail;
      fromName = "";
      trafficSource = "dbr-w4m";
    } else if (!isOfficialMailer && toEmail && toEmail.includes("+")) {
      trafficSource = "dbr-m4w";
    }

    // --- 9. Process Markdown ---
    const markdownResult = await processMarkDown({ inputData: html, showLinks: true, showImages: true });

    return {
      data: {
        fromEmail,
        fromName,
        toEmail,
        toEmailArray,
        toName,
        replyToEmail,
        replyToName,
        subject,
        date: mail.date || null,
        messageId: mail.messageId || null,
        inReplyTo: mail.inReplyTo || null,
        references: mail.references || null,
        returnPath: mail.headers.get("return-path")?.text?.toLowerCase() || null,

        html,
        markdownResult,
        text,
        attachmentCount,

        originalPostId: originalPostId || randomizeDoublelistPostID(11),
        randomPostId: randomizeDoublelistPostID(11),
        subjectPostId,

        envelopeTo,
        envelopeToDomain,
        trafficSource,
        status,
      },
      error: null,
    };
  } catch (caught) {
    // Normalise non-Error throwables so the handler itself can never throw
    // and callers always receive an Error instance.
    const error = caught instanceof Error ? caught : new Error(String(caught));
    console.error(`Error processing envelope: ${error.message}`);
    return { data: null, error: error };
  }
};
|