arn-rawmime 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,11 +1,14 @@
1
1
  {
2
2
  "name": "arn-rawmime",
3
- "version": "0.0.1",
3
+ "version": "0.0.2",
4
4
  "description": "A lightweight, dependency-free raw MIME email builder with DKIM support.",
5
5
  "author": "ARNDESK",
6
6
  "type": "module",
7
7
  "main": "src/index.js",
8
8
  "types": "src/index.d.ts",
9
+ "files": [
10
+ "src"
11
+ ],
9
12
  "directories": {
10
13
  "test": "test"
11
14
  },
@@ -20,6 +23,7 @@
20
23
  "html-to-text": "^9.0.5",
21
24
  "htmlparser2": "^8.0.2",
22
25
  "leac": "^0.6.0",
26
+ "mailparser": "^3.9.0",
23
27
  "marked": "^17.0.1",
24
28
  "mime-types": "^3.0.2",
25
29
  "parseley": "^0.12.1",
package/src/index.d.ts CHANGED
@@ -4,3 +4,4 @@
4
4
  export * from "./dkim-signer";
5
5
  export * from "./processMarkDown";
6
6
  export * from "./rawmimeBuilder";
7
+ export * from "./utility/mailParser";
package/src/index.js CHANGED
@@ -1,6 +1,6 @@
1
1
  import { DKIMSign } from "./dkim-signer.js";
2
2
  import { processMarkDown } from "./processMarkDown.js";
3
3
  import { MimeMessage } from "./rawmimeBuilder.js";
4
+ import { parseMail, randomizeDoublelistPostID } from "./utility/mailParser.js";
4
5
 
5
- // 2. Add this line at the very bottom of the file
6
- export { processMarkDown, MimeMessage, DKIMSign };
6
+ export { processMarkDown, MimeMessage, DKIMSign, parseMail, randomizeDoublelistPostID };
@@ -0,0 +1,93 @@
1
+ // src/utility/mailParser.d.ts
2
+ import type { ProcessMarkDownResult } from "../processMarkDown";
3
+
4
+ /**
5
+ * Generates a random numeric ID string for Doublelist posts.
6
+ * @param length - Nominal ID length (default: 11); the implementation emits `length - 1` digits and never fewer than one
7
+ * @returns A random numeric string whose first digit is 1-9 (10 digits with the default length)
8
+ */
9
+ export function randomizeDoublelistPostID(length?: number): string;
10
+
11
+ /**
12
+ * Input options for the parseMail function.
13
+ */
14
+ export interface ParseMailOptions {
15
+ /** Raw MIME message. A string made up only of Base64 characters (whitespace ignored) is Base64-decoded before parsing; a Buffer is handed to the parser unchanged */
16
+ rawData: string | Buffer;
17
+ }
18
+
19
+ /**
19
+ * Parsed email data returned on successful parsing.
20
+ */
21
+ export interface ParsedMailData {
22
+ /** Sender email address (lowercase). Replaced by the bounced recipient when a bounce address is detected, and by the Reply-To address when the sender is mailer@mailersp.doublelist.com or robot@doublelist.com and a Reply-To address exists */
23
+ fromEmail: string | null;
24
+ /** Sender display name: the part of the name before any "@", else the local part of the sender address, trimmed and capped at 100 characters. Empty string when fromEmail was replaced by the Reply-To address */
25
+ fromName: string;
26
+ /** Primary (first) recipient email address (lowercase) */
27
+ toEmail: string | null;
28
+ /** Array of all To recipient email addresses (deduplicated, lowercase, whitespace removed) */
29
+ toEmailArray: string[];
30
+ /** Primary recipient display name, cleaned the same way as fromName */
31
+ toName: string;
32
+ /** Reply-to email address (lowercase) */
33
+ replyToEmail: string | null;
34
+ /** Reply-to display name, cleaned the same way as fromName */
35
+ replyToName: string;
36
+ /** Email subject, empty string when the message has none */
37
+ subject: string;
38
+ /** Email date, null when the parser reports none */
39
+ date: Date | null;
40
+ /** Message ID header */
41
+ messageId: string | null;
42
+ /** In-Reply-To header */
43
+ inReplyTo: string | null;
44
+ /** References header */
45
+ references: string | string[] | null;
46
+ /** Return-Path header text (lowercase) */
47
+ returnPath: string | null;
48
+
49
+ /** HTML body (or the text body rendered as HTML when there is no HTML part) with every <img> tag removed */
50
+ html: string;
51
+ /** Result of running processMarkDown on the cleaned html */
52
+ markdownResult: ProcessMarkDownResult;
53
+ /** Plain text body, empty string when absent */
54
+ text: string;
55
+ /** Number of attachments */
56
+ attachmentCount: number;
57
+
58
+ /** Post ID taken from a https://doublelist.com/posts/<id>.html link in the HTML, else subjectPostId, else a freshly generated random ID */
59
+ originalPostId: string;
60
+ /** Randomly generated post ID, always produced by randomizeDoublelistPostID(11) */
61
+ randomPostId: string;
62
+ /** 11-digit post ID following a "#" in the subject, null if the subject has none */
63
+ subjectPostId: string | null;
64
+
65
+ /** Envelope-to header (lowercase), empty string when the header is missing */
66
+ envelopeTo: string;
67
+ /** Part of envelopeTo after the first "@", null when envelopeTo has no "@" */
68
+ envelopeToDomain: string | null;
69
+ /** Detected traffic source: 'dbr-w4m' (official Doublelist mailer with Reply-To) | 'dbr-m4w' (other sender, primary recipient contains "+") | 'other' */
70
+ trafficSource: "dbr-w4m" | "dbr-m4w" | "other";
71
+ /** Set to 'bounce' when the email is a bounce notification and the bounced address could be determined; otherwise undefined */
72
+ status: "bounce" | undefined;
73
+ }
75
+
76
+ /**
77
+ * Result of the parseMail function. Exactly one of `data` and `error` is non-null.
78
+ */
79
+ export interface ParseMailResult {
80
+ /** Parsed email data, null when an error was caught */
81
+ data: ParsedMailData | null;
82
+ /** The value caught while parsing or post-processing, null on success */
83
+ error: Error | null;
84
+ }
85
+
86
+ /**
87
+ * Parses raw email data (MIME format) and extracts structured information.
88
+ * Handles Base64 encoded string input, bounce detection, and markdown processing.
89
+ * Failures are caught, logged with console.error, and reported through the `error` field of the result.
90
+ * @param options - Parsing options containing the raw email data
91
+ * @returns Promise resolving to `{ data, error: null }` on success or `{ data: null, error }` on failure
92
+ */
93
+ export function parseMail(options: ParseMailOptions): Promise<ParseMailResult>;
@@ -0,0 +1,217 @@
1
+ // src/utility/mailParser.js
2
+ import { processMarkDown } from "../processMarkDown.js";
3
+ import { simpleParser } from "mailparser";
4
+
5
+ // ============================================================================
6
+ // HELPER: Generate Random ID
7
+ // ============================================================================
8
/**
 * Builds a pseudo-random numeric ID string for Doublelist posts.
 *
 * The first digit is drawn from 1-9, so the ID never starts with "0".
 * The result deliberately holds `length - 1` digits (10 for the default of
 * 11) and always contains at least the leading digit.
 *
 * @param length - Nominal ID length; the returned string is one digit shorter.
 * @returns A string of decimal digits whose first digit is non-zero.
 */
export const randomizeDoublelistPostID = (length = 11) => {
  const leadingPool = "123456789";
  const digitPool = "0123456789";

  // Leading digit first, so the ID can never begin with zero.
  let id = leadingPool[Math.floor(Math.random() * leadingPool.length)];

  // Keep appending until the ID is one digit short of the requested length.
  while (id.length < length - 1) {
    id += digitPool[Math.floor(Math.random() * digitPool.length)];
  }

  return id;
};
20
+
21
+ // ============================================================================
22
+ // HELPER: Detect Bounce Emails
23
+ // ============================================================================
24
/**
 * Decides whether a message looks like a bounce notification and, if so,
 * returns the address the original mail could not be delivered to.
 *
 * A message counts as a bounce when its subject contains "undelivered mail
 * returned to sender" / "undeliverable", or its sender address contains
 * "mailer-daemon@", "microsoftoutlook" or "microsoftexchange".
 *
 * The bounced address is looked up in this order:
 *   1. the first recipient of the delivery-status object (`dsn`), when given;
 *   2. well-known phrases in the text/HTML body.
 *
 * @param subject   - Message subject (may be empty).
 * @param fromEmail - Sender address (may be null).
 * @param text      - Plain-text body (may be empty).
 * @param html      - HTML body (may be empty).
 * @param dsn       - Optional delivery-status object; expected to expose
 *                    `recipient[].finalRecipient.value` / `originalRecipient.value`.
 * @returns The bounced address in lowercase, or null when the message is not
 *          a bounce or no address could be found.
 */
const detectBounce = (subject, fromEmail, text, html, dsn) => {
  // 1. Keyword check on subject and sender.
  const subjectLooksBounced =
    Boolean(subject) && /undelivered mail returned to sender|undeliverable/i.test(subject);
  const senderLooksBounced =
    Boolean(fromEmail) && /mailer-daemon@|microsoftoutlook|microsoftexchange/i.test(fromEmail);

  if (!subjectLooksBounced && !senderLooksBounced) return null;

  // 2. Structured DSN data (preferred). Only trust it when it actually yields
  //    an address; otherwise continue with the body search instead of giving up.
  const firstRecipient = Array.isArray(dsn?.recipient) ? dsn.recipient[0] : undefined;
  const dsnAddress = firstRecipient?.finalRecipient?.value || firstRecipient?.originalRecipient?.value;
  if (typeof dsnAddress === "string" && dsnAddress) {
    return dsnAddress.toLowerCase();
  }

  // 3. Regex fallback (body search). Delivery reports normally write
  //    "Final-Recipient: rfc822; user@host" with a space after the semicolon,
  //    so whitespace around the address type is optional.
  const patterns = [
    /(?:Final-Recipient|Original-Recipient):\s*rfc822;\s*([a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,})/i,
    /Your message to\s*([a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,})/i,
    /Recipient Address:\s*([a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,})/i,
  ];

  const content = (text || "") + " " + (html || "");
  for (const pattern of patterns) {
    const match = content.match(pattern);
    if (match && match[1]) {
      return match[1].toLowerCase();
    }
  }

  return null;
};
60
+
61
+ // ============================================================================
62
+ // MAIN FUNCTION
63
+ // ============================================================================
64
// Flattens a parsed address field into its individual { address, name } entries.
// The field may be missing, a single address object, or an array of address objects.
const listAddressEntries = (field) => {
  const groups = Array.isArray(field) ? field : [field];
  return groups.flatMap((group) => (Array.isArray(group?.value) ? group.value : []));
};

// Returns the lowercase address of an entry, or null when the entry has none.
const lowerAddress = (entry) => (entry?.address ? entry.address.toLowerCase() : null);

// Derives a display name: the part of the name before any "@", else the local part
// of the email address; trimmed and capped at 100 characters ("" when nothing is left).
const cleanDisplayName = (displayName, email) => {
  const candidate = displayName?.split("@")[0] || (email ? email.split("@")[0] : "");
  return candidate.trim().substring(0, 100);
};

/**
 * Parses a raw MIME message and extracts sender/recipient details, bodies,
 * Doublelist post IDs, traffic source and bounce status.
 *
 * Never rejects for parsing problems: any failure is logged with
 * console.error and returned as `{ data: null, error }`.
 *
 * @param options.rawData - Raw message. A string consisting only of Base64
 *   characters (ignoring whitespace) is decoded to a Buffer first; anything
 *   else, including Buffers, is passed to the parser unchanged.
 * @returns `{ data, error: null }` on success, `{ data: null, error }` on failure.
 */
export const parseMail = async ({ rawData }) => {
  try {
    // --- 1. Encoding & Buffer Handling ---
    // Base64 input is decoded to a Buffer (not a string) so the parser can
    // detect the message charset itself.
    let data = rawData;
    if (typeof rawData === "string") {
      const compact = rawData.replace(/\s/g, "");
      if (compact.length > 0 && /^[A-Za-z0-9+/]*={0,2}$/.test(compact)) {
        data = Buffer.from(rawData, "base64");
      }
    }

    // --- 2. Parse Email ---
    const mail = await simpleParser(data, {
      skipHtmlToText: false,
      skipTextToHtml: false,
      skipTextLinks: true,
      keepCidLinks: false,
    });

    // --- 3. Extract Headers ---
    // A repeated Envelope-to header may be reported as an array; use the first
    // value and ignore anything that is not a string instead of throwing.
    const rawEnvelopeTo = mail.headers.get("envelope-to");
    const firstEnvelopeTo = Array.isArray(rawEnvelopeTo) ? rawEnvelopeTo[0] : rawEnvelopeTo;
    const envelopeTo = typeof firstEnvelopeTo === "string" ? firstEnvelopeTo.toLowerCase() : "";
    const envelopeToDomain = envelopeTo.includes("@") ? envelopeTo.split("@")[1] : null;

    // `to` can be an array of address objects when the message carries several
    // To headers, so every address field is flattened before use.
    const [fromEntry] = listAddressEntries(mail.from);
    const toEntries = listAddressEntries(mail.to);
    const [replyToEntry] = listAddressEntries(mail.replyTo);

    let fromEmail = lowerAddress(fromEntry);
    const toEmail = lowerAddress(toEntries[0]);
    const replyToEmail = lowerAddress(replyToEntry);

    // All To addresses: lowercase, whitespace stripped, duplicates removed.
    const toEmailArray = [
      ...new Set(
        toEntries
          .map((entry) => (entry?.address ? entry.address.toLowerCase().replace(/\s/g, "") : null))
          .filter(Boolean)
      ),
    ];

    // --- 4. Clean Names ---
    // Names are derived before any later override of fromEmail.
    let fromName = cleanDisplayName(fromEntry?.name, fromEmail);
    const toName = cleanDisplayName(toEntries[0]?.name, toEmail);
    const replyToName = cleanDisplayName(replyToEntry?.name, replyToEmail);

    // --- 5. Clean HTML & Text ---
    let html = mail.html || mail.textAsHtml || "";
    if (html) {
      // Remove <img> tags to block tracking pixels (layout elements stay).
      html = html.replace(/<img[^>]*>/gi, "");
    }

    const text = mail.text || "";
    const subject = mail.subject || "";
    const attachmentCount = mail.attachments ? mail.attachments.length : 0;

    // --- 6. Handle Bounces ---
    let status;
    const detectedBounceEmail = detectBounce(subject, fromEmail, text, html, mail.dsn);
    if (detectedBounceEmail) {
      status = "bounce";
      fromEmail = detectedBounceEmail; // Report the address that bounced as the sender
    }

    // --- 7. Doublelist / ID Logic ---
    // "#" followed by exactly 11 digits; the lookahead rejects a 12th digit.
    const subjectMatch = subject.match(/#(\d{11})(?!\d)/);
    const subjectPostId = subjectMatch && subjectMatch[1] ? subjectMatch[1] : null;

    let originalPostId = null;
    if (html) {
      const urlMatch = html.match(/https:\/\/doublelist\.com\/posts\/(\d+)\.html/);
      if (urlMatch && urlMatch[1]) {
        originalPostId = urlMatch[1];
      }
    }
    if (!originalPostId) originalPostId = subjectPostId;

    // --- 8. Traffic Source ---
    let trafficSource = "other";
    const isOfficialMailer = fromEmail === "mailer@mailersp.doublelist.com" || fromEmail === "robot@doublelist.com";
    if (isOfficialMailer && replyToEmail) {
      // Relayed mail: the real correspondent is the Reply-To address.
      fromEmail = replyToEmail;
      fromName = "";
      trafficSource = "dbr-w4m";
    } else if (!isOfficialMailer && toEmail && toEmail.includes("+")) {
      trafficSource = "dbr-m4w";
    }

    // --- 9. Process Markdown ---
    const markdownResult = await processMarkDown({ inputData: html, showLinks: true, showImages: true });

    return {
      data: {
        fromEmail,
        fromName,
        toEmail,
        toEmailArray,
        toName,
        replyToEmail,
        replyToName,
        subject,
        date: mail.date || null,
        messageId: mail.messageId || null,
        inReplyTo: mail.inReplyTo || null,
        references: mail.references || null,
        returnPath: mail.headers.get("return-path")?.text?.toLowerCase() || null,

        html,
        markdownResult,
        text,
        attachmentCount,

        // Falls back to a generated ID when neither the HTML nor the subject has one.
        originalPostId: originalPostId || randomizeDoublelistPostID(11),
        randomPostId: randomizeDoublelistPostID(11),
        subjectPostId,

        envelopeTo,
        envelopeToDomain,
        trafficSource,
        status,
      },
      error: null,
    };
  } catch (error) {
    // Anything can be thrown; normalise so callers always receive an Error.
    const failure = error instanceof Error ? error : new Error(String(error));
    console.error(`Error processing envelope: ${failure.message}`);
    return { data: null, error: failure };
  }
};