npm - arn-rawmime - Versions diffs - 0.0.1 → 0.0.3 - Mend

arn-rawmime 0.0.1 → 0.0.3

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (6)

package/package.json +5 -1
package/src/index.d.ts +1 -0
package/src/index.js +2 -2
package/src/rawmimeBuilder.js +36 -8
package/src/utility/mailParser.d.ts +93 -0
package/src/utility/mailParser.js +217 -0

package/package.json CHANGED

@@ -1,11 +1,14 @@
 {
 	"name": "arn-rawmime",
-	"version": "0.0.1",
+	"version": "0.0.3",
 	"description": "A lightweight, dependency-free raw MIME email builder with DKIM support.",
 	"author": "ARNDESK",
 	"type": "module",
 	"main": "src/index.js",
 	"types": "src/index.d.ts",
+	"files": [
+		"src"
+	],
 	"directories": {
 		"test": "test"
 	},
@@ -20,6 +23,7 @@
 		"html-to-text": "^9.0.5",
 		"htmlparser2": "^8.0.2",
 		"leac": "^0.6.0",
+		"mailparser": "^3.9.0",
 		"marked": "^17.0.1",
 		"mime-types": "^3.0.2",
 		"parseley": "^0.12.1",

package/src/index.d.ts CHANGED

@@ -4,3 +4,4 @@
 export * from "./dkim-signer";
 export * from "./processMarkDown";
 export * from "./rawmimeBuilder";
+export * from "./utility/mailParser";

package/src/index.js CHANGED

@@ -1,6 +1,6 @@
 import { DKIMSign } from "./dkim-signer.js";
 import { processMarkDown } from "./processMarkDown.js";
 import { MimeMessage } from "./rawmimeBuilder.js";
+import { parseMail, randomizeDoublelistPostID } from "./utility/mailParser.js";
-// 2. Add this line at the very bottom of the file
-export { processMarkDown, MimeMessage, DKIMSign };
+export { processMarkDown, MimeMessage, DKIMSign, parseMail, randomizeDoublelistPostID };

package/src/rawmimeBuilder.js CHANGED

@@ -96,15 +96,17 @@ class MimeMessage {
 							currentLineLength = 0;
 						}
 						if (token.length > 75) {
-							// Hard split for giant tokens
-							const chunks = token.match(/.{1,73}/g);
-							chunks.forEach((chunk, idx) => {
-								if (idx < chunks.length - 1) {
-									result += chunk + "=\r\n";
-								} else {
-									result += chunk;
-									currentLineLength = chunk.length;
+							// Hard split for giant tokens (Atom-aware)
+							// Find QP sequences (=XX) or single chars to avoid breaking a triplet
+							const atoms = token.match(/=[0-9A-F]{2}|./g) || [];
+							atoms.forEach((atom) => {
+								// Check if adding this atom exceeds the safety limit (75 to leave room for soft break '=')
+								if (currentLineLength + atom.length > 75) {
+									result += "=\r\n";
+									currentLineLength = 0;
 								}
+								result += atom;
+								currentLineLength += atom.length;
 							});
 						} else {
 							result += token;
@@ -123,6 +125,32 @@ class MimeMessage {
 	// ─── HELPER: Header Folding (RFC 5322) ──────────────────────────
 	_foldHeader(name, value) {
+		const hasNonAscii = /[^\x00-\x7F]/.test(value);
+		// 1. Unstructured headers (Subject, etc) -> Full Encode
+		const unstructured = ["subject", "x-report-abuse", "thread-topic"];
+		if (hasNonAscii && unstructured.includes(name.toLowerCase())) {
+			const encodedValue = Buffer.from(value, "utf8").toString("base64");
+			return `${name}: =?UTF-8?B?${encodedValue}?=`;
+		}
+		// 2. Structured headers (From, To) -> Smart Replace
+		// Finds quoted strings with special chars, e.g. "René", and encodes JUST that part.
+		if (hasNonAscii) {
+			const encodedStruct = value.replace(/"([^"]*)"/g, (match, content) => {
+				if (/[^\x00-\x7F]/.test(content)) {
+					const b64 = Buffer.from(content, "utf8").toString("base64");
+					return `=?UTF-8?B?${b64}?=`;
+				}
+				return match;
+			});
+			// If we changed anything, return it. If not (e.g. unquoted special chars), fallback to old folding
+			if (encodedStruct !== value) {
+				return `${name}: ${encodedStruct}`;
+			}
+		}
+		// 3. Standard folding for ASCII-only (or unhandled) headers
 		const line = `${name}: ${value}`;
 		if (line.length <= 76) return line;

package/src/utility/mailParser.d.ts ADDED

@@ -0,0 +1,93 @@
+// src/utility/mailParser.d.ts
+import type { ProcessMarkDownResult } from "../processMarkDown";
+/**
+ * Generates a random numeric ID string for Doublelist posts.
+ * @param length - Length of the ID (default: 11)
+ * @returns A random numeric string starting with 1-9
+ */
+export function randomizeDoublelistPostID(length?: number): string;
+/**
+ * Input options for the parseMail function.
+ */
+export interface ParseMailOptions {
+    /** Raw email data (string or Buffer), can be Base64 encoded */
+    rawData: string | Buffer;
+}
+/**
+ * Parsed email data returned on successful parsing.
+ */
+export interface ParsedMailData {
+    /** Sender email address (lowercase) */
+    fromEmail: string | null;
+    /** Sender display name */
+    fromName: string;
+    /** Primary recipient email address (lowercase) */
+    toEmail: string | null;
+    /** Array of all recipient email addresses (deduplicated, lowercase) */
+    toEmailArray: string[];
+    /** Primary recipient display name */
+    toName: string;
+    /** Reply-to email address (lowercase) */
+    replyToEmail: string | null;
+    /** Reply-to display name */
+    replyToName: string;
+    /** Email subject */
+    subject: string;
+    /** Email date */
+    date: Date | null;
+    /** Message ID header */
+    messageId: string | null;
+    /** In-Reply-To header */
+    inReplyTo: string | null;
+    /** References header */
+    references: string | string[] | null;
+    /** Return-Path header (lowercase) */
+    returnPath: string | null;
+    /** HTML body with tracking images removed */
+    html: string;
+    /** Processed markdown result from processMarkDown */
+    markdownResult: ProcessMarkDownResult;
+    /** Plain text body */
+    text: string;
+    /** Number of attachments */
+    attachmentCount: number;
+    /** Original post ID extracted or generated */
+    originalPostId: string;
+    /** Randomly generated post ID */
+    randomPostId: string;
+    /** Post ID extracted from subject (if present) */
+    subjectPostId: string | null;
+    /** Envelope-to header (lowercase) */
+    envelopeTo: string;
+    /** Domain from envelope-to header */
+    envelopeToDomain: string | null;
+    /** Detected traffic source: 'dbr-w4m' | 'dbr-m4w' | 'other' */
+    trafficSource: "dbr-w4m" | "dbr-m4w" | "other";
+    /** Status, set to 'bounce' if email is a bounce notification */
+    status: "bounce" | undefined;
+}
+/**
+ * Result of the parseMail function.
+ */
+export interface ParseMailResult {
+    /** Parsed email data, null on error */
+    data: ParsedMailData | null;
+    /** Error object if parsing failed, null on success */
+    error: Error | null;
+}
+/**
+ * Parses raw email data (MIME format) and extracts structured information.
+ * Handles Base64 encoded input, bounce detection, and markdown processing.
+ *
+ * @param options - Parsing options containing the raw email data
+ * @returns Promise resolving to parsed mail data or error
+ */
+export function parseMail(options: ParseMailOptions): Promise<ParseMailResult>;

package/src/utility/mailParser.js ADDED

@@ -0,0 +1,217 @@
+// src/utility/mailParser.js
+import { processMarkDown } from "../processMarkDown.js";
+import { simpleParser } from "mailparser";
+// ============================================================================
+// HELPER: Generate Random ID
+// ============================================================================
+export const randomizeDoublelistPostID = (length = 11) => {
+    const firstDigit = "123456789"; // No zero
+    const restDigits = "0123456789";
+    // 1. Pick the first digit (1-9)
+    let result = firstDigit.charAt(Math.floor(Math.random() * firstDigit.length));
+    // 2. Generate the rest, stopping 1 digit early
+    // If length is 11, this loop generates 9 more digits (Total: 10)
+    for (let i = 1; i < length - 1; i++) {
+        result += restDigits.charAt(Math.floor(Math.random() * restDigits.length));
+    }
+    return result;
+};
+// ============================================================================
+// HELPER: Detect Bounce Emails
+// ============================================================================
+const detectBounce = (subject, fromEmail, text, html, dsn) => {
+    let isBounce = false;
+    // 1. Check Keywords
+    if (
+        (subject && /undelivered mail returned to sender|undeliverable|undeliverable:/i.test(subject)) ||
+        (fromEmail && /mailer-daemon@|microsoftoutlook|microsoftexchange/i.test(fromEmail))
+    ) {
+        isBounce = true;
+    }
+    if (!isBounce) return null;
+    // 2. Check DSN Object (Preferred)
+    if (dsn?.recipient?.length > 0) {
+        const recipient = dsn.recipient[0];
+        return (recipient.finalRecipient?.value || recipient.originalRecipient?.value)?.toLowerCase();
+    }
+    // 3. Regex Fallback (Body Search)
+    const patterns = [
+        /(?:Final-Recipient|Original-Recipient): rfc822;([a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,})/i,
+        /Your message to\s*([a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,})/i,
+        /Recipient Address:\s*([a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,})/i,
+    ];
+    const content = (text || "") + " " + (html || "");
+    for (const regex of patterns) {
+        const match = content.match(regex);
+        if (match && match[1]) {
+            return match[1].toLowerCase();
+        }
+    }
+    return null;
+};
+// ============================================================================
+// MAIN FUNCTION
+// ============================================================================
+export const parseMail = async ({ rawData }) => {
+    try {
+        // --- 1. Encoding & Buffer Handling (Crucial Fix) ---
+        let data = rawData;
+        try {
+            // Check if input is Base64 by removing whitespace and checking pattern
+            const cleanRaw = typeof rawData === "string" ? rawData.replace(/\s/g, "") : "";
+            const isBase64 = /^[A-Za-z0-9+/]*={0,2}$/.test(cleanRaw);
+            if (isBase64 && cleanRaw.length > 0) {
+                // DO NOT use .toString("ascii") or "utf8" here.
+                // Pass the Buffer directly so simpleParser detects the correct charset.
+                data = Buffer.from(rawData, "base64");
+            }
+        } catch (err) {
+            data = rawData;
+        }
+        // --- 2. Parse Email ---
+        let mail = await simpleParser(data, {
+            skipHtmlToText: false,
+            skipTextToHtml: false,
+            skipTextLinks: true,
+            keepCidLinks: false,
+        });
+        // --- 3. Extract Headers ---
+        let envelopeTo = (mail.headers.get("envelope-to") || "").toLowerCase();
+        let envelopeToDomain = envelopeTo && envelopeTo.includes("@") ? envelopeTo.split("@")[1].toLowerCase() : null;
+        let fromEmail = mail.from?.value[0]?.address ? mail.from.value[0].address.toLowerCase() : null;
+        let toEmail = mail.to?.value[0]?.address ? mail.to.value[0].address.toLowerCase() : null;
+        // New: toEmailArray with Deduplication
+        let toEmailArray = [];
+        if (mail.to && Array.isArray(mail.to.value)) {
+            const rawEmails = mail.to.value
+                .map((r) => (r.address ? r.address.toLowerCase().replace(/\s/g, "") : null))
+                .filter(Boolean);
+            toEmailArray = [...new Set(rawEmails)]; // Remove duplicates
+        }
+        let replyToEmail = mail.replyTo?.value[0]?.address ? mail.replyTo.value[0].address.toLowerCase() : null;
+        // --- 4. Clean Names ---
+        const cleanName = (nameObj, emailObj) => {
+            let name = nameObj?.split("@")[0] || null;
+            if (!name || name.includes("@")) {
+                name = emailObj ? emailObj.split("@")[0] : null;
+            }
+            return name ? name.trim().substring(0, 100) : "";
+        };
+        let fromName = cleanName(mail.from?.value[0]?.name, fromEmail);
+        let toName = cleanName(mail.to?.value[0]?.name, toEmail);
+        let replyToName = cleanName(mail.replyTo?.value[0]?.name, replyToEmail);
+        // --- 5. Clean HTML & Text ---
+        let html = mail.html || mail.textAsHtml || "";
+        if (html) {
+            // Remove <img> tags to block tracking pixels (preserves layout divs)
+            html = html.replace(/<img[^>]*>/gi, "");
+        }
+        let text = mail.text || "";
+        let subject = mail.subject || "";
+        const attachmentCount = mail.attachments ? mail.attachments.length : 0;
+        // --- 6. Handle Bounces ---
+        let status;
+        const detectedBounceEmail = detectBounce(subject, fromEmail, text, html, mail.dsn);
+        if (detectedBounceEmail) {
+            status = "bounce";
+            fromEmail = detectedBounceEmail; // Update fromEmail to the one that bounced
+        }
+        // --- 7. Doublelist / ID Logic ---
+        let originalPostId = null;
+        let subjectPostId = null;
+        let trafficSource = "other";
+        // EXPLANATION:
+        // #       -> literal hash character
+        // \d{11}  -> matches exactly 11 digits
+        // (?!\d)  -> Negative Lookahead: Asserts that the next char is NOT a digit.
+        //            If it is a 12th digit, the match fails completely.
+        const subjectMatch = subject.match(/#(\d{11})(?!\d)/);
+        if (subjectMatch && subjectMatch[1]) {
+            subjectPostId = subjectMatch[1];
+        }
+        if (html) {
+            const urlMatch = html.match(/https:\/\/doublelist\.com\/posts\/(\d+)\.html/);
+            if (urlMatch && urlMatch[1]) {
+                originalPostId = urlMatch[1];
+            }
+        }
+        if (!originalPostId) originalPostId = subjectPostId;
+        // --- 8. Traffic Source ---
+        const isOfficialMailer = fromEmail === "mailer@mailersp.doublelist.com" || fromEmail === "robot@doublelist.com";
+        if (isOfficialMailer && mail.replyTo?.value[0]?.address) {
+            fromEmail = mail.replyTo?.value[0]?.address?.toLowerCase();
+            fromName = "";
+            trafficSource = "dbr-w4m";
+        } else if (!isOfficialMailer && toEmail && toEmail.includes("+")) {
+            trafficSource = "dbr-m4w";
+        }
+        // --- 9. Process Markdown (Updated for New processMarkDown.js) ---
+        // processMarkDown now handles string input automatically
+        const markdownResult = await processMarkDown({ inputData: html, showLinks: true, showImages: true });
+        // // We select the 'replyMarkdown' (cleanest)
+        // html = markdownResult.replyMarkdown || "";
+        return {
+            data: {
+                fromEmail,
+                fromName,
+                toEmail,
+                toEmailArray,
+                toName,
+                replyToEmail,
+                replyToName,
+                subject,
+                date: mail.date || null,
+                messageId: mail.messageId || null,
+                inReplyTo: mail.inReplyTo || null,
+                references: mail.references || null,
+                returnPath: mail.headers.get("return-path")?.text?.toLowerCase() || null,
+                html,
+                markdownResult, // This now contains the Cleaned Reply Markdown
+                text,
+                attachmentCount,
+                originalPostId: originalPostId || randomizeDoublelistPostID(11),
+                randomPostId: randomizeDoublelistPostID(11),
+                subjectPostId,
+                envelopeTo,
+                envelopeToDomain,
+                trafficSource,
+                status,
+            },
+            error: null,
+        };
+    } catch (error) {
+        console.error(`Error processing envelope: ${error.message}`);
+        return { data: null, error: error };
+    }
+};