@datasynx/agentic-crm 1.3.0 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. package/README.md +17 -0
  2. package/dist/{attachments-rLa96rOK.js → attachments-BddHbCt8.js} +51 -32
  3. package/dist/{attachments-D207gXfN.js.map → attachments-BddHbCt8.js.map} +1 -1
  4. package/dist/{attachments-D207gXfN.js → attachments-Co3kXIvu.js} +46 -31
  5. package/dist/{attachments-rLa96rOK.js.map → attachments-Co3kXIvu.js.map} +1 -1
  6. package/dist/{attachments-CX2GAtsw.cjs → attachments-Dbe7Bidz.cjs} +46 -31
  7. package/dist/{attachments-CX2GAtsw.cjs.map → attachments-Dbe7Bidz.cjs.map} +1 -1
  8. package/dist/attachments-YQKYmg6N.js +2 -0
  9. package/dist/cli.js +168 -3
  10. package/dist/cli.js.map +1 -1
  11. package/dist/daemon/worker.js +1 -1
  12. package/dist/{gmail-sync-DIbrPnTK.js → gmail-sync-BHLa8v51.js} +2 -2
  13. package/dist/{gmail-sync-DIbrPnTK.js.map → gmail-sync-BHLa8v51.js.map} +1 -1
  14. package/dist/{gmail-sync-BpSVESSe.cjs → gmail-sync-CodrUNR4.cjs} +2 -2
  15. package/dist/{gmail-sync-BpSVESSe.cjs.map → gmail-sync-CodrUNR4.cjs.map} +1 -1
  16. package/dist/{gmail-sync-B4Iu3AQb.js → gmail-sync-SvECok5p.js} +2 -2
  17. package/dist/{gmail-sync-B4Iu3AQb.js.map → gmail-sync-SvECok5p.js.map} +1 -1
  18. package/dist/imap-o6PRuBvm.js +270 -0
  19. package/dist/imap-o6PRuBvm.js.map +1 -0
  20. package/dist/{index-DMTVVYwr.d.cts → index-B5_QnkG8.d.cts} +16 -16
  21. package/dist/{index-DMTVVYwr.d.cts.map → index-B5_QnkG8.d.cts.map} +1 -1
  22. package/dist/{index-BBAlKZg6.d.ts → index-FzDsNSSb.d.ts} +3 -3
  23. package/dist/{index-BBAlKZg6.d.ts.map → index-FzDsNSSb.d.ts.map} +1 -1
  24. package/dist/index.d.cts +16 -16
  25. package/dist/index.d.cts.map +1 -1
  26. package/dist/index.d.ts +3 -3
  27. package/dist/index.d.ts.map +1 -1
  28. package/dist/login-CYgla6-A.js +73 -0
  29. package/dist/login-CYgla6-A.js.map +1 -0
  30. package/dist/mcp.cjs +2 -2
  31. package/dist/mcp.js +2 -2
  32. package/dist/microsoft-DgbVlHdT.js +159 -0
  33. package/dist/microsoft-DgbVlHdT.js.map +1 -0
  34. package/dist/{server-BhNLrnAD.js → server-uqXUhF4H.js} +3 -3
  35. package/dist/{server-BhNLrnAD.js.map → server-uqXUhF4H.js.map} +1 -1
  36. package/dist/token-resolver-BRLOmRvF.js +50 -0
  37. package/dist/token-resolver-BRLOmRvF.js.map +1 -0
  38. package/package.json +4 -1
@@ -450,22 +450,49 @@ function sanitizeFilename(name) {
450
450
  return (name.split(/[\\/]/).pop() ?? name).replace(/[^a-zA-Z0-9._-]+/g, "_").replace(/^_+|_+$/g, "").slice(0, 120) || "attachment";
451
451
  }
452
452
  /**
453
+ * Persist one already-downloaded attachment: save the raw bytes under
454
+ * `customers/<slug>/attachments/<messageId>__<name>`, convert to a sibling
455
+ * `.md` file, and chunk-index the Markdown into LanceDB for semantic search.
456
+ * Provider-agnostic — used by every mailbox connector (Gmail, IMAP, Graph).
457
+ */
458
+ async function persistAttachment(opts) {
459
+ require_session_store.assertSafeSlug(opts.slug);
460
+ const attachmentsDir = path.default.join(opts.dataDir, "customers", opts.slug, "attachments");
461
+ fs.default.mkdirSync(attachmentsDir, { recursive: true });
462
+ const storedName = `${opts.messageId}__${sanitizeFilename(opts.filename)}`;
463
+ const markdownName = `${storedName}.md`;
464
+ fs.default.writeFileSync(path.default.join(attachmentsDir, storedName), opts.buffer);
465
+ const { markdown } = await convertAttachment(opts.buffer, opts.filename, opts.mimeType);
466
+ const mdBody = `# ${opts.filename}\n\n_Source: ${opts.source} · ${opts.date}_\n\n${markdown}\n`;
467
+ fs.default.writeFileSync(path.default.join(attachmentsDir, markdownName), mdBody);
468
+ const ref = `${opts.source}#att:${opts.filename}`;
469
+ const { indexInLanceDB } = await Promise.resolve().then(() => require("./mcp.cjs")).then((n) => n.lancedb_exports);
470
+ const chunks = require_chunk$1.chunkText(markdown);
471
+ for (let i = 0; i < chunks.length; i++) await indexInLanceDB(opts.dataDir, opts.slug, chunks[i], `${ref}#${i}`, {
472
+ date: opts.date,
473
+ type: "attachment"
474
+ }).catch((err) => {
475
+ require_logger.logger.error("attachments", "attachment index failed", { error: err.message });
476
+ });
477
+ return {
478
+ originalName: opts.filename,
479
+ storedName,
480
+ markdownName,
481
+ ref,
482
+ chunks: chunks.length
483
+ };
484
+ }
485
+ /**
453
486
  * Download, convert and index every attachment of a single Gmail message.
454
- *
455
- * For each attachment: the raw bytes are saved under
456
- * `customers/<slug>/attachments/<messageId>__<name>`, converted to a sibling
457
- * `.md` file, and the Markdown is chunked and indexed into LanceDB so the
458
- * attachment's content is semantically searchable. Failures on a single
459
- * attachment are logged and skipped — they never abort the message sync.
487
+ * Fetches each part's bytes from the Gmail API, then delegates persistence to
488
+ * the shared {@link persistAttachment}. Failures on a single attachment are
489
+ * logged and skipped — they never abort the message sync.
460
490
  */
461
491
  async function processMessageAttachments(opts) {
462
492
  const parts = collectAttachmentParts(opts.payload);
463
493
  if (parts.length === 0) return [];
464
494
  require_session_store.assertSafeSlug(opts.slug);
465
495
  const maxBytes = opts.maxBytes ?? 26214400;
466
- const attachmentsDir = path.default.join(opts.dataDir, "customers", opts.slug, "attachments");
467
- fs.default.mkdirSync(attachmentsDir, { recursive: true });
468
- const { indexInLanceDB } = await Promise.resolve().then(() => require("./mcp.cjs")).then((n) => n.lancedb_exports);
469
496
  const saved = [];
470
497
  for (const part of parts) try {
471
498
  if (part.size > maxBytes) {
@@ -481,28 +508,16 @@ async function processMessageAttachments(opts) {
481
508
  id: part.attachmentId
482
509
  })).data.data;
483
510
  if (!data) continue;
484
- const buffer = Buffer.from(data, "base64url");
485
- const storedName = `${opts.messageId}__${sanitizeFilename(part.filename)}`;
486
- const markdownName = `${storedName}.md`;
487
- fs.default.writeFileSync(path.default.join(attachmentsDir, storedName), buffer);
488
- const { markdown } = await convertAttachment(buffer, part.filename, part.mimeType);
489
- const mdBody = `# ${part.filename}\n\n_Source: ${opts.source} · ${opts.date}_\n\n${markdown}\n`;
490
- fs.default.writeFileSync(path.default.join(attachmentsDir, markdownName), mdBody);
491
- const ref = `${opts.source}#att:${part.filename}`;
492
- const chunks = require_chunk$1.chunkText(markdown);
493
- for (let i = 0; i < chunks.length; i++) await indexInLanceDB(opts.dataDir, opts.slug, chunks[i], `${ref}#${i}`, {
511
+ saved.push(await persistAttachment({
512
+ dataDir: opts.dataDir,
513
+ slug: opts.slug,
514
+ messageId: opts.messageId,
515
+ source: opts.source,
494
516
  date: opts.date,
495
- type: "attachment"
496
- }).catch((err) => {
497
- require_logger.logger.error("gmail-sync", "attachment index failed", { error: err.message });
498
- });
499
- saved.push({
500
- originalName: part.filename,
501
- storedName,
502
- markdownName,
503
- ref,
504
- chunks: chunks.length
505
- });
517
+ filename: part.filename,
518
+ mimeType: part.mimeType,
519
+ buffer: Buffer.from(data, "base64url")
520
+ }));
506
521
  } catch (err) {
507
522
  require_logger.logger.warn("gmail-sync", "attachment failed", {
508
523
  filename: part.filename,
@@ -514,4 +529,4 @@ async function processMessageAttachments(opts) {
514
529
  //#endregion
515
530
  exports.processMessageAttachments = processMessageAttachments;
516
531
 
517
- //# sourceMappingURL=attachments-CX2GAtsw.cjs.map
532
+ //# sourceMappingURL=attachments-Dbe7Bidz.cjs.map
@@ -1 +1 @@
1
- {"version":3,"file":"attachments-CX2GAtsw.cjs","names":["htmlToMarkdown","htmlConverter","chunkText"],"sources":["../src/sync/converters/text.ts","../src/sync/converters/docx.ts","../src/sync/converters/xlsx.ts","../src/sync/converters/pptx.ts","../src/sync/converters/pdf.ts","../src/sync/converters/image.ts","../src/sync/converters/registry.ts","../src/sync/attachments.ts"],"sourcesContent":["// src/sync/converters/text.ts\nimport type { Converter, ConversionResult } from \"./types.js\";\n\n/** Escape a CSV cell for safe inclusion in a Markdown table cell. */\nfunction mdCell(value: string): string {\n return value.replace(/\\\\/g, \"\\\\\\\\\").replace(/\\|/g, \"\\\\|\").replace(/\\r?\\n/g, \" \").trim();\n}\n\n/**\n * Minimal RFC-4180-ish CSV line splitter: handles quoted fields containing\n * commas and escaped double quotes. Good enough for rendering CSV attachments\n * as readable Markdown tables (we are not round-tripping data).\n */\nexport function parseCsvLine(line: string): string[] {\n const out: string[] = [];\n let cur = \"\";\n let inQuotes = false;\n for (let i = 0; i < line.length; i++) {\n const ch = line[i];\n if (inQuotes) {\n if (ch === '\"') {\n if (line[i + 1] === '\"') {\n cur += '\"';\n i++;\n } else {\n inQuotes = false;\n }\n } else {\n cur += ch;\n }\n } else if (ch === '\"') {\n inQuotes = true;\n } else if (ch === \",\") {\n out.push(cur);\n cur = \"\";\n } else {\n cur += ch;\n }\n }\n out.push(cur);\n return out;\n}\n\n/** Render a matrix of cells as a GitHub-flavored Markdown pipe table. */\nexport function rowsToMarkdown(rows: string[][]): string {\n if (rows.length === 0) return \"\";\n const width = Math.max(...rows.map((r) => r.length));\n const pad = (r: string[]): string[] =>\n Array.from({ length: width }, (_, i) => mdCell(r[i] ?? \"\"));\n\n const header = pad(rows[0] ?? 
[]);\n const lines = [\n `| ${header.join(\" | \")} |`,\n `| ${header.map(() => \"---\").join(\" | \")} |`,\n ...rows.slice(1).map((r) => `| ${pad(r).join(\" | \")} |`),\n ];\n return lines.join(\"\\n\");\n}\n\n/** Render CSV text as a GitHub-flavored Markdown pipe table. */\nexport function csvToMarkdown(csv: string): string {\n const rows = csv\n .split(/\\r?\\n/)\n .filter((l) => l.trim().length > 0)\n .map(parseCsvLine);\n return rowsToMarkdown(rows);\n}\n\nconst TEXT_EXTENSIONS = [\"txt\", \"text\", \"log\", \"md\", \"markdown\"];\nconst CODE_FENCE_EXTENSIONS: Record<string, string> = {\n json: \"json\",\n xml: \"xml\",\n yaml: \"yaml\",\n yml: \"yaml\",\n};\n\n/**\n * Converter for plain-text-ish attachments: Markdown/text passthrough, CSV/TSV\n * to Markdown tables, and structured text (JSON/XML/YAML) into fenced code\n * blocks so they stay readable and searchable without a heavy parser.\n */\nexport const textConverter: Converter = {\n name: \"text\",\n extensions: [...TEXT_EXTENSIONS, \"csv\", \"tsv\", ...Object.keys(CODE_FENCE_EXTENSIONS)],\n mimeTypes: [\"text/plain\", \"text/csv\", \"text/markdown\", \"application/json\", \"text/*\"],\n convert(buffer: Buffer, filename: string): Promise<ConversionResult> {\n const ext = filename.split(\".\").pop()?.toLowerCase() ?? 
\"\";\n const content = buffer.toString(\"utf-8\");\n\n if (ext === \"csv\") {\n return Promise.resolve({ markdown: csvToMarkdown(content), meta: { format: \"csv\" } });\n }\n if (ext === \"tsv\") {\n const asCsv = content.replace(/\\t/g, \",\");\n return Promise.resolve({ markdown: csvToMarkdown(asCsv), meta: { format: \"tsv\" } });\n }\n const fence = CODE_FENCE_EXTENSIONS[ext];\n if (fence) {\n return Promise.resolve({\n markdown: `\\`\\`\\`${fence}\\n${content.trim()}\\n\\`\\`\\``,\n meta: { format: fence },\n });\n }\n // Markdown / plain text: pass through verbatim.\n return Promise.resolve({ markdown: content.trim(), meta: { format: \"text\" } });\n },\n};\n","// src/sync/converters/docx.ts\nimport type { Converter, ConversionResult } from \"./types.js\";\nimport { htmlToMarkdown } from \"./html.js\";\n\n/**\n * DOCX → Markdown via mammoth (DOCX → semantic HTML) then Turndown (HTML →\n * Markdown). Mammoth's own Markdown output is deprecated; the HTML route keeps\n * tables, lists and headings intact. mammoth is loaded lazily.\n */\nexport const docxConverter: Converter = {\n name: \"docx\",\n extensions: [\"docx\"],\n mimeTypes: [\"application/vnd.openxmlformats-officedocument.wordprocessingml.document\"],\n async convert(buffer: Buffer): Promise<ConversionResult> {\n const mammoth = (await import(\"mammoth\")).default;\n const { value: html, messages } = await mammoth.convertToHtml({ buffer });\n const markdown = await htmlToMarkdown(html);\n return {\n markdown,\n meta: { format: \"docx\", warnings: messages.filter((m) => m.type === \"warning\").length },\n };\n },\n};\n","// src/sync/converters/xlsx.ts\nimport type { Converter, ConversionResult } from \"./types.js\";\nimport { rowsToMarkdown } from \"./text.js\";\n\n/** Render a single ExcelJS cell value as plain text. 
*/\nfunction cellText(value: unknown): string {\n if (value === null || value === undefined) return \"\";\n if (value instanceof Date) return value.toISOString().slice(0, 10);\n if (typeof value === \"object\") {\n const v = value as Record<string, unknown>;\n if (typeof v[\"text\"] === \"string\") return v[\"text\"];\n if (\"result\" in v) return String(v[\"result\"] ?? \"\");\n if (Array.isArray(v[\"richText\"])) {\n return (v[\"richText\"] as Array<{ text?: string }>).map((r) => r.text ?? \"\").join(\"\");\n }\n if (\"hyperlink\" in v) return String(v[\"text\"] ?? v[\"hyperlink\"] ?? \"\");\n }\n return String(value);\n}\n\n/**\n * Spreadsheet (XLSX) → Markdown via ExcelJS. Each worksheet becomes a\n * `## <sheet name>` section followed by a GitHub-flavored Markdown table.\n * ExcelJS is loaded lazily.\n */\nexport const xlsxConverter: Converter = {\n name: \"xlsx\",\n extensions: [\"xlsx\", \"xlsm\"],\n mimeTypes: [\n \"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet\",\n \"application/vnd.ms-excel\",\n ],\n async convert(buffer: Buffer): Promise<ConversionResult> {\n const ExcelJS = (await import(\"exceljs\")).default;\n const wb = new ExcelJS.Workbook();\n // ExcelJS's typings predate the @types/node generic Buffer; widen via ArrayBuffer.\n await wb.xlsx.load(buffer as unknown as ArrayBuffer);\n\n const sections: string[] = [];\n const sheetNames: string[] = [];\n wb.eachSheet((sheet) => {\n sheetNames.push(sheet.name);\n const rows: string[][] = [];\n sheet.eachRow({ includeEmpty: false }, (row) => {\n const cells: string[] = [];\n row.eachCell({ includeEmpty: true }, (cell) => {\n cells.push(cellText(cell.value));\n });\n rows.push(cells);\n });\n const table = rowsToMarkdown(rows);\n if (table) sections.push(`## ${sheet.name}\\n\\n${table}`);\n });\n\n return { markdown: sections.join(\"\\n\\n\"), meta: { format: \"xlsx\", sheets: sheetNames } };\n },\n};\n","// src/sync/converters/pptx.ts\nimport type { Converter, ConversionResult } from 
\"./types.js\";\n\n/** Extract the visible text runs (`<a:t>…</a:t>`) from one slide's XML. */\nexport function extractSlideText(xml: string): string {\n const runs = [...xml.matchAll(/<a:t>([\\s\\S]*?)<\\/a:t>/g)].map((m) =>\n (m[1] ?? \"\")\n .replace(/&amp;/g, \"&\")\n .replace(/&lt;/g, \"<\")\n .replace(/&gt;/g, \">\")\n .replace(/&quot;/g, '\"')\n .replace(/&apos;/g, \"'\")\n );\n return runs.join(\" \").replace(/\\s+/g, \" \").trim();\n}\n\nfunction slideNumber(entryName: string): number {\n const m = entryName.match(/slide(\\d+)\\.xml$/);\n return m ? parseInt(m[1] ?? \"0\", 10) : 0;\n}\n\n/**\n * PPTX → Markdown. A .pptx is a zip; slide text lives in `ppt/slides/slideN.xml`\n * as `<a:t>` runs. We unzip with adm-zip (already a dependency) and emit one\n * `## Slide N` section per slide — no extra native parser needed.\n */\nexport const pptxConverter: Converter = {\n name: \"pptx\",\n extensions: [\"pptx\"],\n mimeTypes: [\"application/vnd.openxmlformats-officedocument.presentationml.presentation\"],\n async convert(buffer: Buffer): Promise<ConversionResult> {\n const AdmZip = (await import(\"adm-zip\")).default;\n const zip = new AdmZip(buffer);\n const slides = zip\n .getEntries()\n .filter((e) => /^ppt\\/slides\\/slide\\d+\\.xml$/.test(e.entryName))\n .sort((a, b) => slideNumber(a.entryName) - slideNumber(b.entryName));\n\n const sections: string[] = [];\n for (const entry of slides) {\n const text = extractSlideText(entry.getData().toString(\"utf-8\"));\n if (text) sections.push(`## Slide ${slideNumber(entry.entryName)}\\n\\n${text}`);\n }\n return { markdown: sections.join(\"\\n\\n\"), meta: { format: \"pptx\", slides: slides.length } };\n },\n};\n","// src/sync/converters/pdf.ts\nimport type { Converter, ConversionResult } from \"./types.js\";\n\n/** Whether scanned-PDF OCR is enabled (opt-in, requires @napi-rs/canvas). */\nexport function isPdfOcrEnabled(): boolean {\n const v = (process.env[\"DXCRM_PDF_OCR\"] ?? 
\"\").toLowerCase();\n return v === \"1\" || v === \"true\" || v === \"yes\" || v === \"on\";\n}\n\n/**\n * OCR a scanned PDF by rendering each page to a PNG (unpdf + @napi-rs/canvas)\n * and running tesseract.js over it. The canvas backend is an optional peer\n * dependency; if it isn't installed we throw a clear, catchable error so the\n * caller can fall back to the OCR-candidate stub. Page count is capped via\n * DXCRM_PDF_OCR_MAX_PAGES (default 20) to keep this slow path bounded.\n */\nasync function ocrPdf(pdf: unknown, totalPages: number): Promise<string> {\n // Fail fast with a helpful message when the optional canvas backend is absent.\n try {\n await import(\"@napi-rs/canvas\");\n } catch {\n throw new Error(\n \"PDF OCR requires the optional '@napi-rs/canvas' package — run `npm install @napi-rs/canvas`\"\n );\n }\n\n const { renderPageAsImage } = await import(\"unpdf\");\n const { recognize } = await import(\"tesseract.js\");\n const lang = process.env[\"DXCRM_OCR_LANG\"] ?? \"eng\";\n const canvasImport = (): Promise<unknown> => import(\"@napi-rs/canvas\");\n\n const maxPages = Number(process.env[\"DXCRM_PDF_OCR_MAX_PAGES\"] ?? 20) || 20;\n const pageCount = Math.min(totalPages, maxPages);\n\n const pages: string[] = [];\n for (let p = 1; p <= pageCount; p++) {\n const png = await renderPageAsImage(pdf as never, p, { canvasImport, scale: 2 } as never);\n const {\n data: { text },\n } = await recognize(Buffer.from(png), lang);\n if (text.trim()) pages.push(`## Page ${p}\\n\\n${text.trim()}`);\n }\n return pages.join(\"\\n\\n\");\n}\n\n/**\n * PDF → Markdown. Extracts the digital text layer with unpdf (a serverless\n * pdf.js build). Scanned PDFs have no text layer: when DXCRM_PDF_OCR is enabled\n * they are rendered and OCR'd page-by-page, otherwise they're flagged as OCR\n * candidates rather than emitting garbage. 
unpdf is loaded lazily.\n */\nexport const pdfConverter: Converter = {\n name: \"pdf\",\n extensions: [\"pdf\"],\n mimeTypes: [\"application/pdf\"],\n async convert(buffer: Buffer, filename: string): Promise<ConversionResult> {\n const { extractText, getDocumentProxy } = await import(\"unpdf\");\n const pdf = await getDocumentProxy(new Uint8Array(buffer));\n const { totalPages, text } = await extractText(pdf, { mergePages: true });\n const merged = (Array.isArray(text) ? text.join(\"\\n\\n\") : text).trim();\n\n if (merged) return { markdown: merged, meta: { format: \"pdf\", pages: totalPages } };\n\n // No text layer — scanned PDF. OCR if opted in, else flag as a candidate.\n if (isPdfOcrEnabled()) {\n try {\n const ocr = await ocrPdf(pdf, totalPages);\n if (ocr.trim()) {\n return {\n markdown: `> _OCR of \\`${filename}\\` (${totalPages} pages):_\\n\\n${ocr.trim()}`,\n meta: { format: \"pdf\", pages: totalPages, ocr: true },\n };\n }\n } catch (err) {\n process.stderr.write(\n `[converters] pdf OCR failed for ${filename}: ${(err as Error).message}\\n`\n );\n }\n }\n\n return { markdown: \"\", meta: { format: \"pdf\", pages: totalPages, ocrCandidate: true } };\n },\n};\n","// src/sync/converters/image.ts\nimport type { Converter, ConversionResult } from \"./types.js\";\n\n/**\n * Image → Markdown via Tesseract.js OCR (pure-JS, 100+ languages, fully local).\n * This is the heaviest converter: tesseract.js downloads a WASM core and\n * language data on first use, so it is loaded lazily and only invoked for image\n * attachments. Language defaults to English, override with DXCRM_OCR_LANG.\n */\nexport const imageConverter: Converter = {\n name: \"image\",\n extensions: [\"png\", \"jpg\", \"jpeg\", \"tif\", \"tiff\", \"bmp\", \"webp\", \"gif\", \"pbm\"],\n mimeTypes: [\"image/*\"],\n async convert(buffer: Buffer, filename: string): Promise<ConversionResult> {\n const lang = process.env[\"DXCRM_OCR_LANG\"] ?? 
\"eng\";\n const { recognize } = await import(\"tesseract.js\");\n const {\n data: { text },\n } = await recognize(buffer, lang);\n const ocr = text.trim();\n return {\n markdown: ocr ? `> _OCR of \\`${filename}\\`:_\\n\\n${ocr}` : \"\",\n meta: { format: \"image\", ocr: true, lang },\n };\n },\n};\n","// src/sync/converters/registry.ts\nimport type { Converter, ConversionResult } from \"./types.js\";\nimport { textConverter } from \"./text.js\";\nimport { docxConverter } from \"./docx.js\";\nimport { xlsxConverter } from \"./xlsx.js\";\nimport { pptxConverter } from \"./pptx.js\";\nimport { pdfConverter } from \"./pdf.js\";\nimport { htmlConverter } from \"./html.js\";\nimport { imageConverter } from \"./image.js\";\n\n/**\n * Fallback converter for unknown/binary attachments: emit a small metadata stub\n * instead of garbage bytes, so the attachment is still recorded and linkable.\n */\nexport const fallbackConverter: Converter = {\n name: \"binary\",\n extensions: [],\n convert(buffer: Buffer, filename: string): Promise<ConversionResult> {\n const kb = Math.max(1, Math.round(buffer.length / 1024));\n return Promise.resolve({\n markdown: `> _Binary attachment \\`${filename}\\` (${kb} KB) — no text representation available._`,\n meta: { format: \"binary\", bytes: buffer.length },\n });\n },\n};\n\n/**\n * Ordered converter registry. Earlier entries win on extension conflicts. The\n * text converter is intentionally last among the \"real\" converters so that more\n * specific formats (html, etc.) take precedence over generic text matching.\n */\nexport const CONVERTERS: Converter[] = [\n docxConverter,\n xlsxConverter,\n pptxConverter,\n pdfConverter,\n imageConverter,\n htmlConverter,\n textConverter,\n];\n\nfunction extensionOf(filename: string): string {\n return filename.split(\".\").pop()?.toLowerCase() ?? 
\"\";\n}\n\nfunction mimeMatches(converter: Converter, mime: string): boolean {\n if (!converter.mimeTypes) return false;\n const lower = mime.toLowerCase();\n return converter.mimeTypes.some((m) => {\n const ml = m.toLowerCase();\n if (ml.endsWith(\"/*\")) return lower.startsWith(ml.slice(0, -1));\n return ml === lower;\n });\n}\n\n/**\n * Pick the converter for an attachment by file extension first (most reliable\n * for Gmail attachments, which always carry a filename), then by MIME type.\n * Returns `undefined` when nothing matches.\n */\nexport function matchConverter(filename: string, mime?: string): Converter | undefined {\n const ext = extensionOf(filename);\n if (ext) {\n const byExt = CONVERTERS.find((c) => c.extensions.includes(ext));\n if (byExt) return byExt;\n }\n if (mime) {\n const byMime = CONVERTERS.find((c) => mimeMatches(c, mime));\n if (byMime) return byMime;\n }\n return undefined;\n}\n\n/**\n * Convert an attachment to Markdown, dispatching to the best converter and\n * falling back to a metadata stub. Converter errors never throw: they are\n * swallowed into the fallback so a single bad attachment can't break a sync.\n */\nexport async function convertAttachment(\n buffer: Buffer,\n filename: string,\n mime?: string\n): Promise<ConversionResult> {\n const converter = matchConverter(filename, mime) ?? 
fallbackConverter;\n try {\n const result = await converter.convert(buffer, filename);\n if (!result.markdown.trim()) {\n return {\n markdown: `> _Attachment \\`${filename}\\` contained no extractable text._`,\n meta: { ...result.meta, empty: true },\n };\n }\n return result;\n } catch (err) {\n process.stderr.write(\n `[converters] ${converter.name} failed for ${filename}: ${(err as Error).message}\\n`\n );\n return fallbackConverter.convert(buffer, filename);\n }\n}\n","// src/sync/attachments.ts\nimport fs from \"fs\";\nimport path from \"path\";\nimport type { gmail_v1 } from \"@googleapis/gmail\";\nimport { convertAttachment } from \"./converters/registry.js\";\nimport { chunkText } from \"../core/chunk.js\";\nimport { assertSafeSlug } from \"../fs/customer-dir.js\";\nimport { logger } from \"../core/logger.js\";\n\n/** Default per-attachment size cap (skip larger blobs to keep syncs bounded). */\nexport const DEFAULT_MAX_ATTACHMENT_BYTES = 25 * 1024 * 1024;\n\nexport interface AttachmentPart {\n filename: string;\n mimeType: string;\n attachmentId: string;\n size: number;\n}\n\nexport interface SavedAttachment {\n /** Original filename as sent. */\n originalName: string;\n /** Stored raw filename (sanitized, message-prefixed) under attachments/. */\n storedName: string;\n /** Markdown filename under attachments/. */\n markdownName: string;\n /** Source ref used for LanceDB indexing. */\n ref: string;\n /** Number of indexed chunks produced from the Markdown. */\n chunks: number;\n}\n\n/**\n * Recursively collect downloadable attachment parts from a Gmail message\n * payload — any MIME part that carries both a filename and a body.attachmentId.\n * Inline parts without a filename (e.g. 
signature logos) are ignored.\n */\nexport function collectAttachmentParts(\n payload: gmail_v1.Schema$MessagePart | undefined\n): AttachmentPart[] {\n const out: AttachmentPart[] = [];\n const walk = (part?: gmail_v1.Schema$MessagePart): void => {\n if (!part) return;\n const filename = part.filename ?? \"\";\n const attachmentId = part.body?.attachmentId ?? \"\";\n if (filename && attachmentId) {\n out.push({\n filename,\n mimeType: part.mimeType ?? \"application/octet-stream\",\n attachmentId,\n size: part.body?.size ?? 0,\n });\n }\n for (const child of part.parts ?? []) walk(child);\n };\n walk(payload);\n return out;\n}\n\n/** Make a filename safe for use as a single path segment. */\nexport function sanitizeFilename(name: string): string {\n const base = name.split(/[\\\\/]/).pop() ?? name;\n return (\n base\n .replace(/[^a-zA-Z0-9._-]+/g, \"_\")\n .replace(/^_+|_+$/g, \"\")\n .slice(0, 120) || \"attachment\"\n );\n}\n\n/**\n * Download, convert and index every attachment of a single Gmail message.\n *\n * For each attachment: the raw bytes are saved under\n * `customers/<slug>/attachments/<messageId>__<name>`, converted to a sibling\n * `.md` file, and the Markdown is chunked and indexed into LanceDB so the\n * attachment's content is semantically searchable. Failures on a single\n * attachment are logged and skipped — they never abort the message sync.\n */\nexport async function processMessageAttachments(opts: {\n gmail: gmail_v1.Gmail;\n dataDir: string;\n slug: string;\n messageId: string;\n source: string;\n payload: gmail_v1.Schema$MessagePart | undefined;\n date: string;\n maxBytes?: number;\n}): Promise<SavedAttachment[]> {\n const parts = collectAttachmentParts(opts.payload);\n if (parts.length === 0) return [];\n\n assertSafeSlug(opts.slug);\n const maxBytes = opts.maxBytes ?? 
DEFAULT_MAX_ATTACHMENT_BYTES;\n const attachmentsDir = path.join(opts.dataDir, \"customers\", opts.slug, \"attachments\");\n fs.mkdirSync(attachmentsDir, { recursive: true });\n\n const { indexInLanceDB } = await import(\"../core/lancedb.js\");\n const saved: SavedAttachment[] = [];\n\n for (const part of parts) {\n try {\n if (part.size > maxBytes) {\n logger.warn(\"gmail-sync\", \"skipping oversized attachment\", {\n filename: part.filename,\n bytes: part.size,\n });\n continue;\n }\n\n const resp = await opts.gmail.users.messages.attachments.get({\n userId: \"me\",\n messageId: opts.messageId,\n id: part.attachmentId,\n });\n const data = resp.data.data;\n if (!data) continue;\n const buffer = Buffer.from(data, \"base64url\");\n\n const storedName = `${opts.messageId}__${sanitizeFilename(part.filename)}`;\n const markdownName = `${storedName}.md`;\n fs.writeFileSync(path.join(attachmentsDir, storedName), buffer);\n\n const { markdown } = await convertAttachment(buffer, part.filename, part.mimeType);\n const mdBody = `# ${part.filename}\\n\\n_Source: ${opts.source} · ${opts.date}_\\n\\n${markdown}\\n`;\n fs.writeFileSync(path.join(attachmentsDir, markdownName), mdBody);\n\n const ref = `${opts.source}#att:${part.filename}`;\n const chunks = chunkText(markdown);\n for (let i = 0; i < chunks.length; i++) {\n await indexInLanceDB(opts.dataDir, opts.slug, chunks[i]!, `${ref}#${i}`, {\n date: opts.date,\n type: \"attachment\",\n }).catch((err: unknown) => {\n logger.error(\"gmail-sync\", \"attachment index failed\", {\n error: (err as Error).message,\n });\n });\n }\n\n saved.push({\n originalName: part.filename,\n storedName,\n markdownName,\n ref,\n chunks: chunks.length,\n });\n } catch (err) {\n logger.warn(\"gmail-sync\", \"attachment failed\", {\n filename: part.filename,\n error: (err as Error).message,\n });\n }\n }\n\n return 
saved;\n}\n"],"mappings":";;;;;;;;;;;AAIA,SAAS,OAAO,OAAuB;CACrC,OAAO,MAAM,QAAQ,OAAO,MAAM,EAAE,QAAQ,OAAO,KAAK,EAAE,QAAQ,UAAU,GAAG,EAAE,KAAK;AACxF;;;;;;AAOA,SAAgB,aAAa,MAAwB;CACnD,MAAM,MAAgB,CAAC;CACvB,IAAI,MAAM;CACV,IAAI,WAAW;CACf,KAAK,IAAI,IAAI,GAAG,IAAI,KAAK,QAAQ,KAAK;EACpC,MAAM,KAAK,KAAK;EAChB,IAAI,UACF,IAAI,OAAO,MACT,IAAI,KAAK,IAAI,OAAO,MAAK;GACvB,OAAO;GACP;EACF,OACE,WAAW;OAGb,OAAO;OAEJ,IAAI,OAAO,MAChB,WAAW;OACN,IAAI,OAAO,KAAK;GACrB,IAAI,KAAK,GAAG;GACZ,MAAM;EACR,OACE,OAAO;CAEX;CACA,IAAI,KAAK,GAAG;CACZ,OAAO;AACT;;AAGA,SAAgB,eAAe,MAA0B;CACvD,IAAI,KAAK,WAAW,GAAG,OAAO;CAC9B,MAAM,QAAQ,KAAK,IAAI,GAAG,KAAK,KAAK,MAAM,EAAE,MAAM,CAAC;CACnD,MAAM,OAAO,MACX,MAAM,KAAK,EAAE,QAAQ,MAAM,IAAI,GAAG,MAAM,OAAO,EAAE,MAAM,EAAE,CAAC;CAE5D,MAAM,SAAS,IAAI,KAAK,MAAM,CAAC,CAAC;CAMhC,OAAO;EAJL,KAAK,OAAO,KAAK,KAAK,EAAE;EACxB,KAAK,OAAO,UAAU,KAAK,EAAE,KAAK,KAAK,EAAE;EACzC,GAAG,KAAK,MAAM,CAAC,EAAE,KAAK,MAAM,KAAK,IAAI,CAAC,EAAE,KAAK,KAAK,EAAE,GAAG;CAE9C,EAAE,KAAK,IAAI;AACxB;;AAGA,SAAgB,cAAc,KAAqB;CAKjD,OAAO,eAJM,IACV,MAAM,OAAO,EACb,QAAQ,MAAM,EAAE,KAAK,EAAE,SAAS,CAAC,EACjC,IAAI,YACkB,CAAC;AAC5B;AAEA,MAAM,kBAAkB;CAAC;CAAO;CAAQ;CAAO;CAAM;AAAU;AAC/D,MAAM,wBAAgD;CACpD,MAAM;CACN,KAAK;CACL,MAAM;CACN,KAAK;AACP;;;;;;AAOA,MAAa,gBAA2B;CACtC,MAAM;CACN,YAAY;EAAC,GAAG;EAAiB;EAAO;EAAO,GAAG,OAAO,KAAK,qBAAqB;CAAC;CACpF,WAAW;EAAC;EAAc;EAAY;EAAiB;EAAoB;CAAQ;CACnF,QAAQ,QAAgB,UAA6C;EACnE,MAAM,MAAM,SAAS,MAAM,GAAG,EAAE,IAAI,GAAG,YAAY,KAAK;EACxD,MAAM,UAAU,OAAO,SAAS,OAAO;EAEvC,IAAI,QAAQ,OACV,OAAO,QAAQ,QAAQ;GAAE,UAAU,cAAc,OAAO;GAAG,MAAM,EAAE,QAAQ,MAAM;EAAE,CAAC;EAEtF,IAAI,QAAQ,OAAO;GACjB,MAAM,QAAQ,QAAQ,QAAQ,OAAO,GAAG;GACxC,OAAO,QAAQ,QAAQ;IAAE,UAAU,cAAc,KAAK;IAAG,MAAM,EAAE,QAAQ,MAAM;GAAE,CAAC;EACpF;EACA,MAAM,QAAQ,sBAAsB;EACpC,IAAI,OACF,OAAO,QAAQ,QAAQ;GACrB,UAAU,SAAS,MAAM,IAAI,QAAQ,KAAK,EAAE;GAC5C,MAAM,EAAE,QAAQ,MAAM;EACxB,CAAC;EAGH,OAAO,QAAQ,QAAQ;GAAE,UAAU,QAAQ,KAAK;GAAG,MAAM,EAAE,QAAQ,OAAO;EAAE,CAAC;CAC/E;AACF;;;;;;;;ACjGA,MAAa,gBAA2B;CACtC,MAAM;CACN,YAAY,CAAC,MAAM;CACnB,WAAW,CAAC,yEAAyE;CACrF,MAAM,QAAQ,QAA2C;EAEvD,MAAM,EA
AE,OAAO,MAAM,aAAa,OADjB,MAAM,OAAO,YAAY,QACM,cAAc,EAAE,OAAO,CAAC;EAExE,OAAO;GACL,UAAA,MAFqBA,aAAAA,eAAe,IAAI;GAGxC,MAAM;IAAE,QAAQ;IAAQ,UAAU,SAAS,QAAQ,MAAM,EAAE,SAAS,SAAS,EAAE;GAAO;EACxF;CACF;AACF;;;;ACjBA,SAAS,SAAS,OAAwB;CACxC,IAAI,UAAU,QAAQ,UAAU,KAAA,GAAW,OAAO;CAClD,IAAI,iBAAiB,MAAM,OAAO,MAAM,YAAY,EAAE,MAAM,GAAG,EAAE;CACjE,IAAI,OAAO,UAAU,UAAU;EAC7B,MAAM,IAAI;EACV,IAAI,OAAO,EAAE,YAAY,UAAU,OAAO,EAAE;EAC5C,IAAI,YAAY,GAAG,OAAO,OAAO,EAAE,aAAa,EAAE;EAClD,IAAI,MAAM,QAAQ,EAAE,WAAW,GAC7B,OAAQ,EAAE,YAAyC,KAAK,MAAM,EAAE,QAAQ,EAAE,EAAE,KAAK,EAAE;EAErF,IAAI,eAAe,GAAG,OAAO,OAAO,EAAE,WAAW,EAAE,gBAAgB,EAAE;CACvE;CACA,OAAO,OAAO,KAAK;AACrB;;;;;;AAOA,MAAa,gBAA2B;CACtC,MAAM;CACN,YAAY,CAAC,QAAQ,MAAM;CAC3B,WAAW,CACT,qEACA,0BACF;CACA,MAAM,QAAQ,QAA2C;EAEvD,MAAM,KAAK,KADM,OAAM,OAAO,aAAY,QACnB,SAAS;EAEhC,MAAM,GAAG,KAAK,KAAK,MAAgC;EAEnD,MAAM,WAAqB,CAAC;EAC5B,MAAM,aAAuB,CAAC;EAC9B,GAAG,WAAW,UAAU;GACtB,WAAW,KAAK,MAAM,IAAI;GAC1B,MAAM,OAAmB,CAAC;GAC1B,MAAM,QAAQ,EAAE,cAAc,MAAM,IAAI,QAAQ;IAC9C,MAAM,QAAkB,CAAC;IACzB,IAAI,SAAS,EAAE,cAAc,KAAK,IAAI,SAAS;KAC7C,MAAM,KAAK,SAAS,KAAK,KAAK,CAAC;IACjC,CAAC;IACD,KAAK,KAAK,KAAK;GACjB,CAAC;GACD,MAAM,QAAQ,eAAe,IAAI;GACjC,IAAI,OAAO,SAAS,KAAK,MAAM,MAAM,KAAK,MAAM,OAAO;EACzD,CAAC;EAED,OAAO;GAAE,UAAU,SAAS,KAAK,MAAM;GAAG,MAAM;IAAE,QAAQ;IAAQ,QAAQ;GAAW;EAAE;CACzF;AACF;;;;ACpDA,SAAgB,iBAAiB,KAAqB;CASpD,OARa,CAAC,GAAG,IAAI,SAAS,yBAAyB,CAAC,EAAE,KAAK,OAC5D,EAAE,MAAM,IACN,QAAQ,UAAU,GAAG,EACrB,QAAQ,SAAS,GAAG,EACpB,QAAQ,SAAS,GAAG,EACpB,QAAQ,WAAW,IAAG,EACtB,QAAQ,WAAW,GAAG,CAEjB,EAAE,KAAK,GAAG,EAAE,QAAQ,QAAQ,GAAG,EAAE,KAAK;AAClD;AAEA,SAAS,YAAY,WAA2B;CAC9C,MAAM,IAAI,UAAU,MAAM,kBAAkB;CAC5C,OAAO,IAAI,SAAS,EAAE,MAAM,KAAK,EAAE,IAAI;AACzC;;;;;;AAOA,MAAa,gBAA2B;CACtC,MAAM;CACN,YAAY,CAAC,MAAM;CACnB,WAAW,CAAC,2EAA2E;CACvF,MAAM,QAAQ,QAA2C;EACvD,MAAM,UAAU,MAAM,OAAO,YAAY;EAEzC,MAAM,SAAS,IADC,OAAO,MACN,EACd,WAAW,EACX,QAAQ,MAAM,+BAA+B,KAAK,EAAE,SAAS,CAAC,EAC9D,MAAM,GAAG,MAAM,YAAY,EAAE,SAAS,IAAI,YAAY,EAAE,SAAS,CAAC;EAErE,MAAM,WAAqB,CAAC;EAC5B,KAAK,MAAM,SAAS,QAAQ;GAC1B,MAAM,OAAO,iBAAiB,MAAM,QAA
Q,EAAE,SAAS,OAAO,CAAC;GAC/D,IAAI,MAAM,SAAS,KAAK,YAAY,YAAY,MAAM,SAAS,EAAE,MAAM,MAAM;EAC/E;EACA,OAAO;GAAE,UAAU,SAAS,KAAK,MAAM;GAAG,MAAM;IAAE,QAAQ;IAAQ,QAAQ,OAAO;GAAO;EAAE;CAC5F;AACF;;;;ACzCA,SAAgB,kBAA2B;CACzC,MAAM,KAAK,QAAQ,IAAI,oBAAoB,IAAI,YAAY;CAC3D,OAAO,MAAM,OAAO,MAAM,UAAU,MAAM,SAAS,MAAM;AAC3D;;;;;;;;AASA,eAAe,OAAO,KAAc,YAAqC;CAEvE,IAAI;EACF,MAAM,OAAO;CACf,QAAQ;EACN,MAAM,IAAI,MACR,6FACF;CACF;CAEA,MAAM,EAAE,sBAAsB,MAAM,OAAO;CAC3C,MAAM,EAAE,cAAc,MAAM,OAAO;CACnC,MAAM,OAAO,QAAQ,IAAI,qBAAqB;CAC9C,MAAM,qBAAuC,OAAO;CAEpD,MAAM,WAAW,OAAO,QAAQ,IAAI,8BAA8B,EAAE,KAAK;CACzE,MAAM,YAAY,KAAK,IAAI,YAAY,QAAQ;CAE/C,MAAM,QAAkB,CAAC;CACzB,KAAK,IAAI,IAAI,GAAG,KAAK,WAAW,KAAK;EACnC,MAAM,MAAM,MAAM,kBAAkB,KAAc,GAAG;GAAE;GAAc,OAAO;EAAE,CAAU;EACxF,MAAM,EACJ,MAAM,EAAE,WACN,MAAM,UAAU,OAAO,KAAK,GAAG,GAAG,IAAI;EAC1C,IAAI,KAAK,KAAK,GAAG,MAAM,KAAK,WAAW,EAAE,MAAM,KAAK,KAAK,GAAG;CAC9D;CACA,OAAO,MAAM,KAAK,MAAM;AAC1B;;;;;;;AAQA,MAAa,eAA0B;CACrC,MAAM;CACN,YAAY,CAAC,KAAK;CAClB,WAAW,CAAC,iBAAiB;CAC7B,MAAM,QAAQ,QAAgB,UAA6C;EACzE,MAAM,EAAE,aAAa,qBAAqB,MAAM,OAAO;EACvD,MAAM,MAAM,MAAM,iBAAiB,IAAI,WAAW,MAAM,CAAC;EACzD,MAAM,EAAE,YAAY,SAAS,MAAM,YAAY,KAAK,EAAE,YAAY,KAAK,CAAC;EACxE,MAAM,UAAU,MAAM,QAAQ,IAAI,IAAI,KAAK,KAAK,MAAM,IAAI,MAAM,KAAK;EAErE,IAAI,QAAQ,OAAO;GAAE,UAAU;GAAQ,MAAM;IAAE,QAAQ;IAAO,OAAO;GAAW;EAAE;EAGlF,IAAI,gBAAgB,GAClB,IAAI;GACF,MAAM,MAAM,MAAM,OAAO,KAAK,UAAU;GACxC,IAAI,IAAI,KAAK,GACX,OAAO;IACL,UAAU,eAAe,SAAS,MAAM,WAAW,eAAe,IAAI,KAAK;IAC3E,MAAM;KAAE,QAAQ;KAAO,OAAO;KAAY,KAAK;IAAK;GACtD;EAEJ,SAAS,KAAK;GACZ,QAAQ,OAAO,MACb,mCAAmC,SAAS,IAAK,IAAc,QAAQ,GACzE;EACF;EAGF,OAAO;GAAE,UAAU;GAAI,MAAM;IAAE,QAAQ;IAAO,OAAO;IAAY,cAAc;GAAK;EAAE;CACxF;AACF;;;;;;;;;ACzEA,MAAa,iBAA4B;CACvC,MAAM;CACN,YAAY;EAAC;EAAO;EAAO;EAAQ;EAAO;EAAQ;EAAO;EAAQ;EAAO;CAAK;CAC7E,WAAW,CAAC,SAAS;CACrB,MAAM,QAAQ,QAAgB,UAA6C;EACzE,MAAM,OAAO,QAAQ,IAAI,qBAAqB;EAC9C,MAAM,EAAE,cAAc,MAAM,OAAO;EACnC,MAAM,EACJ,MAAM,EAAE,WACN,MAAM,UAAU,QAAQ,IAAI;EAChC,MAAM,MAAM,KAAK,KAAK;EACtB,OAAO;GACL,UAAU,MAAM,eAAe,SAAS,UAAU,QAAQ;GAC1D,MAAM;IAAE,QAAQ;IAAS,KAAK;IAAM;GA
AK;EAC3C;CACF;AACF;;;;;;;ACXA,MAAa,oBAA+B;CAC1C,MAAM;CACN,YAAY,CAAC;CACb,QAAQ,QAAgB,UAA6C;EACnE,MAAM,KAAK,KAAK,IAAI,GAAG,KAAK,MAAM,OAAO,SAAS,IAAI,CAAC;EACvD,OAAO,QAAQ,QAAQ;GACrB,UAAU,0BAA0B,SAAS,MAAM,GAAG;GACtD,MAAM;IAAE,QAAQ;IAAU,OAAO,OAAO;GAAO;EACjD,CAAC;CACH;AACF;;;;;;AAOA,MAAa,aAA0B;CACrC;CACA;CACA;CACA;CACA;CACAC,aAAAA;CACA;AACF;AAEA,SAAS,YAAY,UAA0B;CAC7C,OAAO,SAAS,MAAM,GAAG,EAAE,IAAI,GAAG,YAAY,KAAK;AACrD;AAEA,SAAS,YAAY,WAAsB,MAAuB;CAChE,IAAI,CAAC,UAAU,WAAW,OAAO;CACjC,MAAM,QAAQ,KAAK,YAAY;CAC/B,OAAO,UAAU,UAAU,MAAM,MAAM;EACrC,MAAM,KAAK,EAAE,YAAY;EACzB,IAAI,GAAG,SAAS,IAAI,GAAG,OAAO,MAAM,WAAW,GAAG,MAAM,GAAG,EAAE,CAAC;EAC9D,OAAO,OAAO;CAChB,CAAC;AACH;;;;;;AAOA,SAAgB,eAAe,UAAkB,MAAsC;CACrF,MAAM,MAAM,YAAY,QAAQ;CAChC,IAAI,KAAK;EACP,MAAM,QAAQ,WAAW,MAAM,MAAM,EAAE,WAAW,SAAS,GAAG,CAAC;EAC/D,IAAI,OAAO,OAAO;CACpB;CACA,IAAI,MAAM;EACR,MAAM,SAAS,WAAW,MAAM,MAAM,YAAY,GAAG,IAAI,CAAC;EAC1D,IAAI,QAAQ,OAAO;CACrB;AAEF;;;;;;AAOA,eAAsB,kBACpB,QACA,UACA,MAC2B;CAC3B,MAAM,YAAY,eAAe,UAAU,IAAI,KAAK;CACpD,IAAI;EACF,MAAM,SAAS,MAAM,UAAU,QAAQ,QAAQ,QAAQ;EACvD,IAAI,CAAC,OAAO,SAAS,KAAK,GACxB,OAAO;GACL,UAAU,mBAAmB,SAAS;GACtC,MAAM;IAAE,GAAG,OAAO;IAAM,OAAO;GAAK;EACtC;EAEF,OAAO;CACT,SAAS,KAAK;EACZ,QAAQ,OAAO,MACb,gBAAgB,UAAU,KAAK,cAAc,SAAS,IAAK,IAAc,QAAQ,GACnF;EACA,OAAO,kBAAkB,QAAQ,QAAQ,QAAQ;CACnD;AACF;;;;;;AC9DA,SAAgB,uBACd,SACkB;CAClB,MAAM,MAAwB,CAAC;CAC/B,MAAM,QAAQ,SAA6C;EACzD,IAAI,CAAC,MAAM;EACX,MAAM,WAAW,KAAK,YAAY;EAClC,MAAM,eAAe,KAAK,MAAM,gBAAgB;EAChD,IAAI,YAAY,cACd,IAAI,KAAK;GACP;GACA,UAAU,KAAK,YAAY;GAC3B;GACA,MAAM,KAAK,MAAM,QAAQ;EAC3B,CAAC;EAEH,KAAK,MAAM,SAAS,KAAK,SAAS,CAAC,GAAG,KAAK,KAAK;CAClD;CACA,KAAK,OAAO;CACZ,OAAO;AACT;;AAGA,SAAgB,iBAAiB,MAAsB;CAErD,QADa,KAAK,MAAM,OAAO,EAAE,IAAI,KAAK,MAGrC,QAAQ,qBAAqB,GAAG,EAChC,QAAQ,YAAY,EAAE,EACtB,MAAM,GAAG,GAAG,KAAK;AAExB;;;;;;;;;;AAWA,eAAsB,0BAA0B,MASjB;CAC7B,MAAM,QAAQ,uBAAuB,KAAK,OAAO;CACjD,IAAI,MAAM,WAAW,GAAG,OAAO,CAAC;CAEhC,sBAAA,eAAe,KAAK,IAAI;CACxB,MAAM,WAAW,KAAK,YAAA;CACtB,MAAM,iBAAiB,KAAA,QAAK,KAAK,KAAK,SAAS,aAAa,KAAK,MAAM,aAAa;CACpF,GAAA,QAAG,UAA
U,gBAAgB,EAAE,WAAW,KAAK,CAAC;CAEhD,MAAM,EAAE,mBAAmB,MAAA,QAAA,QAAA,EAAA,WAAA,QAAM,WAAA,CAAA,EAAA,MAAA,MAAA,EAAA,eAAA;CACjC,MAAM,QAA2B,CAAC;CAElC,KAAK,MAAM,QAAQ,OACjB,IAAI;EACF,IAAI,KAAK,OAAO,UAAU;GACxB,eAAA,OAAO,KAAK,cAAc,iCAAiC;IACzD,UAAU,KAAK;IACf,OAAO,KAAK;GACd,CAAC;GACD;EACF;EAOA,MAAM,QAAO,MALM,KAAK,MAAM,MAAM,SAAS,YAAY,IAAI;GAC3D,QAAQ;GACR,WAAW,KAAK;GAChB,IAAI,KAAK;EACX,CAAC,GACiB,KAAK;EACvB,IAAI,CAAC,MAAM;EACX,MAAM,SAAS,OAAO,KAAK,MAAM,WAAW;EAE5C,MAAM,aAAa,GAAG,KAAK,UAAU,IAAI,iBAAiB,KAAK,QAAQ;EACvE,MAAM,eAAe,GAAG,WAAW;EACnC,GAAA,QAAG,cAAc,KAAA,QAAK,KAAK,gBAAgB,UAAU,GAAG,MAAM;EAE9D,MAAM,EAAE,aAAa,MAAM,kBAAkB,QAAQ,KAAK,UAAU,KAAK,QAAQ;EACjF,MAAM,SAAS,KAAK,KAAK,SAAS,eAAe,KAAK,OAAO,KAAK,KAAK,KAAK,OAAO,SAAS;EAC5F,GAAA,QAAG,cAAc,KAAA,QAAK,KAAK,gBAAgB,YAAY,GAAG,MAAM;EAEhE,MAAM,MAAM,GAAG,KAAK,OAAO,OAAO,KAAK;EACvC,MAAM,SAASC,gBAAAA,UAAU,QAAQ;EACjC,KAAK,IAAI,IAAI,GAAG,IAAI,OAAO,QAAQ,KACjC,MAAM,eAAe,KAAK,SAAS,KAAK,MAAM,OAAO,IAAK,GAAG,IAAI,GAAG,KAAK;GACvE,MAAM,KAAK;GACX,MAAM;EACR,CAAC,EAAE,OAAO,QAAiB;GACzB,eAAA,OAAO,MAAM,cAAc,2BAA2B,EACpD,OAAQ,IAAc,QACxB,CAAC;EACH,CAAC;EAGH,MAAM,KAAK;GACT,cAAc,KAAK;GACnB;GACA;GACA;GACA,QAAQ,OAAO;EACjB,CAAC;CACH,SAAS,KAAK;EACZ,eAAA,OAAO,KAAK,cAAc,qBAAqB;GAC7C,UAAU,KAAK;GACf,OAAQ,IAAc;EACxB,CAAC;CACH;CAGF,OAAO;AACT"}
1
+ {"version":3,"file":"attachments-Dbe7Bidz.cjs","names":["htmlToMarkdown","htmlConverter","chunkText"],"sources":["../src/sync/converters/text.ts","../src/sync/converters/docx.ts","../src/sync/converters/xlsx.ts","../src/sync/converters/pptx.ts","../src/sync/converters/pdf.ts","../src/sync/converters/image.ts","../src/sync/converters/registry.ts","../src/sync/attachments.ts"],"sourcesContent":["// src/sync/converters/text.ts\nimport type { Converter, ConversionResult } from \"./types.js\";\n\n/** Escape a CSV cell for safe inclusion in a Markdown table cell. */\nfunction mdCell(value: string): string {\n return value.replace(/\\\\/g, \"\\\\\\\\\").replace(/\\|/g, \"\\\\|\").replace(/\\r?\\n/g, \" \").trim();\n}\n\n/**\n * Minimal RFC-4180-ish CSV line splitter: handles quoted fields containing\n * commas and escaped double quotes. Good enough for rendering CSV attachments\n * as readable Markdown tables (we are not round-tripping data).\n */\nexport function parseCsvLine(line: string): string[] {\n const out: string[] = [];\n let cur = \"\";\n let inQuotes = false;\n for (let i = 0; i < line.length; i++) {\n const ch = line[i];\n if (inQuotes) {\n if (ch === '\"') {\n if (line[i + 1] === '\"') {\n cur += '\"';\n i++;\n } else {\n inQuotes = false;\n }\n } else {\n cur += ch;\n }\n } else if (ch === '\"') {\n inQuotes = true;\n } else if (ch === \",\") {\n out.push(cur);\n cur = \"\";\n } else {\n cur += ch;\n }\n }\n out.push(cur);\n return out;\n}\n\n/** Render a matrix of cells as a GitHub-flavored Markdown pipe table. */\nexport function rowsToMarkdown(rows: string[][]): string {\n if (rows.length === 0) return \"\";\n const width = Math.max(...rows.map((r) => r.length));\n const pad = (r: string[]): string[] =>\n Array.from({ length: width }, (_, i) => mdCell(r[i] ?? \"\"));\n\n const header = pad(rows[0] ?? 
[]);\n const lines = [\n `| ${header.join(\" | \")} |`,\n `| ${header.map(() => \"---\").join(\" | \")} |`,\n ...rows.slice(1).map((r) => `| ${pad(r).join(\" | \")} |`),\n ];\n return lines.join(\"\\n\");\n}\n\n/** Render CSV text as a GitHub-flavored Markdown pipe table. */\nexport function csvToMarkdown(csv: string): string {\n const rows = csv\n .split(/\\r?\\n/)\n .filter((l) => l.trim().length > 0)\n .map(parseCsvLine);\n return rowsToMarkdown(rows);\n}\n\nconst TEXT_EXTENSIONS = [\"txt\", \"text\", \"log\", \"md\", \"markdown\"];\nconst CODE_FENCE_EXTENSIONS: Record<string, string> = {\n json: \"json\",\n xml: \"xml\",\n yaml: \"yaml\",\n yml: \"yaml\",\n};\n\n/**\n * Converter for plain-text-ish attachments: Markdown/text passthrough, CSV/TSV\n * to Markdown tables, and structured text (JSON/XML/YAML) into fenced code\n * blocks so they stay readable and searchable without a heavy parser.\n */\nexport const textConverter: Converter = {\n name: \"text\",\n extensions: [...TEXT_EXTENSIONS, \"csv\", \"tsv\", ...Object.keys(CODE_FENCE_EXTENSIONS)],\n mimeTypes: [\"text/plain\", \"text/csv\", \"text/markdown\", \"application/json\", \"text/*\"],\n convert(buffer: Buffer, filename: string): Promise<ConversionResult> {\n const ext = filename.split(\".\").pop()?.toLowerCase() ?? 
\"\";\n const content = buffer.toString(\"utf-8\");\n\n if (ext === \"csv\") {\n return Promise.resolve({ markdown: csvToMarkdown(content), meta: { format: \"csv\" } });\n }\n if (ext === \"tsv\") {\n const asCsv = content.replace(/\\t/g, \",\");\n return Promise.resolve({ markdown: csvToMarkdown(asCsv), meta: { format: \"tsv\" } });\n }\n const fence = CODE_FENCE_EXTENSIONS[ext];\n if (fence) {\n return Promise.resolve({\n markdown: `\\`\\`\\`${fence}\\n${content.trim()}\\n\\`\\`\\``,\n meta: { format: fence },\n });\n }\n // Markdown / plain text: pass through verbatim.\n return Promise.resolve({ markdown: content.trim(), meta: { format: \"text\" } });\n },\n};\n","// src/sync/converters/docx.ts\nimport type { Converter, ConversionResult } from \"./types.js\";\nimport { htmlToMarkdown } from \"./html.js\";\n\n/**\n * DOCX → Markdown via mammoth (DOCX → semantic HTML) then Turndown (HTML →\n * Markdown). Mammoth's own Markdown output is deprecated; the HTML route keeps\n * tables, lists and headings intact. mammoth is loaded lazily.\n */\nexport const docxConverter: Converter = {\n name: \"docx\",\n extensions: [\"docx\"],\n mimeTypes: [\"application/vnd.openxmlformats-officedocument.wordprocessingml.document\"],\n async convert(buffer: Buffer): Promise<ConversionResult> {\n const mammoth = (await import(\"mammoth\")).default;\n const { value: html, messages } = await mammoth.convertToHtml({ buffer });\n const markdown = await htmlToMarkdown(html);\n return {\n markdown,\n meta: { format: \"docx\", warnings: messages.filter((m) => m.type === \"warning\").length },\n };\n },\n};\n","// src/sync/converters/xlsx.ts\nimport type { Converter, ConversionResult } from \"./types.js\";\nimport { rowsToMarkdown } from \"./text.js\";\n\n/** Render a single ExcelJS cell value as plain text. 
*/\nfunction cellText(value: unknown): string {\n if (value === null || value === undefined) return \"\";\n if (value instanceof Date) return value.toISOString().slice(0, 10);\n if (typeof value === \"object\") {\n const v = value as Record<string, unknown>;\n if (typeof v[\"text\"] === \"string\") return v[\"text\"];\n if (\"result\" in v) return String(v[\"result\"] ?? \"\");\n if (Array.isArray(v[\"richText\"])) {\n return (v[\"richText\"] as Array<{ text?: string }>).map((r) => r.text ?? \"\").join(\"\");\n }\n if (\"hyperlink\" in v) return String(v[\"text\"] ?? v[\"hyperlink\"] ?? \"\");\n }\n return String(value);\n}\n\n/**\n * Spreadsheet (XLSX) → Markdown via ExcelJS. Each worksheet becomes a\n * `## <sheet name>` section followed by a GitHub-flavored Markdown table.\n * ExcelJS is loaded lazily.\n */\nexport const xlsxConverter: Converter = {\n name: \"xlsx\",\n extensions: [\"xlsx\", \"xlsm\"],\n mimeTypes: [\n \"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet\",\n \"application/vnd.ms-excel\",\n ],\n async convert(buffer: Buffer): Promise<ConversionResult> {\n const ExcelJS = (await import(\"exceljs\")).default;\n const wb = new ExcelJS.Workbook();\n // ExcelJS's typings predate the @types/node generic Buffer; widen via ArrayBuffer.\n await wb.xlsx.load(buffer as unknown as ArrayBuffer);\n\n const sections: string[] = [];\n const sheetNames: string[] = [];\n wb.eachSheet((sheet) => {\n sheetNames.push(sheet.name);\n const rows: string[][] = [];\n sheet.eachRow({ includeEmpty: false }, (row) => {\n const cells: string[] = [];\n row.eachCell({ includeEmpty: true }, (cell) => {\n cells.push(cellText(cell.value));\n });\n rows.push(cells);\n });\n const table = rowsToMarkdown(rows);\n if (table) sections.push(`## ${sheet.name}\\n\\n${table}`);\n });\n\n return { markdown: sections.join(\"\\n\\n\"), meta: { format: \"xlsx\", sheets: sheetNames } };\n },\n};\n","// src/sync/converters/pptx.ts\nimport type { Converter, ConversionResult } from 
\"./types.js\";\n\n/** Extract the visible text runs (`<a:t>…</a:t>`) from one slide's XML. */\nexport function extractSlideText(xml: string): string {\n const runs = [...xml.matchAll(/<a:t>([\\s\\S]*?)<\\/a:t>/g)].map((m) =>\n (m[1] ?? \"\")\n .replace(/&amp;/g, \"&\")\n .replace(/&lt;/g, \"<\")\n .replace(/&gt;/g, \">\")\n .replace(/&quot;/g, '\"')\n .replace(/&apos;/g, \"'\")\n );\n return runs.join(\" \").replace(/\\s+/g, \" \").trim();\n}\n\nfunction slideNumber(entryName: string): number {\n const m = entryName.match(/slide(\\d+)\\.xml$/);\n return m ? parseInt(m[1] ?? \"0\", 10) : 0;\n}\n\n/**\n * PPTX → Markdown. A .pptx is a zip; slide text lives in `ppt/slides/slideN.xml`\n * as `<a:t>` runs. We unzip with adm-zip (already a dependency) and emit one\n * `## Slide N` section per slide — no extra native parser needed.\n */\nexport const pptxConverter: Converter = {\n name: \"pptx\",\n extensions: [\"pptx\"],\n mimeTypes: [\"application/vnd.openxmlformats-officedocument.presentationml.presentation\"],\n async convert(buffer: Buffer): Promise<ConversionResult> {\n const AdmZip = (await import(\"adm-zip\")).default;\n const zip = new AdmZip(buffer);\n const slides = zip\n .getEntries()\n .filter((e) => /^ppt\\/slides\\/slide\\d+\\.xml$/.test(e.entryName))\n .sort((a, b) => slideNumber(a.entryName) - slideNumber(b.entryName));\n\n const sections: string[] = [];\n for (const entry of slides) {\n const text = extractSlideText(entry.getData().toString(\"utf-8\"));\n if (text) sections.push(`## Slide ${slideNumber(entry.entryName)}\\n\\n${text}`);\n }\n return { markdown: sections.join(\"\\n\\n\"), meta: { format: \"pptx\", slides: slides.length } };\n },\n};\n","// src/sync/converters/pdf.ts\nimport type { Converter, ConversionResult } from \"./types.js\";\n\n/** Whether scanned-PDF OCR is enabled (opt-in, requires @napi-rs/canvas). */\nexport function isPdfOcrEnabled(): boolean {\n const v = (process.env[\"DXCRM_PDF_OCR\"] ?? 
\"\").toLowerCase();\n return v === \"1\" || v === \"true\" || v === \"yes\" || v === \"on\";\n}\n\n/**\n * OCR a scanned PDF by rendering each page to a PNG (unpdf + @napi-rs/canvas)\n * and running tesseract.js over it. The canvas backend is an optional peer\n * dependency; if it isn't installed we throw a clear, catchable error so the\n * caller can fall back to the OCR-candidate stub. Page count is capped via\n * DXCRM_PDF_OCR_MAX_PAGES (default 20) to keep this slow path bounded.\n */\nasync function ocrPdf(pdf: unknown, totalPages: number): Promise<string> {\n // Fail fast with a helpful message when the optional canvas backend is absent.\n try {\n await import(\"@napi-rs/canvas\");\n } catch {\n throw new Error(\n \"PDF OCR requires the optional '@napi-rs/canvas' package — run `npm install @napi-rs/canvas`\"\n );\n }\n\n const { renderPageAsImage } = await import(\"unpdf\");\n const { recognize } = await import(\"tesseract.js\");\n const lang = process.env[\"DXCRM_OCR_LANG\"] ?? \"eng\";\n const canvasImport = (): Promise<unknown> => import(\"@napi-rs/canvas\");\n\n const maxPages = Number(process.env[\"DXCRM_PDF_OCR_MAX_PAGES\"] ?? 20) || 20;\n const pageCount = Math.min(totalPages, maxPages);\n\n const pages: string[] = [];\n for (let p = 1; p <= pageCount; p++) {\n const png = await renderPageAsImage(pdf as never, p, { canvasImport, scale: 2 } as never);\n const {\n data: { text },\n } = await recognize(Buffer.from(png), lang);\n if (text.trim()) pages.push(`## Page ${p}\\n\\n${text.trim()}`);\n }\n return pages.join(\"\\n\\n\");\n}\n\n/**\n * PDF → Markdown. Extracts the digital text layer with unpdf (a serverless\n * pdf.js build). Scanned PDFs have no text layer: when DXCRM_PDF_OCR is enabled\n * they are rendered and OCR'd page-by-page, otherwise they're flagged as OCR\n * candidates rather than emitting garbage. 
unpdf is loaded lazily.\n */\nexport const pdfConverter: Converter = {\n name: \"pdf\",\n extensions: [\"pdf\"],\n mimeTypes: [\"application/pdf\"],\n async convert(buffer: Buffer, filename: string): Promise<ConversionResult> {\n const { extractText, getDocumentProxy } = await import(\"unpdf\");\n const pdf = await getDocumentProxy(new Uint8Array(buffer));\n const { totalPages, text } = await extractText(pdf, { mergePages: true });\n const merged = (Array.isArray(text) ? text.join(\"\\n\\n\") : text).trim();\n\n if (merged) return { markdown: merged, meta: { format: \"pdf\", pages: totalPages } };\n\n // No text layer — scanned PDF. OCR if opted in, else flag as a candidate.\n if (isPdfOcrEnabled()) {\n try {\n const ocr = await ocrPdf(pdf, totalPages);\n if (ocr.trim()) {\n return {\n markdown: `> _OCR of \\`${filename}\\` (${totalPages} pages):_\\n\\n${ocr.trim()}`,\n meta: { format: \"pdf\", pages: totalPages, ocr: true },\n };\n }\n } catch (err) {\n process.stderr.write(\n `[converters] pdf OCR failed for ${filename}: ${(err as Error).message}\\n`\n );\n }\n }\n\n return { markdown: \"\", meta: { format: \"pdf\", pages: totalPages, ocrCandidate: true } };\n },\n};\n","// src/sync/converters/image.ts\nimport type { Converter, ConversionResult } from \"./types.js\";\n\n/**\n * Image → Markdown via Tesseract.js OCR (pure-JS, 100+ languages, fully local).\n * This is the heaviest converter: tesseract.js downloads a WASM core and\n * language data on first use, so it is loaded lazily and only invoked for image\n * attachments. Language defaults to English, override with DXCRM_OCR_LANG.\n */\nexport const imageConverter: Converter = {\n name: \"image\",\n extensions: [\"png\", \"jpg\", \"jpeg\", \"tif\", \"tiff\", \"bmp\", \"webp\", \"gif\", \"pbm\"],\n mimeTypes: [\"image/*\"],\n async convert(buffer: Buffer, filename: string): Promise<ConversionResult> {\n const lang = process.env[\"DXCRM_OCR_LANG\"] ?? 
\"eng\";\n const { recognize } = await import(\"tesseract.js\");\n const {\n data: { text },\n } = await recognize(buffer, lang);\n const ocr = text.trim();\n return {\n markdown: ocr ? `> _OCR of \\`${filename}\\`:_\\n\\n${ocr}` : \"\",\n meta: { format: \"image\", ocr: true, lang },\n };\n },\n};\n","// src/sync/converters/registry.ts\nimport type { Converter, ConversionResult } from \"./types.js\";\nimport { textConverter } from \"./text.js\";\nimport { docxConverter } from \"./docx.js\";\nimport { xlsxConverter } from \"./xlsx.js\";\nimport { pptxConverter } from \"./pptx.js\";\nimport { pdfConverter } from \"./pdf.js\";\nimport { htmlConverter } from \"./html.js\";\nimport { imageConverter } from \"./image.js\";\n\n/**\n * Fallback converter for unknown/binary attachments: emit a small metadata stub\n * instead of garbage bytes, so the attachment is still recorded and linkable.\n */\nexport const fallbackConverter: Converter = {\n name: \"binary\",\n extensions: [],\n convert(buffer: Buffer, filename: string): Promise<ConversionResult> {\n const kb = Math.max(1, Math.round(buffer.length / 1024));\n return Promise.resolve({\n markdown: `> _Binary attachment \\`${filename}\\` (${kb} KB) — no text representation available._`,\n meta: { format: \"binary\", bytes: buffer.length },\n });\n },\n};\n\n/**\n * Ordered converter registry. Earlier entries win on extension conflicts. The\n * text converter is intentionally last among the \"real\" converters so that more\n * specific formats (html, etc.) take precedence over generic text matching.\n */\nexport const CONVERTERS: Converter[] = [\n docxConverter,\n xlsxConverter,\n pptxConverter,\n pdfConverter,\n imageConverter,\n htmlConverter,\n textConverter,\n];\n\nfunction extensionOf(filename: string): string {\n return filename.split(\".\").pop()?.toLowerCase() ?? 
\"\";\n}\n\nfunction mimeMatches(converter: Converter, mime: string): boolean {\n if (!converter.mimeTypes) return false;\n const lower = mime.toLowerCase();\n return converter.mimeTypes.some((m) => {\n const ml = m.toLowerCase();\n if (ml.endsWith(\"/*\")) return lower.startsWith(ml.slice(0, -1));\n return ml === lower;\n });\n}\n\n/**\n * Pick the converter for an attachment by file extension first (most reliable\n * for Gmail attachments, which always carry a filename), then by MIME type.\n * Returns `undefined` when nothing matches.\n */\nexport function matchConverter(filename: string, mime?: string): Converter | undefined {\n const ext = extensionOf(filename);\n if (ext) {\n const byExt = CONVERTERS.find((c) => c.extensions.includes(ext));\n if (byExt) return byExt;\n }\n if (mime) {\n const byMime = CONVERTERS.find((c) => mimeMatches(c, mime));\n if (byMime) return byMime;\n }\n return undefined;\n}\n\n/**\n * Convert an attachment to Markdown, dispatching to the best converter and\n * falling back to a metadata stub. Converter errors never throw: they are\n * swallowed into the fallback so a single bad attachment can't break a sync.\n */\nexport async function convertAttachment(\n buffer: Buffer,\n filename: string,\n mime?: string\n): Promise<ConversionResult> {\n const converter = matchConverter(filename, mime) ?? 
fallbackConverter;\n try {\n const result = await converter.convert(buffer, filename);\n if (!result.markdown.trim()) {\n return {\n markdown: `> _Attachment \\`${filename}\\` contained no extractable text._`,\n meta: { ...result.meta, empty: true },\n };\n }\n return result;\n } catch (err) {\n process.stderr.write(\n `[converters] ${converter.name} failed for ${filename}: ${(err as Error).message}\\n`\n );\n return fallbackConverter.convert(buffer, filename);\n }\n}\n","// src/sync/attachments.ts\nimport fs from \"fs\";\nimport path from \"path\";\nimport type { gmail_v1 } from \"@googleapis/gmail\";\nimport { convertAttachment } from \"./converters/registry.js\";\nimport { chunkText } from \"../core/chunk.js\";\nimport { assertSafeSlug } from \"../fs/customer-dir.js\";\nimport { logger } from \"../core/logger.js\";\n\n/** Default per-attachment size cap (skip larger blobs to keep syncs bounded). */\nexport const DEFAULT_MAX_ATTACHMENT_BYTES = 25 * 1024 * 1024;\n\nexport interface AttachmentPart {\n filename: string;\n mimeType: string;\n attachmentId: string;\n size: number;\n}\n\nexport interface SavedAttachment {\n /** Original filename as sent. */\n originalName: string;\n /** Stored raw filename (sanitized, message-prefixed) under attachments/. */\n storedName: string;\n /** Markdown filename under attachments/. */\n markdownName: string;\n /** Source ref used for LanceDB indexing. */\n ref: string;\n /** Number of indexed chunks produced from the Markdown. */\n chunks: number;\n}\n\n/**\n * Recursively collect downloadable attachment parts from a Gmail message\n * payload — any MIME part that carries both a filename and a body.attachmentId.\n * Inline parts without a filename (e.g. 
signature logos) are ignored.\n */\nexport function collectAttachmentParts(\n payload: gmail_v1.Schema$MessagePart | undefined\n): AttachmentPart[] {\n const out: AttachmentPart[] = [];\n const walk = (part?: gmail_v1.Schema$MessagePart): void => {\n if (!part) return;\n const filename = part.filename ?? \"\";\n const attachmentId = part.body?.attachmentId ?? \"\";\n if (filename && attachmentId) {\n out.push({\n filename,\n mimeType: part.mimeType ?? \"application/octet-stream\",\n attachmentId,\n size: part.body?.size ?? 0,\n });\n }\n for (const child of part.parts ?? []) walk(child);\n };\n walk(payload);\n return out;\n}\n\n/** Make a filename safe for use as a single path segment. */\nexport function sanitizeFilename(name: string): string {\n const base = name.split(/[\\\\/]/).pop() ?? name;\n return (\n base\n .replace(/[^a-zA-Z0-9._-]+/g, \"_\")\n .replace(/^_+|_+$/g, \"\")\n .slice(0, 120) || \"attachment\"\n );\n}\n\n/**\n * Persist one already-downloaded attachment: save the raw bytes under\n * `customers/<slug>/attachments/<messageId>__<name>`, convert to a sibling\n * `.md` file, and chunk-index the Markdown into LanceDB for semantic search.\n * Provider-agnostic — used by every mailbox connector (Gmail, IMAP, Graph).\n */\nexport async function persistAttachment(opts: {\n dataDir: string;\n slug: string;\n messageId: string;\n source: string;\n date: string;\n filename: string;\n mimeType: string;\n buffer: Buffer;\n}): Promise<SavedAttachment> {\n assertSafeSlug(opts.slug);\n const attachmentsDir = path.join(opts.dataDir, \"customers\", opts.slug, \"attachments\");\n fs.mkdirSync(attachmentsDir, { recursive: true });\n\n const storedName = `${opts.messageId}__${sanitizeFilename(opts.filename)}`;\n const markdownName = `${storedName}.md`;\n fs.writeFileSync(path.join(attachmentsDir, storedName), opts.buffer);\n\n const { markdown } = await convertAttachment(opts.buffer, opts.filename, opts.mimeType);\n const mdBody = `# ${opts.filename}\\n\\n_Source: 
${opts.source} · ${opts.date}_\\n\\n${markdown}\\n`;\n fs.writeFileSync(path.join(attachmentsDir, markdownName), mdBody);\n\n const ref = `${opts.source}#att:${opts.filename}`;\n const { indexInLanceDB } = await import(\"../core/lancedb.js\");\n const chunks = chunkText(markdown);\n for (let i = 0; i < chunks.length; i++) {\n await indexInLanceDB(opts.dataDir, opts.slug, chunks[i]!, `${ref}#${i}`, {\n date: opts.date,\n type: \"attachment\",\n }).catch((err: unknown) => {\n logger.error(\"attachments\", \"attachment index failed\", { error: (err as Error).message });\n });\n }\n\n return {\n originalName: opts.filename,\n storedName,\n markdownName,\n ref,\n chunks: chunks.length,\n };\n}\n\n/**\n * Download, convert and index every attachment of a single Gmail message.\n * Fetches each part's bytes from the Gmail API, then delegates persistence to\n * the shared {@link persistAttachment}. Failures on a single attachment are\n * logged and skipped — they never abort the message sync.\n */\nexport async function processMessageAttachments(opts: {\n gmail: gmail_v1.Gmail;\n dataDir: string;\n slug: string;\n messageId: string;\n source: string;\n payload: gmail_v1.Schema$MessagePart | undefined;\n date: string;\n maxBytes?: number;\n}): Promise<SavedAttachment[]> {\n const parts = collectAttachmentParts(opts.payload);\n if (parts.length === 0) return [];\n\n assertSafeSlug(opts.slug);\n const maxBytes = opts.maxBytes ?? 
DEFAULT_MAX_ATTACHMENT_BYTES;\n const saved: SavedAttachment[] = [];\n\n for (const part of parts) {\n try {\n if (part.size > maxBytes) {\n logger.warn(\"gmail-sync\", \"skipping oversized attachment\", {\n filename: part.filename,\n bytes: part.size,\n });\n continue;\n }\n\n const resp = await opts.gmail.users.messages.attachments.get({\n userId: \"me\",\n messageId: opts.messageId,\n id: part.attachmentId,\n });\n const data = resp.data.data;\n if (!data) continue;\n\n saved.push(\n await persistAttachment({\n dataDir: opts.dataDir,\n slug: opts.slug,\n messageId: opts.messageId,\n source: opts.source,\n date: opts.date,\n filename: part.filename,\n mimeType: part.mimeType,\n buffer: Buffer.from(data, \"base64url\"),\n })\n );\n } catch (err) {\n logger.warn(\"gmail-sync\", \"attachment failed\", {\n filename: part.filename,\n error: (err as Error).message,\n });\n }\n }\n\n return saved;\n}\n"],"mappings":";;;;;;;;;;;AAIA,SAAS,OAAO,OAAuB;CACrC,OAAO,MAAM,QAAQ,OAAO,MAAM,EAAE,QAAQ,OAAO,KAAK,EAAE,QAAQ,UAAU,GAAG,EAAE,KAAK;AACxF;;;;;;AAOA,SAAgB,aAAa,MAAwB;CACnD,MAAM,MAAgB,CAAC;CACvB,IAAI,MAAM;CACV,IAAI,WAAW;CACf,KAAK,IAAI,IAAI,GAAG,IAAI,KAAK,QAAQ,KAAK;EACpC,MAAM,KAAK,KAAK;EAChB,IAAI,UACF,IAAI,OAAO,MACT,IAAI,KAAK,IAAI,OAAO,MAAK;GACvB,OAAO;GACP;EACF,OACE,WAAW;OAGb,OAAO;OAEJ,IAAI,OAAO,MAChB,WAAW;OACN,IAAI,OAAO,KAAK;GACrB,IAAI,KAAK,GAAG;GACZ,MAAM;EACR,OACE,OAAO;CAEX;CACA,IAAI,KAAK,GAAG;CACZ,OAAO;AACT;;AAGA,SAAgB,eAAe,MAA0B;CACvD,IAAI,KAAK,WAAW,GAAG,OAAO;CAC9B,MAAM,QAAQ,KAAK,IAAI,GAAG,KAAK,KAAK,MAAM,EAAE,MAAM,CAAC;CACnD,MAAM,OAAO,MACX,MAAM,KAAK,EAAE,QAAQ,MAAM,IAAI,GAAG,MAAM,OAAO,EAAE,MAAM,EAAE,CAAC;CAE5D,MAAM,SAAS,IAAI,KAAK,MAAM,CAAC,CAAC;CAMhC,OAAO;EAJL,KAAK,OAAO,KAAK,KAAK,EAAE;EACxB,KAAK,OAAO,UAAU,KAAK,EAAE,KAAK,KAAK,EAAE;EACzC,GAAG,KAAK,MAAM,CAAC,EAAE,KAAK,MAAM,KAAK,IAAI,CAAC,EAAE,KAAK,KAAK,EAAE,GAAG;CAE9C,EAAE,KAAK,IAAI;AACxB;;AAGA,SAAgB,cAAc,KAAqB;CAKjD,OAAO,eAJM,IACV,MAAM,OAAO,EACb,QAAQ,MAAM,EAAE,KAAK,EAAE,SAAS,CAAC,EACjC,IAAI,YACkB,CAAC;AAC5B;AAEA,MAAM,kBAAkB;CAAC;
CAAO;CAAQ;CAAO;CAAM;AAAU;AAC/D,MAAM,wBAAgD;CACpD,MAAM;CACN,KAAK;CACL,MAAM;CACN,KAAK;AACP;;;;;;AAOA,MAAa,gBAA2B;CACtC,MAAM;CACN,YAAY;EAAC,GAAG;EAAiB;EAAO;EAAO,GAAG,OAAO,KAAK,qBAAqB;CAAC;CACpF,WAAW;EAAC;EAAc;EAAY;EAAiB;EAAoB;CAAQ;CACnF,QAAQ,QAAgB,UAA6C;EACnE,MAAM,MAAM,SAAS,MAAM,GAAG,EAAE,IAAI,GAAG,YAAY,KAAK;EACxD,MAAM,UAAU,OAAO,SAAS,OAAO;EAEvC,IAAI,QAAQ,OACV,OAAO,QAAQ,QAAQ;GAAE,UAAU,cAAc,OAAO;GAAG,MAAM,EAAE,QAAQ,MAAM;EAAE,CAAC;EAEtF,IAAI,QAAQ,OAAO;GACjB,MAAM,QAAQ,QAAQ,QAAQ,OAAO,GAAG;GACxC,OAAO,QAAQ,QAAQ;IAAE,UAAU,cAAc,KAAK;IAAG,MAAM,EAAE,QAAQ,MAAM;GAAE,CAAC;EACpF;EACA,MAAM,QAAQ,sBAAsB;EACpC,IAAI,OACF,OAAO,QAAQ,QAAQ;GACrB,UAAU,SAAS,MAAM,IAAI,QAAQ,KAAK,EAAE;GAC5C,MAAM,EAAE,QAAQ,MAAM;EACxB,CAAC;EAGH,OAAO,QAAQ,QAAQ;GAAE,UAAU,QAAQ,KAAK;GAAG,MAAM,EAAE,QAAQ,OAAO;EAAE,CAAC;CAC/E;AACF;;;;;;;;ACjGA,MAAa,gBAA2B;CACtC,MAAM;CACN,YAAY,CAAC,MAAM;CACnB,WAAW,CAAC,yEAAyE;CACrF,MAAM,QAAQ,QAA2C;EAEvD,MAAM,EAAE,OAAO,MAAM,aAAa,OADjB,MAAM,OAAO,YAAY,QACM,cAAc,EAAE,OAAO,CAAC;EAExE,OAAO;GACL,UAAA,MAFqBA,aAAAA,eAAe,IAAI;GAGxC,MAAM;IAAE,QAAQ;IAAQ,UAAU,SAAS,QAAQ,MAAM,EAAE,SAAS,SAAS,EAAE;GAAO;EACxF;CACF;AACF;;;;ACjBA,SAAS,SAAS,OAAwB;CACxC,IAAI,UAAU,QAAQ,UAAU,KAAA,GAAW,OAAO;CAClD,IAAI,iBAAiB,MAAM,OAAO,MAAM,YAAY,EAAE,MAAM,GAAG,EAAE;CACjE,IAAI,OAAO,UAAU,UAAU;EAC7B,MAAM,IAAI;EACV,IAAI,OAAO,EAAE,YAAY,UAAU,OAAO,EAAE;EAC5C,IAAI,YAAY,GAAG,OAAO,OAAO,EAAE,aAAa,EAAE;EAClD,IAAI,MAAM,QAAQ,EAAE,WAAW,GAC7B,OAAQ,EAAE,YAAyC,KAAK,MAAM,EAAE,QAAQ,EAAE,EAAE,KAAK,EAAE;EAErF,IAAI,eAAe,GAAG,OAAO,OAAO,EAAE,WAAW,EAAE,gBAAgB,EAAE;CACvE;CACA,OAAO,OAAO,KAAK;AACrB;;;;;;AAOA,MAAa,gBAA2B;CACtC,MAAM;CACN,YAAY,CAAC,QAAQ,MAAM;CAC3B,WAAW,CACT,qEACA,0BACF;CACA,MAAM,QAAQ,QAA2C;EAEvD,MAAM,KAAK,KADM,OAAM,OAAO,aAAY,QACnB,SAAS;EAEhC,MAAM,GAAG,KAAK,KAAK,MAAgC;EAEnD,MAAM,WAAqB,CAAC;EAC5B,MAAM,aAAuB,CAAC;EAC9B,GAAG,WAAW,UAAU;GACtB,WAAW,KAAK,MAAM,IAAI;GAC1B,MAAM,OAAmB,CAAC;GAC1B,MAAM,QAAQ,EAAE,cAAc,MAAM,IAAI,QAAQ;IAC9C,MAAM,QAAkB,CAAC;IACzB,IAAI,SAAS,EAAE,cAAc,KAAK,IAAI,SAAS;KAC7C,MAAM,KAAK,SAAS,KAAK,KAAK,CAAC;IACjC,CAAC;IACD,KAAK,KAAK,KAAK;G
ACjB,CAAC;GACD,MAAM,QAAQ,eAAe,IAAI;GACjC,IAAI,OAAO,SAAS,KAAK,MAAM,MAAM,KAAK,MAAM,OAAO;EACzD,CAAC;EAED,OAAO;GAAE,UAAU,SAAS,KAAK,MAAM;GAAG,MAAM;IAAE,QAAQ;IAAQ,QAAQ;GAAW;EAAE;CACzF;AACF;;;;ACpDA,SAAgB,iBAAiB,KAAqB;CASpD,OARa,CAAC,GAAG,IAAI,SAAS,yBAAyB,CAAC,EAAE,KAAK,OAC5D,EAAE,MAAM,IACN,QAAQ,UAAU,GAAG,EACrB,QAAQ,SAAS,GAAG,EACpB,QAAQ,SAAS,GAAG,EACpB,QAAQ,WAAW,IAAG,EACtB,QAAQ,WAAW,GAAG,CAEjB,EAAE,KAAK,GAAG,EAAE,QAAQ,QAAQ,GAAG,EAAE,KAAK;AAClD;AAEA,SAAS,YAAY,WAA2B;CAC9C,MAAM,IAAI,UAAU,MAAM,kBAAkB;CAC5C,OAAO,IAAI,SAAS,EAAE,MAAM,KAAK,EAAE,IAAI;AACzC;;;;;;AAOA,MAAa,gBAA2B;CACtC,MAAM;CACN,YAAY,CAAC,MAAM;CACnB,WAAW,CAAC,2EAA2E;CACvF,MAAM,QAAQ,QAA2C;EACvD,MAAM,UAAU,MAAM,OAAO,YAAY;EAEzC,MAAM,SAAS,IADC,OAAO,MACN,EACd,WAAW,EACX,QAAQ,MAAM,+BAA+B,KAAK,EAAE,SAAS,CAAC,EAC9D,MAAM,GAAG,MAAM,YAAY,EAAE,SAAS,IAAI,YAAY,EAAE,SAAS,CAAC;EAErE,MAAM,WAAqB,CAAC;EAC5B,KAAK,MAAM,SAAS,QAAQ;GAC1B,MAAM,OAAO,iBAAiB,MAAM,QAAQ,EAAE,SAAS,OAAO,CAAC;GAC/D,IAAI,MAAM,SAAS,KAAK,YAAY,YAAY,MAAM,SAAS,EAAE,MAAM,MAAM;EAC/E;EACA,OAAO;GAAE,UAAU,SAAS,KAAK,MAAM;GAAG,MAAM;IAAE,QAAQ;IAAQ,QAAQ,OAAO;GAAO;EAAE;CAC5F;AACF;;;;ACzCA,SAAgB,kBAA2B;CACzC,MAAM,KAAK,QAAQ,IAAI,oBAAoB,IAAI,YAAY;CAC3D,OAAO,MAAM,OAAO,MAAM,UAAU,MAAM,SAAS,MAAM;AAC3D;;;;;;;;AASA,eAAe,OAAO,KAAc,YAAqC;CAEvE,IAAI;EACF,MAAM,OAAO;CACf,QAAQ;EACN,MAAM,IAAI,MACR,6FACF;CACF;CAEA,MAAM,EAAE,sBAAsB,MAAM,OAAO;CAC3C,MAAM,EAAE,cAAc,MAAM,OAAO;CACnC,MAAM,OAAO,QAAQ,IAAI,qBAAqB;CAC9C,MAAM,qBAAuC,OAAO;CAEpD,MAAM,WAAW,OAAO,QAAQ,IAAI,8BAA8B,EAAE,KAAK;CACzE,MAAM,YAAY,KAAK,IAAI,YAAY,QAAQ;CAE/C,MAAM,QAAkB,CAAC;CACzB,KAAK,IAAI,IAAI,GAAG,KAAK,WAAW,KAAK;EACnC,MAAM,MAAM,MAAM,kBAAkB,KAAc,GAAG;GAAE;GAAc,OAAO;EAAE,CAAU;EACxF,MAAM,EACJ,MAAM,EAAE,WACN,MAAM,UAAU,OAAO,KAAK,GAAG,GAAG,IAAI;EAC1C,IAAI,KAAK,KAAK,GAAG,MAAM,KAAK,WAAW,EAAE,MAAM,KAAK,KAAK,GAAG;CAC9D;CACA,OAAO,MAAM,KAAK,MAAM;AAC1B;;;;;;;AAQA,MAAa,eAA0B;CACrC,MAAM;CACN,YAAY,CAAC,KAAK;CAClB,WAAW,CAAC,iBAAiB;CAC7B,MAAM,QAAQ,QAAgB,UAA6C;EACzE,MAAM,EAAE,aAAa,qBAAqB,MAAM,OAAO;EACvD,MAAM,MAAM,MAAM,iBAAiB,IAAI,WAAW,MAAM,CAAC;EACzD,MAAM,EAAE,
YAAY,SAAS,MAAM,YAAY,KAAK,EAAE,YAAY,KAAK,CAAC;EACxE,MAAM,UAAU,MAAM,QAAQ,IAAI,IAAI,KAAK,KAAK,MAAM,IAAI,MAAM,KAAK;EAErE,IAAI,QAAQ,OAAO;GAAE,UAAU;GAAQ,MAAM;IAAE,QAAQ;IAAO,OAAO;GAAW;EAAE;EAGlF,IAAI,gBAAgB,GAClB,IAAI;GACF,MAAM,MAAM,MAAM,OAAO,KAAK,UAAU;GACxC,IAAI,IAAI,KAAK,GACX,OAAO;IACL,UAAU,eAAe,SAAS,MAAM,WAAW,eAAe,IAAI,KAAK;IAC3E,MAAM;KAAE,QAAQ;KAAO,OAAO;KAAY,KAAK;IAAK;GACtD;EAEJ,SAAS,KAAK;GACZ,QAAQ,OAAO,MACb,mCAAmC,SAAS,IAAK,IAAc,QAAQ,GACzE;EACF;EAGF,OAAO;GAAE,UAAU;GAAI,MAAM;IAAE,QAAQ;IAAO,OAAO;IAAY,cAAc;GAAK;EAAE;CACxF;AACF;;;;;;;;;ACzEA,MAAa,iBAA4B;CACvC,MAAM;CACN,YAAY;EAAC;EAAO;EAAO;EAAQ;EAAO;EAAQ;EAAO;EAAQ;EAAO;CAAK;CAC7E,WAAW,CAAC,SAAS;CACrB,MAAM,QAAQ,QAAgB,UAA6C;EACzE,MAAM,OAAO,QAAQ,IAAI,qBAAqB;EAC9C,MAAM,EAAE,cAAc,MAAM,OAAO;EACnC,MAAM,EACJ,MAAM,EAAE,WACN,MAAM,UAAU,QAAQ,IAAI;EAChC,MAAM,MAAM,KAAK,KAAK;EACtB,OAAO;GACL,UAAU,MAAM,eAAe,SAAS,UAAU,QAAQ;GAC1D,MAAM;IAAE,QAAQ;IAAS,KAAK;IAAM;GAAK;EAC3C;CACF;AACF;;;;;;;ACXA,MAAa,oBAA+B;CAC1C,MAAM;CACN,YAAY,CAAC;CACb,QAAQ,QAAgB,UAA6C;EACnE,MAAM,KAAK,KAAK,IAAI,GAAG,KAAK,MAAM,OAAO,SAAS,IAAI,CAAC;EACvD,OAAO,QAAQ,QAAQ;GACrB,UAAU,0BAA0B,SAAS,MAAM,GAAG;GACtD,MAAM;IAAE,QAAQ;IAAU,OAAO,OAAO;GAAO;EACjD,CAAC;CACH;AACF;;;;;;AAOA,MAAa,aAA0B;CACrC;CACA;CACA;CACA;CACA;CACAC,aAAAA;CACA;AACF;AAEA,SAAS,YAAY,UAA0B;CAC7C,OAAO,SAAS,MAAM,GAAG,EAAE,IAAI,GAAG,YAAY,KAAK;AACrD;AAEA,SAAS,YAAY,WAAsB,MAAuB;CAChE,IAAI,CAAC,UAAU,WAAW,OAAO;CACjC,MAAM,QAAQ,KAAK,YAAY;CAC/B,OAAO,UAAU,UAAU,MAAM,MAAM;EACrC,MAAM,KAAK,EAAE,YAAY;EACzB,IAAI,GAAG,SAAS,IAAI,GAAG,OAAO,MAAM,WAAW,GAAG,MAAM,GAAG,EAAE,CAAC;EAC9D,OAAO,OAAO;CAChB,CAAC;AACH;;;;;;AAOA,SAAgB,eAAe,UAAkB,MAAsC;CACrF,MAAM,MAAM,YAAY,QAAQ;CAChC,IAAI,KAAK;EACP,MAAM,QAAQ,WAAW,MAAM,MAAM,EAAE,WAAW,SAAS,GAAG,CAAC;EAC/D,IAAI,OAAO,OAAO;CACpB;CACA,IAAI,MAAM;EACR,MAAM,SAAS,WAAW,MAAM,MAAM,YAAY,GAAG,IAAI,CAAC;EAC1D,IAAI,QAAQ,OAAO;CACrB;AAEF;;;;;;AAOA,eAAsB,kBACpB,QACA,UACA,MAC2B;CAC3B,MAAM,YAAY,eAAe,UAAU,IAAI,KAAK;CACpD,IAAI;EACF,MAAM,SAAS,MAAM,UAAU,QAAQ,QAAQ,QAAQ;EACvD,IAAI,CAAC,OAAO,SAAS,KAAK,GACxB,OAAO;GACL,UAAU,mBAAmB,SAAS
;GACtC,MAAM;IAAE,GAAG,OAAO;IAAM,OAAO;GAAK;EACtC;EAEF,OAAO;CACT,SAAS,KAAK;EACZ,QAAQ,OAAO,MACb,gBAAgB,UAAU,KAAK,cAAc,SAAS,IAAK,IAAc,QAAQ,GACnF;EACA,OAAO,kBAAkB,QAAQ,QAAQ,QAAQ;CACnD;AACF;;;;;;AC9DA,SAAgB,uBACd,SACkB;CAClB,MAAM,MAAwB,CAAC;CAC/B,MAAM,QAAQ,SAA6C;EACzD,IAAI,CAAC,MAAM;EACX,MAAM,WAAW,KAAK,YAAY;EAClC,MAAM,eAAe,KAAK,MAAM,gBAAgB;EAChD,IAAI,YAAY,cACd,IAAI,KAAK;GACP;GACA,UAAU,KAAK,YAAY;GAC3B;GACA,MAAM,KAAK,MAAM,QAAQ;EAC3B,CAAC;EAEH,KAAK,MAAM,SAAS,KAAK,SAAS,CAAC,GAAG,KAAK,KAAK;CAClD;CACA,KAAK,OAAO;CACZ,OAAO;AACT;;AAGA,SAAgB,iBAAiB,MAAsB;CAErD,QADa,KAAK,MAAM,OAAO,EAAE,IAAI,KAAK,MAGrC,QAAQ,qBAAqB,GAAG,EAChC,QAAQ,YAAY,EAAE,EACtB,MAAM,GAAG,GAAG,KAAK;AAExB;;;;;;;AAQA,eAAsB,kBAAkB,MASX;CAC3B,sBAAA,eAAe,KAAK,IAAI;CACxB,MAAM,iBAAiB,KAAA,QAAK,KAAK,KAAK,SAAS,aAAa,KAAK,MAAM,aAAa;CACpF,GAAA,QAAG,UAAU,gBAAgB,EAAE,WAAW,KAAK,CAAC;CAEhD,MAAM,aAAa,GAAG,KAAK,UAAU,IAAI,iBAAiB,KAAK,QAAQ;CACvE,MAAM,eAAe,GAAG,WAAW;CACnC,GAAA,QAAG,cAAc,KAAA,QAAK,KAAK,gBAAgB,UAAU,GAAG,KAAK,MAAM;CAEnE,MAAM,EAAE,aAAa,MAAM,kBAAkB,KAAK,QAAQ,KAAK,UAAU,KAAK,QAAQ;CACtF,MAAM,SAAS,KAAK,KAAK,SAAS,eAAe,KAAK,OAAO,KAAK,KAAK,KAAK,OAAO,SAAS;CAC5F,GAAA,QAAG,cAAc,KAAA,QAAK,KAAK,gBAAgB,YAAY,GAAG,MAAM;CAEhE,MAAM,MAAM,GAAG,KAAK,OAAO,OAAO,KAAK;CACvC,MAAM,EAAE,mBAAmB,MAAA,QAAA,QAAA,EAAA,WAAA,QAAM,WAAA,CAAA,EAAA,MAAA,MAAA,EAAA,eAAA;CACjC,MAAM,SAASC,gBAAAA,UAAU,QAAQ;CACjC,KAAK,IAAI,IAAI,GAAG,IAAI,OAAO,QAAQ,KACjC,MAAM,eAAe,KAAK,SAAS,KAAK,MAAM,OAAO,IAAK,GAAG,IAAI,GAAG,KAAK;EACvE,MAAM,KAAK;EACX,MAAM;CACR,CAAC,EAAE,OAAO,QAAiB;EACzB,eAAA,OAAO,MAAM,eAAe,2BAA2B,EAAE,OAAQ,IAAc,QAAQ,CAAC;CAC1F,CAAC;CAGH,OAAO;EACL,cAAc,KAAK;EACnB;EACA;EACA;EACA,QAAQ,OAAO;CACjB;AACF;;;;;;;AAQA,eAAsB,0BAA0B,MASjB;CAC7B,MAAM,QAAQ,uBAAuB,KAAK,OAAO;CACjD,IAAI,MAAM,WAAW,GAAG,OAAO,CAAC;CAEhC,sBAAA,eAAe,KAAK,IAAI;CACxB,MAAM,WAAW,KAAK,YAAA;CACtB,MAAM,QAA2B,CAAC;CAElC,KAAK,MAAM,QAAQ,OACjB,IAAI;EACF,IAAI,KAAK,OAAO,UAAU;GACxB,eAAA,OAAO,KAAK,cAAc,iCAAiC;IACzD,UAAU,KAAK;IACf,OAAO,KAAK;GACd,CAAC;GACD;EACF;EAOA,MAAM,QAAO,MALM,KAAK,MAAM,MAAM,SAAS,YAAY,IAAI;GAC3D,QAAQ;GAC
R,WAAW,KAAK;GAChB,IAAI,KAAK;EACX,CAAC,GACiB,KAAK;EACvB,IAAI,CAAC,MAAM;EAEX,MAAM,KACJ,MAAM,kBAAkB;GACtB,SAAS,KAAK;GACd,MAAM,KAAK;GACX,WAAW,KAAK;GAChB,QAAQ,KAAK;GACb,MAAM,KAAK;GACX,UAAU,KAAK;GACf,UAAU,KAAK;GACf,QAAQ,OAAO,KAAK,MAAM,WAAW;EACvC,CAAC,CACH;CACF,SAAS,KAAK;EACZ,eAAA,OAAO,KAAK,cAAc,qBAAqB;GAC7C,UAAU,KAAK;GACf,OAAQ,IAAc;EACxB,CAAC;CACH;CAGF,OAAO;AACT"}
@@ -0,0 +1,2 @@
1
+ import { i as processMessageAttachments } from "./attachments-BddHbCt8.js";
2
+ export { processMessageAttachments };
package/dist/cli.js CHANGED
@@ -32,6 +32,7 @@ import { createHash } from "crypto";
32
32
  import Table from "cli-table3";
33
33
  import os from "os";
34
34
  import { execSync, spawn } from "child_process";
35
+ import readline from "readline";
35
36
  //#region src/commands/create.ts
36
37
  async function createCustomer(opts) {
37
38
  const id = slugify(opts.name, { lower: true });
@@ -240,10 +241,10 @@ mcpCommand.command("start").description("Start MCP server (stdio by default)").o
240
241
  if (opts.http) {
241
242
  const port = parseInt(opts.port, 10);
242
243
  console.error(info(`Starting MCP server in HTTP mode on port ${port}...`));
243
- const { startHttp } = await import("./server-BhNLrnAD.js");
244
+ const { startHttp } = await import("./server-uqXUhF4H.js");
244
245
  await startHttp(port);
245
246
  } else {
246
- const { startStdio } = await import("./server-BhNLrnAD.js");
247
+ const { startStdio } = await import("./server-uqXUhF4H.js");
247
248
  await startStdio();
248
249
  }
249
250
  });
@@ -1632,7 +1633,7 @@ const syncCommand = new Command("sync").argument("<slug>", "Customer slug to syn
1632
1633
  else try {
1633
1634
  console.log(info(` Syncing Gmail for ${bold(slug)}...`));
1634
1635
  const { getGmailAuth } = await import("./gmail-auth-OComS92L.js");
1635
- const { syncGmail: doGmailSync } = await import("./gmail-sync-B4Iu3AQb.js");
1636
+ const { syncGmail: doGmailSync } = await import("./gmail-sync-SvECok5p.js");
1636
1637
  const result = await doGmailSync({
1637
1638
  slug,
1638
1639
  dataDir,
@@ -4426,6 +4427,169 @@ complianceCommand.command("status", { isDefault: true }).description("Show the a
4426
4427
  if (cfg.aiDisclosure) console.log(info(` Disclosure: "${aiDisclosure()}"`));
4427
4428
  });
4428
4429
  //#endregion
4430
+ //#region src/commands/mailbox.ts
4431
/**
 * IMAP endpoints for mailboxes authorized through OAuth, keyed by provider id.
 * Both listed providers use port 993.
 */
const PROVIDER_IMAP_HOST = {
	gmail: { host: "imap.gmail.com", port: 993 },
	microsoft: { host: "outlook.office365.com", port: 993 }
};
4442
/**
 * Read IMAP mailbox connection settings from the environment.
 *
 * Variables read:
 * - DXCRM_IMAP_HOST, DXCRM_IMAP_USER: required.
 * - DXCRM_IMAP_PASS or DXCRM_IMAP_TOKEN: one is required; the token is used
 *   when both are set.
 * - DXCRM_IMAP_PORT: optional, defaults to 993.
 * - DXCRM_IMAP_SECURE: secure unless it is exactly the string "false".
 * - DXCRM_IMAP_MAILBOX: optional, defaults to "INBOX".
 *
 * Empty values are treated the same as unset ones.
 *
 * @param {Record<string, string | undefined>} [env=process.env] Environment to read from.
 * @returns {{host: string, port: number, secure: boolean, mailbox: string, auth: object} | null}
 *   The connection config, or null when host, user or a credential is missing.
 * @throws {Error} When DXCRM_IMAP_PORT is set but is not an integer in 1-65535.
 */
function imapConfigFromEnv(env = process.env) {
	const host = env["DXCRM_IMAP_HOST"];
	const user = env["DXCRM_IMAP_USER"];
	const pass = env["DXCRM_IMAP_PASS"];
	const accessToken = env["DXCRM_IMAP_TOKEN"];
	if (!host || !user || !pass && !accessToken) return null;
	const rawPort = env["DXCRM_IMAP_PORT"];
	let port = 993;
	if (rawPort) {
		port = Number(rawPort);
		// Reject NaN / fractional / out-of-range values here instead of handing
		// an unusable port to the IMAP client.
		if (!Number.isInteger(port) || port < 1 || port > 65535) {
			throw new Error(`Invalid DXCRM_IMAP_PORT '${rawPort}'. Expected an integer between 1 and 65535.`);
		}
	}
	return {
		host,
		port,
		secure: env["DXCRM_IMAP_SECURE"] !== "false",
		// `||` on purpose: an empty variable must not become an empty mailbox name.
		mailbox: env["DXCRM_IMAP_MAILBOX"] || "INBOX",
		auth: accessToken ? {
			user,
			accessToken
		} : {
			user,
			pass
		}
	};
}
4463
/**
 * Split a "provider:user" account string at its first colon.
 *
 * @param {string} account Account string such as "gmail:you@gmail.com".
 * @returns {{provider: string, user: string} | null} The two parts, or null
 *   when there is no colon, the provider is neither "gmail" nor "microsoft",
 *   or the user part is empty.
 */
function parseAccount(account) {
	const separatorAt = account.indexOf(":");
	if (separatorAt === -1) return null;
	const provider = account.slice(0, separatorAt);
	// Everything after the first colon belongs to the user, later colons included.
	const user = account.slice(separatorAt + 1);
	const isKnownProvider = provider === "gmail" || provider === "microsoft";
	if (!isKnownProvider) return null;
	if (user === "") return null;
	return { provider, user };
}
4475
/**
 * Build the IMAP config for a stored OAuth account.
 *
 * The account string is parsed with `parseAccount`, an access token is
 * obtained from `getFreshAccessToken` (presumably refreshed when stale;
 * confirm in the token-resolver module), and the host and port come from
 * `PROVIDER_IMAP_HOST`. The connection is always marked secure.
 *
 * @param {string} dataDir Data directory passed on to the token resolver.
 * @param {string} account Account in "provider:user" form.
 * @param {Record<string, string | undefined>} [env=process.env] Environment to read from.
 * @param {string} [mailbox] Mailbox name; falls back to DXCRM_IMAP_MAILBOX, then "INBOX".
 * @returns {Promise<object>} IMAP config using access-token auth.
 * @throws {Error} When `account` cannot be parsed.
 */
async function resolveAccountConfig(dataDir, account, env = process.env, mailbox) {
	const parsed = parseAccount(account);
	if (!parsed) {
		throw new Error(`Invalid --account '${account}'. Use 'gmail:you@gmail.com' or 'microsoft:you@org.com'.`);
	}
	const { provider, user } = parsed;
	const { getFreshAccessToken } = await import("./token-resolver-BRLOmRvF.js");
	const accessToken = await getFreshAccessToken(dataDir, provider, user, { env });
	const endpoint = PROVIDER_IMAP_HOST[provider];
	const mailboxName = mailbox ?? env["DXCRM_IMAP_MAILBOX"] ?? "INBOX";
	return {
		host: endpoint.host,
		port: endpoint.port,
		secure: true,
		mailbox: mailboxName,
		auth: { user, accessToken }
	};
}
4493
/**
 * Sync an IMAP mailbox (any provider) into the CRM.
 *
 * With `opts.slug`, all mail goes to that one customer. Without it, mail is
 * auto-routed to customers by sender/recipient domain and unmatched mail is
 * reported as unrouted. `opts.account` selects a stored OAuth mailbox
 * (Gmail/Outlook) instead of the env-configured IMAP connection.
 *
 * @param {object} opts
 * @param {string} opts.dataDir Data directory handed to the sync.
 * @param {string} [opts.slug] Customer that receives all mail.
 * @param {string} [opts.account] Stored OAuth account, "provider:user".
 * @param {Date} [opts.since] Forwarded to the sync when set.
 * @param {boolean} [opts.includeAttachments] Forwarded to the sync when defined.
 * @param {Record<string, string | undefined>} [opts.env] Environment; defaults to process.env.
 * @returns {Promise<object>} The sync result, or `{ error }` when no usable
 *   connection config could be built.
 */
async function runMailboxSync(opts) {
	const env = opts.env ?? process.env;
	// Report a configuration problem on stderr and return it to the caller.
	const fail = (msg) => {
		console.error(error(msg));
		return { error: msg };
	};
	let config;
	try {
		if (opts.account) config = await resolveAccountConfig(opts.dataDir, opts.account, env);
		else config = imapConfigFromEnv(env);
	} catch (err) {
		return fail(err.message);
	}
	if (!config) {
		return fail("IMAP not configured. Set DXCRM_IMAP_HOST, DXCRM_IMAP_USER and DXCRM_IMAP_PASS (or DXCRM_IMAP_TOKEN), or use --account.");
	}
	const { syncImapMailbox } = await import("./imap-o6PRuBvm.js");
	// Optional keys are added only when set, so the sync never sees them as undefined.
	const syncOptions = {
		dataDir: opts.dataDir,
		config
	};
	if (opts.slug) syncOptions.slug = opts.slug;
	if (opts.since) syncOptions.since = opts.since;
	if (opts.includeAttachments !== void 0) syncOptions.includeAttachments = opts.includeAttachments;
	const result = await syncImapMailbox(syncOptions);
	let target;
	if (opts.slug) target = `customer ${bold(opts.slug)}`;
	else target = "all customers (auto-routed)";
	console.log(success(`✓ IMAP ${config.mailbox} → ${target}: +${result.synced} synced, ${result.skipped} skipped, ${result.unrouted} unrouted`));
	const hasUnrouted = !opts.slug && result.unrouted > 0;
	if (hasUnrouted) {
		console.log(info(` ${result.unrouted} message(s) matched no customer. Add their domains via 'dxcrm create <slug> --domain <domain>'.`));
	}
	return result;
}
4527
/**
 * Prompt on stdout and read one line of input from stdin.
 *
 * @param {string} question Prompt text shown to the user.
 * @returns {Promise<string>} Resolves with the entered line once the readline
 *   interface has been closed.
 */
function ask(question) {
	const rl = readline.createInterface({
		input: process.stdin,
		output: process.stdout
	});
	return new Promise((resolve) => {
		rl.question(question, (answer) => {
			// Close before resolving so the interface is released first.
			rl.close();
			resolve(answer);
		});
	});
}
4534
/**
 * Interactive `dxcrm mailbox login <provider>` flow.
 *
 * Checks the provider's OAuth client settings in the environment, determines
 * the mailbox address (the `user` argument, otherwise a prompt), runs the
 * provider's login routine and prints the matching `mailbox sync` command.
 * Missing settings, an empty address and unknown providers are reported on
 * stderr and the function returns without attempting a login.
 *
 * Environment: DXCRM_DATA_DIR (defaults to the current directory);
 * gmail needs DXCRM_GOOGLE_CLIENT_ID and DXCRM_GOOGLE_CLIENT_SECRET;
 * microsoft needs DXCRM_MS_CLIENT_ID and optionally DXCRM_MS_TENANT
 * (defaults to "common").
 *
 * @param {string} provider "gmail" or "microsoft".
 * @param {string} [user] Mailbox address; prompted for when omitted.
 * @returns {Promise<void>}
 */
async function runLogin(provider, user) {
	const dataDir = process.env["DXCRM_DATA_DIR"] ?? process.cwd();
	const print = (l) => console.log(l);
	// Resolve the mailbox address, trimmed. An empty address is rejected here:
	// the sync command's "provider:user" parser refuses an empty user, so
	// tokens stored under "" could never be selected with --account.
	const readAccount = async (question) => {
		const account = (user ?? await ask(question)).trim();
		if (!account) {
			console.error(error("No mailbox address given. Pass --user <email> or enter one when prompted."));
			return null;
		}
		return account;
	};
	if (provider === "gmail") {
		const clientId = process.env["DXCRM_GOOGLE_CLIENT_ID"];
		const clientSecret = process.env["DXCRM_GOOGLE_CLIENT_SECRET"];
		if (!clientId || !clientSecret) {
			console.error(error("Set DXCRM_GOOGLE_CLIENT_ID and DXCRM_GOOGLE_CLIENT_SECRET (OAuth desktop app)."));
			return;
		}
		const account = await readAccount("Gmail address: ");
		if (account === null) return;
		const { runGmailLogin } = await import("./login-CYgla6-A.js");
		await runGmailLogin({
			dataDir,
			clientId,
			clientSecret,
			user: account,
			prompt: ask,
			print
		});
		console.log(success(`✓ Gmail authorized for ${bold(account)}. Sync: dxcrm mailbox sync --account gmail:${account}`));
		return;
	}
	if (provider === "microsoft") {
		const clientId = process.env["DXCRM_MS_CLIENT_ID"];
		if (!clientId) {
			console.error(error("Set DXCRM_MS_CLIENT_ID (Azure app registration, public client)."));
			return;
		}
		const tenant = process.env["DXCRM_MS_TENANT"] ?? "common";
		const account = await readAccount("Outlook/Microsoft address: ");
		if (account === null) return;
		const { runMicrosoftLogin } = await import("./login-CYgla6-A.js");
		await runMicrosoftLogin({
			dataDir,
			clientId,
			user: account,
			tenant,
			print
		});
		console.log(success(`✓ Microsoft authorized for ${bold(account)}. Sync: dxcrm mailbox sync --account microsoft:${account}`));
		return;
	}
	console.error(error(`Unknown provider '${provider}'. Use 'gmail' or 'microsoft'.`));
}
4579
/**
 * `dxcrm mailbox` command group:
 * - `login <provider>` authorizes a Gmail or Microsoft mailbox via `runLogin`.
 * - `sync [slug]` syncs a mailbox via `runMailboxSync`.
 */
const mailboxCommand = new Command("mailbox").description("Sync any IMAP mailbox (Gmail, Outlook, custom) into the CRM");
mailboxCommand.command("login").description("Authorize a Gmail or Microsoft mailbox via OAuth (stores tokens locally)").argument("<provider>", "gmail | microsoft").option("--user <email>", "Mailbox address (otherwise prompted)").action(async (provider, options) => {
	await runLogin(provider, options.user);
});
mailboxCommand.command("sync").description("Sync an IMAP mailbox; auto-routes to customers by domain unless a slug is given").argument("[slug]", "Route all mail to this customer (omit to auto-route by domain)").option("--account <provider:user>", "Use a stored OAuth mailbox (e.g. gmail:you@gmail.com)").option("--since <date>", "Only sync messages after this date (YYYY-MM-DD)").option("--no-attachments", "Skip downloading/converting/indexing attachments").action(async (slug, options) => {
	let since;
	if (options.since) {
		since = new Date(options.since);
		// An unparseable value yields an Invalid Date, which is still truthy and
		// would otherwise be forwarded to the sync as a bogus cutoff.
		if (Number.isNaN(since.getTime())) {
			console.error(error(`Invalid --since '${options.since}'. Use a date in YYYY-MM-DD format.`));
			return;
		}
	}
	await runMailboxSync({
		dataDir: process.env["DXCRM_DATA_DIR"] ?? process.cwd(),
		slug,
		...options.account ? { account: options.account } : {},
		...since ? { since } : {},
		// `--no-attachments` sets `options.attachments` to false.
		includeAttachments: options.attachments !== false
	});
});
4592
+ //#endregion
4429
4593
  //#region src/commands/registry.ts
4430
4594
  /** Every top-level `dxcrm` command, in display order. */
4431
4595
  const ALL_COMMANDS = [
@@ -4437,6 +4601,7 @@ const ALL_COMMANDS = [
4437
4601
  guideCommand,
4438
4602
  mcpCommand,
4439
4603
  syncCommand,
4604
+ mailboxCommand,
4440
4605
  backupCommand,
4441
4606
  restoreCommand,
4442
4607
  daemonCommand,