agent-reader 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,710 @@
1
+ import { promises as fs } from 'node:fs';
2
+ import os from 'node:os';
3
+ import path from 'node:path';
4
+ import { createRequire } from 'node:module';
5
+ import { fileURLToPath, pathToFileURL } from 'node:url';
6
+ import { execa } from 'execa';
7
+ import MarkdownIt from 'markdown-it';
8
+ import {
9
+ Document,
10
+ HeadingLevel,
11
+ Packer,
12
+ Paragraph,
13
+ ShadingType,
14
+ Table,
15
+ TableCell,
16
+ TableRow,
17
+ TextRun,
18
+ WidthType,
19
+ } from 'docx';
20
+
21
// ESM has no __dirname/require builtins; reconstruct them from import.meta.url.
const __dirname = path.dirname(fileURLToPath(import.meta.url));
const require = createRequire(import.meta.url);
// Pandoc styling assets shipped alongside this module.
const REFERENCE_DOC_PATH = path.join(__dirname, 'templates', 'reference.docx');
const LUA_TABLE_FILTER = path.join(__dirname, 'templates', 'docx-table.lua');
// Repo-level script that restyles tables in the generated .docx (best-effort).
const POSTPROCESS_SCRIPT = path.join(__dirname, '..', '..', 'scripts', 'postprocess-docx.py');

// Shared Markdown parser: raw HTML disabled, autolinking + smart quotes enabled.
const markdownParser = new MarkdownIt({ html: false, linkify: true, typographer: true });
28
+
29
/** Wrap a plain string in a single-run docx Paragraph. */
function paragraphFromText(text) {
  const run = new TextRun(text);
  return new Paragraph({ children: [run] });
}
34
+
35
/**
 * Map a numeric heading depth (1-6) to a docx HeadingLevel.
 * Any depth outside 1-5 collapses to HEADING_6.
 */
function headingLevelFromNumber(level) {
  const byDepth = {
    1: HeadingLevel.HEADING_1,
    2: HeadingLevel.HEADING_2,
    3: HeadingLevel.HEADING_3,
    4: HeadingLevel.HEADING_4,
    5: HeadingLevel.HEADING_5,
  };
  return byDepth[level] ?? HeadingLevel.HEADING_6;
}
51
+
52
/**
 * Flatten a markdown-it inline token to plain text.
 * Keeps only textual child tokens (text, inline code, link delimiters, image
 * alt text); formatting markers such as strong/em open/close are dropped.
 * Falls back to the token's own content when it has no children.
 */
function inlineToPlainText(token) {
  const children = token.children;
  if (!children || children.length === 0) {
    return token.content || '';
  }

  const textual = new Set(['text', 'code_inline', 'link_open', 'link_close', 'image']);
  let result = '';
  for (const child of children) {
    if (textual.has(child.type)) {
      result += child.content || '';
    }
  }
  return result;
}
67
+
68
/**
 * Scan a markdown-it token stream and collect TOC entries.
 * Each entry is { level, text }; headings whose rendered text is empty are
 * skipped. The inline token immediately following `heading_open` carries
 * the heading text.
 */
function collectTocEntries(tokens) {
  const entries = [];
  tokens.forEach((token, index) => {
    if (token.type !== 'heading_open') {
      return;
    }
    const text = inlineToPlainText(tokens[index + 1]).trim();
    if (!text) {
      return;
    }
    // token.tag is "h1".."h6"; fall back to level 1 if it doesn't parse.
    const level = Number(token.tag.slice(1)) || 1;
    entries.push({ level, text });
  });
  return entries;
}
89
+
90
/**
 * Build paragraphs for a plain-text table of contents: a "目录" heading
 * followed by one space-indented line per entry and a trailing blank line.
 * Returns an empty array when there are no entries.
 */
function buildDocxTocParagraphs(entries) {
  if (entries.length === 0) {
    return [];
  }

  const heading = new Paragraph({
    text: '目录',
    heading: HeadingLevel.HEADING_1,
  });
  const lines = entries.map((entry) => {
    // Indent by heading depth, clamped to [0, 5].
    const depth = Math.max(0, Math.min(5, entry.level - 1));
    return paragraphFromText(`${' '.repeat(depth)}${entry.text}`);
  });

  return [heading, ...lines, paragraphFromText('')];
}
110
+
111
/**
 * Consume tokens from just after a `table_open` until the matching
 * `table_close`, building a full-width docx Table of plain-text cells.
 *
 * Returns { table, nextIndex }: `nextIndex` is the index of the
 * `table_close` token so the caller can resume after the table. If no
 * `table_close` is found, `table` is null and `nextIndex` is `startIndex`.
 */
function parseTable(tokens, startIndex) {
  const rows = [];
  let row = [];
  let cell = null; // non-null only while inside a th/td

  for (let i = startIndex + 1; i < tokens.length; i += 1) {
    const token = tokens[i];
    switch (token.type) {
      case 'table_close': {
        const docxRows = rows.map((cells) => new TableRow({
          children: cells.map((content) => new TableCell({
            children: [paragraphFromText(content || '')],
          })),
        }));
        return {
          table: new Table({
            width: { size: 100, type: WidthType.PERCENTAGE },
            rows: docxRows,
          }),
          nextIndex: i,
        };
      }
      case 'tr_open':
        row = [];
        break;
      case 'tr_close':
        rows.push(row);
        break;
      case 'th_open':
      case 'td_open':
        cell = '';
        break;
      case 'th_close':
      case 'td_close':
        row.push(cell || '');
        cell = null;
        break;
      case 'inline':
        if (cell !== null) {
          cell += inlineToPlainText(token);
        }
        break;
      default:
        break;
    }
  }

  // Malformed stream: never saw table_close.
  return { table: null, nextIndex: startIndex };
}
169
+
170
/**
 * Fallback Markdown → docx Document converter, used when Pandoc is absent.
 * Inline formatting is flattened to plain text. Supports headings,
 * paragraphs, bullet/ordered lists (rendered as "• "/"1. " text prefixes),
 * fenced/indented code blocks, and tables.
 *
 * @param {string} markdown - Markdown source.
 * @returns {Document} docx Document with a single section.
 */
function markdownToDocx(markdown) {
  const tokens = markdownParser.parse(markdown, {});
  // (Removed a dead `collectTocEntries(tokens)` call whose result was never used.)
  const children = [];
  const listStack = [];
  let pendingBlock = null; // block context for the next inline token

  for (let i = 0; i < tokens.length; i += 1) {
    const token = tokens[i];

    if (token.type === 'heading_open') {
      pendingBlock = { type: 'heading', level: Number(token.tag.slice(1)) || 1 };
      continue;
    }

    if (token.type === 'paragraph_open') {
      pendingBlock = { type: 'paragraph' };
      continue;
    }

    if (token.type === 'bullet_list_open') {
      listStack.push({ type: 'bullet', count: 0 });
      continue;
    }

    if (token.type === 'ordered_list_open') {
      // Honor an explicit start="n" attribute; default to 1.
      listStack.push({ type: 'ordered', count: Number(token.attrGet('start')) || 1 });
      continue;
    }

    if (token.type === 'bullet_list_close' || token.type === 'ordered_list_close') {
      listStack.pop();
      continue;
    }

    if (token.type === 'list_item_close') {
      // Advance the ordinal only for numbered lists.
      const top = listStack[listStack.length - 1];
      if (top?.type === 'ordered') {
        top.count += 1;
      }
      continue;
    }

    if (token.type === 'fence' || token.type === 'code_block') {
      // Monospace paragraph with a light grey shade approximating a code block.
      children.push(
        new Paragraph({
          children: [new TextRun({ text: token.content, font: 'Consolas' })],
          shading: {
            type: ShadingType.CLEAR,
            color: 'auto',
            fill: 'F4F4F5',
          },
        }),
      );
      continue;
    }

    if (token.type === 'table_open') {
      const parsed = parseTable(tokens, i);
      if (parsed.table) {
        children.push(parsed.table);
      }
      i = parsed.nextIndex; // skip past the consumed table tokens
      continue;
    }

    // Everything else is driven by inline tokens.
    if (token.type !== 'inline') {
      continue;
    }

    const text = inlineToPlainText(token).trim();
    if (!text) {
      continue;
    }

    if (pendingBlock?.type === 'heading') {
      children.push(
        new Paragraph({
          text,
          heading: headingLevelFromNumber(pendingBlock.level),
        }),
      );
      pendingBlock = null;
      continue;
    }

    if (listStack.length > 0) {
      // List items become plain paragraphs with a textual "1. "/"• " prefix.
      const top = listStack[listStack.length - 1];
      const prefix = top.type === 'ordered' ? `${top.count}. ` : '• ';
      children.push(paragraphFromText(`${prefix}${text}`));
      continue;
    }

    children.push(paragraphFromText(text));
    pendingBlock = null;
  }

  return new Document({
    sections: [
      {
        children,
      },
    ],
  });
}
275
+
276
/**
 * Convert Markdown to HTML with smart table column widths.
 * Uses markdown-it to render, then injects <colgroup> with proportional
 * widths based on content length. Pandoc respects colgroup widths when
 * converting HTML → DOCX, unlike pipe/grid table width hints.
 *
 * @param {string} markdown - Markdown source.
 * @returns {string} Rendered HTML with a <colgroup> injected into each <table>.
 */
function markdownToHtmlWithTableWidths(markdown) {
  // First render to HTML normally
  let html = markdownParser.render(markdown);

  // Parse original markdown to measure table column content widths
  const tokens = markdownParser.parse(markdown, {});
  const tables = [];
  let tableIdx = -1;

  for (let i = 0; i < tokens.length; i += 1) {
    const token = tokens[i];
    if (token.type === 'table_open') {
      tableIdx += 1;
      tables[tableIdx] = { maxLen: [], ncols: 0 };
      continue;
    }
    if (token.type === 'table_close') {
      // FIX: leave "measuring" mode. Previously _currentCol kept its last
      // value, so inline text of ordinary paragraphs AFTER the table was
      // folded into the last column's max width, skewing the proportions.
      tables[tableIdx]._currentCol = undefined;
      continue;
    }
    if (tableIdx < 0) continue;

    if (token.type === 'inline') {
      const text = inlineToPlainText(token).trim();
      const tbl = tables[tableIdx];
      // Only measure while positioned inside a cell of the current table.
      if (tbl._currentCol !== undefined) {
        // Approximate display width: CJK/fullwidth code points count double.
        let w = 0;
        for (const ch of text) {
          const code = ch.codePointAt(0);
          if (
            (code >= 0x4E00 && code <= 0x9FFF)
            || (code >= 0x3000 && code <= 0x303F)
            || (code >= 0xFF00 && code <= 0xFFEF)
            || (code >= 0x3400 && code <= 0x4DBF)
          ) {
            w += 2;
          } else {
            w += 1;
          }
        }
        const ci = tbl._currentCol;
        tbl.maxLen[ci] = Math.max(tbl.maxLen[ci] || 1, w);
      }
    }

    if (token.type === 'th_open' || token.type === 'td_open') {
      const tbl = tables[tableIdx];
      if (tbl._currentCol === undefined) tbl._currentCol = 0;
      else tbl._currentCol += 1;
      continue;
    }
    if (token.type === 'tr_open') {
      const tbl = tables[tableIdx];
      tbl._currentCol = -1; // first th/td_open advances this to 0
      continue;
    }
    if (token.type === 'tr_close') {
      const tbl = tables[tableIdx];
      tbl.ncols = Math.max(tbl.ncols, (tbl._currentCol || 0) + 1);
      continue;
    }
  }

  // Now inject <colgroup> into each <table> in the HTML
  let tIdx = 0;
  html = html.replace(/<table>/g, () => {
    const tbl = tables[tIdx];
    tIdx += 1;
    if (!tbl || tbl.ncols === 0) return '<table>';

    // Smart proportional widths with first-column protection and max-width caps
    const needed = [];
    for (let c = 0; c < tbl.ncols; c += 1) {
      const rawLen = tbl.maxLen[c] || 1;
      // Penalize extremely long columns (>80 chars) to prevent them from dominating
      // Use sqrt to compress their weight while still giving them more space
      needed[c] = rawLen > 80 ? Math.sqrt(rawLen * 80) : rawLen;
    }
    const totalNeeded = needed.reduce((a, b) => a + b, 0);

    // Raw proportional percentages
    let pcts = needed.map((n) => Math.round((n / totalNeeded) * 100));

    // First column protection: always at least 18% (common label/title column)
    if (tbl.ncols >= 2 && pcts[0] < 18) {
      pcts[0] = 18;
    }

    // Cap maximum: no column should exceed 65% (prevents one column from dominating)
    for (let c = 0; c < tbl.ncols; c += 1) {
      if (pcts[c] > 65) pcts[c] = 65;
    }

    // Apply smart minimums based on content length
    for (let c = 0; c < tbl.ncols; c += 1) {
      const len = tbl.maxLen[c] || 1;
      const minPct = len <= 4 ? 6 : len <= 8 ? 10 : 12;
      if (pcts[c] < minPct) pcts[c] = minPct;
    }

    // Normalize to 100%
    const pctTotal = pcts.reduce((a, b) => a + b, 0);
    if (pctTotal !== 100) {
      if (tbl.ncols === 1) {
        // FIX: a lone column always spans the full width. The remainder
        // loop below only touches columns >= 1, so a single column capped
        // at 65% used to stay un-normalized.
        pcts[0] = 100;
      } else {
        // Distribute the difference across the non-first columns
        const diff = 100 - pctTotal;
        const adjustableCols = tbl.ncols - 1;
        const perCol = Math.floor(diff / adjustableCols);
        for (let c = 1; c < tbl.ncols; c += 1) {
          pcts[c] += perCol;
        }
        // Put any remainder on the last column
        pcts[pcts.length - 1] += diff - (perCol * adjustableCols);
      }
    }

    // Use width attribute (not style) — Pandoc respects this more reliably
    const colgroup = `<colgroup>${pcts.map((p) => `<col width="${p}%" />`).join('')}</colgroup>`;
    return `<table>${colgroup}`;
  });

  return html;
}
401
+
402
/**
 * Probe for a Pandoc binary on PATH.
 * @returns {Promise<{available: boolean, version: string|null}>} version is
 *   the first line of `pandoc --version` output when available.
 */
export async function checkPandoc() {
  try {
    const { stdout } = await execa('pandoc', ['--version']);
    const firstLine = stdout.split('\n')[0] || '';
    return { available: true, version: firstLine };
  } catch {
    // Binary missing or not executable — report unavailable rather than throw.
    return { available: false, version: null };
  }
}
417
+
418
/**
 * Probe for the optional `puppeteer` dependency.
 * @returns {Promise<{available: boolean, version: string|null}>} version is
 *   read from puppeteer's package.json when the module resolves.
 */
export async function checkPuppeteer() {
  try {
    await import('puppeteer');
    const pkgPath = require.resolve('puppeteer/package.json');
    const pkg = JSON.parse(await fs.readFile(pkgPath, 'utf8'));
    return { available: true, version: pkg.version || null };
  } catch {
    // Not installed (it is an optional dependency) — report unavailable.
    return { available: false, version: null };
  }
}
433
+
434
/**
 * Render HTML to a PDF file via Puppeteer.
 *
 * @param {string} html - Inline HTML content (ignored when options.htmlPath is set).
 * @param {object} [options]
 * @param {string} [options.pageSize='A4'] - Page format, e.g. 'A4', 'Letter'.
 * @param {boolean} [options.landscape=false] - Landscape orientation; also
 *   enables local-file access and in-page image downscaling.
 * @param {string} [options.outDir=os.tmpdir()] - Output directory (created if missing).
 * @param {string} [options.fileName='output.pdf'] - Output file name.
 * @param {string} [options.htmlPath] - Load this HTML file instead of the inline string.
 * @returns {Promise<{pdfPath: string, size: number, warnings: string[]}>}
 * @throws {Error} If Puppeteer is not installed.
 */
export async function exportPDF(html, options = {}) {
  const {
    pageSize = 'A4',
    landscape = false,
    outDir = os.tmpdir(),
    fileName = 'output.pdf',
    htmlPath,
  } = options;

  let puppeteer;
  try {
    const mod = await import('puppeteer');
    puppeteer = mod.default || mod;
  } catch {
    throw new Error('Puppeteer is not installed. Install optional dependency: npm install puppeteer');
  }

  const warnings = [];
  // FIX: ensure the output directory exists (exportDOCX already did this;
  // page.pdf fails if the target directory is missing).
  const targetDir = path.resolve(outDir);
  await fs.mkdir(targetDir, { recursive: true });
  const pdfPath = path.join(targetDir, fileName);

  const browser = await puppeteer.launch({
    headless: true,
    // Landscape mode is used for slide-style exports that reference local files.
    args: landscape ? ['--allow-file-access-from-files'] : [],
  });
  try {
    const page = await browser.newPage();

    if (htmlPath) {
      await page.goto(pathToFileURL(path.resolve(htmlPath)).toString(), {
        waitUntil: 'networkidle0',
      });
    } else {
      await page.setContent(html, {
        waitUntil: 'networkidle0',
      });
    }

    await page.addStyleTag({
      content: `
@page {
  size: ${pageSize}${landscape ? ' landscape' : ''};
  margin: ${landscape ? '0' : '2cm 2.5cm'};
}

/* Hide sidebar and toolbar for PDF export */
#sidebar { display: none !important; }
.doc-toolbar { display: none !important; }
body {
  display: block !important;
  overflow: visible !important;
  height: auto !important;
}
#main-content {
  overflow: visible !important;
  flex: none !important;
}
.markdown-body {
  max-width: 100% !important;
  padding: 40px 0 20px !important;
  margin: 0 auto !important;
}

thead { display: table-header-group; }
h1, h2, h3, h4, h5, h6 { break-after: avoid; }
pre { break-inside: auto; white-space: pre-wrap; word-break: break-word; }
table { break-inside: auto; }
tr { break-inside: avoid; }
img { break-inside: avoid; max-width: 100%; }
blockquote { break-inside: avoid; }`,
    });

    if (landscape) {
      // Downscale/recompress large images in-page to keep the PDF small.
      await page.evaluate(() => {
        const MAX_W = 1920;
        const QUALITY = 0.82;
        const imgs = document.querySelectorAll('img');
        const promises = Array.from(imgs).map((img) => new Promise((resolve) => {
          if (!img.naturalWidth) { resolve(); return; }
          const scale = img.naturalWidth > MAX_W ? MAX_W / img.naturalWidth : 1;
          const w = Math.round(img.naturalWidth * scale);
          const h = Math.round(img.naturalHeight * scale);
          const canvas = document.createElement('canvas');
          canvas.width = w;
          canvas.height = h;
          const ctx = canvas.getContext('2d');
          ctx.drawImage(img, 0, 0, w, h);
          // FIX: attach handlers BEFORE swapping src so a fast data-URL load
          // cannot fire load/error before we are listening.
          img.onload = resolve;
          img.onerror = resolve;
          img.src = canvas.toDataURL('image/jpeg', QUALITY);
        }));
        return Promise.all(promises);
      });
    }

    await page.pdf({
      path: pdfPath,
      format: pageSize,
      landscape,
      printBackground: true,
      preferCSSPageSize: true,
      outline: true,
      tagged: true,
    });
  } finally {
    // Always release the browser, even if rendering failed.
    await browser.close();
  }

  const stat = await fs.stat(pdfPath);
  return {
    pdfPath,
    size: stat.size,
    warnings,
  };
}
548
+
549
/**
 * Convert a Markdown string to a DOCX file.
 * Prefers Pandoc (via an intermediate HTML file carrying smart table column
 * widths); falls back to a pure-JS `docx` rendering when Pandoc is missing.
 *
 * @param {string} markdownString - Markdown source.
 * @param {object} [options]
 * @param {string} [options.baseDir] - Pandoc resource path for relative assets.
 * @param {string} [options.outDir=os.tmpdir()] - Output directory (created if missing).
 * @param {string} [options.fileName='output.docx'] - Output file name.
 * @returns {Promise<{docxPath: string, size: number, warnings: string[]}>}
 */
export async function exportDOCX(markdownString, options = {}) {
  const {
    baseDir,
    outDir = os.tmpdir(),
    fileName = 'output.docx',
  } = options;

  const targetDir = path.resolve(outDir);
  await fs.mkdir(targetDir, { recursive: true });
  const docxPath = path.join(targetDir, fileName);
  const warnings = [];

  const pandoc = await checkPandoc();
  if (pandoc.available) {
    // Convert MD → HTML with smart colgroup widths, then HTML → DOCX via Pandoc
    // Pandoc respects <colgroup> width percentages in HTML input
    const htmlContent = markdownToHtmlWithTableWidths(markdownString);
    // FIX: add a random suffix — Date.now() alone collides when two exports
    // run concurrently against the same outDir.
    const tempInput = path.join(targetDir, `.temp-${Date.now()}-${Math.random().toString(36).slice(2)}.html`);
    await fs.writeFile(tempInput, htmlContent, 'utf8');

    const args = [
      tempInput,
      '-f',
      'html',
      '-o',
      docxPath,
      `--reference-doc=${REFERENCE_DOC_PATH}`,
    ];

    if (baseDir) {
      args.push(`--resource-path=${path.resolve(baseDir)}`);
    }

    try {
      await execa('pandoc', args);
      // Post-process: style tables (header bg, bold, centered) — best-effort,
      // deliberately swallowed if python3 or the script is unavailable.
      await execa('python3', [POSTPROCESS_SCRIPT, docxPath]).catch(() => {});
    } finally {
      await fs.rm(tempInput, { force: true }).catch(() => {});
    }
  } else {
    const warning = 'Pandoc not found, using docx fallback. Install Pandoc for better results.';
    warnings.push(warning);

    const document = markdownToDocx(markdownString);
    const buffer = await Packer.toBuffer(document);
    await fs.writeFile(docxPath, buffer);
  }

  const stat = await fs.stat(docxPath);
  return {
    docxPath,
    size: stat.size,
    warnings,
  };
}
605
+
606
/**
 * Convert rendered viewer HTML to a DOCX file.
 * Strips UI chrome (sidebar/toolbar/nav) so only the document body is
 * exported, then converts with Pandoc (reference doc + Lua table filter,
 * plus best-effort python post-processing). Falls back to a text-only
 * docx when Pandoc is unavailable.
 *
 * @param {string} htmlString - HTML content (superseded by options.htmlPath).
 * @param {object} [options]
 * @param {string} [options.htmlPath] - Read HTML from this file instead of htmlString.
 * @param {string} [options.baseDir] - Pandoc resource path for relative assets.
 * @param {string} [options.outDir=os.tmpdir()] - Output directory (created if missing).
 * @param {string} [options.fileName='output.docx'] - Output file name.
 * @returns {Promise<{docxPath: string, size: number, warnings: string[]}>}
 */
export async function exportDOCXFromHTML(htmlString, options = {}) {
  const {
    htmlPath,
    baseDir,
    outDir = os.tmpdir(),
    fileName = 'output.docx',
  } = options;

  const targetDir = path.resolve(outDir);
  await fs.mkdir(targetDir, { recursive: true });

  const docxPath = path.join(targetDir, fileName);
  const warnings = [];

  // Read the full HTML (from file or string)
  let fullHtml = htmlString || '';
  if (htmlPath) {
    fullHtml = await fs.readFile(path.resolve(htmlPath), 'utf8');
  }

  // Strip UI elements (sidebar, toolbar) — only keep .markdown-body content
  // This prevents navigation/TOC UI from leaking into the Word document
  // NOTE(review): this regex assumes .markdown-body's closing </div></div>
  // is immediately followed by a <script> or end-of-file — confirm against
  // the viewer template that produces this HTML.
  const bodyMatch = fullHtml.match(/<div[^>]*class="markdown-body"[^>]*>([\s\S]*?)<\/div>\s*<\/div>\s*(?:<script|$)/i);
  if (bodyMatch) {
    fullHtml = bodyMatch[1];
  } else {
    // Try a simpler match: extract content between markdown-body div
    const startIdx = fullHtml.indexOf('class="markdown-body"');
    if (startIdx !== -1) {
      const tagEnd = fullHtml.indexOf('>', startIdx);
      if (tagEnd !== -1) {
        // Find the content after the opening tag, strip scripts/style at the end
        let content = fullHtml.substring(tagEnd + 1);
        // Remove trailing scripts and closing divs
        // (everything from the first <script> to end-of-string is dropped)
        content = content.replace(/<script[\s\S]*$/i, '');
        // Remove sidebar and toolbar remnants
        // NOTE(review): non-greedy up to two closing </div>s — nested divs
        // inside the sidebar/toolbar may defeat this; verify with real pages.
        content = content.replace(/<div[^>]*id="sidebar"[\s\S]*?<\/div>\s*<\/div>/gi, '');
        content = content.replace(/<div[^>]*class="doc-toolbar"[\s\S]*?<\/div>\s*<\/div>/gi, '');
        fullHtml = content;
      }
    }
  }

  // Also strip any remaining UI elements by tag/class
  fullHtml = fullHtml.replace(/<nav[\s\S]*?<\/nav>/gi, '');
  fullHtml = fullHtml.replace(/<div[^>]*id="sidebar"[\s\S]*?<\/div>/gi, '');
  fullHtml = fullHtml.replace(/<div[^>]*class="doc-toolbar"[\s\S]*?<\/div>/gi, '');

  const pandoc = await checkPandoc();
  if (pandoc.available) {
    // Write the cleaned HTML to a temp file next to the output for Pandoc.
    const tempInput = path.join(targetDir, `.temp-${Date.now()}.html`);
    await fs.writeFile(tempInput, fullHtml, 'utf8');

    const args = [
      tempInput,
      '-f',
      'html',
      '-o',
      docxPath,
      `--reference-doc=${REFERENCE_DOC_PATH}`,
      `--lua-filter=${LUA_TABLE_FILTER}`,
    ];

    if (baseDir) {
      args.push(`--resource-path=${path.resolve(baseDir)}`);
    }

    try {
      await execa('pandoc', args);
      // Best-effort table restyling; failures are deliberately ignored.
      await execa('python3', [POSTPROCESS_SCRIPT, docxPath]).catch(() => {});
    } finally {
      await fs.rm(tempInput, { force: true }).catch(() => {});
    }
  } else {
    warnings.push('Pandoc not found, using text-only DOCX fallback.');

    // Crude tag-stripping fallback: drop style/script, flatten tags to
    // spaces, decode a few common entities, collapse whitespace.
    // NOTE(review): this uses htmlString, not the cleaned fullHtml — so an
    // htmlPath-only call produces an empty document here; confirm intended.
    const plainText = String(htmlString || '')
      .replace(/<style[\s\S]*?<\/style>/gi, ' ')
      .replace(/<script[\s\S]*?<\/script>/gi, ' ')
      .replace(/<[^>]+>/g, ' ')
      .replaceAll('&nbsp;', ' ')
      .replaceAll('&amp;', '&')
      .replaceAll('&lt;', '<')
      .replaceAll('&gt;', '>')
      .replace(/\s+/g, ' ')
      .trim();

    const document = new Document({
      sections: [
        {
          children: [paragraphFromText(plainText || ' ')],
        },
      ],
    });
    const buffer = await Packer.toBuffer(document);
    await fs.writeFile(docxPath, buffer);
  }

  const stat = await fs.stat(docxPath);
  return {
    docxPath,
    size: stat.size,
    warnings,
  };
}