npm - @satiyap/confluence-reader-mcp - Versions diffs - 0.2.1 → 0.2.3 - Mend

@satiyap/confluence-reader-mcp 0.2.1 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4)

package/README.md +2 -1
package/dist/confluence/transform.js +83 -11
package/dist/index.js +13 -18
package/package.json +1 -1

package/README.md CHANGED Viewed

@@ -76,12 +76,13 @@ Lists the direct child pages of a Confluence page without fetching their content
 ### `confluence.fetch_image`
-Downloads an image attachment from a Confluence page by filename. Returns the image as base64-encoded data.
+Downloads an image attachment from a Confluence page by filename and saves it to a local directory.
 | Parameter | Type | Description |
 |-----------|------|-------------|
 | `url` | string | Confluence page URL |
 | `filename` | string | Attachment filename (e.g. `architecture.png`) |
+| `destination` | string | Local directory path to save the image to |
 ### `confluence.compare`

package/dist/confluence/transform.js CHANGED Viewed

@@ -14,29 +14,40 @@ turndown.use(gfm);
  */
 function normalizeConfluenceHtml(html) {
     let out = html;
-    // Convert ac:layout-section / ac:layout-cell to divs
-    out = out.replace(/<ac:layout-section>/gi, "<div>");
+    // --- Confluence layout tags → divs ---
+    out = out.replace(/<ac:layout-section[^>]*>/gi, "<div>");
     out = out.replace(/<\/ac:layout-section>/gi, "</div>");
     out = out.replace(/<ac:layout-cell>/gi, "<div>");
     out = out.replace(/<\/ac:layout-cell>/gi, "</div>");
     out = out.replace(/<ac:layout>/gi, "<div>");
     out = out.replace(/<\/ac:layout>/gi, "</div>");
-    // Convert ac:structured-macro (panels, code blocks, etc.) to divs
-    // Preserve the macro name as a data attribute for potential future use
+    // --- Table cleanup: strip attributes and colgroup so Turndown can parse ---
+    out = out.replace(/<table[^>]*>/gi, "<table>");
+    out = out.replace(/<colgroup>[\s\S]*?<\/colgroup>/gi, "");
+    out = out.replace(/<col[^>]*\/?>/gi, "");
+    out = out.replace(/<div class="content-wrapper">/gi, "");
+    // (closing </div> for content-wrapper will be handled by generic div cleanup later)
+    // --- Jira macro → text reference (extract key) ---
+    out = out.replace(/<ac:structured-macro[^>]*ac:name="jira"[^>]*>([\s\S]*?)<\/ac:structured-macro>/gi, (_match, inner) => {
+        const keyMatch = inner.match(/<ac:parameter[^>]*ac:name="key"[^>]*>([\s\S]*?)<\/ac:parameter>/i);
+        return keyMatch ? `<code>${keyMatch[1].trim()}</code>` : "";
+    });
+    // --- TOC and other self-closing macros → remove ---
+    out = out.replace(/<ac:structured-macro[^>]*\/>/gi, "");
+    // --- Code blocks ---
     out = out.replace(/<ac:structured-macro[^>]*ac:name="code"[^>]*>([\s\S]*?)<\/ac:structured-macro>/gi, (_match, inner) => {
-        // Extract plain-text-body for code blocks
         const bodyMatch = inner.match(/<ac:plain-text-body>\s*<!\[CDATA\[([\s\S]*?)\]\]>\s*<\/ac:plain-text-body>/i);
         if (bodyMatch) {
             return `<pre><code>${bodyMatch[1]}</code></pre>`;
         }
         return `<pre><code>${inner.replace(/<[^>]+>/g, "")}</code></pre>`;
     });
-    // Convert info/note/warning/tip panels to blockquotes
+    // --- Info/note/warning/tip panels → blockquotes ---
     out = out.replace(/<ac:structured-macro[^>]*ac:name="(info|note|warning|tip|panel)"[^>]*>([\s\S]*?)<\/ac:structured-macro>/gi, (_match, _type, inner) => {
         const bodyMatch = inner.match(/<ac:rich-text-body>([\s\S]*?)<\/ac:rich-text-body>/i);
         return bodyMatch ? `<blockquote>${bodyMatch[1]}</blockquote>` : `<blockquote>${inner}</blockquote>`;
     });
-    // Generic: any remaining ac:structured-macro — unwrap to div
+    // --- Generic remaining ac:structured-macro → unwrap to div ---
     out = out.replace(/<ac:structured-macro[^>]*>/gi, "<div>");
     out = out.replace(/<\/ac:structured-macro>/gi, "</div>");
     // ac:rich-text-body → div
@@ -48,14 +59,24 @@ function normalizeConfluenceHtml(html) {
     out = out.replace(/<\/ac:plain-text-body>/gi, "</pre>");
     // ac:parameter tags — remove entirely
     out = out.replace(/<ac:parameter[^>]*>[\s\S]*?<\/ac:parameter>/gi, "");
-    // ac:image → img tag
+    // --- ac:image → img tag ---
     out = out.replace(/<ac:image[^>]*>([\s\S]*?)<\/ac:image>/gi, (_match, inner) => {
         const filenameMatch = inner.match(/ri:filename="([^"]+)"/i);
         const filename = filenameMatch ? filenameMatch[1] : "image";
         return `<img alt="${filename}" src="${filename}" />`;
     });
-    // ac:link with ri:page → anchor
+    // --- ac:link: handle attachment links, user mentions, and page links ---
     out = out.replace(/<ac:link>([\s\S]*?)<\/ac:link>/gi, (_match, inner) => {
+        // Attachment link
+        const attachMatch = inner.match(/ri:filename="([^"]+)"/i);
+        if (attachMatch) {
+            return `<a href="#">📎 ${attachMatch[1]}</a>`;
+        }
+        // User mention
+        if (/<ri:user/i.test(inner)) {
+            return `<code>@user</code>`;
+        }
+        // Page link
         const pageMatch = inner.match(/ri:content-title="([^"]+)"/i);
         const bodyMatch = inner.match(/<ac:link-body>([\s\S]*?)<\/ac:link-body>/i)
             || inner.match(/<ac:plain-text-link-body>\s*<!\[CDATA\[([\s\S]*?)\]\]>\s*<\/ac:plain-text-link-body>/i);
@@ -65,7 +86,7 @@ function normalizeConfluenceHtml(html) {
     });
     // ac:emoticon → remove
     out = out.replace(/<ac:emoticon[^>]*\/>/gi, "");
-    // ac:task-list / ac:task / ac:task-body → ul/li
+    // --- ac:task-list / ac:task → ul/li ---
     out = out.replace(/<ac:task-list>/gi, "<ul>");
     out = out.replace(/<\/ac:task-list>/gi, "</ul>");
     out = out.replace(/<ac:task>([\s\S]*?)<\/ac:task>/gi, (_match, inner) => {
@@ -75,11 +96,62 @@ function normalizeConfluenceHtml(html) {
         const body = bodyMatch ? bodyMatch[1] : inner;
         return `<li>${checked ? "[x] " : "[ ] "}${body}</li>`;
     });
-    // Remove any remaining ac:* or ri:* tags but keep their text content
+    // --- Cleanup: remove any remaining ac:*/ri:* tags, keep text content ---
     out = out.replace(/<\/?(?:ac|ri):[^>]*>/gi, "");
     // Clean up CDATA remnants
     out = out.replace(/<!\[CDATA\[/g, "");
     out = out.replace(/\]\]>/g, "");
+    // Strip attributes from th/td so Turndown sees clean cells
+    out = out.replace(/<th[^>]*>/gi, "<th>");
+    out = out.replace(/<td[^>]*>/gi, "<td>");
+    // --- Normalize table rows to uniform column count ---
+    // Turndown GFM requires every row to have the same number of cells.
+    // Confluence tables often have irregular column spans.
+    out = out.replace(/<table>([\s\S]*?)<\/table>/gi, (_match, tableInner) => {
+        // Count cells per row
+        const rows = tableInner.match(/<tr>[\s\S]*?<\/tr>/gi) ?? [];
+        const cellCounts = rows.map((row) => {
+            const cells = row.match(/<(?:th|td)>/gi);
+            return cells ? cells.length : 0;
+        });
+        const maxCols = Math.max(0, ...cellCounts);
+        if (maxCols === 0)
+            return _match;
+        // Pad short rows
+        const paddedRows = rows.map((row, i) => {
+            const deficit = maxCols - cellCounts[i];
+            if (deficit <= 0)
+                return row;
+            const pad = "<td></td>".repeat(deficit);
+            return row.replace(/<\/tr>/i, `${pad}</tr>`);
+        });
+        // Ensure first row uses <th> so Turndown generates a header row
+        const rebuilt = tableInner.replace(/<tr>[\s\S]*?<\/tr>/gi, () => paddedRows.shift());
+        return `<table>${rebuilt}</table>`;
+    });
+    // Clean up table internals so Turndown GFM can convert them
+    out = out.replace(/<table>([\s\S]*?)<\/table>/gi, (_match, inner) => {
+        let cleaned = inner;
+        // Strip block-level wrappers inside cells
+        cleaned = cleaned.replace(/<\/?p>/gi, "");
+        cleaned = cleaned.replace(/<\/?span[^>]*>/gi, "");
+        cleaned = cleaned.replace(/<\/?div[^>]*>/gi, "");
+        // Convert <br> to space (markdown tables can't have line breaks)
+        cleaned = cleaned.replace(/<br\s*\/?>/gi, " ");
+        // Strip <tbody> wrapper — Turndown wants <table><thead><tr>…</tr></thead>…
+        cleaned = cleaned.replace(/<\/?tbody>/gi, "");
+        // Ensure first row uses <th> so Turndown sees a header
+        let firstDone = false;
+        cleaned = cleaned.replace(/<tr>([\s\S]*?)<\/tr>/gi, (trMatch, trInner) => {
+            if (!firstDone) {
+                firstDone = true;
+                const promoted = trInner.replace(/<td>/gi, "<th>").replace(/<\/td>/gi, "</th>");
+                return `<thead><tr>${promoted}</tr></thead>`;
+            }
+            return `<tr>${trInner}</tr>`;
+        });
+        return `<table>${cleaned}</table>`;
+    });
     return out;
 }
 /**

package/dist/index.js CHANGED Viewed

@@ -8,7 +8,7 @@ import { storageToMarkdown } from "./confluence/transform.js";
 import { generateUnifiedDiff, generateDiffStats } from "./compare/diff.js";
 const server = new McpServer({
     name: "confluence-reader-mcp",
-    version: "0.2.1"
+    version: "0.2.3"
 });
 function getEnv(name) {
     const v = process.env[name];
@@ -79,10 +79,11 @@ server.tool("confluence.list_children", "List the direct child pages of a Conflu
         : "No child pages found.";
     return { content: [{ type: "text", text }] };
 });
-server.tool("confluence.fetch_image", "Download an image attachment from a Confluence page by filename. Returns the image as base64-encoded data.", {
+server.tool("confluence.fetch_image", "Download an image attachment from a Confluence page by filename and save it to a local directory. Returns the saved file path.", {
     url: z.string().describe("Confluence page URL"),
-    filename: z.string().describe("Attachment filename (e.g. 'architecture.png')")
-}, async ({ url, filename }) => {
+    filename: z.string().describe("Attachment filename (e.g. 'architecture.png')"),
+    destination: z.string().describe("Local directory path to save the image to")
+}, async ({ url, filename, destination }) => {
     const cfg = getCfg();
     const pageId = extractConfluencePageId(url);
     const attachments = await fetchAttachments(cfg, pageId);
@@ -96,23 +97,17 @@ server.tool("confluence.fetch_image", "Download an image attachment from a Confl
                 }]
         };
     }
-    const { buffer, contentType } = await downloadAttachment(cfg, pageId, match.id);
-    const base64 = buffer.toString("base64");
-    // Return as base64 image content
-    if (contentType.startsWith("image/")) {
-        return {
-            content: [{
-                    type: "image",
-                    data: base64,
-                    mimeType: contentType,
-                }]
-        };
-    }
-    // Non-image attachment — return as base64 text
+    const { buffer } = await downloadAttachment(cfg, pageId, match.id);
+    // Ensure destination directory exists
+    const fs = await import("node:fs/promises");
+    const path = await import("node:path");
+    await fs.mkdir(destination, { recursive: true });
+    const filePath = path.join(destination, match.title);
+    await fs.writeFile(filePath, buffer);
     return {
         content: [{
                 type: "text",
-                text: `Downloaded "${filename}" (${contentType}, ${buffer.length} bytes).\nBase64: ${base64.slice(0, 200)}...`
+                text: `Saved "${match.title}" (${buffer.length} bytes) to ${filePath}`
             }]
     };
 });

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@satiyap/confluence-reader-mcp",
-  "version": "0.2.1",
+  "version": "0.2.3",
   "description": "MCP server for fetching and comparing Confluence documentation with local files",
   "author": "satiyap",
   "license": "MIT",