@satiyap/confluence-reader-mcp 0.2.1 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -76,12 +76,13 @@ Lists the direct child pages of a Confluence page without fetching their content
76
76
 
77
77
  ### `confluence.fetch_image`
78
78
 
79
- Downloads an image attachment from a Confluence page by filename. Returns the image as base64-encoded data.
79
+ Downloads an image attachment from a Confluence page by filename and saves it to a local directory.
80
80
 
81
81
  | Parameter | Type | Description |
82
82
  |-----------|------|-------------|
83
83
  | `url` | string | Confluence page URL |
84
84
  | `filename` | string | Attachment filename (e.g. `architecture.png`) |
85
+ | `destination` | string | Local directory path to save the image to |
85
86
 
86
87
  ### `confluence.compare`
87
88
 
@@ -14,29 +14,40 @@ turndown.use(gfm);
14
14
  */
15
15
  function normalizeConfluenceHtml(html) {
16
16
  let out = html;
17
- // Convert ac:layout-section / ac:layout-cell to divs
18
- out = out.replace(/<ac:layout-section>/gi, "<div>");
17
+ // --- Confluence layout tags → divs ---
18
+ out = out.replace(/<ac:layout-section[^>]*>/gi, "<div>");
19
19
  out = out.replace(/<\/ac:layout-section>/gi, "</div>");
20
20
  out = out.replace(/<ac:layout-cell>/gi, "<div>");
21
21
  out = out.replace(/<\/ac:layout-cell>/gi, "</div>");
22
22
  out = out.replace(/<ac:layout>/gi, "<div>");
23
23
  out = out.replace(/<\/ac:layout>/gi, "</div>");
24
- // Convert ac:structured-macro (panels, code blocks, etc.) to divs
25
- // Preserve the macro name as a data attribute for potential future use
24
+ // --- Table cleanup: strip attributes and colgroup so Turndown can parse ---
25
+ out = out.replace(/<table[^>]*>/gi, "<table>");
26
+ out = out.replace(/<colgroup>[\s\S]*?<\/colgroup>/gi, "");
27
+ out = out.replace(/<col[^>]*\/?>/gi, "");
28
+ out = out.replace(/<div class="content-wrapper">/gi, "");
29
+ // (closing </div> for content-wrapper will be handled by generic div cleanup later)
30
+ // --- Jira macro → text reference (extract key) ---
31
+ out = out.replace(/<ac:structured-macro[^>]*ac:name="jira"[^>]*>([\s\S]*?)<\/ac:structured-macro>/gi, (_match, inner) => {
32
+ const keyMatch = inner.match(/<ac:parameter[^>]*ac:name="key"[^>]*>([\s\S]*?)<\/ac:parameter>/i);
33
+ return keyMatch ? `<code>${keyMatch[1].trim()}</code>` : "";
34
+ });
35
+ // --- TOC and other self-closing macros → remove ---
36
+ out = out.replace(/<ac:structured-macro[^>]*\/>/gi, "");
37
+ // --- Code blocks ---
26
38
  out = out.replace(/<ac:structured-macro[^>]*ac:name="code"[^>]*>([\s\S]*?)<\/ac:structured-macro>/gi, (_match, inner) => {
27
- // Extract plain-text-body for code blocks
28
39
  const bodyMatch = inner.match(/<ac:plain-text-body>\s*<!\[CDATA\[([\s\S]*?)\]\]>\s*<\/ac:plain-text-body>/i);
29
40
  if (bodyMatch) {
30
41
  return `<pre><code>${bodyMatch[1]}</code></pre>`;
31
42
  }
32
43
  return `<pre><code>${inner.replace(/<[^>]+>/g, "")}</code></pre>`;
33
44
  });
34
- // Convert info/note/warning/tip panels to blockquotes
45
+ // --- Info/note/warning/tip panels → blockquotes ---
35
46
  out = out.replace(/<ac:structured-macro[^>]*ac:name="(info|note|warning|tip|panel)"[^>]*>([\s\S]*?)<\/ac:structured-macro>/gi, (_match, _type, inner) => {
36
47
  const bodyMatch = inner.match(/<ac:rich-text-body>([\s\S]*?)<\/ac:rich-text-body>/i);
37
48
  return bodyMatch ? `<blockquote>${bodyMatch[1]}</blockquote>` : `<blockquote>${inner}</blockquote>`;
38
49
  });
39
- // Generic: any remaining ac:structured-macro unwrap to div
50
+ // --- Generic: any remaining ac:structured-macro → unwrap to div ---
40
51
  out = out.replace(/<ac:structured-macro[^>]*>/gi, "<div>");
41
52
  out = out.replace(/<\/ac:structured-macro>/gi, "</div>");
42
53
  // ac:rich-text-body → div
@@ -48,14 +59,24 @@ function normalizeConfluenceHtml(html) {
48
59
  out = out.replace(/<\/ac:plain-text-body>/gi, "</pre>");
49
60
  // ac:parameter tags — remove entirely
50
61
  out = out.replace(/<ac:parameter[^>]*>[\s\S]*?<\/ac:parameter>/gi, "");
51
- // ac:image → img tag
62
+ // --- ac:image → img tag ---
52
63
  out = out.replace(/<ac:image[^>]*>([\s\S]*?)<\/ac:image>/gi, (_match, inner) => {
53
64
  const filenameMatch = inner.match(/ri:filename="([^"]+)"/i);
54
65
  const filename = filenameMatch ? filenameMatch[1] : "image";
55
66
  return `<img alt="${filename}" src="${filename}" />`;
56
67
  });
57
- // ac:link with ri:page anchor
68
+ // --- ac:link: handle attachment links, user mentions, and page links ---
58
69
  out = out.replace(/<ac:link>([\s\S]*?)<\/ac:link>/gi, (_match, inner) => {
70
+ // Attachment link
71
+ const attachMatch = inner.match(/ri:filename="([^"]+)"/i);
72
+ if (attachMatch) {
73
+ return `<a href="#">📎 ${attachMatch[1]}</a>`;
74
+ }
75
+ // User mention
76
+ if (/<ri:user/i.test(inner)) {
77
+ return `<code>@user</code>`;
78
+ }
79
+ // Page link
59
80
  const pageMatch = inner.match(/ri:content-title="([^"]+)"/i);
60
81
  const bodyMatch = inner.match(/<ac:link-body>([\s\S]*?)<\/ac:link-body>/i)
61
82
  || inner.match(/<ac:plain-text-link-body>\s*<!\[CDATA\[([\s\S]*?)\]\]>\s*<\/ac:plain-text-link-body>/i);
@@ -65,7 +86,7 @@ function normalizeConfluenceHtml(html) {
65
86
  });
66
87
  // ac:emoticon → remove
67
88
  out = out.replace(/<ac:emoticon[^>]*\/>/gi, "");
68
- // ac:task-list / ac:task / ac:task-body → ul/li
89
+ // --- ac:task-list / ac:task → ul/li ---
69
90
  out = out.replace(/<ac:task-list>/gi, "<ul>");
70
91
  out = out.replace(/<\/ac:task-list>/gi, "</ul>");
71
92
  out = out.replace(/<ac:task>([\s\S]*?)<\/ac:task>/gi, (_match, inner) => {
@@ -75,11 +96,62 @@ function normalizeConfluenceHtml(html) {
75
96
  const body = bodyMatch ? bodyMatch[1] : inner;
76
97
  return `<li>${checked ? "[x] " : "[ ] "}${body}</li>`;
77
98
  });
78
- // Remove any remaining ac:* or ri:* tags but keep their text content
99
+ // --- Cleanup: remove any remaining ac:*/ri:* tags, keep text content ---
79
100
  out = out.replace(/<\/?(?:ac|ri):[^>]*>/gi, "");
80
101
  // Clean up CDATA remnants
81
102
  out = out.replace(/<!\[CDATA\[/g, "");
82
103
  out = out.replace(/\]\]>/g, "");
104
+ // Strip attributes from th/td so Turndown sees clean cells
105
+ out = out.replace(/<th[^>]*>/gi, "<th>");
106
+ out = out.replace(/<td[^>]*>/gi, "<td>");
107
+ // --- Normalize table rows to uniform column count ---
108
+ // Turndown GFM requires every row to have the same number of cells.
109
+ // Confluence tables often have irregular column spans.
110
+ out = out.replace(/<table>([\s\S]*?)<\/table>/gi, (_match, tableInner) => {
111
+ // Count cells per row
112
+ const rows = tableInner.match(/<tr>[\s\S]*?<\/tr>/gi) ?? [];
113
+ const cellCounts = rows.map((row) => {
114
+ const cells = row.match(/<(?:th|td)>/gi);
115
+ return cells ? cells.length : 0;
116
+ });
117
+ const maxCols = Math.max(0, ...cellCounts);
118
+ if (maxCols === 0)
119
+ return _match;
120
+ // Pad short rows
121
+ const paddedRows = rows.map((row, i) => {
122
+ const deficit = maxCols - cellCounts[i];
123
+ if (deficit <= 0)
124
+ return row;
125
+ const pad = "<td></td>".repeat(deficit);
126
+ return row.replace(/<\/tr>/i, `${pad}</tr>`);
127
+ });
128
+ // Put the padded rows back in place of the original rows, in order
129
+ const rebuilt = tableInner.replace(/<tr>[\s\S]*?<\/tr>/gi, () => paddedRows.shift());
130
+ return `<table>${rebuilt}</table>`;
131
+ });
132
+ // Clean up table internals so Turndown GFM can convert them
133
+ out = out.replace(/<table>([\s\S]*?)<\/table>/gi, (_match, inner) => {
134
+ let cleaned = inner;
135
+ // Strip block-level wrappers inside cells
136
+ cleaned = cleaned.replace(/<\/?p>/gi, "");
137
+ cleaned = cleaned.replace(/<\/?span[^>]*>/gi, "");
138
+ cleaned = cleaned.replace(/<\/?div[^>]*>/gi, "");
139
+ // Convert <br> to space (markdown tables can't have line breaks)
140
+ cleaned = cleaned.replace(/<br\s*\/?>/gi, " ");
141
+ // Strip <tbody> wrapper — Turndown wants <table><thead><tr>…</tr></thead>…
142
+ cleaned = cleaned.replace(/<\/?tbody>/gi, "");
143
+ // Ensure first row uses <th> so Turndown sees a header
144
+ let firstDone = false;
145
+ cleaned = cleaned.replace(/<tr>([\s\S]*?)<\/tr>/gi, (trMatch, trInner) => {
146
+ if (!firstDone) {
147
+ firstDone = true;
148
+ const promoted = trInner.replace(/<td>/gi, "<th>").replace(/<\/td>/gi, "</th>");
149
+ return `<thead><tr>${promoted}</tr></thead>`;
150
+ }
151
+ return `<tr>${trInner}</tr>`;
152
+ });
153
+ return `<table>${cleaned}</table>`;
154
+ });
83
155
  return out;
84
156
  }
85
157
  /**
package/dist/index.js CHANGED
@@ -8,7 +8,7 @@ import { storageToMarkdown } from "./confluence/transform.js";
8
8
  import { generateUnifiedDiff, generateDiffStats } from "./compare/diff.js";
9
9
  const server = new McpServer({
10
10
  name: "confluence-reader-mcp",
11
- version: "0.2.1"
11
+ version: "0.2.3"
12
12
  });
13
13
  function getEnv(name) {
14
14
  const v = process.env[name];
@@ -79,10 +79,11 @@ server.tool("confluence.list_children", "List the direct child pages of a Conflu
79
79
  : "No child pages found.";
80
80
  return { content: [{ type: "text", text }] };
81
81
  });
82
- server.tool("confluence.fetch_image", "Download an image attachment from a Confluence page by filename. Returns the image as base64-encoded data.", {
82
+ server.tool("confluence.fetch_image", "Download an image attachment from a Confluence page by filename and save it to a local directory. Returns the saved file path.", {
83
83
  url: z.string().describe("Confluence page URL"),
84
- filename: z.string().describe("Attachment filename (e.g. 'architecture.png')")
85
- }, async ({ url, filename }) => {
84
+ filename: z.string().describe("Attachment filename (e.g. 'architecture.png')"),
85
+ destination: z.string().describe("Local directory path to save the image to")
86
+ }, async ({ url, filename, destination }) => {
86
87
  const cfg = getCfg();
87
88
  const pageId = extractConfluencePageId(url);
88
89
  const attachments = await fetchAttachments(cfg, pageId);
@@ -96,23 +97,17 @@ server.tool("confluence.fetch_image", "Download an image attachment from a Confl
96
97
  }]
97
98
  };
98
99
  }
99
- const { buffer, contentType } = await downloadAttachment(cfg, pageId, match.id);
100
- const base64 = buffer.toString("base64");
101
- // Return as base64 image content
102
- if (contentType.startsWith("image/")) {
103
- return {
104
- content: [{
105
- type: "image",
106
- data: base64,
107
- mimeType: contentType,
108
- }]
109
- };
110
- }
111
- // Non-image attachment — return as base64 text
100
+ const { buffer } = await downloadAttachment(cfg, pageId, match.id);
101
+ // Ensure destination directory exists
102
+ const fs = await import("node:fs/promises");
103
+ const path = await import("node:path");
104
+ await fs.mkdir(destination, { recursive: true });
105
+ const filePath = path.join(destination, match.title);
106
+ await fs.writeFile(filePath, buffer);
112
107
  return {
113
108
  content: [{
114
109
  type: "text",
115
- text: `Downloaded "${filename}" (${contentType}, ${buffer.length} bytes).\nBase64: ${base64.slice(0, 200)}...`
110
+ text: `Saved "${match.title}" (${buffer.length} bytes) to ${filePath}`
116
111
  }]
117
112
  };
118
113
  });
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@satiyap/confluence-reader-mcp",
3
- "version": "0.2.1",
3
+ "version": "0.2.3",
4
4
  "description": "MCP server for fetching and comparing Confluence documentation with local files",
5
5
  "author": "satiyap",
6
6
  "license": "MIT",