@satiyap/confluence-reader-mcp 0.2.1 → 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -1
- package/dist/confluence/transform.js +83 -11
- package/dist/index.js +13 -18
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -76,12 +76,13 @@ Lists the direct child pages of a Confluence page without fetching their content
|
|
|
76
76
|
|
|
77
77
|
### `confluence.fetch_image`
|
|
78
78
|
|
|
79
|
-
Downloads an image attachment from a Confluence page by filename
|
|
79
|
+
Downloads an image attachment from a Confluence page by filename and saves it to a local directory.
|
|
80
80
|
|
|
81
81
|
| Parameter | Type | Description |
|
|
82
82
|
|-----------|------|-------------|
|
|
83
83
|
| `url` | string | Confluence page URL |
|
|
84
84
|
| `filename` | string | Attachment filename (e.g. `architecture.png`) |
|
|
85
|
+
| `destination` | string | Local directory path to save the image to |
|
|
85
86
|
|
|
86
87
|
### `confluence.compare`
|
|
87
88
|
|
|
@@ -14,29 +14,40 @@ turndown.use(gfm);
|
|
|
14
14
|
*/
|
|
15
15
|
function normalizeConfluenceHtml(html) {
|
|
16
16
|
let out = html;
|
|
17
|
-
//
|
|
18
|
-
out = out.replace(/<ac:layout-section
|
|
17
|
+
// --- Confluence layout tags → divs ---
|
|
18
|
+
out = out.replace(/<ac:layout-section[^>]*>/gi, "<div>");
|
|
19
19
|
out = out.replace(/<\/ac:layout-section>/gi, "</div>");
|
|
20
20
|
out = out.replace(/<ac:layout-cell>/gi, "<div>");
|
|
21
21
|
out = out.replace(/<\/ac:layout-cell>/gi, "</div>");
|
|
22
22
|
out = out.replace(/<ac:layout>/gi, "<div>");
|
|
23
23
|
out = out.replace(/<\/ac:layout>/gi, "</div>");
|
|
24
|
-
//
|
|
25
|
-
|
|
24
|
+
// --- Table cleanup: strip attributes and colgroup so Turndown can parse ---
|
|
25
|
+
out = out.replace(/<table[^>]*>/gi, "<table>");
|
|
26
|
+
out = out.replace(/<colgroup>[\s\S]*?<\/colgroup>/gi, "");
|
|
27
|
+
out = out.replace(/<col[^>]*\/?>/gi, "");
|
|
28
|
+
out = out.replace(/<div class="content-wrapper">/gi, "");
|
|
29
|
+
// (closing </div> for content-wrapper will be handled by generic div cleanup later)
|
|
30
|
+
// --- Jira macro → text reference (extract key) ---
|
|
31
|
+
out = out.replace(/<ac:structured-macro[^>]*ac:name="jira"[^>]*>([\s\S]*?)<\/ac:structured-macro>/gi, (_match, inner) => {
|
|
32
|
+
const keyMatch = inner.match(/<ac:parameter[^>]*ac:name="key"[^>]*>([\s\S]*?)<\/ac:parameter>/i);
|
|
33
|
+
return keyMatch ? `<code>${keyMatch[1].trim()}</code>` : "";
|
|
34
|
+
});
|
|
35
|
+
// --- TOC and other self-closing macros → remove ---
|
|
36
|
+
out = out.replace(/<ac:structured-macro[^>]*\/>/gi, "");
|
|
37
|
+
// --- Code blocks ---
|
|
26
38
|
out = out.replace(/<ac:structured-macro[^>]*ac:name="code"[^>]*>([\s\S]*?)<\/ac:structured-macro>/gi, (_match, inner) => {
|
|
27
|
-
// Extract plain-text-body for code blocks
|
|
28
39
|
const bodyMatch = inner.match(/<ac:plain-text-body>\s*<!\[CDATA\[([\s\S]*?)\]\]>\s*<\/ac:plain-text-body>/i);
|
|
29
40
|
if (bodyMatch) {
|
|
30
41
|
return `<pre><code>${bodyMatch[1]}</code></pre>`;
|
|
31
42
|
}
|
|
32
43
|
return `<pre><code>${inner.replace(/<[^>]+>/g, "")}</code></pre>`;
|
|
33
44
|
});
|
|
34
|
-
//
|
|
45
|
+
// --- Info/note/warning/tip panels → blockquotes ---
|
|
35
46
|
out = out.replace(/<ac:structured-macro[^>]*ac:name="(info|note|warning|tip|panel)"[^>]*>([\s\S]*?)<\/ac:structured-macro>/gi, (_match, _type, inner) => {
|
|
36
47
|
const bodyMatch = inner.match(/<ac:rich-text-body>([\s\S]*?)<\/ac:rich-text-body>/i);
|
|
37
48
|
return bodyMatch ? `<blockquote>${bodyMatch[1]}</blockquote>` : `<blockquote>${inner}</blockquote>`;
|
|
38
49
|
});
|
|
39
|
-
// Generic
|
|
50
|
+
// --- Generic remaining ac:structured-macro → unwrap to div ---
|
|
40
51
|
out = out.replace(/<ac:structured-macro[^>]*>/gi, "<div>");
|
|
41
52
|
out = out.replace(/<\/ac:structured-macro>/gi, "</div>");
|
|
42
53
|
// ac:rich-text-body → div
|
|
@@ -48,14 +59,24 @@ function normalizeConfluenceHtml(html) {
|
|
|
48
59
|
out = out.replace(/<\/ac:plain-text-body>/gi, "</pre>");
|
|
49
60
|
// ac:parameter tags — remove entirely
|
|
50
61
|
out = out.replace(/<ac:parameter[^>]*>[\s\S]*?<\/ac:parameter>/gi, "");
|
|
51
|
-
// ac:image → img tag
|
|
62
|
+
// --- ac:image → img tag ---
|
|
52
63
|
out = out.replace(/<ac:image[^>]*>([\s\S]*?)<\/ac:image>/gi, (_match, inner) => {
|
|
53
64
|
const filenameMatch = inner.match(/ri:filename="([^"]+)"/i);
|
|
54
65
|
const filename = filenameMatch ? filenameMatch[1] : "image";
|
|
55
66
|
return `<img alt="${filename}" src="${filename}" />`;
|
|
56
67
|
});
|
|
57
|
-
// ac:link
|
|
68
|
+
// --- ac:link: handle attachment links, user mentions, and page links ---
|
|
58
69
|
out = out.replace(/<ac:link>([\s\S]*?)<\/ac:link>/gi, (_match, inner) => {
|
|
70
|
+
// Attachment link
|
|
71
|
+
const attachMatch = inner.match(/ri:filename="([^"]+)"/i);
|
|
72
|
+
if (attachMatch) {
|
|
73
|
+
return `<a href="#">📎 ${attachMatch[1]}</a>`;
|
|
74
|
+
}
|
|
75
|
+
// User mention
|
|
76
|
+
if (/<ri:user/i.test(inner)) {
|
|
77
|
+
return `<code>@user</code>`;
|
|
78
|
+
}
|
|
79
|
+
// Page link
|
|
59
80
|
const pageMatch = inner.match(/ri:content-title="([^"]+)"/i);
|
|
60
81
|
const bodyMatch = inner.match(/<ac:link-body>([\s\S]*?)<\/ac:link-body>/i)
|
|
61
82
|
|| inner.match(/<ac:plain-text-link-body>\s*<!\[CDATA\[([\s\S]*?)\]\]>\s*<\/ac:plain-text-link-body>/i);
|
|
@@ -65,7 +86,7 @@ function normalizeConfluenceHtml(html) {
|
|
|
65
86
|
});
|
|
66
87
|
// ac:emoticon → remove
|
|
67
88
|
out = out.replace(/<ac:emoticon[^>]*\/>/gi, "");
|
|
68
|
-
// ac:task-list / ac:task
|
|
89
|
+
// --- ac:task-list / ac:task → ul/li ---
|
|
69
90
|
out = out.replace(/<ac:task-list>/gi, "<ul>");
|
|
70
91
|
out = out.replace(/<\/ac:task-list>/gi, "</ul>");
|
|
71
92
|
out = out.replace(/<ac:task>([\s\S]*?)<\/ac:task>/gi, (_match, inner) => {
|
|
@@ -75,11 +96,62 @@ function normalizeConfluenceHtml(html) {
|
|
|
75
96
|
const body = bodyMatch ? bodyMatch[1] : inner;
|
|
76
97
|
return `<li>${checked ? "[x] " : "[ ] "}${body}</li>`;
|
|
77
98
|
});
|
|
78
|
-
//
|
|
99
|
+
// --- Cleanup: remove any remaining ac:*/ri:* tags, keep text content ---
|
|
79
100
|
out = out.replace(/<\/?(?:ac|ri):[^>]*>/gi, "");
|
|
80
101
|
// Clean up CDATA remnants
|
|
81
102
|
out = out.replace(/<!\[CDATA\[/g, "");
|
|
82
103
|
out = out.replace(/\]\]>/g, "");
|
|
104
|
+
// Strip attributes from th/td so Turndown sees clean cells
|
|
105
|
+
out = out.replace(/<th[^>]*>/gi, "<th>");
|
|
106
|
+
out = out.replace(/<td[^>]*>/gi, "<td>");
|
|
107
|
+
// --- Normalize table rows to uniform column count ---
|
|
108
|
+
// Turndown GFM requires every row to have the same number of cells.
|
|
109
|
+
// Confluence tables often have irregular column spans.
|
|
110
|
+
out = out.replace(/<table>([\s\S]*?)<\/table>/gi, (_match, tableInner) => {
|
|
111
|
+
// Count cells per row
|
|
112
|
+
const rows = tableInner.match(/<tr>[\s\S]*?<\/tr>/gi) ?? [];
|
|
113
|
+
const cellCounts = rows.map((row) => {
|
|
114
|
+
const cells = row.match(/<(?:th|td)>/gi);
|
|
115
|
+
return cells ? cells.length : 0;
|
|
116
|
+
});
|
|
117
|
+
const maxCols = Math.max(0, ...cellCounts);
|
|
118
|
+
if (maxCols === 0)
|
|
119
|
+
return _match;
|
|
120
|
+
// Pad short rows
|
|
121
|
+
const paddedRows = rows.map((row, i) => {
|
|
122
|
+
const deficit = maxCols - cellCounts[i];
|
|
123
|
+
if (deficit <= 0)
|
|
124
|
+
return row;
|
|
125
|
+
const pad = "<td></td>".repeat(deficit);
|
|
126
|
+
return row.replace(/<\/tr>/i, `${pad}</tr>`);
|
|
127
|
+
});
|
|
128
|
+
// Ensure first row uses <th> so Turndown generates a header row
|
|
129
|
+
const rebuilt = tableInner.replace(/<tr>[\s\S]*?<\/tr>/gi, () => paddedRows.shift());
|
|
130
|
+
return `<table>${rebuilt}</table>`;
|
|
131
|
+
});
|
|
132
|
+
// Clean up table internals so Turndown GFM can convert them
|
|
133
|
+
out = out.replace(/<table>([\s\S]*?)<\/table>/gi, (_match, inner) => {
|
|
134
|
+
let cleaned = inner;
|
|
135
|
+
// Strip block-level wrappers inside cells
|
|
136
|
+
cleaned = cleaned.replace(/<\/?p>/gi, "");
|
|
137
|
+
cleaned = cleaned.replace(/<\/?span[^>]*>/gi, "");
|
|
138
|
+
cleaned = cleaned.replace(/<\/?div[^>]*>/gi, "");
|
|
139
|
+
// Convert <br> to space (markdown tables can't have line breaks)
|
|
140
|
+
cleaned = cleaned.replace(/<br\s*\/?>/gi, " ");
|
|
141
|
+
// Strip <tbody> wrapper — Turndown wants <table><thead><tr>…</tr></thead>…
|
|
142
|
+
cleaned = cleaned.replace(/<\/?tbody>/gi, "");
|
|
143
|
+
// Ensure first row uses <th> so Turndown sees a header
|
|
144
|
+
let firstDone = false;
|
|
145
|
+
cleaned = cleaned.replace(/<tr>([\s\S]*?)<\/tr>/gi, (trMatch, trInner) => {
|
|
146
|
+
if (!firstDone) {
|
|
147
|
+
firstDone = true;
|
|
148
|
+
const promoted = trInner.replace(/<td>/gi, "<th>").replace(/<\/td>/gi, "</th>");
|
|
149
|
+
return `<thead><tr>${promoted}</tr></thead>`;
|
|
150
|
+
}
|
|
151
|
+
return `<tr>${trInner}</tr>`;
|
|
152
|
+
});
|
|
153
|
+
return `<table>${cleaned}</table>`;
|
|
154
|
+
});
|
|
83
155
|
return out;
|
|
84
156
|
}
|
|
85
157
|
/**
|
package/dist/index.js
CHANGED
|
@@ -8,7 +8,7 @@ import { storageToMarkdown } from "./confluence/transform.js";
|
|
|
8
8
|
import { generateUnifiedDiff, generateDiffStats } from "./compare/diff.js";
|
|
9
9
|
const server = new McpServer({
|
|
10
10
|
name: "confluence-reader-mcp",
|
|
11
|
-
version: "0.2.
|
|
11
|
+
version: "0.2.3"
|
|
12
12
|
});
|
|
13
13
|
function getEnv(name) {
|
|
14
14
|
const v = process.env[name];
|
|
@@ -79,10 +79,11 @@ server.tool("confluence.list_children", "List the direct child pages of a Conflu
|
|
|
79
79
|
: "No child pages found.";
|
|
80
80
|
return { content: [{ type: "text", text }] };
|
|
81
81
|
});
|
|
82
|
-
server.tool("confluence.fetch_image", "Download an image attachment from a Confluence page by filename. Returns the
|
|
82
|
+
server.tool("confluence.fetch_image", "Download an image attachment from a Confluence page by filename and save it to a local directory. Returns the saved file path.", {
|
|
83
83
|
url: z.string().describe("Confluence page URL"),
|
|
84
|
-
filename: z.string().describe("Attachment filename (e.g. 'architecture.png')")
|
|
85
|
-
|
|
84
|
+
filename: z.string().describe("Attachment filename (e.g. 'architecture.png')"),
|
|
85
|
+
destination: z.string().describe("Local directory path to save the image to")
|
|
86
|
+
}, async ({ url, filename, destination }) => {
|
|
86
87
|
const cfg = getCfg();
|
|
87
88
|
const pageId = extractConfluencePageId(url);
|
|
88
89
|
const attachments = await fetchAttachments(cfg, pageId);
|
|
@@ -96,23 +97,17 @@ server.tool("confluence.fetch_image", "Download an image attachment from a Confl
|
|
|
96
97
|
}]
|
|
97
98
|
};
|
|
98
99
|
}
|
|
99
|
-
const { buffer
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
data: base64,
|
|
107
|
-
mimeType: contentType,
|
|
108
|
-
}]
|
|
109
|
-
};
|
|
110
|
-
}
|
|
111
|
-
// Non-image attachment — return as base64 text
|
|
100
|
+
const { buffer } = await downloadAttachment(cfg, pageId, match.id);
|
|
101
|
+
// Ensure destination directory exists
|
|
102
|
+
const fs = await import("node:fs/promises");
|
|
103
|
+
const path = await import("node:path");
|
|
104
|
+
await fs.mkdir(destination, { recursive: true });
|
|
105
|
+
const filePath = path.join(destination, match.title);
|
|
106
|
+
await fs.writeFile(filePath, buffer);
|
|
112
107
|
return {
|
|
113
108
|
content: [{
|
|
114
109
|
type: "text",
|
|
115
|
-
text: `
|
|
110
|
+
text: `Saved "${match.title}" (${buffer.length} bytes) to ${filePath}`
|
|
116
111
|
}]
|
|
117
112
|
};
|
|
118
113
|
});
|