@satiyap/confluence-reader-mcp 0.2.2 → 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/confluence/transform.js +83 -11
- package/dist/index.js +1 -1
- package/package.json +1 -1
|
@@ -14,29 +14,40 @@ turndown.use(gfm);
|
|
|
14
14
|
*/
|
|
15
15
|
function normalizeConfluenceHtml(html) {
|
|
16
16
|
let out = html;
|
|
17
|
-
//
|
|
18
|
-
out = out.replace(/<ac:layout-section
|
|
17
|
+
// --- Confluence layout tags → divs ---
|
|
18
|
+
out = out.replace(/<ac:layout-section[^>]*>/gi, "<div>");
|
|
19
19
|
out = out.replace(/<\/ac:layout-section>/gi, "</div>");
|
|
20
20
|
out = out.replace(/<ac:layout-cell>/gi, "<div>");
|
|
21
21
|
out = out.replace(/<\/ac:layout-cell>/gi, "</div>");
|
|
22
22
|
out = out.replace(/<ac:layout>/gi, "<div>");
|
|
23
23
|
out = out.replace(/<\/ac:layout>/gi, "</div>");
|
|
24
|
-
//
|
|
25
|
-
|
|
24
|
+
// --- Table cleanup: strip attributes and colgroup so Turndown can parse ---
|
|
25
|
+
out = out.replace(/<table[^>]*>/gi, "<table>");
|
|
26
|
+
out = out.replace(/<colgroup>[\s\S]*?<\/colgroup>/gi, "");
|
|
27
|
+
out = out.replace(/<col[^>]*\/?>/gi, "");
|
|
28
|
+
out = out.replace(/<div class="content-wrapper">/gi, "");
|
|
29
|
+
// (closing </div> for content-wrapper will be handled by generic div cleanup later)
|
|
30
|
+
// --- Jira macro → text reference (extract key) ---
|
|
31
|
+
out = out.replace(/<ac:structured-macro[^>]*ac:name="jira"[^>]*>([\s\S]*?)<\/ac:structured-macro>/gi, (_match, inner) => {
|
|
32
|
+
const keyMatch = inner.match(/<ac:parameter[^>]*ac:name="key"[^>]*>([\s\S]*?)<\/ac:parameter>/i);
|
|
33
|
+
return keyMatch ? `<code>${keyMatch[1].trim()}</code>` : "";
|
|
34
|
+
});
|
|
35
|
+
// --- TOC and other self-closing macros → remove ---
|
|
36
|
+
out = out.replace(/<ac:structured-macro[^>]*\/>/gi, "");
|
|
37
|
+
// --- Code blocks ---
|
|
26
38
|
out = out.replace(/<ac:structured-macro[^>]*ac:name="code"[^>]*>([\s\S]*?)<\/ac:structured-macro>/gi, (_match, inner) => {
|
|
27
|
-
// Extract plain-text-body for code blocks
|
|
28
39
|
const bodyMatch = inner.match(/<ac:plain-text-body>\s*<!\[CDATA\[([\s\S]*?)\]\]>\s*<\/ac:plain-text-body>/i);
|
|
29
40
|
if (bodyMatch) {
|
|
30
41
|
return `<pre><code>${bodyMatch[1]}</code></pre>`;
|
|
31
42
|
}
|
|
32
43
|
return `<pre><code>${inner.replace(/<[^>]+>/g, "")}</code></pre>`;
|
|
33
44
|
});
|
|
34
|
-
//
|
|
45
|
+
// --- Info/note/warning/tip panels → blockquotes ---
|
|
35
46
|
out = out.replace(/<ac:structured-macro[^>]*ac:name="(info|note|warning|tip|panel)"[^>]*>([\s\S]*?)<\/ac:structured-macro>/gi, (_match, _type, inner) => {
|
|
36
47
|
const bodyMatch = inner.match(/<ac:rich-text-body>([\s\S]*?)<\/ac:rich-text-body>/i);
|
|
37
48
|
return bodyMatch ? `<blockquote>${bodyMatch[1]}</blockquote>` : `<blockquote>${inner}</blockquote>`;
|
|
38
49
|
});
|
|
39
|
-
// Generic
|
|
50
|
+
// --- Generic remaining ac:structured-macro → unwrap to div ---
|
|
40
51
|
out = out.replace(/<ac:structured-macro[^>]*>/gi, "<div>");
|
|
41
52
|
out = out.replace(/<\/ac:structured-macro>/gi, "</div>");
|
|
42
53
|
// ac:rich-text-body → div
|
|
@@ -48,14 +59,24 @@ function normalizeConfluenceHtml(html) {
|
|
|
48
59
|
out = out.replace(/<\/ac:plain-text-body>/gi, "</pre>");
|
|
49
60
|
// ac:parameter tags — remove entirely
|
|
50
61
|
out = out.replace(/<ac:parameter[^>]*>[\s\S]*?<\/ac:parameter>/gi, "");
|
|
51
|
-
// ac:image → img tag
|
|
62
|
+
// --- ac:image → img tag ---
|
|
52
63
|
out = out.replace(/<ac:image[^>]*>([\s\S]*?)<\/ac:image>/gi, (_match, inner) => {
|
|
53
64
|
const filenameMatch = inner.match(/ri:filename="([^"]+)"/i);
|
|
54
65
|
const filename = filenameMatch ? filenameMatch[1] : "image";
|
|
55
66
|
return `<img alt="${filename}" src="${filename}" />`;
|
|
56
67
|
});
|
|
57
|
-
// ac:link
|
|
68
|
+
// --- ac:link: handle attachment links, user mentions, and page links ---
|
|
58
69
|
out = out.replace(/<ac:link>([\s\S]*?)<\/ac:link>/gi, (_match, inner) => {
|
|
70
|
+
// Attachment link
|
|
71
|
+
const attachMatch = inner.match(/ri:filename="([^"]+)"/i);
|
|
72
|
+
if (attachMatch) {
|
|
73
|
+
return `<a href="#">📎 ${attachMatch[1]}</a>`;
|
|
74
|
+
}
|
|
75
|
+
// User mention
|
|
76
|
+
if (/<ri:user/i.test(inner)) {
|
|
77
|
+
return `<code>@user</code>`;
|
|
78
|
+
}
|
|
79
|
+
// Page link
|
|
59
80
|
const pageMatch = inner.match(/ri:content-title="([^"]+)"/i);
|
|
60
81
|
const bodyMatch = inner.match(/<ac:link-body>([\s\S]*?)<\/ac:link-body>/i)
|
|
61
82
|
|| inner.match(/<ac:plain-text-link-body>\s*<!\[CDATA\[([\s\S]*?)\]\]>\s*<\/ac:plain-text-link-body>/i);
|
|
@@ -65,7 +86,7 @@ function normalizeConfluenceHtml(html) {
|
|
|
65
86
|
});
|
|
66
87
|
// ac:emoticon → remove
|
|
67
88
|
out = out.replace(/<ac:emoticon[^>]*\/>/gi, "");
|
|
68
|
-
// ac:task-list / ac:task
|
|
89
|
+
// --- ac:task-list / ac:task → ul/li ---
|
|
69
90
|
out = out.replace(/<ac:task-list>/gi, "<ul>");
|
|
70
91
|
out = out.replace(/<\/ac:task-list>/gi, "</ul>");
|
|
71
92
|
out = out.replace(/<ac:task>([\s\S]*?)<\/ac:task>/gi, (_match, inner) => {
|
|
@@ -75,11 +96,62 @@ function normalizeConfluenceHtml(html) {
|
|
|
75
96
|
const body = bodyMatch ? bodyMatch[1] : inner;
|
|
76
97
|
return `<li>${checked ? "[x] " : "[ ] "}${body}</li>`;
|
|
77
98
|
});
|
|
78
|
-
//
|
|
99
|
+
// --- Cleanup: remove any remaining ac:*/ri:* tags, keep text content ---
|
|
79
100
|
out = out.replace(/<\/?(?:ac|ri):[^>]*>/gi, "");
|
|
80
101
|
// Clean up CDATA remnants
|
|
81
102
|
out = out.replace(/<!\[CDATA\[/g, "");
|
|
82
103
|
out = out.replace(/\]\]>/g, "");
|
|
104
|
+
// Strip attributes from th/td so Turndown sees clean cells
|
|
105
|
+
out = out.replace(/<th[^>]*>/gi, "<th>");
|
|
106
|
+
out = out.replace(/<td[^>]*>/gi, "<td>");
|
|
107
|
+
// --- Normalize table rows to uniform column count ---
|
|
108
|
+
// Turndown GFM requires every row to have the same number of cells.
|
|
109
|
+
// Confluence tables often have irregular column spans.
|
|
110
|
+
out = out.replace(/<table>([\s\S]*?)<\/table>/gi, (_match, tableInner) => {
|
|
111
|
+
// Count cells per row
|
|
112
|
+
const rows = tableInner.match(/<tr>[\s\S]*?<\/tr>/gi) ?? [];
|
|
113
|
+
const cellCounts = rows.map((row) => {
|
|
114
|
+
const cells = row.match(/<(?:th|td)>/gi);
|
|
115
|
+
return cells ? cells.length : 0;
|
|
116
|
+
});
|
|
117
|
+
const maxCols = Math.max(0, ...cellCounts);
|
|
118
|
+
if (maxCols === 0)
|
|
119
|
+
return _match;
|
|
120
|
+
// Pad short rows
|
|
121
|
+
const paddedRows = rows.map((row, i) => {
|
|
122
|
+
const deficit = maxCols - cellCounts[i];
|
|
123
|
+
if (deficit <= 0)
|
|
124
|
+
return row;
|
|
125
|
+
const pad = "<td></td>".repeat(deficit);
|
|
126
|
+
return row.replace(/<\/tr>/i, `${pad}</tr>`);
|
|
127
|
+
});
|
|
128
|
+
// Put the padded rows back in place of the original <tr> blocks, in order
|
|
129
|
+
const rebuilt = tableInner.replace(/<tr>[\s\S]*?<\/tr>/gi, () => paddedRows.shift());
|
|
130
|
+
return `<table>${rebuilt}</table>`;
|
|
131
|
+
});
|
|
132
|
+
// Clean up table internals so Turndown GFM can convert them
|
|
133
|
+
out = out.replace(/<table>([\s\S]*?)<\/table>/gi, (_match, inner) => {
|
|
134
|
+
let cleaned = inner;
|
|
135
|
+
// Strip block-level wrappers inside cells
|
|
136
|
+
cleaned = cleaned.replace(/<\/?p>/gi, "");
|
|
137
|
+
cleaned = cleaned.replace(/<\/?span[^>]*>/gi, "");
|
|
138
|
+
cleaned = cleaned.replace(/<\/?div[^>]*>/gi, "");
|
|
139
|
+
// Convert <br> to space (markdown tables can't have line breaks)
|
|
140
|
+
cleaned = cleaned.replace(/<br\s*\/?>/gi, " ");
|
|
141
|
+
// Strip <tbody> wrapper — Turndown wants <table><thead><tr>…</tr></thead>…
|
|
142
|
+
cleaned = cleaned.replace(/<\/?tbody>/gi, "");
|
|
143
|
+
// Ensure first row uses <th> so Turndown sees a header
|
|
144
|
+
let firstDone = false;
|
|
145
|
+
cleaned = cleaned.replace(/<tr>([\s\S]*?)<\/tr>/gi, (trMatch, trInner) => {
|
|
146
|
+
if (!firstDone) {
|
|
147
|
+
firstDone = true;
|
|
148
|
+
const promoted = trInner.replace(/<td>/gi, "<th>").replace(/<\/td>/gi, "</th>");
|
|
149
|
+
return `<thead><tr>${promoted}</tr></thead>`;
|
|
150
|
+
}
|
|
151
|
+
return `<tr>${trInner}</tr>`;
|
|
152
|
+
});
|
|
153
|
+
return `<table>${cleaned}</table>`;
|
|
154
|
+
});
|
|
83
155
|
return out;
|
|
84
156
|
}
|
|
85
157
|
/**
|
package/dist/index.js
CHANGED
|
@@ -8,7 +8,7 @@ import { storageToMarkdown } from "./confluence/transform.js";
|
|
|
8
8
|
import { generateUnifiedDiff, generateDiffStats } from "./compare/diff.js";
|
|
9
9
|
const server = new McpServer({
|
|
10
10
|
name: "confluence-reader-mcp",
|
|
11
|
-
version: "0.2.2"
|
|
11
|
+
version: "0.2.3"
|
|
12
12
|
});
|
|
13
13
|
function getEnv(name) {
|
|
14
14
|
const v = process.env[name];
|