@satiyap/confluence-reader-mcp 0.2.2 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -14,29 +14,40 @@ turndown.use(gfm);
14
14
  */
15
15
  function normalizeConfluenceHtml(html) {
16
16
  let out = html;
17
- // Convert ac:layout-section / ac:layout-cell to divs
18
- out = out.replace(/<ac:layout-section>/gi, "<div>");
17
+ // --- Confluence layout tags → divs ---
18
+ out = out.replace(/<ac:layout-section[^>]*>/gi, "<div>");
19
19
  out = out.replace(/<\/ac:layout-section>/gi, "</div>");
20
20
  out = out.replace(/<ac:layout-cell>/gi, "<div>");
21
21
  out = out.replace(/<\/ac:layout-cell>/gi, "</div>");
22
22
  out = out.replace(/<ac:layout>/gi, "<div>");
23
23
  out = out.replace(/<\/ac:layout>/gi, "</div>");
24
- // Convert ac:structured-macro (panels, code blocks, etc.) to divs
25
- // Preserve the macro name as a data attribute for potential future use
24
+ // --- Table cleanup: strip attributes and colgroup so Turndown can parse ---
25
+ out = out.replace(/<table[^>]*>/gi, "<table>");
26
+ out = out.replace(/<colgroup>[\s\S]*?<\/colgroup>/gi, "");
27
+ out = out.replace(/<col[^>]*\/?>/gi, "");
28
+ out = out.replace(/<div class="content-wrapper">/gi, "");
29
+ // (closing </div> for content-wrapper will be handled by generic div cleanup later)
30
+ // --- Jira macro → text reference (extract key) ---
31
+ out = out.replace(/<ac:structured-macro[^>]*ac:name="jira"[^>]*>([\s\S]*?)<\/ac:structured-macro>/gi, (_match, inner) => {
32
+ const keyMatch = inner.match(/<ac:parameter[^>]*ac:name="key"[^>]*>([\s\S]*?)<\/ac:parameter>/i);
33
+ return keyMatch ? `<code>${keyMatch[1].trim()}</code>` : "";
34
+ });
35
+ // --- TOC and other self-closing macros → remove ---
36
+ out = out.replace(/<ac:structured-macro[^>]*\/>/gi, "");
37
+ // --- Code blocks ---
26
38
  out = out.replace(/<ac:structured-macro[^>]*ac:name="code"[^>]*>([\s\S]*?)<\/ac:structured-macro>/gi, (_match, inner) => {
27
- // Extract plain-text-body for code blocks
28
39
  const bodyMatch = inner.match(/<ac:plain-text-body>\s*<!\[CDATA\[([\s\S]*?)\]\]>\s*<\/ac:plain-text-body>/i);
29
40
  if (bodyMatch) {
30
41
  return `<pre><code>${bodyMatch[1]}</code></pre>`;
31
42
  }
32
43
  return `<pre><code>${inner.replace(/<[^>]+>/g, "")}</code></pre>`;
33
44
  });
34
- // Convert info/note/warning/tip panels to blockquotes
45
+ // --- Info/note/warning/tip panels → blockquotes ---
35
46
  out = out.replace(/<ac:structured-macro[^>]*ac:name="(info|note|warning|tip|panel)"[^>]*>([\s\S]*?)<\/ac:structured-macro>/gi, (_match, _type, inner) => {
36
47
  const bodyMatch = inner.match(/<ac:rich-text-body>([\s\S]*?)<\/ac:rich-text-body>/i);
37
48
  return bodyMatch ? `<blockquote>${bodyMatch[1]}</blockquote>` : `<blockquote>${inner}</blockquote>`;
38
49
  });
39
- // Generic: any remaining ac:structured-macro unwrap to div
50
+ // --- Generic: any remaining ac:structured-macro → unwrap to div ---
40
51
  out = out.replace(/<ac:structured-macro[^>]*>/gi, "<div>");
41
52
  out = out.replace(/<\/ac:structured-macro>/gi, "</div>");
42
53
  // ac:rich-text-body → div
@@ -48,14 +59,24 @@ function normalizeConfluenceHtml(html) {
48
59
  out = out.replace(/<\/ac:plain-text-body>/gi, "</pre>");
49
60
  // ac:parameter tags — remove entirely
50
61
  out = out.replace(/<ac:parameter[^>]*>[\s\S]*?<\/ac:parameter>/gi, "");
51
- // ac:image → img tag
62
+ // --- ac:image → img tag ---
52
63
  out = out.replace(/<ac:image[^>]*>([\s\S]*?)<\/ac:image>/gi, (_match, inner) => {
53
64
  const filenameMatch = inner.match(/ri:filename="([^"]+)"/i);
54
65
  const filename = filenameMatch ? filenameMatch[1] : "image";
55
66
  return `<img alt="${filename}" src="${filename}" />`;
56
67
  });
57
- // ac:link with ri:page anchor
68
+ // --- ac:link: handle attachment links, user mentions, and page links ---
58
69
  out = out.replace(/<ac:link>([\s\S]*?)<\/ac:link>/gi, (_match, inner) => {
70
+ // Attachment link
71
+ const attachMatch = inner.match(/ri:filename="([^"]+)"/i);
72
+ if (attachMatch) {
73
+ return `<a href="#">📎 ${attachMatch[1]}</a>`;
74
+ }
75
+ // User mention
76
+ if (/<ri:user/i.test(inner)) {
77
+ return `<code>@user</code>`;
78
+ }
79
+ // Page link
59
80
  const pageMatch = inner.match(/ri:content-title="([^"]+)"/i);
60
81
  const bodyMatch = inner.match(/<ac:link-body>([\s\S]*?)<\/ac:link-body>/i)
61
82
  || inner.match(/<ac:plain-text-link-body>\s*<!\[CDATA\[([\s\S]*?)\]\]>\s*<\/ac:plain-text-link-body>/i);
@@ -65,7 +86,7 @@ function normalizeConfluenceHtml(html) {
65
86
  });
66
87
  // ac:emoticon → remove
67
88
  out = out.replace(/<ac:emoticon[^>]*\/>/gi, "");
68
- // ac:task-list / ac:task / ac:task-body → ul/li
89
+ // --- ac:task-list / ac:task → ul/li ---
69
90
  out = out.replace(/<ac:task-list>/gi, "<ul>");
70
91
  out = out.replace(/<\/ac:task-list>/gi, "</ul>");
71
92
  out = out.replace(/<ac:task>([\s\S]*?)<\/ac:task>/gi, (_match, inner) => {
@@ -75,11 +96,62 @@ function normalizeConfluenceHtml(html) {
75
96
  const body = bodyMatch ? bodyMatch[1] : inner;
76
97
  return `<li>${checked ? "[x] " : "[ ] "}${body}</li>`;
77
98
  });
78
- // Remove any remaining ac:* or ri:* tags but keep their text content
99
+ // --- Cleanup: remove any remaining ac:*/ri:* tags, keep text content ---
79
100
  out = out.replace(/<\/?(?:ac|ri):[^>]*>/gi, "");
80
101
  // Clean up CDATA remnants
81
102
  out = out.replace(/<!\[CDATA\[/g, "");
82
103
  out = out.replace(/\]\]>/g, "");
104
+ // Strip attributes from th/td so Turndown sees clean cells
105
+ out = out.replace(/<th[^>]*>/gi, "<th>");
106
+ out = out.replace(/<td[^>]*>/gi, "<td>");
107
+ // --- Normalize table rows to uniform column count ---
108
+ // Turndown GFM requires every row to have the same number of cells.
109
+ // Confluence tables often have irregular column spans.
110
+ out = out.replace(/<table>([\s\S]*?)<\/table>/gi, (_match, tableInner) => {
111
+ // Count cells per row
112
+ const rows = tableInner.match(/<tr>[\s\S]*?<\/tr>/gi) ?? [];
113
+ const cellCounts = rows.map((row) => {
114
+ const cells = row.match(/<(?:th|td)>/gi);
115
+ return cells ? cells.length : 0;
116
+ });
117
+ const maxCols = Math.max(0, ...cellCounts);
118
+ if (maxCols === 0)
119
+ return _match;
120
+ // Pad short rows
121
+ const paddedRows = rows.map((row, i) => {
122
+ const deficit = maxCols - cellCounts[i];
123
+ if (deficit <= 0)
124
+ return row;
125
+ const pad = "<td></td>".repeat(deficit);
126
+ return row.replace(/<\/tr>/i, `${pad}</tr>`);
127
+ });
128
+ // Ensure first row uses <th> so Turndown generates a header row
129
+ const rebuilt = tableInner.replace(/<tr>[\s\S]*?<\/tr>/gi, () => paddedRows.shift());
130
+ return `<table>${rebuilt}</table>`;
131
+ });
132
+ // Clean up table internals so Turndown GFM can convert them
133
+ out = out.replace(/<table>([\s\S]*?)<\/table>/gi, (_match, inner) => {
134
+ let cleaned = inner;
135
+ // Strip block-level wrappers inside cells
136
+ cleaned = cleaned.replace(/<\/?p>/gi, "");
137
+ cleaned = cleaned.replace(/<\/?span[^>]*>/gi, "");
138
+ cleaned = cleaned.replace(/<\/?div[^>]*>/gi, "");
139
+ // Convert <br> to space (markdown tables can't have line breaks)
140
+ cleaned = cleaned.replace(/<br\s*\/?>/gi, " ");
141
+ // Strip <tbody> wrapper — Turndown wants <table><thead><tr>…</tr></thead>…
142
+ cleaned = cleaned.replace(/<\/?tbody>/gi, "");
143
+ // Ensure first row uses <th> so Turndown sees a header
144
+ let firstDone = false;
145
+ cleaned = cleaned.replace(/<tr>([\s\S]*?)<\/tr>/gi, (trMatch, trInner) => {
146
+ if (!firstDone) {
147
+ firstDone = true;
148
+ const promoted = trInner.replace(/<td>/gi, "<th>").replace(/<\/td>/gi, "</th>");
149
+ return `<thead><tr>${promoted}</tr></thead>`;
150
+ }
151
+ return `<tr>${trInner}</tr>`;
152
+ });
153
+ return `<table>${cleaned}</table>`;
154
+ });
83
155
  return out;
84
156
  }
85
157
  /**
package/dist/index.js CHANGED
@@ -8,7 +8,7 @@ import { storageToMarkdown } from "./confluence/transform.js";
8
8
  import { generateUnifiedDiff, generateDiffStats } from "./compare/diff.js";
9
9
  const server = new McpServer({
10
10
  name: "confluence-reader-mcp",
11
- version: "0.2.2"
11
+ version: "0.2.3"
12
12
  });
13
13
  function getEnv(name) {
14
14
  const v = process.env[name];
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@satiyap/confluence-reader-mcp",
3
- "version": "0.2.2",
3
+ "version": "0.2.3",
4
4
  "description": "MCP server for fetching and comparing Confluence documentation with local files",
5
5
  "author": "satiyap",
6
6
  "license": "MIT",