@aaronshaf/confluence-cli 0.1.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +69 -0
- package/package.json +73 -0
- package/src/cli/commands/attachments.ts +113 -0
- package/src/cli/commands/clone.ts +188 -0
- package/src/cli/commands/comments.ts +56 -0
- package/src/cli/commands/create.ts +58 -0
- package/src/cli/commands/delete.ts +46 -0
- package/src/cli/commands/doctor.ts +161 -0
- package/src/cli/commands/duplicate-check.ts +89 -0
- package/src/cli/commands/file-rename.ts +113 -0
- package/src/cli/commands/folder-hierarchy.ts +241 -0
- package/src/cli/commands/info.ts +56 -0
- package/src/cli/commands/labels.ts +53 -0
- package/src/cli/commands/move.ts +23 -0
- package/src/cli/commands/open.ts +145 -0
- package/src/cli/commands/pull.ts +241 -0
- package/src/cli/commands/push-errors.ts +40 -0
- package/src/cli/commands/push.ts +699 -0
- package/src/cli/commands/search.ts +62 -0
- package/src/cli/commands/setup.ts +124 -0
- package/src/cli/commands/spaces.ts +42 -0
- package/src/cli/commands/status.ts +88 -0
- package/src/cli/commands/tree.ts +190 -0
- package/src/cli/help.ts +425 -0
- package/src/cli/index.ts +413 -0
- package/src/cli/utils/browser.ts +34 -0
- package/src/cli/utils/progress-reporter.ts +49 -0
- package/src/cli.ts +6 -0
- package/src/lib/config.ts +156 -0
- package/src/lib/confluence-client/attachment-operations.ts +221 -0
- package/src/lib/confluence-client/client.ts +653 -0
- package/src/lib/confluence-client/comment-operations.ts +60 -0
- package/src/lib/confluence-client/folder-operations.ts +203 -0
- package/src/lib/confluence-client/index.ts +47 -0
- package/src/lib/confluence-client/label-operations.ts +102 -0
- package/src/lib/confluence-client/page-operations.ts +270 -0
- package/src/lib/confluence-client/search-operations.ts +60 -0
- package/src/lib/confluence-client/types.ts +329 -0
- package/src/lib/confluence-client/user-operations.ts +58 -0
- package/src/lib/dependency-sorter.ts +233 -0
- package/src/lib/errors.ts +237 -0
- package/src/lib/file-scanner.ts +195 -0
- package/src/lib/formatters.ts +314 -0
- package/src/lib/health-check.ts +204 -0
- package/src/lib/markdown/converter.ts +427 -0
- package/src/lib/markdown/frontmatter.ts +116 -0
- package/src/lib/markdown/html-converter.ts +398 -0
- package/src/lib/markdown/index.ts +21 -0
- package/src/lib/markdown/link-converter.ts +189 -0
- package/src/lib/markdown/reference-updater.ts +251 -0
- package/src/lib/markdown/slugify.ts +32 -0
- package/src/lib/page-state.ts +195 -0
- package/src/lib/resolve-page-target.ts +33 -0
- package/src/lib/space-config.ts +264 -0
- package/src/lib/sync/cleanup.ts +50 -0
- package/src/lib/sync/folder-path.ts +61 -0
- package/src/lib/sync/index.ts +2 -0
- package/src/lib/sync/link-resolution-pass.ts +139 -0
- package/src/lib/sync/sync-engine.ts +681 -0
- package/src/lib/sync/sync-specific.ts +221 -0
- package/src/lib/sync/types.ts +42 -0
- package/src/test/attachments.test.ts +68 -0
- package/src/test/clone.test.ts +373 -0
- package/src/test/comments.test.ts +53 -0
- package/src/test/config.test.ts +209 -0
- package/src/test/confluence-client.test.ts +535 -0
- package/src/test/delete.test.ts +39 -0
- package/src/test/dependency-sorter.test.ts +384 -0
- package/src/test/errors.test.ts +199 -0
- package/src/test/file-rename.test.ts +305 -0
- package/src/test/file-scanner.test.ts +331 -0
- package/src/test/folder-hierarchy.test.ts +337 -0
- package/src/test/formatters.test.ts +213 -0
- package/src/test/html-converter.test.ts +399 -0
- package/src/test/info.test.ts +56 -0
- package/src/test/labels.test.ts +70 -0
- package/src/test/link-conversion-integration.test.ts +189 -0
- package/src/test/link-converter.test.ts +413 -0
- package/src/test/link-resolution-pass.test.ts +368 -0
- package/src/test/markdown.test.ts +443 -0
- package/src/test/mocks/handlers.ts +228 -0
- package/src/test/move.test.ts +53 -0
- package/src/test/msw-schema-validation.ts +151 -0
- package/src/test/page-state.test.ts +542 -0
- package/src/test/push.test.ts +551 -0
- package/src/test/reference-updater.test.ts +293 -0
- package/src/test/resolve-page-target.test.ts +55 -0
- package/src/test/search.test.ts +64 -0
- package/src/test/setup-msw.ts +75 -0
- package/src/test/space-config.test.ts +516 -0
- package/src/test/spaces.test.ts +53 -0
- package/src/test/sync-engine.test.ts +486 -0
- package/src/types/turndown-plugin-gfm.d.ts +9 -0
|
@@ -0,0 +1,427 @@
|
|
|
1
|
+
import TurndownService from 'turndown';
|
|
2
|
+
import * as turndownPluginGfm from 'turndown-plugin-gfm';
|
|
3
|
+
import type { Label, Page, User } from '../confluence-client/types.js';
|
|
4
|
+
import { createFrontmatter, serializeMarkdown, type PageFrontmatter } from './frontmatter.js';
|
|
5
|
+
import { confluenceLinkToRelativePath, extractPageTitleFromLink, type PageLookupMap } from './link-converter.js';
|
|
6
|
+
|
|
7
|
+
/**
 * Markdown converter that transforms Confluence HTML to Markdown.
 *
 * Wraps a Turndown instance (configured with the GFM plugin) and registers
 * custom rules for Confluence-specific elements. Per ADR-0004.
 *
 * The converter is stateful: `convertPage()` stores the base URL, page id,
 * page path and page lookup map on the instance so the image/link rules can
 * read them, and every `convert()` call resets the collected warnings.
 */
export class MarkdownConverter {
  /**
   * Confluence emoticon name -> plain-text shortcut.
   * Kept in a Map (not an object literal) so names such as "constructor"
   * cannot resolve to inherited prototype members.
   */
  private static readonly EMOTICONS: ReadonlyMap<string, string> = new Map([
    ['smile', ':)'],
    ['sad', ':('],
    ['cheeky', ':P'],
    ['laugh', ':D'],
    ['wink', ';)'],
    ['thumbsup', '(y)'],
    ['thumbsdown', '(n)'],
    ['information', '(i)'],
    ['tick', '(/)'],
    ['cross', '(x)'],
    ['warning', '(!)'],
    ['plus', '(+)'],
    ['minus', '(-)'],
    ['question', '(?)'],
    ['light_bulb', '(*)'],
    ['yellow_star', '(*y)'],
    ['red_star', '(*r)'],
    ['green_star', '(*g)'],
    ['blue_star', '(*b)'],
  ]);

  private readonly turndown: TurndownService;
  /** Warnings collected during the most recent `convert()` call. */
  private warnings: string[] = [];
  /** Context set by `convertPage()`; empty string / null means "not available". */
  private currentBaseUrl = '';
  private currentPageId = '';
  private currentPagePath = '';
  private pageLookupMap: PageLookupMap | null = null;

  constructor() {
    this.turndown = new TurndownService({
      headingStyle: 'atx',
      codeBlockStyle: 'fenced',
      bulletListMarker: '-',
      emDelimiter: '_',
      strongDelimiter: '**',
    });

    // Add GFM plugin for tables and strikethrough
    this.turndown.use(turndownPluginGfm.gfm);

    this.addCustomRules();
  }

  /**
   * Case-insensitive comparison of a node's `nodeName` with a tag name.
   *
   * @param node - Any object exposing `nodeName`
   * @param lowerCaseName - Tag name to compare against, already lower-cased (e.g. `ac:link`)
   */
  private static hasTagName(node: { nodeName: string }, lowerCaseName: string): boolean {
    return node.nodeName.toLowerCase() === lowerCaseName;
  }

  /**
   * Escape HTML special characters for embedding in HTML attributes/content.
   * Used to safely embed CDATA content in pre/code elements.
   *
   * @param text - Raw text
   * @returns Text with `&`, `<`, `>`, `"` and `'` replaced by HTML entities
   */
  private escapeHtml(text: string): string {
    // '&' must be replaced first so the entities produced below are not re-escaped.
    return text
      .replace(/&/g, '&amp;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;')
      .replace(/"/g, '&quot;')
      .replace(/'/g, '&#39;');
  }

  /**
   * Register the custom Turndown rules for Confluence-specific elements.
   *
   * NOTE(review): both `confluenceMention` and `confluencePageLink` match
   * `ac:link` nodes; which rule handles such a node depends on Turndown's
   * rule precedence — confirm against the Turndown documentation.
   */
  private addCustomRules(): void {
    // Code blocks with language detection
    this.turndown.addRule('confluenceCodeBlock', {
      filter: (node) => {
        return (
          node.nodeName === 'DIV' &&
          (node.classList?.contains('code') ||
            node.classList?.contains('codeContent') ||
            node.classList?.contains('preformatted'))
        );
      },
      replacement: (content, node) => {
        const element = node as HTMLElement;
        // Try to detect language from class or data attributes
        const language =
          element.getAttribute('data-syntaxhighlighter-params')?.match(/brush:\s*(\w+)/)?.[1] ||
          element.getAttribute('data-language') ||
          '';

        // Prefer the raw text so nested markup is not converted to Markdown inside the fence.
        const code = element.textContent || content;
        return `\n\`\`\`${language}\n${code.trim()}\n\`\`\`\n`;
      },
    });

    // Confluence pre/code blocks
    this.turndown.addRule('confluencePreCode', {
      filter: (node) => {
        if (node.nodeName !== 'PRE') return false;
        const parent = node.parentNode as HTMLElement | null;
        return parent?.classList?.contains('code') || parent?.classList?.contains('codeContent') || false;
      },
      replacement: (content, node) => {
        const element = node as HTMLElement;
        const language = element.getAttribute('data-language') || '';
        return `\n\`\`\`${language}\n${content.trim()}\n\`\`\`\n`;
      },
    });

    // Confluence ac:structured-macro (info, note, warning, tip panels)
    // NOTE(review): convert() rewrites `<ac:structured-macro` opening tags to
    // `<div ...>` before Turndown runs, so this rule may never match input
    // that goes through convert() — confirm whether that is intended.
    this.turndown.addRule('confluenceMacro', {
      filter: (node) => MarkdownConverter.hasTagName(node, 'ac:structured-macro'),
      replacement: (content, node) => {
        const element = node as HTMLElement;
        const macroName = element.getAttribute('ac:name') || 'unknown';

        // Handle specific macros
        switch (macroName) {
          case 'info':
            return `\n> **Info:** ${content.trim()}\n`;
          case 'note':
            return `\n> **Note:** ${content.trim()}\n`;
          case 'warning':
            return `\n> **Warning:** ${content.trim()}\n`;
          case 'tip':
            return `\n> **Tip:** ${content.trim()}\n`;
          case 'code':
            return `\n\`\`\`\n${content.trim()}\n\`\`\`\n`;
          case 'toc':
            // Table of contents - skip with warning
            this.warnings.push('Table of Contents macro was removed');
            return '';
          default:
            this.warnings.push(`Unsupported macro "${macroName}" was converted to blockquote`);
            return `\n> **${macroName}:** ${content.trim()}\n`;
        }
      },
    });

    // Confluence task lists: the container is transparent, each task renders itself
    this.turndown.addRule('confluenceTaskList', {
      filter: (node) => MarkdownConverter.hasTagName(node, 'ac:task-list'),
      replacement: (content) => content,
    });

    this.turndown.addRule('confluenceTask', {
      filter: (node) => MarkdownConverter.hasTagName(node, 'ac:task'),
      replacement: (content, node) => {
        const element = node as HTMLElement;
        const status = element.querySelector('ac\\:task-status, [ac\\:task-status]')?.textContent || '';
        const body = element.querySelector('ac\\:task-body, [ac\\:task-body]')?.textContent || content;
        const checked = status === 'complete' ? 'x' : ' ';
        return `- [${checked}] ${body.trim()}\n`;
      },
    });

    // Confluence user mentions
    this.turndown.addRule('confluenceMention', {
      filter: (node) => {
        return (
          MarkdownConverter.hasTagName(node, 'ac:link') ||
          (node.nodeName === 'A' && (node as HTMLElement).classList?.contains('confluence-userlink'))
        );
      },
      replacement: (content, node) => {
        const element = node as HTMLElement;
        const userName =
          element.getAttribute('ri:username') || element.getAttribute('data-username') || content || 'user';
        return `@${userName}`;
      },
    });

    // Confluence emoticons: map known names to text shortcuts, otherwise emit "(name)"
    this.turndown.addRule('confluenceEmoticon', {
      filter: (node) => MarkdownConverter.hasTagName(node, 'ac:emoticon'),
      replacement: (_content, node) => {
        const element = node as HTMLElement;
        const name = element.getAttribute('ac:name') || '';
        return MarkdownConverter.EMOTICONS.get(name) ?? `(${name})`;
      },
    });

    // Confluence images (ac:image elements)
    // Since attachments are not synced, we link to the Confluence URL and warn
    this.turndown.addRule('confluenceImage', {
      filter: (node) => MarkdownConverter.hasTagName(node, 'ac:image'),
      replacement: (_content, node) => {
        const element = node as HTMLElement;
        const attachment = element.querySelector('ri\\:attachment, [ri\\:attachment]');
        const filename = attachment?.getAttribute('ri:filename') || 'image';
        this.warnings.push(`Image "${filename}" links to Confluence (attachments not synced)`);
        // Build Confluence attachment URL if we have context
        if (this.currentBaseUrl && this.currentPageId) {
          const attachmentUrl = `${this.currentBaseUrl}/wiki/download/attachments/${this.currentPageId}/${encodeURIComponent(filename)}`;
          return `![${filename}](${attachmentUrl})`;
        }
        // Fallback: just use filename as placeholder
        return `![${filename}](${filename})`;
      },
    });

    // Standard images with Confluence attachment URLs
    // Since attachments are not synced, we preserve the original URL and warn
    this.turndown.addRule('confluenceAttachmentImage', {
      filter: (node) => {
        if (node.nodeName !== 'IMG') return false;
        const src = (node as HTMLImageElement).getAttribute('src') || '';
        return src.includes('/attachments/') || src.includes('/download/');
      },
      replacement: (_content, node) => {
        const element = node as HTMLImageElement;
        const src = element.getAttribute('src') || '';
        const alt = element.getAttribute('alt') || 'image';
        // Last path segment without the query string, used only for the warning text.
        const filename = src.split('/').pop()?.split('?')[0] || 'image';
        this.warnings.push(`Image "${filename}" links to Confluence (attachments not synced)`);
        // Use absolute URL if src is relative, otherwise preserve original
        if (src.startsWith('/') && this.currentBaseUrl) {
          return `![${alt}](${this.currentBaseUrl}${src})`;
        }
        return `![${alt}](${src})`;
      },
    });

    // Confluence page links (ac:link with ri:page)
    // Per ADR-0022: Convert to relative markdown paths
    this.turndown.addRule('confluencePageLink', {
      filter: (node) => {
        // Match <ac:link> elements (the ri:page child is checked in the replacement)
        if (MarkdownConverter.hasTagName(node, 'ac:link')) {
          return true;
        }
        // Also match standard <a> tags with confluence link classes
        return (
          node.nodeName === 'A' &&
          ((node as HTMLElement).getAttribute('href')?.includes('/wiki/') ||
            (node as HTMLElement).classList?.contains('confluence-link'))
        );
      },
      replacement: (content, node) => {
        const element = node as HTMLElement;

        // Check if this is an ac:link with ri:page
        const riPage = element.querySelector('ri\\:page, [ri\\:page]');
        if (riPage) {
          const targetTitle = riPage.getAttribute('ri:content-title');
          // TODO: Extract ri:space-key attribute to support cross-space links
          // const targetSpaceKey = riPage.getAttribute('ri:space-key');

          // Extract link text from ac:plain-text-link-body if content is empty
          let linkText = content;
          if (!linkText || linkText.trim() === '') {
            // Try to find link body with different selectors
            const linkBody =
              element.querySelector('ac\\:plain-text-link-body, [ac\\:plain-text-link-body], plain-text-link-body') ||
              element.querySelector('[ri\\:content-title]')?.nextElementSibling;
            linkText = linkBody?.textContent?.trim() || element.textContent?.trim() || targetTitle || '';
          }

          // Only attempt conversion if we have all required context
          if (targetTitle && this.pageLookupMap && this.currentPagePath) {
            // Try to convert to relative path
            const relativePath = confluenceLinkToRelativePath(targetTitle, this.currentPagePath, this.pageLookupMap);

            if (relativePath) {
              return `[${linkText}](${relativePath})`;
            }

            // Target page not found in sync state
            this.warnings.push(`Link to "${targetTitle}" could not be resolved to local path (page not in sync state)`);
          } else if (targetTitle && !this.pageLookupMap) {
            // Missing lookup map context
            this.warnings.push(`Link to "${targetTitle}" could not be converted (missing page lookup map)`);
          } else if (!targetTitle) {
            // Missing title in link
            this.warnings.push('Confluence page link missing ri:content-title attribute');
          }
        }

        // Fallback: preserve as Confluence URL
        const href = element.getAttribute('href') || '';
        if (href) {
          return `[${content}](${href})`;
        }

        // Last resort: just return the text
        return content;
      },
    });

    // Remove empty paragraphs (paragraphs holding only an image are kept)
    this.turndown.addRule('removeEmptyParagraphs', {
      filter: (node) => {
        return node.nodeName === 'P' && !node.textContent?.trim() && !node.querySelector('img');
      },
      replacement: () => '',
    });
  }

  /**
   * Convert Confluence HTML to Markdown.
   *
   * Resets the warning list, pre-processes Confluence namespace elements with
   * regular expressions, then runs Turndown. If Turndown throws, tables are
   * replaced by a placeholder and conversion is retried; if that also throws,
   * the tags are stripped and the remaining text is returned.
   *
   * @param html - Confluence page body HTML
   * @returns Markdown with runs of 3+ newlines collapsed and outer whitespace trimmed
   */
  convert(html: string): string {
    this.warnings = [];

    // Pre-process HTML to handle Confluence-specific namespace elements
    let processedHtml = html;

    // Handle Confluence code macros specially - extract CDATA content before Turndown
    // CDATA is not valid HTML5 and gets stripped during parsing, so we must handle it here
    processedHtml = processedHtml.replace(
      /<ac:structured-macro[^>]*ac:name="code"[^>]*>[\s\S]*?<ac:parameter[^>]*ac:name="language"[^>]*>([^<]*)<\/ac:parameter>[\s\S]*?<ac:plain-text-body><!\[CDATA\[([\s\S]*?)\]\]><\/ac:plain-text-body>[\s\S]*?<\/ac:structured-macro>/gi,
      (_match, language, code) => {
        const lang = (language || '').trim();
        // Escape the language too: it is interpolated into an attribute value.
        return `<pre><code class="language-${this.escapeHtml(lang)}">${this.escapeHtml(code)}</code></pre>`;
      },
    );

    // Handle code macros without language parameter
    processedHtml = processedHtml.replace(
      /<ac:structured-macro[^>]*ac:name="code"[^>]*>[\s\S]*?<ac:plain-text-body><!\[CDATA\[([\s\S]*?)\]\]><\/ac:plain-text-body>[\s\S]*?<\/ac:structured-macro>/gi,
      (_match, code) => {
        return `<pre><code>${this.escapeHtml(code)}</code></pre>`;
      },
    );

    processedHtml = processedHtml
      // Convert remaining ac: namespace elements to standard HTML attributes
      .replace(/<ac:structured-macro/gi, '<div data-macro="true" data-macro-name')
      .replace(/<\/ac:structured-macro>/gi, '</div>')
      .replace(/<ac:parameter/gi, '<span data-param')
      .replace(/<\/ac:parameter>/gi, '</span>')
      .replace(/<ac:rich-text-body>/gi, '<div>')
      .replace(/<\/ac:rich-text-body>/gi, '</div>')
      .replace(/<ac:plain-text-body>/gi, '<pre>')
      .replace(/<\/ac:plain-text-body>/gi, '</pre>')
      // Convert Confluence user references to @mentions
      .replace(/<ac:link><ri:user[^>]*ri:account-id="([^"]*)"[^/]*\/><\/ac:link>/gi, '@$1')
      .replace(/<ri:user[^>]*ri:account-id="([^"]*)"[^/]*\/>/gi, '@$1');

    // Convert using Turndown, with error handling for malformed HTML
    let markdown: string;
    try {
      markdown = this.turndown.turndown(processedHtml);
    } catch {
      // If turndown fails (often due to malformed tables), try stripping tables and retry
      try {
        const tablePattern = /<table[\s\S]*?<\/table>/gi;
        const tableCount = (processedHtml.match(tablePattern) || []).length;
        const htmlWithoutTables = processedHtml.replace(
          tablePattern,
          '\n\n[Table removed due to conversion error]\n\n',
        );
        markdown = this.turndown.turndown(htmlWithoutTables);
        this.warnings.push(`Removed ${tableCount} malformed table(s) during conversion`);
      } catch {
        // Last resort: return raw text content
        this.warnings.push('Converted as plain text (HTML too malformed)');
        markdown = processedHtml
          .replace(/<[^>]+>/g, ' ')
          .replace(/\s+/g, ' ')
          .trim();
      }
    }

    // Post-process: clean up extra whitespace
    markdown = markdown
      .replace(/\n{3,}/g, '\n\n') // Collapse multiple newlines
      .trim();

    return markdown;
  }

  /**
   * Convert a page to markdown with frontmatter.
   * Per ADR-0022: Converts Confluence page links to relative markdown paths.
   * Note: space_key is not included in frontmatter (inferred from .confluence.json)
   *
   * The page title is emitted as an H1 above the converted body.
   *
   * @param page - Page whose `body.storage.value` is converted (empty string if absent)
   * @param labels - Labels forwarded to the frontmatter
   * @param parentTitle - Parent page title forwarded to the frontmatter
   * @param baseUrl - Base URL used for image links and the frontmatter URL
   * @param author - Page author forwarded to the frontmatter
   * @param lastModifier - Last modifier forwarded to the frontmatter
   * @param currentPagePath - Path of this page, needed to compute relative links
   * @param pageLookupMap - Lookup map used to resolve page links
   * @param childCount - Child count forwarded to the frontmatter
   * @returns The serialized markdown and a copy of the warnings produced
   */
  convertPage(
    page: Page,
    labels: Label[] = [],
    parentTitle?: string,
    baseUrl?: string,
    author?: User,
    lastModifier?: User,
    currentPagePath?: string,
    pageLookupMap?: PageLookupMap,
    childCount?: number,
  ): { markdown: string; warnings: string[] } {
    // Set context for image URL generation and link conversion
    this.currentBaseUrl = baseUrl || '';
    this.currentPageId = page.id;
    this.currentPagePath = currentPagePath || '';
    this.pageLookupMap = pageLookupMap || null;

    const html = page.body?.storage?.value || '';
    const bodyContent = this.convert(html);
    // Add H1 with page title at the start of content (Confluence shows title separately)
    const content = `# ${page.title}\n\n${bodyContent}`;
    const frontmatter = createFrontmatter(page, labels, parentTitle, baseUrl, author, lastModifier, childCount);
    const markdown = serializeMarkdown(frontmatter, content);

    return {
      markdown,
      warnings: [...this.warnings],
    };
  }

  /**
   * Get any warnings from the last conversion.
   *
   * @returns A copy of the warning list, so callers cannot mutate internal state
   */
  getWarnings(): string[] {
    return [...this.warnings];
  }
}
|
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
import matter from 'gray-matter';
|
|
2
|
+
import type { Label, Page, User } from '../confluence-client/types.js';
|
|
3
|
+
|
|
4
|
+
/**
 * Comprehensive frontmatter metadata for synced pages.
 * Per ADR-0006: Include all available metadata in YAML frontmatter.
 *
 * Only `page_id`, `title` and `synced_at` are required; every other field is
 * optional.
 */
export interface PageFrontmatter {
  // --- identity ---
  /** Confluence page id. */
  page_id: string;
  /** Page title. */
  title: string;
  /** Deprecated: inferred from .confluence.json */
  space_key?: string;

  // --- versioning ---
  /** Creation timestamp of the page. */
  created_at?: string;
  /** Creation timestamp of the current page version. */
  updated_at?: string;
  /** Current version number. */
  version?: number;

  // --- hierarchy ---
  /** Id of the parent page, if any. */
  parent_id?: string | null;
  /** Title of the parent page. */
  parent_title?: string;
  /** Number of child pages. */
  child_count?: number;

  // --- people ---
  author_id?: string;
  author_name?: string;
  author_email?: string;
  last_modifier_id?: string;
  last_modifier_name?: string;
  last_modifier_email?: string;

  // --- misc ---
  /** Label names attached to the page. */
  labels?: string[];
  /** Web URL of the page. */
  url?: string;
  /** ISO timestamp of when the frontmatter was generated. */
  synced_at: string;
}
|
|
28
|
+
|
|
29
|
+
/**
 * Build the frontmatter record for a Confluence page.
 * `space_key` is intentionally left out (it is inferred from .confluence.json).
 *
 * @param page - Source page (id, title, timestamps, version, parent, author, links)
 * @param labels - Page labels; only their names are stored, and the field is omitted when empty
 * @param parentTitle - Title of the parent page, if known
 * @param baseUrl - Site base URL; combined with `page._links.webui` to form `url`
 * @param author - Resolved author, used for display name and email
 * @param lastModifier - Resolved last modifier, used for display name and email
 * @param childCount - Number of child pages, if known
 * @returns Frontmatter stamped with the current time in `synced_at`
 */
export function createFrontmatter(
  page: Page,
  labels: Label[] = [],
  parentTitle?: string,
  baseUrl?: string,
  author?: User,
  lastModifier?: User,
  childCount?: number,
): PageFrontmatter {
  // The URL needs both the relative web path and the site base; otherwise it stays undefined.
  const webPath = page._links?.webui;
  let pageUrl: string | undefined;
  if (webPath && baseUrl) {
    pageUrl = `${baseUrl}/wiki${webPath}`;
  }

  let labelNames: string[] | undefined;
  if (labels.length > 0) {
    labelNames = labels.map((label) => label.name);
  }

  const revision = page.version;

  // Key order is preserved on purpose: it determines the order of the serialized fields.
  const result: PageFrontmatter = {
    page_id: page.id,
    title: page.title,
    created_at: page.createdAt,
    updated_at: revision?.createdAt,
    version: revision?.number,
    parent_id: page.parentId,
    parent_title: parentTitle,
    child_count: childCount,
    author_id: page.authorId,
    author_name: author?.displayName,
    author_email: author?.email,
    last_modifier_id: revision?.authorId,
    last_modifier_name: lastModifier?.displayName,
    last_modifier_email: lastModifier?.email,
    labels: labelNames,
    url: pageUrl,
    synced_at: new Date().toISOString(),
  };
  return result;
}
|
|
65
|
+
|
|
66
|
+
/**
 * Render frontmatter plus body content as a single markdown string.
 * A Partial frontmatter is accepted so existing files can be updated with
 * only the fields at hand.
 *
 * @param frontmatter - Frontmatter fields; entries whose value is `undefined` are dropped
 * @param content - Markdown body
 * @returns The output of `matter.stringify` for the body and the remaining fields
 */
export function serializeMarkdown(frontmatter: Partial<PageFrontmatter>, content: string): string {
  // Drop undefined values while keeping the original key order.
  const definedEntries = Object.entries(frontmatter).filter(([, fieldValue]) => fieldValue !== undefined);
  const cleaned: Record<string, unknown> = Object.fromEntries(definedEntries);

  return matter.stringify(content, cleaned);
}
|
|
81
|
+
|
|
82
|
+
/**
 * Split a markdown string into its frontmatter data and body content.
 *
 * @param markdown - Full markdown text, optionally starting with a frontmatter block
 * @returns The parsed frontmatter (cast, not validated) and the body content
 */
export function parseMarkdown(markdown: string): { frontmatter: Partial<PageFrontmatter>; content: string } {
  const { data, content } = matter(markdown);
  const frontmatter = data as Partial<PageFrontmatter>;
  return { frontmatter, content };
}
|
|
92
|
+
|
|
93
|
+
/**
 * Read the `page_id` field from a markdown document's frontmatter.
 *
 * @param markdown - Full markdown text
 * @returns The page id, or `undefined` when the frontmatter has none
 */
export function extractPageId(markdown: string): string | undefined {
  return parseMarkdown(markdown).frontmatter.page_id;
}
|
|
100
|
+
|
|
101
|
+
/**
 * Extract the first H1 heading from markdown content.
 *
 * A heading is a line starting with a single `#` followed by at least one
 * space or tab and some text. The separator must stay on the heading's own
 * line, so an empty `#` line followed by a paragraph is not treated as a
 * heading whose title is that paragraph.
 *
 * @param content - Markdown body
 * @returns The trimmed heading text, or `undefined` if no H1 is found
 */
export function extractH1Title(content: string): string | undefined {
  // [^\S\r\n] = any whitespace except line breaks.
  const match = content.match(/^#[^\S\r\n]+(.+)$/m);
  // A capture made only of whitespace counts as "no title".
  return match?.[1]?.trim() || undefined;
}
|
|
109
|
+
|
|
110
|
+
/**
 * Strip the leading H1 heading from markdown content.
 * Used when pushing to Confluence (title is displayed separately).
 *
 * Only an H1 at the top of the content is removed; blank lines before it are
 * tolerated. The `#` must be followed by a space or tab on the same line, so
 * an empty `#` line does not cause the paragraph after it to be deleted.
 *
 * @param content - Markdown body
 * @returns The content without the leading H1, trimmed
 */
export function stripH1Title(content: string): string {
  // (?:[^\S\r\n]*\r?\n)*  -> optional blank lines before the heading
  // #[^\S\r\n]+.+         -> the heading itself, confined to one line
  // (?:\r?\n)*            -> line breaks directly after the heading
  return content.replace(/^(?:[^\S\r\n]*\r?\n)*#[^\S\r\n]+.+(?:\r?\n)*/, '').trim();
}
|