confluence-cli 2.1.2 → 2.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -789,7 +789,7 @@ A blockquote whose first line is `**INFO**`, `**WARNING**`, or `**NOTE**` become
789
789
  > Side note for the reader.
790
790
  ```
791
791
 
792
- The reverse direction emits the equivalent shorthand (`[!info]` / `[!warning]` / `[!note]` followed by the body), which markdown→storage then re-expands.
792
+ The reverse direction emits the same `> **INFO**` / `> **WARNING**` / `> **NOTE**` blockquote form, so multi-paragraph bodies round-trip cleanly. The bare `[!info]` / `[!warning]` / `[!note]` shorthand is still accepted on input for backwards compatibility.
793
793
 
794
794
  A blockquote without one of these markers stays a **plain blockquote** (`<blockquote>…</blockquote>`) — `> …` is treated as a quotation, not an alert. Use the markers above when you want a callout.
795
795
 
@@ -1,5 +1,5 @@
1
1
  const MarkdownIt = require('markdown-it');
2
- const { htmlToMarkdown } = require('./html-to-markdown');
2
+ const { StorageWalker } = require('./storage-walker');
3
3
 
4
4
  const VALID_LINK_STYLES = ['smart', 'plain', 'wiki'];
5
5
  const CALLOUT_MARKERS = ['info', 'warning', 'note'];
@@ -251,148 +251,14 @@ class MacroConverter {
251
251
 
252
252
  storageToMarkdown(storage, options = {}) {
253
253
  const attachmentsDir = options.attachmentsDir || 'attachments';
254
- let markdown = storage;
255
-
256
- const labels = this.detectLanguageLabels(markdown);
257
-
258
- markdown = markdown.replace(/<ac:structured-macro ac:name="toc"[^>]*\s*\/>/g, '');
259
- markdown = markdown.replace(/<ac:structured-macro ac:name="toc"[^>]*>[\s\S]*?<\/ac:structured-macro>/g, '');
260
-
261
- markdown = markdown.replace(/<ac:structured-macro ac:name="floatmenu"[^>]*>[\s\S]*?<\/ac:structured-macro>/g, '');
262
-
263
- markdown = markdown.replace(/<ac:image[^>]*>\s*<ri:attachment\s+ri:filename="([^"]+)"[^>]*\s*\/>\s*<\/ac:image>/g, (_, filename) => {
264
- return `![${filename}](${attachmentsDir}/${filename})`;
265
- });
266
-
267
- markdown = markdown.replace(/<ac:image[^>]*><ri:attachment\s+ri:filename="([^"]+)"[^>]*><\/ri:attachment><\/ac:image>/g, (_, filename) => {
268
- return `![${filename}](${attachmentsDir}/${filename})`;
269
- });
270
-
271
- markdown = markdown.replace(/<ac:structured-macro ac:name="mermaid-macro"[^>]*>[\s\S]*?<ac:plain-text-body><!\[CDATA\[([\s\S]*?)\]\]><\/ac:plain-text-body>[\s\S]*?<\/ac:structured-macro>/g, (_, code) => {
272
- return `\n\`\`\`mermaid\n${code.trim()}\n\`\`\`\n`;
273
- });
274
-
275
- // Titled expand macros → **EXPAND: title** / **EXPAND_END** markers
276
- // (round-trip with markdownToStorage). Must run before the generic
277
- // <details> fallback below, which would otherwise drop the title.
278
- markdown = markdown.replace(
279
- /<ac:structured-macro ac:name="expand"[^>]*>[\s\S]*?<ac:parameter ac:name="title">([\s\S]*?)<\/ac:parameter>[\s\S]*?<ac:rich-text-body>([\s\S]*?)<\/ac:rich-text-body>[\s\S]*?<\/ac:structured-macro>/g,
280
- '\n**EXPAND: $1**\n\n$2\n\n**EXPAND_END**\n'
281
- );
282
-
283
- // Title-less expand macros (e.g. created in the Confluence UI without a
284
- // title) fall back to <details>/<summary> with a localized default label.
285
- markdown = markdown.replace(/<ac:structured-macro ac:name="expand"[^>]*>[\s\S]*?<ac:rich-text-body>([\s\S]*?)<\/ac:rich-text-body>[\s\S]*?<\/ac:structured-macro>/g, (_, content) => {
286
- return `\n<details>\n<summary>${labels.expandDetails}</summary>\n\n${content}\n\n</details>\n`;
287
- });
288
-
289
- markdown = markdown.replace(/<ac:structured-macro ac:name="code"[^>]*>[\s\S]*?<ac:parameter ac:name="language">([^<]*)<\/ac:parameter>[\s\S]*?<ac:plain-text-body><!\[CDATA\[([\s\S]*?)\]\]><\/ac:plain-text-body>[\s\S]*?<\/ac:structured-macro>/g, (_, lang, code) => {
290
- return `\n\`\`\`${lang}\n${code}\n\`\`\`\n`;
291
- });
292
-
293
- markdown = markdown.replace(/<ac:structured-macro ac:name="code"[^>]*>[\s\S]*?<ac:plain-text-body><!\[CDATA\[([\s\S]*?)\]\]><\/ac:plain-text-body>[\s\S]*?<\/ac:structured-macro>/g, (_, code) => {
294
- return `\n\`\`\`\n${code}\n\`\`\`\n`;
295
- });
296
-
297
- markdown = markdown.replace(/<ac:structured-macro ac:name="info"[^>]*>[\s\S]*?<ac:rich-text-body>([\s\S]*?)<\/ac:rich-text-body>[\s\S]*?<\/ac:structured-macro>/g, (_, content) => {
298
- const cleanContent = htmlToMarkdown(content);
299
- return `[!info]\n${cleanContent}`;
300
- });
301
-
302
- markdown = markdown.replace(/<ac:structured-macro ac:name="warning"[^>]*>[\s\S]*?<ac:rich-text-body>([\s\S]*?)<\/ac:rich-text-body>[\s\S]*?<\/ac:structured-macro>/g, (_, content) => {
303
- const cleanContent = htmlToMarkdown(content);
304
- return `[!warning]\n${cleanContent}`;
305
- });
306
-
307
- markdown = markdown.replace(/<ac:structured-macro ac:name="note"[^>]*>[\s\S]*?<ac:rich-text-body>([\s\S]*?)<\/ac:rich-text-body>[\s\S]*?<\/ac:structured-macro>/g, (_, content) => {
308
- const cleanContent = htmlToMarkdown(content);
309
- return `[!note]\n${cleanContent}`;
310
- });
311
-
312
- // anchor macro → **ANCHOR: id** marker (round-trip with markdownToStorage).
313
- // Must run before the generic <ac:structured-macro> catch-all below, which
314
- // would otherwise drop the anchor entirely.
315
- markdown = markdown.replace(
316
- /<ac:structured-macro ac:name="anchor"[^>]*>[\s\S]*?<ac:parameter ac:name="">([\s\S]*?)<\/ac:parameter>[\s\S]*?<\/ac:structured-macro>/g,
317
- '\n**ANCHOR: $1**\n'
318
- );
319
-
320
- markdown = markdown.replace(/<ac:task-list>([\s\S]*?)<\/ac:task-list>/g, (_, content) => {
321
- const tasks = [];
322
- const taskRegex = /<ac:task>[\s\S]*?<ac:task-status>([^<]*)<\/ac:task-status>[\s\S]*?<ac:task-body>([\s\S]*?)<\/ac:task-body>[\s\S]*?<\/ac:task>/g;
323
- let match;
324
- while ((match = taskRegex.exec(content)) !== null) {
325
- const status = match[1];
326
- let taskBody = match[2];
327
- taskBody = taskBody.replace(/<[^>]+>/g, '').replace(/\s+/g, ' ').trim();
328
- const checkbox = status === 'complete' ? '[x]' : '[ ]';
329
- if (taskBody) {
330
- tasks.push(`- ${checkbox} ${taskBody}`);
331
- }
332
- }
333
- return tasks.length > 0 ? '\n' + tasks.join('\n') + '\n' : '';
334
- });
335
-
336
- markdown = markdown.replace(/<ac:structured-macro ac:name="panel"[^>]*>[\s\S]*?<ac:parameter ac:name="title">([^<]*)<\/ac:parameter>[\s\S]*?<ac:rich-text-body>([\s\S]*?)<\/ac:rich-text-body>[\s\S]*?<\/ac:structured-macro>/g, (_, title, content) => {
337
- const cleanContent = htmlToMarkdown(content);
338
- return `\n> **${title}**\n>\n${cleanContent.split('\n').map(line => line ? `> ${line}` : '>').join('\n')}\n`;
339
- });
340
-
341
- markdown = markdown.replace(/<ac:structured-macro ac:name="include"[^>]*>[\s\S]*?<ac:parameter ac:name="">[\s\S]*?<ac:link>[\s\S]*?<ri:page\s+ri:space-key="([^"]+)"\s+ri:content-title="([^"]+)"[^>]*\/>[\s\S]*?<\/ac:link>[\s\S]*?<\/ac:parameter>[\s\S]*?<\/ac:structured-macro>/g, (_, spaceKey, title) => {
342
- if (spaceKey.startsWith('~')) {
343
- const spacePath = `display/${spaceKey}/${encodeURIComponent(title)}`;
344
- return `\n> 📄 **${labels.includePage}**: [${title}](${this.buildUrl(`${this.webUrlPrefix}/${spacePath}`)})\n`;
345
- } else {
346
- return `\n> 📄 **${labels.includePage}**: [${title}](${this.buildUrl(`${this.webUrlPrefix}/spaces/${spaceKey}/pages/[PAGE_ID_HERE]`)}) _(manual link correction required)_\n`;
347
- }
348
- });
349
-
350
- markdown = markdown.replace(/<ac:structured-macro ac:name="(shared-block|include-shared-block)"[^>]*>[\s\S]*?<ac:parameter ac:name="shared-block-key">([^<]*)<\/ac:parameter>[\s\S]*?<ac:rich-text-body>([\s\S]*?)<\/ac:rich-text-body>[\s\S]*?<\/ac:structured-macro>/g, (_, macroType, blockKey, content) => {
351
- const cleanContent = htmlToMarkdown(content);
352
- return `\n> **${labels.sharedBlock}: ${blockKey}**\n>\n${cleanContent.split('\n').map(line => line ? `> ${line}` : '>').join('\n')}\n`;
353
- });
354
-
355
- markdown = markdown.replace(/<ac:structured-macro ac:name="include-shared-block"[^>]*>[\s\S]*?<ac:parameter ac:name="shared-block-key">([^<]*)<\/ac:parameter>[\s\S]*?<ac:parameter ac:name="page">[\s\S]*?<ac:link>[\s\S]*?<ri:page\s+ri:space-key="([^"]+)"\s+ri:content-title="([^"]+)"[^>]*\/>[\s\S]*?<\/ac:link>[\s\S]*?<\/ac:parameter>[\s\S]*?<\/ac:structured-macro>/g, (_, blockKey, spaceKey, pageTitle) => {
356
- return `\n> 📄 **${labels.includeSharedBlock}**: ${blockKey} (${labels.fromPage}: ${pageTitle} [link needs manual correction])\n`;
254
+ const labels = this.detectLanguageLabels(storage);
255
+ const walker = new StorageWalker({
256
+ attachmentsDir,
257
+ labels,
258
+ buildUrl: this.buildUrl,
259
+ webUrlPrefix: this.webUrlPrefix,
357
260
  });
358
-
359
- markdown = markdown.replace(/<ac:structured-macro ac:name="view-file"[^>]*>[\s\S]*?<ac:parameter ac:name="name">[\s\S]*?<ri:attachment\s+ri:filename="([^"]+)"[^>]*\/>[\s\S]*?<\/ac:parameter>[\s\S]*?<\/ac:structured-macro>/g, (_, filename) => {
360
- return `\n📎 [${filename}](${attachmentsDir}/${filename})\n`;
361
- });
362
-
363
- markdown = markdown.replace(/<ac:structured-macro ac:name="view-file"[^>]*>[\s\S]*?<ac:parameter ac:name="name">[\s\S]*?<ri:attachment\s+ri:filename="([^"]+)"[^>]*\/>[\s\S]*?<\/ac:parameter>[\s\S]*?<ac:parameter ac:name="height">([^<]*)<\/ac:parameter>[\s\S]*?<\/ac:structured-macro>/g, (_, filename, _height) => {
364
- return `\n📎 [${filename}](${attachmentsDir}/${filename})\n`;
365
- });
366
-
367
- markdown = markdown.replace(/<ac:layout>/g, '');
368
- markdown = markdown.replace(/<\/ac:layout>/g, '');
369
- markdown = markdown.replace(/<ac:layout-section[^>]*>/g, '');
370
- markdown = markdown.replace(/<\/ac:layout-section>/g, '');
371
- markdown = markdown.replace(/<ac:layout-cell[^>]*>/g, '');
372
- markdown = markdown.replace(/<\/ac:layout-cell>/g, '');
373
-
374
- markdown = markdown.replace(/<ac:structured-macro[^>]*>[\s\S]*?<\/ac:structured-macro>/g, '');
375
-
376
- // ac:link with ac:anchor → [text](#id) (round-trip with markdownToStorage).
377
- // Must run before the <ac:link[^>]*>…</ac:link> catch-all below, which
378
- // would otherwise drop the anchor link entirely.
379
- markdown = markdown.replace(
380
- /<ac:link ac:anchor="([^"]*)">\s*<ac:plain-text-link-body><!\[CDATA\[([\s\S]*?)\]\]><\/ac:plain-text-link-body>\s*<\/ac:link>/g,
381
- '[$2](#$1)'
382
- );
383
-
384
- markdown = markdown.replace(/<ac:link><ri:url ri:value="([^"]*)" \/><ac:plain-text-link-body><!\[CDATA\[([^\]]*)\]\]><\/ac:plain-text-link-body><\/ac:link>/g, '[$2]($1)');
385
-
386
- markdown = markdown.replace(/<ac:link>\s*<ri:page[^>]*ri:content-title="([^"]*)"[^>]*\/>\s*<\/ac:link>/g, '[$1]');
387
- markdown = markdown.replace(/<ac:link>\s*<ri:page[^>]*ri:content-title="([^"]*)"[^>]*>\s*<\/ri:page>\s*<\/ac:link>/g, '[$1]');
388
-
389
- markdown = markdown.replace(/<ac:link[^>]*>[\s\S]*?<ac:link-body>([\s\S]*?)<\/ac:link-body>[\s\S]*?<\/ac:link>/g, '$1');
390
-
391
- markdown = markdown.replace(/<ac:link[^>]*>[\s\S]*?<\/ac:link>/g, '');
392
-
393
- markdown = htmlToMarkdown(markdown);
394
-
395
- return markdown;
261
+ return walker.walk(storage);
396
262
  }
397
263
  }
398
264
 
@@ -0,0 +1,489 @@
1
+ const { parseDocument } = require('htmlparser2');
2
+ const { decodeHTML } = require('entities');
3
+
4
+ const DEFAULT_MAX_DEPTH = 256;
5
+
6
// Named entities that are deliberately flattened to plain ASCII instead of
// their Unicode equivalents (non-breaking space, curly quotes, ellipsis).
const ENTITY_ASCII_MAP = {
  nbsp: ' ',
  ldquo: '"',
  rdquo: '"',
  lsquo: '\'',
  rsquo: '\'',
  hellip: '...',
};

/**
 * Decode HTML entity references in a string.
 *
 * - Numeric references (`&#65;`, `&#x41;`, `&#X41;`) become the matching
 *   code point; references that do not name a valid code point are left
 *   untouched.
 * - Names listed in ENTITY_ASCII_MAP become their ASCII replacement.
 * - Any other named reference is handed to `decodeHTML` from the
 *   `entities` package.
 *
 * Only `&…;` sequences are rewritten; every other character passes through
 * unchanged.
 *
 * @param {string} text - Text that may contain entity references.
 * @returns {string} The decoded text, or '' for empty/nullish input.
 */
function decodeEntities(text) {
  if (!text) return '';
  // `#[xX]` accepts both hex prefixes; the lowercase-only pattern left the
  // uppercase branch below unreachable and `&#X41;` undecoded.
  return text.replace(/&(#[xX][0-9a-fA-F]+|#\d+|[a-zA-Z][a-zA-Z0-9]*);/g, (match, body) => {
    if (body[0] === '#') {
      const code = body[1] === 'x' || body[1] === 'X'
        ? parseInt(body.slice(2), 16)
        : parseInt(body.slice(1), 10);
      if (!Number.isFinite(code)) return match;
      try {
        return String.fromCodePoint(code);
      } catch (_) {
        // Out-of-range code point: keep the reference text as written.
        return match;
      }
    }
    if (Object.prototype.hasOwnProperty.call(ENTITY_ASCII_MAP, body)) {
      return ENTITY_ASCII_MAP[body];
    }
    return decodeHTML(`&${body};`);
  });
}
41
+
42
/**
 * Raised when storage XML is nested more deeply than the configured limit.
 * The limit that was exceeded is exposed as `maxDepth`.
 */
class StorageDepthExceededError extends Error {
  /**
   * @param {number} maxDepth - The nesting limit that was exceeded.
   */
  constructor(maxDepth) {
    const message = `Storage XML nesting exceeds limit of ${maxDepth} levels`;
    super(message);
    Object.assign(this, { name: 'StorageDepthExceededError', maxDepth });
  }
}
49
+
50
/**
 * Renders Confluence storage-format XML as Markdown by parsing it into a DOM
 * and walking the tree, dispatching on element and macro names.
 */
class StorageWalker {
  /**
   * @param {object} [options]
   * @param {string} [options.attachmentsDir='attachments'] - Directory prefix
   *   used in links to attachments and images.
   * @param {object} [options.labels={}] - Optional display labels
   *   (`expandDetails`, `includePage`, `sharedBlock`, `includeSharedBlock`,
   *   `fromPage`); English defaults are used for any that are missing.
   * @param {function} [options.buildUrl] - Maps a path to a URL; identity by
   *   default.
   * @param {string} [options.webUrlPrefix=''] - Prefix prepended to page paths
   *   before they are passed to `buildUrl`.
   * @param {number} [options.maxDepth=DEFAULT_MAX_DEPTH] - Maximum element
   *   nesting depth before a StorageDepthExceededError is thrown.
   */
  constructor({
    attachmentsDir = 'attachments',
    labels = {},
    buildUrl = (u) => u,
    webUrlPrefix = '',
    maxDepth = DEFAULT_MAX_DEPTH,
  } = {}) {
    this.attachmentsDir = attachmentsDir;
    this.labels = labels;
    this.buildUrl = buildUrl;
    this.webUrlPrefix = webUrlPrefix;
    this.maxDepth = maxDepth;
    // Initialised here (not only in walk()) so the depth guard also works
    // when a handler is invoked directly; `++undefined` would yield NaN and
    // silently disable the limit.
    this._depth = 0;
  }

  /**
   * Parse storage markup and render it as Markdown.
   *
   * @param {string} storage - Confluence storage-format markup.
   * @returns {string} Cleaned-up Markdown.
   * @throws {StorageDepthExceededError} When nesting exceeds `maxDepth`.
   */
  walk(storage) {
    this._depth = 0;
    const dom = parseDocument(storage, {
      xmlMode: true,
      recognizeSelfClosing: true,
      decodeEntities: true,
    });
    return this.cleanup(this.walkNodes(dom.children));
  }

  /** Render a list of nodes and concatenate the results. */
  walkNodes(nodes) {
    if (!nodes) return '';
    return nodes.map((n) => this.walkNode(n)).join('');
  }

  /** Render a single DOM node according to its node type. */
  walkNode(node) {
    if (!node) return '';
    switch (node.type) {
      case 'text':
        // htmlparser2 in xmlMode only decodes the five XML entities (&amp;
        // &lt; &gt; &quot; &apos;). Confluence storage prose still ships HTML
        // named entities like &nbsp;, &eacute;, &ndash;, so decode them here
        // before they reach markdown output.
        return decodeEntities(node.data || '');
      case 'cdata':
        return this.walkNodes(node.children);
      case 'comment':
      case 'directive':
        return '';
      case 'tag':
      case 'script':
      case 'style':
        return this.walkElement(node);
      default:
        return '';
    }
  }

  /**
   * Render an element while tracking nesting depth.
   *
   * @throws {StorageDepthExceededError} When nesting exceeds `maxDepth`.
   */
  walkElement(node) {
    if (++this._depth > this.maxDepth) {
      this._depth--;
      throw new StorageDepthExceededError(this.maxDepth);
    }
    try {
      return this._dispatchElement(node);
    } finally {
      // Always unwind, even when a nested handler throws.
      this._depth--;
    }
  }

  /** Map an element to its Markdown rendering based on the tag name. */
  _dispatchElement(node) {
    const tag = node.name;
    switch (tag) {
      case 'p':
        return '\n' + this.walkNodes(node.children).trim() + '\n';
      case 'h1': case 'h2': case 'h3': case 'h4': case 'h5': case 'h6': {
        const level = parseInt(tag.charAt(1), 10);
        return '\n' + '#'.repeat(level) + ' ' + this.walkNodes(node.children).trim() + '\n';
      }
      case 'strong': case 'b':
        return '**' + this.walkNodes(node.children) + '**';
      case 'em': case 'i':
        return '*' + this.walkNodes(node.children) + '*';
      case 'code':
        return '`' + this.walkNodes(node.children) + '`';
      case 'br':
        return '\n';
      case 'hr':
        return '\n---\n';
      case 'a': {
        const href = decodeEntities((node.attribs && node.attribs.href) || '');
        const inner = this.walkNodes(node.children);
        if (!href) return inner;
        return `[${inner}](${href})`;
      }
      case 'time':
        return decodeEntities((node.attribs && node.attribs.datetime) || '') || this.walkNodes(node.children);
      case 'ul':
        return this.handleList(node, false);
      case 'ol':
        return this.handleList(node, true);
      case 'li':
        return this.walkNodes(node.children);
      case 'table':
        return this.handleTable(node);
      case 'thead': case 'tbody': case 'tfoot': case 'tr': case 'th': case 'td':
        return this.walkNodes(node.children);
      case 'blockquote':
        return this.handleBlockquote(node);
      case 'details': case 'summary':
        return `<${tag}>` + this.walkNodes(node.children) + `</${tag}>`;
      case 'ac:structured-macro':
        return this.handleMacro(node);
      case 'ac:image':
        return this.handleImage(node);
      case 'ac:link':
        return this.handleAcLink(node);
      case 'ac:task-list':
        return this.handleTaskList(node);
      case 'ac:layout': case 'ac:layout-section': case 'ac:layout-cell':
      case 'ac:rich-text-body': case 'ac:link-body':
        return this.walkNodes(node.children);
      case 'ri:url': case 'ri:page': case 'ri:attachment':
      case 'ac:plain-text-body': case 'ac:plain-text-link-body':
      case 'ac:parameter':
        // Only meaningful to the handler of their parent element; rendering
        // them on their own would leak parameter text into the output.
        return '';
      default:
        return this.walkNodes(node.children);
    }
  }

  /**
   * Render a `<ul>`/`<ol>` as a flat Markdown list. Each item is collapsed
   * onto one line; items that render to nothing are skipped.
   */
  handleList(node, ordered) {
    const items = (node.children || []).filter((c) => c.type === 'tag' && c.name === 'li');
    let counter = 1;
    let out = '';
    for (const item of items) {
      const text = this.walkNodes(item.children).replace(/\s+/g, ' ').trim();
      if (!text) continue;
      const marker = ordered ? `${counter++}.` : '-';
      out += `${marker} ${text}\n`;
    }
    return out ? '\n' + out : '';
  }

  /**
   * Render a `<table>` as a pipe table. The first non-empty row is treated
   * as the header. Only rows that belong to this table are used; a nested
   * table is rendered as part of the cell that contains it. Literal `|`
   * characters in cell text are backslash-escaped so they cannot be read as
   * column separators.
   */
  handleTable(node) {
    const rows = [];
    let isHeader = true;
    for (const tr of this._collectOwnRows(node)) {
      const cells = (tr.children || []).filter((c) => c.type === 'tag' && (c.name === 'th' || c.name === 'td'));
      if (cells.length === 0) continue;
      const cellTexts = cells.map((cell) =>
        // `\\?\|` also matches an already-escaped pipe, which keeps the
        // escaping idempotent for text produced by a nested table.
        this.walkNodes(cell.children).replace(/\s+/g, ' ').trim().replace(/\\?\|/g, '\\|') || ' '
      );
      rows.push('| ' + cellTexts.join(' | ') + ' |');
      if (isHeader) {
        rows.push('| ' + cellTexts.map(() => '---').join(' | ') + ' |');
        isHeader = false;
      }
    }
    return rows.length > 0 ? '\n' + rows.join('\n') + '\n' : '';
  }

  /**
   * Collect the `<tr>` descendants of a table in document order without
   * descending into nested `<table>` elements.
   */
  _collectOwnRows(table) {
    const rows = [];
    const visit = (n) => {
      if (!n || n.type !== 'tag') return;
      if (n.name === 'table') return;
      if (n.name === 'tr') {
        rows.push(n);
        return;
      }
      if (n.children) n.children.forEach(visit);
    };
    if (table.children) table.children.forEach(visit);
    return rows;
  }

  /** Render a `<blockquote>` by prefixing every rendered line with `>`. */
  handleBlockquote(node) {
    const inner = this.walkNodes(node.children).trim();
    if (!inner) return '';
    const quoted = inner
      .split('\n')
      .map((line) => (line.length === 0 ? '>' : `> ${line}`))
      .join('\n');
    return '\n' + quoted + '\n';
  }

  /**
   * Dispatch an `ac:structured-macro` on its `ac:name`. Macros without a
   * dedicated handler (and `toc` / `floatmenu`) render to nothing.
   */
  handleMacro(node) {
    const name = node.attribs && node.attribs['ac:name'];
    switch (name) {
      case 'toc':
      case 'floatmenu':
        return '';
      case 'expand':
        return this.handleExpand(node);
      case 'code':
        return this.handleCode(node);
      case 'info': case 'warning': case 'note':
        return this.handleCallout(node, name);
      case 'anchor':
        return this.handleAnchor(node);
      case 'panel':
        return this.handlePanel(node);
      case 'mermaid-macro':
        return this.handleMermaid(node);
      case 'include':
        return this.handleInclude(node);
      case 'shared-block':
      case 'include-shared-block':
        return this.handleSharedBlock(node, name);
      case 'view-file':
        return this.handleViewFile(node);
      default:
        return '';
    }
  }

  /**
   * Render an expand macro: `**EXPAND: title**` … `**EXPAND_END**` markers
   * when a title parameter exists, otherwise a `<details>` block with a
   * default summary label.
   */
  handleExpand(node) {
    const titleParam = this.findParamByName(node, 'title');
    const body = this.getMacroBody(node);
    if (titleParam) {
      const title = this.getTextContent(titleParam);
      return `\n**EXPAND: ${title}**\n\n${this.walkNodes(body).trim()}\n\n**EXPAND_END**\n`;
    }
    return `\n<details>\n<summary>${this.labels.expandDetails || 'Expand Details'}</summary>\n\n${this.walkNodes(body).trim()}\n\n</details>\n`;
  }

  /** Render a code macro as a fenced block, tagged with its language if set. */
  handleCode(node) {
    const langParam = this.findParamByName(node, 'language');
    const lang = langParam ? this.getTextContent(langParam) : '';
    const plainBody = this.findChildByName(node, 'ac:plain-text-body');
    const code = plainBody ? this.getRawText(plainBody) : '';
    return `\n\`\`\`${lang}\n${code}\n\`\`\`\n`;
  }

  /**
   * Render an info/warning/note macro as a blockquote whose first line is
   * the upper-cased marker in bold (e.g. `> **INFO**`).
   */
  handleCallout(node, marker) {
    const body = this.getMacroBody(node);
    const inner = this.walkNodes(body).trim();
    const quoted = inner
      .split('\n')
      .map((line) => (line.length === 0 ? '>' : `> ${line}`))
      .join('\n');
    const header = `> **${marker.toUpperCase()}**`;
    const wrapped = inner.length === 0 ? header : `${header}\n${quoted}`;
    return `\n${wrapped}\n`;
  }

  /** Render an anchor macro as an `**ANCHOR: id**` marker. */
  handleAnchor(node) {
    const param = this.findParamByName(node, '');
    const id = param ? this.getTextContent(param) : '';
    return `\n**ANCHOR: ${id}**\n`;
  }

  /** Render a panel macro as a blockquote headed by its bold title. */
  handlePanel(node) {
    const titleParam = this.findParamByName(node, 'title');
    const title = titleParam ? this.getTextContent(titleParam) : '';
    const body = this.getMacroBody(node);
    // Trim before quoting — walkNodes wraps every <p> with a leading and
    // trailing \n, so untrimmed body splits into ['', 'body', ''] and emits
    // extra `>` blank lines that bracket the real content.
    const cleanContent = this.walkNodes(body).trim();
    const quoted = cleanContent.split('\n').map((line) => (line ? `> ${line}` : '>')).join('\n');
    return `\n> **${title}**\n>\n${quoted}\n`;
  }

  /** Render a mermaid macro as a fenced `mermaid` code block. */
  handleMermaid(node) {
    const plainBody = this.findChildByName(node, 'ac:plain-text-body');
    const code = plainBody ? this.getRawText(plainBody).trim() : '';
    return `\n\`\`\`mermaid\n${code}\n\`\`\`\n`;
  }

  /**
   * Render an include macro as a quoted link to the included page. Space
   * keys starting with `~` get a `display/…` URL; other spaces get a
   * placeholder URL flagged for manual correction.
   */
  handleInclude(node) {
    const param = this.findParamByName(node, '');
    if (!param) return '';
    const acLink = this.findChildByName(param, 'ac:link');
    if (!acLink) return '';
    const riPage = this.findChildByName(acLink, 'ri:page');
    if (!riPage) return '';
    const spaceKey = decodeEntities(riPage.attribs['ri:space-key'] || '');
    const title = decodeEntities(riPage.attribs['ri:content-title'] || '');
    const label = this.labels.includePage || 'Include Page';
    if (spaceKey.startsWith('~')) {
      const spacePath = `display/${spaceKey}/${encodeURIComponent(title)}`;
      return `\n> 📄 **${label}**: [${title}](${this.buildUrl(`${this.webUrlPrefix}/${spacePath}`)})\n`;
    }
    return `\n> 📄 **${label}**: [${title}](${this.buildUrl(`${this.webUrlPrefix}/spaces/${spaceKey}/pages/[PAGE_ID_HERE]`)}) _(manual link correction required)_\n`;
  }

  /**
   * Render a shared-block / include-shared-block macro. An
   * include-shared-block that references a page yields a one-line note;
   * otherwise the macro body is rendered as a titled blockquote.
   */
  handleSharedBlock(node, type) {
    const blockKeyParam = this.findParamByName(node, 'shared-block-key');
    const blockKey = blockKeyParam ? this.getTextContent(blockKeyParam) : '';
    const pageParam = this.findParamByName(node, 'page');
    if (pageParam && type === 'include-shared-block') {
      const acLink = this.findChildByName(pageParam, 'ac:link');
      if (acLink) {
        const riPage = this.findChildByName(acLink, 'ri:page');
        if (riPage) {
          const pageTitle = decodeEntities(riPage.attribs['ri:content-title'] || '');
          const includeLabel = this.labels.includeSharedBlock || 'Include Shared Block';
          const fromPageLabel = this.labels.fromPage || 'from page';
          return `\n> 📄 **${includeLabel}**: ${blockKey} (${fromPageLabel}: ${pageTitle} [link needs manual correction])\n`;
        }
      }
    }
    const body = this.getMacroBody(node);
    // Trim before quoting so the leading/trailing newlines produced for
    // paragraphs do not become bracketing `>` blank lines.
    const cleanContent = this.walkNodes(body).trim();
    const sharedLabel = this.labels.sharedBlock || 'Shared Block';
    const quoted = cleanContent.split('\n').map((line) => (line ? `> ${line}` : '>')).join('\n');
    return `\n> **${sharedLabel}: ${blockKey}**\n>\n${quoted}\n`;
  }

  /** Render a view-file macro as a paperclip link to the attachment. */
  handleViewFile(node) {
    const nameParam = this.findParamByName(node, 'name');
    if (!nameParam) return '';
    const riAttachment = this.findChildByName(nameParam, 'ri:attachment');
    if (!riAttachment) return '';
    const filename = decodeEntities(riAttachment.attribs['ri:filename'] || '');
    return `\n📎 [${filename}](${this.attachmentsDir}/${filename})\n`;
  }

  /** Render an `ac:image` that references an attachment as a Markdown image. */
  handleImage(node) {
    const riAttachment = this.findChildByName(node, 'ri:attachment');
    if (!riAttachment) return '';
    const filename = decodeEntities(riAttachment.attribs['ri:filename'] || '');
    return `![${filename}](${this.attachmentsDir}/${filename})`;
  }

  /**
   * Render an `ac:link`.
   *
   * - With an `ac:anchor` attribute: `[text](#anchor)`.
   * - With an `ri:url` child: `[text](url)`.
   * - Otherwise: the rendered `ac:link-body` if present, else `[title]` for
   *   an `ri:page` target, else ''.
   *
   * For anchor and URL links the text comes from `ac:plain-text-link-body`
   * and falls back to the rendered `ac:link-body`, so links whose visible
   * text is rich content are not dropped. A link with no text at all still
   * renders to '' rather than a visibly empty `[](…)`.
   */
  handleAcLink(node) {
    const attribs = node.attribs || {};
    const anchor = attribs['ac:anchor'];
    const riUrl = anchor ? null : this.findChildByName(node, 'ri:url');
    if (anchor || riUrl) {
      const target = anchor
        ? `#${decodeEntities(anchor)}`
        : decodeEntities(riUrl.attribs['ri:value'] || '');
      const plainBody = this.findChildByName(node, 'ac:plain-text-link-body');
      let text = plainBody ? this.getRawText(plainBody) : '';
      if (!text) {
        // Walk the rich body only when needed, so links with a plain-text
        // body render exactly as before.
        const richBody = this.findChildByName(node, 'ac:link-body');
        text = richBody ? this.walkNodes(richBody.children).trim() : '';
      }
      return text ? `[${text}](${target})` : '';
    }
    const linkBody = this.findChildByName(node, 'ac:link-body');
    if (linkBody) {
      return this.walkNodes(linkBody.children).trim();
    }
    const riPage = this.findChildByName(node, 'ri:page');
    if (riPage) {
      const title = decodeEntities(riPage.attribs['ri:content-title'] || '');
      return `[${title}]`;
    }
    return '';
  }

  /**
   * Render an `ac:task-list` as Markdown checkboxes. A task is checked when
   * its status text is exactly `complete`; tasks with an empty body are
   * skipped.
   */
  handleTaskList(node) {
    const tasks = (node.children || []).filter((c) => c.type === 'tag' && c.name === 'ac:task');
    const lines = [];
    for (const task of tasks) {
      const status = this.findChildByName(task, 'ac:task-status');
      const body = this.findChildByName(task, 'ac:task-body');
      const statusText = status ? this.getTextContent(status) : '';
      const bodyText = body
        ? this.walkNodes(body.children).replace(/\s+/g, ' ').trim()
        : '';
      const checkbox = statusText === 'complete' ? '[x]' : '[ ]';
      if (bodyText) lines.push(`- ${checkbox} ${bodyText}`);
    }
    return lines.length > 0 ? '\n' + lines.join('\n') + '\n' : '';
  }

  /** Return the first direct `ac:parameter` child with the given `ac:name`, or null. */
  findParamByName(node, name) {
    if (!node || !node.children) return null;
    for (const child of node.children) {
      if (child.type === 'tag' && child.name === 'ac:parameter' && child.attribs['ac:name'] === name) {
        return child;
      }
    }
    return null;
  }

  /** Return the first direct child element with the given tag name, or null. */
  findChildByName(node, name) {
    if (!node || !node.children) return null;
    for (const child of node.children) {
      if (child.type === 'tag' && child.name === name) return child;
    }
    return null;
  }

  /** Return every descendant element with the given tag name, in document order. */
  findAllDescendants(node, name) {
    const result = [];
    const visit = (n) => {
      if (!n) return;
      if (n.type === 'tag' && n.name === name) result.push(n);
      if (n.children) n.children.forEach(visit);
    };
    if (node.children) node.children.forEach(visit);
    return result;
  }

  /** Return the children of the macro's `ac:rich-text-body`, or [] if absent. */
  getMacroBody(node) {
    const body = this.findChildByName(node, 'ac:rich-text-body');
    return body ? body.children : [];
  }

  /** Return the entity-decoded concatenation of all text beneath a node. */
  getTextContent(node) {
    return decodeEntities(this._collectText(node));
  }

  /** Concatenate the raw data of every text node beneath a node. */
  _collectText(node) {
    if (!node) return '';
    if (node.type === 'text') return node.data || '';
    if (node.children) return node.children.map((c) => this._collectText(c)).join('');
    return '';
  }

  /**
   * Return the entity-decoded text of a plain-text body, reading only direct
   * text children and CDATA sections.
   */
  getRawText(node) {
    // Apply entity decoding once at the top level. Internal recursion uses
    // the raw helper so nested CDATA segments are not double-decoded.
    return decodeEntities(this._collectRawText(node));
  }

  /** Concatenate direct text children and the text inside CDATA children. */
  _collectRawText(node) {
    if (!node || !node.children) return '';
    let out = '';
    for (const child of node.children) {
      if (child.type === 'text') out += child.data || '';
      else if (child.type === 'cdata') out += this._collectRawText(child);
    }
    return out;
  }

  /** Normalise whitespace in the rendered Markdown, leaving fenced code untouched. */
  cleanup(text) {
    // Split on fenced code boundaries so cleanup rules (indent stripping,
    // multi-space collapsing) don't mangle indentation-sensitive code.
    // Walker only emits triple-backtick fences (see code/mermaid macros).
    const segments = text.split(/(```[\s\S]*?```)/g);
    const cleaned = segments
      // The capture group places fenced blocks at the odd indices.
      .map((seg, i) => (i % 2 === 1 ? seg : this._cleanupOutsideFence(seg)))
      .join('');
    return cleaned.trim();
  }

  /**
   * Whitespace cleanup for text outside code fences: strip trailing and
   * leading indentation, ensure a blank line after headings, collapse runs
   * of blank lines, and collapse runs of spaces/tabs to a single space.
   */
  _cleanupOutsideFence(text) {
    let out = text;
    out = out.replace(/[ \t]+$/gm, '');
    out = out.replace(/^[ \t]+(?!([`>]|[*+-] |\d+[.)] ))/gm, '');
    out = out.replace(/^(#{1,6}[^\n]+)\n(?!\n)/gm, '$1\n\n');
    out = out.replace(/\n\s*\n\s*\n+/g, '\n\n');
    out = out.replace(/[ \t]+/g, ' ');
    return out;
  }
}
488
+
489
+ module.exports = { StorageWalker, StorageDepthExceededError, DEFAULT_MAX_DEPTH };
@@ -1,19 +1,21 @@
1
1
  {
2
2
  "name": "confluence-cli",
3
- "version": "2.1.2",
3
+ "version": "2.1.4",
4
4
  "lockfileVersion": 3,
5
5
  "requires": true,
6
6
  "packages": {
7
7
  "": {
8
8
  "name": "confluence-cli",
9
- "version": "2.1.2",
9
+ "version": "2.1.4",
10
10
  "license": "MIT",
11
11
  "dependencies": {
12
12
  "axios": "^1.15.0",
13
13
  "chalk": "^4.1.2",
14
14
  "commander": "^11.1.0",
15
+ "entities": "^4.5.0",
15
16
  "form-data": "^4.0.5",
16
17
  "html-to-text": "^9.0.5",
18
+ "htmlparser2": "^9.1.0",
17
19
  "inquirer": "^8.2.6",
18
20
  "markdown-it": "^14.1.0"
19
21
  },
@@ -652,7 +654,7 @@
652
654
  "node": ">=14"
653
655
  }
654
656
  },
655
- "node_modules/htmlparser2": {
657
+ "node_modules/html-to-text/node_modules/htmlparser2": {
656
658
  "version": "8.0.2",
657
659
  "resolved": "https://registry.npmjs.org/htmlparser2/-/htmlparser2-8.0.2.tgz",
658
660
  "integrity": "sha512-GYdjWKDkbRLkZ5geuHs5NY1puJ+PXwP7+fHPRz06Eirsb9ugf6d8kkXav6ADhcODhFFPMIXyxkxSuMf3D6NCFA==",
@@ -671,6 +673,25 @@
671
673
  "entities": "^4.4.0"
672
674
  }
673
675
  },
676
+ "node_modules/htmlparser2": {
677
+ "version": "9.1.0",
678
+ "resolved": "https://registry.npmjs.org/htmlparser2/-/htmlparser2-9.1.0.tgz",
679
+ "integrity": "sha512-5zfg6mHUoaer/97TxnGpxmbR7zJtPwIYFMZ/H5ucTlPZhKvtum05yiPK3Mgai3a0DyVxv7qYqoweaEd2nrYQzQ==",
680
+ "funding": [
681
+ "https://github.com/fb55/htmlparser2?sponsor=1",
682
+ {
683
+ "type": "github",
684
+ "url": "https://github.com/sponsors/fb55"
685
+ }
686
+ ],
687
+ "license": "MIT",
688
+ "dependencies": {
689
+ "domelementtype": "^2.3.0",
690
+ "domhandler": "^5.0.3",
691
+ "domutils": "^3.1.0",
692
+ "entities": "^4.5.0"
693
+ }
694
+ },
674
695
  "node_modules/iconv-lite": {
675
696
  "version": "0.6.3",
676
697
  "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.6.3.tgz",
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "confluence-cli",
3
- "version": "2.1.2",
3
+ "version": "2.1.4",
4
4
  "description": "A command-line interface for Atlassian Confluence with page creation and editing capabilities",
5
5
  "main": "index.js",
6
6
  "bin": {
@@ -26,8 +26,10 @@
26
26
  "axios": "^1.15.0",
27
27
  "chalk": "^4.1.2",
28
28
  "commander": "^11.1.0",
29
+ "entities": "^4.5.0",
29
30
  "form-data": "^4.0.5",
30
31
  "html-to-text": "^9.0.5",
32
+ "htmlparser2": "^9.1.0",
31
33
  "inquirer": "^8.2.6",
32
34
  "markdown-it": "^14.1.0"
33
35
  },