confluence-cli 2.1.8 → 2.1.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,394 @@
1
+ // HTML → Confluence storage walker. Parses with htmlparser2 using
2
+ // `decodeEntities: false` so attribute and text entities round-trip
3
+ // byte-identical. Dispatches by tag; unhandled tags pass through with
4
+ // attributes preserved.
5
+
6
+ const { parseDocument } = require('htmlparser2');
7
+
8
// Link rendering modes accepted through the `linkStyle` option.
const VALID_LINK_STYLES = ['smart', 'plain', 'wiki'];

// Ceiling on HTML nesting depth. The walker recurses once per element, so
// this bound keeps pathological / malicious input away from the JS stack
// limit.
const DEFAULT_MAX_DEPTH = 256;

/**
 * Thrown when the input nests elements deeper than the configured limit.
 * The exceeded limit is exposed as `maxDepth`.
 */
class HtmlDepthExceededError extends Error {
  /**
   * @param {number} maxDepth - The nesting limit that was exceeded.
   */
  constructor(maxDepth) {
    const message = `HTML nesting exceeds limit of ${maxDepth} levels`;
    super(message);
    Object.assign(this, { name: 'HtmlDepthExceededError', maxDepth });
  }
}
20
// Tags normalized to self-closing form. Only `hr` is listed: `br` / `img`
// flow through in whatever shape the source had (markdown-it emits them
// without a slash).
const VOID_TAGS = new Set(['hr']);

// Callout macro names; a blockquote opts in with the upper-cased form.
const CALLOUT_MARKERS = ['info', 'warning', 'note'];

// Phrasing-content tags that participate in the `<li>` / `<th>` / `<td>`
// `<p>`-wrap quirk: an item whose children are all inline, with no newline
// in any text node, has its content wrapped in `<p>`. markdown-it never
// emits the latter half of this set, but raw HTML input does, so those tags
// get the same treatment.
const INLINE_TAGS = new Set([
  'a', 'strong', 'em', 'code', 'br', 'img', 'span',
  'mark', 'sub', 'sup', 'ins', 'del', 'b', 'i', 'u', 'small', 's',
  'abbr', 'kbd', 'q', 'var', 'cite', 'time', 'dfn', 'samp',
]);

/**
 * Decides whether an item's content should be wrapped in `<p>`.
 *
 * @param {object} node - Parsed element (list item or table cell).
 * @returns {boolean} `false` as soon as a text child contains a newline or
 *   a tag child is not in `INLINE_TAGS`; `true` otherwise, including when
 *   the node has no `children` at all.
 */
function shouldWrapInP(node) {
  const kids = node.children;
  if (!kids) return true;

  const blocksWrap = kids.some((kid) => {
    if (kid.type === 'text') return kid.data.includes('\n');
    if (kid.type === 'tag') return !INLINE_TAGS.has(kid.name);
    return false;
  });
  return !blocksWrap;
}
43
+
44
/**
 * Tells whether a node is a text node holding nothing but whitespace
 * (an empty string counts).
 *
 * @param {object} node - Parsed DOM node.
 * @returns {boolean}
 */
function isWhitespaceOnly(node) {
  if (node.type !== 'text') return false;
  // No non-whitespace character anywhere means whitespace-only.
  return !/\S/.test(node.data);
}
47
+
48
/**
 * Returns the node's children minus whitespace-only text nodes, so that
 * structural shape checks (a lone `<strong>` inside a paragraph, etc.)
 * tolerate parser variations that emit leading/trailing whitespace text
 * siblings.
 *
 * @param {object} node - Parsed DOM node; `children` may be absent.
 * @returns {object[]} A new array; empty when the node has no children.
 */
function meaningfulChildren(node) {
  const kept = [];
  for (const child of node.children || []) {
    if (!isWhitespaceOnly(child)) kept.push(child);
  }
  return kept;
}
54
+
55
/**
 * Recognizes the macro-marker paragraphs `<p><strong>TOC</strong></p>` and
 * `<p><strong>ANCHOR: id</strong></p>`.
 *
 * The shape is deliberately strict — a `p` holding exactly one `strong`
 * holding exactly one text node (whitespace-only text siblings aside) — so
 * any embellished variant falls through to a plain paragraph.
 *
 * @param {object} node - Parsed element.
 * @returns {{kind: 'toc'}|{kind: 'anchor', id: string}|null}
 */
function detectParagraphMarker(node) {
  if (node.name !== 'p') return null;

  const [strong, ...otherKids] = meaningfulChildren(node);
  if (strong === undefined || otherKids.length > 0) return null;
  const isStrong = strong.type === 'tag' && strong.name === 'strong';
  if (!isStrong) return null;

  const [label, ...otherLabels] = meaningfulChildren(strong);
  if (label === undefined || otherLabels.length > 0) return null;
  if (label.type !== 'text') return null;

  const content = label.data;
  if (content === 'TOC') return { kind: 'toc' };
  const anchorMatch = content.match(/^ANCHOR: (.+)$/);
  return anchorMatch ? { kind: 'anchor', id: anchorMatch[1] } : null;
}
73
+
74
/**
 * Recognizes an EXPAND opener: `<p><strong>EXPAND: …</strong></p>`.
 *
 * The title may carry nested inline HTML (em, code, a, s, …) that is
 * stripped later, so only the strong's first child is inspected: it must be
 * a text node starting with `EXPAND: `. It need not be the only child.
 *
 * @param {object} node - Parsed DOM node.
 * @returns {boolean}
 */
function isExpandOpen(node) {
  const isParagraph = node.type === 'tag' && node.name === 'p';
  if (!isParagraph) return false;

  const paragraphKids = meaningfulChildren(node);
  const strong = paragraphKids.length === 1 ? paragraphKids[0] : null;
  if (strong === null) return false;
  if (strong.type !== 'tag' || strong.name !== 'strong') return false;

  // Absent or empty `children` both yield no lead node.
  const lead = strong.children ? strong.children[0] : undefined;
  if (!lead) return false;
  return lead.type === 'text' && lead.data.startsWith('EXPAND: ');
}
88
+
89
/**
 * Recognizes an EXPAND terminator: a `p` whose only meaningful child is a
 * `strong` whose only meaningful child is the text `EXPAND_END`.
 *
 * @param {object} node - Parsed DOM node.
 * @returns {boolean}
 */
function isExpandClose(node) {
  const isParagraph = node.type === 'tag' && node.name === 'p';
  if (!isParagraph) return false;

  const paragraphKids = meaningfulChildren(node);
  const strong = paragraphKids[0];
  if (paragraphKids.length !== 1 || strong.type !== 'tag' || strong.name !== 'strong') {
    return false;
  }

  const strongKids = meaningfulChildren(strong);
  const label = strongKids[0];
  return strongKids.length === 1 && label.type === 'text' && label.data === 'EXPAND_END';
}
100
+
101
/**
 * Decodes a small, fixed set of HTML entities in `text`.
 *
 * Replacement order matters for doubly-escaped input. The default order
 * handles the ampersand entity first, which over-decodes: the escaped form
 * of an `<`-entity collapses all the way to `<`. With `preserveDouble` the
 * ampersand entity is handled last, so the same input round-trips as
 * `&lt;`. Both orderings are intentional — call sites choose via the
 * option.
 *
 * The apostrophe entity (`&#39;`) is intentionally absent from the
 * `preserveDouble` order: the earlier code-fence decoder did not list it
 * either (only the anchor-body decoder did), and the asymmetry is kept for
 * byte parity.
 *
 * @param {string} text - Text containing raw entities.
 * @param {{preserveDouble?: boolean}} [options]
 * @returns {string} The decoded text.
 */
function decodeEntities(text, { preserveDouble = false } = {}) {
  const amp = [/&amp;/g, '&'];
  const lt = [/&lt;/g, '<'];
  const gt = [/&gt;/g, '>'];
  const quot = [/&quot;/g, '"'];
  const apos = [/&#39;/g, '\''];

  const order = preserveDouble
    ? [quot, lt, gt, amp]
    : [amp, lt, gt, quot, apos];

  let decoded = text;
  for (const [entity, literal] of order) {
    decoded = decoded.replace(entity, literal);
  }
  return decoded;
}
124
+
125
/**
 * Converts an `<a>` element to Confluence storage markup.
 *
 * Same-page anchor links (`href="#id"`) short-circuit `ctx.linkStyle` and
 * always become an `ac:link` with `ac:anchor`. Every other href branches on
 * `ctx.linkStyle`:
 *   - `smart`: `<a>` with `data-card-appearance="inline"` forced on
 *   - `wiki`:  `ac:link` + `ri:url` with a plain-text CDATA body
 *   - `plain` (and anything else): `<a>` with its attributes as-is
 *
 * Values interpolated into hand-built attributes go through
 * `escapeAttrValue`, because the parser hands over single-quoted source
 * attributes with literal `"` characters intact. CDATA bodies have any
 * `]]>` split across two sections so the section cannot be closed early.
 *
 * @param {object} node - The `<a>` element.
 * @param {object} ctx - Walker context; `linkStyle` is read here.
 * @returns {string} Storage-format markup for the link.
 */
function convertLink(node, ctx) {
  const attribs = node.attribs || {};
  const href = attribs.href || '';
  const inner = walkChildren(node, ctx);
  // `]]>` terminates a CDATA section; split it the same way the code-block
  // converter does.
  const toCdata = (s) => s.replace(/]]>/g, ']]]]><![CDATA[>');

  if (href.startsWith('#')) {
    const anchor = escapeAttrValue(href.slice(1));
    const text = toCdata(decodeEntities(inner));
    return `<ac:link ac:anchor="${anchor}"><ac:plain-text-link-body><![CDATA[${text}]]></ac:plain-text-link-body></ac:link>`;
  }

  switch (ctx.linkStyle) {
    case 'smart': {
      // Spread order forces policy: any pre-existing `data-card-appearance`
      // on the source `<a>` is overwritten — smart links must be inline.
      const merged = { ...attribs, 'data-card-appearance': 'inline' };
      return `<a${renderAttrs(merged)}>${inner}</a>`;
    }
    case 'wiki':
      return `<ac:link><ri:url ri:value="${escapeAttrValue(href)}" /><ac:plain-text-link-body><![CDATA[${toCdata(inner)}]]></ac:plain-text-link-body></ac:link>`;
    case 'plain':
    default:
      return `<a${renderAttrs(attribs)}>${inner}</a>`;
  }
}
152
+
153
/**
 * Detects a callout blockquote (`> **INFO|WARNING|NOTE**`).
 *
 * Detection is structural: the `<strong>` must be the first meaningful
 * child of the blockquote's first meaningful child, which must be a `<p>`.
 * That rules out false positives for a mid-paragraph `**INFO**`.
 *
 * Two markdown-it shapes are accepted:
 *   separated: `<blockquote><p><strong>INFO</strong></p><p>body</p></blockquote>`
 *   same-line: `<blockquote><p><strong>INFO</strong>\nbody</p></blockquote>`
 *
 * @param {object} node - The `<blockquote>` element.
 * @returns {{marker: string, sameLine: boolean, markerP: object, tail: object[]}|null}
 *   `marker` is the lower-case callout name, `markerP` the paragraph that
 *   holds the marker, `tail` the nodes following the `<strong>` inside that
 *   paragraph, and `sameLine` whether `tail` has any non-whitespace node.
 *   `null` when the blockquote is not a callout.
 */
function detectBlockquoteCallout(node) {
  const [markerP] = meaningfulChildren(node);
  if (markerP === undefined) return null;
  if (!(markerP.type === 'tag' && markerP.name === 'p')) return null;

  const paragraphKids = markerP.children || [];
  const strongAt = paragraphKids.findIndex((kid) => !isWhitespaceOnly(kid));
  if (strongAt === -1) return null;
  const strong = paragraphKids[strongAt];
  if (!(strong.type === 'tag' && strong.name === 'strong')) return null;

  const labelNodes = meaningfulChildren(strong);
  if (labelNodes.length !== 1) return null;
  const [label] = labelNodes;
  if (label.type !== 'text') return null;

  const marker = CALLOUT_MARKERS.find((name) => name.toUpperCase() === label.data);
  if (!marker) return null;

  const tail = paragraphKids.slice(strongAt + 1);
  const sameLine = tail.some((kid) => !isWhitespaceOnly(kid));
  if (sameLine) {
    // Inspect tail[0] itself, not the first meaningful node: the body of
    // `> **INFO**\n> body` must literally begin with a newline. A leading
    // text node without one (`<strong>INFO</strong> body`) is prose
    // continuation, not a callout, and has to be rejected here.
    const [lead] = tail;
    const startsOnNewLine = lead.type === 'text' && /^\s*\n/.test(lead.data);
    if (!startsOnNewLine) return null;
  }
  return { marker, sameLine, markerP, tail };
}
187
+
188
/**
 * Renders a `<blockquote>`: as a callout macro when
 * `detectBlockquoteCallout` recognizes a marker, otherwise as a plain
 * `<blockquote>` around the walked children.
 *
 * @param {object} node - The `<blockquote>` element.
 * @param {object} ctx - Walker context, passed through to the walker.
 * @returns {string} Storage-format markup.
 */
function convertBlockquote(node, ctx) {
  const callout = detectBlockquoteCallout(node);
  if (!callout) {
    return `<blockquote>${walkChildren(node, ctx)}</blockquote>`;
  }

  const render = (nodes) => nodes.map((n) => walkNode(n, ctx)).join('');
  // Everything in the blockquote except the paragraph carrying the marker.
  const siblings = (node.children || []).filter((kid) => kid !== callout.markerP);

  let body;
  if (callout.sameLine) {
    // Same-line form: the nodes after the `<strong>` inside the marker
    // paragraph become the first `<p>` of the body. The newline markdown-it
    // emits between the strong and the body text is stripped.
    const lead = render(callout.tail).replace(/^\s*\n/, '');
    body = `<p>${lead}</p>${render(siblings)}`;
  } else {
    // Separated form: the marker paragraph is dropped entirely, and the
    // whitespace its neighbouring text node leaves at the front is trimmed.
    body = render(siblings).replace(/^\s+/, '');
  }

  // NOTE(review): the line breaks in this wrapper are part of the emitted
  // string. Confirm the exact whitespace against the checked-in file —
  // indentation is not recoverable from this view.
  return `<ac:structured-macro ac:name="${callout.marker}">\n<ac:rich-text-body>${body}</ac:rich-text-body>\n</ac:structured-macro>`;
}
220
+
221
/**
 * Converts `<pre>` to a Confluence `code` macro.
 *
 * Only strict `<pre><code>` adjacency qualifies: a `<pre>` whose single
 * child is a `<code>` element. Whitespace siblings or any other shape fall
 * through as a plain `<pre>` around the walked children.
 *
 * The language is taken from a `language-xxx` class on `<code>` and
 * defaults to `text`. The body is the concatenated text of every text node
 * under `<code>`, in document order, including text inside nested elements
 * (e.g. highlighter `<span>`s) so that wrapped code is not dropped. It is
 * decoded manually because the parser keeps entities raw and CDATA is
 * opaque downstream; any `]]>` is split across two CDATA sections.
 *
 * @param {object} node - The `<pre>` element.
 * @param {object} ctx - Walker context; used only on the plain-`<pre>` path.
 * @returns {string} Storage-format markup.
 */
function convertCodeBlock(node, ctx) {
  const children = node.children || [];
  const isCodeBlock = children.length === 1 &&
    children[0].type === 'tag' &&
    children[0].name === 'code';
  if (!isCodeBlock) {
    return `<pre>${walkChildren(node, ctx)}</pre>`;
  }

  const codeNode = children[0];
  const classAttr = (codeNode.attribs || {}).class || '';
  const langMatch = classAttr.match(/language-(\w+)/);
  const language = langMatch ? langMatch[1] : 'text';

  // Gather text depth-first with an explicit stack rather than recursion,
  // so deeply nested input cannot exhaust the call stack on this path.
  let body = '';
  const pending = [...(codeNode.children || [])].reverse();
  while (pending.length > 0) {
    const current = pending.pop();
    if (current.type === 'text') {
      body += current.data;
    } else if (current.children) {
      for (let i = current.children.length - 1; i >= 0; i -= 1) {
        pending.push(current.children[i]);
      }
    }
  }

  body = decodeEntities(body.replace(/\n$/, ''), { preserveDouble: true })
    .replace(/]]>/g, ']]]]><![CDATA[>');
  return `<ac:structured-macro ac:name="code"><ac:parameter ac:name="language">${language}</ac:parameter><ac:plain-text-body><![CDATA[${body}]]></ac:plain-text-body></ac:structured-macro>`;
}
245
+
246
/**
 * Re-escapes literal `"` in an attribute value that is about to be emitted
 * inside a double-quoted slot.
 *
 * The parser runs with `decodeEntities: false`, so source-escaped entities
 * arrive intact — but a single-quoted source attribute
 * (`<a title='he said "hi"'>`) delivers a literal `"` that would close the
 * emitted slot and corrupt the XML. `&` is left alone so already-escaped
 * sources (`&amp;`, `&quot;`, …) round-trip cleanly.
 *
 * Trust boundary: input is assumed to be valid HTML. A valid attribute
 * value cannot hold raw `<` or `>` (they must be entities), so those are
 * not escaped; malformed input smuggling raw angle brackets through would
 * produce malformed XML.
 *
 * @param {*} v - Attribute value; coerced with `String`.
 * @returns {string} The value with every `"` replaced by `&quot;`.
 */
function escapeAttrValue(v) {
  return String(v).split('"').join('&quot;');
}
260
+
261
/**
 * Serializes an attribute map as ` key="value"` pairs, each with a leading
 * space, in the map's own key order. Values pass through
 * `escapeAttrValue`.
 *
 * @param {object} [attribs] - Attribute name → value map.
 * @returns {string} The serialized attributes; `''` when `attribs` is
 *   missing or has no keys.
 */
function renderAttrs(attribs) {
  if (!attribs) return '';
  let rendered = '';
  for (const key of Object.keys(attribs)) {
    rendered += ` ${key}="${escapeAttrValue(attribs[key])}"`;
  }
  return rendered;
}
267
+
268
/**
 * Walks a node's children and concatenates their storage-format output.
 *
 * Also handles sibling-level EXPAND spans: an opener paragraph and the
 * first EXPAND_END paragraph after it collapse into one `expand` macro,
 * with everything between them as the body. Pairing is non-greedy, the same
 * as the earlier regex pipeline: a nested open/close pair inside the body
 * has its close consumed by the outer open, which leaves the second close
 * as an orphan paragraph. An opener with no later close is rendered as an
 * ordinary paragraph.
 *
 * @param {object} node - Parsed node; `children` may be absent.
 * @param {object} ctx - Walker context, passed through to `walkNode`.
 * @returns {string} Concatenated markup; `''` when there are no children.
 */
function walkChildren(node, ctx) {
  if (!node.children) return '';
  const children = node.children;
  const out = [];
  // Once a search for EXPAND_END fails, no later opener can succeed either;
  // remembering that avoids rescanning the siblings for every opener.
  let noCloseAhead = false;
  let i = 0;
  while (i < children.length) {
    const child = children[i];
    if (!noCloseAhead && isExpandOpen(child)) {
      const endIdx = children.findIndex((c, j) => j > i && isExpandClose(c));
      if (endIdx === -1) {
        noCloseAhead = true;
      } else {
        // Locate the `<strong>` the same way `isExpandOpen` does. The raw
        // first child can be a whitespace text node, which would yield an
        // empty title.
        const titleStrong = meaningfulChildren(child)[0];
        const titleHtml = walkChildren(titleStrong, ctx).replace(/^EXPAND: /, '');
        // Confluence's `<ac:parameter>` normalizer is text-only (rejects `<s>`
        // with HTTP 500, silently truncates at the first '<'). Strip literal
        // tags; entities survive because the rule requires a literal '<'.
        const cleanTitle = titleHtml.replace(/<[^>]+>/g, '').trim();
        const bodyHtml = children
          .slice(i + 1, endIdx)
          .map((c) => walkNode(c, ctx))
          .join('')
          .trim();
        out.push(`<ac:structured-macro ac:name="expand"><ac:parameter ac:name="title">${cleanTitle}</ac:parameter><ac:rich-text-body>${bodyHtml}</ac:rich-text-body></ac:structured-macro>`);
        i = endIdx + 1;
        continue;
      }
    }
    out.push(walkNode(child, ctx));
    i++;
  }
  return out.join('');
}
305
+
306
/**
 * Renders a single node. Text nodes yield their raw data, element nodes go
 * to `dispatchTag`, and every other node type yields an empty string.
 *
 * `ctx.depth` is incremented for the duration of each element and always
 * restored, including when rendering throws.
 *
 * @param {object} node - Parsed DOM node.
 * @param {object} ctx - Walker context; `depth` and `maxDepth` are used.
 * @returns {string} Storage-format markup for the node.
 * @throws {HtmlDepthExceededError} When entering this element would take
 *   the nesting depth past `ctx.maxDepth`.
 */
function walkNode(node, ctx) {
  switch (node.type) {
    case 'text':
      return node.data;
    case 'tag':
      break;
    default:
      return '';
  }

  ctx.depth += 1;
  try {
    if (ctx.depth > ctx.maxDepth) {
      throw new HtmlDepthExceededError(ctx.maxDepth);
    }
    return dispatchTag(node, ctx);
  } finally {
    // Single exit point for the depth counter, hit on return and on throw.
    ctx.depth -= 1;
  }
}
319
+
320
/**
 * Renders one element by tag name.
 *
 * Paragraph markers, `pre`, `a` and `blockquote` go to dedicated
 * converters. `li` / `th` / `td` apply the `<p>`-wrap rule from
 * `shouldWrapInP`. `ul` / `ol` / `li` are emitted without attributes.
 * `hr`, `br` and `img` have fixed shapes. Everything else passes through
 * as `<tag attrs>children</tag>`.
 *
 * @param {object} node - Parsed element.
 * @param {object} ctx - Walker context, forwarded to converters.
 * @returns {string} Storage-format markup for the element.
 */
function dispatchTag(node, ctx) {
  const tag = node.name;
  // Thunks, so attributes and children are only rendered by the branch
  // that is actually taken.
  const attrs = () => renderAttrs(node.attribs);
  const kids = () => walkChildren(node, ctx);
  const passThrough = () => `<${tag}${attrs()}>${kids()}</${tag}>`;

  switch (tag) {
    case 'p': {
      const marker = detectParagraphMarker(node);
      if (marker) {
        if (marker.kind === 'toc') {
          return '<ac:structured-macro ac:name="toc" />';
        }
        if (marker.kind === 'anchor') {
          return `<ac:structured-macro ac:name="anchor"><ac:parameter ac:name="">${marker.id}</ac:parameter></ac:structured-macro>`;
        }
      }
      return passThrough();
    }

    case 'h1':
    case 'h2':
    case 'h3':
    case 'h4':
    case 'h5':
    case 'h6':
    case 'strong':
    case 'em':
      return passThrough();

    case 'hr':
      return '<hr />';
    case 'br':
      return '<br>';
    case 'img':
      return `<img${attrs()}>`;

    case 'ul':
    case 'ol':
      return `<${tag}>${kids()}</${tag}>`;

    case 'li': {
      const inner = kids();
      const content = shouldWrapInP(node) ? `<p>${inner}</p>` : inner;
      return `<li>${content}</li>`;
    }

    case 'pre':
      return convertCodeBlock(node, ctx);

    case 'code':
      // Inline only — `<code>` inside `<pre>` is consumed by convertCodeBlock.
      return passThrough();

    case 'a':
      return convertLink(node, ctx);

    case 'blockquote':
      return convertBlockquote(node, ctx);

    case 'table':
    case 'thead':
    case 'tbody':
    case 'tfoot':
    case 'tr':
      return passThrough();

    case 'th':
    case 'td': {
      const inner = kids();
      const open = `<${tag}${attrs()}>`;
      const content = shouldWrapInP(node) ? `<p>${inner}</p>` : inner;
      return `${open}${content}</${tag}>`;
    }

    default:
      return VOID_TAGS.has(tag) ? `<${tag}${attrs()} />` : passThrough();
  }
}
380
+
381
/**
 * Converts an HTML string to Confluence storage format.
 *
 * @param {string} html - Source HTML (e.g. markdown-it output).
 * @param {object} [options]
 * @param {boolean} [options.isCloud] - Picks the default link style:
 *   `smart` when truthy, `wiki` otherwise.
 * @param {string} [options.linkStyle] - One of `VALID_LINK_STYLES`; any
 *   other value falls back to the `isCloud`-derived default.
 * @param {number} [options.maxDepth] - Element nesting limit. Values that
 *   are not a number, and `NaN`, fall back to `DEFAULT_MAX_DEPTH`. `NaN`
 *   is rejected because every `depth > NaN` comparison is false, which
 *   would silently switch the guard off.
 * @returns {string} Storage-format markup.
 * @throws {HtmlDepthExceededError} When the input nests deeper than the
 *   limit (raised by the walker).
 */
function htmlToStorage(html, options = {}) {
  // The default parameter only covers `undefined`; tolerate `null` too.
  const opts = options || {};
  const isCloud = !!opts.isCloud;
  const linkStyle = VALID_LINK_STYLES.includes(opts.linkStyle)
    ? opts.linkStyle
    : (isCloud ? 'smart' : 'wiki');
  const hasDepthLimit = typeof opts.maxDepth === 'number' && !Number.isNaN(opts.maxDepth);
  const ctx = {
    linkStyle,
    depth: 0,
    maxDepth: hasDepthLimit ? opts.maxDepth : DEFAULT_MAX_DEPTH,
  };
  return walkChildren(parseDocument(html, { decodeEntities: false }), ctx);
}
393
+
394
+ module.exports = { htmlToStorage, HtmlDepthExceededError };
@@ -1,5 +1,6 @@
1
1
  const MarkdownIt = require('markdown-it');
2
2
  const { StorageWalker } = require('./storage-walker');
3
+ const { htmlToStorage } = require('./html-to-storage');
3
4
 
4
5
  const VALID_LINK_STYLES = ['smart', 'plain', 'wiki'];
5
6
  const CALLOUT_MARKERS = ['info', 'warning', 'note'];
@@ -39,9 +40,7 @@ class MacroConverter {
39
40
 
40
41
  // Anchor `[!info]` to the start of a line (string start or after a
41
42
  // newline) so prose mid-paragraph, headings on the same line, and
42
- // `> [!info]` GitHub-style alerts are left alone. The latter would
43
- // otherwise expand to a nested blockquote that the storage handler's
44
- // lazy regex cannot balance, producing malformed XML.
43
+ // `> [!info]` GitHub-style alerts are left alone.
45
44
  for (const m of CALLOUT_MARKERS) {
46
45
  const re = new RegExp(`(^|\\n)\\[!${m}\\]\\s*([\\s\\S]*?)(?=\\n\\s*\\n|\\n\\s*\\[!|$)`, 'g');
47
46
  state.src = state.src.replace(re, (_, pre, content) =>
@@ -68,129 +67,7 @@ class MacroConverter {
68
67
  }
69
68
 
70
69
  htmlToConfluenceStorage(html) {
71
- let storage = html;
72
-
73
- storage = storage.replace(/<h([1-6])>(.*?)<\/h[1-6]>/g, '<h$1>$2</h$1>');
74
-
75
- storage = storage.replace(/<p>(.*?)<\/p>/g, '<p>$1</p>');
76
-
77
- storage = storage.replace(/<strong>(.*?)<\/strong>/g, '<strong>$1</strong>');
78
-
79
- storage = storage.replace(/<em>(.*?)<\/em>/g, '<em>$1</em>');
80
-
81
- storage = storage.replace(/<ul>(.*?)<\/ul>/gs, '<ul>$1</ul>');
82
- storage = storage.replace(/<li>(.*?)<\/li>/g, '<li><p>$1</p></li>');
83
-
84
- storage = storage.replace(/<ol>(.*?)<\/ol>/gs, '<ol>$1</ol>');
85
-
86
- storage = storage.replace(/<pre><code(?:\s+class="language-(\w+)")?>(.*?)<\/code><\/pre>/gs, (_, lang, code) => {
87
- const language = lang || 'text';
88
- const decodedCode = code.replace(/\n$/, '')
89
- .replace(/&quot;/g, '"')
90
- .replace(/&lt;/g, '<')
91
- .replace(/&gt;/g, '>')
92
- .replace(/&amp;/g, '&');
93
- const safeCode = decodedCode.replace(/]]>/g, ']]]]><![CDATA[>');
94
- return `<ac:structured-macro ac:name="code"><ac:parameter ac:name="language">${language}</ac:parameter><ac:plain-text-body><![CDATA[${safeCode}]]></ac:plain-text-body></ac:structured-macro>`;
95
- });
96
-
97
- storage = storage.replace(/<code>(.*?)<\/code>/g, '<code>$1</code>');
98
-
99
- // **TOC** paragraph → Confluence Table of Contents macro (uses macro defaults)
100
- storage = storage.replace(
101
- /<p><strong>TOC<\/strong><\/p>/g,
102
- '<ac:structured-macro ac:name="toc" />'
103
- );
104
-
105
- storage = storage.replace(/<blockquote>(.*?)<\/blockquote>/gs, (_, content) => {
106
- // Detect the marker only when it sits at the very start of the first
107
- // paragraph, immediately followed by a `</p>` close (separated form) or
108
- // a `\n` (same-line body form). This is the same anchor condition the
109
- // strip step uses below, so detection and stripping stay in sync.
110
- // Without this anchor, a quotation that merely *mentions* `**INFO**` —
111
- // e.g. `> Use **INFO** at the start.` — would be silently wrapped in an
112
- // info macro, surprising the author.
113
- const marker = CALLOUT_MARKERS.find((m) =>
114
- new RegExp(`<p><strong>${m.toUpperCase()}<\\/strong>(<\\/p>|\\s*\\n)`).test(content)
115
- );
116
- if (!marker) {
117
- // Plain blockquote — `> …` is a quotation, not an alert. Use the
118
- // `> **INFO**` / `> **WARNING**` / `> **NOTE**` markers above to
119
- // produce a Confluence info / warning / note macro instead.
120
- return `<blockquote>${content}</blockquote>`;
121
- }
122
- // Strip the leading `<strong>MARKER</strong>`. markdown-it produces two
123
- // shapes depending on whether a blank `>` line separates marker and body:
124
- // case A (separated): `<p><strong>MARKER</strong></p>\n<p>body</p>`
125
- // case B (same-line): `<p><strong>MARKER</strong>\nbody</p>`
126
- // The original cleanup only handled case A, so case B leaked the marker
127
- // into the rendered macro body. README's recommended `> **INFO**\n> body`
128
- // form parses as case B — exactly the form that broke.
129
- const cleanContent = content.replace(
130
- new RegExp(`<p><strong>${marker.toUpperCase()}<\\/strong>(<\\/p>\\s*|\\s*\\n)`),
131
- (_, tail) => tail.startsWith('</p>') ? '' : '<p>'
132
- );
133
- return `<ac:structured-macro ac:name="${marker}">
134
- <ac:rich-text-body>${cleanContent}</ac:rich-text-body>
135
- </ac:structured-macro>`;
136
- });
137
-
138
- storage = storage.replace(/<table>(.*?)<\/table>/gs, '<table>$1</table>');
139
- storage = storage.replace(/<thead>(.*?)<\/thead>/gs, '<thead>$1</thead>');
140
- storage = storage.replace(/<tbody>(.*?)<\/tbody>/gs, '<tbody>$1</tbody>');
141
- storage = storage.replace(/<tr>(.*?)<\/tr>/gs, '<tr>$1</tr>');
142
- storage = storage.replace(/<th>(.*?)<\/th>/g, '<th><p>$1</p></th>');
143
- storage = storage.replace(/<td>(.*?)<\/td>/g, '<td><p>$1</p></td>');
144
-
145
- // **ANCHOR: id** paragraph → Confluence anchor macro
146
- storage = storage.replace(
147
- /<p><strong>ANCHOR: (.*?)<\/strong><\/p>/g,
148
- '<ac:structured-macro ac:name="anchor"><ac:parameter ac:name="">$1</ac:parameter></ac:structured-macro>'
149
- );
150
-
151
- // **EXPAND: title** … **EXPAND_END** → Confluence expand macro. Runs
152
- // after code/blockquote/table conversion so the body can contain those
153
- // macros. Strips inline HTML from the title because Confluence's storage
154
- // normalizer treats <ac:parameter> as text-only — it silently truncates
155
- // at the first '<' and rejects <s> outright with HTTP 500. Entities
156
- // (&amp;, &lt;) survive because the regex requires a literal '<'.
157
- storage = storage.replace(
158
- /<p><strong>EXPAND: (.*?)<\/strong><\/p>\s*([\s\S]*?)\s*<p><strong>EXPAND_END<\/strong><\/p>/g,
159
- (_, title, body) => {
160
- const cleanTitle = title.replace(/<[^>]+>/g, '').trim();
161
- return `<ac:structured-macro ac:name="expand"><ac:parameter ac:name="title">${cleanTitle}</ac:parameter><ac:rich-text-body>${body.trim()}</ac:rich-text-body></ac:structured-macro>`;
162
- }
163
- );
164
-
165
- // Same-page anchor links (href="#id") → ac:link with ac:anchor. Must run
166
- // before the general link conversion below so the #id pattern is not
167
- // consumed by the generic <a href> replacement (and so it works under
168
- // all linkStyle modes, including "plain" which leaves <a> tags as-is).
169
- storage = storage.replace(/<a href="#(.*?)">(.*?)<\/a>/gs, (_, anchor, body) => {
170
- const text = body
171
- .replace(/&amp;/g, '&')
172
- .replace(/&lt;/g, '<')
173
- .replace(/&gt;/g, '>')
174
- .replace(/&quot;/g, '"')
175
- .replace(/&#39;/g, '\'');
176
- return `<ac:link ac:anchor="${anchor}"><ac:plain-text-link-body><![CDATA[${text}]]></ac:plain-text-link-body></ac:link>`;
177
- });
178
-
179
- // Convert links based on linkStyle:
180
- // "smart" — Cloud smart links (<a data-card-appearance="inline">)
181
- // "plain" — simple <a href>; workaround for "Cannot handle: DefaultLink"
182
- // errors on custom-domain Cloud instances
183
- // "wiki" — Server/DC ac:link + ri:url storage format
184
- if (this.linkStyle === 'smart') {
185
- storage = storage.replace(/<a href="(.*?)">(.*?)<\/a>/g, '<a href="$1" data-card-appearance="inline">$2</a>');
186
- } else if (this.linkStyle === 'wiki') {
187
- storage = storage.replace(/<a href="(.*?)">(.*?)<\/a>/g, '<ac:link><ri:url ri:value="$1" /><ac:plain-text-link-body><![CDATA[$2]]></ac:plain-text-link-body></ac:link>');
188
- }
189
- // "plain" — leave <a href> tags as-is
190
-
191
- storage = storage.replace(/<hr\s*\/?>/g, '<hr />');
192
-
193
- return storage;
70
+ return htmlToStorage(html, { isCloud: this._isCloud, linkStyle: this.linkStyle });
194
71
  }
195
72
 
196
73
  detectLanguageLabels(text) {
@@ -281,19 +281,23 @@ class StorageWalker {
281
281
 
282
282
  handleAnchor(node) {
283
283
  const param = this.findParamByName(node, '');
284
- const id = param ? this.getTextContent(param) : '';
284
+ const id = (param ? this.getTextContent(param) : '').trim();
285
+ if (!id) return '';
285
286
  return `\n**ANCHOR: ${id}**\n`;
286
287
  }
287
288
 
288
289
  handlePanel(node) {
289
290
  const titleParam = this.findParamByName(node, 'title');
290
- const title = titleParam ? this.getTextContent(titleParam) : '';
291
+ const title = (titleParam ? this.getTextContent(titleParam) : '').trim();
291
292
  const body = this.getMacroBody(node);
292
293
  // Trim before quoting — walkNodes wraps every <p> with a leading and
293
294
  // trailing \n, so untrimmed body splits into ['', 'body', ''] and emits
294
295
  // extra `>` blank lines that bracket the real content.
295
296
  const cleanContent = this.walkNodes(body).trim();
297
+ if (!title && !cleanContent) return '';
296
298
  const quoted = cleanContent.split('\n').map((line) => (line ? `> ${line}` : '>')).join('\n');
299
+ if (!title) return `\n${quoted}\n`;
300
+ if (!cleanContent) return `\n> **${title}**\n`;
297
301
  return `\n> **${title}**\n>\n${quoted}\n`;
298
302
  }
299
303
 
@@ -313,12 +317,13 @@ class StorageWalker {
313
317
  if (!riPage) return '';
314
318
  const spaceKey = decodeEntities(riPage.attribs['ri:space-key'] || '');
315
319
  const title = decodeEntities(riPage.attribs['ri:content-title'] || '');
320
+ const escapedTitle = this.escapeMarkdownText(title);
316
321
  const label = this.labels.includePage || 'Include Page';
317
322
  if (spaceKey.startsWith('~')) {
318
323
  const spacePath = `display/${spaceKey}/${encodeURIComponent(title)}`;
319
- return `\n> 📄 **${label}**: [${title}](${this.buildUrl(`${this.webUrlPrefix}/${spacePath}`)})\n`;
324
+ return `\n> 📄 **${label}**: [${escapedTitle}](${this.buildUrl(`${this.webUrlPrefix}/${spacePath}`)})\n`;
320
325
  }
321
- return `\n> 📄 **${label}**: [${title}](${this.buildUrl(`${this.webUrlPrefix}/spaces/${spaceKey}/pages/[PAGE_ID_HERE]`)}) _(manual link correction required)_\n`;
326
+ return `\n> 📄 **${label}**: [${escapedTitle}](${this.buildUrl(`${this.webUrlPrefix}/spaces/${spaceKey}/pages/[PAGE_ID_HERE]`)}) _(manual link correction required)_\n`;
322
327
  }
323
328
 
324
329
  handleSharedBlock(node, type) {
@@ -330,7 +335,7 @@ class StorageWalker {
330
335
  if (acLink) {
331
336
  const riPage = this.findChildByName(acLink, 'ri:page');
332
337
  if (riPage) {
333
- const pageTitle = decodeEntities(riPage.attribs['ri:content-title'] || '');
338
+ const pageTitle = this.escapeMarkdownText(decodeEntities(riPage.attribs['ri:content-title'] || ''));
334
339
  const includeLabel = this.labels.includeSharedBlock || 'Include Shared Block';
335
340
  const fromPageLabel = this.labels.fromPage || 'from page';
336
341
  return `\n> 📄 **${includeLabel}**: ${blockKey} (${fromPageLabel}: ${pageTitle} [link needs manual correction])\n`;
@@ -394,7 +399,7 @@ class StorageWalker {
394
399
  }
395
400
  const riPage = this.findChildByName(node, 'ri:page');
396
401
  if (riPage) {
397
- const title = decodeEntities(riPage.attribs['ri:content-title'] || '');
402
+ const title = this.escapeMarkdownText(decodeEntities(riPage.attribs['ri:content-title'] || ''));
398
403
  return `[${title}]`;
399
404
  }
400
405
  return '';
@@ -454,6 +459,16 @@ class StorageWalker {
454
459
  return decodeEntities(this._collectText(node));
455
460
  }
456
461
 
462
+ // Escape markdown structural characters in text that will be interpolated
463
+ // into link syntax (`[text](url)`). Confluence page titles can legitimately
464
+ // contain `()` / `[]`, and a maliciously-crafted title could otherwise inject
465
+ // a sibling link or break downstream parsers. Backslash is escaped so that
466
+ // an existing `\` in a title isn't reinterpreted as a markdown escape.
467
+ escapeMarkdownText(s) {
468
+ if (!s) return '';
469
+ return s.replace(/([\\[\]()])/g, '\\$1');
470
+ }
471
+
457
472
  _collectText(node) {
458
473
  if (!node) return '';
459
474
  if (node.type === 'text') return node.data || '';
@@ -1,12 +1,12 @@
1
1
  {
2
2
  "name": "confluence-cli",
3
- "version": "2.1.8",
3
+ "version": "2.1.10",
4
4
  "lockfileVersion": 3,
5
5
  "requires": true,
6
6
  "packages": {
7
7
  "": {
8
8
  "name": "confluence-cli",
9
- "version": "2.1.8",
9
+ "version": "2.1.10",
10
10
  "license": "MIT",
11
11
  "dependencies": {
12
12
  "axios": "^1.15.0",
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "confluence-cli",
3
- "version": "2.1.8",
3
+ "version": "2.1.10",
4
4
  "description": "A command-line interface for Atlassian Confluence with page creation and editing capabilities",
5
5
  "main": "index.js",
6
6
  "bin": {