confluence-cli 2.1.7 → 2.1.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,394 @@
1
+ // HTML → Confluence storage walker. Parses with htmlparser2 using
2
+ // `decodeEntities: false` so attribute and text entities round-trip
3
+ // byte-identical. Dispatches by tag; unhandled tags pass through with
4
+ // attributes preserved.
5
+
6
+ const { parseDocument } = require('htmlparser2');
7
+
8
// Link styles a caller may request by name.
const VALID_LINK_STYLES = ['smart', 'plain', 'wiki'];

// Ceiling on HTML nesting. The walker recurses once per element, so without
// a cap pathological or malicious input could exhaust the JS call stack.
const DEFAULT_MAX_DEPTH = 256;

/**
 * Thrown when the input nests more deeply than the walker is allowed to go.
 * The limit that was exceeded is available as `maxDepth`.
 */
class HtmlDepthExceededError extends Error {
  /**
   * @param {number} maxDepth - The nesting limit that was exceeded.
   */
  constructor(maxDepth) {
    const message = `HTML nesting exceeds limit of ${maxDepth} levels`;
    super(message);
    Object.assign(this, { name: 'HtmlDepthExceededError', maxDepth });
  }
}
20
// Tags emitted in self-closing form. Only `hr` is normalized; `br` / `img`
// keep whatever shape the source had (markdown-it emits them without a slash).
const VOID_TAGS = new Set(['hr']);

// Lower-case callout names; the blockquote marker is the upper-cased form.
const CALLOUT_MARKERS = ['info', 'warning', 'note'];

// Phrasing-content tags that count as "inline" for the `<li>` / `<th>` /
// `<td>` paragraph-wrap quirk. markdown-it only ever produces the first row;
// the remaining rows exist so raw HTML input gets the same treatment.
const INLINE_TAGS = new Set([
  'a', 'strong', 'em', 'code', 'br', 'img', 'span',
  'mark', 'sub', 'sup', 'ins', 'del', 'b', 'i', 'u', 'small', 's',
  'abbr', 'kbd', 'q', 'var', 'cite', 'time', 'dfn', 'samp',
]);

/**
 * Decides whether an item's content should be wrapped in `<p>`.
 *
 * Wrapping applies when every child is inline: no text node containing a
 * newline and no element outside INLINE_TAGS. A node without a `children`
 * list is wrapped as well.
 *
 * @param {object} node - DOM node (`li`, `th`, `td`, ...).
 * @returns {boolean} True when the content should be wrapped.
 */
function shouldWrapInP(node) {
  const kids = node.children;
  if (!kids) return true;

  const preventsWrap = (kid) =>
    (kid.type === 'text' && kid.data.includes('\n')) ||
    (kid.type === 'tag' && !INLINE_TAGS.has(kid.name));

  return !kids.some(preventsWrap);
}
43
+
44
/**
 * Tells whether a node is a text node made up solely of whitespace
 * (an empty text node counts).
 *
 * @param {object} node - DOM node.
 * @returns {boolean}
 */
function isWhitespaceOnly(node) {
  if (node.type !== 'text') return false;
  // No non-whitespace character anywhere means the text is blank.
  return !/\S/.test(node.data);
}
47
+
48
/**
 * Returns the node's children minus whitespace-only text nodes.
 *
 * Structural shape checks (a lone `<strong>` inside a paragraph, etc.) use
 * this so they tolerate leading/trailing whitespace text siblings that some
 * parser configurations emit.
 *
 * @param {object} node - DOM node; a missing `children` list is treated as empty.
 * @returns {object[]} A new array of the remaining children, in order.
 */
function meaningfulChildren(node) {
  const kept = [];
  for (const kid of node.children || []) {
    if (!isWhitespaceOnly(kid)) kept.push(kid);
  }
  return kept;
}
54
+
55
/**
 * Recognizes the `<p><strong>TOC</strong></p>` and
 * `<p><strong>ANCHOR: id</strong></p>` macro markers.
 *
 * The shape is deliberately strict: a paragraph whose only meaningful child
 * is a `<strong>` whose only meaningful child is a text node. Anything more
 * elaborate is treated as an ordinary paragraph.
 *
 * @param {object} node - Element node to inspect.
 * @returns {{kind: 'toc'}|{kind: 'anchor', id: string}|null} The marker, or
 *   null when the node is not a marker paragraph.
 */
function detectParagraphMarker(node) {
  if (node.name !== 'p') return null;

  const outer = meaningfulChildren(node);
  const strong = outer.length === 1 ? outer[0] : null;
  if (!strong || strong.type !== 'tag' || strong.name !== 'strong') return null;

  const inner = meaningfulChildren(strong);
  const label = inner.length === 1 ? inner[0] : null;
  if (!label || label.type !== 'text') return null;

  if (label.data === 'TOC') return { kind: 'toc' };

  // The id is everything after "ANCHOR: " and must be non-empty.
  const anchorMatch = label.data.match(/^ANCHOR: (.+)$/);
  return anchorMatch ? { kind: 'anchor', id: anchorMatch[1] } : null;
}
73
+
74
/**
 * Recognizes the EXPAND opening marker `<p><strong>EXPAND: ...</strong></p>`.
 *
 * The title may carry nested inline HTML (em, code, a, s, ...), so the only
 * requirement on the `<strong>` is that its very first child is a text node
 * starting with `EXPAND: ` - it need not be the only child.
 *
 * @param {object} node - DOM node.
 * @returns {boolean}
 */
function isExpandOpen(node) {
  const isParagraph = node.type === 'tag' && node.name === 'p';
  if (!isParagraph) return false;

  const kids = meaningfulChildren(node);
  if (kids.length !== 1) return false;

  const [strong] = kids;
  const isStrong = strong.type === 'tag' && strong.name === 'strong';
  if (!isStrong) return false;

  const lead = (strong.children || [])[0];
  if (!lead) return false;
  return lead.type === 'text' && lead.data.startsWith('EXPAND: ');
}
88
+
89
/**
 * Recognizes the EXPAND closing marker `<p><strong>EXPAND_END</strong></p>`.
 *
 * Whitespace-only text siblings are ignored at both levels; apart from that
 * the paragraph must hold exactly one `<strong>` holding exactly one text
 * node equal to `EXPAND_END`.
 *
 * @param {object} node - DOM node.
 * @returns {boolean}
 */
function isExpandClose(node) {
  const isParagraph = node.type === 'tag' && node.name === 'p';
  if (!isParagraph) return false;

  const outer = meaningfulChildren(node);
  if (outer.length !== 1) return false;
  const [strong] = outer;
  if (!(strong.type === 'tag' && strong.name === 'strong')) return false;

  const inner = meaningfulChildren(strong);
  if (inner.length !== 1) return false;
  const [label] = inner;
  return label.type === 'text' && label.data === 'EXPAND_END';
}
100
+
101
/**
 * Decodes a small fixed set of HTML entities in `text`.
 *
 * The order in which entities are replaced decides what happens to
 * doubly-escaped input such as the escaped form of an `<`-entity:
 *  - default: the ampersand entity goes first, so such input collapses all
 *    the way down to `<`;
 *  - `preserveDouble`: the ampersand entity goes last, so the same input
 *    comes out as `&lt;`.
 * Both orders are intentional; call sites choose through the option.
 *
 * @param {string} text - Text containing entities.
 * @param {{preserveDouble?: boolean}} [options]
 * @returns {string} The decoded text.
 */
function decodeEntities(text, { preserveDouble = false } = {}) {
  // The apostrophe entity (`&#39;`) is deliberately absent from the
  // preserveDouble table: the earlier code-fence decoder never handled it,
  // only the anchor-body decoder did, and that asymmetry is kept for byte
  // parity.
  const steps = preserveDouble
    ? [
      [/&quot;/g, '"'],
      [/&lt;/g, '<'],
      [/&gt;/g, '>'],
      [/&amp;/g, '&'],
    ]
    : [
      [/&amp;/g, '&'],
      [/&lt;/g, '<'],
      [/&gt;/g, '>'],
      [/&quot;/g, '"'],
      [/&#39;/g, '\''],
    ];

  return steps.reduce((acc, [entity, plain]) => acc.replace(entity, plain), text);
}
124
+
125
/**
 * Converts an `<a>` element to Confluence storage markup.
 *
 * Same-page anchors (`href="#id"`) always become an `ac:link` with
 * `ac:anchor`, regardless of `ctx.linkStyle`. Every other href branches on
 * the link style:
 *  - `smart`: `<a>` with `data-card-appearance="inline"` forced on;
 *  - `wiki`:  `ac:link` + `ri:url` with a plain-text body;
 *  - `plain` (and anything else): the `<a>` is re-emitted with its attributes.
 *
 * Values interpolated into attribute slots go through `escapeAttrValue`, and
 * link text placed in CDATA has any `]]>` split across two sections (the
 * same treatment code blocks receive), so neither can terminate its
 * container early and corrupt the XML.
 *
 * @param {object} node - The `<a>` element.
 * @param {object} ctx - Walker context; `linkStyle` is read here.
 * @returns {string} Storage-format markup for the link.
 */
function convertLink(node, ctx) {
  const attribs = node.attribs || {};
  const href = attribs.href || '';
  const inner = walkChildren(node, ctx);

  // `]]>` cannot appear inside a CDATA section; close and reopen around it.
  const cdata = (text) => `<![CDATA[${text.replace(/]]>/g, ']]]]><![CDATA[>')}]]>`;

  if (href.startsWith('#')) {
    const anchor = escapeAttrValue(href.slice(1));
    // The body is plain text, so entities kept raw by the parser are decoded.
    const text = decodeEntities(inner);
    return `<ac:link ac:anchor="${anchor}"><ac:plain-text-link-body>${cdata(text)}</ac:plain-text-link-body></ac:link>`;
  }

  switch (ctx.linkStyle) {
    case 'smart': {
      // Spread order enforces policy: a `data-card-appearance` already on
      // the source `<a>` is overwritten - smart links must be inline.
      const merged = { ...attribs, 'data-card-appearance': 'inline' };
      return `<a${renderAttrs(merged)}>${inner}</a>`;
    }
    case 'wiki':
      return `<ac:link><ri:url ri:value="${escapeAttrValue(href)}" /><ac:plain-text-link-body>${cdata(inner)}</ac:plain-text-link-body></ac:link>`;
    case 'plain':
    default:
      return `<a${renderAttrs(attribs)}>${inner}</a>`;
  }
}
152
+
153
/**
 * Detects a callout blockquote, i.e. one opening with `**INFO**`,
 * `**WARNING**` or `**NOTE**`.
 *
 * Detection is structural: the marker `<strong>` must be the first
 * meaningful child of the blockquote's first paragraph, which rules out
 * false positives for a mid-paragraph `**INFO**`.
 *
 * Two markdown-it shapes are recognized:
 *   separated: `<blockquote><p><strong>INFO</strong></p><p>body</p></blockquote>`
 *   same-line: `<blockquote><p><strong>INFO</strong>\nbody</p></blockquote>`
 *
 * @param {object} node - The `<blockquote>` element.
 * @returns {{marker: string, sameLine: boolean, markerP: object, tail: object[]}|null}
 *   `marker` is the lower-case callout name, `markerP` the paragraph holding
 *   the marker, `tail` the marker's following siblings inside that paragraph
 *   and `sameLine` whether that tail has any content. Null when the
 *   blockquote is not a callout.
 */
function detectBlockquoteCallout(node) {
  const [firstP] = meaningfulChildren(node);
  if (!firstP || firstP.type !== 'tag' || firstP.name !== 'p') return null;

  // Locate the first non-blank child of the paragraph; it must be <strong>.
  const pKids = firstP.children || [];
  const strongAt = pKids.findIndex((kid) => !isWhitespaceOnly(kid));
  if (strongAt === -1) return null;
  const strong = pKids[strongAt];
  if (strong.type !== 'tag' || strong.name !== 'strong') return null;

  // The <strong> must hold exactly one text node naming a known marker.
  const labelNodes = meaningfulChildren(strong);
  if (labelNodes.length !== 1) return null;
  const [label] = labelNodes;
  if (label.type !== 'text') return null;
  const marker = CALLOUT_MARKERS.find((name) => name.toUpperCase() === label.data);
  if (!marker) return null;

  const tail = pKids.slice(strongAt + 1);
  const sameLine = tail.some((kid) => !isWhitespaceOnly(kid));
  if (sameLine) {
    // Inspect tail[0] itself rather than the first meaningful sibling: the
    // body of `> **INFO**\n> body` literally begins with a newline. A leading
    // whitespace text node without one (`<strong>INFO</strong> body`) is
    // prose continuation, not a callout, and has to be rejected.
    const lead = tail[0];
    const startsOnNewLine = lead.type === 'text' && /^\s*\n/.test(lead.data);
    if (!startsOnNewLine) return null;
  }
  return { marker, sameLine, markerP: firstP, tail };
}
187
+
188
/**
 * Converts a `<blockquote>`: callouts become `info` / `warning` / `note`
 * structured macros, everything else is re-emitted as a plain blockquote.
 *
 * The callout body is routed through `walkChildren` rather than walked node
 * by node, so sibling-level EXPAND / EXPAND_END markers inside a callout are
 * collapsed exactly as they are inside a plain blockquote.
 *
 * @param {object} node - The `<blockquote>` element.
 * @param {object} ctx - Walker context, passed through to the walkers.
 * @returns {string} Storage-format markup.
 */
function convertBlockquote(node, ctx) {
  const detected = detectBlockquoteCallout(node);
  if (!detected) {
    return `<blockquote>${walkChildren(node, ctx)}</blockquote>`;
  }

  const { marker, sameLine, markerP, tail } = detected;
  // Everything in the blockquote except the paragraph that carried the marker.
  const remaining = (node.children || []).filter((c) => c !== markerP);
  // `walkChildren` only reads `.children`, so a bare wrapper object is enough.
  const walkRemaining = () => walkChildren({ children: remaining }, ctx);

  let body;
  if (sameLine) {
    // Same-line form: the marker's siblings inside its paragraph make up the
    // first body paragraph. Drop the leading newline markdown-it emits
    // between the marker and the body text.
    const firstPBody = tail.map((c) => walkNode(c, ctx)).join('').replace(/^\s*\n/, '');
    body = `<p>${firstPBody}</p>${walkRemaining()}`;
  } else {
    // Separated form: the marker paragraph vanishes entirely; trim the
    // whitespace its neighbouring text node leaves at the front.
    body = walkRemaining().replace(/^\s+/, '');
  }

  // NOTE(review): the whitespace between the macro tags is reproduced as it
  // appears in this view of the file - confirm against the original layout.
  return `<ac:structured-macro ac:name="${marker}">\n<ac:rich-text-body>${body}</ac:rich-text-body>\n</ac:structured-macro>`;
}
220
+
221
/**
 * Converts `<pre>` to a Confluence `code` macro when it has the strict
 * `<pre><code>...</code></pre>` shape; otherwise re-emits a plain `<pre>`.
 *
 * "Strict" means the `<code>` is the one and only child of the `<pre>` -
 * whitespace siblings or any other shape fall through to the plain form.
 * The body is entity-decoded by hand because the parser keeps entities raw
 * and CDATA is opaque downstream.
 *
 * All text beneath `<code>` is collected, including text nested inside
 * child elements (e.g. syntax-highlight spans), so wrapped code is not
 * dropped. The element tags themselves are not emitted.
 *
 * @param {object} node - The `<pre>` element.
 * @param {object} ctx - Walker context, used only for the plain `<pre>` path.
 * @returns {string} Storage-format markup.
 */
function convertCodeBlock(node, ctx) {
  const children = node.children || [];
  const codeNode = children.length === 1 ? children[0] : null;
  if (!codeNode || codeNode.type !== 'tag' || codeNode.name !== 'code') {
    return `<pre>${walkChildren(node, ctx)}</pre>`;
  }

  const classAttr = (codeNode.attribs && codeNode.attribs.class) || '';
  const langMatch = classAttr.match(/language-(\w+)/);
  const language = langMatch ? langMatch[1] : 'text';

  // Gather descendant text in document order. An explicit stack is used
  // instead of recursion so deep nesting here cannot hit the call-stack limit.
  let raw = '';
  const pending = [...(codeNode.children || [])].reverse();
  while (pending.length > 0) {
    const current = pending.pop();
    if (current.type === 'text') {
      raw += current.data;
    } else if (current.children) {
      for (let i = current.children.length - 1; i >= 0; i--) {
        pending.push(current.children[i]);
      }
    }
  }

  // Drop the single trailing newline, decode entities, then split any `]]>`
  // across two CDATA sections so it cannot terminate the section early.
  const body = decodeEntities(raw.replace(/\n$/, ''), { preserveDouble: true })
    .replace(/]]>/g, ']]]]><![CDATA[>');
  return `<ac:structured-macro ac:name="code"><ac:parameter ac:name="language">${language}</ac:parameter><ac:plain-text-body><![CDATA[${body}]]></ac:plain-text-body></ac:structured-macro>`;
}
245
+
246
/**
 * Makes an attribute value safe to emit inside a double-quoted XML
 * attribute slot.
 *
 * With `decodeEntities: false` the parser hands back source-escaped
 * entities untouched, so those are left alone and round-trip cleanly. Three
 * kinds of character can still arrive raw and would corrupt the emitted XML:
 *  - `"` from a single-quoted source attribute (`<a title='he said "hi"'>`),
 *    which would close the slot early;
 *  - `<`, which HTML allows inside a quoted attribute value but XML does not;
 *  - a bare `&` that does not start an entity reference (e.g. `?a=1&b=2`),
 *    likewise legal in HTML but not well-formed in XML.
 * `>` is legal in XML attribute values and is left as-is.
 *
 * @param {*} v - Attribute value; coerced with `String()`.
 * @returns {string} The escaped value.
 */
function escapeAttrValue(v) {
  return String(v)
    // Bare ampersands first, so the entities introduced below are not touched.
    .replace(/&(?![a-zA-Z][a-zA-Z0-9]*;|#[0-9]+;|#[xX][0-9a-fA-F]+;)/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/"/g, '&quot;');
}
260
+
261
/**
 * Serializes an attribute map as ` name="value"` pairs, each with a leading
 * space, in the map's own key order. Values pass through `escapeAttrValue`.
 *
 * @param {object|null|undefined} attribs - Attribute name/value map.
 * @returns {string} The serialized attributes; empty when there are none.
 */
function renderAttrs(attribs) {
  if (!attribs) return '';

  let rendered = '';
  for (const name of Object.keys(attribs)) {
    rendered += ` ${name}="${escapeAttrValue(attribs[name])}"`;
  }
  return rendered;
}
267
+
268
/**
 * Walks a node's children and concatenates their storage-format output.
 *
 * Also handles the sibling-level EXPAND span: an opening
 * `<p><strong>EXPAND: title</strong></p>` paired with the first following
 * `<p><strong>EXPAND_END</strong></p>` collapses into one `expand` macro
 * whose body is everything in between.
 *
 * @param {object} node - Any object with an optional `children` array.
 * @param {object} ctx - Walker context, passed through to `walkNode`.
 * @returns {string} Concatenated output; empty when there are no children.
 */
function walkChildren(node, ctx) {
  const children = node.children;
  if (!children) return '';

  const out = [];
  // Once a search for EXPAND_END comes back empty, no later open can find
  // one either, so stop rescanning the sibling list for every orphan open.
  let closeExhausted = false;
  let i = 0;
  while (i < children.length) {
    const child = children[i];
    // Pairs with the FIRST EXPAND_END after this open. A nested open/close
    // pair inside the body therefore has its close consumed by the outer
    // open, leaving the second close behind as an orphan paragraph - the
    // same non-greedy behavior as the previous regex pipeline.
    if (!closeExhausted && isExpandOpen(child)) {
      const endIdx = children.findIndex((c, j) => j > i && isExpandClose(c));
      if (endIdx === -1) {
        closeExhausted = true;
      } else {
        // `isExpandOpen` ignores whitespace-only text around the <strong>,
        // so the <strong> must be located the same way; it is not
        // necessarily the paragraph's first raw child.
        const titleStrong = meaningfulChildren(child)[0];
        const titleHtml = walkChildren(titleStrong, ctx).replace(/^EXPAND: /, '');
        // Confluence's `<ac:parameter>` normalizer is text-only (rejects
        // `<s>` with HTTP 500, silently truncates at the first '<'). Strip
        // literal tags; entities survive because the rule needs a literal '<'.
        const cleanTitle = titleHtml.replace(/<[^>]+>/g, '').trim();
        const bodyHtml = children
          .slice(i + 1, endIdx)
          .map((c) => walkNode(c, ctx))
          .join('')
          .trim();
        out.push(`<ac:structured-macro ac:name="expand"><ac:parameter ac:name="title">${cleanTitle}</ac:parameter><ac:rich-text-body>${bodyHtml}</ac:rich-text-body></ac:structured-macro>`);
        i = endIdx + 1;
        continue;
      }
    }
    out.push(walkNode(child, ctx));
    i++;
  }
  return out.join('');
}
305
+
306
/**
 * Converts a single DOM node.
 *
 * Text nodes yield their raw data; `tag` nodes are dispatched by tag name;
 * every other node type yields an empty string. `ctx.depth` counts the
 * element nesting currently being walked and is always restored on the way
 * out, whether the dispatch returns or throws.
 *
 * @param {object} node - DOM node.
 * @param {object} ctx - Walker context; `depth` is updated, `maxDepth` read.
 * @returns {string} Storage-format markup for the node.
 * @throws {HtmlDepthExceededError} When entering this element would exceed
 *   `ctx.maxDepth`.
 */
function walkNode(node, ctx) {
  switch (node.type) {
    case 'text':
      return node.data;
    case 'tag':
      break;
    default:
      return '';
  }

  ctx.depth += 1;
  if (ctx.depth > ctx.maxDepth) {
    // Undo the increment so the counter stays balanced for any caller that
    // catches the error.
    ctx.depth -= 1;
    throw new HtmlDepthExceededError(ctx.maxDepth);
  }

  try {
    return dispatchTag(node, ctx);
  } finally {
    ctx.depth -= 1;
  }
}
319
+
320
/**
 * Emits storage-format markup for one element, chosen by tag name.
 *
 * Special handling:
 *  - `p`: TOC / ANCHOR marker paragraphs become macros;
 *  - `hr`, `br`: fixed output (`<hr />`, `<br>`);
 *  - `img`: attributes preserved, emitted without a closing slash;
 *  - `li`, `th`, `td`: content is wrapped in `<p>` when `shouldWrapInP` says so;
 *  - `pre`, `a`, `blockquote`: delegated to their converters.
 * Every other tag is re-emitted with its attributes and walked children.
 * That includes `ul`, `ol` and `li`, whose attributes are kept so that, for
 * example, `<ol start="3">` retains its start number.
 *
 * @param {object} node - Element node (`type === 'tag'`).
 * @param {object} ctx - Walker context, passed through to child walks.
 * @returns {string} Storage-format markup.
 */
function dispatchTag(node, ctx) {
  const { name } = node;
  // Generic form shared by every tag that needs no special treatment.
  const passThrough = () =>
    `<${name}${renderAttrs(node.attribs)}>${walkChildren(node, ctx)}</${name}>`;

  switch (name) {
    case 'p': {
      const marker = detectParagraphMarker(node);
      if (marker && marker.kind === 'toc') return '<ac:structured-macro ac:name="toc" />';
      if (marker && marker.kind === 'anchor') {
        return `<ac:structured-macro ac:name="anchor"><ac:parameter ac:name="">${marker.id}</ac:parameter></ac:structured-macro>`;
      }
      return passThrough();
    }
    case 'hr':
      return '<hr />';
    case 'br':
      return '<br>';
    case 'img':
      return `<img${renderAttrs(node.attribs)}>`;
    case 'li':
    case 'th':
    case 'td': {
      const inner = walkChildren(node, ctx);
      const open = `<${name}${renderAttrs(node.attribs)}>`;
      return shouldWrapInP(node) ? `${open}<p>${inner}</p></${name}>` : `${open}${inner}</${name}>`;
    }
    case 'pre':
      return convertCodeBlock(node, ctx);
    case 'a':
      return convertLink(node, ctx);
    case 'blockquote':
      return convertBlockquote(node, ctx);
    // Listed explicitly to document the supported set; all share the
    // generic form. `code` here is inline only - `<code>` directly inside
    // `<pre>` is consumed by convertCodeBlock.
    case 'h1':
    case 'h2':
    case 'h3':
    case 'h4':
    case 'h5':
    case 'h6':
    case 'strong':
    case 'em':
    case 'code':
    case 'ul':
    case 'ol':
    case 'table':
    case 'thead':
    case 'tbody':
    case 'tfoot':
    case 'tr':
      return passThrough();
    default:
      if (VOID_TAGS.has(name)) {
        return `<${name}${renderAttrs(node.attribs)} />`;
      }
      return passThrough();
  }
}
380
+
381
/**
 * Converts an HTML string to Confluence storage format.
 *
 * The HTML is parsed with `decodeEntities: false`, so entities in text and
 * attributes are handed to the walker exactly as written.
 *
 * @param {string} html - HTML to convert.
 * @param {object} [options] - A null or omitted value means "all defaults".
 * @param {boolean} [options.isCloud] - Picks the default link style:
 *   `smart` when truthy, `wiki` otherwise.
 * @param {string} [options.linkStyle] - `smart`, `plain` or `wiki`; any
 *   other value falls back to the default for `isCloud`.
 * @param {number} [options.maxDepth] - Element nesting limit. Non-numbers
 *   and NaN fall back to DEFAULT_MAX_DEPTH (a NaN limit would otherwise
 *   never trip and silently switch the cap off).
 * @returns {string} Storage-format markup.
 * @throws {HtmlDepthExceededError} When the input nests deeper than the limit.
 */
function htmlToStorage(html, options = {}) {
  // The default parameter only covers `undefined`; tolerate `null` as well.
  const opts = options || {};
  const isCloud = !!opts.isCloud;
  const linkStyle = VALID_LINK_STYLES.includes(opts.linkStyle)
    ? opts.linkStyle
    : (isCloud ? 'smart' : 'wiki');
  const hasDepthOverride = typeof opts.maxDepth === 'number' && !Number.isNaN(opts.maxDepth);
  const ctx = {
    linkStyle,
    depth: 0,
    maxDepth: hasDepthOverride ? opts.maxDepth : DEFAULT_MAX_DEPTH,
  };
  return walkChildren(parseDocument(html, { decodeEntities: false }), ctx);
}
393
+
394
+ module.exports = { htmlToStorage, HtmlDepthExceededError };
@@ -1,5 +1,6 @@
1
1
  const MarkdownIt = require('markdown-it');
2
2
  const { StorageWalker } = require('./storage-walker');
3
+ const { htmlToStorage } = require('./html-to-storage');
3
4
 
4
5
  const VALID_LINK_STYLES = ['smart', 'plain', 'wiki'];
5
6
  const CALLOUT_MARKERS = ['info', 'warning', 'note'];
@@ -39,9 +40,7 @@ class MacroConverter {
39
40
 
40
41
  // Anchor `[!info]` to the start of a line (string start or after a
41
42
  // newline) so prose mid-paragraph, headings on the same line, and
42
- // `> [!info]` GitHub-style alerts are left alone. The latter would
43
- // otherwise expand to a nested blockquote that the storage handler's
44
- // lazy regex cannot balance, producing malformed XML.
43
+ // `> [!info]` GitHub-style alerts are left alone.
45
44
  for (const m of CALLOUT_MARKERS) {
46
45
  const re = new RegExp(`(^|\\n)\\[!${m}\\]\\s*([\\s\\S]*?)(?=\\n\\s*\\n|\\n\\s*\\[!|$)`, 'g');
47
46
  state.src = state.src.replace(re, (_, pre, content) =>
@@ -68,129 +67,7 @@ class MacroConverter {
68
67
  }
69
68
 
70
69
  htmlToConfluenceStorage(html) {
71
- let storage = html;
72
-
73
- storage = storage.replace(/<h([1-6])>(.*?)<\/h[1-6]>/g, '<h$1>$2</h$1>');
74
-
75
- storage = storage.replace(/<p>(.*?)<\/p>/g, '<p>$1</p>');
76
-
77
- storage = storage.replace(/<strong>(.*?)<\/strong>/g, '<strong>$1</strong>');
78
-
79
- storage = storage.replace(/<em>(.*?)<\/em>/g, '<em>$1</em>');
80
-
81
- storage = storage.replace(/<ul>(.*?)<\/ul>/gs, '<ul>$1</ul>');
82
- storage = storage.replace(/<li>(.*?)<\/li>/g, '<li><p>$1</p></li>');
83
-
84
- storage = storage.replace(/<ol>(.*?)<\/ol>/gs, '<ol>$1</ol>');
85
-
86
- storage = storage.replace(/<pre><code(?:\s+class="language-(\w+)")?>(.*?)<\/code><\/pre>/gs, (_, lang, code) => {
87
- const language = lang || 'text';
88
- const decodedCode = code.replace(/\n$/, '')
89
- .replace(/&quot;/g, '"')
90
- .replace(/&lt;/g, '<')
91
- .replace(/&gt;/g, '>')
92
- .replace(/&amp;/g, '&');
93
- const safeCode = decodedCode.replace(/]]>/g, ']]]]><![CDATA[>');
94
- return `<ac:structured-macro ac:name="code"><ac:parameter ac:name="language">${language}</ac:parameter><ac:plain-text-body><![CDATA[${safeCode}]]></ac:plain-text-body></ac:structured-macro>`;
95
- });
96
-
97
- storage = storage.replace(/<code>(.*?)<\/code>/g, '<code>$1</code>');
98
-
99
- // **TOC** paragraph → Confluence Table of Contents macro (uses macro defaults)
100
- storage = storage.replace(
101
- /<p><strong>TOC<\/strong><\/p>/g,
102
- '<ac:structured-macro ac:name="toc" />'
103
- );
104
-
105
- storage = storage.replace(/<blockquote>(.*?)<\/blockquote>/gs, (_, content) => {
106
- // Detect the marker only when it sits at the very start of the first
107
- // paragraph, immediately followed by a `</p>` close (separated form) or
108
- // a `\n` (same-line body form). This is the same anchor condition the
109
- // strip step uses below, so detection and stripping stay in sync.
110
- // Without this anchor, a quotation that merely *mentions* `**INFO**` —
111
- // e.g. `> Use **INFO** at the start.` — would be silently wrapped in an
112
- // info macro, surprising the author.
113
- const marker = CALLOUT_MARKERS.find((m) =>
114
- new RegExp(`<p><strong>${m.toUpperCase()}<\\/strong>(<\\/p>|\\s*\\n)`).test(content)
115
- );
116
- if (!marker) {
117
- // Plain blockquote — `> …` is a quotation, not an alert. Use the
118
- // `> **INFO**` / `> **WARNING**` / `> **NOTE**` markers above to
119
- // produce a Confluence info / warning / note macro instead.
120
- return `<blockquote>${content}</blockquote>`;
121
- }
122
- // Strip the leading `<strong>MARKER</strong>`. markdown-it produces two
123
- // shapes depending on whether a blank `>` line separates marker and body:
124
- // case A (separated): `<p><strong>MARKER</strong></p>\n<p>body</p>`
125
- // case B (same-line): `<p><strong>MARKER</strong>\nbody</p>`
126
- // The original cleanup only handled case A, so case B leaked the marker
127
- // into the rendered macro body. README's recommended `> **INFO**\n> body`
128
- // form parses as case B — exactly the form that broke.
129
- const cleanContent = content.replace(
130
- new RegExp(`<p><strong>${marker.toUpperCase()}<\\/strong>(<\\/p>\\s*|\\s*\\n)`),
131
- (_, tail) => tail.startsWith('</p>') ? '' : '<p>'
132
- );
133
- return `<ac:structured-macro ac:name="${marker}">
134
- <ac:rich-text-body>${cleanContent}</ac:rich-text-body>
135
- </ac:structured-macro>`;
136
- });
137
-
138
- storage = storage.replace(/<table>(.*?)<\/table>/gs, '<table>$1</table>');
139
- storage = storage.replace(/<thead>(.*?)<\/thead>/gs, '<thead>$1</thead>');
140
- storage = storage.replace(/<tbody>(.*?)<\/tbody>/gs, '<tbody>$1</tbody>');
141
- storage = storage.replace(/<tr>(.*?)<\/tr>/gs, '<tr>$1</tr>');
142
- storage = storage.replace(/<th>(.*?)<\/th>/g, '<th><p>$1</p></th>');
143
- storage = storage.replace(/<td>(.*?)<\/td>/g, '<td><p>$1</p></td>');
144
-
145
- // **ANCHOR: id** paragraph → Confluence anchor macro
146
- storage = storage.replace(
147
- /<p><strong>ANCHOR: (.*?)<\/strong><\/p>/g,
148
- '<ac:structured-macro ac:name="anchor"><ac:parameter ac:name="">$1</ac:parameter></ac:structured-macro>'
149
- );
150
-
151
- // **EXPAND: title** … **EXPAND_END** → Confluence expand macro. Runs
152
- // after code/blockquote/table conversion so the body can contain those
153
- // macros. Strips inline HTML from the title because Confluence's storage
154
- // normalizer treats <ac:parameter> as text-only — it silently truncates
155
- // at the first '<' and rejects <s> outright with HTTP 500. Entities
156
- // (&amp;, &lt;) survive because the regex requires a literal '<'.
157
- storage = storage.replace(
158
- /<p><strong>EXPAND: (.*?)<\/strong><\/p>\s*([\s\S]*?)\s*<p><strong>EXPAND_END<\/strong><\/p>/g,
159
- (_, title, body) => {
160
- const cleanTitle = title.replace(/<[^>]+>/g, '').trim();
161
- return `<ac:structured-macro ac:name="expand"><ac:parameter ac:name="title">${cleanTitle}</ac:parameter><ac:rich-text-body>${body.trim()}</ac:rich-text-body></ac:structured-macro>`;
162
- }
163
- );
164
-
165
- // Same-page anchor links (href="#id") → ac:link with ac:anchor. Must run
166
- // before the general link conversion below so the #id pattern is not
167
- // consumed by the generic <a href> replacement (and so it works under
168
- // all linkStyle modes, including "plain" which leaves <a> tags as-is).
169
- storage = storage.replace(/<a href="#(.*?)">(.*?)<\/a>/gs, (_, anchor, body) => {
170
- const text = body
171
- .replace(/&amp;/g, '&')
172
- .replace(/&lt;/g, '<')
173
- .replace(/&gt;/g, '>')
174
- .replace(/&quot;/g, '"')
175
- .replace(/&#39;/g, '\'');
176
- return `<ac:link ac:anchor="${anchor}"><ac:plain-text-link-body><![CDATA[${text}]]></ac:plain-text-link-body></ac:link>`;
177
- });
178
-
179
- // Convert links based on linkStyle:
180
- // "smart" — Cloud smart links (<a data-card-appearance="inline">)
181
- // "plain" — simple <a href>; workaround for "Cannot handle: DefaultLink"
182
- // errors on custom-domain Cloud instances
183
- // "wiki" — Server/DC ac:link + ri:url storage format
184
- if (this.linkStyle === 'smart') {
185
- storage = storage.replace(/<a href="(.*?)">(.*?)<\/a>/g, '<a href="$1" data-card-appearance="inline">$2</a>');
186
- } else if (this.linkStyle === 'wiki') {
187
- storage = storage.replace(/<a href="(.*?)">(.*?)<\/a>/g, '<ac:link><ri:url ri:value="$1" /><ac:plain-text-link-body><![CDATA[$2]]></ac:plain-text-link-body></ac:link>');
188
- }
189
- // "plain" — leave <a href> tags as-is
190
-
191
- storage = storage.replace(/<hr\s*\/?>/g, '<hr />');
192
-
193
- return storage;
70
+ return htmlToStorage(html, { isCloud: this._isCloud, linkStyle: this.linkStyle });
194
71
  }
195
72
 
196
73
  detectLanguageLabels(text) {
@@ -126,6 +126,8 @@ class StorageWalker {
126
126
  return '**' + this.walkNodes(node.children) + '**';
127
127
  case 'em': case 'i':
128
128
  return '*' + this.walkNodes(node.children) + '*';
129
+ case 's': case 'del':
130
+ return '~~' + this.walkNodes(node.children) + '~~';
129
131
  case 'code':
130
132
  return '`' + this.walkNodes(node.children) + '`';
131
133
  case 'br':
@@ -311,12 +313,13 @@ class StorageWalker {
311
313
  if (!riPage) return '';
312
314
  const spaceKey = decodeEntities(riPage.attribs['ri:space-key'] || '');
313
315
  const title = decodeEntities(riPage.attribs['ri:content-title'] || '');
316
+ const escapedTitle = this.escapeMarkdownText(title);
314
317
  const label = this.labels.includePage || 'Include Page';
315
318
  if (spaceKey.startsWith('~')) {
316
319
  const spacePath = `display/${spaceKey}/${encodeURIComponent(title)}`;
317
- return `\n> 📄 **${label}**: [${title}](${this.buildUrl(`${this.webUrlPrefix}/${spacePath}`)})\n`;
320
+ return `\n> 📄 **${label}**: [${escapedTitle}](${this.buildUrl(`${this.webUrlPrefix}/${spacePath}`)})\n`;
318
321
  }
319
- return `\n> 📄 **${label}**: [${title}](${this.buildUrl(`${this.webUrlPrefix}/spaces/${spaceKey}/pages/[PAGE_ID_HERE]`)}) _(manual link correction required)_\n`;
322
+ return `\n> 📄 **${label}**: [${escapedTitle}](${this.buildUrl(`${this.webUrlPrefix}/spaces/${spaceKey}/pages/[PAGE_ID_HERE]`)}) _(manual link correction required)_\n`;
320
323
  }
321
324
 
322
325
  handleSharedBlock(node, type) {
@@ -328,7 +331,7 @@ class StorageWalker {
328
331
  if (acLink) {
329
332
  const riPage = this.findChildByName(acLink, 'ri:page');
330
333
  if (riPage) {
331
- const pageTitle = decodeEntities(riPage.attribs['ri:content-title'] || '');
334
+ const pageTitle = this.escapeMarkdownText(decodeEntities(riPage.attribs['ri:content-title'] || ''));
332
335
  const includeLabel = this.labels.includeSharedBlock || 'Include Shared Block';
333
336
  const fromPageLabel = this.labels.fromPage || 'from page';
334
337
  return `\n> 📄 **${includeLabel}**: ${blockKey} (${fromPageLabel}: ${pageTitle} [link needs manual correction])\n`;
@@ -392,7 +395,7 @@ class StorageWalker {
392
395
  }
393
396
  const riPage = this.findChildByName(node, 'ri:page');
394
397
  if (riPage) {
395
- const title = decodeEntities(riPage.attribs['ri:content-title'] || '');
398
+ const title = this.escapeMarkdownText(decodeEntities(riPage.attribs['ri:content-title'] || ''));
396
399
  return `[${title}]`;
397
400
  }
398
401
  return '';
@@ -452,6 +455,16 @@ class StorageWalker {
452
455
  return decodeEntities(this._collectText(node));
453
456
  }
454
457
 
458
+ // Escape markdown structural characters in text that will be interpolated
459
+ // into link syntax (`[text](url)`). Confluence page titles can legitimately
460
+ // contain `()` / `[]`, and a maliciously-crafted title could otherwise inject
461
+ // a sibling link or break downstream parsers. Backslash is escaped so that
462
+ // an existing `\` in a title isn't reinterpreted as a markdown escape.
463
+ escapeMarkdownText(s) {
464
+ if (!s) return '';
465
+ return s.replace(/([\\[\]()])/g, '\\$1');
466
+ }
467
+
455
468
  _collectText(node) {
456
469
  if (!node) return '';
457
470
  if (node.type === 'text') return node.data || '';
@@ -1,12 +1,12 @@
1
1
  {
2
2
  "name": "confluence-cli",
3
- "version": "2.1.7",
3
+ "version": "2.1.9",
4
4
  "lockfileVersion": 3,
5
5
  "requires": true,
6
6
  "packages": {
7
7
  "": {
8
8
  "name": "confluence-cli",
9
- "version": "2.1.7",
9
+ "version": "2.1.9",
10
10
  "license": "MIT",
11
11
  "dependencies": {
12
12
  "axios": "^1.15.0",
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "confluence-cli",
3
- "version": "2.1.7",
3
+ "version": "2.1.9",
4
4
  "description": "A command-line interface for Atlassian Confluence with page creation and editing capabilities",
5
5
  "main": "index.js",
6
6
  "bin": {