confluence-cli 2.1.8 → 2.1.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/html-to-storage.js +394 -0
- package/lib/macro-converter.js +3 -126
- package/lib/storage-walker.js +21 -6
- package/npm-shrinkwrap.json +2 -2
- package/package.json +1 -1
|
@@ -0,0 +1,394 @@
|
|
|
1
|
+
// HTML → Confluence storage walker. Parses with htmlparser2 using
|
|
2
|
+
// `decodeEntities: false` so attribute and text entities round-trip
|
|
3
|
+
// byte-identical. Dispatches by tag; unhandled tags pass through with
|
|
4
|
+
// attributes preserved.
|
|
5
|
+
|
|
6
|
+
const { parseDocument } = require('htmlparser2');
|
|
7
|
+
|
|
8
|
+
const VALID_LINK_STYLES = ['smart', 'plain', 'wiki'];
|
|
9
|
+
// Hard cap on input HTML nesting to keep the recursive walker off the JS
|
|
10
|
+
// stack ceiling for pathological / malicious input.
|
|
11
|
+
const DEFAULT_MAX_DEPTH = 256;
|
|
12
|
+
|
|
13
|
+
// Error thrown when the input HTML nests deeper than the walker allows.
// The limit that was exceeded is exposed on the instance as `maxDepth`.
class HtmlDepthExceededError extends Error {
  constructor(maxDepth) {
    const message = `HTML nesting exceeds limit of ${maxDepth} levels`;
    super(message);
    Object.assign(this, { name: 'HtmlDepthExceededError', maxDepth });
  }
}
|
|
20
|
+
// Only `hr` is normalized to self-closing. `br` / `img` flow through in
|
|
21
|
+
// whatever shape the source had (markdown-it emits them without a slash).
|
|
22
|
+
const VOID_TAGS = new Set(['hr']);
|
|
23
|
+
const CALLOUT_MARKERS = ['info', 'warning', 'note'];
|
|
24
|
+
|
|
25
|
+
// Phrasing-content tags that trigger the `<li>` / `<th>` / `<td>` `<p>`-wrap
|
|
26
|
+
// quirk: if an item contains only inline children and no text-node newline,
|
|
27
|
+
// the walker wraps its content in `<p>`. markdown-it never emits the latter
|
|
28
|
+
// half of this set, but raw HTML input does, so they need the same treatment.
|
|
29
|
+
const INLINE_TAGS = new Set([
|
|
30
|
+
'a', 'strong', 'em', 'code', 'br', 'img', 'span',
|
|
31
|
+
'mark', 'sub', 'sup', 'ins', 'del', 'b', 'i', 'u', 'small', 's',
|
|
32
|
+
'abbr', 'kbd', 'q', 'var', 'cite', 'time', 'dfn', 'samp',
|
|
33
|
+
]);
|
|
34
|
+
|
|
35
|
+
// Decides whether the content of a list item / table cell gets wrapped in
// `<p>`. Wrapping applies when the node has no children array, or when no
// child is a text node containing a newline and no child is a tag outside
// INLINE_TAGS.
function shouldWrapInP(node) {
  const kids = node.children;
  if (!kids) return true;
  const preventsWrap = (kid) =>
    (kid.type === 'text' && kid.data.includes('\n')) ||
    (kid.type === 'tag' && !INLINE_TAGS.has(kid.name));
  return !kids.some(preventsWrap);
}
|
|
43
|
+
|
|
44
|
+
// True only for a text node whose data consists entirely of whitespace
// (the empty string included). Any other node type yields false.
function isWhitespaceOnly(node) {
  if (node.type !== 'text') return false;
  return /^\s*$/.test(node.data);
}
|
|
47
|
+
|
|
48
|
+
// Returns the node's children minus whitespace-only text nodes, so that
// structural shape checks (a single `<strong>` inside a paragraph, etc.)
// are not thrown off by leading/trailing whitespace text siblings. A node
// without a children array yields an empty list.
function meaningfulChildren(node) {
  const kept = [];
  for (const child of node.children || []) {
    if (!isWhitespaceOnly(child)) kept.push(child);
  }
  return kept;
}
|
|
54
|
+
|
|
55
|
+
// Recognizes the `<p><strong>TOC</strong></p>` and
// `<p><strong>ANCHOR: id</strong></p>` macro markers. The shape is strict on
// purpose: a paragraph with exactly one meaningful child, a `<strong>`, which
// itself holds exactly one meaningful text child. Anything else returns null
// and is rendered as an ordinary paragraph.
// Returns `{ kind: 'toc' }`, `{ kind: 'anchor', id }`, or null.
function detectParagraphMarker(node) {
  if (node.name !== 'p') return null;

  const [strong, ...extraKids] = meaningfulChildren(node);
  if (!strong || extraKids.length > 0) return null;

  const isStrongTag = strong.type === 'tag' && strong.name === 'strong';
  if (!isStrongTag) return null;

  const inner = meaningfulChildren(strong);
  if (inner.length !== 1 || inner[0].type !== 'text') return null;

  const label = inner[0].data;
  if (label === 'TOC') return { kind: 'toc' };

  const found = label.match(/^ANCHOR: (.+)$/);
  return found ? { kind: 'anchor', id: found[1] } : null;
}
|
|
73
|
+
|
|
74
|
+
// Recognizes the EXPAND opener `<p><strong>EXPAND: …</strong></p>`. The
// title may carry nested inline HTML (em, code, a, s, …) that is stripped
// later, so the only requirement on the `<strong>` is that its very first
// child is a text node starting with `EXPAND: ` — it need not be the only
// child.
function isExpandOpen(node) {
  const isParagraph = node.type === 'tag' && node.name === 'p';
  if (!isParagraph) return false;

  const kids = meaningfulChildren(node);
  if (kids.length !== 1) return false;

  const [strong] = kids;
  if (strong.type !== 'tag' || strong.name !== 'strong') return false;

  const lead = strong.children && strong.children[0];
  if (!lead) return false;
  return lead.type === 'text' && lead.data.startsWith('EXPAND: ');
}
|
|
88
|
+
|
|
89
|
+
// Recognizes the EXPAND terminator `<p><strong>EXPAND_END</strong></p>`:
// a paragraph whose single meaningful child is a `<strong>` whose single
// meaningful child is the text `EXPAND_END`.
function isExpandClose(node) {
  const isParagraph = node.type === 'tag' && node.name === 'p';
  if (!isParagraph) return false;

  const kids = meaningfulChildren(node);
  if (kids.length !== 1) return false;

  const [strong] = kids;
  const isStrongTag = strong.type === 'tag' && strong.name === 'strong';
  if (!isStrongTag) return false;

  const inner = meaningfulChildren(strong);
  if (inner.length !== 1) return false;

  const [label] = inner;
  return label.type === 'text' && label.data === 'EXPAND_END';
}
|
|
100
|
+
|
|
101
|
+
// Decodes the basic HTML entities (`&amp;`, `&lt;`, `&gt;`, `&quot;` and, in
// the default mode only, `&#39;`) in `text` and returns the decoded string.
//
// Replacement order matters for doubly-escaped input: the default order
// replaces `&amp;` first, which over-decodes (`&amp;lt;` becomes `&lt;` and
// then collapses all the way to `<`). `preserveDouble` reverses the order so
// `&amp;` is decoded last and the same input round-trips as `&lt;` instead.
// Both orderings are intentional — call sites pick via the option.
function decodeEntities(text, { preserveDouble = false } = {}) {
  if (preserveDouble) {
    // Apostrophe (`&#39;`) intentionally omitted from this branch: the
    // previous code-fence decoder didn't list it either, only the anchor
    // body decoder did. The asymmetry is preserved for byte parity.
    return text
      .replace(/&quot;/g, '"')
      .replace(/&lt;/g, '<')
      .replace(/&gt;/g, '>')
      .replace(/&amp;/g, '&');
  }
  return text
    .replace(/&amp;/g, '&')
    .replace(/&lt;/g, '<')
    .replace(/&gt;/g, '>')
    .replace(/&quot;/g, '"')
    .replace(/&#39;/g, '\'');
}
|
|
124
|
+
|
|
125
|
+
// Converts an `<a>` element to Confluence storage markup.
//
// Anchor links (`href="#id"`) short-circuit linkStyle and always become an
// `<ac:link ac:anchor>`; external hrefs branch on `ctx.linkStyle`:
//   'smart' — `<a>` with `data-card-appearance="inline"` forced,
//   'wiki'  — `<ac:link>` + `<ri:url>` with a plain-text CDATA body,
//   'plain' (and anything else) — the `<a>` re-rendered with its attributes.
//
// Values placed in double-quoted attribute slots go through
// `escapeAttrValue`, and CDATA bodies have any `]]>` split across two CDATA
// sections, so a literal `"` in the href or `]]>` in the link text cannot
// break the emitted XML.
function convertLink(node, ctx) {
  const attribs = node.attribs || {};
  const href = attribs.href || '';
  const inner = walkChildren(node, ctx);
  // A CDATA section ends at the first `]]>`; splitting the sequence keeps
  // the text intact while staying well-formed.
  const toCdataSafe = (s) => s.replace(/]]>/g, ']]]]><![CDATA[>');

  if (href.startsWith('#')) {
    const anchor = escapeAttrValue(href.slice(1));
    const text = toCdataSafe(decodeEntities(inner));
    return `<ac:link ac:anchor="${anchor}"><ac:plain-text-link-body><![CDATA[${text}]]></ac:plain-text-link-body></ac:link>`;
  }

  switch (ctx.linkStyle) {
    case 'smart': {
      // Spread order forces policy: any pre-existing `data-card-appearance`
      // on the source `<a>` is overwritten — smart links must be inline.
      const merged = { ...attribs, 'data-card-appearance': 'inline' };
      return `<a${renderAttrs(merged)}>${inner}</a>`;
    }
    case 'wiki':
      return `<ac:link><ri:url ri:value="${escapeAttrValue(href)}" /><ac:plain-text-link-body><![CDATA[${toCdataSafe(inner)}]]></ac:plain-text-link-body></ac:link>`;
    case 'plain':
    default:
      return `<a${renderAttrs(attribs)}>${inner}</a>`;
  }
}
|
|
152
|
+
|
|
153
|
+
// `> **INFO|WARNING|NOTE**` blockquotes become callout macros; everything
// else passes through. Detection is structural: the `<strong>` marker must be
// the first meaningful child of the blockquote's first paragraph, which rules
// out false positives for a mid-paragraph `**INFO**`.
//
// Two markdown-it shapes are recognized:
//   separated: `<blockquote><p><strong>INFO</strong></p><p>body</p></blockquote>`
//   same-line: `<blockquote><p><strong>INFO</strong>\nbody</p></blockquote>`
//
// Returns null when the blockquote is not a callout, otherwise
// `{ marker, sameLine, markerP, tail }` where `marker` is the lower-case
// macro name, `markerP` the paragraph holding the marker, and `tail` the
// marker paragraph's children after the `<strong>`.
function detectBlockquoteCallout(node) {
  const [firstP] = meaningfulChildren(node);
  if (!firstP) return null;
  if (firstP.type !== 'tag' || firstP.name !== 'p') return null;

  const pKids = firstP.children || [];
  const firstIdx = pKids.findIndex((c) => !isWhitespaceOnly(c));
  if (firstIdx < 0) return null;

  const strong = pKids[firstIdx];
  const isStrongTag = strong.type === 'tag' && strong.name === 'strong';
  if (!isStrongTag) return null;

  const strongKids = meaningfulChildren(strong);
  if (strongKids.length !== 1) return null;
  const [label] = strongKids;
  if (label.type !== 'text') return null;

  const marker = CALLOUT_MARKERS.find((m) => label.data === m.toUpperCase());
  if (!marker) return null;

  const tail = pKids.slice(firstIdx + 1);
  const sameLine = !tail.every((c) => isWhitespaceOnly(c));
  if (sameLine) {
    // Inspect tail[0] itself, not the first meaningful sibling: the body of
    // `> **INFO**\n> body` literally starts with a newline. A text node
    // right after the marker that lacks a leading newline
    // (`<strong>INFO</strong> body`) is prose continuation, not a callout.
    const lead = tail[0];
    const startsOnNewLine = lead.type === 'text' && /^\s*\n/.test(lead.data);
    if (!startsOnNewLine) return null;
  }
  return { marker, sameLine, markerP: firstP, tail };
}
|
|
187
|
+
|
|
188
|
+
// Renders a `<blockquote>`. A plain quotation is re-emitted with its walked
// children; a detected callout becomes an `<ac:structured-macro>` named after
// the marker, with the marker itself removed from the body.
function convertBlockquote(node, ctx) {
  const callout = detectBlockquoteCallout(node);
  if (!callout) return `<blockquote>${walkChildren(node, ctx)}</blockquote>`;

  const { marker, sameLine, markerP, tail } = callout;
  // Everything in the blockquote except the marker paragraph, walked and
  // concatenated.
  const renderRest = () =>
    (node.children || [])
      .filter((kid) => kid !== markerP)
      .map((kid) => walkNode(kid, ctx))
      .join('');

  let body;
  if (sameLine) {
    // Same-line form: whatever follows the strong inside the marker
    // paragraph is the body of the first `<p>`. The newline markdown-it
    // emits between the strong and the body text is stripped.
    const walkedTail = tail.map((kid) => walkNode(kid, ctx)).join('');
    const firstPBody = walkedTail.replace(/^\s*\n/, '');
    body = `<p>${firstPBody}</p>${renderRest()}`;
  } else {
    // Separated form: the marker paragraph is dropped entirely; leading
    // whitespace left behind by its neighboring text node is trimmed.
    body = renderRest().replace(/^\s+/, '');
  }
  return `<ac:structured-macro ac:name="${marker}">
<ac:rich-text-body>${body}</ac:rich-text-body>
</ac:structured-macro>`;
}
|
|
220
|
+
|
|
221
|
+
// Converts a `<pre>` element.
//
// Strict `<pre><code>` adjacency only — a `<pre>` whose single child is not a
// `<code>` tag (whitespace siblings, any other shape) falls through as a
// plain `<pre>` with its children walked normally. For a real code block the
// result is a Confluence `code` macro whose language comes from the
// `language-xxx` class (default `text`).
//
// The body needs a manual entity decode because the parser keeps entities
// raw and CDATA is opaque downstream. One trailing newline is dropped and any
// `]]>` is split so it cannot terminate the CDATA section early.
function convertCodeBlock(node, ctx) {
  const children = node.children || [];
  const isCodeBlock = children.length === 1 &&
    children[0].type === 'tag' &&
    children[0].name === 'code';
  if (!isCodeBlock) {
    return `<pre>${walkChildren(node, ctx)}</pre>`;
  }
  const codeNode = children[0];
  const classAttr = (codeNode.attribs || {}).class || '';
  const langMatch = classAttr.match(/language-(\w+)/);
  const language = langMatch ? langMatch[1] : 'text';

  // Gather text from every descendant in document order, not just direct
  // children: raw HTML input may wrap code in nested elements (e.g.
  // highlighting spans) whose text would otherwise be silently dropped.
  // An explicit stack is used so deeply nested input cannot overflow the
  // call stack.
  let body = '';
  const pending = [...(codeNode.children || [])].reverse();
  while (pending.length > 0) {
    const current = pending.pop();
    if (current.type === 'text') {
      body += current.data;
    } else if (current.children) {
      for (let i = current.children.length - 1; i >= 0; i--) {
        pending.push(current.children[i]);
      }
    }
  }

  body = decodeEntities(body.replace(/\n$/, ''), { preserveDouble: true })
    .replace(/]]>/g, ']]]]><![CDATA[>');
  return `<ac:structured-macro ac:name="code"><ac:parameter ac:name="language">${language}</ac:parameter><ac:plain-text-body><![CDATA[${body}]]></ac:plain-text-body></ac:structured-macro>`;
}
|
|
245
|
+
|
|
246
|
+
// Re-escape literal `"` inside attribute values as `&quot;`. htmlparser2 with
// `decodeEntities: false` keeps source-escaped entities intact, but a
// single-quoted source attribute (`<a title='he said "hi"'>`) lands a
// literal `"` here that would close the emitted double-quoted slot and
// corrupt the XML. `&` is left as-is so already-escaped sources
// (`&amp;`, `&quot;`, …) round-trip cleanly.
//
// Trust boundary: input is assumed to be valid HTML. A valid HTML
// attribute value cannot contain raw `<` or `>` (they must be entities),
// so they're not escaped here. Malformed input that smuggles raw
// angle brackets through would produce malformed XML.
//
// The value is coerced with `String()` first, so non-string values are
// accepted.
function escapeAttrValue(v) {
  return String(v).replace(/"/g, '&quot;');
}
|
|
260
|
+
|
|
261
|
+
// Serializes an attribute map as ` name="value"` pairs (each with a leading
// space) in key order, passing every value through `escapeAttrValue`. A
// missing map renders as the empty string.
function renderAttrs(attribs) {
  if (!attribs) return '';
  let rendered = '';
  for (const name of Object.keys(attribs)) {
    rendered += ` ${name}="${escapeAttrValue(attribs[name])}"`;
  }
  return rendered;
}
|
|
267
|
+
|
|
268
|
+
// Walks a node's children and returns their concatenated storage markup.
// A node without a children array yields the empty string.
//
// Sibling-level EXPAND spans are collapsed here: an EXPAND opener paragraph
// and the first EXPAND_END paragraph after it become a single `expand`
// macro whose body is everything between them. An opener with no matching
// close is walked as an ordinary paragraph.
function walkChildren(node, ctx) {
  if (!node.children) return '';
  const children = node.children;
  const out = [];
  let i = 0;
  while (i < children.length) {
    const child = children[i];
    // Pairs the first EXPAND_END after this open: a nested EXPAND
    // open/close pair inside the body would have its close consumed by the
    // outer open, leaving the second close as an orphan paragraph. Same
    // non-greedy behavior as the previous regex pipeline.
    if (isExpandOpen(child)) {
      const endIdx = children.findIndex((c, j) => j > i && isExpandClose(c));
      if (endIdx !== -1) {
        // isExpandOpen locates the `<strong>` among the paragraph's
        // meaningful children, so look it up the same way; taking
        // `child.children[0]` would pick up a leading whitespace text node
        // and produce an empty title.
        const titleStrong = meaningfulChildren(child)[0];
        const titleHtml = walkChildren(titleStrong, ctx).replace(/^EXPAND: /, '');
        // Confluence's `<ac:parameter>` normalizer is text-only (rejects `<s>`
        // with HTTP 500, silently truncates at the first '<'). Strip literal
        // tags; entities survive because the rule requires a literal '<'.
        const cleanTitle = titleHtml.replace(/<[^>]+>/g, '').trim();
        const bodyHtml = children
          .slice(i + 1, endIdx)
          .map((c) => walkNode(c, ctx))
          .join('')
          .trim();
        out.push(`<ac:structured-macro ac:name="expand"><ac:parameter ac:name="title">${cleanTitle}</ac:parameter><ac:rich-text-body>${bodyHtml}</ac:rich-text-body></ac:structured-macro>`);
        i = endIdx + 1;
        continue;
      }
    }
    out.push(walkNode(child, ctx));
    i++;
  }
  return out.join('');
}
|
|
305
|
+
|
|
306
|
+
// Renders a single node. Text nodes return their data verbatim; any node
// type other than `tag` renders as the empty string. Tag nodes are handed to
// `dispatchTag` with `ctx.depth` incremented for the duration of the call;
// if that would exceed `ctx.maxDepth`, an HtmlDepthExceededError is thrown
// instead. `ctx.depth` is restored on every exit path.
function walkNode(node, ctx) {
  switch (node.type) {
    case 'text':
      return node.data;
    case 'tag':
      break;
    default:
      return '';
  }

  ctx.depth += 1;
  try {
    if (ctx.depth > ctx.maxDepth) {
      throw new HtmlDepthExceededError(ctx.maxDepth);
    }
    return dispatchTag(node, ctx);
  } finally {
    ctx.depth -= 1;
  }
}
|
|
319
|
+
|
|
320
|
+
// Renders one tag node by name.
//   p                → TOC / ANCHOR macro when the paragraph is a marker,
//                      otherwise the paragraph with attributes
//   hr / br / img    → `<hr />`, `<br>`, `<img …>` (no closing tag)
//   ul / ol / li     → emitted without attributes; `li` content is wrapped
//                      in `<p>` when `shouldWrapInP` says so
//   pre / a / blockquote → delegated to their converters
//   th / td          → attributes kept, content `<p>`-wrapped like `li`
//   everything else  → passed through with attributes and walked children
//                      (self-closing if listed in VOID_TAGS)
function dispatchTag(node, ctx) {
  const { name } = node;
  const attrs = () => renderAttrs(node.attribs);
  const kids = () => walkChildren(node, ctx);
  const passThrough = () => `<${name}${attrs()}>${kids()}</${name}>`;

  if (name === 'p') {
    const marker = detectParagraphMarker(node);
    if (marker && marker.kind === 'toc') return '<ac:structured-macro ac:name="toc" />';
    if (marker && marker.kind === 'anchor') {
      return `<ac:structured-macro ac:name="anchor"><ac:parameter ac:name="">${marker.id}</ac:parameter></ac:structured-macro>`;
    }
    return `<p${attrs()}>${kids()}</p>`;
  }

  if (['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'strong', 'em'].includes(name)) {
    return passThrough();
  }

  if (name === 'hr') return '<hr />';
  if (name === 'br') return '<br>';
  if (name === 'img') return `<img${attrs()}>`;

  if (name === 'ul' || name === 'ol') return `<${name}>${kids()}</${name}>`;

  if (name === 'li') {
    const inner = kids();
    if (shouldWrapInP(node)) return `<li><p>${inner}</p></li>`;
    return `<li>${inner}</li>`;
  }

  if (name === 'pre') return convertCodeBlock(node, ctx);

  // Inline only — `<code>` inside `<pre>` is consumed by convertCodeBlock.
  if (name === 'code') return `<code${attrs()}>${kids()}</code>`;

  if (name === 'a') return convertLink(node, ctx);
  if (name === 'blockquote') return convertBlockquote(node, ctx);

  if (['table', 'thead', 'tbody', 'tfoot', 'tr'].includes(name)) {
    return passThrough();
  }

  if (name === 'th' || name === 'td') {
    const inner = kids();
    const open = `<${name}${attrs()}>`;
    if (shouldWrapInP(node)) return `${open}<p>${inner}</p></${name}>`;
    return `${open}${inner}</${name}>`;
  }

  if (VOID_TAGS.has(name)) return `<${name}${attrs()} />`;
  return passThrough();
}
|
|
380
|
+
|
|
381
|
+
// Entry point: converts an HTML string to Confluence storage markup.
//
// Options:
//   isCloud   — truthy selects the Cloud default link style.
//   linkStyle — one of VALID_LINK_STYLES; anything else falls back to
//               'smart' when `isCloud` is set and 'wiki' otherwise.
//   maxDepth  — nesting limit for the walker; any non-number (or NaN, which
//               would otherwise make every depth comparison false and
//               silently disable the limit) falls back to DEFAULT_MAX_DEPTH.
// A null/undefined `options` is treated as `{}`.
//
// The document is parsed with `decodeEntities: false` and its children are
// walked with a fresh context.
function htmlToStorage(html, options = {}) {
  const opts = options || {};
  const isCloud = !!opts.isCloud;
  const linkStyle = VALID_LINK_STYLES.includes(opts.linkStyle)
    ? opts.linkStyle
    : (isCloud ? 'smart' : 'wiki');
  const hasUsableMaxDepth = typeof opts.maxDepth === 'number' && !Number.isNaN(opts.maxDepth);
  const ctx = {
    linkStyle,
    depth: 0,
    maxDepth: hasUsableMaxDepth ? opts.maxDepth : DEFAULT_MAX_DEPTH,
  };
  return walkChildren(parseDocument(html, { decodeEntities: false }), ctx);
}
|
|
393
|
+
|
|
394
|
+
module.exports = { htmlToStorage, HtmlDepthExceededError };
|
package/lib/macro-converter.js
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
const MarkdownIt = require('markdown-it');
|
|
2
2
|
const { StorageWalker } = require('./storage-walker');
|
|
3
|
+
const { htmlToStorage } = require('./html-to-storage');
|
|
3
4
|
|
|
4
5
|
const VALID_LINK_STYLES = ['smart', 'plain', 'wiki'];
|
|
5
6
|
const CALLOUT_MARKERS = ['info', 'warning', 'note'];
|
|
@@ -39,9 +40,7 @@ class MacroConverter {
|
|
|
39
40
|
|
|
40
41
|
// Anchor `[!info]` to the start of a line (string start or after a
|
|
41
42
|
// newline) so prose mid-paragraph, headings on the same line, and
|
|
42
|
-
// `> [!info]` GitHub-style alerts are left alone.
|
|
43
|
-
// otherwise expand to a nested blockquote that the storage handler's
|
|
44
|
-
// lazy regex cannot balance, producing malformed XML.
|
|
43
|
+
// `> [!info]` GitHub-style alerts are left alone.
|
|
45
44
|
for (const m of CALLOUT_MARKERS) {
|
|
46
45
|
const re = new RegExp(`(^|\\n)\\[!${m}\\]\\s*([\\s\\S]*?)(?=\\n\\s*\\n|\\n\\s*\\[!|$)`, 'g');
|
|
47
46
|
state.src = state.src.replace(re, (_, pre, content) =>
|
|
@@ -68,129 +67,7 @@ class MacroConverter {
|
|
|
68
67
|
}
|
|
69
68
|
|
|
70
69
|
htmlToConfluenceStorage(html) {
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
storage = storage.replace(/<h([1-6])>(.*?)<\/h[1-6]>/g, '<h$1>$2</h$1>');
|
|
74
|
-
|
|
75
|
-
storage = storage.replace(/<p>(.*?)<\/p>/g, '<p>$1</p>');
|
|
76
|
-
|
|
77
|
-
storage = storage.replace(/<strong>(.*?)<\/strong>/g, '<strong>$1</strong>');
|
|
78
|
-
|
|
79
|
-
storage = storage.replace(/<em>(.*?)<\/em>/g, '<em>$1</em>');
|
|
80
|
-
|
|
81
|
-
storage = storage.replace(/<ul>(.*?)<\/ul>/gs, '<ul>$1</ul>');
|
|
82
|
-
storage = storage.replace(/<li>(.*?)<\/li>/g, '<li><p>$1</p></li>');
|
|
83
|
-
|
|
84
|
-
storage = storage.replace(/<ol>(.*?)<\/ol>/gs, '<ol>$1</ol>');
|
|
85
|
-
|
|
86
|
-
storage = storage.replace(/<pre><code(?:\s+class="language-(\w+)")?>(.*?)<\/code><\/pre>/gs, (_, lang, code) => {
|
|
87
|
-
const language = lang || 'text';
|
|
88
|
-
const decodedCode = code.replace(/\n$/, '')
|
|
89
|
-
.replace(/"/g, '"')
|
|
90
|
-
.replace(/</g, '<')
|
|
91
|
-
.replace(/>/g, '>')
|
|
92
|
-
.replace(/&/g, '&');
|
|
93
|
-
const safeCode = decodedCode.replace(/]]>/g, ']]]]><![CDATA[>');
|
|
94
|
-
return `<ac:structured-macro ac:name="code"><ac:parameter ac:name="language">${language}</ac:parameter><ac:plain-text-body><![CDATA[${safeCode}]]></ac:plain-text-body></ac:structured-macro>`;
|
|
95
|
-
});
|
|
96
|
-
|
|
97
|
-
storage = storage.replace(/<code>(.*?)<\/code>/g, '<code>$1</code>');
|
|
98
|
-
|
|
99
|
-
// **TOC** paragraph → Confluence Table of Contents macro (uses macro defaults)
|
|
100
|
-
storage = storage.replace(
|
|
101
|
-
/<p><strong>TOC<\/strong><\/p>/g,
|
|
102
|
-
'<ac:structured-macro ac:name="toc" />'
|
|
103
|
-
);
|
|
104
|
-
|
|
105
|
-
storage = storage.replace(/<blockquote>(.*?)<\/blockquote>/gs, (_, content) => {
|
|
106
|
-
// Detect the marker only when it sits at the very start of the first
|
|
107
|
-
// paragraph, immediately followed by a `</p>` close (separated form) or
|
|
108
|
-
// a `\n` (same-line body form). This is the same anchor condition the
|
|
109
|
-
// strip step uses below, so detection and stripping stay in sync.
|
|
110
|
-
// Without this anchor, a quotation that merely *mentions* `**INFO**` —
|
|
111
|
-
// e.g. `> Use **INFO** at the start.` — would be silently wrapped in an
|
|
112
|
-
// info macro, surprising the author.
|
|
113
|
-
const marker = CALLOUT_MARKERS.find((m) =>
|
|
114
|
-
new RegExp(`<p><strong>${m.toUpperCase()}<\\/strong>(<\\/p>|\\s*\\n)`).test(content)
|
|
115
|
-
);
|
|
116
|
-
if (!marker) {
|
|
117
|
-
// Plain blockquote — `> …` is a quotation, not an alert. Use the
|
|
118
|
-
// `> **INFO**` / `> **WARNING**` / `> **NOTE**` markers above to
|
|
119
|
-
// produce a Confluence info / warning / note macro instead.
|
|
120
|
-
return `<blockquote>${content}</blockquote>`;
|
|
121
|
-
}
|
|
122
|
-
// Strip the leading `<strong>MARKER</strong>`. markdown-it produces two
|
|
123
|
-
// shapes depending on whether a blank `>` line separates marker and body:
|
|
124
|
-
// case A (separated): `<p><strong>MARKER</strong></p>\n<p>body</p>`
|
|
125
|
-
// case B (same-line): `<p><strong>MARKER</strong>\nbody</p>`
|
|
126
|
-
// The original cleanup only handled case A, so case B leaked the marker
|
|
127
|
-
// into the rendered macro body. README's recommended `> **INFO**\n> body`
|
|
128
|
-
// form parses as case B — exactly the form that broke.
|
|
129
|
-
const cleanContent = content.replace(
|
|
130
|
-
new RegExp(`<p><strong>${marker.toUpperCase()}<\\/strong>(<\\/p>\\s*|\\s*\\n)`),
|
|
131
|
-
(_, tail) => tail.startsWith('</p>') ? '' : '<p>'
|
|
132
|
-
);
|
|
133
|
-
return `<ac:structured-macro ac:name="${marker}">
|
|
134
|
-
<ac:rich-text-body>${cleanContent}</ac:rich-text-body>
|
|
135
|
-
</ac:structured-macro>`;
|
|
136
|
-
});
|
|
137
|
-
|
|
138
|
-
storage = storage.replace(/<table>(.*?)<\/table>/gs, '<table>$1</table>');
|
|
139
|
-
storage = storage.replace(/<thead>(.*?)<\/thead>/gs, '<thead>$1</thead>');
|
|
140
|
-
storage = storage.replace(/<tbody>(.*?)<\/tbody>/gs, '<tbody>$1</tbody>');
|
|
141
|
-
storage = storage.replace(/<tr>(.*?)<\/tr>/gs, '<tr>$1</tr>');
|
|
142
|
-
storage = storage.replace(/<th>(.*?)<\/th>/g, '<th><p>$1</p></th>');
|
|
143
|
-
storage = storage.replace(/<td>(.*?)<\/td>/g, '<td><p>$1</p></td>');
|
|
144
|
-
|
|
145
|
-
// **ANCHOR: id** paragraph → Confluence anchor macro
|
|
146
|
-
storage = storage.replace(
|
|
147
|
-
/<p><strong>ANCHOR: (.*?)<\/strong><\/p>/g,
|
|
148
|
-
'<ac:structured-macro ac:name="anchor"><ac:parameter ac:name="">$1</ac:parameter></ac:structured-macro>'
|
|
149
|
-
);
|
|
150
|
-
|
|
151
|
-
// **EXPAND: title** … **EXPAND_END** → Confluence expand macro. Runs
|
|
152
|
-
// after code/blockquote/table conversion so the body can contain those
|
|
153
|
-
// macros. Strips inline HTML from the title because Confluence's storage
|
|
154
|
-
// normalizer treats <ac:parameter> as text-only — it silently truncates
|
|
155
|
-
// at the first '<' and rejects <s> outright with HTTP 500. Entities
|
|
156
|
-
// (&, <) survive because the regex requires a literal '<'.
|
|
157
|
-
storage = storage.replace(
|
|
158
|
-
/<p><strong>EXPAND: (.*?)<\/strong><\/p>\s*([\s\S]*?)\s*<p><strong>EXPAND_END<\/strong><\/p>/g,
|
|
159
|
-
(_, title, body) => {
|
|
160
|
-
const cleanTitle = title.replace(/<[^>]+>/g, '').trim();
|
|
161
|
-
return `<ac:structured-macro ac:name="expand"><ac:parameter ac:name="title">${cleanTitle}</ac:parameter><ac:rich-text-body>${body.trim()}</ac:rich-text-body></ac:structured-macro>`;
|
|
162
|
-
}
|
|
163
|
-
);
|
|
164
|
-
|
|
165
|
-
// Same-page anchor links (href="#id") → ac:link with ac:anchor. Must run
|
|
166
|
-
// before the general link conversion below so the #id pattern is not
|
|
167
|
-
// consumed by the generic <a href> replacement (and so it works under
|
|
168
|
-
// all linkStyle modes, including "plain" which leaves <a> tags as-is).
|
|
169
|
-
storage = storage.replace(/<a href="#(.*?)">(.*?)<\/a>/gs, (_, anchor, body) => {
|
|
170
|
-
const text = body
|
|
171
|
-
.replace(/&/g, '&')
|
|
172
|
-
.replace(/</g, '<')
|
|
173
|
-
.replace(/>/g, '>')
|
|
174
|
-
.replace(/"/g, '"')
|
|
175
|
-
.replace(/'/g, '\'');
|
|
176
|
-
return `<ac:link ac:anchor="${anchor}"><ac:plain-text-link-body><![CDATA[${text}]]></ac:plain-text-link-body></ac:link>`;
|
|
177
|
-
});
|
|
178
|
-
|
|
179
|
-
// Convert links based on linkStyle:
|
|
180
|
-
// "smart" — Cloud smart links (<a data-card-appearance="inline">)
|
|
181
|
-
// "plain" — simple <a href>; workaround for "Cannot handle: DefaultLink"
|
|
182
|
-
// errors on custom-domain Cloud instances
|
|
183
|
-
// "wiki" — Server/DC ac:link + ri:url storage format
|
|
184
|
-
if (this.linkStyle === 'smart') {
|
|
185
|
-
storage = storage.replace(/<a href="(.*?)">(.*?)<\/a>/g, '<a href="$1" data-card-appearance="inline">$2</a>');
|
|
186
|
-
} else if (this.linkStyle === 'wiki') {
|
|
187
|
-
storage = storage.replace(/<a href="(.*?)">(.*?)<\/a>/g, '<ac:link><ri:url ri:value="$1" /><ac:plain-text-link-body><![CDATA[$2]]></ac:plain-text-link-body></ac:link>');
|
|
188
|
-
}
|
|
189
|
-
// "plain" — leave <a href> tags as-is
|
|
190
|
-
|
|
191
|
-
storage = storage.replace(/<hr\s*\/?>/g, '<hr />');
|
|
192
|
-
|
|
193
|
-
return storage;
|
|
70
|
+
return htmlToStorage(html, { isCloud: this._isCloud, linkStyle: this.linkStyle });
|
|
194
71
|
}
|
|
195
72
|
|
|
196
73
|
detectLanguageLabels(text) {
|
package/lib/storage-walker.js
CHANGED
|
@@ -281,19 +281,23 @@ class StorageWalker {
|
|
|
281
281
|
|
|
282
282
|
handleAnchor(node) {
|
|
283
283
|
const param = this.findParamByName(node, '');
|
|
284
|
-
const id = param ? this.getTextContent(param) : '';
|
|
284
|
+
const id = (param ? this.getTextContent(param) : '').trim();
|
|
285
|
+
if (!id) return '';
|
|
285
286
|
return `\n**ANCHOR: ${id}**\n`;
|
|
286
287
|
}
|
|
287
288
|
|
|
288
289
|
handlePanel(node) {
|
|
289
290
|
const titleParam = this.findParamByName(node, 'title');
|
|
290
|
-
const title = titleParam ? this.getTextContent(titleParam) : '';
|
|
291
|
+
const title = (titleParam ? this.getTextContent(titleParam) : '').trim();
|
|
291
292
|
const body = this.getMacroBody(node);
|
|
292
293
|
// Trim before quoting — walkNodes wraps every <p> with a leading and
|
|
293
294
|
// trailing \n, so untrimmed body splits into ['', 'body', ''] and emits
|
|
294
295
|
// extra `>` blank lines that bracket the real content.
|
|
295
296
|
const cleanContent = this.walkNodes(body).trim();
|
|
297
|
+
if (!title && !cleanContent) return '';
|
|
296
298
|
const quoted = cleanContent.split('\n').map((line) => (line ? `> ${line}` : '>')).join('\n');
|
|
299
|
+
if (!title) return `\n${quoted}\n`;
|
|
300
|
+
if (!cleanContent) return `\n> **${title}**\n`;
|
|
297
301
|
return `\n> **${title}**\n>\n${quoted}\n`;
|
|
298
302
|
}
|
|
299
303
|
|
|
@@ -313,12 +317,13 @@ class StorageWalker {
|
|
|
313
317
|
if (!riPage) return '';
|
|
314
318
|
const spaceKey = decodeEntities(riPage.attribs['ri:space-key'] || '');
|
|
315
319
|
const title = decodeEntities(riPage.attribs['ri:content-title'] || '');
|
|
320
|
+
const escapedTitle = this.escapeMarkdownText(title);
|
|
316
321
|
const label = this.labels.includePage || 'Include Page';
|
|
317
322
|
if (spaceKey.startsWith('~')) {
|
|
318
323
|
const spacePath = `display/${spaceKey}/${encodeURIComponent(title)}`;
|
|
319
|
-
return `\n> 📄 **${label}**: [${title}](${this.buildUrl(`${this.webUrlPrefix}/${spacePath}`)})\n`;
|
|
324
|
+
return `\n> 📄 **${label}**: [${escapedTitle}](${this.buildUrl(`${this.webUrlPrefix}/${spacePath}`)})\n`;
|
|
320
325
|
}
|
|
321
|
-
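return `\n> 📄 **${label}**: [${title}](${this.buildUrl(`${this.webUrlPrefix}/spaces/${spaceKey}/pages/[PAGE_ID_HERE]`)}) _(manual link correction required)_\n`;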
|
|
326
|
+
return `\n> 📄 **${label}**: [${escapedTitle}](${this.buildUrl(`${this.webUrlPrefix}/spaces/${spaceKey}/pages/[PAGE_ID_HERE]`)}) _(manual link correction required)_\n`;
|
|
322
327
|
}
|
|
323
328
|
|
|
324
329
|
handleSharedBlock(node, type) {
|
|
@@ -330,7 +335,7 @@ class StorageWalker {
|
|
|
330
335
|
if (acLink) {
|
|
331
336
|
const riPage = this.findChildByName(acLink, 'ri:page');
|
|
332
337
|
if (riPage) {
|
|
333
|
-
const pageTitle = decodeEntities(riPage.attribs['ri:content-title'] || '');
|
|
338
|
+
const pageTitle = this.escapeMarkdownText(decodeEntities(riPage.attribs['ri:content-title'] || ''));
|
|
334
339
|
const includeLabel = this.labels.includeSharedBlock || 'Include Shared Block';
|
|
335
340
|
const fromPageLabel = this.labels.fromPage || 'from page';
|
|
336
341
|
return `\n> 📄 **${includeLabel}**: ${blockKey} (${fromPageLabel}: ${pageTitle} [link needs manual correction])\n`;
|
|
@@ -394,7 +399,7 @@ class StorageWalker {
|
|
|
394
399
|
}
|
|
395
400
|
const riPage = this.findChildByName(node, 'ri:page');
|
|
396
401
|
if (riPage) {
|
|
397
|
-
const title = decodeEntities(riPage.attribs['ri:content-title'] || '');
|
|
402
|
+
const title = this.escapeMarkdownText(decodeEntities(riPage.attribs['ri:content-title'] || ''));
|
|
398
403
|
return `[${title}]`;
|
|
399
404
|
}
|
|
400
405
|
return '';
|
|
@@ -454,6 +459,16 @@ class StorageWalker {
|
|
|
454
459
|
return decodeEntities(this._collectText(node));
|
|
455
460
|
}
|
|
456
461
|
|
|
462
|
+
// Escape markdown structural characters in text that will be interpolated
|
|
463
|
+
// into link syntax (`[text](url)`). Confluence page titles can legitimately
|
|
464
|
+
// contain `()` / `[]`, and a maliciously-crafted title could otherwise inject
|
|
465
|
+
// a sibling link or break downstream parsers. Backslash is escaped so that
|
|
466
|
+
// an existing `\` in a title isn't reinterpreted as a markdown escape.
|
|
467
|
+
escapeMarkdownText(s) {
|
|
468
|
+
if (!s) return '';
|
|
469
|
+
return s.replace(/([\\[\]()])/g, '\\$1');
|
|
470
|
+
}
|
|
471
|
+
|
|
457
472
|
_collectText(node) {
|
|
458
473
|
if (!node) return '';
|
|
459
474
|
if (node.type === 'text') return node.data || '';
|
package/npm-shrinkwrap.json
CHANGED
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "confluence-cli",
|
|
3
|
-
"version": "2.1.
|
|
3
|
+
"version": "2.1.10",
|
|
4
4
|
"lockfileVersion": 3,
|
|
5
5
|
"requires": true,
|
|
6
6
|
"packages": {
|
|
7
7
|
"": {
|
|
8
8
|
"name": "confluence-cli",
|
|
9
|
-
"version": "2.1.
|
|
9
|
+
"version": "2.1.10",
|
|
10
10
|
"license": "MIT",
|
|
11
11
|
"dependencies": {
|
|
12
12
|
"axios": "^1.15.0",
|