confluence-cli 2.1.4 → 2.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/html-to-markdown.js +72 -5
- package/npm-shrinkwrap.json +2 -2
- package/package.json +1 -1
package/lib/html-to-markdown.js
CHANGED
|
@@ -34,6 +34,29 @@ function htmlToMarkdown(html) {
|
|
|
34
34
|
|
|
35
35
|
markdown = markdown.replace(/<em[^>]*>(.*?)<\/em>/g, '*$1*');
|
|
36
36
|
|
|
37
|
+
// Multi-line <pre><code> → fenced block. Must run before the inline <code>
|
|
38
|
+
// rule and before catch-all tag stripping so indentation-sensitive bodies
|
|
39
|
+
// are wrapped in fences and skipped by the cleanup chain below.
|
|
40
|
+
markdown = markdown.replace(
|
|
41
|
+
/<pre[^>]*>\s*<code([^>]*)>([\s\S]*?)<\/code>\s*<\/pre>/g,
|
|
42
|
+
(_, codeAttrs, body) => {
|
|
43
|
+
// Stop the language token at whitespace so multi-class conventions
|
|
44
|
+
// like Prism / highlight.js (`class="language-js hljs"`) don't leak
|
|
45
|
+
// sibling class names into the fence info string.
|
|
46
|
+
const langMatch = codeAttrs.match(/class="language-([^"\s]+)/);
|
|
47
|
+
const lang = langMatch ? langMatch[1] : '';
|
|
48
|
+
const trimmed = body.replace(/^\n+|\n+$/g, '');
|
|
49
|
+
// Size against entity-decoded content: the entity-decode pass runs
|
|
50
|
+
// after this rule, so `&#96;` / `&#x60;` would otherwise expose
|
|
51
|
+
// backticks inside the fence post-emission and break it.
|
|
52
|
+
const decoded = trimmed
|
|
53
|
+
.replace(/&#(\d+);/g, (_, code) => String.fromCharCode(parseInt(code, 10)))
|
|
54
|
+
.replace(/&#x([0-9a-fA-F]+);/g, (_, code) => String.fromCharCode(parseInt(code, 16)));
|
|
55
|
+
const fence = '`'.repeat(fenceLength(decoded));
|
|
56
|
+
return `\n${fence}${lang}\n${trimmed}\n${fence}\n`;
|
|
57
|
+
}
|
|
58
|
+
);
|
|
59
|
+
|
|
37
60
|
markdown = markdown.replace(/<code[^>]*>(.*?)<\/code>/g, '`$1`');
|
|
38
61
|
|
|
39
62
|
markdown = markdown.replace(/<(\w+)[^>]*>/g, '<$1>');
|
|
@@ -142,16 +165,60 @@ function htmlToMarkdown(html) {
|
|
|
142
165
|
|
|
143
166
|
markdown = markdown.replace(/&([a-zA-Z]+);/g, (match, name) => NAMED_ENTITIES[name] || match);
|
|
144
167
|
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
168
|
+
// Split on fenced code boundaries so cleanup rules (indent stripping,
|
|
169
|
+
// multi-space collapsing) don't mangle indentation-sensitive code.
|
|
170
|
+
// Backreference matches dynamically-sized fences emitted above when the
|
|
171
|
+
// body itself contains backticks.
|
|
172
|
+
const segments = splitOnFences(markdown);
|
|
173
|
+
markdown = segments
|
|
174
|
+
.map((seg, i) => (i % 2 === 1 ? seg : cleanupOutsideFence(seg)))
|
|
175
|
+
.join('');
|
|
150
176
|
markdown = markdown.trim();
|
|
151
177
|
|
|
152
178
|
return markdown;
|
|
153
179
|
}
|
|
154
180
|
|
|
181
|
+
// CommonMark allows fenced code with N≥3 backticks where the body contains
|
|
182
|
+
// no run of N+ backticks. Pick the smallest N satisfying both so a code
|
|
183
|
+
// block whose payload itself contains ``` does not close its own fence.
|
|
184
|
+
function fenceLength(body) {
|
|
185
|
+
let max = 0;
|
|
186
|
+
const runs = body.match(/`+/g);
|
|
187
|
+
if (runs) {
|
|
188
|
+
for (const r of runs) if (r.length > max) max = r.length;
|
|
189
|
+
}
|
|
190
|
+
return Math.max(3, max + 1);
|
|
191
|
+
}
|
|
192
|
+
|
|
193
|
+
function splitOnFences(text) {
|
|
194
|
+
// CommonMark: a fence opens on a line that starts with up to 3 spaces
|
|
195
|
+
// followed by 3+ backticks, and closes on a line of equal-length
|
|
196
|
+
// backticks followed only by whitespace. Anchoring to line boundaries
|
|
197
|
+
// (^ / $ with m flag) prevents prose backticks (e.g. <p>literal ``` x</p>)
|
|
198
|
+
// from being mis-paired with real fence boundaries.
|
|
199
|
+
const result = [];
|
|
200
|
+
const re = /^ {0,3}(`{3,})[^\n]*\n[\s\S]*?\n {0,3}\1[\t ]*$/gm;
|
|
201
|
+
let lastIdx = 0;
|
|
202
|
+
let m;
|
|
203
|
+
while ((m = re.exec(text)) !== null) {
|
|
204
|
+
result.push(text.slice(lastIdx, m.index));
|
|
205
|
+
result.push(m[0]);
|
|
206
|
+
lastIdx = m.index + m[0].length;
|
|
207
|
+
}
|
|
208
|
+
result.push(text.slice(lastIdx));
|
|
209
|
+
return result;
|
|
210
|
+
}
|
|
211
|
+
|
|
212
|
+
function cleanupOutsideFence(text) {
|
|
213
|
+
let out = text;
|
|
214
|
+
out = out.replace(/[ \t]+$/gm, '');
|
|
215
|
+
out = out.replace(/^[ \t]+(?!([`>]|[*+-] |\d+[.)] ))/gm, '');
|
|
216
|
+
out = out.replace(/^(#{1,6}[^\n]+)\n(?!\n)/gm, '$1\n\n');
|
|
217
|
+
out = out.replace(/\n\s*\n\s*\n+/g, '\n\n');
|
|
218
|
+
out = out.replace(/[ \t]+/g, ' ');
|
|
219
|
+
return out;
|
|
220
|
+
}
|
|
221
|
+
|
|
155
222
|
module.exports = {
|
|
156
223
|
htmlToMarkdown,
|
|
157
224
|
NAMED_ENTITIES
|
package/npm-shrinkwrap.json
CHANGED
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "confluence-cli",
|
|
3
|
-
"version": "2.1.4",
|
|
3
|
+
"version": "2.1.6",
|
|
4
4
|
"lockfileVersion": 3,
|
|
5
5
|
"requires": true,
|
|
6
6
|
"packages": {
|
|
7
7
|
"": {
|
|
8
8
|
"name": "confluence-cli",
|
|
9
|
-
"version": "2.1.4",
|
|
9
|
+
"version": "2.1.6",
|
|
10
10
|
"license": "MIT",
|
|
11
11
|
"dependencies": {
|
|
12
12
|
"axios": "^1.15.0",
|