@f-o-t/markdown 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +305 -0
- package/dist/index.d.ts +1452 -0
- package/dist/index.js +3512 -0
- package/package.json +63 -0
package/dist/index.js
ADDED
|
@@ -0,0 +1,3512 @@
|
|
|
1
|
+
// src/utils.ts
|
|
2
|
+
/**
 * Sniffs a byte-order mark at the start of a byte sequence.
 *
 * @param {ArrayLike<number>} buffer - Bytes to inspect (indexable, with `length`).
 * @returns {{ encoding: string, bomLength: number }} The encoding implied by
 *   the BOM and the BOM's size in bytes. Without a recognised BOM the result
 *   is `{ encoding: "utf-8", bomLength: 0 }`.
 */
function detectEncoding(buffer) {
  // True when `buffer` begins with exactly the given byte signature.
  const startsWith = (...signature) =>
    buffer.length >= signature.length &&
    signature.every((byte, index) => buffer[index] === byte);

  if (startsWith(0xef, 0xbb, 0xbf)) {
    return { encoding: "utf-8", bomLength: 3 };
  }
  if (startsWith(0xff, 0xfe)) {
    return { encoding: "utf-16le", bomLength: 2 };
  }
  if (startsWith(0xfe, 0xff)) {
    return { encoding: "utf-16be", bomLength: 2 };
  }
  return { encoding: "utf-8", bomLength: 0 };
}
|
|
14
|
+
/**
 * Decodes a byte buffer to a string, honouring a leading byte-order mark.
 *
 * The encoding comes from `detectEncoding`; when a BOM is present its bytes
 * are dropped before decoding.
 *
 * @param {Uint8Array} buffer - Raw bytes to decode.
 * @returns {string} The decoded text.
 */
function decodeBuffer(buffer) {
  const bom = detectEncoding(buffer);
  const payload = bom.bomLength > 0 ? buffer.slice(bom.bomLength) : buffer;
  return new TextDecoder(bom.encoding).decode(payload);
}
|
|
20
|
+
/**
 * Reports which line terminator appears first in `content`.
 *
 * @param {string} content - Text to inspect.
 * @returns {string} "\r\n" when a CRLF occurs no later than the first LF,
 *   otherwise "\n" (also the answer for text without any line break).
 */
function detectLineEnding(content) {
  const firstCrlf = content.indexOf("\r\n");
  if (firstCrlf === -1) {
    return "\n";
  }
  const firstLf = content.indexOf("\n");
  return firstLf === -1 || firstCrlf <= firstLf ? "\r\n" : "\n";
}
|
|
32
|
+
/**
 * Converts every CRLF pair and every lone CR into a single LF.
 *
 * @param {string} content - Text with mixed line terminators.
 * @returns {string} The same text using "\n" exclusively.
 */
function normalizeLineEndings(content) {
  const carriageReturnWithOptionalLf = /\r\n?/g;
  return content.replace(carriageReturnWithOptionalLf, "\n");
}
|
|
36
|
+
/**
 * Turns textual newline escapes into real line feeds.
 *
 * Two passes are made: first the doubly escaped form (two backslashes
 * followed by `n`), then the singly escaped form (one backslash followed by
 * `n`).
 *
 * @param {string} content - Text that may contain literal `\n` sequences.
 * @returns {string} The text with those sequences replaced by "\n".
 */
function normalizeEscapedNewlines(content) {
  const lineFeed = "\n";
  const withoutDoubleEscapes = content.replace(/\\\\n/g, lineFeed);
  return withoutDoubleEscapes.replace(/\\n/g, lineFeed);
}
|
|
41
|
+
/**
 * Splits text into line records after normalising line endings to "\n".
 *
 * @param {string} content - Source text.
 * @returns {Array<{
 *   raw: string, content: string, indent: number,
 *   lineNumber: number, isBlank: boolean, offset: number
 * }>} One record per line:
 *   - `raw`: the line without its terminator;
 *   - `content`: `raw` with its leading spaces and tabs removed;
 *   - `indent`: the indentation width in columns as reported by `countIndent`
 *     (tabs expand to the next multiple of four);
 *   - `lineNumber`: 1-based index;
 *   - `isBlank`: whether `content` is empty;
 *   - `offset`: character offset of the line start within the normalised
 *     text (each terminator counts as one character).
 */
function splitLines(content) {
  let offset = 0;
  return normalizeLineEndings(content)
    .split("\n")
    .map((raw, index) => {
      const indent = countIndent(raw);
      // `indent` is a column width, not a character count: a single tab is
      // one character but up to four columns. Slicing by `indent` would eat
      // real content on tab-indented lines ("\tfoo".slice(4) === ""), so the
      // leading whitespace is measured in characters instead.
      const leadingChars = /^[ \t]*/.exec(raw)[0].length;
      const lineContent = raw.slice(leadingChars);
      const lineOffset = offset;
      offset += raw.length + 1;
      return {
        raw,
        content: lineContent,
        indent,
        lineNumber: index + 1,
        isBlank: lineContent.length === 0,
        offset: lineOffset,
      };
    });
}
|
|
61
|
+
/**
 * Measures the width, in columns, of a line's leading indentation.
 *
 * A space advances one column; a tab advances to the next multiple of four.
 * Counting stops at the first character that is neither.
 *
 * @param {string} line - The line to measure.
 * @returns {number} Indentation width in columns.
 */
function countIndent(line) {
  let column = 0;
  for (let pos = 0; pos < line.length; pos++) {
    const ch = line[pos];
    if (ch === "\t") {
      column += 4 - (column % 4);
    } else if (ch === " ") {
      column += 1;
    } else {
      break;
    }
  }
  return column;
}
|
|
74
|
+
/**
 * Strips up to `amount` columns of leading indentation from a line.
 *
 * Spaces count as one column. A tab counts as the distance to the next
 * multiple of four, measured from the columns already removed. When a tab is
 * wider than the columns still to be removed, the tab is replaced by the
 * spaces that remain of it.
 *
 * @param {string} line - The line to de-indent.
 * @param {number} amount - Number of columns to remove.
 * @returns {string} The line with the indentation removed.
 */
function removeIndent(line, amount) {
  let consumed = 0;
  let pos = 0;
  while (pos < line.length && consumed < amount) {
    const ch = line[pos];
    if (ch !== " " && ch !== "\t") {
      break;
    }
    if (ch === " ") {
      consumed += 1;
      pos += 1;
      continue;
    }
    const tabWidth = 4 - (consumed % 4);
    const stillNeeded = amount - consumed;
    if (tabWidth > stillNeeded) {
      // Only part of this tab belongs to the removed indent; keep the rest
      // as literal spaces.
      return " ".repeat(tabWidth - stillNeeded) + line.slice(pos + 1);
    }
    consumed += tabWidth;
    pos += 1;
  }
  return line.slice(pos);
}
|
|
97
|
+
/**
 * Tells whether a line is empty once surrounding whitespace is trimmed.
 *
 * @param {string} line - The line to test.
 * @returns {boolean} `true` when nothing but whitespace is present.
 */
function isBlankLine(line) {
  return line.trim() === "";
}
|
|
100
|
+
// Characters that may follow a backslash to form an escape sequence.
var ESCAPABLE = "\\!\"#$%&'()*+,-./:;<=>?@[]^_`{|}~";

/**
 * Removes the backslash from every backslash escape in `text`.
 *
 * A backslash followed by a character listed in `ESCAPABLE` collapses to
 * that character; any other backslash is kept verbatim.
 *
 * @param {string} text - Text that may contain backslash escapes.
 * @returns {string} The text with escapes resolved.
 */
function unescapeMarkdown(text) {
  let output = "";
  for (let pos = 0; pos < text.length; pos++) {
    const isEscape =
      text[pos] === "\\" &&
      pos + 1 < text.length &&
      ESCAPABLE.includes(text[pos + 1]);
    if (isEscape) {
      // Skip the backslash and emit the escaped character instead.
      pos += 1;
    }
    output += text[pos];
  }
  return output;
}
|
|
118
|
+
/**
 * Un-escapes emphasis markers: a backslash directly followed by one or two
 * asterisks is replaced by just the asterisks.
 *
 * @param {string} text - Text possibly containing `\*` or `\**`.
 * @returns {string} The text with those backslashes removed.
 */
function normalizeMarkdownEmphasis(text) {
  return text.replace(/\\(\*{1,2})/g, (_whole, stars) => stars);
}
|
|
121
|
+
// Named character references understood by `decodeHtmlEntities`.
var HTML_ENTITIES = {
  amp: "&",
  lt: "<",
  gt: ">",
  quot: '"',
  apos: "'",
  // Written as an escape so the non-breaking space cannot be silently
  // turned into an ordinary space by editors or copy/paste.
  nbsp: "\u00A0",
  copy: "©",
  reg: "®",
  trade: "™",
  mdash: "—",
  ndash: "–",
  hellip: "…",
  lsquo: "‘",
  rsquo: "’",
  ldquo: "“",
  rdquo: "”"
};

// Captures: (1) decimal digits, (2) hex digits, (3) entity name.
var HTML_ENTITY_REGEX = /&(?:#(\d+)|#[xX]([0-9a-fA-F]+)|([a-zA-Z]+));/g;

/**
 * Replaces HTML character references in `text` with the characters they
 * denote.
 *
 * Handles decimal (`&#65;`), hexadecimal (`&#x41;`) and the named references
 * listed in `HTML_ENTITIES`. Anything that cannot be decoded — an unknown
 * name, code point 0, or a value above U+10FFFF — is left exactly as written.
 *
 * @param {string} text - Text that may contain character references.
 * @returns {string} The decoded text.
 */
function decodeHtmlEntities(text) {
  const MAX_CODE_POINT = 0x10ffff;
  // U+10FFFF itself is a valid code point, so the bound is inclusive.
  const decodeNumeric = (codePoint, original) =>
    codePoint > 0 && codePoint <= MAX_CODE_POINT
      ? String.fromCodePoint(codePoint)
      : original;

  return text.replace(HTML_ENTITY_REGEX, (match, decimal, hex, named) => {
    if (decimal) {
      return decodeNumeric(Number.parseInt(decimal, 10), match);
    }
    if (hex) {
      return decodeNumeric(Number.parseInt(hex, 16), match);
    }
    // Own-property check: a plain lookup would also find inherited members
    // such as `constructor` or `toString` and substitute function source text.
    if (named && Object.prototype.hasOwnProperty.call(HTML_ENTITIES, named)) {
      return HTML_ENTITIES[named];
    }
    return match;
  });
}
|
|
156
|
+
/**
 * Escapes the characters that are significant in HTML text and
 * double-quoted attribute values.
 *
 * `&`, `<`, `>` and `"` become `&amp;`, `&lt;`, `&gt;` and `&quot;`. Each
 * input character is examined once, so ampersands introduced by the
 * replacements are never escaped again.
 *
 * @param {string} text - Raw text.
 * @returns {string} HTML-safe text.
 */
function encodeHtmlEntities(text) {
  const replacements = {
    "&": "&amp;",
    "<": "&lt;",
    ">": "&gt;",
    '"': "&quot;",
  };
  return text.replace(/[&<>"]/g, (ch) => replacements[ch]);
}
|
|
159
|
+
/**
 * Percent-encodes the four characters `%`, space, `(` and `)` in a URL.
 *
 * Every `%` is encoded, including those that already begin a percent escape
 * (`%20` becomes `%2520`).
 *
 * @param {string} url - The URL to encode.
 * @returns {string} The URL with those characters percent-encoded.
 */
function encodeUrl(url) {
  const escapes = { "%": "%25", " ": "%20", "(": "%28", ")": "%29" };
  return url.replace(/[% ()]/g, (ch) => escapes[ch]);
}
|
|
162
|
+
/**
 * Produces the canonical form of a link label for lookups: trimmed,
 * lower-cased, and with each run of tabs, newlines, carriage returns or
 * spaces collapsed to one space.
 *
 * @param {string} label - Label text as written.
 * @returns {string} The normalised label.
 */
function normalizeLabel(label) {
  const trimmedLower = label.trim().toLowerCase();
  return trimmedLower.replace(/[\t\n\r ]+/g, " ");
}
|
|
165
|
+
/**
 * Repeats `str` `count` times, treating non-positive counts as zero.
 *
 * @param {string} str - The string to repeat.
 * @param {number} count - Number of repetitions.
 * @returns {string} The repeated string, or "" when `count` is not positive.
 */
function repeat(str, count) {
  return count > 0 ? str.repeat(count) : "";
}
|
|
168
|
+
// ATX heading: 1-6 `#` characters, optionally followed by spaces/tabs and the
// heading text. Group 1 = the hashes, group 2 = the text (may be undefined).
var ATX_HEADING_REGEX = /^(#{1,6})(?:[ \t]+(.*))?$/;
// Setext underline: a run of `=` or a run of `-`, optional trailing blanks.
var SETEXT_HEADING_REGEX = /^(=+|-+)[ \t]*$/;
// Thematic break: three or more of the same marker (`*`, `-` or `_`), each
// optionally followed by spaces/tabs, and nothing else on the line.
var THEMATIC_BREAK_REGEX = /^(?:(?:\*[ \t]*){3,}|(?:-[ \t]*){3,}|(?:_[ \t]*){3,})$/;
// Opening code fence: group 1 = the fence (3+ backticks or tildes),
// group 2 = the rest of the line, which may not contain a backtick.
var FENCED_CODE_OPEN_REGEX = /^(`{3,}|~{3,})[ \t]*([^`\n]*)?$/;
// Ordered list marker: group 1 = 1-9 digits, group 2 = `.` or `)`; at least
// one space or tab must follow.
var ORDERED_LIST_REGEX = /^(\d{1,9})([.)])[ \t]+/;
// Bullet list marker: group 1 = `-`, `*` or `+`; at least one space or tab
// must follow.
var UNORDERED_LIST_REGEX = /^([-*+])[ \t]+/;
// Block quote marker: `>` plus at most one following space or tab.
var BLOCKQUOTE_REGEX = /^>[ \t]?/;
// Single-line link reference definition `[label]: url "title"`.
// Group 1 = label, group 2 = destination (optionally wrapped in `<>`),
// groups 3/4/5 = title in double quotes / single quotes / parentheses.
var LINK_REFERENCE_REGEX = /^\[([^\]]+)\]:[ \t]*<?([^\s>]+)>?(?:[ \t]+(?:"([^"]*)"|'([^']*)'|\(([^)]*)\)))?[ \t]*$/;
// URI autolink `<scheme:rest>`: the scheme is a letter followed by 1-31
// letters, digits, `+`, `.` or `-`. Group 1 = the URI without the brackets.
var AUTOLINK_REGEX = /^<([a-zA-Z][a-zA-Z0-9+.-]{1,31}:[^\s<>]*)>$/;
// E-mail autolink `<local@domain>`. Group 1 = the address without the brackets.
var EMAIL_AUTOLINK_REGEX = /^<([a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*)>$/;
|
|
178
|
+
// Start/end conditions for the seven kinds of HTML block. Kinds 6 and 7 have
// no closing pattern: they end at a blank line (see `closesHtmlBlock`).
var HTML_BLOCK_1_OPEN = /^<(?:script|pre|style|textarea)(?:\s|>|$)/i;
var HTML_BLOCK_1_CLOSE = /<\/(?:script|pre|style|textarea)>/i;
var HTML_BLOCK_2_OPEN = /^<!--/;
var HTML_BLOCK_2_CLOSE = /-->/;
var HTML_BLOCK_3_OPEN = /^<\?/;
var HTML_BLOCK_3_CLOSE = /\?>/;
var HTML_BLOCK_4_OPEN = /^<![A-Z]/;
var HTML_BLOCK_4_CLOSE = />/;
var HTML_BLOCK_5_OPEN = /^<!\[CDATA\[/;
var HTML_BLOCK_5_CLOSE = /\]\]>/;
var HTML_BLOCK_6_OPEN = /^<\/?(?:address|article|aside|base|basefont|blockquote|body|caption|center|col|colgroup|dd|details|dialog|dir|div|dl|dt|fieldset|figcaption|figure|footer|form|frame|frameset|h[1-6]|head|header|hr|html|iframe|legend|li|link|main|menu|menuitem|nav|noframes|ol|optgroup|option|p|param|search|section|summary|table|tbody|td|tfoot|th|thead|title|tr|track|ul)(?:\s|\/?>|$)/i;
var HTML_BLOCK_7_OPEN = /^(?:<[a-zA-Z][a-zA-Z0-9-]*(?:\s+[a-zA-Z_:][a-zA-Z0-9_.:-]*(?:\s*=\s*(?:[^"'=<>`\s]+|'[^']*'|"[^"]*"))?)*\s*\/?>|<\/[a-zA-Z][a-zA-Z0-9-]*\s*>)[ \t]*$/;

/**
 * Classifies a line as the start of an HTML block.
 *
 * The start patterns are tried in order 1 through 7 and the first match wins.
 *
 * @param {string} line - The candidate line.
 * @returns {number} The HTML block kind (1-7), or 0 when no pattern matches.
 */
function getHtmlBlockType(line) {
  const openers = [
    HTML_BLOCK_1_OPEN,
    HTML_BLOCK_2_OPEN,
    HTML_BLOCK_3_OPEN,
    HTML_BLOCK_4_OPEN,
    HTML_BLOCK_5_OPEN,
    HTML_BLOCK_6_OPEN,
    HTML_BLOCK_7_OPEN,
  ];
  // findIndex yields -1 when nothing matches, which maps to kind 0.
  return openers.findIndex((pattern) => pattern.test(line)) + 1;
}

/**
 * Tells whether `line` satisfies the end condition of an HTML block.
 *
 * @param {string} line - The line to test.
 * @param {number} type - Block kind as returned by `getHtmlBlockType`.
 * @returns {boolean} `true` when the line ends a block of that kind. Kinds
 *   1-5 look for their closing pattern anywhere in the line; kinds 6 and 7
 *   end on a whitespace-only line; any other value yields `false`.
 */
function closesHtmlBlock(line, type) {
  if (type === 6 || type === 7) {
    return line.trim().length === 0;
  }
  const closers = new Map([
    [1, HTML_BLOCK_1_CLOSE],
    [2, HTML_BLOCK_2_CLOSE],
    [3, HTML_BLOCK_3_CLOSE],
    [4, HTML_BLOCK_4_CLOSE],
    [5, HTML_BLOCK_5_CLOSE],
  ]);
  const closer = closers.get(type);
  return closer === undefined ? false : closer.test(line);
}
|
|
226
|
+
|
|
227
|
+
// src/inline-parser.ts
|
|
228
|
+
/**
 * Splits inline markdown source into a flat list of tokens.
 *
 * Token types produced: `text`, `hardBreak`, `softBreak`, `code`,
 * `autolink`, `htmlInline`, `delimiterRun`, `openBracket`, `closeBracket`
 * and `linkInfo`. Every token carries `start`/`end` offsets into `text`.
 *
 * @param {string} text - Inline source text.
 * @returns {Array<object>} Tokens in source order.
 */
function tokenize(text) {
  const tokens = [];
  let i = 0;
  // Start of the pending run of plain text that has not been emitted yet.
  let textStart = 0;
  // Emits the pending plain text up to (not including) `end`, if any.
  const pushText = (end) => {
    if (end > textStart) {
      tokens.push({
        type: "text",
        value: text.slice(textStart, end),
        start: textStart,
        end
      });
    }
  };
  while (i < text.length) {
    const char = text[i];
    if (char === "\\") {
      if (i + 1 < text.length) {
        const nextChar = text[i + 1];
        // Backslash at end of line: hard line break.
        if (nextChar === "\n") {
          pushText(i);
          tokens.push({ type: "hardBreak", start: i, end: i + 2 });
          i += 2;
          textStart = i;
          continue;
        }
        // Backslash escape: emit the escaped character as its own text token
        // so it cannot be reinterpreted as markup.
        if (nextChar && "\\!\"#$%&'()*+,-./:;<=>?@[]^_`{|}~".includes(nextChar)) {
          pushText(i);
          tokens.push({
            type: "text",
            value: nextChar,
            start: i,
            end: i + 2
          });
          i += 2;
          textStart = i;
          continue;
        }
      }
      // Any other backslash stays part of the pending text.
      i++;
      continue;
    }
    if (char === "`") {
      const codeSpan = parseCodeSpan(text, i);
      if (codeSpan) {
        pushText(i);
        tokens.push(codeSpan);
        i = codeSpan.end;
        textStart = i;
        continue;
      }
      i++;
      continue;
    }
    if (char === "<") {
      const autolink = parseAutolink(text, i);
      if (autolink) {
        pushText(i);
        tokens.push(autolink);
        i = autolink.end;
        textStart = i;
        continue;
      }
      const htmlInline = parseHtmlInline(text, i);
      if (htmlInline) {
        pushText(i);
        tokens.push(htmlInline);
        i = htmlInline.end;
        textStart = i;
        continue;
      }
      i++;
      continue;
    }
    if (char === "\n") {
      const prevText = text.slice(textStart, i);
      pushText(i);
      // A hard break needs at least two trailing spaces; a single trailing
      // space before the newline is an ordinary soft break.
      if (/ {2,}$/.test(prevText)) {
        const lastToken = tokens[tokens.length - 1];
        if (lastToken?.type === "text") {
          // The spaces that signalled the break are not part of the text.
          lastToken.value = lastToken.value.replace(/ {2,}$/, "");
        }
        tokens.push({ type: "hardBreak", start: i, end: i + 1 });
      } else {
        tokens.push({ type: "softBreak", start: i, end: i + 1 });
      }
      i++;
      textStart = i;
      continue;
    }
    if (char === "*" || char === "_") {
      const run = parseDelimiterRun(text, i, char);
      if (run) {
        pushText(i);
        tokens.push(run);
        i = run.end;
        textStart = i;
        continue;
      }
      i++;
      continue;
    }
    if (char === "[") {
      pushText(i);
      const isImage = i > 0 && text[i - 1] === "!";
      if (isImage && tokens.length > 0) {
        // The `!` was already emitted as text; take it back so it becomes
        // part of the image opener instead.
        const lastToken = tokens[tokens.length - 1];
        if (lastToken?.type === "text" && lastToken.value.endsWith("!")) {
          lastToken.value = lastToken.value.slice(0, -1);
          if (lastToken.value === "") {
            tokens.pop();
          }
        }
      }
      tokens.push({
        type: "openBracket",
        isImage,
        start: isImage ? i - 1 : i,
        end: i + 1
      });
      i++;
      textStart = i;
      continue;
    }
    if (char === "]") {
      pushText(i);
      tokens.push({ type: "closeBracket", start: i, end: i + 1 });
      i++;
      textStart = i;
      if (i < text.length && text[i] === "(") {
        // Inline form: `](destination "title")`.
        const linkInfo = parseLinkDestination(text, i);
        if (linkInfo) {
          tokens.push(linkInfo);
          i = linkInfo.end;
          textStart = i;
        }
      } else if (i < text.length && text[i] === "[") {
        // Reference form: `][label]`. The label travels in `title` with an
        // empty `url`.
        const refEnd = text.indexOf("]", i + 1);
        if (refEnd > i + 1) {
          const ref = text.slice(i + 1, refEnd);
          tokens.push({
            type: "linkInfo",
            url: "",
            title: ref,
            start: i,
            end: refEnd + 1
          });
          i = refEnd + 1;
          textStart = i;
        }
      }
      continue;
    }
    i++;
  }
  pushText(text.length);
  return tokens;
}
|
|
388
|
+
/**
 * Parses a code span that opens at `start`.
 *
 * The span is closed by the next backtick run whose length equals the
 * opening run's length; shorter or longer runs in between belong to the
 * content. Newlines in the content become spaces, and one leading plus one
 * trailing space are stripped when both are present and the content is not
 * made of spaces only.
 *
 * @param {string} text - Inline source text.
 * @param {number} start - Index of the first opening backtick.
 * @returns {{ type: "code", value: string, start: number, end: number } | null}
 *   The code token (`end` is the index just past the closing run), or `null`
 *   when `start` is not a backtick or no matching closing run exists.
 */
function parseCodeSpan(text, start) {
  let contentStart = start;
  while (contentStart < text.length && text[contentStart] === "`") {
    contentStart++;
  }
  const openerLength = contentStart - start;
  if (openerLength === 0) {
    return null;
  }

  let cursor = contentStart;
  while (cursor < text.length) {
    const runStart = text.indexOf("`", cursor);
    if (runStart === -1) {
      return null;
    }
    // Measure the whole backtick run so that part of a longer run is never
    // mistaken for the closer.
    let runEnd = runStart;
    while (runEnd < text.length && text[runEnd] === "`") {
      runEnd++;
    }
    if (runEnd - runStart === openerLength) {
      let content = text.slice(contentStart, runStart).replace(/\n/g, " ");
      if (
        content.length >= 2 &&
        content.startsWith(" ") &&
        content.endsWith(" ") &&
        !/^ +$/.test(content)
      ) {
        content = content.slice(1, -1);
      }
      return {
        type: "code",
        value: content,
        start,
        end: runEnd
      };
    }
    cursor = runEnd;
  }
  return null;
}
|
|
421
|
+
/**
 * Parses an autolink (`<scheme:...>` or `<user@host>`) that opens at `start`.
 *
 * The candidate runs from `start` to the first `>` after it and is checked
 * against `AUTOLINK_REGEX` first, then `EMAIL_AUTOLINK_REGEX`.
 *
 * @param {string} text - Inline source text.
 * @param {number} start - Index of the opening `<`.
 * @returns {{ type: "autolink", url: string, isEmail: boolean,
 *   start: number, end: number } | null} The autolink token (e-mail
 *   addresses get a `mailto:` prefix in `url`), or `null` when the text at
 *   `start` is not an autolink.
 */
function parseAutolink(text, start) {
  if (text[start] !== "<") {
    return null;
  }
  const closeAt = text.indexOf(">", start + 1);
  if (closeAt === -1) {
    return null;
  }
  const end = closeAt + 1;
  const candidate = text.slice(start, end);

  const uri = AUTOLINK_REGEX.exec(candidate)?.[1];
  if (uri) {
    return { type: "autolink", url: uri, isEmail: false, start, end };
  }

  const address = EMAIL_AUTOLINK_REGEX.exec(candidate)?.[1];
  if (address) {
    return { type: "autolink", url: `mailto:${address}`, isEmail: true, start, end };
  }
  return null;
}
|
|
450
|
+
/**
 * Parses a raw inline HTML construct that opens at `start`.
 *
 * Recognised, in this order: open tag, closing tag, comment, processing
 * instruction, declaration, CDATA section. The first pattern that matches
 * at `start` wins.
 *
 * @param {string} text - Inline source text.
 * @param {number} start - Index of the opening `<`.
 * @returns {{ type: "htmlInline", value: string, start: number,
 *   end: number } | null} The token holding the matched markup verbatim, or
 *   `null` when nothing matches.
 */
function parseHtmlInline(text, start) {
  if (text[start] !== "<") {
    return null;
  }
  const patterns = [
    // Open tag, with optional attributes and optional self-closing slash.
    /^<[a-zA-Z][a-zA-Z0-9-]*(?:\s+[a-zA-Z_:][a-zA-Z0-9_.:-]*(?:\s*=\s*(?:[^"'=<>`\s]+|'[^']*'|"[^"]*"))?)*\s*\/?>/,
    // Closing tag.
    /^<\/[a-zA-Z][a-zA-Z0-9-]*\s*>/,
    // Comment.
    /^<!--(?!-?>)(?:[^-]|-(?!-))*-->/,
    // Processing instruction.
    /^<\?.*?\?>/,
    // Declaration.
    /^<![A-Z]+\s+[^>]*>/,
    // CDATA section.
    /^<!\[CDATA\[[\s\S]*?\]\]>/,
  ];
  const remaining = text.slice(start);
  for (const pattern of patterns) {
    const found = pattern.exec(remaining);
    if (found) {
      const markup = found[0];
      return {
        type: "htmlInline",
        value: markup,
        start,
        end: start + markup.length
      };
    }
  }
  return null;
}
|
|
510
|
+
/**
 * Parses a run of emphasis delimiters (`*` or `_`) that begins at `start`
 * and decides whether the run may open and/or close emphasis.
 *
 * Flanking is derived from the characters on either side of the run, using
 * `\s` for whitespace and the ASCII punctuation set; the start and end of
 * the text count as whitespace. For `*`, opening/closing equal left/right
 * flanking. For any other `char`, a run that is both left- and
 * right-flanking can only open when preceded by punctuation and only close
 * when followed by punctuation.
 *
 * @param {string} text - Inline source text.
 * @param {number} start - Index of the first delimiter character.
 * @param {string} char - The delimiter character.
 * @returns {{ type: "delimiterRun", char: string, count: number,
 *   start: number, end: number, canOpen: boolean, canClose: boolean } | null}
 *   The run token, or `null` when `text[start]` is not `char`.
 */
function parseDelimiterRun(text, start, char) {
  let end = start;
  let count = 0;
  while (end < text.length && text[end] === char) {
    end++;
    count++;
  }
  if (count === 0) {
    return null;
  }

  const isSpace = (c) => /\s/.test(c ?? " ");
  const isPunct = (c) => /[!"#$%&'()*+,\-./:;<=>?@[\\\]^_`{|}~]/.test(c ?? "");

  const before = start > 0 ? text[start - 1] : " ";
  const after = end < text.length ? text[end] : " ";
  const beforeWhitespace = isSpace(before);
  const afterWhitespace = isSpace(after);
  const beforePunct = isPunct(before);
  const afterPunct = isPunct(after);

  const leftFlanking = !afterWhitespace && (!afterPunct || beforeWhitespace || beforePunct);
  const rightFlanking = !beforeWhitespace && (!beforePunct || afterWhitespace || afterPunct);

  // Only non-asterisk delimiters carry the extra punctuation requirement.
  const isAsterisk = char === "*";
  const canOpen = leftFlanking && (isAsterisk || !rightFlanking || beforePunct);
  const canClose = rightFlanking && (isAsterisk || !leftFlanking || afterPunct);

  return {
    type: "delimiterRun",
    char,
    count,
    start,
    end,
    canOpen,
    canClose
  };
}
|
|
546
|
+
/**
 * Parses the parenthesised part of an inline link, `(destination "title")`,
 * that opens at `start`.
 *
 * The destination is either wrapped in `<...>` (no `<` or newline inside)
 * or a bare run that ends at a space, tab or newline, or at a `)` that is
 * not balanced by an earlier `(`. A backslash in a bare destination skips
 * the next character, and a control character (code below 32) rejects the
 * link. The optional title is delimited by `"`, `'` or `(...)`, with
 * backslash escapes honoured when looking for its end.
 *
 * @param {string} text - Inline source text.
 * @param {number} start - Index of the opening `(`.
 * @returns {{ type: "linkInfo", url: string, title: (string|undefined),
 *   start: number, end: number } | null} The token with `url` and `title`
 *   unescaped and entity-decoded (`end` is the index just past the closing
 *   `)`), or `null` when the text is not a well-formed destination. An empty
 *   title is reported as `undefined`.
 */
function parseLinkDestination(text, start) {
  if (text[start] !== "(") {
    return null;
  }
  const length = text.length;
  // Returns the first index at or after `pos` that is not whitespace.
  const skipWhitespace = (pos) => {
    let next = pos;
    while (next < length && /\s/.test(text[next] ?? "")) {
      next++;
    }
    return next;
  };

  let cursor = skipWhitespace(start + 1);
  if (cursor >= length) {
    return null;
  }

  let url;
  if (text[cursor] === "<") {
    const closeAt = text.indexOf(">", cursor + 1);
    if (closeAt === -1) {
      return null;
    }
    url = text.slice(cursor + 1, closeAt);
    if (url.includes("<") || url.includes("\n")) {
      return null;
    }
    cursor = closeAt + 1;
  } else {
    const urlStart = cursor;
    let depth = 0;
    while (cursor < length) {
      const ch = text[cursor];
      if (ch === " " || ch === "\t" || ch === "\n") {
        break;
      }
      if (ch === ")") {
        // An unbalanced `)` ends the destination.
        if (depth === 0) {
          break;
        }
        depth -= 1;
      } else if (ch === "(") {
        depth += 1;
      } else if (ch === "\\" && cursor + 1 < length) {
        // Escaped character: skip it without interpreting it.
        cursor += 2;
        continue;
      }
      if (ch.charCodeAt(0) < 32) {
        return null;
      }
      cursor += 1;
    }
    url = text.slice(urlStart, cursor);
  }

  cursor = skipWhitespace(cursor);

  let title;
  const opener = text[cursor];
  if (cursor < length && (opener === '"' || opener === "'" || opener === "(")) {
    const closer = opener === "(" ? ")" : opener;
    const titleStart = cursor + 1;
    cursor = titleStart;
    while (cursor < length) {
      const ch = text[cursor];
      if (ch === "\\") {
        // Skip the backslash together with the character it escapes.
        cursor += 2;
        continue;
      }
      if (ch === closer) {
        title = text.slice(titleStart, cursor);
        cursor += 1;
        break;
      }
      cursor += 1;
    }
    if (title === undefined) {
      // The title was opened but never closed.
      return null;
    }
  }

  cursor = skipWhitespace(cursor);
  if (text[cursor] !== ")") {
    return null;
  }
  return {
    type: "linkInfo",
    url: decodeHtmlEntities(unescapeMarkdown(url)),
    title: title ? decodeHtmlEntities(unescapeMarkdown(title)) : undefined,
    start,
    end: cursor + 1
  };
}
|
|
634
|
+
/**
 * Pairs emphasis delimiter runs into opener/closer matches.
 *
 * Each run that can close is paired with the nearest earlier run that can
 * open, uses the same character and still has characters left. When either
 * run can both open and close, the pair is rejected if the two counts add up
 * to a multiple of three while at least one of them is not itself a multiple
 * of three. A match uses two characters from each side when both runs have
 * at least two left, otherwise one.
 *
 * Side effect: the `count` of the matched runs in `delimiters` is reduced by
 * the number of characters used. A closer is paired at most once.
 *
 * @param {Array<{ char: string, count: number, canOpen: boolean,
 *   canClose: boolean }>} delimiters - Delimiter runs in source order.
 * @returns {Array<{ openerIdx: number, closerIdx: number, count: number }>}
 *   Matches in the order the closers were visited; indices refer to
 *   `delimiters`.
 */
function findEmphasisMatches(delimiters) {
  const pairs = [];
  const opensAndCloses = (run) => run.canOpen && run.canClose;

  for (let closerIdx = 0; closerIdx < delimiters.length; closerIdx++) {
    const closer = delimiters[closerIdx];
    if (!closer || !closer.canClose || closer.count === 0) {
      continue;
    }
    for (let openerIdx = closerIdx - 1; openerIdx >= 0; openerIdx--) {
      const opener = delimiters[openerIdx];
      const usable =
        opener && opener.canOpen && opener.count !== 0 && opener.char === closer.char;
      if (!usable) {
        continue;
      }
      const blockedByMultipleOfThree =
        (opensAndCloses(opener) || opensAndCloses(closer)) &&
        (opener.count + closer.count) % 3 === 0 &&
        (opener.count % 3 !== 0 || closer.count % 3 !== 0);
      if (blockedByMultipleOfThree) {
        continue;
      }
      const used = opener.count >= 2 && closer.count >= 2 ? 2 : 1;
      pairs.push({
        openerIdx,
        closerIdx,
        count: used
      });
      opener.count -= used;
      closer.count -= used;
      // One match per closer: move on to the next closing run.
      break;
    }
  }
  return pairs;
}
|
|
668
|
+
/**
 * Converts the flat token list produced by `tokenize` into inline nodes.
 *
 * Emphasis pairs are computed up front with `findEmphasisMatches`; the
 * tokens are then walked by the nested `processTokenRange`, which recurses
 * for the content between a matched opener and its closer and resolves
 * links/images when it reaches a closing bracket.
 *
 * @param {Array<object>} tokens - Tokens as produced by `tokenize`.
 * @param {Map<string, {url: string, title?: string}>} references - Link
 *   reference definitions, looked up by `normalizeLabel(label)`.
 * @returns {Array<object>} Inline nodes with adjacent text nodes merged.
 */
function tokensToNodes(tokens, references) {
  // Collect every delimiter run together with the index of its token.
  const delimiters = [];
  for (let i = 0;i < tokens.length; i++) {
    const token = tokens[i];
    if (token?.type === "delimiterRun") {
      delimiters.push({
        char: token.char,
        count: token.count,
        position: i,
        canOpen: token.canOpen,
        canClose: token.canClose,
        active: true,
        tokenIndex: i
      });
    }
  }
  // NOTE: findEmphasisMatches decrements `count` on the delimiter entries it
  // pairs, so after this call `delimiters[n].count` holds what is left over.
  const matches = findEmphasisMatches(delimiters);
  // Later openers first.
  matches.sort((a, b) => b.openerIdx - a.openerIdx);
  // Shared by every (recursive) processTokenRange call below.
  const bracketStack = [];
  // NOTE(review): this map is filled here but never read anywhere in this
  // function — looks like leftover bookkeeping; confirm before removing.
  const usedDelimiters = new Map;
  for (const match of matches) {
    const opener = delimiters[match.openerIdx];
    const closer = delimiters[match.closerIdx];
    if (!opener || !closer)
      continue;
    const openerUsed = usedDelimiters.get(opener.tokenIndex) ?? [];
    openerUsed.push(match.count);
    usedDelimiters.set(opener.tokenIndex, openerUsed);
    const closerUsed = usedDelimiters.get(closer.tokenIndex) ?? [];
    closerUsed.push(match.count);
    usedDelimiters.set(closer.tokenIndex, closerUsed);
  }
  // Converts tokens[start, end) into nodes. `_emphasisStack` is not used.
  function processTokenRange(start, end, _emphasisStack) {
    const result = [];
    let i = start;
    while (i < end) {
      const token = tokens[i];
      if (!token) {
        i++;
        continue;
      }
      switch (token.type) {
        case "text":
          result.push({
            type: "text",
            value: decodeHtmlEntities(unescapeMarkdown(token.value))
          });
          break;
        case "code":
          result.push({
            type: "codeSpan",
            value: token.value
          });
          break;
        case "hardBreak":
          result.push({ type: "hardBreak" });
          break;
        case "softBreak":
          result.push({ type: "softBreak" });
          break;
        case "autolink":
          // The visible link text is the URL without any `mailto:` prefix.
          result.push({
            type: "link",
            url: token.url,
            children: [
              { type: "text", value: token.url.replace(/^mailto:/, "") }
            ]
          });
          break;
        case "htmlInline":
          result.push({
            type: "htmlInline",
            value: token.value
          });
          break;
        case "delimiterRun": {
          const delimIdx = delimiters.findIndex((d) => d.tokenIndex === i);
          const delim = delimiters[delimIdx];
          if (delim) {
            // Characters of this run not yet turned into emphasis.
            let remaining = token.count;
            for (const match of matches) {
              if (match.openerIdx === delimIdx && remaining >= match.count) {
                const closerDelim = delimiters[match.closerIdx];
                if (closerDelim) {
                  const closerTokenIdx = closerDelim.tokenIndex;
                  // Everything between opener and closer becomes the children.
                  const innerNodes = processTokenRange(i + 1, closerTokenIdx, []);
                  const emphNode = match.count === 2 ? {
                    type: "strong",
                    children: innerNodes,
                    marker: token.char + token.char
                  } : {
                    type: "emphasis",
                    children: innerNodes,
                    marker: token.char
                  };
                  result.push(emphNode);
                  remaining -= match.count;
                  // Jump to the closer; the `i++` at the end of the loop body
                  // then steps past it.
                  // NOTE(review): because the closer token is skipped as a
                  // whole, any closer characters beyond `match.count` do not
                  // appear to be emitted — confirm with `*foo**`.
                  i = closerTokenIdx;
                }
              }
            }
            // NOTE(review): unmatched opener characters are appended after
            // the emphasis node(s) rather than before them — confirm that
            // this ordering is intended.
            if (remaining > 0) {
              result.push({
                type: "text",
                value: token.char.repeat(remaining)
              });
            }
          } else {
            result.push({
              type: "text",
              value: token.char.repeat(token.count)
            });
          }
          break;
        }
        case "openBracket":
          // Remember where the bracket's placeholder text sits in `result` so
          // that it can be replaced if a link or image is formed later.
          // NOTE(review): `position` indexes the local `result`, while
          // `bracketStack` is shared across recursion levels — verify that
          // brackets opened inside emphasis cannot be closed outside it.
          bracketStack.push({
            position: result.length,
            isImage: token.isImage,
            delimiterIndex: -1,
            active: true
          });
          result.push({ type: "text", value: token.isImage ? "![" : "[" });
          break;
        case "closeBracket": {
          let matched = false;
          // Only the innermost still-active opener is considered: the loop
          // always breaks after handling the first active entry.
          for (let j = bracketStack.length - 1;j >= 0; j--) {
            const bracket = bracketStack[j];
            if (!bracket || !bracket.active)
              continue;
            const nextToken = tokens[i + 1];
            if (nextToken?.type === "linkInfo") {
              // Inline `(url "title")` or full reference `[label]` follows.
              const linkContent = result.slice(bracket.position + 1);
              // Drop the placeholder and everything after it.
              result.length = bracket.position;
              let url = nextToken.url;
              let title = nextToken.title;
              // An empty url with a title is how the tokenizer encodes a
              // reference label; resolve it through `references`.
              if (!url && title) {
                const ref = references.get(normalizeLabel(title));
                if (ref) {
                  url = ref.url;
                  title = ref.title;
                }
              }
              if (bracket.isImage) {
                // Alt text: concatenated text and code span values only.
                const alt = linkContent.map((n) => {
                  if (n.type === "text")
                    return n.value;
                  if (n.type === "codeSpan")
                    return n.value;
                  return "";
                }).join("");
                result.push({
                  type: "image",
                  alt,
                  url,
                  title
                });
              } else {
                result.push({
                  type: "link",
                  url,
                  title,
                  children: linkContent
                });
              }
              // Consume the linkInfo token as well.
              i++;
              matched = true;
            } else {
              // Shortcut reference `[label]`: the bracket text is the label.
              const linkText = result.slice(bracket.position + 1).map((n) => n.type === "text" ? n.value : "").join("");
              const ref = references.get(normalizeLabel(linkText));
              if (ref) {
                const linkContent = result.slice(bracket.position + 1);
                result.length = bracket.position;
                if (bracket.isImage) {
                  result.push({
                    type: "image",
                    alt: linkText,
                    url: ref.url,
                    title: ref.title
                  });
                } else {
                  result.push({
                    type: "link",
                    url: ref.url,
                    title: ref.title,
                    children: linkContent
                  });
                }
                matched = true;
              }
            }
            bracket.active = false;
            break;
          }
          if (!matched) {
            result.push({ type: "text", value: "]" });
          }
          break;
        }
        case "linkInfo":
          // A linkInfo token that was not consumed by a closing bracket is
          // rendered as text; only its url is written back.
          result.push({ type: "text", value: `(${token.url})` });
          break;
      }
      i++;
    }
    return mergeTextNodes(result);
  }
  return processTokenRange(0, tokens.length, []);
}
|
|
877
|
+
/**
 * Collapses runs of adjacent text nodes into a single text node.
 *
 * The first node of each run is reused and has the later values appended to
 * its `value`, so that input node object is modified.
 *
 * @param {Array<{type: string, value?: string}>} nodes - Nodes in order.
 * @returns {Array<object>} A new array without consecutive text nodes.
 */
function mergeTextNodes(nodes) {
  const merged = [];
  // Most recently kept node, i.e. the current tail of `merged`.
  let tail;
  for (const node of nodes) {
    if (node.type === "text" && tail?.type === "text") {
      tail.value += node.value;
    } else {
      merged.push(node);
      tail = node;
    }
  }
  return merged;
}
|
|
891
|
+
/**
 * Parses inline markdown into nodes.
 *
 * @param {string} text - Inline source text.
 * @param {Map<string, object>} [references] - Link reference definitions;
 *   defaults to an empty map.
 * @returns {Array<object>} Inline nodes; an empty array for falsy input.
 */
function parseInline(text, references = new Map()) {
  return text ? tokensToNodes(tokenize(text), references) : [];
}
|
|
897
|
+
|
|
898
|
+
// src/block-parser.ts
|
|
899
|
+
/**
 * Builds the initial state object for block parsing.
 *
 * @returns {{ lineNumber: number, column: number, indent: number,
 *   tight: boolean, inBlockquote: boolean, listDepth: number,
 *   blockquoteDepth: number, references: Map }} A fresh context positioned
 *   at line 1, column 1, with zero depths and its own empty `references` map.
 */
function createBlockContext() {
  const context = {
    lineNumber: 1,
    column: 1,
    indent: 0,
    tight: true,
    inBlockquote: false,
    listDepth: 0,
    blockquoteDepth: 0,
  };
  context.references = new Map();
  return context;
}
|
|
911
|
+
/**
 * Builds a source position spanning from the start of one line record to
 * the end of another.
 *
 * @param {{ lineNumber: number, offset: number }} startLine - First line.
 * @param {{ lineNumber: number, offset: number, raw: string }} endLine -
 *   Last line.
 * @returns {{ startLine: number, startColumn: number, endLine: number,
 *   endColumn: number, startOffset: number, endOffset: number }} The
 *   position; `startColumn` is always 1 and the end column/offset point just
 *   past the last character of `endLine.raw`.
 */
function createPosition(startLine, endLine) {
  const lastLineLength = endLine.raw.length;
  const position = {
    startLine: startLine.lineNumber,
    startColumn: 1,
    endLine: endLine.lineNumber,
    endColumn: lastLineLength + 1,
    startOffset: startLine.offset,
    endOffset: endLine.offset + lastLineLength,
  };
  return position;
}
|
|
921
|
+
/**
 * Tests whether the trimmed line matches THEMATIC_BREAK_REGEX.
 *
 * @param {string} line - Line text.
 * @returns {boolean} Result of the regex test.
 */
function isThematicBreak(line) {
  const candidate = line.trim();
  return THEMATIC_BREAK_REGEX.test(candidate);
}
|
|
924
|
+
/**
 * Matches the trimmed line against ATX_HEADING_REGEX.
 *
 * @param {string} line - Line text.
 * @returns {RegExpExecArray|null} The match array, or null when the line does not match.
 */
function isAtxHeading(line) {
  const candidate = line.trim();
  return ATX_HEADING_REGEX.exec(candidate);
}
|
|
927
|
+
/**
 * Matches the trimmed line against SETEXT_HEADING_REGEX.
 *
 * @param {string} line - Line text.
 * @returns {RegExpExecArray|null} The match array, or null when the line does not match.
 */
function isSetextUnderline(line) {
  const candidate = line.trim();
  return SETEXT_HEADING_REGEX.exec(candidate);
}
|
|
930
|
+
/**
 * Detects an opening code fence on a raw line.
 *
 * @param {string} line - Raw line, including leading whitespace.
 * @returns {RegExpExecArray|null} The FENCED_CODE_OPEN_REGEX match on the
 *   left-trimmed line, or null when the line's indent is 4 or more or the
 *   regex does not match.
 */
function isFencedCodeStart(line) {
  const withoutIndent = line.trimStart();
  if (countIndent(line) >= 4) {
    return null;
  }
  return FENCED_CODE_OPEN_REGEX.exec(withoutIndent);
}
|
|
937
|
+
/**
 * Tests whether the line, ignoring leading whitespace, matches BLOCKQUOTE_REGEX.
 *
 * @param {string} line - Raw line.
 * @returns {boolean} Result of the regex test.
 */
function isBlockquoteStart(line) {
  const withoutIndent = line.trimStart();
  return BLOCKQUOTE_REGEX.test(withoutIndent);
}
|
|
940
|
+
/**
 * Detects a list item marker at the start of a raw line.
 *
 * The ordered pattern is tried first; the unordered pattern is only tried
 * when the ordered one does not match.
 *
 * @param {string} line - Raw line, including leading whitespace.
 * @returns {{ordered: boolean, marker: string, start?: number, indent: number}|null}
 *   Marker details, where `indent` is the leading indent plus the length of
 *   the matched marker text; null when neither pattern matches.
 */
function isListItemStart(line) {
  const body = line.trimStart();
  const leading = countIndent(line);

  const orderedMatch = ORDERED_LIST_REGEX.exec(body);
  if (orderedMatch) {
    const [whole, digits, delimiter] = orderedMatch;
    return {
      ordered: true,
      marker: delimiter,
      start: Number.parseInt(digits ?? "1", 10),
      indent: leading + (whole?.length ?? 0),
    };
  }

  const bulletMatch = UNORDERED_LIST_REGEX.exec(body);
  if (bulletMatch) {
    const [whole, bullet] = bulletMatch;
    return {
      ordered: false,
      marker: bullet,
      indent: leading + (whole?.length ?? 0),
    };
  }

  return null;
}
|
|
962
|
+
/**
 * Parses a link reference definition from a line.
 *
 * @param {string} line - Line text; it is trimmed before matching.
 * @returns {{label: string, url: string, title: (string|undefined)}|null}
 *   Label and URL (empty string when their capture group is missing) plus the
 *   first defined title capture among groups 3, 4 and 5; null when
 *   LINK_REFERENCE_REGEX does not match.
 */
function isLinkReferenceDefinition(line) {
  const found = LINK_REFERENCE_REGEX.exec(line.trim());
  if (found === null) {
    return null;
  }
  const label = found[1] ?? "";
  const url = found[2] ?? "";
  const title = found[3] ?? found[4] ?? found[5];
  return { label, url, title };
}
|
|
972
|
+
// Matches a whole table delimiter row: an optional leading pipe, one or more
// pipe-separated cells made of dashes with an optional colon on either side
// (whitespace allowed around each cell), and an optional trailing pipe.
var TABLE_DELIMITER_REGEX = /^\|?\s*:?-+:?\s*(?:\|\s*:?-+:?\s*)*\|?\s*$/;
|
|
973
|
+
/**
 * Reports whether a line could be a table row, i.e. whether it contains a
 * pipe character once surrounding whitespace is removed.
 *
 * @param {string} line - Line text.
 * @returns {boolean} True when the trimmed line contains "|".
 */
function isTableRow(line) {
  return line.trim().includes("|");
}
|
|
977
|
+
/**
 * Tests whether the trimmed line matches TABLE_DELIMITER_REGEX.
 *
 * @param {string} line - Line text.
 * @returns {boolean} Result of the regex test.
 */
function isTableDelimiter(line) {
  const candidate = line.trim();
  return TABLE_DELIMITER_REGEX.test(candidate);
}
|
|
980
|
+
/**
 * Derives a column alignment from one delimiter-row cell.
 *
 * @param {string} cell - Delimiter cell text, e.g. ":---:".
 * @returns {"left"|"center"|"right"|null} "center" when the trimmed cell both
 *   starts and ends with a colon, "right" or "left" when only that side has
 *   one, null when neither does.
 */
function parseAlignment(cell) {
  const spec = cell.trim();
  const colonLeft = spec.startsWith(":");
  const colonRight = spec.endsWith(":");
  if (colonLeft) {
    return colonRight ? "center" : "left";
  }
  return colonRight ? "right" : null;
}
|
|
992
|
+
/**
 * Splits one table row into trimmed cell strings.
 *
 * A single leading pipe and a single unescaped trailing pipe are treated as
 * row borders and dropped. Inside the row, a pipe preceded by a backslash
 * escape does not split; the backslash is kept in the cell text.
 *
 * @param {string} line - Row text.
 * @returns {string[]} The cells, in order. Always contains at least one entry.
 */
function splitTableRow(line) {
  let content = line.trim();
  if (content.startsWith("|")) {
    content = content.slice(1);
  }
  if (content.endsWith("|")) {
    // Only drop the trailing pipe when it is a real border. An odd number of
    // backslashes directly before it means the pipe itself is escaped and
    // belongs to the last cell.
    let backslashes = 0;
    for (let k = content.length - 2; k >= 0 && content[k] === "\\"; k--) {
      backslashes++;
    }
    if (backslashes % 2 === 0) {
      content = content.slice(0, -1);
    }
  }

  const cells = [];
  let current = "";
  let escaped = false;
  for (const char of content) {
    if (escaped) {
      // Character after a backslash is always literal cell content.
      current += char;
      escaped = false;
    } else if (char === "\\") {
      escaped = true;
      current += char;
    } else if (char === "|") {
      cells.push(current.trim());
      current = "";
    } else {
      current += char;
    }
  }
  cells.push(current.trim());
  return cells;
}
|
|
1018
|
+
// Upper bound on the `depth` argument of parseBlocks; deeper calls return no blocks.
var MAX_NESTING_DEPTH = 100;
|
|
1019
|
+
/**
 * Scans every line for link reference definitions and records them in
 * `context.references`.
 *
 * Labels are lower-cased and runs of whitespace are collapsed to one space
 * before being used as keys. A later definition with the same normalized
 * label overwrites an earlier one.
 *
 * @param {Array<{raw: string}>} lines - Split source lines.
 * @param {{references: Map}} context - Block context whose reference table is updated in place.
 * @returns {void}
 */
function collectReferences(lines, context) {
  for (const { raw } of lines) {
    if (isBlankLine(raw)) {
      continue;
    }
    const definition = isLinkReferenceDefinition(raw);
    if (!definition) {
      continue;
    }
    const key = definition.label.toLowerCase().replace(/\s+/g, " ");
    context.references.set(key, {
      url: definition.url,
      title: definition.title,
    });
  }
}
|
|
1033
|
+
/**
 * Parses markdown source into a list of block nodes.
 *
 * @param {string} content - Markdown source.
 * @param {object} [context] - Block context; a new one is created when omitted.
 * @param {boolean} [includePositions=true] - Whether nodes get a `position`.
 * @param {number} [depth=0] - Current nesting depth; beyond MAX_NESTING_DEPTH
 *   the call returns no blocks without looking at `content`.
 * @returns {{blocks: Array<object>, references: Map}} Parsed blocks and the
 *   reference table of the context that was used.
 */
function parseBlocks(content, context, includePositions = true, depth = 0) {
  if (depth > MAX_NESTING_DEPTH) {
    return { blocks: [], references: context?.references ?? new Map() };
  }

  const ctx = context ?? createBlockContext();
  const lines = splitLines(content);
  // References are gathered up front so that links can resolve definitions
  // that appear later in the source.
  collectReferences(lines, ctx);

  const blocks = [];
  for (let cursor = 0; cursor < lines.length; ) {
    const { block, consumed } = parseBlock(lines, cursor, ctx, includePositions, depth);
    if (block) {
      blocks.push(block);
    }
    cursor += consumed;
  }
  return { blocks, references: ctx.references };
}
|
|
1051
|
+
/**
 * Parses the single block that starts at `lines[startIndex]`.
 *
 * Block kinds are tried in a fixed order: thematic break, ATX heading, fenced
 * code, HTML block, blockquote, list, indented code, table, link reference
 * definition, and finally paragraph. Missing or blank lines produce no block.
 *
 * @param {Array<object>} lines - Split source lines.
 * @param {number} startIndex - Index of the first line of the block.
 * @param {object} context - Block context (reference table, blockquote flag).
 * @param {boolean} includePositions - Whether nodes get a `position`.
 * @param {number} depth - Current nesting depth, forwarded to container parsers.
 * @returns {{block: (object|null), consumed: number}} The node (or null) and
 *   the number of lines it used.
 */
function parseBlock(lines, startIndex, context, includePositions, depth) {
  const current = lines[startIndex];
  if (!current || current.isBlank) {
    return { block: null, consumed: 1 };
  }
  const text = current.content;

  if (isThematicBreak(text)) {
    return parseThematicBreak(current, includePositions);
  }

  const headingMatch = isAtxHeading(text);
  if (headingMatch) {
    return parseAtxHeading(current, headingMatch, context, includePositions);
  }

  const fenceOpen = isFencedCodeStart(current.raw);
  if (fenceOpen) {
    return parseFencedCodeBlock(lines, startIndex, fenceOpen, includePositions);
  }

  const htmlKind = getHtmlBlockType(text);
  if (htmlKind > 0 && current.indent < 4) {
    return parseHtmlBlock(lines, startIndex, htmlKind, includePositions);
  }

  if (isBlockquoteStart(current.raw)) {
    return parseBlockquote(lines, startIndex, context, includePositions, depth);
  }

  const listStart = isListItemStart(current.raw);
  if (listStart) {
    return parseList(lines, startIndex, listStart, context, includePositions, depth);
  }

  // Indented code is not recognised while parsing blockquote content.
  if (current.indent >= 4 && !context.inBlockquote) {
    return parseIndentedCodeBlock(lines, startIndex, includePositions);
  }

  if (isTableRow(current.content)) {
    // parseTable reports "not a table" with a null block; fall through then.
    const table = parseTable(lines, startIndex, context, includePositions);
    if (table.block) {
      return table;
    }
  }

  const definition = isLinkReferenceDefinition(current.raw);
  if (definition) {
    const key = definition.label.toLowerCase().replace(/\s+/g, " ");
    context.references.set(key, {
      url: definition.url,
      title: definition.title,
    });
    // The definition line is consumed without emitting a node.
    return { block: null, consumed: 1 };
  }

  return parseParagraph(lines, startIndex, context, includePositions);
}
|
|
1102
|
+
/**
 * Builds a thematicBreak node for a line already known to be a thematic break.
 *
 * @param {{content: string}} line - The source line.
 * @param {boolean} includePositions - Whether to attach a `position`.
 * @returns {{block: object, consumed: number}} The node and a consumed count of 1.
 *   The marker is "*" if the content contains an asterisk, otherwise "_" if it
 *   contains an underscore, otherwise "-".
 */
function parseThematicBreak(line, includePositions) {
  const trimmed = line.content.trim();
  const marker = trimmed.includes("*") ? "*" : trimmed.includes("_") ? "_" : "-";
  const node = { type: "thematicBreak", marker };
  if (includePositions) {
    node.position = createPosition(line, line);
  }
  return { block: node, consumed: 1 };
}
|
|
1118
|
+
/**
 * Builds a heading node from an ATX heading match.
 *
 * An optional closing sequence of `#` characters is removed from the heading
 * text, but only when it is separated from the content by whitespace or makes
 * up the whole text. Hashes attached to a word (as in "C#") or preceded by a
 * backslash are kept as content.
 *
 * @param {object} line - The source line, used for the position.
 * @param {RegExpExecArray} match - ATX match; group 1 is the opening hashes, group 2 the text.
 * @param {{references: Map}} context - Block context providing link references.
 * @param {boolean} includePositions - Whether to attach a `position`.
 * @returns {{block: object, consumed: number}} The heading node and a consumed count of 1.
 */
function parseAtxHeading(line, match, context, includePositions) {
  const level = match[1]?.length ?? 1;
  // `(?:^|[ \t]+)` restricts the strip to a real closing sequence.
  const text = (match[2] ?? "").replace(/(?:^|[ \t]+)#+[ \t]*$/, "").trim();
  const node = {
    type: "heading",
    level,
    children: parseInline(text, context.references),
    style: "atx",
  };
  if (includePositions) {
    node.position = createPosition(line, line);
  }
  return { block: node, consumed: 1 };
}
|
|
1133
|
+
/**
 * Parses a fenced code block whose opening fence is at `lines[startIndex]`.
 *
 * The block runs until a line that, ignoring leading whitespace, consists of
 * at least `fenceLength` fence characters (optionally followed by spaces or
 * tabs) and is indented less than 4, or until the end of input.
 *
 * @param {Array<object>} lines - Split source lines.
 * @param {number} startIndex - Index of the opening fence line.
 * @param {RegExpExecArray} match - Opening fence match; group 1 is the fence run, group 2 the info string.
 * @param {boolean} includePositions - Whether to attach a `position`.
 * @returns {{block: object, consumed: number}} The codeBlock node and the
 *   number of lines used, including the closing fence when one was found.
 */
function parseFencedCodeBlock(lines, startIndex, match, includePositions) {
  const opener = lines[startIndex];
  if (!opener) {
    const emptyBlock = {
      type: "codeBlock",
      value: "",
      style: "fenced",
      fence: "`",
      fenceLength: 3,
    };
    return { block: emptyBlock, consumed: 1 };
  }

  const fenceRun = match[1];
  const fence = fenceRun?.[0];
  const fenceLength = fenceRun?.length ?? 3;

  // Info string: the text up to the first space is the language, the rest is meta.
  const infoString = (match[2] ?? "").trim();
  let lang;
  let meta;
  if (infoString) {
    const spaceIndex = infoString.indexOf(" ");
    if (spaceIndex > 0) {
      lang = infoString.slice(0, spaceIndex);
      meta = infoString.slice(spaceIndex + 1).trim();
    } else {
      lang = infoString;
    }
  }

  const openerIndent = countIndent(opener.raw);
  const closingFenceRegex = new RegExp(`^${fence}{${fenceLength},}[ \\t]*$`);
  const body = [];
  let cursor = startIndex + 1;
  for (; cursor < lines.length; cursor++) {
    const line = lines[cursor];
    if (!line) {
      break;
    }
    const lineIndent = countIndent(line.raw);
    if (closingFenceRegex.test(line.raw.trimStart()) && lineIndent < 4) {
      cursor++; // the closing fence belongs to this block
      break;
    }
    // Strip at most the opening fence's indentation from each content line.
    body.push(removeIndent(line.raw, Math.min(openerIndent, line.indent)));
  }

  const node = {
    type: "codeBlock",
    value: body.join("\n"),
    style: "fenced",
    fence,
    fenceLength,
  };
  if (lang) {
    node.lang = lang;
  }
  if (meta) {
    node.meta = meta;
  }
  if (includePositions) {
    node.position = createPosition(opener, lines[cursor - 1] ?? opener);
  }
  return { block: node, consumed: cursor - startIndex };
}
|
|
1197
|
+
/**
 * Parses an indented code block starting at `lines[startIndex]`.
 *
 * Lines indented by 4 or more are taken as code with 4 columns of indent
 * removed. Blank lines in between are kept as empty strings. The block ends
 * at the first non-blank line indented less than 4, and blank lines after the
 * last code line are dropped from the value (but still counted as consumed).
 *
 * @param {Array<object>} lines - Split source lines.
 * @param {number} startIndex - Index of the first code line.
 * @param {boolean} includePositions - Whether to attach a `position`.
 * @returns {{block: object, consumed: number}} The codeBlock node and lines used.
 */
function parseIndentedCodeBlock(lines, startIndex, includePositions) {
  const collected = [];
  let cursor = startIndex;
  let lastCodeIndex = startIndex;
  for (; cursor < lines.length; cursor++) {
    const line = lines[cursor];
    if (!line) {
      break;
    }
    if (line.isBlank) {
      collected.push("");
      continue;
    }
    if (line.indent < 4) {
      break;
    }
    collected.push(removeIndent(line.raw, 4));
    lastCodeIndex = cursor;
  }

  const codeLines = collected.slice(0, lastCodeIndex - startIndex + 1);
  const node = {
    type: "codeBlock",
    value: codeLines.join("\n"),
    style: "indented",
  };
  if (includePositions) {
    const first = lines[startIndex];
    const last = lines[lastCodeIndex] ?? first;
    if (first && last) {
      node.position = createPosition(first, last);
    }
  }
  return { block: node, consumed: cursor - startIndex };
}
|
|
1233
|
+
/**
 * Tries to parse a table whose header row is at `lines[startIndex]`.
 *
 * A table needs a header row followed by a delimiter row with the same number
 * of cells. Body rows follow until a blank line, a line without a pipe, a
 * thematic break or an ATX heading. Body rows are padded or truncated to the
 * header's cell count.
 *
 * @param {Array<object>} lines - Split source lines.
 * @param {number} startIndex - Index of the candidate header row.
 * @param {{references: Map}} context - Block context providing link references.
 * @param {boolean} includePositions - Whether nodes get a `position`.
 * @returns {{block: (object|null), consumed: number}} The table node and lines
 *   used, or a null block with `consumed: 0` when the lines are not a table.
 */
function parseTable(lines, startIndex, context, includePositions) {
  const notATable = () => ({ block: null, consumed: 0 });

  if (startIndex + 1 >= lines.length) {
    return notATable();
  }
  const headerLine = lines[startIndex];
  const delimiterLine = lines[startIndex + 1];
  if (!headerLine || !delimiterLine) {
    return notATable();
  }
  if (!isTableRow(headerLine.content) || !isTableDelimiter(delimiterLine.content)) {
    return notATable();
  }

  const headerCells = splitTableRow(headerLine.content);
  const delimiterCells = splitTableRow(delimiterLine.content);
  if (headerCells.length !== delimiterCells.length || headerCells.length === 0) {
    return notATable();
  }
  if (delimiterCells.some((cell) => !/^:?-+:?$/.test(cell.trim()))) {
    return notATable();
  }

  const alignments = delimiterCells.map(parseAlignment);
  const makeCell = (source, column, isHeader) => ({
    type: "tableCell",
    children: parseInline(source, context.references),
    align: alignments[column] ?? undefined,
    isHeader,
  });

  const headerRow = {
    type: "tableRow",
    children: headerCells.map((cell, column) => makeCell(cell, column, true)),
    isHeader: true,
  };
  if (includePositions) {
    headerRow.position = createPosition(headerLine, headerLine);
  }

  const rows = [headerRow];
  let cursor = startIndex + 2;
  for (; cursor < lines.length; cursor++) {
    const line = lines[cursor];
    if (!line) {
      break;
    }
    if (line.isBlank || !isTableRow(line.content)) {
      break;
    }
    if (isThematicBreak(line.content) || isAtxHeading(line.content)) {
      break;
    }
    const cells = splitTableRow(line.content);
    // Exactly one cell per header column: missing cells become empty,
    // surplus cells are ignored.
    const rowCells = Array.from({ length: headerCells.length }, (_, column) =>
      makeCell(cells[column] ?? "", column, false)
    );
    const bodyRow = {
      type: "tableRow",
      children: rowCells,
      isHeader: false,
    };
    if (includePositions) {
      bodyRow.position = createPosition(line, line);
    }
    rows.push(bodyRow);
  }

  const node = {
    type: "table",
    children: rows,
    align: alignments,
  };
  if (includePositions) {
    node.position = createPosition(headerLine, lines[cursor - 1] ?? headerLine);
  }
  return { block: node, consumed: cursor - startIndex };
}
|
|
1315
|
+
/**
 * Parses an HTML block starting at `lines[startIndex]`.
 *
 * Raw lines are collected until `closesHtmlBlock` reports the end condition
 * for `htmlType`, or until the input ends. For types 6 and 7 the closing line
 * is consumed but left out of the node's value; for other types it is included.
 *
 * @param {Array<object>} lines - Split source lines.
 * @param {number} startIndex - Index of the first line of the block.
 * @param {number} htmlType - HTML block type as returned by `getHtmlBlockType`.
 * @param {boolean} includePositions - Whether to attach a `position`.
 * @returns {{block: object, consumed: number}} The htmlBlock node and lines used.
 */
function parseHtmlBlock(lines, startIndex, htmlType, includePositions) {
  const collected = [];
  let cursor = startIndex;
  while (cursor < lines.length) {
    const line = lines[cursor];
    if (!line) {
      break;
    }
    cursor++;
    if (!closesHtmlBlock(line.raw, htmlType)) {
      collected.push(line.raw);
      continue;
    }
    if (htmlType !== 6 && htmlType !== 7) {
      collected.push(line.raw);
    }
    break;
  }

  const node = {
    type: "htmlBlock",
    value: collected.join("\n"),
    htmlType,
  };
  if (includePositions) {
    const first = lines[startIndex];
    const last = lines[cursor - 1] ?? first;
    if (first && last) {
      node.position = createPosition(first, last);
    }
  }
  return { block: node, consumed: cursor - startIndex };
}
|
|
1347
|
+
/**
 * Parses a blockquote starting at `lines[startIndex]`.
 *
 * Lines matching BLOCKQUOTE_REGEX have their marker removed and are collected.
 * Before the first blank line, an unmarked non-blank line is accepted as a
 * lazy continuation, unless it starts a block of its own (thematic break, ATX
 * heading, code fence, HTML block, or a list item that may interrupt a
 * paragraph). Such a line ends the quote so it is parsed as a sibling block.
 * A blank line is kept only when the next line is again a quote line.
 * The collected text is parsed recursively with `inBlockquote` set.
 *
 * @param {Array<object>} lines - Split source lines.
 * @param {number} startIndex - Index of the first quote line.
 * @param {object} context - Block context; copied with blockquote fields updated.
 * @param {boolean} includePositions - Whether nodes get a `position`.
 * @param {number} depth - Current nesting depth; the inner parse uses depth + 1.
 * @returns {{block: object, consumed: number}} The blockquote node and lines used.
 */
function parseBlockquote(lines, startIndex, context, includePositions, depth) {
  // Same interruption rules that parseParagraph applies to paragraph text.
  const startsOwnBlock = (line) => {
    if (
      isThematicBreak(line.content) ||
      isAtxHeading(line.content) ||
      isFencedCodeStart(line.raw) ||
      (getHtmlBlockType(line.content) > 0 && line.indent < 4)
    ) {
      return true;
    }
    const listMatch = isListItemStart(line.raw);
    return Boolean(listMatch) && (!listMatch.ordered || listMatch.start === 1);
  };

  const quoteLines = [];
  let i = startIndex;
  let hadBlankLine = false;
  while (i < lines.length) {
    const line = lines[i];
    if (!line) {
      break;
    }
    const unindented = line.raw.trimStart();
    const match = BLOCKQUOTE_REGEX.exec(unindented);
    if (match) {
      quoteLines.push(unindented.slice(match[0].length));
      i++;
      continue;
    }
    if (!line.isBlank && !hadBlankLine && !startsOwnBlock(line)) {
      // Lazy continuation of the quoted paragraph.
      quoteLines.push(line.raw);
      i++;
      continue;
    }
    if (line.isBlank) {
      hadBlankLine = true;
      const nextLine = lines[i + 1];
      if (nextLine && BLOCKQUOTE_REGEX.test(nextLine.raw.trimStart())) {
        quoteLines.push("");
        i++;
        continue;
      }
    }
    break;
  }

  const innerContext = {
    ...context,
    inBlockquote: true,
    blockquoteDepth: context.blockquoteDepth + 1,
  };
  const { blocks } = parseBlocks(quoteLines.join("\n"), innerContext, includePositions, depth + 1);
  const node = {
    type: "blockquote",
    children: blocks,
  };
  if (includePositions) {
    const startLine = lines[startIndex];
    const endLine = lines[i - 1];
    if (startLine && endLine) {
      node.position = createPosition(startLine, endLine);
    }
  }
  return { block: node, consumed: i - startIndex };
}
|
|
1399
|
+
/**
 * Parses a list whose first item starts at `lines[startIndex]`.
 *
 * Items are collected while each non-blank line at the list level is a list
 * item of the same kind (ordered vs. unordered) as the first one. Blank lines
 * between items are skipped and make the list `spread`. Any non-blank line
 * that is not a list item ends the list.
 *
 * @param {Array<object>} lines - Split source lines.
 * @param {number} startIndex - Index of the first item's line.
 * @param {{ordered: boolean, marker: string, start?: number, indent: number}} firstItem
 *   Marker details of the first item, as returned by `isListItemStart`.
 * @param {object} context - Block context, forwarded to `parseListItem`.
 * @param {boolean} includePositions - Whether nodes get a `position`.
 * @param {number} depth - Current nesting depth, forwarded to `parseListItem`.
 * @returns {{block: object, consumed: number}} The list node and lines used
 *   (blank lines skipped before the list ended are included in the count).
 */
function parseList(lines, startIndex, firstItem, context, includePositions, depth) {
  const items = [];
  let cursor = startIndex;
  let spread = false;
  let blankSinceLastItem = false;

  while (cursor < lines.length) {
    const line = lines[cursor];
    if (!line) {
      break;
    }
    if (line.isBlank) {
      blankSinceLastItem = true;
      cursor++;
      continue;
    }

    const candidate = isListItemStart(line.raw);
    if (!candidate) {
      // Not a list item: the list ends here regardless of indentation.
      break;
    }
    const sameType = candidate.ordered === firstItem.ordered;
    const sameMarkerFamily =
      firstItem.ordered ||
      candidate.marker === firstItem.marker ||
      ["-", "*", "+"].includes(candidate.marker);
    if (!(sameType && sameMarkerFamily)) {
      break;
    }

    const { item, consumed } = parseListItem(
      lines,
      cursor,
      candidate.indent,
      candidate.marker,
      context,
      includePositions,
      depth
    );
    if (blankSinceLastItem && items.length > 0) {
      spread = true;
    }
    items.push(item);
    cursor += consumed;
    blankSinceLastItem = false;
  }

  const node = {
    type: "list",
    ordered: firstItem.ordered,
    children: items,
    marker: firstItem.marker,
    spread,
  };
  if (firstItem.ordered && firstItem.start !== undefined) {
    node.start = firstItem.start;
  }
  if (includePositions) {
    const first = lines[startIndex];
    const last = lines[cursor - 1];
    if (first && last) {
      node.position = createPosition(first, last);
    }
  }
  return { block: node, consumed: cursor - startIndex };
}
|
|
1453
|
+
/**
 * Parses one list item starting at `lines[startIndex]`.
 *
 * The first line contributes its text after the list marker. Following lines
 * belong to the item when they are blank, when they are indented at least
 * `itemIndent` (that indent is removed), or, before any blank line, when they
 * are less-indented plain text (lazy continuation). A less-indented list
 * marker, or less-indented text after a blank line, ends the item. The
 * collected text is parsed recursively as blocks.
 *
 * @param {Array<object>} lines - Split source lines.
 * @param {number} startIndex - Index of the item's first line.
 * @param {number} itemIndent - Content indent of the item.
 * @param {string} marker - The item's list marker, stored on the node.
 * @param {object} context - Block context; copied with `listDepth` incremented.
 * @param {boolean} includePositions - Whether nodes get a `position`.
 * @param {number} depth - Current nesting depth; the inner parse uses depth + 1.
 * @returns {{item: object, consumed: number}} The listItem node and lines used.
 */
function parseListItem(lines, startIndex, itemIndent, marker, context, includePositions, depth) {
  const body = [];
  let sawBlank = false;
  let spread = false;

  const opener = lines[startIndex];
  if (opener) {
    const unindented = opener.raw.trimStart();
    const markerMatch = ORDERED_LIST_REGEX.exec(unindented) ?? UNORDERED_LIST_REGEX.exec(unindented);
    if (markerMatch) {
      body.push(unindented.slice(markerMatch[0].length));
    }
  }

  let cursor = startIndex + 1;
  while (cursor < lines.length) {
    const line = lines[cursor];
    if (!line) {
      break;
    }
    if (line.isBlank) {
      sawBlank = true;
      body.push("");
      cursor++;
      continue;
    }

    const startsItem = isListItemStart(line.raw);
    if (line.indent >= itemIndent) {
      // Indented content after a blank line makes the item loose.
      if (sawBlank) {
        spread = true;
      }
      body.push(removeIndent(line.raw, itemIndent));
      cursor++;
      continue;
    }

    // Less indented than the item content from here on.
    if (startsItem || sawBlank) {
      break;
    }
    body.push(line.raw); // lazy continuation
    cursor++;
  }

  // Blank lines at the end of the item are not part of its content.
  while (body.length > 0 && body[body.length - 1] === "") {
    body.pop();
  }

  const innerContext = {
    ...context,
    listDepth: context.listDepth + 1,
  };
  const { blocks } = parseBlocks(body.join("\n"), innerContext, includePositions, depth + 1);
  const node = {
    type: "listItem",
    children: blocks,
    marker,
    spread,
  };
  if (includePositions) {
    const first = lines[startIndex];
    const last = lines[cursor - 1];
    if (first && last) {
      node.position = createPosition(first, last);
    }
  }
  return { item: node, consumed: cursor - startIndex };
}
|
|
1524
|
+
/**
 * Parses a paragraph, or a setext heading, starting at `lines[startIndex]`.
 *
 * Lines are collected until a blank line or a line that starts another block
 * (thematic break, ATX heading, code fence, HTML block, blockquote, bullet
 * item, or ordered item numbered 1). If a setext underline indented less than
 * 4 follows at least one collected line, the collected text becomes a heading
 * of level 1 ("=") or 2 instead.
 *
 * @param {Array<object>} lines - Split source lines.
 * @param {number} startIndex - Index of the first paragraph line.
 * @param {{references: Map}} context - Block context providing link references.
 * @param {boolean} includePositions - Whether to attach a `position`.
 * @returns {{block: (object|null), consumed: number}} A paragraph or heading
 *   node with lines used, or a null block with `consumed: 1` when no line was
 *   collected.
 */
function parseParagraph(lines, startIndex, context, includePositions) {
  const collected = [];
  let cursor = startIndex;
  for (; cursor < lines.length; cursor++) {
    const line = lines[cursor];
    if (!line || line.isBlank) {
      break;
    }

    if (collected.length > 0) {
      const underline = isSetextUnderline(line.content);
      if (underline && line.indent < 4) {
        const heading = {
          type: "heading",
          level: underline[1]?.[0] === "=" ? 1 : 2,
          children: parseInline(collected.join("\n").trim(), context.references),
          style: "setext",
        };
        const first = lines[startIndex];
        if (includePositions && first) {
          heading.position = createPosition(first, line);
        }
        // +1 accounts for the underline itself.
        return { block: heading, consumed: cursor - startIndex + 1 };
      }
    }

    const startsOtherBlock =
      isThematicBreak(line.content) ||
      isAtxHeading(line.content) ||
      isFencedCodeStart(line.raw) ||
      (getHtmlBlockType(line.content) > 0 && line.indent < 4) ||
      isBlockquoteStart(line.raw);
    if (startsOtherBlock) {
      break;
    }

    // Ordered items only interrupt a paragraph when they are numbered 1.
    const listMatch = isListItemStart(line.raw);
    if (listMatch && (!listMatch.ordered || listMatch.start === 1)) {
      break;
    }

    collected.push(line.content);
  }

  if (collected.length === 0) {
    return { block: null, consumed: 1 };
  }

  const node = {
    type: "paragraph",
    children: parseInline(collected.join("\n").trim(), context.references),
  };
  if (includePositions) {
    const first = lines[startIndex];
    const last = lines[cursor - 1] ?? first;
    if (first && last) {
      node.position = createPosition(first, last);
    }
  }
  return { block: node, consumed: cursor - startIndex };
}
|
|
1585
|
+
|
|
1586
|
+
// src/schemas.ts
|
|
1587
|
+
import { z } from "zod";
|
|
1588
|
+
// 10 MiB; used as the default for the `maxBufferSize` stream option.
var DEFAULT_MAX_BUFFER_SIZE = 10 * 1024 * 1024;
|
|
1589
|
+
/** Source span: 1-based lines and columns, 0-based offsets. */
var positionSchema = z.object({
  startLine: z.number().int().min(1),
  startColumn: z.number().int().min(1),
  endLine: z.number().int().min(1),
  endColumn: z.number().int().min(1),
  startOffset: z.number().int().min(0),
  endOffset: z.number().int().min(0),
});

/** Fields shared by every inline node. */
var inlineNodeBase = z.object({
  position: positionSchema.optional(),
});

// --- Leaf inline nodes ---------------------------------------------------

var textNodeSchema = inlineNodeBase.extend({
  type: z.literal("text"),
  value: z.string(),
});

var codeSpanNodeSchema = inlineNodeBase.extend({
  type: z.literal("codeSpan"),
  value: z.string(),
});

var hardBreakNodeSchema = inlineNodeBase.extend({
  type: z.literal("hardBreak"),
});

var softBreakNodeSchema = inlineNodeBase.extend({
  type: z.literal("softBreak"),
});

var htmlInlineNodeSchema = inlineNodeBase.extend({
  type: z.literal("htmlInline"),
  value: z.string(),
});

// --- Inline nodes with children (children are not validated further) ------

var emphasisNodeSchema = inlineNodeBase.extend({
  type: z.literal("emphasis"),
  children: z.array(z.unknown()),
  marker: z.enum(["*", "_"]),
});

var strongNodeSchema = inlineNodeBase.extend({
  type: z.literal("strong"),
  children: z.array(z.unknown()),
  marker: z.enum(["**", "__"]),
});

var linkNodeSchema = inlineNodeBase.extend({
  type: z.literal("link"),
  url: z.string(),
  children: z.array(z.unknown()),
  title: z.string().optional(),
  reference: z.string().optional(),
});

var imageNodeSchema = inlineNodeBase.extend({
  type: z.literal("image"),
  alt: z.string(),
  url: z.string(),
  title: z.string().optional(),
  reference: z.string().optional(),
});

/** Any inline node. */
var inlineNodeSchema = z.union([
  textNodeSchema,
  codeSpanNodeSchema,
  hardBreakNodeSchema,
  softBreakNodeSchema,
  htmlInlineNodeSchema,
  emphasisNodeSchema,
  strongNodeSchema,
  linkNodeSchema,
  imageNodeSchema,
]);
|
|
1653
|
+
/** Fields shared by every block node. */
var blockNodeBase = z.object({
  position: positionSchema.optional(),
});

var thematicBreakNodeSchema = blockNodeBase.extend({
  type: z.literal("thematicBreak"),
  marker: z.enum(["-", "*", "_"]),
});

var headingNodeSchema = blockNodeBase.extend({
  type: z.literal("heading"),
  level: z.number().int().min(1).max(6),
  children: z.array(z.unknown()),
  style: z.enum(["atx", "setext"]),
});

var codeBlockNodeSchema = blockNodeBase.extend({
  type: z.literal("codeBlock"),
  value: z.string(),
  style: z.enum(["fenced", "indented"]),
  lang: z.string().optional(),
  meta: z.string().optional(),
  fence: z.enum(["`", "~"]).optional(),
  fenceLength: z.number().int().min(3).optional(),
});

var htmlBlockNodeSchema = blockNodeBase.extend({
  type: z.literal("htmlBlock"),
  value: z.string(),
  htmlType: z.number().int().min(1).max(7),
});

var paragraphNodeSchema = blockNodeBase.extend({
  type: z.literal("paragraph"),
  children: z.array(z.unknown()),
});

var linkReferenceDefinitionSchema = blockNodeBase.extend({
  type: z.literal("linkReferenceDefinition"),
  label: z.string(),
  url: z.string(),
  title: z.string().optional(),
});

var blockquoteNodeSchema = blockNodeBase.extend({
  type: z.literal("blockquote"),
  children: z.array(z.unknown()),
});

var listItemNodeSchema = blockNodeBase.extend({
  type: z.literal("listItem"),
  children: z.array(z.unknown()),
  marker: z.enum(["-", "*", "+", ")", "."]),
  spread: z.boolean(),
  checked: z.boolean().optional(),
});

var listNodeSchema = blockNodeBase.extend({
  type: z.literal("list"),
  ordered: z.boolean(),
  start: z.number().int().min(0).optional(),
  spread: z.boolean(),
  marker: z.enum(["-", "*", "+", ")", "."]),
  children: z.array(z.unknown()),
});

var tableCellNodeSchema = blockNodeBase.extend({
  type: z.literal("tableCell"),
  children: z.array(z.unknown()),
  align: z.enum(["left", "center", "right"]).optional(),
  isHeader: z.boolean(),
});

var tableRowNodeSchema = blockNodeBase.extend({
  type: z.literal("tableRow"),
  children: z.array(z.unknown()),
  isHeader: z.boolean(),
});

var tableNodeSchema = blockNodeBase.extend({
  type: z.literal("table"),
  children: z.array(z.unknown()),
  // One entry per column; null means the column has no explicit alignment.
  align: z.array(z.enum(["left", "center", "right"]).nullable()),
});

/** Any block node. Table rows and cells are not members of this union. */
var blockNodeSchema = z.union([
  thematicBreakNodeSchema,
  headingNodeSchema,
  codeBlockNodeSchema,
  htmlBlockNodeSchema,
  paragraphNodeSchema,
  linkReferenceDefinitionSchema,
  blockquoteNodeSchema,
  listItemNodeSchema,
  listNodeSchema,
  tableNodeSchema,
]);
|
|
1737
|
+
/** Root node of a parsed document. */
var documentNodeSchema = z.object({
  type: z.literal("document"),
  children: z.array(z.unknown()),
  // Optional map from a string key to its link reference definition.
  references: z
    .record(
      z.string(),
      z.object({
        type: z.literal("linkReferenceDefinition"),
        label: z.string(),
        url: z.string(),
        title: z.string().optional(),
        position: positionSchema.optional(),
      })
    )
    .optional(),
  position: positionSchema.optional(),
});

/** A parsed document: its root node, its line ending, and optionally the source text. */
var markdownDocumentSchema = z.object({
  root: documentNodeSchema,
  lineEnding: z.enum(["\n", "\r\n"]),
  source: z.string().optional(),
});
|
|
1756
|
+
var parseOptionsSchema = z.object({
|
|
1757
|
+
positions: z.boolean().optional().default(true),
|
|
1758
|
+
preserveSource: z.boolean().optional().default(false)
|
|
1759
|
+
});
|
|
1760
|
+
// Schema for markdown generator options. Every field is optional and carries
// a default; the defaults match the DEFAULT_OPTIONS object used by the generator.
var generateOptionsSchema = z.object({
  lineEnding: z.enum(["\n", "\r\n"]).optional().default("\n"),
  indent: z.number().int().min(1).max(8).optional().default(3),
  setext: z.boolean().optional().default(false),
  fence: z.enum(["`", "~"]).optional().default("`"),
  fenceLength: z.number().int().min(3).optional().default(3),
  emphasis: z.enum(["*", "_"]).optional().default("*"),
  strong: z.enum(["**", "__"]).optional().default("**"),
  bullet: z.enum(["-", "*", "+"]).optional().default("-"),
  orderedMarker: z.enum([")", "."]).optional().default("."),
  thematicBreak: z.enum(["-", "*", "_"]).optional().default("-"),
  thematicBreakLength: z.number().int().min(3).optional().default(3)
});
|
|
1776
|
+
// Schema for streaming options: the parse options plus a chunk size
// (default 65536) and a maximum buffer size (default DEFAULT_MAX_BUFFER_SIZE).
var streamOptionsSchema = parseOptionsSchema.extend({
  chunkSize: z.number().int().positive().optional().default(65536),
  maxBufferSize: z.number().int().positive().optional().default(DEFAULT_MAX_BUFFER_SIZE)
});
|
|
1780
|
+
|
|
1781
|
+
// src/parser.ts
|
|
1782
|
+
/**
 * Non-throwing wrapper around `parseOrThrow`.
 *
 * @param content - Markdown text, forwarded to `parseOrThrow`.
 * @param options - Optional parse options, forwarded to `parseOrThrow`.
 * @returns `{ success: true, data }` when parsing succeeds, otherwise
 *   `{ success: false, error }`; a thrown non-Error value is wrapped in an Error.
 */
function parse(content, options) {
  let data;
  try {
    data = parseOrThrow(content, options);
  } catch (caught) {
    const error = caught instanceof Error ? caught : new Error(String(caught));
    return { success: false, error };
  }
  return { success: true, data };
}
|
|
1793
|
+
/**
 * Parse markdown text into a document object, throwing on failure.
 *
 * @param content - Markdown text. Falsy or whitespace-only input yields an
 *   empty document with an LF line ending.
 * @param options - Optional parse options; validated with `parseOptionsSchema`
 *   when provided. `positions` defaults to true, `preserveSource` to false.
 * @returns `{ root, lineEnding }`, plus `source` when `preserveSource` is set
 *   and the input was not empty.
 * @throws Whatever `parseOptionsSchema.parse` or `parseBlocks` throw.
 */
function parseOrThrow(content, options) {
  if (options !== undefined) {
    parseOptionsSchema.parse(options);
  }
  const includePositions = options?.positions ?? true;
  const preserveSource = options?.preserveSource ?? false;
  if (!content || content.trim() === "") {
    return {
      root: {
        type: "document",
        children: [],
        references: {}
      },
      lineEnding: "\n"
    };
  }
  // Detect the line ending on the raw text, before normalisation removes it.
  const lineEnding = detectLineEnding(content);
  const normalized = normalizeLineEndings(content);
  const { blocks, references } = parseBlocks(normalized, undefined, includePositions);
  // Object.fromEntries defines own data properties, so a label such as
  // "__proto__" is stored as a normal key. Plain assignment on `{}` would
  // hit the prototype setter and silently drop that reference.
  const referencesObj = Object.fromEntries(
    Array.from(references, ([label, ref]) => [
      label,
      {
        type: "linkReferenceDefinition",
        label,
        url: ref.url,
        title: ref.title
      }
    ])
  );
  const document = {
    root: {
      type: "document",
      children: blocks,
      references: Object.keys(referencesObj).length > 0 ? referencesObj : undefined
    },
    lineEnding
  };
  if (preserveSource) {
    document.source = content;
  }
  return document;
}
|
|
1835
|
+
/**
 * Non-throwing wrapper around `parseBufferOrThrow`.
 *
 * @param buffer - Encoded markdown bytes, forwarded to `parseBufferOrThrow`.
 * @param options - Optional parse options, forwarded unchanged.
 * @returns `{ success: true, data }` or `{ success: false, error }`; a thrown
 *   non-Error value is wrapped in an Error.
 */
function parseBuffer(buffer, options) {
  let data;
  try {
    data = parseBufferOrThrow(buffer, options);
  } catch (caught) {
    const error = caught instanceof Error ? caught : new Error(String(caught));
    return { success: false, error };
  }
  return { success: true, data };
}
|
|
1846
|
+
/**
 * Decode a byte buffer with `decodeBuffer` and parse the resulting text.
 *
 * @param buffer - Encoded markdown bytes.
 * @param options - Optional parse options, forwarded to `parseOrThrow`.
 * @returns The document produced by `parseOrThrow`.
 */
function parseBufferOrThrow(buffer, options) {
  return parseOrThrow(decodeBuffer(buffer), options);
}
|
|
1850
|
+
/**
 * Parse markdown and return only the root node of the resulting document.
 *
 * @param content - Markdown text.
 * @param options - Optional parse options, forwarded to `parseOrThrow`.
 * @returns The `root` property of the parsed document.
 */
function parseToAst(content, options) {
  const { root } = parseOrThrow(content, options);
  return root;
}
|
|
1853
|
+
/**
 * Report whether `parse` succeeds on the given content.
 *
 * @param content - Markdown text to check.
 * @returns The `success` flag of the `parse` result.
 */
function isValidMarkdown(content) {
  const { success } = parse(content);
  return success;
}
|
|
1857
|
+
// src/generator.ts
|
|
1858
|
+
// Generator defaults, merged underneath caller-supplied options.
var DEFAULT_OPTIONS = {
  lineEnding: "\n",
  indent: 3,
  setext: false,
  fence: "`",
  fenceLength: 3,
  emphasis: "*",
  strong: "**",
  bullet: "-",
  orderedMarker: ".",
  thematicBreak: "-",
  thematicBreakLength: 3
};
|
|
1872
|
+
/**
 * Generate markdown text from a parsed document or a bare root node.
 *
 * @param document - Either an object with a `root` property or the root node itself.
 * @param options - Optional generator options; validated with
 *   `generateOptionsSchema` when provided.
 * @returns The markdown produced by `generateDocument`.
 * @throws Whatever `generateOptionsSchema.parse` throws for invalid options.
 */
function generate(document, options) {
  if (options !== undefined) {
    generateOptionsSchema.parse(options);
  }
  // The schema accepts explicit `undefined` values. Drop them before merging
  // so they cannot overwrite a default (e.g. `lineEnding: undefined` would
  // otherwise be joined into the output as the text "undefined").
  const overrides = Object.fromEntries(
    Object.entries(options ?? {}).filter(([, value]) => value !== undefined)
  );
  const opts = { ...DEFAULT_OPTIONS, ...overrides };
  const root = "root" in document ? document.root : document;
  return generateDocument(root, opts);
}
|
|
1880
|
+
/**
 * Generate markdown for a single node of any kind.
 *
 * @param node - A document, block or inline node.
 * @param options - Optional generator options; validated with
 *   `generateOptionsSchema` when provided.
 * @returns Markdown from `generateDocument`, `generateBlock` (depth 0) or
 *   `generateInline`, chosen by the node type.
 * @throws Whatever `generateOptionsSchema.parse` throws for invalid options.
 */
function generateNode(node, options) {
  if (options !== undefined) {
    generateOptionsSchema.parse(options);
  }
  // Explicit `undefined` values pass validation but must not override defaults.
  const overrides = Object.fromEntries(
    Object.entries(options ?? {}).filter(([, value]) => value !== undefined)
  );
  const opts = { ...DEFAULT_OPTIONS, ...overrides };
  if (node.type === "document") {
    return generateDocument(node, opts);
  }
  if (isBlockNode(node)) {
    return generateBlock(node, opts, 0);
  }
  return generateInline(node, opts);
}
|
|
1893
|
+
/**
 * Create an incremental markdown generator that collects rendered blocks.
 *
 * @param options - Optional generator options; validated with
 *   `generateOptionsSchema` when provided.
 * @returns An object with:
 *   - `addNode(node)`: renders a node and appends it (a document node
 *     contributes one entry per child);
 *   - `toString()`: all entries joined by a blank line;
 *   - `toStream()`: a ReadableStream over a snapshot of the entries, each
 *     followed by a blank line except the last.
 * @throws Whatever `generateOptionsSchema.parse` throws for invalid options.
 */
function createGenerator(options) {
  if (options !== undefined) {
    generateOptionsSchema.parse(options);
  }
  // Explicit `undefined` values pass validation but must not override defaults.
  const overrides = Object.fromEntries(
    Object.entries(options ?? {}).filter(([, value]) => value !== undefined)
  );
  const opts = { ...DEFAULT_OPTIONS, ...overrides };
  const blocks = [];
  return {
    addNode(node) {
      if (node.type === "document") {
        for (const child of node.children) {
          blocks.push(generateBlock(child, opts, 0));
        }
      } else if (isBlockNode(node)) {
        blocks.push(generateBlock(node, opts, 0));
      } else {
        blocks.push(generateInline(node, opts));
      }
    },
    toString() {
      return blocks.join(opts.lineEnding + opts.lineEnding);
    },
    toStream() {
      // Snapshot so nodes added after the stream is created are not emitted.
      const blocksSnapshot = [...blocks];
      const separator = opts.lineEnding + opts.lineEnding;
      let index = 0;
      return new ReadableStream({
        pull(controller) {
          if (index >= blocksSnapshot.length) {
            controller.close();
            return;
          }
          const block = blocksSnapshot[index];
          const isLast = index === blocksSnapshot.length - 1;
          controller.enqueue(isLast ? block : block + separator);
          index++;
        }
      });
    }
  };
}
|
|
1933
|
+
/**
 * Tell whether a node's `type` is one of the block-level node types.
 *
 * @param node - Any AST node.
 * @returns true for block-level types, false otherwise.
 */
function isBlockNode(node) {
  switch (node.type) {
    case "thematicBreak":
    case "heading":
    case "codeBlock":
    case "htmlBlock":
    case "paragraph":
    case "linkReferenceDefinition":
    case "blockquote":
    case "list":
    case "listItem":
    case "table":
      return true;
    default:
      return false;
  }
}
|
|
1947
|
+
/**
 * Render every child of a document node at depth 0 and join the results
 * with a blank line.
 *
 * @param node - Document node with a `children` array.
 * @param opts - Resolved generator options (`lineEnding` is read here).
 * @returns The generated markdown.
 */
function generateDocument(node, opts) {
  const rendered = [];
  for (const child of node.children) {
    rendered.push(generateBlock(child, opts, 0));
  }
  const blankLine = opts.lineEnding + opts.lineEnding;
  return rendered.join(blankLine);
}
|
|
1951
|
+
/**
 * Dispatch a block node to the generator for its type.
 *
 * @param node - Block-level node.
 * @param opts - Resolved generator options.
 * @param depth - Nesting depth, forwarded to the container generators
 *   (blockquote, list, list item).
 * @returns The generated markdown, or "" for an unrecognised type.
 */
function generateBlock(node, opts, depth) {
  const { type } = node;
  if (type === "thematicBreak") return generateThematicBreak(node, opts);
  if (type === "heading") return generateHeading(node, opts);
  if (type === "codeBlock") return generateCodeBlock(node, opts);
  if (type === "htmlBlock") return generateHtmlBlock(node);
  if (type === "paragraph") return generateParagraph(node, opts);
  if (type === "blockquote") return generateBlockquote(node, opts, depth);
  if (type === "list") return generateList(node, opts, depth);
  if (type === "listItem") return generateListItem(node, opts, depth);
  if (type === "table") return generateTable(node, opts);
  if (type === "linkReferenceDefinition") return generateLinkReferenceDefinition(node);
  return "";
}
|
|
1977
|
+
/**
 * Generate a thematic break line.
 *
 * @param node - Thematic break node; its `marker`, when set, overrides the option.
 * @param opts - Resolved generator options (`thematicBreak`, `thematicBreakLength`).
 * @returns The marker character repeated `thematicBreakLength` times.
 */
function generateThematicBreak(node, opts) {
  const { thematicBreak, thematicBreakLength } = opts;
  return repeat(node.marker ?? thematicBreak, thematicBreakLength);
}
|
|
1981
|
+
/**
 * Generate a heading in ATX form, or in setext form when `opts.setext` is on
 * and the level is 1 or 2.
 *
 * @param node - Heading node with `level` and inline `children`.
 * @param opts - Resolved generator options (`setext`, `lineEnding`).
 * @returns The heading markdown.
 */
function generateHeading(node, opts) {
  const { level } = node;
  const text = node.children.map((child) => generateInline(child, opts)).join("");
  const useSetext = opts.setext && (level === 1 || level === 2);
  if (!useSetext) {
    return repeat("#", level) + " " + text;
  }
  // The underline is at least three characters, otherwise as long as the text.
  const underline = repeat(level === 1 ? "=" : "-", Math.max(text.length, 3));
  return text + opts.lineEnding + underline;
}
|
|
1990
|
+
/**
 * Generate an indented or fenced code block.
 *
 * Indented blocks are prefixed with four spaces per line, the minimum that
 * Markdown recognises as an indented code block. Fenced blocks use
 * `opts.fence` repeated `node.fenceLength ?? opts.fenceLength` times; the
 * fence is lengthened when the code contains a line that would otherwise
 * close the block early. Inner lines are joined with `opts.lineEnding` in
 * both styles.
 *
 * @param node - Code block node (`value`, optional `style`, `lang`, `meta`, `fenceLength`).
 * @param opts - Resolved generator options (`fence`, `fenceLength`, `lineEnding`).
 * @returns The code block markdown.
 */
function generateCodeBlock(node, opts) {
  const codeLines = node.value.split("\n");
  if (node.style === "indented") {
    return codeLines.map((line) => "    " + line).join(opts.lineEnding);
  }
  const fence = opts.fence;
  // A line holding only a run of the fence character (indented at most three
  // spaces) closes the block if the run is at least as long as the opener.
  const closingFence = fence === "~" ? /^ {0,3}(~+)[ \t]*$/ : /^ {0,3}(`+)[ \t]*$/;
  let fenceLength = node.fenceLength ?? opts.fenceLength;
  for (const line of codeLines) {
    const run = closingFence.exec(line)?.[1];
    if (run !== undefined && run.length >= fenceLength) {
      fenceLength = run.length + 1;
    }
  }
  const fenceStr = fence.repeat(fenceLength);
  let infoString = node.lang ?? "";
  if (node.meta) {
    infoString += " " + node.meta;
  }
  return [fenceStr + infoString, ...codeLines, fenceStr].join(opts.lineEnding);
}
|
|
2006
|
+
/**
 * Generate an HTML block by returning its stored value unchanged.
 *
 * @param node - HTML block node.
 * @returns `node.value`.
 */
function generateHtmlBlock(node) {
  const { value } = node;
  return value;
}
|
|
2009
|
+
/**
 * Generate a paragraph by concatenating its inline children.
 *
 * @param node - Paragraph node with inline `children`.
 * @param opts - Resolved generator options, forwarded to `generateInline`.
 * @returns The paragraph text.
 */
function generateParagraph(node, opts) {
  const parts = [];
  for (const child of node.children) {
    parts.push(generateInline(child, opts));
  }
  return parts.join("");
}
|
|
2012
|
+
/**
 * Generate a blockquote: child blocks are rendered, separated by a blank
 * line, and every resulting line is prefixed with `> ` (empty lines get `>`).
 *
 * Lines are split on LF or CRLF rather than on `opts.lineEnding` alone, so
 * child output that still holds bare "\n" (for example a stored HTML or code
 * value) is prefixed line by line even when the configured ending is CRLF.
 *
 * @param node - Blockquote node with block `children`.
 * @param opts - Resolved generator options (`lineEnding`).
 * @param depth - Nesting depth, forwarded to `generateBlock`.
 * @returns The blockquote markdown.
 */
function generateBlockquote(node, opts, depth) {
  const content = node.children
    .map((child) => generateBlock(child, opts, depth))
    .join(opts.lineEnding + opts.lineEnding);
  return content
    .split(/\r?\n/)
    .map((line) => (line ? "> " + line : ">"))
    .join(opts.lineEnding);
}
|
|
2017
|
+
/**
 * Generate an ordered or bullet list.
 *
 * Each item is rendered one level deeper. Its first line follows the marker,
 * and every further non-empty line is indented by the marker width plus one
 * space. Item output is split on LF or CRLF, so continuation lines that still
 * hold a bare "\n" are indented correctly when `opts.lineEnding` is CRLF.
 *
 * @param node - List node (`ordered`, optional `start`, `marker`, `spread`, `children`).
 * @param opts - Resolved generator options (`bullet`, `orderedMarker`, `lineEnding`).
 * @param depth - Current nesting depth.
 * @returns The list markdown; items are separated by a blank line when
 *   `node.spread` is truthy, otherwise by a single line ending.
 */
function generateList(node, opts, depth) {
  const items = [];
  let counter = node.start ?? 1;
  for (const item of node.children) {
    const marker = node.ordered
      ? `${counter}${node.marker ?? opts.orderedMarker}`
      : node.marker ?? opts.bullet;
    const [first = "", ...rest] = generateListItem(item, opts, depth + 1).split(/\r?\n/);
    const indent = repeat(" ", marker.length + 1);
    const continuation = rest.map((line) => (line ? indent + line : ""));
    items.push([marker + " " + first, ...continuation].join(opts.lineEnding));
    counter++;
  }
  const separator = node.spread ? opts.lineEnding + opts.lineEnding : opts.lineEnding;
  return items.join(separator);
}
|
|
2033
|
+
/**
 * Generate the body of a list item (without its list marker).
 *
 * @param node - List item node; when `checked` is defined a task checkbox
 *   (`[x] ` or `[ ] `) is prepended.
 * @param opts - Resolved generator options (`lineEnding`).
 * @param depth - Nesting depth, forwarded to `generateBlock`.
 * @returns The child blocks joined by a blank line, after the optional checkbox.
 */
function generateListItem(node, opts, depth) {
  const checkbox = node.checked === undefined ? "" : node.checked ? "[x] " : "[ ] ";
  const rendered = [];
  for (const child of node.children) {
    rendered.push(generateBlock(child, opts, depth));
  }
  return checkbox + rendered.join(opts.lineEnding + opts.lineEnding);
}
|
|
2041
|
+
/**
 * Generate a link reference definition line: `[label]: url "title"`.
 *
 * @param node - Definition node (`label`, `url`, optional `title`).
 * @returns The definition line; the URL goes through `encodeUrl` and the
 *   title part is omitted when `title` is falsy.
 */
function generateLinkReferenceDefinition(node) {
  const definition = `[${node.label}]: ${encodeUrl(node.url)}`;
  return node.title ? `${definition} "${node.title}"` : definition;
}
|
|
2048
|
+
/**
 * Generate a pipe table.
 *
 * The first row flagged `isHeader` is emitted first, followed by a delimiter
 * row built from `node.align`; all rows not flagged `isHeader` follow. When
 * no header row exists, only the body rows are emitted.
 *
 * @param node - Table node (`children` rows, `align` per column).
 * @param opts - Resolved generator options (`lineEnding`).
 * @returns The table markdown.
 */
function generateTable(node, opts) {
  const toDelimiter = (align) => {
    switch (align) {
      case "left":
        return ":---";
      case "center":
        return ":---:";
      case "right":
        return "---:";
      default:
        return "---";
    }
  };
  const lines = [];
  const headerRow = node.children.find((row) => row.isHeader);
  const bodyRows = node.children.filter((row) => !row.isHeader);
  if (headerRow) {
    lines.push(generateTableRow(headerRow, opts));
    lines.push(`| ${node.align.map(toDelimiter).join(" | ")} |`);
  }
  bodyRows.forEach((row) => {
    lines.push(generateTableRow(row, opts));
  });
  return lines.join(opts.lineEnding);
}
|
|
2070
|
+
/**
 * Generate one table row: cells separated by ` | ` and wrapped in pipes.
 *
 * @param node - Table row node with cell `children`.
 * @param opts - Resolved generator options, forwarded to `generateTableCell`.
 * @returns The row markdown.
 */
function generateTableRow(node, opts) {
  const cellTexts = [];
  for (const cell of node.children) {
    cellTexts.push(generateTableCell(cell, opts));
  }
  return `| ${cellTexts.join(" | ")} |`;
}
|
|
2074
|
+
/**
 * Generate a table cell by concatenating its inline children.
 *
 * @param node - Table cell node with inline `children`.
 * @param opts - Resolved generator options, forwarded to `generateInline`.
 * @returns The cell text.
 */
function generateTableCell(node, opts) {
  const parts = [];
  for (const child of node.children) {
    parts.push(generateInline(child, opts));
  }
  return parts.join("");
}
|
|
2077
|
+
/**
 * Dispatch an inline node to the generator for its type.
 *
 * @param node - Inline node.
 * @param opts - Resolved generator options.
 * @returns The generated markdown, or "" for an unrecognised type.
 */
function generateInline(node, opts) {
  switch (node.type) {
    case "text":
    case "htmlInline":
      // Both are emitted verbatim.
      return node.value;
    case "codeSpan":
      return generateCodeSpan(node.value);
    case "emphasis":
      return generateEmphasis(node, opts);
    case "strong":
      return generateStrong(node, opts);
    case "link":
      return generateLink(node, opts);
    case "image":
      return generateImage(node);
    case "hardBreak":
      // A hard line break needs at least two trailing spaces before the line
      // ending; a single space is read back as a soft break.
      return "  " + opts.lineEnding;
    case "softBreak":
      return opts.lineEnding;
    default:
      return "";
  }
}
|
|
2101
|
+
/**
 * Wrap text in an inline code span.
 *
 * The delimiter is one backtick longer than the longest backtick run in the
 * value. The content is padded with one space on each side when the delimiter
 * is longer than one backtick, or when the value both starts and ends with a
 * space and also contains a non-space character. That is the case in which
 * parsers strip one space from each side, so the padding preserves the value.
 * A value made only of spaces is not stripped on parsing and is therefore
 * emitted without padding.
 *
 * @param value - Raw code text.
 * @returns The code span markdown.
 */
function generateCodeSpan(value) {
  let longestRun = 0;
  for (const run of value.match(/`+/g) ?? []) {
    longestRun = Math.max(longestRun, run.length);
  }
  const backticks = "`".repeat(longestRun + 1);
  const spaceWrapped = value.startsWith(" ") && value.endsWith(" ") && /[^ ]/.test(value);
  const needsPadding = longestRun > 0 || spaceWrapped;
  return needsPadding ? `${backticks} ${value} ${backticks}` : backticks + value + backticks;
}
|
|
2120
|
+
/**
 * Generate emphasis: the inline children wrapped in the emphasis marker.
 *
 * @param node - Emphasis node; its `marker`, when set, overrides `opts.emphasis`.
 * @param opts - Resolved generator options.
 * @returns The emphasis markdown.
 */
function generateEmphasis(node, opts) {
  const delimiter = node.marker ?? opts.emphasis;
  const inner = node.children.map((child) => generateInline(child, opts)).join("");
  return `${delimiter}${inner}${delimiter}`;
}
|
|
2125
|
+
/**
 * Generate strong emphasis: the inline children wrapped in the strong marker.
 *
 * @param node - Strong node; its `marker`, when set, overrides `opts.strong`.
 * @param opts - Resolved generator options.
 * @returns The strong-emphasis markdown.
 */
function generateStrong(node, opts) {
  const delimiter = node.marker ?? opts.strong;
  const inner = node.children.map((child) => generateInline(child, opts)).join("");
  return `${delimiter}${inner}${delimiter}`;
}
|
|
2130
|
+
/**
 * Generate an inline link: `[text](url "title")`.
 *
 * @param node - Link node (`children`, `url`, optional `title`).
 * @param opts - Resolved generator options, forwarded to `generateInline`.
 * @returns The link markdown; the URL goes through `encodeUrl` and the title
 *   part is omitted when `title` is falsy.
 */
function generateLink(node, opts) {
  const label = node.children.map((child) => generateInline(child, opts)).join("");
  const destination = encodeUrl(node.url);
  const titlePart = node.title ? ` "${node.title}"` : "";
  return `[${label}](${destination}${titlePart})`;
}
|
|
2138
|
+
/**
 * Generate an inline image: `![alt](url "title")`.
 *
 * Both return statements previously produced an empty string, so image nodes
 * vanished from generated markdown and the encoded URL was never used.
 *
 * @param node - Image node (`alt`, `url`, optional `title`).
 * @returns The image markdown; the URL goes through `encodeUrl` and the title
 *   part is omitted when `title` is falsy.
 */
function generateImage(node) {
  const url = encodeUrl(node.url);
  const alt = node.alt ?? "";
  if (node.title) {
    return `![${alt}](${url} "${node.title}")`;
  }
  return `![${alt}](${url})`;
}
|
|
2145
|
+
/**
 * Build a heading from plain text.
 *
 * @param level - Heading level.
 * @param text - Heading text.
 * @param style - "atx" (default) or "setext"; setext applies only to levels
 *   1 and 2, other levels fall back to ATX.
 * @returns The heading markdown, using "\n" between text and setext underline.
 */
function generateHeadingString(level, text, style = "atx") {
  const useSetext = style === "setext" && (level === 1 || level === 2);
  if (!useSetext) {
    return repeat("#", level) + " " + text;
  }
  // The underline is at least three characters, otherwise as long as the text.
  const underline = repeat(level === 1 ? "=" : "-", Math.max(text.length, 3));
  return `${text}\n${underline}`;
}
|
|
2154
|
+
/**
 * Build an inline link from plain strings.
 *
 * @param text - Link text.
 * @param url - Destination, passed through `encodeUrl`.
 * @param title - Optional title; omitted from the output when falsy.
 * @returns `[text](url)` or `[text](url "title")`.
 */
function generateLinkString(text, url, title) {
  const destination = encodeUrl(url);
  const titlePart = title ? ` "${title}"` : "";
  return `[${text}](${destination}${titlePart})`;
}
|
|
2161
|
+
/**
 * Build an inline image from plain strings.
 *
 * Both return statements previously produced an empty string, so the
 * function ignored all of its arguments.
 *
 * @param alt - Alternative text.
 * @param url - Image source, passed through `encodeUrl`.
 * @param title - Optional title; omitted from the output when falsy.
 * @returns `![alt](url)` or `![alt](url "title")`.
 */
function generateImageString(alt, url, title) {
  const encodedUrl = encodeUrl(url);
  if (title) {
    return `![${alt}](${encodedUrl} "${title}")`;
  }
  return `![${alt}](${encodedUrl})`;
}
|
|
2168
|
+
/**
 * Build a code block from plain strings.
 *
 * Indented style prefixes every line with four spaces, the minimum Markdown
 * recognises as an indented code block. Fenced style uses backticks; the
 * fence starts at three and grows when the code contains a line that would
 * otherwise close the block early.
 *
 * @param code - Code text with "\n" line separators.
 * @param lang - Optional info string placed after the opening fence.
 * @param style - "fenced" (default) or "indented".
 * @returns The code block markdown, using "\n" line endings.
 */
function generateCodeBlockString(code, lang, style = "fenced") {
  const lines = code.split("\n");
  if (style === "indented") {
    return lines.map((line) => "    " + line).join("\n");
  }
  let fenceLength = 3;
  for (const line of lines) {
    // A line holding only backticks (indented at most three spaces) closes the
    // block if its run is at least as long as the opening fence.
    const run = /^ {0,3}(`+)[ \t]*$/.exec(line)?.[1];
    if (run !== undefined && run.length >= fenceLength) {
      fenceLength = run.length + 1;
    }
  }
  const fence = "`".repeat(fenceLength);
  const infoString = lang ?? "";
  return `${fence}${infoString}\n${code}\n${fence}`;
}
|
|
2180
|
+
/**
 * Build a flat list from plain strings, one item per line.
 *
 * @param items - Item texts.
 * @param ordered - When true, items are numbered `N.`; otherwise `-` is used.
 * @param start - First number of an ordered list (default 1).
 * @returns The list markdown joined with "\n".
 */
function generateListString(items, ordered = false, start = 1) {
  const lines = [];
  items.forEach((item, offset) => {
    const marker = ordered ? `${start + offset}.` : "-";
    lines.push(`${marker} ${item}`);
  });
  return lines.join("\n");
}
|
|
2187
|
+
/**
 * Build a blockquote from plain text: every "\n"-separated line is prefixed
 * with `> `, and empty lines become `>`.
 *
 * @param content - Text to quote.
 * @returns The blockquote markdown joined with "\n".
 */
function generateBlockquoteString(content) {
  const quoted = [];
  for (const line of content.split("\n")) {
    quoted.push(line ? `> ${line}` : ">");
  }
  return quoted.join("\n");
}
|
|
2192
|
+
/**
 * Wrap text in an emphasis marker.
 *
 * @param text - Text to emphasise.
 * @param marker - Delimiter placed on both sides (default "*").
 * @returns The marker, the text, and the marker again.
 */
function generateEmphasisString(text, marker = "*") {
  return [marker, text, marker].join("");
}
|
|
2195
|
+
/**
 * Wrap text in a strong-emphasis marker.
 *
 * @param text - Text to make strong.
 * @param marker - Delimiter placed on both sides (default "**").
 * @returns The marker, the text, and the marker again.
 */
function generateStrongString(text, marker = "**") {
  return [marker, text, marker].join("");
}
|
|
2198
|
+
/**
 * Build an inline code span from plain text by delegating to `generateCodeSpan`.
 *
 * @param text - Raw code text.
 * @returns The code span markdown.
 */
function generateInlineCodeString(text) {
  const span = generateCodeSpan(text);
  return span;
}
|
|
2201
|
+
/**
 * Build a pipe table from plain strings.
 *
 * Pipes inside header and body cells are escaped as `\|`. Body rows are
 * padded with empty cells or truncated so each has exactly as many cells as
 * there are headers.
 *
 * @param headers - Header cell texts; an empty array yields "".
 * @param rows - Body rows, each an array of cell texts.
 * @param alignments - Optional per-column alignment ("left", "center",
 *   "right"); anything else yields a plain `---` delimiter.
 * @returns The table markdown joined with "\n".
 */
function generateTableString(headers, rows, alignments) {
  const columnCount = headers.length;
  if (columnCount === 0) {
    return "";
  }
  const escapeCell = (cell) => cell.replace(/\|/g, "\\|");
  const toLine = (cells) => `| ${cells.join(" | ")} |`;
  const toDelimiter = (align) => {
    switch (align) {
      case "left":
        return ":---";
      case "center":
        return ":---:";
      case "right":
        return "---:";
      default:
        return "---";
    }
  };
  const lines = [
    toLine(headers.map((header) => escapeCell(header))),
    toLine(headers.map((_, column) => toDelimiter(alignments?.[column])))
  ];
  for (const row of rows) {
    const cells = [...row];
    while (cells.length < columnCount) {
      cells.push("");
    }
    lines.push(toLine(cells.slice(0, columnCount).map((cell) => escapeCell(cell))));
  }
  return lines.join("\n");
}
|
|
2232
|
+
/**
 * Build a task list from `{ checked, text }` items, one per line.
 *
 * @param items - Items with a `checked` flag and a `text` string.
 * @returns Lines of the form `- [x] text` or `- [ ] text`, joined with "\n".
 */
function generateTaskListString(items) {
  const lines = [];
  for (const { checked, text } of items) {
    const box = checked ? "x" : " ";
    lines.push(`- [${box}] ${text}`);
  }
  return lines.join("\n");
}
|
|
2236
|
+
/**
 * Wrap text in strikethrough delimiters.
 *
 * @param text - Text to strike through.
 * @returns The text between two `~~` delimiters.
 */
function generateStrikethroughString(text) {
  const delimiter = "~~";
  return delimiter + text + delimiter;
}
|
|
2239
|
+
// src/html-renderer.ts
|
|
2240
|
+
// HTML renderer defaults, merged underneath caller-supplied options.
// `transformUrl` is the identity function by default.
var DEFAULT_OPTIONS2 = {
  sanitizeHtml: true,
  externalLinksNewTab: false,
  classPrefix: "",
  softBreakAsBr: false,
  transformUrl: (url) => url,
  elementAttributes: {}
};
|
|
2248
|
+
/**
 * Render a parsed document, or a bare root node, to HTML.
 *
 * @param document - Either an object with a `root` property or the root node itself.
 * @param options - Optional renderer options, merged over DEFAULT_OPTIONS2.
 * @returns The HTML produced by `renderDocument`.
 */
function renderToHtml(document, options) {
  const opts = { ...DEFAULT_OPTIONS2, ...options };
  if ("root" in document) {
    return renderDocument(document.root, opts);
  }
  return renderDocument(document, opts);
}
|
|
2253
|
+
/**
 * Render a single node of any kind to HTML.
 *
 * @param node - A document, block or inline node.
 * @param options - Optional renderer options, merged over DEFAULT_OPTIONS2.
 * @returns HTML from `renderDocument`, `renderBlock` or `renderInline`,
 *   chosen by the node type.
 */
function renderNodeToHtml(node, options) {
  const opts = { ...DEFAULT_OPTIONS2, ...options };
  const isDocument = node.type === "document";
  if (isDocument) {
    return renderDocument(node, opts);
  }
  return isBlockNode2(node) ? renderBlock(node, opts) : renderInline(node, opts);
}
|
|
2263
|
+
/**
 * Tell whether a node's `type` is one of the block-level node types
 * (HTML renderer copy of the check).
 *
 * @param node - Any AST node.
 * @returns true for block-level types, false otherwise.
 */
function isBlockNode2(node) {
  switch (node.type) {
    case "thematicBreak":
    case "heading":
    case "codeBlock":
    case "htmlBlock":
    case "paragraph":
    case "linkReferenceDefinition":
    case "blockquote":
    case "list":
    case "listItem":
    case "table":
      return true;
    default:
      return false;
  }
}
|
|
2277
|
+
/**
 * Render every child block of a document node and join the results with "\n".
 *
 * @param node - Document node with a `children` array.
 * @param opts - Resolved renderer options, forwarded to `renderBlock`.
 * @returns The rendered HTML.
 */
function renderDocument(node, opts) {
  const rendered = [];
  for (const child of node.children) {
    rendered.push(renderBlock(child, opts));
  }
  return rendered.join("\n");
}
|
|
2281
|
+
/**
 * Dispatch a block node to the HTML renderer for its type.
 *
 * @param node - Block-level node.
 * @param opts - Resolved renderer options.
 * @returns The rendered HTML. Link reference definitions and unrecognised
 *   types render as "".
 */
function renderBlock(node, opts) {
  const { type } = node;
  if (type === "thematicBreak") return renderThematicBreak(node, opts);
  if (type === "heading") return renderHeading(node, opts);
  if (type === "codeBlock") return renderCodeBlock(node, opts);
  if (type === "htmlBlock") return renderHtmlBlock(node, opts);
  if (type === "paragraph") return renderParagraph(node, opts);
  if (type === "blockquote") return renderBlockquote(node, opts);
  if (type === "list") return renderList(node, opts);
  if (type === "listItem") return renderListItem(node, opts);
  if (type === "table") return renderTable(node, opts);
  // Definitions produce no visible output of their own.
  return "";
}
|
|
2307
|
+
/**
 * Render a thematic break as `<hr />`.
 *
 * @param _node - Thematic break node (unused).
 * @param opts - Resolved renderer options; a non-empty `classPrefix` adds
 *   `class="<prefix>hr"`.
 * @returns The `<hr />` tag.
 */
function renderThematicBreak(_node, opts) {
  if (!opts.classPrefix) {
    return "<hr />";
  }
  return `<hr class="${opts.classPrefix}hr" />`;
}
|
|
2311
|
+
/**
 * Render a heading as `<hN>` with an `id` derived from its text content.
 *
 * @param node - Heading node (`level`, inline `children`).
 * @param opts - Resolved renderer options (`elementAttributes.heading`, `classPrefix`).
 * @returns The heading HTML; the `id` attribute is omitted when the
 *   generated id is empty.
 */
function renderHeading(node, opts) {
  const tag = `h${node.level}`;
  const inner = node.children.map((child) => renderInline(child, opts)).join("");
  const headingId = generateHeadingId(extractTextContent(node.children));
  const idAttr = headingId ? ` id="${headingId}"` : "";
  const extraAttrs = buildAttributes(opts.elementAttributes.heading, opts.classPrefix, tag);
  return `<${tag}${idAttr}${extraAttrs}>${inner}</${tag}>`;
}
|
|
2320
|
+
/**
 * Render a code block as `<pre><code>…</code></pre>`.
 *
 * @param node - Code block node (`value`, optional `lang`).
 * @param opts - Resolved renderer options (`elementAttributes.codeBlock`, `classPrefix`).
 * @returns The HTML; code and language are entity-encoded, and a
 *   `language-<lang>` class is set on `<code>` when `lang` is truthy.
 */
function renderCodeBlock(node, opts) {
  const body = encodeHtmlEntities(node.value);
  let codeAttrs = "";
  if (node.lang) {
    codeAttrs = ` class="language-${encodeHtmlEntities(node.lang)}"`;
  }
  const preAttrs = buildAttributes(opts.elementAttributes.codeBlock, opts.classPrefix, "pre");
  return `<pre${preAttrs}><code${codeAttrs}>${body}</code></pre>`;
}
|
|
2326
|
+
/**
 * Render a raw HTML block.
 *
 * @param node - HTML block node.
 * @param opts - Resolved renderer options (`sanitizeHtml`).
 * @returns With `sanitizeHtml` on, the entity-encoded value inside
 *   `<div class="raw-html">`; otherwise the value unchanged.
 */
function renderHtmlBlock(node, opts) {
  if (!opts.sanitizeHtml) {
    return node.value;
  }
  return `<div class="raw-html">${encodeHtmlEntities(node.value)}</div>`;
}
|
|
2332
|
+
/**
 * Render a paragraph as `<p>…</p>`.
 *
 * @param node - Paragraph node with inline `children`.
 * @param opts - Resolved renderer options; a non-empty `classPrefix` adds
 *   `class="<prefix>p"`.
 * @returns The paragraph HTML.
 */
function renderParagraph(node, opts) {
  const inner = node.children.map((child) => renderInline(child, opts)).join("");
  const classAttr = opts.classPrefix ? ` class="${opts.classPrefix}p"` : "";
  return ["<p", classAttr, ">", inner, "</p>"].join("");
}
|
|
2337
|
+
/**
 * Render a blockquote; child blocks are joined with "\n" and placed on their
 * own lines between the opening and closing tags.
 *
 * @param node - Blockquote node with block `children`.
 * @param opts - Resolved renderer options; a non-empty `classPrefix` adds
 *   `class="<prefix>blockquote"`.
 * @returns The blockquote HTML.
 */
function renderBlockquote(node, opts) {
  const inner = node.children.map((child) => renderBlock(child, opts)).join("\n");
  const classAttr = opts.classPrefix ? ` class="${opts.classPrefix}blockquote"` : "";
  return [`<blockquote${classAttr}>`, inner, "</blockquote>"].join("\n");
}
|
|
2345
|
+
/**
 * Render a list as `<ol>` or `<ul>` with one rendered item per line.
 *
 * @param node - List node (`ordered`, optional `start`, item `children`).
 * @param opts - Resolved renderer options; a non-empty `classPrefix` adds
 *   `class="<prefix>ol"` or `class="<prefix>ul"`.
 * @returns The list HTML. Ordered lists get a `start` attribute whenever a
 *   start number other than 1 is set, including 0.
 */
function renderList(node, opts) {
  const tag = node.ordered ? "ol" : "ul";
  // Compare against null/undefined explicitly: a truthiness test would drop
  // `start="0"` for lists that begin with "0.".
  const hasStart = node.start !== undefined && node.start !== null;
  const startAttr = node.ordered && hasStart && node.start !== 1 ? ` start="${node.start}"` : "";
  const className = opts.classPrefix ? ` class="${opts.classPrefix}${tag}"` : "";
  const items = node.children.map((item) => renderListItem(item, opts)).join("\n");
  return `<${tag}${startAttr}${className}>\n${items}\n</${tag}>`;
}
|
|
2355
|
+
/**
 * Render a list item as `<li>`.
 *
 * - An item whose only child is a paragraph is rendered "tight": the
 *   paragraph's inline content goes directly inside `<li>`.
 * - Any other content is rendered as blocks joined with "\n".
 * - When `checked` is defined, a disabled checkbox precedes the content.
 *
 * The task-item branch previously removed a leading `<p>` and a trailing
 * `</p>` from the rendered string. That never matched once `classPrefix`
 * added a class to `<p>`, and it left unbalanced tags for items with several
 * blocks. The paragraph is now unwrapped structurally instead.
 *
 * @param node - List item node (`children`, optional `checked`).
 * @param opts - Resolved renderer options; a non-empty `classPrefix` adds
 *   `class="<prefix>li"`.
 * @returns The list item HTML.
 */
function renderListItem(node, opts) {
  const className = opts.classPrefix ? ` class="${opts.classPrefix}li"` : "";
  const isTask = node.checked !== undefined;
  const checkbox = isTask
    ? `<input type="checkbox"${node.checked ? " checked" : ""} disabled /> `
    : "";
  const onlyChild = node.children.length === 1 ? node.children[0] : undefined;
  if (onlyChild?.type === "paragraph") {
    const inline = onlyChild.children.map((child) => renderInline(child, opts)).join("");
    return `<li${className}>${checkbox}${inline}</li>`;
  }
  const blocks = node.children.map((child) => renderBlock(child, opts)).join("\n");
  if (isTask) {
    return `<li${className}>${checkbox}${blocks}</li>`;
  }
  return `<li${className}>\n${blocks}\n</li>`;
}
|
|
2375
|
+
/**
 * Render a table with an optional `<thead>` (first row flagged `isHeader`)
 * and an optional `<tbody>` (all rows not flagged `isHeader`).
 *
 * @param node - Table node whose `children` are rows.
 * @param opts - Resolved renderer options; a non-empty `classPrefix` adds
 *   `class="<prefix>table"`.
 * @returns The table HTML, one structural element per line.
 */
function renderTable(node, opts) {
  const rows = node.children;
  const headerRow = rows.find((row) => row.isHeader);
  const bodyRows = rows.filter((row) => !row.isHeader);
  const classAttr = opts.classPrefix ? ` class="${opts.classPrefix}table"` : "";
  const lines = [`<table${classAttr}>`];
  if (headerRow) {
    lines.push("<thead>", renderTableRow(headerRow, opts), "</thead>");
  }
  if (bodyRows.length > 0) {
    lines.push("<tbody>", bodyRows.map((row) => renderTableRow(row, opts)).join("\n"), "</tbody>");
  }
  lines.push("</table>");
  return lines.join("\n");
}
|
|
2398
|
+
/**
 * Render a table row as `<tr>` holding its rendered cells back to back.
 *
 * @param node - Table row node with cell `children`.
 * @param opts - Resolved renderer options, forwarded to `renderTableCell`.
 * @returns The row HTML.
 */
function renderTableRow(node, opts) {
  const cellHtml = [];
  for (const cell of node.children) {
    cellHtml.push(renderTableCell(cell, opts));
  }
  return "<tr>" + cellHtml.join("") + "</tr>";
}
|
|
2402
|
+
/**
 * Render a table cell as `<th>` (header) or `<td>`.
 *
 * @param node - Table cell node (`isHeader`, optional `align`, inline `children`).
 * @param opts - Resolved renderer options, forwarded to `renderInline`.
 * @returns The cell HTML; a truthy `align` becomes an inline `text-align` style.
 */
function renderTableCell(node, opts) {
  const tag = node.isHeader ? "th" : "td";
  let styleAttr = "";
  if (node.align) {
    styleAttr = ` style="text-align: ${node.align}"`;
  }
  const inner = node.children.map((child) => renderInline(child, opts)).join("");
  return `<${tag}${styleAttr}>${inner}</${tag}>`;
}
|
|
2408
|
+
/**
 * Dispatch an inline node to the HTML renderer for its type.
 *
 * @param node - Inline node.
 * @param opts - Resolved renderer options.
 * @returns The rendered HTML, or "" for an unrecognised type.
 */
function renderInline(node, opts) {
  const { type } = node;
  if (type === "text") return renderText(node);
  if (type === "codeSpan") return renderCodeSpan(node, opts);
  if (type === "emphasis") return renderEmphasis(node, opts);
  if (type === "strong") return renderStrong(node, opts);
  if (type === "link") return renderLink(node, opts);
  if (type === "image") return renderImage(node, opts);
  if (type === "hardBreak") return renderHardBreak();
  if (type === "softBreak") return renderSoftBreak(opts);
  if (type === "htmlInline") return renderHtmlInline(node, opts);
  return "";
}
|
|
2432
|
+
/**
 * Render a text node by entity-encoding its value.
 *
 * @param node - Text node.
 * @returns The encoded text.
 */
function renderText(node) {
  const { value } = node;
  return encodeHtmlEntities(value);
}
|
|
2435
|
+
/**
 * Render an inline code span as `<code>` with entity-encoded content.
 *
 * @param node - Code span node.
 * @param opts - Resolved renderer options; a non-empty `classPrefix` adds
 *   `class="<prefix>code"`.
 * @returns The code span HTML.
 */
function renderCodeSpan(node, opts) {
  const classAttr = opts.classPrefix ? ` class="${opts.classPrefix}code"` : "";
  const body = encodeHtmlEntities(node.value);
  return `<code${classAttr}>${body}</code>`;
}
|
|
2439
|
+
/**
 * Render emphasis as `<em>`.
 *
 * @param node - Emphasis node with inline `children`.
 * @param opts - Resolved renderer options; a non-empty `classPrefix` adds
 *   `class="<prefix>em"`.
 * @returns The emphasis HTML.
 */
function renderEmphasis(node, opts) {
  const inner = node.children.map((child) => renderInline(child, opts)).join("");
  const classAttr = opts.classPrefix ? ` class="${opts.classPrefix}em"` : "";
  return ["<em", classAttr, ">", inner, "</em>"].join("");
}
|
|
2444
|
+
/**
 * Render strong emphasis as `<strong>`.
 *
 * @param node - Strong node with inline `children`.
 * @param opts - Resolved renderer options; a non-empty `classPrefix` adds
 *   `class="<prefix>strong"`.
 * @returns The strong-emphasis HTML.
 */
function renderStrong(node, opts) {
  const inner = node.children.map((child) => renderInline(child, opts)).join("");
  const classAttr = opts.classPrefix ? ` class="${opts.classPrefix}strong"` : "";
  return ["<strong", classAttr, ">", inner, "</strong>"].join("");
}
|
|
2449
|
+
/**
 * Render a link as `<a>`.
 *
 * The href is `opts.transformUrl(node.url, "link")`, entity-encoded. When
 * `externalLinksNewTab` is on and the original URL is external, `target` and
 * `rel` attributes are added.
 *
 * NOTE(review): the URL is only entity-encoded here; no scheme filtering is
 * visible in this function, so any scheme restriction presumably has to come
 * from `transformUrl`. Confirm against callers.
 *
 * @param node - Link node (`url`, optional `title`, inline `children`).
 * @param opts - Resolved renderer options.
 * @returns The anchor HTML.
 */
function renderLink(node, opts) {
  const inner = node.children.map((child) => renderInline(child, opts)).join("");
  const href = encodeHtmlEntities(opts.transformUrl(node.url, "link"));
  const titleAttr = node.title ? ` title="${encodeHtmlEntities(node.title)}"` : "";
  const opensNewTab = opts.externalLinksNewTab && isExternalUrl(node.url);
  const targetAttrs = opensNewTab ? ' target="_blank" rel="noopener noreferrer"' : "";
  const customAttrs = buildAttributes(opts.elementAttributes.link, opts.classPrefix, "a");
  return `<a href="${href}"${titleAttr}${targetAttrs}${customAttrs}>${inner}</a>`;
}
|
|
2460
|
+
/**
 * Render an image as `<img />`.
 *
 * @param node - Image node (`url`, `alt`, optional `title`).
 * @param opts - Resolved renderer options (`transformUrl`,
 *   `elementAttributes.image`, `classPrefix`).
 * @returns The image HTML; `src` is `opts.transformUrl(node.url, "image")`,
 *   and `src`, `alt` and `title` are entity-encoded.
 */
function renderImage(node, opts) {
  const src = encodeHtmlEntities(opts.transformUrl(node.url, "image"));
  const alt = encodeHtmlEntities(node.alt);
  const titleAttr = node.title ? ` title="${encodeHtmlEntities(node.title)}"` : "";
  const extraAttrs = buildAttributes(opts.elementAttributes.image, opts.classPrefix, "img");
  return ["<img", ` src="${src}"`, ` alt="${alt}"`, titleAttr, extraAttrs, " />"].join("");
}
|
|
2467
|
+
/**
 * Render a hard line break.
 *
 * @returns The literal `<br />` tag.
 */
function renderHardBreak() {
  const tag = "<br />";
  return tag;
}
|
|
2470
|
+
/**
 * Render a soft line break.
 *
 * @param opts - Resolved renderer options (`softBreakAsBr`).
 * @returns `<br />` when `softBreakAsBr` is truthy, otherwise "\n".
 */
function renderSoftBreak(opts) {
  if (opts.softBreakAsBr) {
    return "<br />";
  }
  return "\n";
}
|
|
2474
|
+
/**
 * Render inline raw HTML.
 *
 * @param node - Inline HTML node.
 * @param opts - Resolved renderer options (`sanitizeHtml`).
 * @returns The entity-encoded value when `sanitizeHtml` is on, otherwise the
 *   value unchanged.
 */
function renderHtmlInline(node, opts) {
  return opts.sanitizeHtml ? encodeHtmlEntities(node.value) : node.value;
}
|
|
2480
|
+
/**
 * Tell whether a URL is an absolute http(s) URL.
 *
 * URI schemes are case-insensitive, so `HTTPS://example.com` is treated the
 * same as `https://example.com`.
 *
 * @param url - URL string to test.
 * @returns true when the URL starts with `http://` or `https://` in any
 *   letter case.
 */
function isExternalUrl(url) {
  return /^https?:\/\//i.test(url);
}
|
|
2483
|
+
/**
 * Derive an id slug from heading text.
 *
 * @param text - Plain heading text.
 * @returns The text lower-cased, stripped of everything except `a-z`, `0-9`,
 *   whitespace and `-`, with whitespace runs replaced by `-`, cut to 50
 *   characters.
 */
function generateHeadingId(text) {
  const lowered = text.toLowerCase();
  const stripped = lowered.replace(/[^a-z0-9\s-]/g, "");
  const dashed = stripped.replace(/\s+/g, "-");
  return dashed.slice(0, 50);
}
|
|
2486
|
+
/**
 * Collect the plain text of a list of inline nodes.
 *
 * Text and code-span nodes contribute their `value`; nodes with a `children`
 * array contribute the text of their children, recursively; all other nodes
 * contribute nothing.
 *
 * @param nodes - Inline nodes.
 * @returns The concatenated text.
 */
function extractTextContent(nodes) {
  const pieces = [];
  for (const node of nodes) {
    if (node.type === "text" || node.type === "codeSpan") {
      pieces.push(node.value);
    } else if ("children" in node && Array.isArray(node.children)) {
      pieces.push(extractTextContent(node.children));
    } else {
      pieces.push("");
    }
  }
  return pieces.join("");
}
|
|
2498
|
+
/**
 * Build an HTML attribute string for an element.
 *
 * @param customAttrs - Optional map of attribute name to value; values are
 *   entity-encoded, names are emitted as given.
 * @param classPrefix - When non-empty, a `class="<prefix><elementName>"`
 *   attribute is emitted first.
 * @param elementName - Element name appended to the class prefix.
 * @returns The attributes joined by spaces with one leading space, or ""
 *   when there are none.
 */
function buildAttributes(customAttrs, classPrefix, elementName) {
  const parts = classPrefix ? [`class="${classPrefix}${elementName}"`] : [];
  if (customAttrs) {
    Object.entries(customAttrs).forEach(([name, value]) => {
      parts.push(`${name}="${encodeHtmlEntities(value)}"`);
    });
  }
  if (parts.length === 0) {
    return "";
  }
  return " " + parts.join(" ");
}
|
|
2510
|
+
// src/html-parser.ts
|
|
2511
|
+
// Tag names held in the SELF_CLOSING_TAGS set of the HTML parser.
var SELF_CLOSING_TAGS = new Set(
  "area base br col embed hr img input link meta param source track wbr".split(" ")
);
|
|
2527
|
+
// Lower-case tag names regarded as block-level. convertListItems uses this set
// to decide whether a list item's children are converted as blocks or inline.
var BLOCK_TAGS = new Set(
  [
    "address article aside blockquote dd details dialog div dl dt",
    "fieldset figcaption figure footer form h1 h2 h3 h4 h5 h6",
    "header hgroup hr li main nav ol p pre section",
    "table tbody td tfoot th thead tr ul",
  ]
    .join(" ")
    .split(" ")
);
|
|
2568
|
+
/**
 * Parses an HTML string into a lightweight node tree.
 *
 * Produced nodes:
 * - `{ type: "text", content }` with entities decoded by `decodeHtmlEntities2`;
 * - `{ type: "element", tag, attributes }` for self-closing elements
 *   (explicit `/>` or a tag listed in `SELF_CLOSING_TAGS`);
 * - `{ type: "element", tag, attributes, children }` for all other elements.
 *
 * Tag and attribute names are lower-cased. Comments and `<!…>` declarations
 * are dropped. The parser is lenient and always makes forward progress:
 * a `<` that does not start a parsable tag is kept as literal text, an
 * unterminated comment swallows the rest of the input, stray characters inside
 * a tag are skipped, and a closing tag with no open element is ignored.
 *
 * @param {string} html - Markup to parse.
 * @returns {Array<object>} Top-level nodes in document order.
 */
function parseHtml(html) {
  let pos = 0;
  // Lower-cased once so closing-tag lookups are case-insensitive without
  // re-lowercasing the whole input for every element.
  const lowerHtml = html.toLowerCase();

  function skipWhitespace(index) {
    let i = index;
    while (i < html.length && /\s/.test(html[i])) {
      i++;
    }
    return i;
  }

  function parseNodes() {
    const result = [];
    while (pos < html.length) {
      if (html.startsWith("</", pos)) {
        // A closing tag ends the current nesting level; the caller handles it.
        break;
      }
      if (html.startsWith("<!", pos)) {
        // Comment ("<!-- … -->") or declaration ("<!DOCTYPE …>"): dropped.
        // Without a terminator the remainder of the input is dropped too.
        const isComment = html.startsWith("<!--", pos);
        const endPos = isComment ? html.indexOf("-->", pos + 4) : html.indexOf(">", pos + 2);
        if (endPos === -1) {
          pos = html.length;
        } else {
          pos = endPos + (isComment ? 3 : 1);
        }
        continue;
      }
      if (html[pos] === "<") {
        const element = parseElement();
        if (element) {
          result.push(element);
          continue;
        }
      }
      const text = parseText();
      if (text) {
        result.push(text);
      }
    }
    return result;
  }

  function parseElement() {
    if (html[pos] !== "<") {
      return null;
    }
    const tagStart = pos + 1;
    let tagEnd = tagStart;
    while (tagEnd < html.length && !/[\s/>]/.test(html[tagEnd])) {
      tagEnd++;
    }
    const tag = html.slice(tagStart, tagEnd).toLowerCase();
    if (!tag || tag.startsWith("!")) {
      return null;
    }
    const attributes = {};
    let attrPos = tagEnd;
    while (attrPos < html.length) {
      attrPos = skipWhitespace(attrPos);
      if (html[attrPos] === ">" || html.startsWith("/>", attrPos)) {
        break;
      }
      let attrNameEnd = attrPos;
      while (attrNameEnd < html.length && !/[\s=/>]/.test(html[attrNameEnd])) {
        attrNameEnd++;
      }
      const attrName = html.slice(attrPos, attrNameEnd).toLowerCase();
      attrPos = skipWhitespace(attrNameEnd);
      if (html[attrPos] === "=") {
        attrPos = skipWhitespace(attrPos + 1);
        let value;
        const quote = html[attrPos];
        if (quote === '"' || quote === "'") {
          attrPos++;
          const valueEnd = html.indexOf(quote, attrPos);
          if (valueEnd !== -1) {
            value = html.slice(attrPos, valueEnd);
            attrPos = valueEnd + 1;
          } else {
            // Unterminated quote: empty value, keep scanning after the quote.
            value = "";
          }
        } else {
          let valueEnd = attrPos;
          while (valueEnd < html.length && !/[\s>]/.test(html[valueEnd])) {
            valueEnd++;
          }
          value = html.slice(attrPos, valueEnd);
          attrPos = valueEnd;
        }
        attributes[attrName] = decodeHtmlEntities2(value);
      } else if (attrName) {
        // Attribute without a value, e.g. `checked`.
        attributes[attrName] = "";
      } else {
        // Stray character that is neither a name nor "=" (e.g. the "/" in
        // `<br / >`): skip it, otherwise this loop would never advance.
        attrPos++;
      }
    }
    const explicitSelfClose = html.startsWith("/>", attrPos);
    const selfClosing = explicitSelfClose || SELF_CLOSING_TAGS.has(tag);
    if (explicitSelfClose) {
      pos = attrPos + 2;
    } else {
      const closePos = html.indexOf(">", attrPos);
      if (closePos === -1) {
        // The tag never closes; the caller falls back to literal text.
        return null;
      }
      pos = closePos + 1;
    }
    if (selfClosing) {
      return { type: "element", tag, attributes };
    }
    const children = parseNodes();
    const closingTag = `</${tag}>`;
    const closingPos = lowerHtml.indexOf(closingTag, pos);
    if (closingPos !== -1) {
      pos = closingPos + closingTag.length;
    }
    return { type: "element", tag, attributes, children };
  }

  function parseText() {
    const start = pos;
    // parseText is only reached at "<" after parseElement gave up on it; that
    // "<" is kept as literal text and must be consumed so parsing advances.
    if (html[pos] === "<") {
      pos++;
    }
    while (pos < html.length && html[pos] !== "<") {
      pos++;
    }
    const content = html.slice(start, pos);
    if (!content) {
      return null;
    }
    return { type: "text", content: decodeHtmlEntities2(content) };
  }

  const roots = [];
  while (pos < html.length) {
    for (const node of parseNodes()) {
      roots.push(node);
    }
    if (pos < html.length) {
      // parseNodes stopped at a closing tag that has no open element at the
      // top level: skip it instead of discarding the rest of the document.
      const end = html.indexOf(">", pos);
      pos = end === -1 ? html.length : end + 1;
    }
  }
  return roots;
}
|
|
2687
|
+
/**
 * Decodes HTML character references in a string.
 *
 * Supports a fixed set of named entities plus decimal (`&#NNN;`) and
 * hexadecimal (`&#xHH;`) numeric references. Decoding happens in a single
 * pass, so already-escaped text is not decoded twice (`&amp;lt;` becomes
 * `&lt;`, not `<`). Unknown names and numeric references outside the Unicode
 * range are left untouched.
 *
 * @param {string} text - Text that may contain character references.
 * @returns {string} The decoded text.
 */
function decodeHtmlEntities2(text) {
  const namedEntities = new Map([
    ["amp", "&"],
    ["lt", "<"],
    ["gt", ">"],
    ["quot", '"'],
    ["apos", "'"],
    // NOTE(review): mapped to a plain space, matching the replacement visible
    // in the previous table — confirm a non-breaking space is not wanted.
    ["nbsp", " "],
    ["ndash", "\u2013"],
    ["mdash", "\u2014"],
    ["lsquo", "\u2018"],
    ["rsquo", "\u2019"],
    ["ldquo", "\u201C"],
    ["rdquo", "\u201D"],
    ["copy", "\u00A9"],
    ["reg", "\u00AE"],
    ["trade", "\u2122"],
    ["hellip", "\u2026"],
  ]);
  return text.replace(
    /&(?:#(\d+)|#[xX]([0-9a-fA-F]+)|([a-zA-Z]+));/g,
    (match, decimal, hex, name) => {
      if (name !== undefined) {
        return namedEntities.get(name) ?? match;
      }
      const codePoint = decimal !== undefined ? Number.parseInt(decimal, 10) : Number.parseInt(hex, 16);
      // String.fromCodePoint handles astral characters (e.g. emoji) and throws
      // above U+10FFFF, so out-of-range references are kept verbatim.
      if (!Number.isInteger(codePoint) || codePoint > 0x10ffff) {
        return match;
      }
      return String.fromCodePoint(codePoint);
    }
  );
}
|
|
2714
|
+
/**
 * Wraps the block conversion of an HTML node list in a Markdown document node.
 *
 * @param {Array<object>} nodes - HTML AST nodes.
 * @param {object} [options={}] - Conversion options forwarded to `convertNodesToBlocks`.
 * @returns {{ type: "document", children: Array<object> }} The document node.
 */
function htmlAstToMarkdownAst(nodes, options = {}) {
  return {
    type: "document",
    children: convertNodesToBlocks(nodes, options),
  };
}
|
|
2721
|
+
/**
 * Converts a list of HTML AST nodes to Markdown block nodes.
 *
 * Each node is converted with `convertNodeToBlocks` and the per-node results
 * are concatenated in order.
 *
 * @param {Iterable<object>} nodes - HTML AST nodes.
 * @param {object} options - Conversion options.
 * @returns {Array<object>} The flattened block nodes.
 */
function convertNodesToBlocks(nodes, options) {
  const collected = [];
  for (const node of nodes) {
    for (const block of convertNodeToBlocks(node, options)) {
      collected.push(block);
    }
  }
  return collected;
}
|
|
2729
|
+
/**
 * Converts one HTML AST node into zero or more Markdown block nodes.
 *
 * - text: a paragraph (trimmed unless `options.preserveWhitespace`); empty text yields nothing
 * - comment: nothing
 * - `h1`–`h6`: heading; `p`: paragraph; `hr`: thematic break; `blockquote`: blockquote
 * - `ul` / `ol`: list (an `ol` uses its `start` attribute, falling back to 1
 *   when the attribute is missing or not a number)
 * - `pre`: code block; the language comes from a `language-*` class on a nested `code`
 * - `table`: table via `convertTable`
 * - `div`, `section`, `article`, `main`, `aside`, `nav`, `header`, `footer`:
 *   transparent containers, their children are converted as blocks
 * - `figure`: its `img` children plus each `figcaption` as an emphasised paragraph
 * - `img`: a paragraph holding the image
 * - anything else: a paragraph of the inline conversion of its children, when
 *   that has non-whitespace content
 *
 * @param {object} node - HTML AST node.
 * @param {object} options - Conversion options (`preserveWhitespace`,
 *   `headingStyle`, `bulletMarker`, `codeBlockStyle`, `fence`, …).
 * @returns {Array<object>} Markdown block nodes.
 */
function convertNodeToBlocks(node, options) {
  if (node.type === "text") {
    const raw = node.content || "";
    const value = options.preserveWhitespace ? raw : raw.trim();
    if (!value) {
      return [];
    }
    return [{ type: "paragraph", children: [{ type: "text", value }] }];
  }
  if (node.type === "comment") {
    return [];
  }
  const tag = node.tag || "";
  const children = node.children || [];
  const attributes = node.attributes || {};
  if (/^h([1-6])$/.test(tag)) {
    return [
      {
        type: "heading",
        level: Number.parseInt(tag[1], 10),
        style: options.headingStyle || "atx",
        children: convertNodesToInline(children, options),
      },
    ];
  }
  if (tag === "p") {
    const inline = convertNodesToInline(children, options);
    if (inline.length === 0) {
      return [];
    }
    return [{ type: "paragraph", children: inline }];
  }
  if (tag === "hr") {
    return [{ type: "thematicBreak", marker: "-" }];
  }
  if (tag === "blockquote") {
    return [{ type: "blockquote", children: convertNodesToBlocks(children, options) }];
  }
  if (tag === "ul") {
    return [
      {
        type: "list",
        ordered: false,
        spread: false,
        marker: options.bulletMarker || "-",
        children: convertListItems(children, options),
      },
    ];
  }
  if (tag === "ol") {
    // A non-numeric `start` (e.g. start="abc") must not leak NaN into the AST.
    const parsedStart = Number.parseInt(attributes.start, 10);
    const start = Number.isNaN(parsedStart) ? 1 : parsedStart;
    return [
      {
        type: "list",
        ordered: true,
        start,
        spread: false,
        marker: ".",
        children: convertListItems(children, options),
      },
    ];
  }
  if (tag === "pre") {
    const codeChild = children.find((c) => c.type === "element" && c.tag === "code");
    const content = codeChild ? extractTextContent2(codeChild.children || []) : extractTextContent2(children);
    const className = codeChild?.attributes?.class || "";
    const langMatch = className.match(/language-(\w+)/);
    return [
      {
        type: "codeBlock",
        style: options.codeBlockStyle || "fenced",
        fence: options.fence || "`",
        fenceLength: 3,
        lang: langMatch ? langMatch[1] : undefined,
        value: content.trim(),
      },
    ];
  }
  if (tag === "table") {
    const table = convertTable(children, options);
    return table ? [table] : [];
  }
  if (["div", "section", "article", "main", "aside", "nav", "header", "footer"].includes(tag)) {
    return convertNodesToBlocks(children, options);
  }
  if (tag === "figure") {
    const blocks = [];
    for (const child of children) {
      if (child.type !== "element") {
        continue;
      }
      if (child.tag === "img") {
        blocks.push(...convertNodeToBlocks(child, options));
      } else if (child.tag === "figcaption") {
        const caption = extractTextContent2(child.children || []).trim();
        if (caption) {
          blocks.push({
            type: "paragraph",
            children: [
              {
                type: "emphasis",
                marker: "*",
                children: [{ type: "text", value: caption }],
              },
            ],
          });
        }
      }
    }
    return blocks;
  }
  if (tag === "img") {
    const image = {
      type: "image",
      url: attributes.src || "",
      alt: attributes.alt || "",
      title: attributes.title,
    };
    return [{ type: "paragraph", children: [image] }];
  }
  // Unknown element: keep it only if its inline content is more than whitespace.
  const inlineChildren = convertNodesToInline(children, options);
  const hasContent = inlineChildren.some((c) => c.type !== "text" || c.value.trim());
  if (hasContent) {
    return [{ type: "paragraph", children: inlineChildren }];
  }
  return [];
}
|
|
2893
|
+
/**
 * Converts the `li` children of a list element into Markdown list items.
 *
 * Non-`li` nodes are ignored. An item whose children include a block-level
 * element (per `BLOCK_TAGS`, `li` excluded) is converted as blocks; otherwise
 * its inline content is wrapped in a single paragraph (or left empty). When
 * the item directly contains an `<input type="checkbox">`, `checked` is set
 * according to the presence of that input's `checked` attribute.
 *
 * @param {Iterable<object>} nodes - Children of a `ul` / `ol` element.
 * @param {object} options - Conversion options.
 * @returns {Array<object>} List item nodes.
 */
function convertListItems(nodes, options) {
  const isBlockElement = (c) => c.type === "element" && c.tag && BLOCK_TAGS.has(c.tag) && c.tag !== "li";
  const isCheckbox = (c) => c.type === "element" && c.tag === "input" && c.attributes?.type === "checkbox";
  const items = [];
  for (const node of nodes) {
    if (node.type !== "element" || node.tag !== "li") {
      continue;
    }
    const kids = node.children || [];
    let body;
    if (kids.some(isBlockElement)) {
      body = convertNodesToBlocks(kids, options);
    } else {
      const inline = convertNodesToInline(kids, options);
      body = inline.length > 0 ? [{ type: "paragraph", children: inline }] : [];
    }
    const item = { type: "listItem", marker: "-", spread: false, children: body };
    const box = kids.find(isCheckbox);
    if (box) {
      item.checked = box.attributes?.checked !== undefined;
    }
    items.push(item);
  }
  return items;
}
|
|
2930
|
+
/**
 * Converts a list of HTML AST nodes to Markdown inline nodes.
 *
 * Each node is converted with `convertNodeToInline` and the per-node results
 * are concatenated in order.
 *
 * @param {Iterable<object>} nodes - HTML AST nodes.
 * @param {object} options - Conversion options.
 * @returns {Array<object>} The flattened inline nodes.
 */
function convertNodesToInline(nodes, options) {
  const collected = [];
  for (const node of nodes) {
    for (const inline of convertNodeToInline(node, options)) {
      collected.push(inline);
    }
  }
  return collected;
}
|
|
2938
|
+
/**
 * Converts one HTML AST node into zero or more Markdown inline nodes.
 *
 * - text: a text node (whitespace runs collapsed unless `options.preserveWhitespace`)
 * - comment: nothing
 * - `strong`/`b`, `em`/`i`, `a`: strong, emphasis and link nodes wrapping the converted children
 * - `s`/`del`/`strike`: a text node with the flattened text wrapped in `~~`
 * - `code`: code span; `img`: image; `br`: hard break
 * - `sub`/`sup`: flattened text
 * - `span`, `small`, `mark`, `u`, `abbr`, `cite`, `q`, `time`: transparent, children are converted
 * - anything else: flattened, whitespace-normalised text when non-blank
 *
 * @param {object} node - HTML AST node.
 * @param {object} options - Conversion options (`preserveWhitespace`,
 *   `strongMarker`, `emphasisMarker`).
 * @returns {Array<object>} Markdown inline nodes.
 */
function convertNodeToInline(node, options) {
  if (node.type === "text") {
    const raw = node.content || "";
    const value = options.preserveWhitespace ? raw : normalizeWhitespace(raw);
    return value ? [{ type: "text", value }] : [];
  }
  if (node.type === "comment") {
    return [];
  }
  const tag = node.tag || "";
  const children = node.children || [];
  const attributes = node.attributes || {};
  switch (tag) {
    case "strong":
    case "b":
      return [
        {
          type: "strong",
          marker: options.strongMarker || "**",
          children: convertNodesToInline(children, options),
        },
      ];
    case "em":
    case "i":
      return [
        {
          type: "emphasis",
          marker: options.emphasisMarker || "*",
          children: convertNodesToInline(children, options),
        },
      ];
    case "s":
    case "del":
    case "strike":
      return [{ type: "text", value: `~~${extractTextContent2(children)}~~` }];
    case "code":
      return [{ type: "codeSpan", value: extractTextContent2(children) }];
    case "a":
      return [
        {
          type: "link",
          url: attributes.href || "",
          title: attributes.title,
          children: convertNodesToInline(children, options),
        },
      ];
    case "img":
      return [
        {
          type: "image",
          url: attributes.src || "",
          alt: attributes.alt || "",
          title: attributes.title,
        },
      ];
    case "br":
      return [{ type: "hardBreak" }];
    case "sub":
    case "sup":
      return [{ type: "text", value: extractTextContent2(children) }];
    case "span":
    case "small":
    case "mark":
    case "u":
    case "abbr":
    case "cite":
    case "q":
    case "time":
      return convertNodesToInline(children, options);
    default: {
      const flattened = extractTextContent2([node]);
      return flattened.trim() ? [{ type: "text", value: normalizeWhitespace(flattened) }] : [];
    }
  }
}
|
|
3021
|
+
/**
 * Converts the children of a `<table>` element into a Markdown table node.
 *
 * The header row is the first row of `<thead>`, or else the first bare `<tr>`,
 * or else the first collected body row. `<tbody>` rows and bare `<tr>` rows
 * become body rows; `<tfoot>` rows are appended after all body rows instead
 * of being dropped. Column alignments are read from the header row's cells
 * and copied onto the cell at the same index in every row.
 *
 * @param {Array<object>} nodes - Children of the table element.
 * @param {object} options - Conversion options.
 * @returns {object | null} A `table` node, or `null` when no rows were found.
 */
function convertTable(nodes, options) {
  const bodyRows = [];
  const footerRows = [];
  let headerRow = null;
  let alignments = [];
  for (const node of nodes) {
    if (node.type !== "element") {
      continue;
    }
    const sectionChildren = node.children || [];
    if (node.tag === "thead") {
      const theadRows = extractTableRows(sectionChildren, true, options);
      if (theadRows.length > 0) {
        headerRow = theadRows[0];
        alignments = extractAlignments(sectionChildren);
      }
    } else if (node.tag === "tbody") {
      bodyRows.push(...extractTableRows(sectionChildren, false, options));
    } else if (node.tag === "tfoot") {
      // Footer rows carry data too; keep them, placed after the body rows.
      footerRows.push(...extractTableRows(sectionChildren, false, options));
    } else if (node.tag === "tr") {
      if (!headerRow) {
        headerRow = convertTableRow(node, true, options);
        alignments = extractRowAlignments(node);
      } else {
        bodyRows.push(convertTableRow(node, false, options));
      }
    }
  }
  const rows = [...bodyRows, ...footerRows];
  if (!headerRow) {
    if (rows.length === 0) {
      return null;
    }
    // No explicit header: promote the first row.
    headerRow = rows.shift();
  }
  const allRows = [headerRow, ...rows];
  for (const row of allRows) {
    for (let i = 0; i < row.children.length; i++) {
      const cell = row.children[i];
      if (cell && alignments[i]) {
        cell.align = alignments[i];
      }
    }
  }
  return {
    type: "table",
    align: alignments,
    children: allRows,
  };
}
|
|
3068
|
+
/**
 * Converts every `tr` element in a node list to a table row.
 *
 * @param {Array<object>} nodes - Children of a table section.
 * @param {boolean} isHeader - Forwarded to `convertTableRow` for each row.
 * @param {object} options - Conversion options.
 * @returns {Array<object>} Converted rows, in document order.
 */
function extractTableRows(nodes, isHeader, options) {
  return nodes
    .filter((node) => node.type === "element" && node.tag === "tr")
    .map((rowNode) => convertTableRow(rowNode, isHeader, options));
}
|
|
3077
|
+
/**
 * Converts a `tr` element into a Markdown table row.
 *
 * Only `td` / `th` children become cells. A cell is a header cell when it is
 * a `th` or when `isHeader` is set. A cell's `align` is taken from its `align`
 * attribute (`left` / `center` / `right`, any case) or, failing that, from a
 * `text-align` declaration in its `style` attribute; otherwise it is not set.
 *
 * @param {object} node - The `tr` element.
 * @param {boolean} isHeader - Whether the row is a header row.
 * @param {object} options - Conversion options.
 * @returns {{ type: "tableRow", isHeader: boolean, children: Array<object> }} The row node.
 */
function convertTableRow(node, isHeader, options) {
  const resolveAlign = (attrs) => {
    const declared = attrs?.align?.toLowerCase();
    const fromStyle = (attrs?.style || "").match(/text-align:\s*(left|center|right)/i);
    if (declared === "left" || declared === "center" || declared === "right") {
      return declared;
    }
    return fromStyle ? fromStyle[1].toLowerCase() : undefined;
  };
  const cells = [];
  for (const child of node.children || []) {
    const isCell = child.type === "element" && (child.tag === "td" || child.tag === "th");
    if (!isCell) {
      continue;
    }
    const cell = {
      type: "tableCell",
      isHeader: child.tag === "th" || isHeader,
      children: convertNodesToInline(child.children || [], options),
    };
    const align = resolveAlign(child.attributes);
    if (align !== undefined) {
      cell.align = align;
    }
    cells.push(cell);
  }
  return { type: "tableRow", isHeader, children: cells };
}
|
|
3103
|
+
/**
 * Returns the column alignments of the first `tr` element in a node list.
 *
 * @param {Array<object>} nodes - Children of a table section.
 * @returns {Array<string | null>} Result of `extractRowAlignments` for the
 *   first row, or an empty array when there is no row.
 */
function extractAlignments(nodes) {
  const firstRow = nodes.find((node) => node.type === "element" && node.tag === "tr");
  return firstRow ? extractRowAlignments(firstRow) : [];
}
|
|
3111
|
+
/**
 * Reads the alignment of every `td` / `th` cell in a row.
 *
 * For each cell the `align` attribute (`left` / `center` / `right`, any case)
 * wins; otherwise a `text-align` declaration in the `style` attribute is used;
 * otherwise the entry is `null`.
 *
 * @param {object} row - The `tr` element.
 * @returns {Array<string | null>} One entry per cell.
 */
function extractRowAlignments(row) {
  const cells = (row.children || []).filter(
    (child) => child.type === "element" && (child.tag === "td" || child.tag === "th")
  );
  return cells.map((cell) => {
    const declared = cell.attributes?.align?.toLowerCase();
    const fromStyle = (cell.attributes?.style || "").match(/text-align:\s*(left|center|right)/i);
    if (declared === "left" || declared === "center" || declared === "right") {
      return declared;
    }
    return fromStyle ? fromStyle[1].toLowerCase() : null;
  });
}
|
|
3129
|
+
/**
 * Concatenates the text of HTML AST nodes, descending into element children.
 *
 * Text nodes contribute their `content` (empty when missing); elements with
 * children contribute the text of those children; everything else is skipped.
 *
 * @param {Array<object>} nodes - HTML AST nodes.
 * @returns {string} The concatenated text.
 */
function extractTextContent2(nodes) {
  return nodes
    .map((node) => {
      if (node.type === "text") {
        return node.content || "";
      }
      if (node.type === "element" && node.children) {
        return extractTextContent2(node.children);
      }
      return "";
    })
    .join("");
}
|
|
3140
|
+
/**
 * Collapses every run of whitespace into a single space.
 *
 * Leading and trailing runs are collapsed as well, not removed.
 *
 * @param {string} text - Input text.
 * @returns {string} Text with whitespace runs collapsed.
 */
function normalizeWhitespace(text) {
  const segments = text.split(/\s+/);
  return segments.join(" ");
}
|
|
3143
|
+
/**
 * Converts an HTML string to Markdown text.
 *
 * The doctype, the `<html>` / `<body>` tags and the whole `<head>…</head>`
 * section are removed first. The remainder is parsed with `parseHtml`,
 * converted with `htmlAstToMarkdownAst` and serialised with `generate`.
 *
 * @param {string} html - HTML to convert.
 * @param {object} [options={}] - Conversion options. `fence`,
 *   `emphasisMarker`, `strongMarker` and `bulletMarker` are forwarded to the
 *   generator as `fence`, `emphasis`, `strong` and `bullet` when defined;
 *   `headingStyle: "setext"` sets the generator's `setext` flag.
 * @returns {string} Generated Markdown, or `""` when nothing is left after cleanup.
 */
function htmlToMarkdown(html, options = {}) {
  const wrapperPatterns = [
    /<!DOCTYPE[^>]*>/gi,
    /<\/?html[^>]*>/gi,
    /<head[\s\S]*?<\/head>/gi,
    /<\/?body[^>]*>/gi,
  ];
  const cleanHtml = wrapperPatterns.reduce((acc, pattern) => acc.replace(pattern, ""), html).trim();
  if (!cleanHtml) {
    return "";
  }
  const markdownAst = htmlAstToMarkdownAst(parseHtml(cleanHtml), options);
  const generateOptions = {};
  const forwarded = [
    ["fence", "fence"],
    ["emphasisMarker", "emphasis"],
    ["strongMarker", "strong"],
    ["bulletMarker", "bullet"],
  ];
  for (const [source, target] of forwarded) {
    if (options[source] !== undefined) {
      generateOptions[target] = options[source];
    }
  }
  if (options.headingStyle === "setext") {
    generateOptions.setext = true;
  }
  return generate(markdownAst, generateOptions);
}
|
|
3162
|
+
// src/stream.ts
|
|
3163
|
+
/**
 * Creates the mutable state shared by the streaming parser functions.
 *
 * @param {{ positions?: boolean, maxBufferSize?: number }} [options] - Stream options.
 * @returns {object} State with an empty text `buffer`, an empty `lineBuffer`,
 *   an empty `references` map, `includePositions` (default `true`) and
 *   `maxBufferSize` (default `DEFAULT_MAX_BUFFER_SIZE`).
 */
function createStreamingParserState(options) {
  const includePositions = options?.positions ?? true;
  const maxBufferSize = options?.maxBufferSize ?? DEFAULT_MAX_BUFFER_SIZE;
  return {
    buffer: "",
    lineBuffer: [],
    references: new Map(),
    includePositions,
    maxBufferSize,
  };
}
|
|
3172
|
+
/**
 * Parses Markdown incrementally from a stream of text chunks.
 *
 * Yields the events produced by `processStreamChunk` for each chunk, then the
 * events of `flushStreamBuffer`, then a final `{ type: "complete", document }`
 * event built by `buildDocument`.
 *
 * @param {ReadableStream<Uint8Array> | AsyncIterable<string> | Iterable<string>} input -
 *   A byte `ReadableStream` (decoded with a default `TextDecoder`) or any
 *   (async) iterable of string chunks.
 * @param {object} [options] - Stream options; validated with
 *   `streamOptionsSchema.parse` when provided.
 * @yields {object} Stream events.
 */
async function* parseStream(input, options) {
  if (options !== undefined) {
    streamOptionsSchema.parse(options);
  }
  const state = createStreamingParserState(options);
  let iterable;
  if (input instanceof ReadableStream) {
    const reader = input.getReader();
    const decoder = new TextDecoder();
    iterable = {
      async *[Symbol.asyncIterator]() {
        try {
          while (true) {
            const { done, value } = await reader.read();
            if (done) {
              break;
            }
            // `stream: true` keeps multi-byte sequences that straddle two
            // chunks intact.
            yield decoder.decode(value, { stream: true });
          }
          // Flush whatever the decoder is still holding.
          const final = decoder.decode();
          if (final) {
            yield final;
          }
        } finally {
          // Always give the lock back — on completion, on error, and when the
          // consumer stops iterating early — so the caller can reuse or
          // cancel the stream.
          reader.releaseLock();
        }
      },
    };
  } else {
    iterable = input;
  }
  for await (const chunk of iterable) {
    yield* processStreamChunk(chunk, state);
  }
  yield* flushStreamBuffer(state);
  const document = buildDocument(state);
  yield { type: "complete", document };
}
|
|
3204
|
+
/**
 * Feeds one chunk of text into the streaming parser state.
 *
 * The chunk is appended to `state.buffer`. If the buffer then exceeds
 * `state.maxBufferSize`, a single `error` event is yielded and processing of
 * this chunk stops. Otherwise line endings are normalised, every complete
 * line is moved to `state.lineBuffer`, the trailing partial line stays in
 * `state.buffer`, and the events of `processLineBuffer` are yielded.
 *
 * A CRLF pair split across two chunks (`"…\r"` followed by `"\n…"`) counts as
 * one line break: the state remembers that the previous chunk ended with `\r`
 * and drops the matching leading `\n` of the next chunk.
 *
 * @param {string} chunk - Next piece of input text.
 * @param {object} state - Streaming parser state (mutated).
 * @yields {object} `error` or block events.
 */
function* processStreamChunk(chunk, state) {
  let incoming = chunk;
  if (typeof incoming === "string" && incoming.length > 0) {
    if (state.pendingCarriageReturn && incoming[0] === "\n") {
      // Second half of a CRLF whose "\r" was already turned into a line break.
      incoming = incoming.slice(1);
    }
    state.pendingCarriageReturn = incoming.endsWith("\r");
  }
  state.buffer += incoming;
  if (state.buffer.length > state.maxBufferSize) {
    // NOTE(review): the limit is compared against the string length
    // (UTF-16 code units), although the message says "bytes".
    yield {
      type: "error",
      error: `Buffer size exceeded maximum of ${state.maxBufferSize} bytes`
    };
    return;
  }
  const lines = normalizeLineEndings(state.buffer).split("\n");
  // The last entry is the still-incomplete line ("" when the buffer ended
  // with a newline); keep it for the next chunk.
  state.buffer = lines.pop() ?? "";
  state.lineBuffer.push(...lines);
  yield* processLineBuffer(state);
}
|
|
3220
|
+
/**
 * Emits every block that can currently be extracted from `state.lineBuffer`.
 *
 * Repeatedly calls `extractNextBlock`, yields a `block` event for each block
 * returned and removes the consumed lines. Lines reported as consumed are
 * removed even when they produced no block (e.g. a blank-only segment), so
 * such a segment cannot stall the stream. The loop stops once nothing can be
 * extracted or consumed.
 *
 * @param {object} state - Streaming parser state (mutated).
 * @yields {{ type: "block", data: object }} One event per extracted block.
 */
function* processLineBuffer(state) {
  while (state.lineBuffer.length > 0) {
    const { block, consumedLines } = extractNextBlock(state);
    if (!block && consumedLines <= 0) {
      // Nothing ready yet: wait for more input (or the final flush).
      break;
    }
    if (block) {
      yield { type: "block", data: block };
    }
    state.lineBuffer.splice(0, consumedLines);
  }
}
|
|
3231
|
+
/**
 * Extracts the next parsed block from the buffered lines.
 *
 * The head of `state.lineBuffer` is cut into a segment that ends at a closing
 * code fence, at the second of two consecutive blank lines, or — when more
 * than 100 lines are buffered without such a boundary — after 50 lines. The
 * segment is parsed with `parseBlocks`, and any link references it defines
 * are merged into `state.references`.
 *
 * A segment may parse into several blocks. They are handed out one per call:
 * all but the last are returned with `consumedLines: 0`, and only the last
 * one reports the segment's line count, so the caller never discards lines
 * whose blocks have not been delivered yet. Segments that parse to nothing
 * are removed from `state.lineBuffer` here and the search continues.
 *
 * @param {object} state - Streaming parser state (mutated; `pendingBlocks`
 *   and `pendingConsumedLines` are used as scratch fields).
 * @returns {{ block: object | null, consumedLines: number }} The next block
 *   and the number of leading lines the caller must remove from
 *   `state.lineBuffer`; `block` is `null` when no complete segment is
 *   available yet.
 */
function extractNextBlock(state) {
  // Blocks parsed by an earlier call that are still waiting to be handed out.
  if (state.pendingBlocks && state.pendingBlocks.length > 0) {
    const block = state.pendingBlocks.shift();
    if (state.pendingBlocks.length > 0) {
      return { block, consumedLines: 0 };
    }
    const consumedLines = state.pendingConsumedLines;
    state.pendingConsumedLines = 0;
    return { block, consumedLines };
  }
  while (state.lineBuffer.length > 0) {
    const endIndex = findNextSegmentEnd(state.lineBuffer);
    if (endIndex < 0) {
      return { block: null, consumedLines: 0 };
    }
    const content = state.lineBuffer.slice(0, endIndex).join("\n");
    const context = createBlockContext();
    context.references = state.references;
    const { blocks, references } = parseBlocks(content, context, state.includePositions);
    for (const [key, value] of references) {
      state.references.set(key, value);
    }
    const parsed = blocks.filter((block) => block != null);
    if (parsed.length === 0) {
      // Blank lines or definitions only: drop the segment and look further.
      state.lineBuffer.splice(0, endIndex);
      continue;
    }
    const [first, ...rest] = parsed;
    if (rest.length === 0) {
      return { block: first, consumedLines: endIndex };
    }
    // Defer consuming the lines until the last block of the segment is out.
    state.pendingBlocks = rest;
    state.pendingConsumedLines = endIndex;
    return { block: first, consumedLines: 0 };
  }
  return { block: null, consumedLines: 0 };
}

/**
 * Finds where the first parsable segment of `lines` ends.
 *
 * @param {Array<string>} lines - Buffered lines.
 * @returns {number} Exclusive end index of the segment, or -1 when no
 *   boundary has been seen yet and at most 100 lines are buffered.
 */
function findNextSegmentEnd(lines) {
  let blankCount = 0;
  // Non-null while inside a fenced code block; matches its closing fence.
  let closingFenceRegex = null;
  for (let i = 0; i < lines.length; i++) {
    const trimmedLine = (lines[i] ?? "").trimStart();
    const fenceMatch = /^(`{3,}|~{3,})/.exec(trimmedLine);
    if (fenceMatch) {
      if (closingFenceRegex === null) {
        const fence = fenceMatch[1];
        // The closing fence uses the same character and is at least as long.
        closingFenceRegex = new RegExp(`^${fence[0]}{${fence.length},}\\s*$`);
      } else if (closingFenceRegex.test(trimmedLine)) {
        return i + 1;
      }
      continue;
    }
    if (closingFenceRegex !== null) {
      continue;
    }
    if (trimmedLine === "") {
      blankCount++;
      if (blankCount >= 2 && i > 0) {
        return i;
      }
    } else {
      blankCount = 0;
    }
  }
  // No boundary found: force progress once the buffer grows past 100 lines.
  return lines.length > 100 ? Math.min(50, lines.length) : -1;
}
|
|
3291
|
+
/**
 * Parses whatever is left in the streaming state at end of input.
 *
 * A non-blank trailing partial line in `state.buffer` is appended to
 * `state.lineBuffer`, and the buffer is cleared. If any lines remain they are
 * parsed together with `parseBlocks`, link references are merged into
 * `state.references`, and one `block` event is yielded per parsed block.
 *
 * @param {object} state - Streaming parser state (mutated).
 * @yields {{ type: "block", data: object }} One event per remaining block.
 */
function* flushStreamBuffer(state) {
  const trailing = state.buffer;
  if (trailing.trim()) {
    state.lineBuffer.push(trailing);
  }
  state.buffer = "";
  if (state.lineBuffer.length === 0) {
    return;
  }
  const remaining = state.lineBuffer.join("\n");
  const context = createBlockContext();
  context.references = state.references;
  const parsed = parseBlocks(remaining, context, state.includePositions);
  for (const [label, definition] of parsed.references) {
    state.references.set(label, definition);
  }
  for (const block of parsed.blocks) {
    yield { type: "block", data: block };
  }
}
|
|
3310
|
+
/**
 * Builds the document skeleton emitted with the stream's `complete` event.
 *
 * The root has no children (callers attach the streamed blocks themselves)
 * and carries the collected link reference definitions keyed by label, or
 * `undefined` when there are none. `lineEnding` is always `"\n"`.
 *
 * @param {object} state - Streaming parser state; `references` holds
 *   `[label, { url, title }]` entries.
 * @returns {{ root: object, lineEnding: string }} The document.
 */
function buildDocument(state) {
  const referencesObj = {};
  for (const entry of state.references) {
    const [label, ref] = entry;
    const { url, title } = ref;
    referencesObj[label] = { type: "linkReferenceDefinition", label, url, title };
  }
  const hasReferences = Object.keys(referencesObj).length > 0;
  const root = {
    type: "document",
    children: [],
    references: hasReferences ? referencesObj : undefined,
  };
  return { root, lineEnding: "\n" };
}
|
|
3330
|
+
/**
 * Consumes `parseStream` and returns the fully assembled document.
 *
 * @param {*} input - Input accepted by `parseStream`.
 * @param {object} [options] - Stream options forwarded to `parseStream`.
 * @returns {Promise<object>} The document from the `complete` event, with
 *   `root.children` set to every streamed block in order.
 * @throws {Error} With the event's message when an `error` event is received,
 *   or when the stream ends without a `complete` event.
 */
async function parseStreamToDocument(input, options) {
  const collected = [];
  let finished = null;
  for await (const event of parseStream(input, options)) {
    if (event.type === "block") {
      collected.push(event.data);
    } else if (event.type === "complete") {
      finished = event.document;
    } else if (event.type === "error") {
      throw new Error(event.error);
    }
  }
  if (!finished) {
    throw new Error("Stream ended without completion event");
  }
  finished.root.children = collected;
  return finished;
}
|
|
3351
|
+
/**
 * Stream-parses an in-memory buffer.
 *
 * The buffer is decoded with `decodeBuffer`, cut into pieces of
 * `options.chunkSize` characters (default 65536) and fed to `parseStream`.
 *
 * @param {Uint8Array} buffer - Encoded Markdown.
 * @param {object} [options] - Stream options; also forwarded to `parseStream`.
 * @yields {object} The events produced by `parseStream`.
 */
async function* parseBufferStream(buffer, options) {
  const text = decodeBuffer(buffer);
  const step = options?.chunkSize ?? 65536;
  const pieces = (async function* () {
    let offset = 0;
    while (offset < text.length) {
      yield text.slice(offset, offset + step);
      offset += step;
    }
  })();
  yield* parseStream(pieces, options);
}
|
|
3361
|
+
/**
 * Stream-parses several files one after another.
 *
 * For each non-empty entry of `files` this yields a `file_start` event, a
 * `block` event per parsed block, and then either `file_complete` (with the
 * document, its `root.children` set to the file's blocks) or `file_error`
 * (with the error message). Falsy entries are skipped. A final
 * `batch_complete` event reports the total number of entries and how many
 * failed. Control is handed back to the event loop (zero-delay timer) after
 * every chunk and after every file.
 *
 * @param {Array<{ filename: string, content: string }>} files - Files to parse.
 * @param {object} [options] - Stream options; `chunkSize` defaults to 65536.
 * @yields {object} Batch events.
 */
async function* parseBatchStream(files, options) {
  const pause = () => new Promise((resolve) => setTimeout(resolve, 0));
  let errorCount = 0;
  for (let fileIndex = 0; fileIndex < files.length; fileIndex += 1) {
    const file = files[fileIndex];
    if (!file) {
      continue;
    }
    const { filename, content } = file;
    yield { type: "file_start", fileIndex, filename };
    try {
      const blocks = [];
      const chunks = (async function* () {
        const chunkSize = options?.chunkSize ?? 65536;
        for (let offset = 0; offset < content.length; offset += chunkSize) {
          yield content.slice(offset, offset + chunkSize);
          await pause();
        }
      })();
      for await (const event of parseStream(chunks, options)) {
        if (event.type === "block") {
          yield { type: "block", fileIndex, data: event.data };
          blocks.push(event.data);
        } else if (event.type === "complete") {
          const document = {
            ...event.document,
            root: { ...event.document.root, children: blocks },
          };
          yield { type: "file_complete", fileIndex, filename, document };
        } else if (event.type === "error") {
          // Routed to the catch below so the file is reported as failed.
          throw new Error(event.error);
        }
      }
    } catch (err) {
      errorCount++;
      yield {
        type: "file_error",
        fileIndex,
        filename,
        error: err instanceof Error ? err.message : String(err),
      };
    }
    await pause();
  }
  yield { type: "batch_complete", totalFiles: files.length, errorCount };
}
|
|
3422
|
+
/**
 * Drains `parseBatchStream(files, options)` and returns one result record per
 * entry of `files`, in input order.
 *
 * Each record starts as `{ fileIndex, filename }`. A "file_complete" event adds
 * `document`; a "file_error" event adds `error`. All other event types are
 * ignored. Entries that produce neither event keep only `fileIndex` and
 * `filename`.
 *
 * @param {Array<{ filename: *, content: * } | null | undefined>} files - Files
 *   to parse. `null`/`undefined` entries are tolerated and get a record whose
 *   `filename` is `undefined`.
 * @param {object} [options] - Passed through to `parseBatchStream`.
 * @returns {Promise<Array<{ fileIndex: number, filename: *, document?: *, error?: * }>>}
 */
async function parseBatchStreamToArray(files, options) {
  // `file?.` rather than `file.`: parseBatchStream skips falsy entries, so
  // building the result skeleton must not throw on them either.
  const results = files.map((file, index) => ({
    fileIndex: index,
    filename: file?.filename,
  }));

  for await (const event of parseBatchStream(files, options)) {
    const isComplete = event.type === "file_complete";
    if (!isComplete && event.type !== "file_error") {
      continue;
    }
    const result = results[event.fileIndex];
    if (!result) {
      // Event refers to an index with no record; nothing to update.
      continue;
    }
    if (isComplete) {
      result.document = event.document;
    } else {
      result.error = event.error;
    }
  }
  return results;
}
|
|
3447
|
+
export {
|
|
3448
|
+
thematicBreakNodeSchema,
|
|
3449
|
+
textNodeSchema,
|
|
3450
|
+
tableRowNodeSchema,
|
|
3451
|
+
tableNodeSchema,
|
|
3452
|
+
tableCellNodeSchema,
|
|
3453
|
+
strongNodeSchema,
|
|
3454
|
+
streamOptionsSchema,
|
|
3455
|
+
softBreakNodeSchema,
|
|
3456
|
+
renderToHtml,
|
|
3457
|
+
renderNodeToHtml,
|
|
3458
|
+
positionSchema,
|
|
3459
|
+
parseToAst,
|
|
3460
|
+
parseStreamToDocument,
|
|
3461
|
+
parseStream,
|
|
3462
|
+
parseOrThrow,
|
|
3463
|
+
parseOptionsSchema,
|
|
3464
|
+
parseHtml,
|
|
3465
|
+
parseBufferStream,
|
|
3466
|
+
parseBufferOrThrow,
|
|
3467
|
+
parseBuffer,
|
|
3468
|
+
parseBatchStreamToArray,
|
|
3469
|
+
parseBatchStream,
|
|
3470
|
+
parse,
|
|
3471
|
+
paragraphNodeSchema,
|
|
3472
|
+
normalizeMarkdownEmphasis,
|
|
3473
|
+
normalizeLineEndings,
|
|
3474
|
+
normalizeEscapedNewlines,
|
|
3475
|
+
markdownDocumentSchema,
|
|
3476
|
+
listNodeSchema,
|
|
3477
|
+
listItemNodeSchema,
|
|
3478
|
+
linkReferenceDefinitionSchema,
|
|
3479
|
+
linkNodeSchema,
|
|
3480
|
+
isValidMarkdown,
|
|
3481
|
+
inlineNodeSchema,
|
|
3482
|
+
imageNodeSchema,
|
|
3483
|
+
htmlToMarkdown,
|
|
3484
|
+
htmlInlineNodeSchema,
|
|
3485
|
+
htmlBlockNodeSchema,
|
|
3486
|
+
htmlAstToMarkdownAst,
|
|
3487
|
+
headingNodeSchema,
|
|
3488
|
+
hardBreakNodeSchema,
|
|
3489
|
+
generateTaskListString,
|
|
3490
|
+
generateTableString,
|
|
3491
|
+
generateStrongString,
|
|
3492
|
+
generateStrikethroughString,
|
|
3493
|
+
generateOptionsSchema,
|
|
3494
|
+
generateNode,
|
|
3495
|
+
generateListString,
|
|
3496
|
+
generateLinkString,
|
|
3497
|
+
generateInlineCodeString,
|
|
3498
|
+
generateImageString,
|
|
3499
|
+
generateHeadingString,
|
|
3500
|
+
generateEmphasisString,
|
|
3501
|
+
generateCodeBlockString,
|
|
3502
|
+
generateBlockquoteString,
|
|
3503
|
+
generate,
|
|
3504
|
+
emphasisNodeSchema,
|
|
3505
|
+
documentNodeSchema,
|
|
3506
|
+
createGenerator,
|
|
3507
|
+
codeSpanNodeSchema,
|
|
3508
|
+
codeBlockNodeSchema,
|
|
3509
|
+
blockquoteNodeSchema,
|
|
3510
|
+
blockNodeSchema,
|
|
3511
|
+
DEFAULT_MAX_BUFFER_SIZE
|
|
3512
|
+
};
|