markdown-parser 0.0.8 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js ADDED
@@ -0,0 +1,2699 @@
1
+ import { decodeHTMLStrict } from 'entities';
2
+
3
/**
 * Parses inline Markdown content into a list of inline nodes.
 *
 * The input is scanned once from left to right. Depending on the character under the cursor, the
 * scanner emits line breaks, escaped characters, code spans, emphasis delimiters, link/image
 * brackets, autolinks, raw HTML tags, decoded entities or plain text. Emphasis delimiters are
 * resolved at the end (and for link/image children as soon as the link is formed), and adjacent
 * text nodes are merged before returning.
 *
 * @param input - The inline text to parse.
 * @param options - Parser options. `options.referenceDefinitions.get(label)` is used to resolve
 *   reference-style links and images.
 * @returns The list of parsed inline nodes.
 */
function parseInline(input, options) {
  // Characters that end a plain-text run because another branch of the scanner handles them.
  const textRunTerminators = "\n\\`*_[]!<&";

  // Returns the whole character that ends right before `index` (both halves of a surrogate pair
  // when there is one), or undefined at the start of the input.
  const readCharacterBefore = (index) => {
    if (index <= 0) return undefined;
    const lastUnit = input.charCodeAt(index - 1);
    if (lastUnit >= 0xdc00 && lastUnit <= 0xdfff && index >= 2) {
      const leadingUnit = input.charCodeAt(index - 2);
      if (leadingUnit >= 0xd800 && leadingUnit <= 0xdbff) {
        return input.slice(index - 2, index);
      }
    }
    return input[index - 1];
  };

  // Returns the whole character that starts at `index`, or undefined past the end of the input.
  const readCharacterAt = (index) => {
    const codePoint = input.codePointAt(index);
    return codePoint === undefined ? undefined : String.fromCodePoint(codePoint);
  };

  const nodes = [];
  const brackets = [];
  let characterCursor = 0;
  while (characterCursor < input.length) {
    const marker = input.charAt(characterCursor);
    if (marker === "\n") {
      const startIndex = characterCursor;
      // Count the spaces that directly precede the newline; two or more make a hard break.
      let numOfPrecedingSpaces = 0;
      while (
        startIndex - numOfPrecedingSpaces - 1 >= 0 &&
        input.charAt(startIndex - numOfPrecedingSpaces - 1) === " "
      ) {
        numOfPrecedingSpaces++;
      }
      // Trailing spaces never belong to the rendered text, so drop them from the preceding text node.
      const lastNode = nodes[nodes.length - 1];
      if (lastNode?.type === "text") {
        lastNode.text = lastNode.text.slice(0, lastNode.text.length - numOfPrecedingSpaces);
      }
      nodes.push({
        type: numOfPrecedingSpaces >= 2 ? "hardbreak" : "softbreak"
      });
      // Skip any leading spaces in the next line.
      const numOfSpaces = getNumOfConsecutiveCharacters(input, {
        characters: [" "],
        startIndex: startIndex + 1
      });
      characterCursor += 1 /* Skip the newline character */ + numOfSpaces;
    } else if (marker === "\\") {
      const nextCharacter = input.charAt(characterCursor + 1);
      // If "\" is followed by a newline, it is a forced hard line break.
      if (nextCharacter === "\n") {
        nodes.push({
          type: "hardbreak"
        });
        characterCursor += 2; // Skip the "\" and the following newline character
        // Skip any leading spaces or tabs in the next line.
        characterCursor += getNumOfConsecutiveCharacters(input, {
          characters: [" ", "\t"],
          startIndex: characterCursor
        });
        continue;
      }
      // A backslash before ASCII punctuation escapes it: only the punctuation is kept.
      if (isAsciiPunctuationCharacter(nextCharacter)) {
        nodes.push({
          type: "text",
          text: nextCharacter
        });
        characterCursor += 2;
        continue;
      }
      // Otherwise the backslash is literal.
      nodes.push({
        type: "text",
        text: marker
      });
      characterCursor += 1;
    } else if (marker === "`") {
      const numOfOpeningBackticks = getNumOfConsecutiveCharacters(input, {
        characters: ["`"],
        startIndex: characterCursor
      });
      let hasClosingBackticks = false;
      const openerIndex = characterCursor + numOfOpeningBackticks;
      let closerIndex = openerIndex;
      while (closerIndex < input.length) {
        closerIndex = input.indexOf("`", closerIndex);
        if (closerIndex === -1) break;
        const numOfClosingBackticks = getNumOfConsecutiveCharacters(input, {
          characters: ["`"],
          startIndex: closerIndex
        });
        // Only a backtick run of exactly the same length as the opening run closes the code span.
        if (numOfClosingBackticks === numOfOpeningBackticks) {
          hasClosingBackticks = true;
          break;
        }
        // Move past this run and look for the next one.
        closerIndex += numOfClosingBackticks;
      }
      if (hasClosingBackticks) {
        // Line endings inside a code span are converted to spaces as per the CommonMark spec.
        let content = input.slice(openerIndex, closerIndex).replace(/\n/g, " ");
        // One space is stripped from each side when the content both begins and ends with a space
        // and does not consist entirely of space characters. Only U+0020 counts here, so the check
        // must not treat tabs or other Unicode whitespace as "spaces" (which `trim()` would do).
        if (content.startsWith(" ") && content.endsWith(" ") && /[^ ]/.test(content)) {
          content = content.slice(1, content.length - 1);
        }
        nodes.push({
          type: "code-span",
          text: content
        });
        characterCursor = closerIndex + numOfOpeningBackticks;
      } else {
        // No matching closer: the backticks are literal text.
        nodes.push({
          type: "text",
          text: marker.repeat(numOfOpeningBackticks)
        });
        characterCursor += numOfOpeningBackticks;
      }
    } else if (marker === "*" || marker === "_") {
      const numOfMarkers = getNumOfConsecutiveCharacters(input, {
        characters: [marker],
        startIndex: characterCursor
      });
      // The start and the end of the input count as whitespace. Neighbours are read as whole
      // characters so that symbols outside the BMP (e.g. emoji) are recognised as punctuation.
      const previousCharacter = readCharacterBefore(characterCursor) ?? " ";
      const nextCharacter = readCharacterAt(characterCursor + numOfMarkers) ?? " ";
      const isPreviousCharacterPunctuation = isAsciiPunctuationCharacter(previousCharacter) || isUnicodePunctuationCharacter(previousCharacter);
      const isNextCharacterPunctuation = isAsciiPunctuationCharacter(nextCharacter) || isUnicodePunctuationCharacter(nextCharacter);
      const isPreviousCharacterWhitespace = isWhiteSpaceCharacter(previousCharacter);
      const isNextCharacterWhitespace = isWhiteSpaceCharacter(nextCharacter);
      /**
       * A left-flanking delimiter run is a delimiter run that is:
       * (1) not followed by Unicode whitespace, and either
       * (2a) not followed by a Unicode punctuation character, or (2b) followed by a Unicode punctuation character and preceded by Unicode whitespace or a Unicode punctuation character.
       * Read more: https://spec.commonmark.org/0.31.2/#left-flanking-delimiter-run
       */
      const isLeftFlanking = !isNextCharacterWhitespace && (!isNextCharacterPunctuation || isPreviousCharacterWhitespace || isPreviousCharacterPunctuation);
      /**
       * A right-flanking delimiter run is a delimiter run that is
       * (1) not preceded by Unicode whitespace, and either
       * (2a) not preceded by a Unicode punctuation character, or (2b) preceded by a Unicode punctuation character and followed by Unicode whitespace or a Unicode punctuation character.
       * Read more: https://spec.commonmark.org/0.31.2/#right-flanking-delimiter-run
       */
      const isRightFlanking = !isPreviousCharacterWhitespace && (!isPreviousCharacterPunctuation || isNextCharacterWhitespace || isNextCharacterPunctuation);
      // "_" is stricter than "*": a run that is both left- and right-flanking may only open when
      // preceded by punctuation and may only close when followed by punctuation.
      const canOpen = marker === "*" ? isLeftFlanking : isLeftFlanking && (!isRightFlanking || isPreviousCharacterPunctuation);
      const canClose = marker === "*" ? isRightFlanking : isRightFlanking && (!isLeftFlanking || isNextCharacterPunctuation);
      // Delimiter runs become special nodes that later evolve into emphasis nodes or plain text,
      // depending on whether they find a matching opener/closer.
      nodes.push({
        type: "emphasis-delimiter",
        marker,
        canOpen,
        canClose,
        count: numOfMarkers,
        content: marker.repeat(numOfMarkers)
      });
      characterCursor += numOfMarkers;
    } else if (marker === "[") {
      // Emit the bracket as text for now and remember it; it is replaced if a link is formed.
      const node = {
        type: "text",
        text: marker
      };
      nodes.push(node);
      brackets.push({
        marker: "[",
        node,
        startIndex: characterCursor,
        isActive: true
      });
      characterCursor += 1;
    } else if (marker === "!" && input.charAt(characterCursor + 1) === "[") {
      // Same as "[", but for a potential image.
      const node = {
        type: "text",
        text: "!["
      };
      nodes.push(node);
      brackets.push({
        marker: "![",
        node,
        startIndex: characterCursor,
        isActive: true
      });
      characterCursor += 2;
    } else if (marker === "]") {
      const startIndex = characterCursor;
      // Pop the last bracket from the bracket stack.
      const openerBracket = brackets.pop();
      // If the last bracket doesn't exist or is inactive, we render the closing bracket as text.
      if (openerBracket === undefined || !openerBracket.isActive) {
        nodes.push({
          type: "text",
          text: marker
        });
        characterCursor += 1;
        continue;
      }
      const target = parseLinkTarget(input, {
        startIndex: characterCursor + 1
      });
      let href;
      let title;
      if (target !== null) {
        // Inline link/image: "(destination "title")" follows the closing bracket.
        href = target.href;
        title = target.title;
        characterCursor = target.endIndex + 1;
      } else {
        // Reference link/image: look for an explicit "[label]" after the closing bracket and fall
        // back to the bracketed text itself (collapsed "[]" or shortcut form).
        const referenceLabel = parseReferenceLinkLabel(input, {
          startIndex: characterCursor + 1
        });
        let label;
        if (referenceLabel !== null && referenceLabel.label.length > 0) {
          label = referenceLabel.label;
        } else {
          label = extractLinkLabel(input, {
            startIndex: openerBracket.startIndex + (openerBracket.marker === "[" ? 1 : 2),
            endIndex: startIndex
          });
        }
        characterCursor = referenceLabel !== null ? referenceLabel.endIndex + 1 : startIndex + 1;
        const definition = label !== null ? options.referenceDefinitions.get(normalizeReference(label)) : undefined;
        if (definition === undefined) {
          // Unknown reference: the closing bracket is literal and scanning resumes right after it.
          nodes.push({
            type: "text",
            text: marker
          });
          characterCursor = startIndex + 1;
          continue;
        }
        href = definition.href;
        title = definition.title;
      }
      // Everything emitted after the opener becomes the children of the link/image node, which
      // takes the place of the opener's text node.
      const openerNodeIndex = nodes.indexOf(openerBracket.node);
      const children = parseEmphasisDelimiterNodes(nodes.splice(openerNodeIndex + 1));
      nodes[openerNodeIndex] = {
        type: openerBracket.marker === "[" ? "link" : "image",
        href: encodeUnsafeChars(href),
        title,
        children
      };
      // As per the CommonMark specification, links cannot contain other links. Once a link is
      // formed, every earlier '[' is deactivated so that its ']' is rendered as text.
      if (openerBracket.marker === "[") {
        for (const bracket of brackets) {
          if (bracket.marker === "[") {
            bracket.isActive = false;
          }
        }
      }
    } else if (marker === "<") {
      const remainingInput = input.slice(characterCursor);
      // Attempt to parse the content as an email autolink.
      const emailMatch = remainingInput.match(EMAIL_REGEX);
      if (emailMatch !== null) {
        const email = emailMatch[0].slice(1, emailMatch[0].length - 1);
        let destination;
        try {
          destination = encodeUnsafeChars("mailto:" + email);
        } catch {
          // Best effort: keep the raw destination when it cannot be encoded.
          destination = "mailto:" + email;
        }
        nodes.push({
          type: "link",
          href: destination,
          children: [
            {
              type: "text",
              text: email
            }
          ]
        });
        characterCursor += emailMatch[0].length;
        continue;
      }
      // Attempt to parse the content as a URI autolink.
      const uriMatch = remainingInput.match(AUTOLINK_REGEX);
      if (uriMatch !== null) {
        const uri = uriMatch[0].slice(1, uriMatch[0].length - 1);
        let destination;
        try {
          destination = encodeUnsafeChars(uri);
        } catch {
          // Best effort: keep the raw destination when it cannot be encoded.
          destination = uri;
        }
        nodes.push({
          type: "link",
          href: destination,
          children: [
            {
              type: "text",
              text: uri
            }
          ]
        });
        characterCursor += uriMatch[0].length;
        continue;
      }
      // If we failed to parse the content as an autolink, attempt to parse it as a raw HTML tag.
      const htmlTagMatch = remainingInput.match(HTML_TAG_REGEX);
      if (htmlTagMatch !== null) {
        nodes.push({
          type: "html",
          content: htmlTagMatch[0]
        });
        characterCursor += htmlTagMatch[0].length;
        continue;
      }
      nodes.push({
        type: "text",
        text: marker
      });
      characterCursor += 1;
    } else if (marker === "&") {
      const match = input.slice(characterCursor).match(ENTITY_REGEX);
      if (match !== null) {
        const entity = match[0];
        nodes.push({
          type: "text",
          text: decodeHTMLStrict(entity)
        });
        characterCursor += entity.length;
        continue;
      }
      nodes.push({
        type: "text",
        text: marker
      });
      characterCursor += 1;
    } else {
      // Plain text: consume up to (not including) the next character that has its own branch.
      let endIndex = characterCursor;
      while (endIndex < input.length && !textRunTerminators.includes(input.charAt(endIndex))) {
        endIndex++;
      }
      // If no characters were consumed, consume at least one character. This handles a standalone
      // "!" that is not followed by "[" (it is not an image opener, so it ends up here).
      if (endIndex === characterCursor) {
        endIndex++;
      }
      nodes.push({
        type: "text",
        text: input.slice(characterCursor, endIndex)
      });
      characterCursor = endIndex;
    }
  }
  return mergeAdjacentTextNodes(parseEmphasisDelimiterNodes(nodes));
}
370
/**
 * Resolves "emphasis-delimiter" nodes into "emphasis" / "strong" nodes or plain text.
 *
 * The list is walked from left to right looking for delimiters that can close. For each one the
 * nearest matching opener to its left is looked up; when found, the nodes in between are wrapped
 * in an emphasis (one marker consumed on each side) or strong (two markers consumed) node.
 * Delimiters that never find a partner are turned into text nodes.
 *
 * Note: the `nodes` array and the delimiter nodes inside it are modified in place.
 *
 * @param nodes - Inline nodes, possibly containing "emphasis-delimiter" nodes.
 * @returns A new array in which no "emphasis-delimiter" node remains.
 */
function parseEmphasisDelimiterNodes(nodes) {
  // Renders a leftover delimiter literally; every other node is passed through untouched.
  const asLiteralText = (candidate) => {
    if (candidate.type !== "emphasis-delimiter") {
      return candidate;
    }
    return {
      type: "text",
      text: candidate.content
    };
  };

  const list = nodes;
  let cursor = 0;
  while (cursor < list.length) {
    const current = list[cursor];
    if (current === undefined) break;

    // Only delimiters that are able to close are interesting here. E.g., in '*foo bar' the '*' is
    // left-flanking only, so it can never close an emphasis.
    const isPotentialCloser = current.type === "emphasis-delimiter" && current.canClose;
    if (!isPotentialCloser) {
      cursor += 1;
      continue;
    }

    // Look backwards for the nearest opener that pairs with this closer.
    const match = findMatchingOpenerForCloser(list, {
      node: current,
      index: cursor
    });
    if (match === null) {
      // Without an opener, a delimiter that cannot open either is definitely literal text.
      // One that can still open is kept so that a later closer may pair with it.
      if (!current.canOpen) {
        list[cursor] = {
          type: "text",
          text: current.content
        };
      }
      cursor += 1;
      continue;
    }

    const { node: opener, index: openerIndex } = match;
    // Two or more marker characters remaining on both sides make it a strong emphasis.
    const isStrong = opener.content.length >= 2 && current.content.length >= 2;
    const numOfConsumedMarkers = isStrong ? 2 : 1;
    // The opener gives up its innermost (last) markers, the closer its innermost (first) ones.
    opener.content = opener.content.slice(0, -numOfConsumedMarkers);
    current.content = current.content.slice(numOfConsumedMarkers);

    // Delimiters caught between the pair cannot take part in later matches, so they become text.
    const wrapper = {
      type: isStrong ? "strong" : "emphasis",
      children: list.slice(openerIndex + 1, cursor).map(asLiteralText)
    };
    list.splice(openerIndex + 1, cursor - openerIndex - 1, wrapper);

    // Fully consumed delimiters disappear from the list.
    if (opener.content.length === 0) {
      list.splice(openerIndex, 1);
    }
    if (current.content.length === 0) {
      list.splice(list.indexOf(current), 1);
    }

    // Resume right after the opener's position: everything between the pair is now one node.
    cursor = openerIndex + 1;
  }
  return list.map(asLiteralText);
}
442
/**
 * Searches backwards from a closing delimiter for the nearest delimiter that can open emphasis
 * with it.
 *
 * @param nodes - The inline node list being resolved.
 * @param closer - The closing delimiter: `closer.node` is the delimiter node, `closer.index` its
 *   position in `nodes`.
 * @returns `{ node, index }` for the matching opener, or null when there is none.
 */
function findMatchingOpenerForCloser(nodes, closer) {
  const closing = closer.node;
  for (let position = closer.index - 1; position >= 0; position -= 1) {
    const candidate = nodes[position];
    // An opener must be a delimiter, be able to open, and use the same marker as the closer.
    const isOpenerCandidate =
      candidate?.type === "emphasis-delimiter" &&
      candidate.canOpen &&
      candidate.marker === closing.marker;
    if (!isOpenerCandidate) continue;

    /**
     * CommonMark "rule of three": when one of the two runs can both open and close, the pair does
     * not match if the sum of the run lengths is a multiple of 3, unless both run lengths are
     * themselves multiples of 3.
     */
    const isEitherRunAmbiguous = candidate.canClose || closing.canOpen;
    const isSumMultipleOfThree = (candidate.count + closing.count) % 3 === 0;
    const areBothMultiplesOfThree = candidate.count % 3 === 0 && closing.count % 3 === 0;
    if (isEitherRunAmbiguous && isSumMultipleOfThree && !areBothMultiplesOfThree) {
      continue;
    }

    return {
      node: candidate,
      index: position
    };
  }
  return null;
}
463
/**
 * Measures the length of the run of characters, all taken from a given set, that begins at a
 * given position.
 *
 * @param input - The string to inspect.
 * @param options.characters - The accepted characters; a character continues the run when it is
 *   any one of them.
 * @param options.startIndex - The index in `input` at which the run is expected to start.
 * @returns The length of the run; 0 when the character at `startIndex` is not accepted or when
 *   `startIndex` lies at or beyond the end of the input.
 *
 * @example
 * getNumOfConsecutiveCharacters("```ts", { characters: ["`"], startIndex: 0 }) // 3
 * getNumOfConsecutiveCharacters("a \tb", { characters: [" ", "\t"], startIndex: 1 }) // 2
 */
function getNumOfConsecutiveCharacters(input, { characters, startIndex }) {
  let position = startIndex;
  while (position < input.length && characters.includes(input.charAt(position))) {
    position += 1;
  }
  return position - startIndex;
}
485
/**
 * Collapses every run of neighbouring text nodes into a single text node, and does the same
 * recursively inside the `children` array of any non-text node that has one.
 *
 * Example:
 *   [
 *     { type: "text", text: "foo" },
 *     { type: "text", text: "bar" },
 *     { type: "emphasis", children: [
 *       { type: "text", text: "baz" },
 *       { type: "text", text: "qux" }
 *     ]}
 *   ]
 * becomes
 *   [
 *     { type: "text", text: "foobar" },
 *     { type: "emphasis", children: [
 *       { type: "text", text: "bazqux" }
 *     ]}
 *   ]
 *
 * The returned array is new, but the node objects are reused: the first text node of a run
 * receives the concatenated text, and `children` is reassigned on the original parent nodes.
 *
 * @param nodes - The inline nodes to process (text, link, emphasis, ...).
 * @returns An array in which no two neighbouring nodes are both of type "text".
 */
function mergeAdjacentTextNodes(nodes) {
  const merged = [];
  for (const current of nodes) {
    const previous = merged[merged.length - 1];
    const isText = current.type === "text";

    // A text node that follows another text node is folded into it instead of being appended.
    if (isText && previous?.type === "text") {
      previous.text += current.text;
      continue;
    }

    // Container nodes get their own children merged before being appended.
    if (!isText && "children" in current) {
      current.children = mergeAdjacentTextNodes(current.children);
    }
    merged.push(current);
  }
  return merged;
}
527
/**
 * Tells whether a character is an ASCII punctuation character as defined by CommonMark.
 *
 * The ASCII punctuation characters form four contiguous code ranges:
 * - U+0021–U+002F: ! " # $ % & ' ( ) * + , - . /
 * - U+003A–U+0040: : ; < = > ? @
 * - U+005B–U+0060: [ \ ] ^ _ `
 * - U+007B–U+007E: { | } ~
 *
 * https://spec.commonmark.org/0.31.2/#ascii-punctuation-character
 *
 * @param character - The character to check. Anything other than a one-character string
 *   (including the empty string) is never punctuation.
 * @returns true if the character is an ASCII punctuation character, false otherwise.
 */
function isAsciiPunctuationCharacter(character) {
  if (typeof character !== "string" || character.length !== 1) {
    return false;
  }
  const code = character.charCodeAt(0);
  return (
    (code >= 0x21 && code <= 0x2f) ||
    (code >= 0x3a && code <= 0x40) ||
    (code >= 0x5b && code <= 0x60) ||
    (code >= 0x7b && code <= 0x7e)
  );
}
607
// Matches any character in the Unicode "P" (punctuation) general category.
// A Unicode property escape is used instead of a hand-expanded table of code ranges: it follows
// the Unicode version of the JavaScript engine and matches characters outside the BMP as whole
// code points.
const UNICODE_P_REGEX = /\p{P}/u;
// Matches any character in the Unicode "S" (symbol) general category (see the note above).
const UNICODE_S_REGEX = /\p{S}/u;
609
/**
 * Tells whether a character is a Unicode punctuation character as defined by CommonMark, i.e. a
 * character in the Unicode P (punctuation) or S (symbol) general categories.
 *
 * https://spec.commonmark.org/0.31.2/#unicode-punctuation-character
 *
 * @param character - The character to check.
 * @returns true if the character is a punctuation or symbol character, false otherwise.
 */
function isUnicodePunctuationCharacter(character) {
  // The punctuation category is tried first; the symbol category only when that fails.
  return [UNICODE_P_REGEX, UNICODE_S_REGEX].some((pattern) => pattern.test(character));
}
619
/**
 * Tells whether a character counts as whitespace when deciding if a delimiter run is left- or
 * right-flanking.
 *
 * Only the first UTF-16 code unit of `character` is inspected. The accepted code units are
 * U+0009–U+000D (tab, line feed, vertical tab, form feed, carriage return), U+0020 (space),
 * U+00A0, U+1680, U+2000–U+200A, U+202F, U+205F and U+3000.
 *
 * @param character - The character to check.
 * @returns true if the character is whitespace, false otherwise (including for an empty string).
 */
function isWhiteSpaceCharacter(character) {
  const code = character.charCodeAt(0);
  const isInGeneralPunctuationSpaces = code >= 0x2000 && code <= 0x200a;
  if (isInGeneralPunctuationSpaces) {
    return true;
  }
  const standaloneWhitespaceCodes = [
    0x09, // \t
    0x0a, // \n
    0x0b, // \v
    0x0c, // \f
    0x0d, // \r
    0x20,
    0xa0,
    0x1680,
    0x202f,
    0x205f,
    0x3000
  ];
  return standaloneWhitespaceCodes.includes(code);
}
640
/**
 * Parses the inline link target that follows a closing bracket: `(destination "title")`.
 *
 * Spaces, tabs and newlines are allowed after the opening parenthesis, between the destination
 * and the title, and before the closing parenthesis.
 *
 * @param input - The inline text being parsed.
 * @param options.startIndex - The index at which the opening "(" is expected.
 * @returns `{ href, title, raw, endIndex }`, where `title` is undefined when there is no title,
 *   `raw` is the text from "(" to ")" inclusive and `endIndex` is the index of the closing ")".
 *   Returns null when no valid link target starts at `startIndex`.
 */
function parseLinkTarget(input, options) {
  const startIndex = options.startIndex;
  if (input.charAt(startIndex) !== "(") return null;

  const whitespaceCharacters = [" ", "\t", "\n"];
  const countWhitespacesFrom = (index) =>
    getNumOfConsecutiveCharacters(input, {
      characters: whitespaceCharacters,
      startIndex: index
    });

  // Skip whitespaces and newlines after the left parenthesis.
  const destinationStartIndex = startIndex + 1 + countWhitespacesFrom(startIndex + 1);
  const destination = parseLinkDestination(input, {
    startIndex: destinationStartIndex
  });
  // `destination.endIndex` is the index of the destination's LAST character (inclusive), so it is
  // equal to `destinationStartIndex` for a one-character destination such as "/". Such a
  // destination is perfectly valid and must not be rejected; so is an empty destination, which
  // the closing-parenthesis check below validates.
  if (destination === null) {
    return null;
  }

  let targetEndIndex = destination.endIndex;
  // A title is only recognised when it is separated from the destination by whitespace.
  const numOfWhitespacesAfterHref = countWhitespacesFrom(destination.endIndex + 1);
  const title =
    numOfWhitespacesAfterHref > 0
      ? parseLinkTitle(input, {
          startIndex: destination.endIndex + numOfWhitespacesAfterHref + 1
        })
      : null;
  if (title !== null) {
    targetEndIndex = title.endIndex;
  }

  // Skip whitespaces and newlines before the right parenthesis.
  targetEndIndex += countWhitespacesFrom(targetEndIndex + 1) + 1;
  // Link target must end with a right parenthesis.
  if (input.charAt(targetEndIndex) !== ")") {
    return null;
  }
  return {
    href: destination.href,
    title: title?.title,
    raw: input.slice(startIndex, targetEndIndex + 1),
    endIndex: targetEndIndex
  };
}
696
/**
 * Parses a link destination starting at a given index.
 *
 * Two forms are supported:
 * - `<...>`: everything up to the first unescaped ">"; newlines and unescaped "<" are not allowed.
 * - bare: a run of characters that ends at a space, an ASCII control character or an unbalanced
 *   ")"; parentheses must be balanced (or escaped) and may be nested at most 32 levels deep.
 *
 * A backslash only escapes the ASCII punctuation character that follows it. Before any other
 * character (space, newline, control characters, letters, ...) it is a literal backslash, so it
 * never hides a character that terminates or invalidates the destination.
 *
 * @param input - The text being parsed.
 * @param options.startIndex - The index at which the destination is expected to start.
 * @returns `{ href, raw, endIndex }`, or null when there is no valid destination. For the `<...>`
 *   form `endIndex` is the index of the closing ">"; for the bare form it is the index of the
 *   destination's last character (`startIndex - 1` for an empty destination).
 */
function parseLinkDestination(input, options) {
  const startIndex = options.startIndex;
  // If the destination start index is out of bounds, the link target is invalid.
  if (startIndex >= input.length) return null;

  // True when the character at `index` is a backslash that escapes the next character.
  const isEscapeSequenceAt = (index) =>
    input.charAt(index) === "\\" && isAsciiPunctuationCharacter(input.charAt(index + 1));

  if (input.charAt(startIndex) === "<") {
    const destinationStartIndex = startIndex + 1;
    let destinationEndIndex = destinationStartIndex;
    while (destinationEndIndex < input.length) {
      // Skip an escaped character together with its backslash.
      if (isEscapeSequenceAt(destinationEndIndex)) {
        destinationEndIndex += 2;
        continue;
      }
      const character = input.charAt(destinationEndIndex);
      if (character === "\n" || character === "<") return null;
      if (character === ">") {
        return {
          href: unescapeString(input.slice(destinationStartIndex, destinationEndIndex)),
          raw: input.slice(destinationStartIndex, destinationEndIndex + 1),
          endIndex: destinationEndIndex
        };
      }
      destinationEndIndex++;
    }
    // The closing ">" was never found.
    return null;
  }

  const destinationStartIndex = startIndex;
  let destinationEndIndex = destinationStartIndex;
  let numOfLeftParentheses = 0;
  while (destinationEndIndex < input.length) {
    const characterCode = input.charCodeAt(destinationEndIndex);
    // A space or an ASCII control character ends the destination.
    if (characterCode <= 0x20 || characterCode === 0x7f) break;
    // Skip an escaped character together with its backslash (this covers escaped parentheses).
    if (isEscapeSequenceAt(destinationEndIndex)) {
      destinationEndIndex += 2;
      continue;
    }
    if (characterCode === 0x28 /* LEFT_PARENTHESIS */) {
      numOfLeftParentheses++;
      if (numOfLeftParentheses > 32) return null;
    }
    if (characterCode === 0x29 /* RIGHT_PARENTHESIS */) {
      // An unmatched ")" belongs to the enclosing link target, not to the destination.
      if (numOfLeftParentheses === 0) break;
      numOfLeftParentheses--;
    }
    destinationEndIndex++;
  }
  // Unbalanced parentheses make the destination invalid
  if (numOfLeftParentheses !== 0) return null;
  const raw = input.slice(destinationStartIndex, destinationEndIndex);
  return {
    href: unescapeString(raw),
    raw,
    endIndex: destinationEndIndex - 1
  };
}
760
/**
 * Parses a link title that begins at `options.startIndex`.
 *
 * A title is delimited by `"…"`, `'…'` or `(…)`. A backslash consumes the
 * character that follows it, so an escaped delimiter does not end the title.
 * Inside a parenthesised title an unescaped `(` makes the title invalid.
 *
 * @param {string} input - Text being scanned.
 * @param {{ startIndex: number }} options - Index of the opening delimiter.
 * @returns {{ raw: string, title: string, endIndex: number } | null}
 *   `raw` includes both delimiters, `title` is the unescaped inner text and
 *   `endIndex` is the index of the closing delimiter; `null` when no valid
 *   title starts at `startIndex`.
 */
function parseLinkTitle(input, options) {
    const begin = options.startIndex;
    // Nothing to read when the start position is past the end of the input.
    if (begin >= input.length) return null;

    const opener = input.charAt(begin);
    let closer;
    switch (opener) {
        case '"':
        case "'":
            closer = opener;
            break;
        case "(":
            closer = ")";
            break;
        default:
            // Link title must start with ", ', or (
            return null;
    }
    const isParenthesized = opener === "(";

    for (let cursor = begin + 1; cursor < input.length; cursor++) {
        const current = input.charAt(cursor);
        if (current === closer) {
            return {
                raw: input.slice(begin, cursor + 1),
                title: unescapeString(input.slice(begin + 1, cursor)),
                endIndex: cursor
            };
        }
        // A nested, unescaped "(" is not allowed in a parenthesised title.
        if (isParenthesized && current === "(") return null;
        // A backslash that is not the final character also consumes the next
        // character; the loop increment then moves past that character.
        if (current === "\\" && cursor + 1 < input.length) cursor++;
        // Anything else (newlines included) is ordinary title content.
    }
    // Ran out of input before the closing delimiter.
    return null;
}
798
/**
 * Parses a bracketed reference link label (`[label]`) at `options.startIndex`.
 *
 * - `[]` is accepted as a collapsed reference and yields an empty label.
 * - `\]`, `\[` and `\\` contribute the escaped character to the label; any
 *   other backslash is kept literally.
 * - An unescaped `[`, a whitespace-only label, a label that grows beyond 999
 *   characters, or a missing closing bracket all make the label invalid.
 *
 * @param {string} input - Text being scanned.
 * @param {{ startIndex: number }} options - Index of the opening `[`.
 * @returns {{ label: string, raw: string, endIndex: number } | null}
 *   `raw` includes the brackets and `endIndex` is the index of the closing `]`.
 */
function parseReferenceLinkLabel(input, options) {
    const open = options.startIndex;
    if (open >= input.length) return null;
    if (input.charAt(open) !== "[") return null;

    // Collapsed reference: "[]" produces an empty label.
    if (input.charAt(open + 1) === "]") {
        return {
            label: "",
            raw: input.slice(open, open + 2),
            endIndex: open + 1
        };
    }

    const ESCAPABLE = "][\\";
    let text = "";
    let pos = open + 1;
    while (pos < input.length) {
        // Over-long labels are rejected as soon as they are detected.
        if (text.length > 999) return null;

        const current = input.charAt(pos);
        switch (current) {
            case "\\": {
                const following = input.charAt(pos + 1);
                // `following` is "" at end of input; "" must not count as escapable.
                const isEscape = following !== "" && ESCAPABLE.includes(following);
                text += isEscape ? following : current;
                pos += isEscape ? 2 : 1;
                break;
            }
            case "[":
                // Unescaped "[" is not allowed inside a label.
                return null;
            case "]":
                // A label containing only whitespace is invalid.
                if (text.trim().length === 0) return null;
                return {
                    label: text,
                    raw: input.slice(open, pos + 1),
                    endIndex: pos
                };
            default:
                text += current;
                pos += 1;
        }
    }
    // No closing bracket before the end of the input.
    return null;
}
850
/**
 * Builds the label text for the slice `[options.startIndex, options.endIndex)`
 * of `input`.
 *
 * `\]`, `\[` and `\\` contribute the escaped character; any other backslash is
 * kept literally so that label matching sees it.
 *
 * Fix: the length limit is now also enforced after the last character has been
 * appended. Previously the check only ran at the top of the loop, so a label of
 * exactly 1000 characters that ended at `endIndex` was accepted, while
 * `parseReferenceLinkLabel` rejects the same label.
 *
 * @param {string} input - Text the label is read from.
 * @param {{ startIndex: number, endIndex: number }} options - Half-open range
 *   of the label content (brackets excluded).
 * @returns {string | null} The label, or `null` when it is empty/whitespace
 *   only or longer than 999 characters.
 */
function extractLinkLabel(input, options) {
    const MAX_LABEL_LENGTH = 999;
    // Never read past the end of the input, even if the range says so.
    const stopIndex = Math.min(options.endIndex, input.length);
    let label = "";
    let index = options.startIndex;
    while (index < stopIndex) {
        // Early exit for over-long labels.
        if (label.length > MAX_LABEL_LENGTH) return null;
        const character = input.charAt(index);
        if (character === "\\") {
            const nextCharacter = input.charAt(index + 1);
            // Only ']', '[' and '\' can be escaped within labels.
            if (nextCharacter === "]" || nextCharacter === "[" || nextCharacter === "\\") {
                label += nextCharacter;
                index += 2;
            } else {
                label += character;
                index++;
            }
            continue;
        }
        label += character;
        index++;
    }
    // Re-check after the loop: the final append may have pushed the label over the limit.
    if (label.length > MAX_LABEL_LENGTH) return null;
    // An empty label is invalid.
    if (label.trim().length === 0) return null;
    return label;
}
880
/**
 * Produces the canonical key used to compare reference labels.
 *
 * Surrounding whitespace is removed, every internal whitespace run becomes a
 * single space, and the result is lower-cased and then upper-cased. Applying
 * both case conversions makes letter variants compare equal (for example both
 * "ẞ" and "ß" end up as "SS").
 *
 * @param {string} reference - Raw label text.
 * @returns {string} Normalized label.
 */
function normalizeReference(reference) {
    const collapsed = reference.trim().replace(/\s+/g, " ");
    const lowered = collapsed.toLowerCase();
    return lowered.toUpperCase();
}
883
// Backslash followed by one escapable ASCII punctuation character (captured).
const UNESCAPE_MD_RE = /\\([!"#$%&'()*+,\-./:;<=>?@[\\\]^_`{|}~])/g;
// "&name;" or "&#…;" style entity; the part between "&" and ";" is captured.
const ENTITY_RE = /&([a-z#][a-z0-9]{1,31});/gi;
// Either of the two patterns above: group 1 = escaped character, group 2 = entity body.
const UNESCAPE_ALL_RE = new RegExp(`${UNESCAPE_MD_RE.source}|${ENTITY_RE.source}`, "gi");
// Body of a numeric entity: "#" followed by decimal digits or "x" + hex digits.
const DIGITAL_ENTITY_TEST_RE = /^#((?:x[a-f0-9]{1,8}|[0-9]{1,8}))$/i;

/**
 * Resolves backslash escapes and HTML entities in `text`.
 *
 * - `\` + escapable punctuation becomes the punctuation character.
 * - A numeric entity becomes its character when `isValidEntityCode` accepts the
 *   code; otherwise the entity text is left untouched.
 * - Every other entity is handed to `decodeHTMLStrict`.
 *
 * @param {string} text - Text to unescape.
 * @returns {string} The unescaped text (the input itself when it contains
 *   neither a backslash nor an ampersand).
 */
function unescapeString(text) {
    // Fast path: nothing that could be an escape or an entity.
    if (!text.includes("\\") && !text.includes("&")) return text;

    return text.replace(UNESCAPE_ALL_RE, (match, escaped, entity) => {
        if (escaped) return escaped;

        const isNumeric = entity.charAt(0) === "#" && DIGITAL_ENTITY_TEST_RE.test(entity);
        if (!isNumeric) {
            // Named entity: the decoder's result is used as-is.
            return decodeHTMLStrict(match);
        }

        const isHex = entity.charAt(1).toLowerCase() === "x";
        const code = isHex ? parseInt(entity.slice(2), 16) : parseInt(entity.slice(1), 10);
        return isValidEntityCode(code) ? fromCodePoint(code) : match;
    });
}
905
/**
 * Tells whether `code` may be emitted for a numeric character reference.
 *
 * Rejected: surrogates (0xD800–0xDFFF), 0xFDD0–0xFDEF, any code whose low
 * 16 bits are 0xFFFE or 0xFFFF, the control codes 0x00–0x08, 0x0B, 0x0E–0x1F
 * and 0x7F–0x9F, and anything above 0x10FFFF.
 *
 * @param {number} code - Candidate code point.
 * @returns {boolean} `true` when the code is acceptable.
 */
function isValidEntityCode(code) {
    const REJECTED_RANGES = [
        [0xd800, 0xdfff], // broken sequence (surrogates)
        [0xfdd0, 0xfdef], // never used
        [0x00, 0x08], // control codes
        [0x0e, 0x1f], // control codes
        [0x7f, 0x9f] // control codes
    ];
    if (REJECTED_RANGES.some(([low, high]) => code >= low && code <= high)) return false;
    if (code === 0x0b) return false;

    const low16 = code & 0xffff;
    if (low16 === 0xffff || low16 === 0xfffe) return false;

    // Out of range
    return !(code > 0x10ffff);
}
920
/**
 * Converts a code point to a string, producing a UTF-16 surrogate pair for
 * values above 0xFFFF.
 *
 * @param {number} code - Code point to convert.
 * @returns {string} One code unit for values up to 0xFFFF, two otherwise.
 */
function fromCodePoint(code) {
    // Values that are not above the BMP map to a single code unit.
    if (!(code > 0xffff)) return String.fromCharCode(code);

    const offset = code - 0x10000;
    const highSurrogate = 0xd800 + (offset >> 10);
    const lowSurrogate = 0xdc00 + (offset & 0x3ff);
    return String.fromCharCode(highSurrogate, lowSurrogate);
}
929
/**
 * Cache of ASCII percent-encoding lookup tables.
 *
 * Key: the string of extra characters (besides alphanumerics) left unencoded.
 * Value: a 128-entry array in which `table[code]` is either the literal
 * character (when it is allowed as-is) or its percent-encoded form such as
 * "%2F".
 */
const asciiEncodeTableCache = Object.create(null);

/**
 * Returns the lookup table for `allowedChars`, building and caching it on
 * first use.
 *
 * - Alphanumerics [0-9A-Za-z] always map to themselves.
 * - Every other ASCII code maps to "%HH" (upper-case hex, two digits).
 * - ASCII characters listed in `allowedChars` map to themselves.
 *
 * Only codes 0–127 are covered; callers deal with non-ASCII input separately.
 *
 * @param {string} allowedChars - Characters to leave unencoded.
 * @returns {string[]} Array of length 128 indexed by character code.
 */
function getAsciiEncodeTable(allowedChars) {
    const existing = asciiEncodeTableCache[allowedChars];
    if (existing) return existing;

    // Default mapping for every ASCII code.
    const table = Array.from({ length: 128 }, (_, code) => {
        const character = String.fromCharCode(code);
        if (/^[0-9a-z]$/i.test(character)) return character;
        return `%${code.toString(16).toUpperCase().padStart(2, "0")}`;
    });
    asciiEncodeTableCache[allowedChars] = table;

    // Explicitly allowed characters pass through unchanged (ASCII only).
    allowedChars.split("").forEach((allowedChar) => {
        const code = allowedChar.charCodeAt(0);
        if (code < 128) {
            table[code] = allowedChar;
        }
    });
    return table;
}
976
/**
 * Characters allowed unencoded by default (in addition to alphanumerics).
 */
const DEFAULT_ALLOWED_CHARS = ";/?:@&=+$,-_.!~*'()#";

/**
 * Percent-encodes every character of `input` that is not alphanumeric and not
 * listed in `allowedChars`.
 *
 * Call shapes:
 *   encodeUnsafeChars(input)
 *   encodeUnsafeChars(input, keepExistingEscapes)
 *   encodeUnsafeChars(input, allowedChars, keepExistingEscapes)
 *
 * @param {string} input - Text to encode.
 * @param {string|boolean} [allowedChars] - Extra characters to leave as-is.
 *   When this argument is not a string it is taken as `keepExistingEscapes`
 *   and the default allowed set is used.
 * @param {boolean} [keepExistingEscapes=true] - Keep well-formed "%HH"
 *   sequences already present in `input` instead of encoding their "%".
 * @returns {string} The encoded text. Unpaired surrogates are emitted as
 *   "%EF%BF%BD" (UTF-8 of U+FFFD).
 */
function encodeUnsafeChars(input, allowedChars, keepExistingEscapes) {
    // Normalise the two call shapes into local values.
    const hasAllowedChars = typeof allowedChars === "string";
    const allowed = hasAllowedChars ? allowedChars : DEFAULT_ALLOWED_CHARS;
    const requestedKeep = hasAllowedChars ? keepExistingEscapes : allowedChars;
    const keepEscapes = requestedKeep === undefined ? true : requestedKeep;

    const asciiTable = getAsciiEncodeTable(allowed);
    let encoded = "";
    let position = 0;
    while (position < input.length) {
        const codeUnit = input.charCodeAt(position);

        // Existing "%HH" escape: copy all three characters verbatim.
        if (
            keepEscapes &&
            codeUnit === 0x25 /* '%' */ &&
            position + 2 < input.length &&
            /^[0-9a-f]{2}$/i.test(input.slice(position + 1, position + 3))
        ) {
            encoded += input.slice(position, position + 3);
            position += 3;
            continue;
        }

        // ASCII: table lookup.
        if (codeUnit < 128) {
            encoded += asciiTable[codeUnit];
            position += 1;
            continue;
        }

        const isSurrogate = codeUnit >= 0xd800 && codeUnit <= 0xdfff;
        if (!isSurrogate) {
            // Non-ASCII, non-surrogate: encodeURIComponent yields the UTF-8 bytes.
            encoded += encodeURIComponent(input[position]);
            position += 1;
            continue;
        }

        // A high surrogate followed by a low surrogate is a valid pair.
        const isHighSurrogate = codeUnit <= 0xdbff;
        if (isHighSurrogate && position + 1 < input.length) {
            const nextCodeUnit = input.charCodeAt(position + 1);
            if (nextCodeUnit >= 0xdc00 && nextCodeUnit <= 0xdfff) {
                encoded += encodeURIComponent(input[position] + input[position + 1]);
                position += 2;
                continue;
            }
        }

        // Unpaired surrogate: emit the replacement character.
        encoded += "%EF%BF%BD";
        position += 1;
    }
    return encoded;
}
1037
// Matches a numeric (hex or decimal) or named HTML entity at the start of a string.
const ENTITY_REGEX = /^&(?:#x[a-f0-9]{1,6}|#[0-9]{1,7}|[a-z][a-z0-9]{1,31});/i;

// --- Building blocks for inline HTML detection (pattern source strings) ---

// Attribute name: starts with a letter, underscore or colon.
const ATTRIBUTE_NAME = "[a-zA-Z_:][a-zA-Z0-9:._-]*";
// Unquoted attribute value: no quotes, "=", "<", ">", backtick, or characters up to and including space.
const UNQUOTED = "[^\"'=<>`\\x00-\\x20]+";
// Single-quoted attribute value.
const SINGLE_QUOTED = "'[^']*'";
// Double-quoted attribute value.
const DOUBLE_QUOTED = '"[^"]*"';
// Any of the three attribute value forms.
const ATTRIBUTE_VALUE = `(?:${[UNQUOTED, SINGLE_QUOTED, DOUBLE_QUOTED].join("|")})`;
// One attribute: leading whitespace, a name, and an optional "= value".
const ATTRIBUTE = `(?:\\s+${ATTRIBUTE_NAME}(?:\\s*=\\s*${ATTRIBUTE_VALUE})?)`;
// Opening tag, optionally self-closing (e.g. `<div>`, `<img />`).
const OPEN_TAG = `<[A-Za-z][A-Za-z0-9\\-]*${ATTRIBUTE}*\\s*\\/?>`;
// Closing tag (e.g. `</div>`).
const CLOSE_TAG = "<\\/[A-Za-z][A-Za-z0-9\\-]*\\s*>";
// HTML comment (e.g. `<!-- comment -->`), including the short forms `<!-->` and `<!--->`.
const COMMENT = "<!---?>|<!--(?:[^-]|-[^-]|--[^>])*-->";
// Processing instruction (e.g. `<?xml ... ?>`).
const PROCESSING = "<[?][\\s\\S]*?[?]>";
// Declaration (e.g. `<!DOCTYPE html>`).
const DECLARATION = "<![A-Za-z][^>]*>";
// CDATA section (e.g. `<![CDATA[ ... ]]>`).
const CDATA = "<!\\[CDATA\\[[\\s\\S]*?\\]\\]>";

// Any inline HTML construct anchored at the start of a string.
const HTML_TAG_REGEX = new RegExp(
    `^(?:${[OPEN_TAG, CLOSE_TAG, COMMENT, PROCESSING, DECLARATION, CDATA].join("|")})`
);
// `<local@domain>` email autolink; the address is captured in group 1.
const EMAIL_REGEX = /^<([a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*)>/;
// `<scheme:rest>` URI autolink; the rest may not contain "<", ">", or characters up to and including space.
const AUTOLINK_REGEX = /^<[A-Za-z][A-Za-z0-9.+-]{1,31}:[^<>\x00-\x20]*>/i;
1066
+
1067
/**
 * Splits text into lines, optionally across several chunks.
 *
 * Supported line endings:
 * - LF   -> "\n"   (line feed)
 * - CRLF -> "\r\n" (carriage return + line feed)
 * - CR   -> "\r"   (carriage return)
 *
 * The instance is stateful: text after the last line ending of a streamed
 * chunk is buffered and prepended to the next chunk, and a CRLF pair that is
 * split across two chunks is still treated as a single line ending.
 *
 * @example
 * ```ts
 * const lines = new LineSplitter().split("Hello\nWorld\r\nTest\r");
 * console.log(lines); // ["Hello", "World", "Test"]
 * ```
 */
class LineSplitter {
    // Text from previous chunks that has not been terminated by a line ending yet.
    #partialLine = "";
    // True when the previous non-empty chunk ended with "\r", so a leading "\n" must be ignored.
    #hasPendingCR = false;

    /**
     * Splits `chunk` into lines.
     *
     * @param {string} [chunk=""] - The chunk of text to split.
     * @param {{ stream?: boolean }} [options]
     *   `stream` defaults to `false`. When `false`, any trailing text without
     *   a line ending is returned as the last line and the buffer is cleared.
     *   When `true`, that trailing text is kept back and prepended to the
     *   next chunk.
     * @returns {string[]} The complete lines found (line endings removed).
     */
    split(chunk = "", options) {
        const stream = options?.stream ?? false;
        const lines = [];
        let lineStartIndex = 0; // Start of the current candidate line within this chunk.

        // If the previous chunk ended with "\r", a leading "\n" here completes
        // that CRLF and must not produce an extra empty line. The flag is only
        // consumed by a non-empty chunk: an empty chunk carries no information
        // about what follows the CR, so the flag must survive it.
        if (this.#hasPendingCR && chunk.length > 0) {
            if (chunk[0] === "\n") lineStartIndex = 1;
            this.#hasPendingCR = false;
        }

        for (let i = lineStartIndex; i < chunk.length; i++) {
            const currentChar = chunk[i];
            if (currentChar !== "\n" && currentChar !== "\r") continue;

            // Any line ending terminates the buffered text plus the current segment.
            lines.push(this.#partialLine + chunk.slice(lineStartIndex, i));
            this.#partialLine = "";

            if (currentChar === "\r") {
                if (i === chunk.length - 1) {
                    // The chunk ends with CR; a following LF may arrive in the next chunk.
                    this.#hasPendingCR = true;
                } else if (chunk[i + 1] === "\n") {
                    // CRLF fully inside this chunk: skip the LF as well.
                    i += 1;
                }
            }
            lineStartIndex = i + 1;
        }

        // Keep whatever follows the last line ending for the next call.
        this.#partialLine += chunk.slice(lineStartIndex);

        // When not streaming, flush the trailing partial line.
        if (!stream && this.#partialLine.length > 0) {
            lines.push(this.#partialLine);
            this.#partialLine = "";
        }
        return lines;
    }
}
1139
+
1140
+ class MarkdownParser {
1141
+ parse(input, options) {
1142
+ const stream = options?.stream ?? false;
1143
+ const lines = this.splitter.split(input, {
1144
+ stream
1145
+ });
1146
+ for (const line of lines){
1147
+ this.parseLine(line.replace(/\0/g, "\uFFFD"));
1148
+ this.nextLineIndex++;
1149
+ }
1150
+ // If stream is false, we need to finalize all remaining open blocks before returning the nodes.
1151
+ if (!stream) {
1152
+ // Close the latest spine of the root node; this finalizes all remaining open blocks before returning the nodes.
1153
+ closeRightmostPath(this.root);
1154
+ }
1155
+ // Parse reference link definitions for finalized/closed blocks.
1156
+ this.parseReferenceLinkDefinitions(this.root);
1157
+ const nodes = [];
1158
+ let i = this.nextNodeIndex;
1159
+ for(; i < this.root.children.length; i++){
1160
+ const node = this.root.children[i];
1161
+ if (node === undefined || !node.isClosed) break;
1162
+ nodes.push(this.convertInternalBlockToPublicBlock(node));
1163
+ }
1164
+ this.nextNodeIndex = i;
1165
+ return nodes;
1166
+ }
1167
+ parseLine(line) {
1168
+ const lineIndex = this.nextLineIndex;
1169
+ // We start from the root node and descend through the rightmost path of the current node until we find the last open node that the current line continues.
1170
+ let lastMatchedNode = this.root;
1171
+ while(true){
1172
+ if (!("children" in lastMatchedNode)) break;
1173
+ // If there are no children in the current node or if the last/rightmost child of the current node is not open, we do not traverse any further, in which case the last matched node is the current node.
1174
+ const child = lastMatchedNode.children.at(-1);
1175
+ if (child === undefined || child.isClosed) break;
1176
+ // We now match the current line against this child node and check if the line continues the child node or not.
1177
+ // If the child node is a blockquote node, we check if the current line continues the blockquote or not.
1178
+ if (child.type === "blockquote") {
1179
+ const blockquote = parseBlockquoteLine(line);
1180
+ // If the current line continues the blockquote, we update the line (by removing the blockquote marker and the optional space after it) and continue attempting to match the line with the rightmost child of the blockquote node and so on.
1181
+ if (blockquote !== null) {
1182
+ line = blockquote.content;
1183
+ lastMatchedNode = child;
1184
+ child.endLineIndex = lineIndex;
1185
+ continue;
1186
+ } else {
1187
+ break;
1188
+ }
1189
+ }
1190
+ if (child.type === "list") {
1191
+ const item = child.children.at(-1);
1192
+ if (item === undefined || item.isClosed) break;
1193
+ if (isLineEmpty(line)) {
1194
+ const lastItemChild = item.children.at(-1);
1195
+ // If the current line is empty and there are no children in the list item, we close the list item.
1196
+ if (lastItemChild === undefined) {
1197
+ closeRightmostPath(child);
1198
+ return;
1199
+ } else {
1200
+ lastMatchedNode = item;
1201
+ continue;
1202
+ }
1203
+ }
1204
+ // If the current line is not empty, we check if it's indented enough to continue the list item.
1205
+ // If it is, we update the line (by removing the leading indent) and continue attempting to match the line with the rightmost child of the list item node and so on.
1206
+ const numOfColumns = getLeadingNonspaceColumn(line);
1207
+ if (numOfColumns >= child.numOfColumns) {
1208
+ line = sliceLeadingIndent(line, child.numOfColumns);
1209
+ lastMatchedNode = item;
1210
+ continue;
1211
+ } else {
1212
+ break;
1213
+ }
1214
+ }
1215
+ // If the child node is a fenced code block node, we check if the current line closes the fenced code block or continues it.
1216
+ if (child.type === "fenced-code-block") {
1217
+ const marker = child.marker;
1218
+ const numOfMarkers = child.numOfMarkers;
1219
+ // If the current line closes the fenced code block, we mark the fenced code block as closed and exit as the line has been fully processed and doesn't require any further processing.
1220
+ if (isCodeFenceEnd(line, {
1221
+ marker,
1222
+ numOfMarkers
1223
+ })) {
1224
+ child.endLineIndex = lineIndex;
1225
+ child.isClosed = true;
1226
+ return;
1227
+ } else {
1228
+ child.endLineIndex = lineIndex;
1229
+ child.lines.push(sliceLeadingIndent(line, child.indentLevel));
1230
+ return;
1231
+ }
1232
+ }
1233
+ // If the child node is an indented code block node, we check if the current line closes the indented code block or continues it.
1234
+ if (child.type === "indented-code-block") {
1235
+ // We continue the indented code block if the current line is indented enough or if it's empty.
1236
+ if (isIndentedCodeLine(line)) {
1237
+ child.endLineIndex = lineIndex;
1238
+ child.lines.push(sliceLeadingIndent(line, 4));
1239
+ return;
1240
+ } else if (isLineEmpty(line)) {
1241
+ child.endLineIndex = lineIndex;
1242
+ child.lines.push("");
1243
+ return;
1244
+ } else {
1245
+ child.isClosed = true;
1246
+ break;
1247
+ }
1248
+ }
1249
+ if (child.type === "html-block") {
1250
+ // If the HTML block can be interrupted by a blank line and the current line is empty, we mark the HTML block as closed.
1251
+ if (child.canBeInterruptedByBlankLine && isLineEmpty(line)) {
1252
+ child.isClosed = true;
1253
+ return;
1254
+ }
1255
+ child.endLineIndex = lineIndex;
1256
+ child.lines.push(line);
1257
+ // If the HTML block ends with the current line, we mark the HTML block as closed.
1258
+ if (child.endPattern?.test(line.trim())) {
1259
+ child.isClosed = true;
1260
+ return;
1261
+ }
1262
+ return;
1263
+ }
1264
+ // If the child node is a paragraph or table node, we check if the current line is empty or not
1265
+ if (child.type === "paragraph" || child.type === "table") {
1266
+ // If the current line is empty, we mark the paragraph or table node as closed and exit as the line has been fully processed and doesn't require any further processing.
1267
+ if (isLineEmpty(line)) {
1268
+ child.isClosed = true;
1269
+ return;
1270
+ } else {
1271
+ break;
1272
+ }
1273
+ }
1274
+ }
1275
+ // Represents the node where newly matched nodes are expected to be added. This may not always be true for exceptional cases, like paragraph continuation.
1276
+ let currentContainer = lastMatchedNode;
1277
+ while(true){
1278
+ // Blockquote
1279
+ const blockquote = parseBlockquoteLine(line);
1280
+ if (blockquote !== null) {
1281
+ const node = {
1282
+ type: "blockquote",
1283
+ children: [],
1284
+ isClosed: false,
1285
+ parent: currentContainer,
1286
+ startLineIndex: lineIndex,
1287
+ endLineIndex: lineIndex
1288
+ };
1289
+ addNode(node);
1290
+ currentContainer = node;
1291
+ line = blockquote.content;
1292
+ continue;
1293
+ }
1294
+ // ATX heading
1295
+ const heading = parseATXHeading(line);
1296
+ if (heading !== null) {
1297
+ addNode({
1298
+ type: "heading",
1299
+ level: heading.level,
1300
+ content: heading.content,
1301
+ isClosed: true,
1302
+ parent: currentContainer,
1303
+ startLineIndex: lineIndex,
1304
+ endLineIndex: lineIndex
1305
+ });
1306
+ break;
1307
+ }
1308
+ // Fenced code block
1309
+ const fencedCode = parseCodeFenceStart(line);
1310
+ if (fencedCode !== null) {
1311
+ addNode({
1312
+ type: "fenced-code-block",
1313
+ indentLevel: fencedCode.indentLevel,
1314
+ numOfMarkers: fencedCode.numOfMarkers,
1315
+ marker: fencedCode.marker,
1316
+ info: fencedCode.info,
1317
+ lines: [],
1318
+ isClosed: false,
1319
+ parent: currentContainer,
1320
+ startLineIndex: lineIndex,
1321
+ endLineIndex: lineIndex
1322
+ });
1323
+ break;
1324
+ }
1325
+ // HTML block
1326
+ const htmlBlock = parseHTMLBlockStart(line);
1327
+ if (htmlBlock !== null) {
1328
+ const node = {
1329
+ type: "html-block",
1330
+ endPattern: htmlBlock.endPattern,
1331
+ canBeInterruptedByBlankLine: htmlBlock.canBeInterruptedByBlankLine,
1332
+ lines: [
1333
+ line
1334
+ ],
1335
+ isClosed: false,
1336
+ parent: currentContainer,
1337
+ startLineIndex: lineIndex,
1338
+ endLineIndex: lineIndex
1339
+ };
1340
+ if (htmlBlock.canInterruptParagraph) {
1341
+ addNode(node);
1342
+ // If the HTML block also ends with the current line, we mark the HTML block as closed.
1343
+ if (node.endPattern?.test(line.trim())) {
1344
+ node.isClosed = true;
1345
+ }
1346
+ break;
1347
+ } else {
1348
+ const deepestOpenNode = getDeepestOpenNodeOnRightmostPath(currentContainer);
1349
+ if (deepestOpenNode === null || deepestOpenNode.type !== "paragraph") {
1350
+ addNode(node);
1351
+ // If the HTML block also ends with the current line, we mark the HTML block as closed.
1352
+ if (node.endPattern?.test(line.trim())) {
1353
+ node.isClosed = true;
1354
+ }
1355
+ break;
1356
+ }
1357
+ }
1358
+ }
1359
+ const lastChild = currentContainer.children.at(-1);
1360
+ if (lastChild?.type === "paragraph" && !lastChild.isClosed) {
1361
+ // Table
1362
+ const table = parseTableStartLine({
1363
+ firstLine: lastChild.lines.at(-1),
1364
+ secondLine: line
1365
+ });
1366
+ if (table !== null) {
1367
+ // Remove the last line from the paragraph node as it'll be replaced by the table node
1368
+ lastChild.lines.pop();
1369
+ // If the paragraph node has no lines left, we remove the paragraph node from the container node
1370
+ if (lastChild.lines.length === 0) {
1371
+ currentContainer.children.pop();
1372
+ }
1373
+ addNode({
1374
+ type: "table",
1375
+ alignments: table.alignments,
1376
+ head: {
1377
+ cells: table.head.cells
1378
+ },
1379
+ body: {
1380
+ rows: []
1381
+ },
1382
+ isClosed: false,
1383
+ parent: currentContainer,
1384
+ startLineIndex: lineIndex,
1385
+ endLineIndex: lineIndex
1386
+ });
1387
+ break;
1388
+ }
1389
+ // Setext heading
1390
+ const heading = parseSetextHeading(line);
1391
+ if (heading !== null) {
1392
+ let lines = lastChild.lines;
1393
+ const originalNumOfLines = lines.length;
1394
+ // Parse link reference definitions (if any) from the paragraph lines
1395
+ let definition = parseLinkReferenceDefinition(lines);
1396
+ while(definition !== null){
1397
+ const { label, href, title } = definition.definition;
1398
+ if (!this.referenceDefinitions.has(label)) {
1399
+ this.referenceDefinitions.set(label, {
1400
+ href,
1401
+ title
1402
+ });
1403
+ }
1404
+ lines = lines.slice(definition.nextLineIndex);
1405
+ definition = parseLinkReferenceDefinition(lines);
1406
+ }
1407
+ // If there are remaining lines after parsing link reference definitions, we create a new heading node with the remaining lines
1408
+ if (lines.length > 0) {
1409
+ // Remove the current paragraph node from the container node as it'll replaced by a heading node
1410
+ currentContainer.children.pop();
1411
+ addNode({
1412
+ type: "heading",
1413
+ level: heading.level,
1414
+ content: lines.join("\n").trim(),
1415
+ isClosed: true,
1416
+ parent: currentContainer,
1417
+ startLineIndex: lastChild.startLineIndex - (originalNumOfLines - lines.length),
1418
+ endLineIndex: lineIndex
1419
+ });
1420
+ break;
1421
+ } else {
1422
+ lastChild.lines.push(line);
1423
+ break;
1424
+ }
1425
+ }
1426
+ }
1427
+ // Thematic break
1428
+ if (isSeparator(line)) {
1429
+ addNode({
1430
+ type: "thematic-break",
1431
+ isClosed: true,
1432
+ parent: currentContainer,
1433
+ startLineIndex: lineIndex,
1434
+ endLineIndex: lineIndex
1435
+ });
1436
+ break;
1437
+ }
1438
+ // List
1439
+ const list = parseListItem(line);
1440
+ if (list !== null) {
1441
+ // If the last child of the current container is a paragraph node and it's not closed, we check if the list can interrupt the paragraph or not.
1442
+ const lastChild = currentContainer.children.at(-1);
1443
+ if (lastChild?.type === "paragraph" && !lastChild.isClosed) {
1444
+ if (isLineEmpty(list.content) || list.kind === "ordered" && list.value !== 1) {
1445
+ lastChild.endLineIndex = lineIndex;
1446
+ lastChild.lines.push(line);
1447
+ break;
1448
+ }
1449
+ }
1450
+ let parent;
1451
+ if (list.kind === "ordered") {
1452
+ const lastChild = currentContainer.children.at(-1);
1453
+ if (lastChild?.type === "list" && !lastChild.isClosed && lastChild.kind === "ordered" && lastChild.delimiter === list.delimiter) {
1454
+ parent = lastChild;
1455
+ } else {
1456
+ parent = {
1457
+ type: "list",
1458
+ kind: "ordered",
1459
+ start: list.value,
1460
+ delimiter: list.delimiter,
1461
+ numOfColumns: list.numOfColumns,
1462
+ children: [],
1463
+ isClosed: false,
1464
+ parent: currentContainer,
1465
+ startLineIndex: lineIndex,
1466
+ endLineIndex: lineIndex,
1467
+ isTight: true
1468
+ };
1469
+ addNode(parent);
1470
+ }
1471
+ } else {
1472
+ const lastChild = currentContainer.children.at(-1);
1473
+ if (lastChild?.type === "list" && !lastChild.isClosed && lastChild.kind === "unordered" && lastChild.marker === list.marker) {
1474
+ parent = lastChild;
1475
+ } else {
1476
+ parent = {
1477
+ type: "list",
1478
+ kind: "unordered",
1479
+ marker: list.marker,
1480
+ children: [],
1481
+ isClosed: false,
1482
+ parent: currentContainer,
1483
+ numOfColumns: list.numOfColumns,
1484
+ startLineIndex: lineIndex,
1485
+ endLineIndex: lineIndex,
1486
+ isTight: true
1487
+ };
1488
+ addNode(parent);
1489
+ }
1490
+ }
1491
+ parent.numOfColumns = list.numOfColumns;
1492
+ const item = {
1493
+ type: "list-item",
1494
+ children: [],
1495
+ isClosed: false,
1496
+ parent: parent,
1497
+ startLineIndex: lineIndex,
1498
+ endLineIndex: lineIndex
1499
+ };
1500
+ addNode(item);
1501
+ currentContainer = item;
1502
+ line = list.content;
1503
+ continue;
1504
+ }
1505
+ const latestOpenNode = getDeepestOpenNodeOnRightmostPath(currentContainer);
1506
+ // If the latest open node is not a paragraph and the current line is indented, we create a new indented code block node
1507
+ if (isIndentedCodeLine(line) && (latestOpenNode === null || latestOpenNode.type !== "paragraph")) {
1508
+ addNode({
1509
+ type: "indented-code-block",
1510
+ lines: [
1511
+ sliceLeadingIndent(line, 4)
1512
+ ],
1513
+ isClosed: false,
1514
+ parent: currentContainer,
1515
+ startLineIndex: lineIndex,
1516
+ endLineIndex: lineIndex
1517
+ });
1518
+ break;
1519
+ }
1520
+ if (isLineEmpty(line)) {
1521
+ closeRightmostPath(currentContainer);
1522
+ break;
1523
+ }
1524
+ // Table continuation
1525
+ if (lastChild?.type === "table" && !lastChild.isClosed) {
1526
+ lastChild.endLineIndex = lineIndex;
1527
+ const numOfColumns = lastChild.head.cells.length;
1528
+ let cells = parseTableRow(line);
1529
+ if (cells.length > numOfColumns) {
1530
+ // Trim to max allowed columns
1531
+ cells = cells.slice(0, numOfColumns);
1532
+ } else if (cells.length < numOfColumns) {
1533
+ // Fill the remainder with empty strings
1534
+ cells = [
1535
+ ...cells,
1536
+ ...Array(numOfColumns - cells.length).fill("")
1537
+ ];
1538
+ }
1539
+ lastChild.body.rows.push({
1540
+ cells
1541
+ });
1542
+ break;
1543
+ }
1544
+ // Paragraph
1545
+ if (latestOpenNode?.type === "paragraph") {
1546
+ latestOpenNode.endLineIndex = lineIndex;
1547
+ latestOpenNode.lines.push(line);
1548
+ break;
1549
+ } else {
1550
+ addNode({
1551
+ type: "paragraph",
1552
+ lines: [
1553
+ line
1554
+ ],
1555
+ isClosed: false,
1556
+ parent: currentContainer,
1557
+ startLineIndex: lineIndex,
1558
+ endLineIndex: lineIndex
1559
+ });
1560
+ break;
1561
+ }
1562
+ }
1563
+ }
1564
+ parseReferenceLinkDefinitions(parent) {
1565
+ for(let i = 0; i < parent.children.length; i++){
1566
+ const block = parent.children[i];
1567
+ if (block === undefined) break;
1568
+ // If a block is not closed/finalized yet, we do not parse reference link definitions for it
1569
+ if (!block.isClosed) break;
1570
+ if (block.type === "paragraph") {
1571
+ let lines = block.lines;
1572
+ let definition = parseLinkReferenceDefinition(lines);
1573
+ while(definition !== null){
1574
+ const { label, href, title } = definition.definition;
1575
+ if (!this.referenceDefinitions.has(label)) {
1576
+ this.referenceDefinitions.set(label, {
1577
+ href,
1578
+ title
1579
+ });
1580
+ }
1581
+ lines = lines.slice(definition.nextLineIndex);
1582
+ definition = parseLinkReferenceDefinition(lines);
1583
+ }
1584
+ if (lines.length > 0) {
1585
+ block.lines = lines;
1586
+ } else {
1587
+ parent.children.splice(i, 1);
1588
+ i--;
1589
+ }
1590
+ } else if (block.type === "blockquote") {
1591
+ this.parseReferenceLinkDefinitions(block);
1592
+ } else if (block.type === "list") {
1593
+ for (const item of block.children){
1594
+ this.parseReferenceLinkDefinitions(item);
1595
+ }
1596
+ }
1597
+ }
1598
+ }
1599
+ convertInternalBlockToPublicBlock(block) {
1600
+ switch(block.type){
1601
+ case "blockquote":
1602
+ return {
1603
+ type: "blockquote",
1604
+ children: block.children.map((child)=>this.convertInternalBlockToPublicBlock(child))
1605
+ };
1606
+ case "heading":
1607
+ return {
1608
+ type: "heading",
1609
+ level: block.level,
1610
+ children: parseInline(block.content, {
1611
+ referenceDefinitions: this.referenceDefinitions
1612
+ })
1613
+ };
1614
+ case "paragraph":
1615
+ {
1616
+ return {
1617
+ type: "paragraph",
1618
+ children: parseInline(block.lines.join("\n").trim(), {
1619
+ referenceDefinitions: this.referenceDefinitions
1620
+ })
1621
+ };
1622
+ }
1623
+ case "thematic-break":
1624
+ return {
1625
+ type: "thematic-break"
1626
+ };
1627
+ case "fenced-code-block":
1628
+ return {
1629
+ type: "code-block",
1630
+ content: block.lines.length > 0 ? block.lines.join("\n") + "\n" : "",
1631
+ info: block.info
1632
+ };
1633
+ case "indented-code-block":
1634
+ {
1635
+ let startIndex = 0;
1636
+ let endIndex = block.lines.length - 1;
1637
+ while(startIndex < endIndex){
1638
+ const line = block.lines[startIndex];
1639
+ if (line === undefined) break;
1640
+ if (!isLineEmpty(line)) break;
1641
+ startIndex++;
1642
+ }
1643
+ while(endIndex > startIndex){
1644
+ const line = block.lines[endIndex];
1645
+ if (line === undefined) break;
1646
+ if (!isLineEmpty(line)) break;
1647
+ endIndex--;
1648
+ }
1649
+ return {
1650
+ type: "code-block",
1651
+ content: block.lines.slice(startIndex, endIndex + 1).join("\n") + "\n"
1652
+ };
1653
+ }
1654
+ case "html-block":
1655
+ return {
1656
+ type: "html-block",
1657
+ content: block.lines.join("\n")
1658
+ };
1659
+ case "list":
1660
+ {
1661
+ const items = block.children.map((item)=>({
1662
+ children: item.children.map((child)=>this.convertInternalBlockToPublicBlock(child))
1663
+ }));
1664
+ if (block.kind === "ordered") {
1665
+ return {
1666
+ type: "list",
1667
+ kind: "ordered",
1668
+ start: block.start,
1669
+ tight: block.isTight,
1670
+ items: items
1671
+ };
1672
+ } else {
1673
+ return {
1674
+ type: "list",
1675
+ kind: "unordered",
1676
+ marker: block.marker,
1677
+ tight: block.isTight,
1678
+ items: items
1679
+ };
1680
+ }
1681
+ }
1682
+ case "table":
1683
+ {
1684
+ return {
1685
+ type: "table",
1686
+ head: {
1687
+ cells: block.head.cells.map((cell, index)=>({
1688
+ align: block.alignments[index],
1689
+ children: parseInline(cell, {
1690
+ referenceDefinitions: this.referenceDefinitions
1691
+ })
1692
+ }))
1693
+ },
1694
+ body: {
1695
+ rows: block.body.rows.map((row)=>({
1696
+ cells: row.cells.map((cell, index)=>({
1697
+ align: block.alignments[index],
1698
+ children: parseInline(cell, {
1699
+ referenceDefinitions: this.referenceDefinitions
1700
+ })
1701
+ }))
1702
+ }))
1703
+ }
1704
+ };
1705
+ }
1706
+ }
1707
+ }
1708
+ constructor(){
1709
+ this.splitter = new LineSplitter();
1710
+ this.root = {
1711
+ type: "root",
1712
+ children: [],
1713
+ parent: null
1714
+ };
1715
+ this.nextLineIndex = 0;
1716
+ this.nextNodeIndex = 0;
1717
+ this.referenceDefinitions = new Map();
1718
+ }
1719
+ }
1720
/**
 * Appends `node` to the children of `node.parent`.
 *
 * Before inserting, any still-open nodes on the parent's last-child chain are closed so the
 * new node never becomes a sibling of an unfinished block.
 *
 * @param node - The node to insert; its `parent` must already be set.
 */
function addNode(node) {
    const { parent } = node;
    closeRightmostPath(parent);
    parent.children.push(node);
}
1731
/**
 * Closes every open node on the "rightmost path" of `parent`, i.e. the chain obtained by
 * repeatedly following the last child.
 *
 * The deepest open node is located first and closed; the walk then moves upwards through the
 * `parent` links, closing each node, and stops when it reaches `parent` itself or has just
 * closed a node whose parent is the root. `parent` itself is never closed.
 *
 * While closing:
 * - a list has its `isTight` flag computed via `isListTight`;
 * - a container (blockquote, list, list item) has its `startLineIndex` / `endLineIndex`
 *   widened to cover its first and last child.
 *
 * Going down to the leaf and back up is deliberate: children must be finalized before their
 * enclosing list so that tightness is computed from up-to-date line indices.
 *
 * @param parent The container node whose last-child chain is sealed.
 */
function closeRightmostPath(parent) {
    for (
        let node = getDeepestOpenNodeOnRightmostPath(parent);
        node !== null && node !== parent;
        node = node.parent
    ) {
        node.isClosed = true;
        if (node.type === "list") {
            node.isTight = isListTight(node);
        }
        const isContainer = node.type === "blockquote" || node.type === "list" || node.type === "list-item";
        if (isContainer) {
            // A container spans at least from its first child's start to its last child's end.
            const first = node.children.at(0);
            const last = node.children.at(-1);
            if (first !== undefined) {
                node.startLineIndex = Math.min(node.startLineIndex, first.startLineIndex);
            }
            if (last !== undefined) {
                node.endLineIndex = Math.max(node.endLineIndex, last.endLineIndex);
            }
        }
        // Top-level blocks are the end of the climb.
        if (node.parent?.type === "root") break;
    }
}
1772
/**
 * Decides whether a list is "tight", based purely on recorded line indices.
 *
 * A list is reported as loose (returns `false`) when either:
 * 1. two consecutive list items are separated by at least one line, or
 * 2. two consecutive direct children of any single item are separated by at least one line.
 *
 * Otherwise it is tight (returns `true`).
 *
 * @param list - A list node whose `children` are list items carrying `startLineIndex`,
 *               `endLineIndex` and their own `children`.
 * @returns `true` if no such gap exists, `false` otherwise.
 */
function isListTight(list) {
    // True when some neighbouring pair in `nodes` has at least one line between them.
    const hasGapBetweenNeighbours = (nodes)=>{
        for(let i = 1; i < nodes.length; i++){
            const previous = nodes[i - 1];
            const next = nodes[i];
            if (previous === undefined || next === undefined) return false;
            if (previous.endLineIndex < next.startLineIndex - 1) return true;
        }
        return false;
    };
    const items = list.children;
    if (hasGapBetweenNeighbours(items)) return false;
    for (const item of items){
        if (hasGapBetweenNeighbours(item.children)) return false;
    }
    return true;
}
1797
/**
 * Follows the last-child chain below `node` and returns the deepest node that is still open.
 *
 * The descent continues only through open containers (blockquote, list, list item). It ends at
 * the first open non-container, or when a container's last child is missing or closed.
 *
 * @param node - The container to start from (it is never returned itself).
 * @returns The deepest open node on the rightmost path, or `null` if `node` has no open last child.
 */
function getDeepestOpenNodeOnRightmostPath(node) {
    let deepestOpen = null;
    let container = node;
    while(true){
        const lastChild = container.children.at(-1);
        // Nothing (more) is open below this container.
        if (lastChild === undefined || lastChild.isClosed) return deepestOpen;
        deepestOpen = lastChild;
        const canHoldBlocks = lastChild.type === "blockquote" || lastChild.type === "list" || lastChild.type === "list-item";
        if (!canHoldBlocks) return deepestOpen;
        container = lastChild;
    }
}
1815
/**
 * Tells whether a line is indented far enough to count as an indented code line.
 *
 * The indentation is measured in columns by `getLeadingNonspaceColumn`.
 * Note that a whitespace-only line with 4 or more columns also qualifies.
 *
 * @param line - The line to inspect.
 * @returns `true` when the leading indentation is at least 4 columns wide.
 */
function isIndentedCodeLine(line) {
    return getLeadingNonspaceColumn(line) >= 4;
}
1827
/**
 * Tries to read a line as the opening fence of a fenced code block.
 *
 * A line is accepted when:
 * - its indentation is at most 3 columns,
 * - after trimming, it begins with a run of at least 3 identical fence characters
 *   (all backticks or all tildes),
 * - for backtick fences, the text after the run contains no backtick.
 *
 * Everything after the run of fence characters, trimmed, is the info string.
 *
 * @param line - The line to parse.
 * @returns `null` if the line is not an opening fence, otherwise an object with:
 *   - `indentLevel`: indentation of the fence in columns,
 *   - `numOfMarkers`: length of the fence run,
 *   - `marker`: the fence character ("`" or "~"),
 *   - `info`: the info string, or `undefined` when it is empty.
 */
function parseCodeFenceStart(line) {
    const indentColumns = getLeadingNonspaceColumn(line);
    if (indentColumns > 3) return null;
    const trimmed = line.trim();
    // The fence is the longest leading run of a single fence character.
    const fenceRun = /^(?:`+|~+)/.exec(trimmed);
    if (fenceRun === null) return null;
    const numOfMarkers = fenceRun[0].length;
    if (numOfMarkers < 3) return null;
    const marker = trimmed.charAt(0);
    const info = trimmed.slice(numOfMarkers).trim();
    // Backtick fences may not carry backticks in their info string; tilde fences may.
    if (marker === "`" && info.includes("`")) return null;
    return {
        indentLevel: indentColumns,
        numOfMarkers,
        marker,
        info: info === "" ? undefined : info
    };
}
1893
/**
 * Tells whether a line closes a fenced code block that was opened with the given fence.
 *
 * The line closes the block when all of the following hold:
 * 1. it is indented by at most 3 columns;
 * 2. after trimming, it starts with the same fence character as the opening fence;
 * 3. the run of fence characters is at least as long as the opening run;
 * 4. nothing but that run remains after trimming (no info string).
 *
 * @param line - The line to check.
 * @param options.marker - The fence character of the opening fence ("`" or "~").
 * @param options.numOfMarkers - The number of fence characters in the opening fence.
 * @returns `true` if the line is a valid closing fence, `false` otherwise.
 *
 * @example
 * isCodeFenceEnd("```", { marker: "`", numOfMarkers: 3 });   // true
 * isCodeFenceEnd("````", { marker: "`", numOfMarkers: 3 });  // true  (longer is fine)
 * isCodeFenceEnd("``", { marker: "`", numOfMarkers: 3 });    // false (too short)
 * isCodeFenceEnd("~~~", { marker: "`", numOfMarkers: 3 });   // false (wrong character)
 * isCodeFenceEnd("```js", { marker: "`", numOfMarkers: 3 }); // false (trailing text)
 */
function isCodeFenceEnd(line, options) {
    if (getLeadingNonspaceColumn(line) > 3) return false;
    const candidate = line.trim();
    const { marker, numOfMarkers: requiredMarkers } = options;
    if (candidate.charAt(0) !== marker) return false;
    // Measure the leading run of fence characters.
    let runLength = 1;
    while(runLength < candidate.length && candidate.charAt(runLength) === marker){
        runLength++;
    }
    // Long enough, and nothing may follow the run (surrounding whitespace is already trimmed).
    return runLength >= requiredMarkers && candidate.length <= runLength;
}
1938
/**
 * Returns the character index of the first character that is neither a space nor a tab.
 *
 * This is a character index, not a column: a tab counts as exactly one position.
 * If the line consists only of spaces/tabs (or is empty), the line's length is returned.
 *
 * Examples (␣ = space, ⇥ = tab):
 *   "␣␣␣This"  -> 3
 *   "␣␣⇥This"  -> 3
 *
 * @param line - The line to scan.
 * @returns The index of the first non-space, non-tab character.
 */
function getFirstNonspaceIndex(line) {
    let cursor = 0;
    while(cursor < line.length && isSpaceOrTab(line.charAt(cursor))){
        cursor++;
    }
    return cursor;
}
1970
/**
 * Measures the indentation of a line in columns.
 *
 * Leading spaces advance the column by one; a leading tab advances it to the next multiple
 * of 4. Scanning stops at the first character that is neither a space nor a tab.
 *
 * Examples (␣ = space, ⇥ = tab):
 *   "␣␣␣This" -> 3   (three spaces)
 *   "⇥This"   -> 4   (tab jumps from column 0 to 4)
 *   "␣␣⇥This" -> 4   (two spaces reach column 2, the tab then jumps to 4)
 *
 * @param line - The line to measure.
 * @returns The indentation width of the line in columns.
 */
function getLeadingNonspaceColumn(line) {
    let width = 0;
    for (const ch of line){
        if (ch === " ") {
            width++;
        } else if (ch === "\t") {
            // Jump to the next tab stop.
            width += 4 - width % 4;
        } else {
            break;
        }
    }
    return width;
}
2014
/**
 * Strips the leading indentation of a line, keeping only what exceeds `width` columns.
 *
 * All leading spaces and tabs are consumed and measured in columns (tabs advance to the next
 * multiple of 4). If the measured indentation is wider than `width`, the surplus is put back
 * as plain spaces, so any remaining indentation is always expressed in spaces, never tabs.
 * If the indentation is `width` columns or less, it is removed entirely.
 *
 * Examples with width = 4 (␣ = space, ⇥ = tab):
 *   "␣␣␣␣␣␣foo" -> "␣␣foo"      (6 columns, 2 left over)
 *   "⇥⇥foo"     -> "␣␣␣␣foo"    (8 columns, 4 left over, emitted as spaces)
 *   "␣␣foo"     -> "foo"        (only 2 columns, all removed)
 *
 * @param line - The line to remove the leading indent from.
 * @param width - The number of indentation columns to remove.
 * @returns The line with the leading indent removed.
 */
function sliceLeadingIndent(line, width) {
    let columns = 0;
    let cursor = 0;
    for(; cursor < line.length; cursor++){
        const ch = line.charAt(cursor);
        if (ch === " ") {
            columns += 1;
        } else if (ch === "\t") {
            columns += 4 - columns % 4;
        } else {
            break;
        }
    }
    const rest = line.slice(cursor);
    const surplus = columns - width;
    return surplus > 0 ? " ".repeat(surplus) + rest : rest;
}
2046
/**
 * Tells whether a line is a separator (thematic break).
 *
 * Requirements:
 * - the line is not indented enough to be an indented code line;
 * - after trimming, its first character is one of "-", "_" or "*";
 * - every other character is either that same marker, a space, or a tab;
 * - the marker occurs at least 3 times.
 *
 * Example (␣ = space): "␣*␣*␣*␣␣␣" is a separator with three "*" markers.
 *
 * @param line - The line to check.
 * @returns true if the line is a separator, false otherwise.
 */
function isSeparator(line) {
    if (isIndentedCodeLine(line)) return false;
    const trimmed = line.trim();
    const marker = trimmed.charAt(0);
    if (![
        "-",
        "_",
        "*"
    ].includes(marker)) {
        return false;
    }
    // The first character is already known to be a marker; scan the remainder.
    let markerCount = 1;
    for (const ch of trimmed.slice(1)){
        if (isSpaceOrTab(ch)) continue;
        if (ch !== marker) return false;
        markerCount++;
    }
    return markerCount >= 3;
}
2083
/**
 * Tries to read a line as a blockquote line and returns the quoted content.
 *
 * The line qualifies when it is not indented enough to be an indented code line and its first
 * non-space character is ">". The whitespace following the marker is measured in columns
 * (tabs advance to the next multiple of 4) and re-emitted as spaces, after which a single
 * leading space, if present, is dropped as the optional separator after ">".
 *
 * Examples (␣ = space, ⇥ = tab):
 *   ">␣foo"   -> { content: "foo" }
 *   ">␣␣foo"  -> { content: "␣foo" }
 *   ">⇥foo"   -> { content: "␣␣foo" }  (tab reaches column 4, i.e. 3 columns after ">")
 *
 * @param line - The line to parse.
 * @returns `{ content }` for a blockquote line, or `null` otherwise.
 */
function parseBlockquoteLine(line) {
    if (isIndentedCodeLine(line)) return null;
    const markerIndex = getFirstNonspaceIndex(line);
    if (line.charAt(markerIndex) !== ">") return null;
    // Track both the character position and the visual column while skipping whitespace after ">".
    let cursor = markerIndex + 1;
    let column = cursor;
    for(; cursor < line.length; cursor++){
        const ch = line.charAt(cursor);
        if (ch === "\t") {
            column += 4 - column % 4;
        } else if (ch === " ") {
            column += 1;
        } else {
            break;
        }
    }
    // Rebuild the skipped whitespace as spaces so that tabs keep their visual width.
    const padding = " ".repeat(column - markerIndex - 1);
    const content = padding + line.slice(cursor);
    return {
        content: content.startsWith(" ") ? content.slice(1) : content
    };
}
2121
/**
 * Tries to read a line as an ATX heading ("# Title").
 *
 * Rules applied, in order:
 * - the line must not be indented enough to be an indented code line;
 * - after trimming, it must start with a run of 1 to 6 "#" characters;
 * - the run must be followed by a space, a tab, or the end of the line;
 * - a trailing run of "#" characters is treated as a closing sequence and dropped, but only
 *   when it is preceded by a space or tab; otherwise it stays part of the content.
 *
 * Examples:
 *   "# Heading 1"      -> { level: 1, content: "Heading 1" }
 *   "## Title ##"      -> { level: 2, content: "Title" }
 *   "# foo#"           -> { level: 1, content: "foo#" }
 *   "###"              -> { level: 3, content: "" }
 *   "#No space"        -> null
 *   "####### Too many" -> null
 *
 * @param line - The line to parse.
 * @returns `{ level, content }` for a heading, or `null` if the line is not an ATX heading.
 */
function parseATXHeading(line) {
    if (isIndentedCodeLine(line)) return null;
    const text = line.trim();
    if (text.charAt(0) !== "#") return null;
    // Length of the opening "#" run doubles as the heading level.
    let level = 1;
    while(level < text.length && text.charAt(level) === "#"){
        level++;
    }
    if (level > 6) return null;
    if (level < text.length) {
        const separator = text.charAt(level);
        if (separator !== " " && separator !== "\t") return null;
    }
    // Walk backwards over a trailing "#" run, never reaching into the opening run or the
    // character right after it.
    let tail = text.length;
    while(tail - 1 > level && text.charAt(tail - 1) === "#"){
        tail--;
    }
    // The trailing run only counts as a closing sequence when whitespace precedes it.
    const contentEnd = isSpaceOrTab(text.charAt(tail - 1)) ? tail : text.length;
    return {
        level,
        content: text.slice(level, contentEnd).trim()
    };
}
2195
/**
 * Tries to read a line as a setext heading underline.
 *
 * The line qualifies when it is not indented enough to be an indented code line and, after
 * trimming, consists of one or more "=" characters only, or one or more "-" characters only.
 *
 * @param line - The line to check.
 * @returns `{ level: 1 }` for an "=" underline, `{ level: 2 }` for a "-" underline,
 *          or `null` if the line is not a setext underline.
 */
function parseSetextHeading(line) {
    if (isIndentedCodeLine(line)) return null;
    const underline = line.trim();
    const marker = underline.charAt(0);
    if (marker !== "=" && marker !== "-") return null;
    // No mixing and no interior whitespace: every character must equal the marker.
    for(let i = 1; i < underline.length; i++){
        if (underline.charAt(i) !== marker) return null;
    }
    return {
        level: marker === "=" ? 1 : 2
    };
}
2218
/**
 * Tries to read two consecutive lines as the start of a table: a header row followed by a
 * delimiter row.
 *
 *   | foo  | bar   | baz  |   <- firstLine  (header row)
 *   | :--- | :---: | ---: |   <- secondLine (delimiter row: left, center, right)
 *
 * Conditions checked:
 * - both lines are present; the delimiter row is neither blank nor an indented code line;
 * - the delimiter row starts with "|", ":" or "-", and a leading "-" is not directly followed
 *   by a space or tab;
 * - the header row contains at least one "|";
 * - the delimiter row contains only "|", ":", "-", spaces and tabs;
 * - every delimiter cell matches `:?-+:?`; empty cells are tolerated only at either end;
 * - the header has at least one cell and exactly as many cells as the delimiter row.
 *
 * @param firstLine - Candidate header row.
 * @param secondLine - Candidate delimiter row.
 * @returns `{ head: { cells }, alignments }` where each alignment is "left", "center",
 *          "right" or `undefined`; or `null` if the lines do not start a table.
 */
function parseTableStartLine({ firstLine, secondLine }) {
    if (firstLine === undefined || secondLine === undefined) return null;
    if (isIndentedCodeLine(secondLine) || isLineEmpty(secondLine)) return null;
    const headerLine = firstLine.trim();
    const delimiterLine = secondLine.trim();
    const leading = delimiterLine.charAt(0);
    if (leading !== "|" && leading !== ":" && leading !== "-") return null;
    if (leading === "-" && isSpaceOrTab(delimiterLine.charAt(1))) return null;
    if (!headerLine.includes("|")) return null;
    // The first character was validated above; vet the remaining ones.
    for(let position = 1; position < delimiterLine.length; position++){
        const ch = delimiterLine.charAt(position);
        const isAllowed = ch === "|" || ch === ":" || ch === "-" || isSpaceOrTab(ch);
        if (!isAllowed) return null;
    }
    const segments = delimiterLine.split("|");
    const lastSegmentIndex = segments.length - 1;
    const alignments = [];
    for (const [segmentIndex, rawSegment] of segments.entries()){
        const cell = rawSegment.trim();
        if (cell === "") {
            // Produced by a leading/trailing pipe; anywhere else it invalidates the row.
            if (segmentIndex === 0 || segmentIndex === lastSegmentIndex) continue;
            return null;
        }
        if (!/^:?-+:?$/.test(cell)) return null;
        const colonAtStart = cell.startsWith(":");
        const colonAtEnd = cell.endsWith(":");
        if (colonAtEnd) {
            alignments.push(colonAtStart ? "center" : "right");
        } else {
            alignments.push(colonAtStart ? "left" : undefined);
        }
    }
    const headerCells = parseTableRow(headerLine);
    if (headerCells.length === 0 || headerCells.length !== alignments.length) {
        return null;
    }
    return {
        head: {
            cells: headerCells
        },
        alignments
    };
}
2286
/**
 * Splits a table row into trimmed cell strings.
 *
 * The row is split on every "|" that is not immediately preceded by a backslash; afterwards
 * each "\|" inside a cell is turned into a literal "|". One empty cell at the very start and
 * one at the very end (as produced by leading/trailing pipes) are dropped.
 *
 *   "a|b|c"       -> ["a", "b", "c"]
 *   "| a | b |"   -> ["a", "b"]
 *   "a \| b | c"  -> ["a | b", "c"]   (the escaped pipe does not split)
 *
 * @param line - The raw row text.
 * @returns The list of cell contents.
 */
function parseTableRow(line) {
    const cells = line.trim().split(/(?<!\\)\|/).map((rawCell)=>rawCell.replace(/\\\|/g, "|").trim());
    if (cells.length > 0 && cells[0] === "") {
        cells.shift();
    }
    if (cells.length > 0 && cells[cells.length - 1] === "") {
        cells.pop();
    }
    return cells;
}
2310
/**
 * Attempts to parse a link reference definition (`[label]: destination "title"`)
 * starting at the first of the given lines.
 *
 * Lines are trimmed and appended to an internal buffer on demand (each followed by "\n"),
 * so the label, the destination and the title may each continue on following lines.
 *
 * @param lines - The candidate lines; parsing always starts at `lines[0]`.
 * @returns `null` if the lines do not start with a valid definition. Otherwise an object with
 * the `definition` (normalized `label`, `href`, and `title`, which is `undefined` when there is
 * no valid title) and `nextLineIndex`, the index of the first line not consumed by the definition.
 */
function parseLinkReferenceDefinition(lines) {
    const firstLine = lines[0];
    if (firstLine === undefined) return null;
    let nextLineIndex = 1;
    // The buffer always ends with "\n"; further lines are appended lazily when a scan reaches that newline.
    let content = firstLine.trim() + "\n";
    // A link reference definition must start with a left bracket '['
    if (content.charAt(0) !== "[") return null;
    let label = "";
    let characterCursor = 1; // Skip the opening bracket '['
    while (characterCursor < content.length) {
        const character = content.charAt(characterCursor);
        if (character === "\\") {
            const nextCharacter = content.charAt(characterCursor + 1);
            // Only ']', '[' and '\' need to be escaped within labels. Escaping them allows them to appear literally in the label
            if ([
                "]",
                "[",
                "\\"
            ].includes(nextCharacter)) {
                label += nextCharacter;
                characterCursor += 2;
            } else {
                // Any other backslash is kept as a literal character.
                label += character;
                characterCursor++;
            }
            continue;
        }
        // Unescaped `[` characters are not allowed in reference link labels. This prevents ambiguous nested bracket parsing.
        if (character === "[") return null;
        // An unescaped closing bracket ends the label.
        if (character === "]") break;
        if (character === "\n") {
            // The label continues on the next line; without one the label is unterminated.
            const nextLine = lines[nextLineIndex];
            if (nextLine === undefined) return null;
            content += nextLine.trim() + "\n";
            label += "\n";
            characterCursor++;
            nextLineIndex++;
            continue;
        }
        label += character;
        characterCursor++;
    }
    if (label.length === 0 || label.length > 999) return null;
    if (label.trim().length === 0) return null;
    characterCursor++; // Skip the closing bracket ']'
    if (content.charAt(characterCursor) !== ":") return null;
    characterCursor++; // Skip the colon ':'
    // Skip optional whitespace (including line breaks) after the colon ':'
    while (true) {
        const character = content.charAt(characterCursor);
        if (character === " " || character === "\t") {
            characterCursor++;
            continue;
        }
        if (character === "\n") {
            // The destination is mandatory, so running out of lines here is a failure.
            const nextLine = lines[nextLineIndex];
            if (nextLine === undefined) return null;
            content += nextLine.trim() + "\n";
            characterCursor++;
            nextLineIndex++;
            continue;
        }
        break;
    }
    const destination = parseLinkDestination(content, {
        startIndex: characterCursor
    });
    // If the destination is invalid, the link reference definition is invalid (for both stream and non-stream parsing)
    if (destination === null) return null;
    characterCursor = destination.endIndex + 1;
    // Remember where we are so that an invalid title on a following line can be rolled back.
    const characterCursorAfterDestination = characterCursor;
    const nextLineIndexAfterDestination = nextLineIndex;
    const destinationIsFollowedByNewline = content.charAt(characterCursor) === "\n";
    // The destination must be separated from whatever follows by whitespace or a line break.
    if (!destinationIsFollowedByNewline && ![
        " ",
        "\t"
    ].includes(content.charAt(characterCursor))) {
        return null;
    }
    // Skip optional whitespace after the destination
    while (true) {
        const character = content.charAt(characterCursor);
        if (character === " " || character === "\t") {
            characterCursor++;
            continue;
        }
        if (character === "\n") {
            // The title is optional, so running out of lines here is fine.
            const nextLine = lines[nextLineIndex];
            if (nextLine === undefined) break;
            content += nextLine.trim() + "\n";
            characterCursor++;
            nextLineIndex++;
            continue;
        }
        break;
    }
    let title;
    const openingQuoteCharacter = content.charAt(characterCursor);
    // Reference link title must start with ", ', or (
    if ([
        '"',
        "'",
        "("
    ].includes(openingQuoteCharacter)) {
        const closingQuoteCharacter = openingQuoteCharacter === "(" ? ")" : openingQuoteCharacter;
        characterCursor++; // Skip the opening quote character
        const titleStartIndex = characterCursor;
        while (true) {
            const character = content.charAt(characterCursor);
            // Defensive guard: never scan past the end of the buffered content.
            if (character === "") break;
            if (character === closingQuoteCharacter) {
                title = unescapeString(content.slice(titleStartIndex, characterCursor));
                characterCursor++; // Skip the closing quote character
                break;
            }
            // For parenthesis-delimited titles, unescaped `(` is not allowed; this prevents ambiguity in parsing.
            if (character === "(" && openingQuoteCharacter === "(") {
                break;
            }
            if (character === "\\") {
                const nextCharacter = content.charAt(characterCursor + 1);
                // A backslash escapes the following character, so both are skipped. A backslash at the end
                // of a line must NOT swallow the newline: the newline branch below has to run so that the
                // next line gets appended. Skipping it would move the cursor past the buffer and loop forever.
                if (nextCharacter !== "" && nextCharacter !== "\n") {
                    characterCursor += 2;
                    continue;
                }
            }
            if (character === "\n") {
                // The title continues on the next line; without one the title is unterminated.
                const nextLine = lines[nextLineIndex];
                if (nextLine === undefined) break;
                content += nextLine.trim() + "\n";
                characterCursor++;
                nextLineIndex++;
                continue;
            }
            // Any other character is part of the title.
            characterCursor++;
        }
    }
    // Title must not be followed by any more content
    if (content.charAt(characterCursor) !== "\n") {
        title = undefined;
    }
    if (title === undefined) {
        // If the destination is not followed by a newline and the title is invalid, the link reference definition is invalid
        if (!destinationIsFollowedByNewline) {
            return null;
        } else {
            // The definition ends right after the destination; give back any lines consumed while looking for a title.
            characterCursor = characterCursorAfterDestination + 1;
            nextLineIndex = nextLineIndexAfterDestination;
        }
    }
    return {
        definition: {
            label: normalizeReference(label),
            href: destination.href,
            title
        },
        nextLineIndex
    };
}
2468
/**
 * Parses a line as the start of a list item, trying the ordered form first and the
 * unordered form second.
 *
 * @param line - The line to parse.
 * @returns `null` when the line starts neither kind of list item. Otherwise an object with
 * `kind` ("ordered" or "unordered"), the item `content`, the content column `numOfColumns`, and
 * either `value` + `delimiter` (ordered) or `marker` (unordered).
 */
function parseListItem(line) {
    const orderedMatch = parseOrderedListItem(line);
    const isOrdered = orderedMatch !== null;
    const match = isOrdered ? orderedMatch : parseUnorderedListItem(line);
    if (match === null) {
        return null;
    }
    const { numOfColumnsAfterMarker } = match;
    let { content, numOfColumns } = match;
    // Five or more columns of whitespace after the marker, none at all, or blank content:
    // in all of these cases the content is considered to start one column past the marker.
    const usesBaselineIndent = numOfColumnsAfterMarker >= 5 || numOfColumnsAfterMarker <= 0 || isLineEmpty(match.content);
    if (usesBaselineIndent) {
        // Give the whitespace after the marker back to the content, minus the single separating space.
        const restored = " ".repeat(numOfColumnsAfterMarker) + content;
        content = restored.charAt(0) === " " ? restored.slice(1) : restored;
        // The baseline content indentation is one space past the marker
        numOfColumns = numOfColumns - numOfColumnsAfterMarker + 1;
    }
    if (isOrdered) {
        return {
            kind: "ordered",
            content,
            numOfColumns,
            value: match.value,
            delimiter: match.delimiter
        };
    }
    return {
        kind: "unordered",
        content,
        numOfColumns,
        marker: match.marker
    };
}
2514
/**
 * Parses a line as the start of an ordered list item. The line must satisfy all of the following:
 * - It is not an indented code line.
 * - Its first non-whitespace character is a digit (0-9), optionally followed by more digits.
 * - There are at most 9 digits.
 * - The digits are immediately followed by a delimiter: a period '.' or a closing parenthesis ')'.
 * - If anything follows the delimiter, its first character is a space or a tab.
 *
 * Examples of the digit run (␣ is a space):
 *   "␣1)␣This␣"  → 1 digit,  delimiter ")" at index 2
 *   "␣12)␣The␣"  → 2 digits, delimiter ")" at index 3
 *
 * @param line - The line to check.
 * @returns `null` if the line is not an ordered list item. Otherwise an object with the item
 * `content`, `numOfColumns` (column at which the content starts, tabs expanded to the next
 * multiple of 4), `numOfColumnsAfterMarker` (columns of whitespace between the delimiter and
 * the content), the numeric `value`, and the `delimiter` character.
 */
function parseOrderedListItem(line) {
    if (isIndentedCodeLine(line)) return null;
    const markerStart = getFirstNonspaceIndex(line);
    // The marker has to open with a digit.
    const firstCode = line.charCodeAt(markerStart);
    if (firstCode < 0x30 /* 0 */ || firstCode > 0x39 /* 9 */ ) {
        return null;
    }
    // Measure the run of consecutive digits; the first one has already been verified.
    let digitCount = 1;
    for(let index = markerStart + 1; index < line.length; index++){
        const code = line.charCodeAt(index);
        const isDigit = code >= 0x30 /* 0 */ && code <= 0x39 /* 9 */ ;
        if (!isDigit) break;
        digitCount++;
        // A valid ordered list item cannot have more than 9 digits.
        if (digitCount > 9) return null;
    }
    // The digits must be directly followed by '.' or ')'.
    const delimiterIndex = markerStart + digitCount;
    const delimiter = line.charAt(delimiterIndex);
    if (delimiter !== "." && delimiter !== ")") return null;
    // The digits must form a valid number.
    const value = parseInt(line.slice(markerStart, delimiterIndex), 10);
    if (!Number.isFinite(value)) return null;
    // First position after the whole marker (digits plus delimiter).
    const afterMarkerIndex = delimiterIndex + 1;
    let cursor = afterMarkerIndex;
    // Unless the line ends right after the delimiter, a space or a tab must come next.
    if (cursor < line.length) {
        const separator = line.charAt(cursor);
        if (separator !== " " && separator !== "\t") {
            return null;
        }
    }
    // Walk over the whitespace after the marker, tracking the column (tabs jump to the next multiple of 4).
    let numOfColumns = cursor;
    for(; cursor < line.length; cursor++){
        const character = line.charAt(cursor);
        if (character === "\t") {
            numOfColumns += 4 - numOfColumns % 4;
        } else if (character === " ") {
            numOfColumns += 1;
        } else {
            break;
        }
    }
    return {
        content: line.slice(cursor),
        numOfColumns,
        numOfColumnsAfterMarker: numOfColumns - afterMarkerIndex,
        value,
        delimiter
    };
}
2594
/**
 * Parses a line as the start of an unordered (bullet) list item. The line must satisfy all of the following:
 * - It is not an indented code line.
 * - Its first non-whitespace character is one of the bullet markers '*', '+' or '-'.
 * - If anything follows the marker, its first character is a space or a tab.
 *
 * @param line - The line to check.
 * @returns `null` if the line is not an unordered list item. Otherwise an object with the item
 * `content`, the bullet `marker`, `numOfColumns` (column at which the content starts, tabs
 * expanded to the next multiple of 4), and `numOfColumnsAfterMarker` (columns of whitespace
 * between the marker and the content).
 */
function parseUnorderedListItem(line) {
    if (isIndentedCodeLine(line)) return null;
    const markerIndex = getFirstNonspaceIndex(line);
    const marker = line.charAt(markerIndex);
    const isBullet = marker === "*" || marker === "+" || marker === "-";
    if (!isBullet) {
        return null;
    }
    // First position after the marker.
    const afterMarkerIndex = markerIndex + 1;
    let cursor = afterMarkerIndex;
    // Unless the line ends right after the marker, a space or a tab must come next.
    if (cursor < line.length) {
        const separator = line.charAt(cursor);
        if (separator !== " " && separator !== "\t") {
            return null;
        }
    }
    // Walk over the whitespace after the marker, tracking the column (tabs jump to the next multiple of 4).
    let numOfColumns = cursor;
    for(; cursor < line.length; cursor++){
        const character = line.charAt(cursor);
        if (character === "\t") {
            numOfColumns += 4 - numOfColumns % 4;
        } else if (character === " ") {
            numOfColumns += 1;
        } else {
            break;
        }
    }
    return {
        content: line.slice(cursor),
        marker,
        numOfColumns,
        numOfColumnsAfterMarker: numOfColumns - afterMarkerIndex
    };
}
2627
// HTML block detection sequences per the CommonMark specification, in the order in which they are tried.
// Each entry carries the pattern that opens the block, an optional pattern that closes it, and two flags:
// whether the block may interrupt a paragraph and whether a blank line ends it.
const HTML_SEQUENCES = (()=>{
    // A block that runs until an explicit end pattern; it can interrupt a paragraph and is not ended by a blank line.
    const untilEndPattern = (startPattern, endPattern)=>({
            startPattern,
            endPattern,
            canInterruptParagraph: true,
            canBeInterruptedByBlankLine: false
        });
    // A block without an end pattern; it is ended by a blank line.
    const untilBlankLine = (startPattern, canInterruptParagraph)=>({
            startPattern,
            canInterruptParagraph,
            canBeInterruptedByBlankLine: true
        });
    return [
        // <script>, <pre>, <textarea> and <style> elements
        untilEndPattern(/^<(?:script|pre|textarea|style)(?:\s|>|$)/i, /<\/(?:script|pre|textarea|style)>/i),
        // Comments
        untilEndPattern(/^<!--/, /-->/),
        // Processing instructions
        untilEndPattern(/^<\?/, /\?>/),
        // Declarations ("<!" followed by a letter)
        untilEndPattern(/^<![A-Za-z]/, />/),
        // CDATA sections
        untilEndPattern(/^<!\[CDATA\[/, /\]\]>/),
        // Known block-level tag names (opening or closing)
        untilBlankLine(/^<[/]?(?:address|article|aside|base|basefont|blockquote|body|caption|center|col|colgroup|dd|details|dialog|dir|div|dl|dt|fieldset|figcaption|figure|footer|form|frame|frameset|h[123456]|head|header|hr|html|iframe|legend|li|link|main|menu|menuitem|nav|noframes|ol|optgroup|option|p|param|section|search|summary|table|tbody|td|tfoot|th|thead|title|tr|track|ul)(?:\s|[/]?[>]|$)/i, true),
        // Any other complete open or close tag alone on its line; this one cannot interrupt a paragraph
        untilBlankLine(new RegExp("^(?:" + OPEN_TAG + "|" + CLOSE_TAG + ")\\s*$", "i"), false)
    ];
})();
2670
/**
 * Attempts to parse a line as the opening of an HTML block.
 *
 * The line must not be an indented code line and its first non-whitespace character must be "<".
 * The rest of the line (from that "<" on) is then tested against each entry of HTML_SEQUENCES
 * in order; the first entry whose start pattern matches wins.
 *
 * @param line - The line to parse.
 * @returns `null` if the line does not open an HTML block. Otherwise an object with the matching
 * sequence's `endPattern` (`undefined` when the sequence has none), `canInterruptParagraph` and
 * `canBeInterruptedByBlankLine`.
 */
function parseHTMLBlockStart(line) {
    if (isIndentedCodeLine(line)) return null;
    const tagStart = getFirstNonspaceIndex(line);
    if (line.charAt(tagStart) !== "<") return null;
    const candidate = line.slice(tagStart);
    for (const sequence of HTML_SEQUENCES){
        if (!sequence.startPattern.test(candidate)) continue;
        const { endPattern, canInterruptParagraph, canBeInterruptedByBlankLine } = sequence;
        return {
            endPattern,
            canInterruptParagraph,
            canBeInterruptedByBlankLine
        };
    }
    return null;
}
2687
/**
 * Tells whether a line is blank, i.e. nothing is left once surrounding whitespace is trimmed.
 *
 * @param line - The line to check.
 * @returns True if the line is empty or consists only of whitespace characters, false otherwise.
 */
function isLineEmpty(line) {
    const trimmed = line.trim();
    return trimmed === "";
}
2695
/**
 * Tells whether the given character is a space or a tab.
 *
 * @param char - The character to check.
 * @returns True if `char` is exactly " " or "\t", false otherwise.
 */
function isSpaceOrTab(char) {
    switch(char){
        case " ":
        case "\t":
            return true;
        default:
            return false;
    }
}
2698
+
2699
+ export { MarkdownParser };