@incremark/core 0.2.6 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1541 @@
1
+ import { Lexer, lexer } from 'marked';
2
+
3
+ // src/parser/ast/MarkedAstBuildter.ts
4
+
5
+ // src/extensions/html-extension/index.ts
6
// Tag names that are rejected when embedded HTML is converted to AST nodes.
var DEFAULT_TAG_BLACKLIST = [
  "script", "style", "iframe", "object", "embed", "form",
  "input", "button", "textarea", "select", "meta", "link",
  "base", "frame", "frameset", "applet", "noscript", "template"
];
// Attribute names that are always removed.
// Event-handler attributes are not listed here; isAttrBlacklisted rejects
// every name that starts with "on".
var DEFAULT_ATTR_BLACKLIST = ["formaction", "xlink:href", "xmlns", "srcdoc"];
// URL schemes rejected in URL-valued attributes.
// Note: data:image/ URLs are special-cased and allowed by isProtocolDangerous.
var DEFAULT_PROTOCOL_BLACKLIST = ["javascript:", "vbscript:", "data:"];
// Attributes whose value is checked against the protocol blacklist.
var URL_ATTRS = ["href", "src", "action", "formaction", "poster", "background"];
// Elements treated as self-closing even without a trailing slash.
var VOID_ELEMENTS = [
  "br", "hr", "img", "input", "meta", "link", "area",
  "base", "col", "embed", "source", "track", "wbr"
];
41
/**
 * Classifies a raw HTML string by its overall shape.
 *
 * @param {string} html Raw HTML text (surrounding whitespace is ignored).
 * @returns {"unknown"|"closing"|"self-closing"|"opening"|"fragment"}
 *   "closing" for a lone `</tag>`, "self-closing"/"opening" for a lone start
 *   tag, "fragment" when more than one tag is present, otherwise "unknown".
 */
function detectHtmlContentType(html) {
  const source = html.trim();
  if (source === "" || !source.startsWith("<")) {
    return "unknown";
  }
  if (/^<\/([a-zA-Z][a-zA-Z0-9-]*)\s*>$/.test(source)) {
    return "closing";
  }
  const single = source.match(/^<([a-zA-Z][a-zA-Z0-9-]*)(\s[^]*?)?(\/?)>$/);
  if (single !== null) {
    const name = single[1];
    const attrText = single[2];
    const slash = single[3];
    if (attrText) {
      // A "<" outside of quotes means the "attributes" actually contain
      // another tag, so the input is a fragment rather than a single tag.
      let quote = "";
      for (const ch of attrText) {
        if (quote) {
          if (ch === quote) quote = "";
        } else if (ch === '"' || ch === "'") {
          quote = ch;
        } else if (ch === "<") {
          return "fragment";
        }
      }
    }
    if (slash === "/" || VOID_ELEMENTS.includes(name.toLowerCase())) {
      return "self-closing";
    }
    return "opening";
  }
  const ltCount = source.split("<").length - 1;
  return ltCount > 1 ? "fragment" : "unknown";
}
83
/**
 * Parses a string that consists of exactly one HTML tag.
 *
 * @param {string} html Raw tag text.
 * @returns {{tagName: string, attrs: Object, isClosing: boolean, isSelfClosing: boolean, rawHtml: string}|null}
 *   Parsed tag info, or null when detectHtmlContentType does not classify
 *   the input as an opening, closing or self-closing tag.
 */
function parseHtmlTag(html) {
  const source = html.trim();
  const kind = detectHtmlContentType(source);
  if (kind === "closing") {
    const closing = /^<\/([a-zA-Z][a-zA-Z0-9-]*)\s*>$/.exec(source);
    if (closing === null) return null;
    return {
      tagName: closing[1].toLowerCase(),
      attrs: {},
      isClosing: true,
      isSelfClosing: false,
      rawHtml: html
    };
  }
  if (kind !== "opening" && kind !== "self-closing") {
    return null;
  }
  const opening = /^<([a-zA-Z][a-zA-Z0-9-]*)(\s[^]*?)?(\/?)>$/.exec(source);
  if (opening === null) return null;
  const lowerName = opening[1].toLowerCase();
  const attrSource = opening[2];
  const isSelfClosing = opening[3] === "/" || VOID_ELEMENTS.includes(lowerName);
  const attrs = {};
  if (attrSource) {
    // name, then an optional value that is double-quoted, single-quoted or bare.
    const attrPattern = /([a-zA-Z_:][-a-zA-Z0-9_:.]*)\s*(?:=\s*(?:"([^"]*)"|'([^']*)'|([^\s"'=<>`]+)))?/g;
    for (let hit = attrPattern.exec(attrSource); hit !== null; hit = attrPattern.exec(attrSource)) {
      attrs[hit[1].toLowerCase()] = decodeHtmlEntities(hit[2] ?? hit[3] ?? hit[4] ?? "");
    }
  }
  return {
    tagName: lowerName,
    attrs,
    isClosing: false,
    isSelfClosing,
    rawHtml: html
  };
}
122
/**
 * Decodes a small set of named HTML entities plus decimal/hex numeric
 * character references.
 *
 * Numeric references are decoded as full Unicode code points, so astral
 * characters (e.g. `&#128512;`) are decoded correctly instead of being
 * truncated to 16 bits. References above U+10FFFF and unknown named entities
 * are left untouched.
 *
 * @param {string} text Text that may contain entities.
 * @returns {string} The decoded text.
 */
function decodeHtmlEntities(text) {
  // A Map avoids accidental hits on Object.prototype members (e.g. "&constructor;").
  const namedEntities = new Map([
    ["amp", "&"],
    ["lt", "<"],
    ["gt", ">"],
    ["quot", '"'],
    ["apos", "'"],
    ["nbsp", " "]
  ]);
  const MAX_CODE_POINT = 0x10ffff;
  const fromReference = (digits, radix, original) => {
    const codePoint = Number.parseInt(digits, radix);
    // String.fromCodePoint throws a RangeError for out-of-range values.
    return codePoint <= MAX_CODE_POINT ? String.fromCodePoint(codePoint) : original;
  };
  return text.replace(/&(?:#(\d+)|#x([a-fA-F0-9]+)|([a-zA-Z]+));/g, (match, dec, hex, name) => {
    if (dec) return fromReference(dec, 10, match);
    if (hex) return fromReference(hex, 16, match);
    return namedEntities.get(name) || match;
  });
}
138
/**
 * Parses a single tag token without first classifying it.
 *
 * @param {string} tag Raw tag text such as `<a href="x">` or `</a>`.
 * @returns {{tagName: string, attrs: Object, isClosing: boolean, isSelfClosing: boolean, rawHtml: string}|null}
 *   Parsed tag info, or null when the text is not shaped like a tag.
 */
function parseTagDirect(tag) {
  const source = tag.trim();
  const closing = /^<\/([a-zA-Z][a-zA-Z0-9-]*)\s*>$/.exec(source);
  if (closing !== null) {
    return {
      tagName: closing[1].toLowerCase(),
      attrs: {},
      isClosing: true,
      isSelfClosing: false,
      rawHtml: tag
    };
  }
  const opening = /^<([a-zA-Z][a-zA-Z0-9-]*)([\s\S]*?)(\/?)>$/.exec(source);
  if (opening === null) return null;
  const lowerName = opening[1].toLowerCase();
  const attrSource = opening[2];
  const isSelfClosing = opening[3] === "/" || VOID_ELEMENTS.includes(lowerName);
  const attrs = {};
  if (attrSource) {
    // name, then an optional value that is double-quoted, single-quoted or bare.
    const attrPattern = /([a-zA-Z_:][-a-zA-Z0-9_:.]*)\s*(?:=\s*(?:"([^"]*)"|'([^']*)'|([^\s"'=<>`]+)))?/g;
    for (let hit = attrPattern.exec(attrSource); hit !== null; hit = attrPattern.exec(attrSource)) {
      attrs[hit[1].toLowerCase()] = decodeHtmlEntities(hit[2] ?? hit[3] ?? hit[4] ?? "");
    }
  }
  return {
    tagName: lowerName,
    attrs,
    isClosing: false,
    isSelfClosing,
    rawHtml: tag
  };
}
172
/**
 * Parses an HTML fragment into a tree of `htmlElement` / `text` nodes.
 *
 * Blacklisted tags are skipped (their start and end tags are ignored) and
 * attributes are passed through sanitizeAttrs. A closing tag closes every
 * element opened after its matching start tag, so mis-nested input such as
 * `<a><b>x</a><c>` no longer places `<c>` inside the already-closed `<a>`.
 * Elements that are still open at the end of input are closed implicitly.
 *
 * @param {string} html Fragment source.
 * @param {Object} [options] Sanitizing options (`tagBlacklist`,
 *   `attrBlacklist`, `protocolBlacklist`, `preserveRawHtml`).
 * @returns {Array<Object>} Top-level element nodes.
 */
function parseHtmlFragment(html, options = {}) {
  const roots = [];
  const openElements = [];
  // Attach a finished node to the innermost open element, or to the root list.
  const attach = (node) => {
    const parent = openElements[openElements.length - 1];
    if (parent) {
      parent.children.push(node);
    } else {
      roots.push(node);
    }
  };
  const tokenRegex = /(<\/?[a-zA-Z][^>]*>)|([^<]+)/g;
  let match;
  while ((match = tokenRegex.exec(html)) !== null) {
    const [, tag, text] = match;
    if (tag) {
      const parsed = parseTagDirect(tag);
      if (!parsed || isTagBlacklisted(parsed.tagName, options)) continue;
      if (parsed.isClosing) {
        let matchIndex = openElements.length - 1;
        while (matchIndex >= 0 && openElements[matchIndex].tagName !== parsed.tagName) {
          matchIndex--;
        }
        // A stray closing tag with no matching open element is ignored.
        if (matchIndex === -1) continue;
        // Close the matched element and everything opened inside it.
        while (openElements.length > matchIndex) {
          attach(openElements.pop());
        }
        continue;
      }
      const node = {
        type: "htmlElement",
        tagName: parsed.tagName,
        attrs: sanitizeAttrs(parsed.attrs, options),
        children: [],
        data: options.preserveRawHtml !== false ? {
          rawHtml: tag,
          parsed: true
        } : void 0
      };
      if (parsed.isSelfClosing) {
        attach(node);
      } else {
        openElements.push(node);
      }
    } else if (text && text.trim()) {
      // NOTE(review): text outside of any open element is dropped, as in the
      // original implementation; confirm whether callers rely on that.
      if (openElements.length > 0) {
        openElements[openElements.length - 1].children.push({
          type: "text",
          value: text
        });
      }
    }
  }
  // Implicitly close whatever is still open.
  while (openElements.length > 0) {
    attach(openElements.pop());
  }
  return roots;
}
242
/**
 * Tells whether a tag name is on the active tag blacklist.
 *
 * @param {string} tagName Tag name in any letter case.
 * @param {Object} options Uses `options.tagBlacklist` when set, otherwise
 *   DEFAULT_TAG_BLACKLIST.
 * @returns {boolean}
 */
function isTagBlacklisted(tagName, options) {
  const denied = options.tagBlacklist ?? DEFAULT_TAG_BLACKLIST;
  const lowered = tagName.toLowerCase();
  return denied.includes(lowered);
}
246
/**
 * Tells whether an attribute must be removed.
 *
 * Every name starting with "on" is rejected regardless of the list; other
 * names are looked up in `options.attrBlacklist` (or DEFAULT_ATTR_BLACKLIST).
 *
 * @param {string} attrName Attribute name in any letter case.
 * @param {Object} options Sanitizing options.
 * @returns {boolean}
 */
function isAttrBlacklisted(attrName, options) {
  const lowered = attrName.toLowerCase();
  const denied = options.attrBlacklist ?? DEFAULT_ATTR_BLACKLIST;
  return lowered.startsWith("on") || denied.includes(lowered);
}
252
/**
 * Tells whether a URL starts with a blacklisted scheme.
 *
 * Before comparing, the URL is normalized the way browsers do when they
 * parse it: leading whitespace/control characters are removed and embedded
 * tabs and newlines are dropped, so `java\tscript:` or `\x01javascript:`
 * cannot slip past the prefix check. `data:image/` URLs stay allowed even
 * when `data:` is blacklisted.
 *
 * @param {string} url Attribute value to check.
 * @param {Object} options Uses `options.protocolBlacklist` when set,
 *   otherwise DEFAULT_PROTOCOL_BLACKLIST.
 * @returns {boolean} True when the URL must be rejected.
 */
function isProtocolDangerous(url, options) {
  const protocolBlacklist = options.protocolBlacklist ?? DEFAULT_PROTOCOL_BLACKLIST;
  const normalizedUrl = url
    .trim()
    .replace(/^[\u0000-\u0020]+/, "")
    .replace(/[\t\n\r]/g, "")
    .toLowerCase();
  for (const protocol of protocolBlacklist) {
    if (normalizedUrl.startsWith(protocol)) {
      if (protocol === "data:" && normalizedUrl.startsWith("data:image/")) {
        return false;
      }
      return true;
    }
  }
  return false;
}
265
/**
 * Returns a copy of `attrs` without blacklisted attributes and without
 * URL attributes whose value uses a dangerous protocol.
 *
 * @param {Object} attrs Attribute name → value map.
 * @param {Object} options Sanitizing options.
 * @returns {Object} The surviving attributes.
 */
function sanitizeAttrs(attrs, options) {
  const kept = {};
  Object.entries(attrs).forEach(([name, value]) => {
    if (isAttrBlacklisted(name, options)) return;
    const isUrlAttr = URL_ATTRS.includes(name.toLowerCase());
    if (isUrlAttr && isProtocolDangerous(value, options)) return;
    kept[name] = value;
  });
  return kept;
}
276
/**
 * Tells whether an AST node is a raw `html` node.
 *
 * @param {{type: string}} node
 * @returns {boolean}
 */
function isHtmlNode(node) {
  const { type } = node;
  return type === "html";
}
279
/**
 * Tells whether a node carries a `children` array.
 *
 * @param {Object} node
 * @returns {boolean}
 */
function hasChildren(node) {
  if (!("children" in node)) {
    return false;
  }
  return Array.isArray(node.children);
}
282
/**
 * Joins consecutive `html` nodes that together form one element.
 *
 * When an html node leaves tags unclosed, the html nodes that directly
 * follow it are appended (joined with "\n") until every pending tag is
 * closed, a non-html node is reached, or the list ends.
 *
 * @param {Array<Object>} nodes Sibling AST nodes.
 * @returns {Array<Object>} A new list with fragmented html nodes merged.
 */
function mergeFragmentedHtmlNodes(nodes) {
  const merged = [];
  let index = 0;
  while (index < nodes.length) {
    const current = nodes[index];
    const pending = isHtmlNode(current) ? findUnclosedTags(current.value) : [];
    if (pending.length === 0) {
      merged.push(current);
      index += 1;
      continue;
    }
    const parts = [current.value];
    let stillOpen = [...pending];
    let cursor = index + 1;
    while (cursor < nodes.length && stillOpen.length > 0) {
      const candidate = nodes[cursor];
      if (!isHtmlNode(candidate)) break;
      const closing = checkClosingTags(candidate.value, stillOpen);
      parts.push(candidate.value);
      if (closing.hasRelevantClosing) {
        stillOpen = closing.remainingUnclosed;
      }
      cursor += 1;
    }
    if (parts.length > 1) {
      merged.push({
        type: "html",
        value: parts.join("\n")
      });
      index = cursor;
    } else {
      merged.push(current);
      index += 1;
    }
  }
  return merged;
}
335
/**
 * Lists the tag names that are opened but not closed inside `html`.
 *
 * Void elements and tags ending in "/>" are ignored. A closing tag removes
 * the most recent open tag of the same name.
 *
 * @param {string} html HTML source.
 * @returns {string[]} Lower-cased names of tags left open, in opening order.
 */
function findUnclosedTags(html) {
  const open = [];
  const tagPattern = /<\/?([a-zA-Z][a-zA-Z0-9-]*)[^>]*\/?>/g;
  for (let hit = tagPattern.exec(html); hit !== null; hit = tagPattern.exec(html)) {
    const [fullTag, rawName] = hit;
    const name = rawName.toLowerCase();
    if (VOID_ELEMENTS.includes(name) || fullTag.endsWith("/>")) {
      continue;
    }
    if (!fullTag.startsWith("</")) {
      open.push(name);
      continue;
    }
    const at = open.lastIndexOf(name);
    if (at !== -1) {
      open.splice(at, 1);
    }
  }
  return open;
}
356
/**
 * Applies the closing tags found in `html` to a list of pending tag names.
 *
 * @param {string} html HTML source to scan for `</tag>`.
 * @param {string[]} unclosedTags Pending tag names (not mutated).
 * @returns {{hasRelevantClosing: boolean, remainingUnclosed: string[]}}
 *   Whether at least one pending tag was closed, and what is still pending.
 */
function checkClosingTags(html, unclosedTags) {
  const pending = unclosedTags.slice();
  const closePattern = /<\/([a-zA-Z][a-zA-Z0-9-]*)\s*>/g;
  let closedAny = false;
  for (let hit = closePattern.exec(html); hit !== null; hit = closePattern.exec(html)) {
    const at = pending.lastIndexOf(hit[1].toLowerCase());
    if (at === -1) continue;
    pending.splice(at, 1);
    closedAny = true;
  }
  return {
    hasRelevantClosing: closedAny,
    remainingUnclosed: pending
  };
}
374
/**
 * Converts raw `html` nodes in a sibling list into structured `htmlElement`
 * nodes, recursing into the children of every other node.
 *
 * Per html node, depending on detectHtmlContentType:
 * - "fragment": replaced by the result of parseHtmlFragment (kept as-is when
 *   that result is empty);
 * - "self-closing": replaced by a childless element, or dropped when
 *   unparsable or blacklisted;
 * - "closing": dropped;
 * - "opening": following siblings up to the matching closing tag (or the end
 *   of the list) become the element's children; dropped when unparsable or
 *   blacklisted;
 * - anything else: kept unchanged.
 *
 * @param {Array<Object>} nodes Sibling AST nodes.
 * @param {Object} options Sanitizing options.
 * @returns {Array<Object>} The transformed sibling list.
 */
function processHtmlNodesInArray(nodes, options) {
  const queue = mergeFragmentedHtmlNodes(nodes);
  const output = [];
  const buildElement = (parsed, rawHtml, children) => ({
    type: "htmlElement",
    tagName: parsed.tagName,
    attrs: sanitizeAttrs(parsed.attrs, options),
    children,
    data: options.preserveRawHtml !== false ? {
      rawHtml,
      parsed: true,
      originalType: "html"
    } : void 0
  });
  let index = 0;
  while (index < queue.length) {
    const node = queue[index];
    if (!isHtmlNode(node)) {
      if (hasChildren(node)) {
        output.push({
          ...node,
          children: processHtmlNodesInArray(node.children, options)
        });
      } else {
        output.push(node);
      }
      index += 1;
      continue;
    }
    switch (detectHtmlContentType(node.value)) {
      case "fragment": {
        const fragmentNodes = parseHtmlFragment(node.value, options);
        if (fragmentNodes.length > 0) {
          output.push(...fragmentNodes);
        } else {
          output.push(node);
        }
        index += 1;
        break;
      }
      case "self-closing": {
        const parsed = parseHtmlTag(node.value);
        if (parsed && !isTagBlacklisted(parsed.tagName, options)) {
          output.push(buildElement(parsed, node.value, []));
        }
        index += 1;
        break;
      }
      case "closing":
        // A closing tag without a preceding opening tag is discarded.
        index += 1;
        break;
      case "opening": {
        const parsed = parseHtmlTag(node.value);
        if (!parsed || isTagBlacklisted(parsed.tagName, options)) {
          index += 1;
          break;
        }
        const inner = [];
        let depth = 1;
        let closed = false;
        let cursor = index + 1;
        // Collect siblings until the closing tag that balances this opening
        // tag; same-named opening tags in between increase the depth.
        for (; cursor < queue.length && depth > 0; cursor += 1) {
          const candidate = queue[cursor];
          if (isHtmlNode(candidate)) {
            const candidateKind = detectHtmlContentType(candidate.value);
            if (candidateKind === "closing" || candidateKind === "opening") {
              const other = parseHtmlTag(candidate.value);
              if (other && other.tagName === parsed.tagName) {
                depth += candidateKind === "opening" ? 1 : -1;
                if (depth === 0) {
                  closed = true;
                  break;
                }
              }
            }
          }
          inner.push(candidate);
        }
        output.push(buildElement(parsed, node.value, processHtmlNodesInArray(inner, options)));
        // Skip the closing tag itself when one was found.
        index = closed ? cursor + 1 : cursor;
        break;
      }
      default:
        output.push(node);
        index += 1;
    }
  }
  return output;
}
478
/**
 * Returns a shallow copy of `ast` whose children have had their raw html
 * nodes converted by processHtmlNodesInArray.
 *
 * @param {{children: Array<Object>}} ast Root node.
 * @param {Object} [options] Sanitizing options.
 * @returns {Object} The new root node.
 */
function transformHtmlNodes(ast, options = {}) {
  const copy = { ...ast };
  copy.children = processHtmlNodesInArray(ast.children, options);
  return copy;
}
484
+
485
+ // src/parser/ast/types.ts
486
/**
 * Collects the marked extensions contributed by plugins.
 *
 * Only plugins whose `type` is "marked" or "both" and that have a `marked`
 * property contribute.
 *
 * @param {Iterable<Object>} plugins Plugin descriptors.
 * @returns {Array} All contributed extensions, in plugin order.
 */
function extractMarkedExtensions(plugins) {
  const collected = [];
  for (const plugin of plugins) {
    const targetsMarked = plugin.type === "marked" || plugin.type === "both";
    if (!targetsMarked || !plugin.marked) continue;
    for (const extension of plugin.marked.extensions) {
      collected.push(extension);
    }
  }
  return collected;
}
495
+
496
+ // src/extensions/marked-extensions/explicitDefinitionExtension.ts
497
/**
 * Creates the block-level marked extension that tokenizes link reference
 * definitions (`[id]: url "title"`).
 *
 * @returns {Object} A marked extension (`name`, `level`, `start`,
 *   `tokenizer`, `renderer`).
 */
function createExplicitDefinitionExtension() {
  return {
    name: "explicitDefinition",
    level: "block",
    // Key fix: start must match the complete definition pattern `[id]:`
    // rather than just `[`; otherwise the `[alt]` in `![alt][id]` would be
    // mistaken for the start of a definition.
    // Footnote definitions `[^id]:` are excluded as well.
    start(src) {
      const found = /^ {0,3}\[(?!\^)[^\]]+\]:/m.exec(src);
      return found?.index;
    },
    tokenizer(src) {
      const line = /^ {0,3}\[(?!\^)[^\]]+\]:.*?(?:\n+|$)/.exec(src);
      if (line === null) {
        return void 0;
      }
      const raw = line[0];
      const parts = /^ {0,3}\[([^\]]+)\]:\s*(\S+)(?:\s+["'(](.*?)["')])?/.exec(raw);
      if (parts === null) {
        // The line looks like a definition but has no destination.
        return { type: "explicitDefinition", raw, identifier: "", url: "" };
      }
      const [, label, url, title] = parts;
      const identifier = label.toLowerCase();
      // Register the definition with the lexer, when one is attached.
      const links = this.lexer?.tokens?.links;
      if (links) {
        links[identifier] = { href: url, title };
      }
      return {
        type: "explicitDefinition",
        raw,
        identifier,
        url,
        title
      };
    },
    renderer() {
      return "";
    }
  };
}
540
+
541
+ // src/extensions/marked-extensions/optimisticReferenceExtension.ts
542
/**
 * Creates the inline marked extension that tokenizes reference-style links
 * and images (`[text][id]`, `[text][]`, `[text]`, with optional leading `!`)
 * without requiring the definition to be known.
 *
 * @returns {Object} A marked extension (`name`, `level`, `start`,
 *   `tokenizer`, `renderer`).
 */
function createOptimisticReferenceExtension() {
  return {
    name: "optimisticReference",
    level: "inline",
    start(src) {
      const found = /!?\[/.exec(src);
      return found?.index;
    },
    tokenizer(src) {
      const found = /^(!?)\[((?:\[[^\]]*\]|[^\[\]]|\](?=[^\[]*\]))*)\](?:\s*\[((?:\[[^\]]*\]|[^\[\]]|\](?=[^\[]*\]))*)\])?/.exec(src);
      if (found === null) {
        return void 0;
      }
      const [raw, bang, text, refRaw] = found;
      // "(" means an inline link/image, ":" means a definition: not ours.
      const following = src[raw.length];
      if (following === "(" || following === ":") {
        return void 0;
      }
      // Footnote references are handled elsewhere.
      if (text.startsWith("^")) {
        return void 0;
      }
      let referenceType;
      let label;
      if (refRaw === void 0) {
        // `[ ]` / `[x]` / `[X]` are task-list checkboxes, not references.
        if (/^[ xX]$/.test(text)) {
          return void 0;
        }
        referenceType = "shortcut";
        label = text;
      } else if (refRaw === "") {
        referenceType = "collapsed";
        label = text;
      } else {
        referenceType = "full";
        label = refRaw;
      }
      return {
        type: "optimisticReference",
        raw,
        isImage: bang === "!",
        text,
        identifier: label.toLowerCase(),
        label,
        referenceType
      };
    },
    renderer() {
      return "";
    }
  };
}
600
+
601
+ // src/extensions/marked-extensions/mathExtension.ts
602
/**
 * Creates the block-level marked extension for `$$ ... $$` math blocks.
 *
 * @returns {Object} A marked extension (`name`, `level`, `start`,
 *   `tokenizer`, `renderer`).
 */
function createBlockMathExtension() {
  return {
    name: "blockMath",
    level: "block",
    start(src) {
      const found = /^ {0,3}\$\$/m.exec(src);
      return found?.index;
    },
    tokenizer(src) {
      const found = /^ {0,3}\$\$([\s\S]*?)\$\$ *(?:\n+|$)/.exec(src);
      if (found === null) {
        return void 0;
      }
      const [raw, body] = found;
      return {
        type: "blockMath",
        raw,
        text: body.trim()
      };
    },
    renderer() {
      return "";
    }
  };
}
627
/**
 * Creates the inline marked extension for `$ ... $` math spans.
 *
 * `start` reports the first `$` that is not part of a `$$` pair. It keeps
 * scanning past `$$` pairs, so inline math that follows a `$$` in the same
 * text is still found.
 *
 * @returns {Object} A marked extension (`name`, `level`, `start`,
 *   `tokenizer`, `renderer`).
 */
function createInlineMathExtension() {
  return {
    name: "inlineMath",
    level: "inline",
    start(src) {
      let index = src.indexOf("$");
      while (index !== -1) {
        if (src[index + 1] !== "$") return index;
        // Skip both characters of the "$$" pair and keep looking.
        index = src.indexOf("$", index + 2);
      }
      return void 0;
    },
    tokenizer(src) {
      // The closing "$" must not be followed by a digit, so that prices such
      // as "$5 and $6" are not read as math.
      const rule = /^\$(?!\$)((?:\\.|[^\\\n$])+?)\$(?!\d)/;
      const match = rule.exec(src);
      if (match) {
        return {
          type: "inlineMath",
          raw: match[0],
          text: match[1].trim()
        };
      }
      return void 0;
    },
    renderer() {
      return "";
    }
  };
}
654
+
655
+ // src/extensions/marked-extensions/footnoteDefinitionExtension.ts
656
/**
 * Creates the block-level marked extension for footnote definitions
 * (`[^id]: text`), including indented continuation lines.
 *
 * @returns {Object} A marked extension (`name`, `level`, `start`,
 *   `tokenizer`, `renderer`).
 */
function createFootnoteDefinitionExtension() {
  const isBlank = (line) => line.trim() === "";
  const isIndented = (line) => /^( |\t)/.test(line);
  return {
    name: "footnoteDefinitionBlock",
    level: "block",
    start(src) {
      const found = /^ {0,3}\[\^[^\]]+\]:/m.exec(src);
      return found?.index;
    },
    tokenizer(src) {
      const head = /^ {0,3}\[\^([a-zA-Z0-9_-]+)\]:\s*(.*)/.exec(src);
      if (head === null) return void 0;
      const identifier = head[1];
      let raw = head[0];
      let content = head[2];
      const rest = src.slice(raw.length);
      const lines = rest.split("\n");
      let cursor = 0;
      // Consume the newline that ends the first line.
      if (lines[0] === "" && rest.startsWith("\n")) {
        cursor = 1;
        raw += "\n";
        content += "\n";
      }
      for (; cursor < lines.length; cursor += 1) {
        const line = lines[cursor];
        let belongs;
        if (isBlank(line)) {
          // A blank line is kept only when the next non-blank line is indented.
          const nextText = lines.slice(cursor + 1).find((later) => !isBlank(later));
          belongs = nextText !== void 0 && isIndented(nextText);
        } else {
          // Any non-indented line (including another footnote definition)
          // ends this definition.
          belongs = isIndented(line);
        }
        if (!belongs) break;
        raw += line + (cursor < lines.length - 1 ? "\n" : "");
        content += "\n" + line;
      }
      return {
        type: "footnoteDefinitionBlock",
        raw,
        identifier,
        content: content.replace(/\n+$/, "")
      };
    },
    renderer() {
      return "";
    }
  };
}
724
+
725
+ // src/extensions/marked-extensions/inlineHtmlExtension.ts
726
// Tag names the inline HTML matcher treats as complete without a closing tag.
var SELF_CLOSING_TAGS = /* @__PURE__ */ new Set([
  "area", "base", "br", "col", "embed", "hr", "img",
  "input", "link", "meta", "param", "source", "track", "wbr"
]);
742
/**
 * Creates the inline marked extension that captures a complete inline HTML
 * element (or a self-closing tag) as a single `inlineHtml` token.
 *
 * @returns {Object} A marked extension (`name`, `level`, `start`,
 *   `tokenizer`, `renderer`).
 */
function createInlineHtmlExtension() {
  return {
    name: "inlineHtml",
    level: "inline",
    start(src) {
      const lt = src.indexOf("<");
      if (lt === -1) return void 0;
      // Only a "<" followed by a letter or "/" can begin a tag.
      return /[a-zA-Z\/]/.test(src.charAt(lt + 1)) ? lt : void 0;
    },
    tokenizer(src) {
      const matched = matchCompleteHtmlElement(src) || matchSelfClosingTag(src);
      if (!matched) return void 0;
      return {
        type: "inlineHtml",
        raw: matched,
        text: matched
      };
    },
    renderer() {
      return "";
    }
  };
}
777
/**
 * Matches a complete HTML element at the start of `src`: an opening tag, its
 * content, and the balancing closing tag (nested same-name tags are counted).
 * For tags in SELF_CLOSING_TAGS only the opening tag is returned.
 *
 * @param {string} src Source text beginning at a potential tag.
 * @returns {string|null} The matched source text, or null when there is no
 *   opening tag or no balancing closing tag.
 */
function matchCompleteHtmlElement(src) {
  const opening = /^<([a-zA-Z][a-zA-Z0-9]*)((?:\s+[a-zA-Z_:][a-zA-Z0-9_.:-]*(?:\s*=\s*(?:"[^"]*"|'[^']*'|[^\s"'=<>`]+))?)*)\s*>/.exec(src);
  if (opening === null) return null;
  const [openTag, rawName] = opening;
  const tagName = rawName.toLowerCase();
  if (SELF_CLOSING_TAGS.has(tagName)) {
    return openTag;
  }
  const body = src.slice(openTag.length);
  const openPattern = new RegExp(`<${tagName}(?:\\s[^>]*)?>`, "gi");
  const closePattern = new RegExp(`</${tagName}>`, "gi");
  let depth = 1;
  let cursor = 0;
  while (depth > 0 && cursor < body.length) {
    openPattern.lastIndex = cursor;
    closePattern.lastIndex = cursor;
    const nextOpen = openPattern.exec(body);
    const nextClose = closePattern.exec(body);
    if (!nextClose) {
      return null;
    }
    // Whichever tag comes first decides whether nesting grows or shrinks.
    const opensFirst = nextOpen !== null && nextOpen.index < nextClose.index;
    const consumed = opensFirst ? nextOpen : nextClose;
    depth += opensFirst ? 1 : -1;
    cursor = consumed.index + consumed[0].length;
  }
  return depth === 0 ? src.slice(0, openTag.length + cursor) : null;
}
811
/**
 * Matches a self-closing tag at the start of `src`: either one written with
 * an explicit `/>`, or a plain tag whose name is in SELF_CLOSING_TAGS.
 *
 * @param {string} src Source text beginning at a potential tag.
 * @returns {string|null} The matched tag text, or null.
 */
function matchSelfClosingTag(src) {
  const explicit = src.match(/^<([a-zA-Z][a-zA-Z0-9]*)((?:\s+[a-zA-Z_:][a-zA-Z0-9_.:-]*(?:\s*=\s*(?:"[^"]*"|'[^']*'|[^\s"'=<>`]+))?)*)\s*\/>/);
  if (explicit !== null) {
    return explicit[0];
  }
  const implicit = src.match(/^<([a-zA-Z][a-zA-Z0-9]*)((?:\s+[a-zA-Z_:][a-zA-Z0-9_.:-]*(?:\s*=\s*(?:"[^"]*"|'[^']*'|[^\s"'=<>`]+))?)*)\s*>/);
  if (implicit === null) {
    return null;
  }
  return SELF_CLOSING_TAGS.has(implicit[1].toLowerCase()) ? implicit[0] : null;
}
822
/**
 * Converts a `blockMath` token into a `math` node.
 *
 * @param {{text: string}} token
 * @returns {{type: "math", value: string, meta: null}}
 */
function transformBlockMath(token) {
  const { text } = token;
  return { type: "math", value: text, meta: null };
}
829
/**
 * Converts a `footnoteDefinitionBlock` token into a `footnoteDefinition`
 * node; the body is parsed via `ctx.parseFootnoteContent`.
 *
 * @param {{identifier: string, content: string}} token
 * @param {{parseFootnoteContent: Function}} ctx
 * @returns {Object} The footnoteDefinition node.
 */
function transformFootnoteDefinitionBlock(token, ctx) {
  const { identifier, content } = token;
  return {
    type: "footnoteDefinition",
    identifier,
    label: identifier,
    children: ctx.parseFootnoteContent(content)
  };
}
838
/**
 * Converts an `explicitDefinition` token into a `definition` node.
 *
 * @param {{identifier: string, url: string, title?: string}} token
 * @returns {Object|null} The definition node, or null when the token has no
 *   identifier or no url.
 */
function transformExplicitDefinition(token) {
  const { identifier, url } = token;
  if (!identifier || !url) {
    return null;
  }
  return {
    type: "definition",
    identifier,
    label: identifier,
    url,
    title: token.title ?? null
  };
}
848
/**
 * Converts a marked `def` token into a `definition` node, or, when its tag
 * starts with "^", into a `footnoteDefinition` whose single paragraph holds
 * the token's `href` as text.
 *
 * @param {{tag: string, href: string, title?: string}} token
 * @returns {Object} The resulting node.
 */
function transformDef(token) {
  const { tag, href } = token;
  if (!tag.startsWith("^")) {
    return {
      type: "definition",
      identifier: tag,
      label: tag,
      url: href,
      title: token.title ?? null
    };
  }
  const footnoteId = tag.slice(1);
  const paragraph = {
    type: "paragraph",
    children: [{ type: "text", value: href }]
  };
  return {
    type: "footnoteDefinition",
    identifier: footnoteId,
    label: footnoteId,
    children: [paragraph]
  };
}
871
/**
 * Converts a `container` token into a `containerDirective` node.
 *
 * Attributes are parsed from `token.attrs` as `key`, `key=value`,
 * `key="value"` or `key='value'`. A missing `attrs` yields an empty
 * attribute map (previously it was coerced to the string "undefined" and
 * produced a bogus `undefined` attribute).
 *
 * @param {{name: string, attrs?: string, tokens: Array}} token
 * @param {{transformTokensWithPosition: Function}} ctx
 * @returns {Object} The containerDirective node.
 */
function transformContainer(token, ctx) {
  const attributes = {};
  const attrSource = String(token.attrs ?? "");
  const attrRegex = /([a-zA-Z0-9_-]+)=?("([^"]*)"|'([^']*)'|([^ ]*))?/g;
  let match;
  while ((match = attrRegex.exec(attrSource)) !== null) {
    // Groups 3/4/5 hold the double-quoted, single-quoted and bare value.
    attributes[match[1]] = match[3] || match[4] || match[5] || "";
  }
  const children = ctx.transformTokensWithPosition(token.tokens);
  return {
    type: "containerDirective",
    name: token.name,
    attributes,
    children
  };
}
886
/**
 * Converts a `footnoteDefinition` token into a `footnoteDefinition` node
 * holding one paragraph built from the token's inline tokens.
 *
 * @param {{identifier: string, tokens: Array}} token
 * @param {{transformInline: Function}} ctx
 * @returns {Object} The footnoteDefinition node.
 */
function transformFootnoteDefToken(token, ctx) {
  const { identifier } = token;
  const paragraph = {
    type: "paragraph",
    children: ctx.transformInline(token.tokens)
  };
  return {
    type: "footnoteDefinition",
    identifier,
    label: identifier,
    children: [paragraph]
  };
}
899
/**
 * Converts a `heading` token into a `heading` node.
 *
 * @param {{depth: number, tokens: Array}} token
 * @param {{transformInline: Function}} ctx
 * @returns {{type: "heading", depth: number, children: Array}}
 */
function transformHeading(token, ctx) {
  const { depth, tokens } = token;
  return { type: "heading", depth, children: ctx.transformInline(tokens) };
}
906
/**
 * Converts a `paragraph` token into a `paragraph` node.
 *
 * @param {{tokens: Array}} token
 * @param {{transformInline: Function}} ctx
 * @returns {{type: "paragraph", children: Array}}
 */
function transformParagraph(token, ctx) {
  const children = ctx.transformInline(token.tokens);
  return { type: "paragraph", children };
}
912
/**
 * Converts a `code` token into a `code` node.
 *
 * @param {{lang?: string, text: string}} token
 * @returns {{type: "code", lang: string|null, meta: null, value: string}}
 */
function transformCode(token) {
  const { lang, text } = token;
  return {
    type: "code",
    lang: lang || null,
    // `meta` is always null, to line up with micromark output.
    meta: null,
    value: text
  };
}
921
/**
 * Converts a `blockquote` token into a `blockquote` node.
 *
 * @param {{tokens: Array}} token
 * @param {{transformTokens: Function}} ctx
 * @returns {{type: "blockquote", children: Array}}
 */
function transformBlockquote(token, ctx) {
  return { type: "blockquote", children: ctx.transformTokens(token.tokens) };
}
928
/**
 * Converts a `list` token into a `list` node with `listItem` children.
 *
 * For ordered lists `start` is the token's start number (a start of 0 is
 * preserved; a missing or empty start falls back to 1). Unordered lists get
 * `start: null`, which lines up with micromark output.
 *
 * @param {{ordered: boolean, start: (number|string), loose: boolean, items: Array}} token
 * @param {{transformTokens: Function}} ctx
 * @returns {Object} The list node.
 */
function transformList(token, ctx) {
  const children = token.items.map((item) => ({
    type: "listItem",
    spread: item.loose,
    // GFM task-list state; null when the item is not a task (micromark-compatible).
    checked: item.checked ?? null,
    children: ctx.transformTokens(item.tokens)
  }));
  let start = null;
  if (token.ordered) {
    // `token.start || 1` would turn a legitimate 0 into 1.
    start = token.start === 0 ? 0 : token.start || 1;
  }
  return {
    type: "list",
    ordered: token.ordered,
    start,
    spread: token.loose,
    children
  };
}
945
/**
 * Converts a `table` token into a `table` node whose first row is the header.
 *
 * @param {{header: Array, rows: Array<Array>, align: Array}} token
 * @param {{transformInline: Function}} ctx
 * @returns {Object} The table node.
 */
function transformTable(token, ctx) {
  const toCell = (cell) => ({
    type: "tableCell",
    children: ctx.transformInline(cell.tokens)
  });
  const toRow = (cells) => ({
    type: "tableRow",
    children: cells.map((cell) => toCell(cell))
  });
  const headerRow = toRow(token.header);
  const bodyRows = token.rows.map((row) => toRow(row));
  return {
    type: "table",
    align: token.align,
    children: [headerRow, ...bodyRows]
  };
}
963
/**
 * Builds the node for a horizontal rule.
 *
 * @returns {{type: "thematicBreak"}}
 */
function transformHr() {
  const node = { type: "thematicBreak" };
  return node;
}
966
/**
 * Converts an `html` token into a raw `html` node.
 *
 * @param {{text: string}} token
 * @returns {{type: "html", value: string}}
 */
function transformHtml(token) {
  const { text } = token;
  return { type: "html", value: text };
}
972
/**
 * Converts a block-level `text` token into a `paragraph` node. Inline tokens
 * are transformed when present; otherwise the raw text becomes one text node.
 *
 * @param {{tokens?: Array, text: string}} token
 * @param {{transformInline: Function}} ctx
 * @returns {{type: "paragraph", children: Array}}
 */
function transformTextBlock(token, ctx) {
  const children = token.tokens
    ? ctx.transformInline(token.tokens)
    : [{ type: "text", value: token.text }];
  return { type: "paragraph", children };
}
984
/**
 * Converts an `inlineMath` token into an `inlineMath` node.
 *
 * @param {{text: string}} token
 * @returns {{type: "inlineMath", value: string}}
 */
function transformInlineMath(token) {
  const { text } = token;
  return { type: "inlineMath", value: text };
}
990
/**
 * Converts an `optimisticReference` token into an `imageReference` or a
 * `linkReference` node. Link text is tokenized with a fresh marked Lexer; if
 * that yields no children, the raw text is used as a single text node.
 *
 * @param {{isImage: boolean, identifier: string, label: string, referenceType: string, text: string}} token
 * @param {{transformInline: Function}} ctx
 * @returns {Object} The reference node.
 */
function transformOptimisticReference(token, ctx) {
  const { identifier, label, referenceType, text } = token;
  if (token.isImage) {
    return { type: "imageReference", identifier, label, referenceType, alt: text };
  }
  const inlineTokens = new Lexer().inlineTokens(text);
  const labelChildren = ctx.transformInline(inlineTokens);
  const children = labelChildren.length ? labelChildren : [{ type: "text", value: text }];
  return { type: "linkReference", identifier, label, referenceType, children };
}
1009
/**
 * Converts a `link` token into a `link` node. A link whose text is "^"
 * followed by at least one character is turned into a `footnoteReference`.
 *
 * @param {{text: string, href: string, title?: string, tokens: Array}} token
 * @param {{transformInline: Function}} ctx
 * @returns {Object} The link or footnoteReference node.
 */
function transformLink(token, ctx) {
  const { text } = token;
  const looksLikeFootnote = text.startsWith("^") && text.length > 1;
  if (looksLikeFootnote) {
    const footnoteId = text.slice(1);
    return { type: "footnoteReference", identifier: footnoteId, label: footnoteId };
  }
  return {
    type: "link",
    url: token.href,
    // An empty title becomes null, to line up with micromark output.
    title: token.title || null,
    children: ctx.transformInline(token.tokens)
  };
}
1026
/**
 * Converts an `image` token into an `image` node.
 *
 * @param {{href: string, title?: string, text: string}} token
 * @returns {{type: "image", url: string, title: string|null, alt: string}}
 */
function transformImage(token) {
  const { href, title, text } = token;
  return {
    type: "image",
    url: href,
    // An empty title becomes null, to line up with micromark output.
    title: title || null,
    alt: text
  };
}
1035
/**
 * Splits an inline `text` token into text nodes and `footnoteReference`
 * nodes for every `[^id]` occurrence.
 *
 * @param {{text: string}} token
 * @returns {Array<Object>} Nodes in source order (empty for empty text).
 */
function transformText(token) {
  const { text } = token;
  const nodes = [];
  const pushText = (from, to) => {
    nodes.push({ type: "text", value: text.substring(from, to) });
  };
  const footnotePattern = /\[\^([a-zA-Z0-9_-]+)\]/g;
  let cursor = 0;
  for (let hit = footnotePattern.exec(text); hit !== null; hit = footnotePattern.exec(text)) {
    if (hit.index > cursor) {
      pushText(cursor, hit.index);
    }
    nodes.push({ type: "footnoteReference", identifier: hit[1], label: hit[1] });
    cursor = hit.index + hit[0].length;
  }
  if (cursor < text.length) {
    pushText(cursor, text.length);
  }
  return nodes;
}
1063
/**
 * Converts a `strong` token into a `strong` node.
 *
 * @param {{tokens: Array}} token
 * @param {{transformInline: Function}} ctx
 * @returns {{type: "strong", children: Array}}
 */
function transformStrong(token, ctx) {
  const children = ctx.transformInline(token.tokens);
  return { type: "strong", children };
}
1069
/**
 * Converts an `em` token into an `emphasis` node.
 *
 * @param {{tokens: Array}} token
 * @param {{transformInline: Function}} ctx
 * @returns {{type: "emphasis", children: Array}}
 */
function transformEmphasis(token, ctx) {
  const children = ctx.transformInline(token.tokens);
  return { type: "emphasis", children };
}
1075
/**
 * Converts a `codespan` token into an `inlineCode` node.
 *
 * @param {{text: string}} token
 * @returns {{type: "inlineCode", value: string}}
 */
function transformCodespan(token) {
  const { text } = token;
  return { type: "inlineCode", value: text };
}
1081
/**
 * Builds the node for a hard line break.
 *
 * @returns {{type: "break"}}
 */
function transformBreak() {
  const node = { type: "break" };
  return node;
}
1084
/**
 * Converts a `del` token into a `delete` node.
 *
 * @param {{tokens: Array}} token
 * @param {{transformInline: Function}} ctx
 * @returns {{type: "delete", children: Array}}
 */
function transformDelete(token, ctx) {
  const children = ctx.transformInline(token.tokens);
  return { type: "delete", children };
}
1090
/**
 * Converts an `inlineHtml` token into element nodes via parseHtmlFragment
 * (called with default options). When nothing could be parsed, the raw text
 * is returned as a single text node.
 *
 * @param {{text: string}} token
 * @returns {Array<Object>|Object} Parsed element nodes, or one text node.
 */
function transformInlineHtml(token) {
  const { text } = token;
  const elements = parseHtmlFragment(text);
  return elements.length > 0 ? elements : { type: "text", value: text };
}
1097
/**
 * Tells whether a token has the given `type`.
 *
 * @param {{type: string}} token
 * @param {string} type
 * @returns {boolean}
 */
function isTokenType(token, type) {
  const { type: actual } = token;
  return actual === type;
}
1100
// Dispatch table from block-token type to the function that builds its AST
// node. Every entry (except `hr` and `space`) re-checks the token type and
// yields null on a mismatch; `space` tokens never produce a node.
var builtinBlockTransformers = {
  blockMath: (token) =>
    isTokenType(token, "blockMath") ? transformBlockMath(token) : null,
  footnoteDefinitionBlock: (token, ctx) =>
    isTokenType(token, "footnoteDefinitionBlock")
      ? transformFootnoteDefinitionBlock(token, ctx)
      : null,
  explicitDefinition: (token) =>
    isTokenType(token, "explicitDefinition") ? transformExplicitDefinition(token) : null,
  def: (token) =>
    isTokenType(token, "def") ? transformDef(token) : null,
  container: (token, ctx) =>
    isTokenType(token, "container") ? transformContainer(token, ctx) : null,
  footnoteDefinition: (token, ctx) =>
    isTokenType(token, "footnoteDefinition") ? transformFootnoteDefToken(token, ctx) : null,
  heading: (token, ctx) =>
    isTokenType(token, "heading") ? transformHeading(token, ctx) : null,
  paragraph: (token, ctx) =>
    isTokenType(token, "paragraph") ? transformParagraph(token, ctx) : null,
  code: (token) =>
    isTokenType(token, "code") ? transformCode(token) : null,
  blockquote: (token, ctx) =>
    isTokenType(token, "blockquote") ? transformBlockquote(token, ctx) : null,
  list: (token, ctx) =>
    isTokenType(token, "list") ? transformList(token, ctx) : null,
  table: (token, ctx) =>
    isTokenType(token, "table") ? transformTable(token, ctx) : null,
  hr: () => transformHr(),
  html: (token) =>
    isTokenType(token, "html") ? transformHtml(token) : null,
  space: () => null,
  text: (token, ctx) =>
    isTokenType(token, "text") ? transformTextBlock(token, ctx) : null
};
1163
/**
 * Built-in inline-level transformers, keyed by token type.
 *
 * Each entry takes `(token, ctx)`. Guarded entries delegate to the matching
 * `transform*` function only when `token.type` equals the entry's key and
 * return `null` otherwise. `br` delegates unconditionally. `escape` tokens
 * share the `text` transformer.
 */
var builtinInlineTransformers = {
  inlineMath: (token) => isTokenType(token, "inlineMath") ? transformInlineMath(token) : null,
  optimisticReference: (token, ctx) => isTokenType(token, "optimisticReference") ? transformOptimisticReference(token, ctx) : null,
  link: (token, ctx) => isTokenType(token, "link") ? transformLink(token, ctx) : null,
  image: (token) => isTokenType(token, "image") ? transformImage(token) : null,
  text: (token) => isTokenType(token, "text") ? transformText(token) : null,
  escape: (token) => isTokenType(token, "escape") ? transformText(token) : null,
  strong: (token, ctx) => isTokenType(token, "strong") ? transformStrong(token, ctx) : null,
  em: (token, ctx) => isTokenType(token, "em") ? transformEmphasis(token, ctx) : null,
  codespan: (token) => isTokenType(token, "codespan") ? transformCodespan(token) : null,
  // No type guard here: the break transformer takes no arguments.
  br: () => transformBreak(),
  del: (token, ctx) => isTokenType(token, "del") ? transformDelete(token, ctx) : null,
  inlineHtml: (token) => isTokenType(token, "inlineHtml") ? transformInlineHtml(token) : null
};
1211
/**
 * Convert one block-level token into an AST node.
 *
 * Resolution order:
 *  1. `ctx.customBlockTransformers[token.type]`, if present. Its result is
 *     used unless it is `undefined`.
 *  2. `builtinBlockTransformers[token.type]`, if present. Its result is used
 *     unless it is `undefined` (a `null` result is returned as-is).
 *  3. Fallback: a token with a string `text` becomes a paragraph holding a
 *     single text node.
 *
 * Table entries that are not functions, or that are merely inherited from
 * `Object.prototype` (e.g. a token typed "constructor" or "toString"), are
 * ignored so such token types cannot be dispatched to `Object` builtins.
 *
 * @param {object} token - Block token; `token.type` selects the transformer.
 * @param {object} ctx - Transform context, passed through to the transformer.
 * @returns {*} The transformer's result, the fallback paragraph, or `null`
 *   when nothing applies.
 */
function transformBlockToken(token, ctx) {
  const tokenType = token.type;
  // Look up a callable handler, skipping members every plain object inherits.
  const resolve = (table) => {
    const candidate = table?.[tokenType];
    const usable = typeof candidate === "function" && candidate !== Object.prototype[tokenType];
    return usable ? candidate : void 0;
  };
  const customTable = ctx.customBlockTransformers;
  const customHandler = resolve(customTable);
  if (customHandler) {
    // Invoke as a method of the table so `this` matches a direct table call.
    const result = customHandler.call(customTable, token, ctx);
    if (result !== void 0) return result;
  }
  const builtinHandler = resolve(builtinBlockTransformers);
  if (builtinHandler) {
    const result = builtinHandler.call(builtinBlockTransformers, token, ctx);
    if (result !== void 0) return result;
  }
  if ("text" in token && typeof token.text === "string") {
    return {
      type: "paragraph",
      children: [{ type: "text", value: token.text }]
    };
  }
  return null;
}
1230
/**
 * Convert one inline token into an AST node (or whatever the selected
 * transformer returns, which may be an array of nodes).
 *
 * Resolution order:
 *  1. `ctx.customInlineTransformers[token.type]`, if present. Its result is
 *     used unless it is `undefined`.
 *  2. `builtinInlineTransformers[token.type]`, if present. Its result is used
 *     unless it is `undefined` (a `null` result is returned as-is).
 *  3. Fallback: a token with a string `text` becomes a plain text node.
 *
 * Table entries that are not functions, or that are merely inherited from
 * `Object.prototype` (e.g. a token typed "constructor" or "valueOf"), are
 * ignored so such token types cannot be dispatched to `Object` builtins.
 *
 * @param {object} token - Inline token; `token.type` selects the transformer.
 * @param {object} ctx - Transform context, passed through to the transformer.
 * @returns {*} The transformer's result, the fallback text node, or `null`
 *   when nothing applies.
 */
function transformInlineToken(token, ctx) {
  const tokenType = token.type;
  // Look up a callable handler, skipping members every plain object inherits.
  const resolve = (table) => {
    const candidate = table?.[tokenType];
    const usable = typeof candidate === "function" && candidate !== Object.prototype[tokenType];
    return usable ? candidate : void 0;
  };
  const customTable = ctx.customInlineTransformers;
  const customHandler = resolve(customTable);
  if (customHandler) {
    // Invoke as a method of the table so `this` matches a direct table call.
    const result = customHandler.call(customTable, token, ctx);
    if (result !== void 0) return result;
  }
  const builtinHandler = resolve(builtinInlineTransformers);
  if (builtinHandler) {
    const result = builtinHandler.call(builtinInlineTransformers, token, ctx);
    if (result !== void 0) return result;
  }
  if ("text" in token && typeof token.text === "string") {
    return { type: "text", value: token.text };
  }
  return null;
}
1246
+
1247
+ // src/parser/ast/MarkedAstBuildter.ts
1248
/**
 * AST builder backed by the `marked` lexer.
 *
 * `parse(text)` lexes markdown with a set of extensions, post-processes the
 * token stream (footnote definitions and `:::` containers found inside
 * paragraphs), and converts the tokens into a `root` node.
 */
var MarkedAstBuilder = class {
  /** Container settings: the `containers` option object, `{}` when it is `true`, otherwise undefined. */
  containerConfig;
  /** HTML-tree settings: the `htmlTree` option object, `{}` when it is `true`, otherwise undefined. */
  htmlTreeOptions;
  /** Link definitions carried across `parse` calls (copied into and out of each lexer's `tokens.links`). */
  globalLinks = {};
  /** marked extensions supplied by the user (from `plugins` and `markedExtensions`). */
  userExtensions = [];
  /** Transform context handed to the token transformers so they can recurse. */
  transformContext;
  /**
   * @param {object} [options] - Builder options. Read here: `containers`,
   *   `htmlTree`, `plugins`, `markedExtensions`. The whole object is also
   *   spread into the lexer options in `parse`.
   */
  constructor(options = {}) {
    this.options = options;
    this.containerConfig = typeof options.containers === "object" ? options.containers : options.containers === true ? {} : void 0;
    this.htmlTreeOptions = typeof options.htmlTree === "object" ? options.htmlTree : options.htmlTree === true ? {} : void 0;
    if (options.plugins) {
      this.userExtensions.push(...extractMarkedExtensions(options.plugins));
    }
    if (options.markedExtensions) {
      this.userExtensions.push(...options.markedExtensions);
    }
    this.transformContext = {
      transformTokens: this.transformTokens.bind(this),
      transformTokensWithPosition: this.transformTokensWithPosition.bind(this),
      transformInline: this.transformInline.bind(this),
      parseFootnoteContent: this.parseFootnoteContent.bind(this)
    };
  }
  /**
   * Parse markdown text into a root node.
   *
   * @param {string} text - Markdown source.
   * @returns {{type: "root", children: Array}} Root node with the converted blocks.
   */
  parse(text) {
    // Replace U+00A0, U+200B and U+202F with a plain space before lexing.
    const normalizedText = text.replace(/[\u00A0\u200b\u202f]/g, " ");
    const optimisticRefExt = createOptimisticReferenceExtension();
    const explicitDefExt = createExplicitDefinitionExtension();
    const footnoteDefExt = createFootnoteDefinitionExtension();
    const userBlockExts = [];
    const userBlockStartExts = [];
    const userInlineExts = [];
    const userInlineStartExts = [];
    // Sort user extensions by level; extensions with any other level are ignored.
    for (const ext of this.userExtensions) {
      if (ext.level === "block") {
        if (ext.tokenizer) userBlockExts.push(ext.tokenizer);
        if (ext.start) userBlockStartExts.push(ext.start);
      } else if (ext.level === "inline") {
        if (ext.tokenizer) userInlineExts.push(ext.tokenizer);
        if (ext.start) userInlineStartExts.push(ext.start);
      }
    }
    const blockExts = [
      footnoteDefExt.tokenizer,
      explicitDefExt.tokenizer,
      ...userBlockExts
    ];
    const blockStartExts = [
      footnoteDefExt.start,
      explicitDefExt.start,
      ...userBlockStartExts
    ];
    const inlineExts = [optimisticRefExt.tokenizer, ...userInlineExts];
    const inlineStartExts = [optimisticRefExt.start, ...userInlineStartExts];
    // Math and inline-HTML extensions are placed at the front of the lists.
    if (this.options.math) {
      const blockMathExt = createBlockMathExtension();
      const inlineMathExt = createInlineMathExtension();
      blockExts.unshift(blockMathExt.tokenizer);
      blockStartExts.unshift(blockMathExt.start);
      inlineExts.unshift(inlineMathExt.tokenizer);
      inlineStartExts.unshift(inlineMathExt.start);
    }
    if (this.htmlTreeOptions) {
      const inlineHtmlExt = createInlineHtmlExtension();
      inlineExts.unshift(inlineHtmlExt.tokenizer);
      inlineStartExts.unshift(inlineHtmlExt.start);
    }
    const lexerOptions = {
      gfm: true,
      // Do not turn soft line breaks into break nodes, to stay consistent with Micromark.
      breaks: false,
      // User options may override the defaults above, but never `extensions`.
      ...this.options,
      extensions: {
        inline: inlineExts,
        startInline: inlineStartExts,
        block: blockExts,
        startBlock: blockStartExts
      }
    };
    const lexerInstance = new Lexer(lexerOptions);
    // Seed the lexer with link definitions remembered from earlier parses...
    if (lexerInstance.tokens && lexerInstance.tokens.links) {
      Object.assign(lexerInstance.tokens.links, this.globalLinks);
    }
    let tokens = lexerInstance.lex(normalizedText);
    // ...and remember any definitions this parse produced.
    if (lexerInstance.tokens && lexerInstance.tokens.links) {
      Object.assign(this.globalLinks, lexerInstance.tokens.links);
    }
    tokens = this.preprocessTokens(tokens);
    let children = this.transformTokensWithPosition(tokens);
    if (this.htmlTreeOptions) {
      children = this.processHtmlNodes(children);
    }
    return {
      type: "root",
      children
    };
  }
  /**
   * Pre-process tokens.
   *
   * Extracts container directives (`:::name ... :::`) and leftover footnote
   * definitions (`[^id]: ...`) from paragraph tokens. All other tokens are
   * passed through unchanged.
   *
   * @param {Array} tokens - Block tokens produced by the lexer.
   * @returns {Array} New token list with `footnoteDefinition` / `container` tokens substituted in.
   */
  preprocessTokens(tokens) {
    const result = [];
    let i = 0;
    while (i < tokens.length) {
      const token = tokens[i];
      if (token.type === "paragraph") {
        const text = token.text;
        const footnoteMatch = text.match(/^\[\^([a-zA-Z0-9_-]+)\]:\s+([\s\S]*)$/);
        if (footnoteMatch) {
          const defToken = {
            type: "footnoteDefinition",
            identifier: footnoteMatch[1],
            text: footnoteMatch[2],
            tokens: new Lexer().inlineTokens(footnoteMatch[2]),
            raw: token.raw
          };
          result.push(defToken);
          i++;
          continue;
        }
        const containerStartMatch = text.match(/^:::(\s*)([a-zA-Z0-9_-]+)(.*?)(\n|$)/);
        if (containerStartMatch) {
          const name = containerStartMatch[2];
          const attrs = containerStartMatch[3].trim();
          let rawAccumulator = "";
          let foundEnd = false;
          let j = i;
          // Keep appending following tokens' raw text until the opening fence is balanced.
          while (j < tokens.length) {
            rawAccumulator += tokens[j].raw;
            const lines = rawAccumulator.split("\n");
            // The accumulated text is rescanned from the top each round, so depth restarts at 0.
            let depth = 0;
            let startLineIndex = -1;
            let endLineIndex = -1;
            for (let k = 0; k < lines.length; k++) {
              const line = lines[k];
              if (line.match(/^:::(\s*)([a-zA-Z0-9_-]+)/)) {
                if (depth === 0 && startLineIndex === -1) startLineIndex = k;
                depth++;
              } else if (line.trim() === ":::") {
                depth--;
                if (depth === 0) {
                  endLineIndex = k;
                  foundEnd = true;
                  break;
                }
              }
            }
            if (foundEnd) {
              const contentRaw = lines.slice(startLineIndex + 1, endLineIndex).join("\n");
              const remainingText = lines.slice(endLineIndex + 1).join("\n");
              // NOTE(review): the container body and trailing text are re-lexed with the
              // default `lexer`, i.e. without the extensions configured in `parse` - confirm intended.
              const containerToken = {
                type: "container",
                name,
                attrs,
                tokens: this.preprocessTokens(lexer(contentRaw)),
                raw: rawAccumulator
              };
              result.push(containerToken);
              if (remainingText.trim()) {
                const remainingTokens = this.preprocessTokens(lexer(remainingText));
                result.push(...remainingTokens);
              }
              // Skip every token consumed by the container.
              i = j + 1;
              break;
            }
            j++;
          }
          if (foundEnd) continue;
          // No closing fence found: fall through and keep the paragraph as-is.
        }
      }
      result.push(token);
      i++;
    }
    return result;
  }
  /**
   * Convert tokens into AST nodes, attaching position information.
   *
   * Offsets are computed by summing the `raw` lengths of the tokens passed
   * in; `line` and `column` are always set to 0. Tokens that produce no node
   * still advance the offset.
   *
   * @param {Array} [tokens] - Block tokens.
   * @returns {Array} Converted nodes; empty when `tokens` is falsy.
   */
  transformTokensWithPosition(tokens) {
    if (!tokens) return [];
    const results = [];
    let currentOffset = 0;
    for (const token of tokens) {
      const rawLength = token.raw?.length ?? 0;
      const node = transformBlockToken(token, this.transformContext);
      if (node) {
        node.position = {
          start: { line: 0, column: 0, offset: currentOffset },
          end: { line: 0, column: 0, offset: currentOffset + rawLength }
        };
        results.push(node);
      }
      currentOffset += rawLength;
    }
    return results;
  }
  /**
   * Convert tokens into AST nodes without position information.
   *
   * @param {Array} [tokens] - Block tokens.
   * @returns {Array} Converted nodes with falsy results removed; empty when `tokens` is falsy.
   */
  transformTokens(tokens) {
    if (!tokens) return [];
    return tokens.map((t) => transformBlockToken(t, this.transformContext)).filter(Boolean);
  }
  /**
   * Convert inline tokens. A transformer may return a single node or an
   * array of nodes; arrays are flattened into the result.
   *
   * @param {Array} [tokens] - Inline tokens.
   * @returns {Array} Converted inline nodes; empty when `tokens` is falsy.
   */
  transformInline(tokens) {
    if (!tokens) return [];
    const results = [];
    for (const token of tokens) {
      const result = transformInlineToken(token, this.transformContext);
      if (result) {
        if (Array.isArray(result)) {
          results.push(...result);
        } else {
          results.push(result);
        }
      }
    }
    return results;
  }
  /**
   * Parse footnote content into AST nodes.
   *
   * Every line after the first has one level of continuation indentation
   * removed (four leading spaces, or one leading tab) before the content is
   * lexed with `gfm` and `breaks` enabled. Lines with other indentation are
   * left untouched.
   *
   * @param {string} content - Raw footnote body.
   * @returns {Array} Converted nodes; empty when the content is blank.
   */
  parseFootnoteContent(content) {
    if (!content.trim()) {
      return [];
    }
    const normalizedContent = content.split("\n").map((line, index) => {
      if (index === 0) return line;
      // Strip exactly the prefix that was matched, so no real content is cut off.
      if (line.startsWith("    ")) return line.slice(4);
      if (line.startsWith("\t")) return line.slice(1);
      return line;
    }).join("\n");
    const contentLexer = new Lexer({ gfm: true, breaks: true });
    const tokens = contentLexer.lex(normalizedContent);
    return this.transformTokens(tokens);
  }
  /**
   * Process HTML nodes.
   *
   * Delegates to the html-extension's `transformHtmlNodes`, which is described
   * upstream as:
   * - merging HTML nodes that were split by blank lines
   * - parsing HTML into an HtmlElementNode tree
   *
   * @param {Array} nodes - Top-level nodes.
   * @returns {Array} Children of the transformed temporary root.
   */
  processHtmlNodes(nodes) {
    const tempRoot = {
      type: "root",
      children: nodes
    };
    const transformed = transformHtmlNodes(tempRoot, this.htmlTreeOptions);
    return transformed.children;
  }
  /**
   * Convert AST nodes into ParsedBlock records.
   *
   * @param {Array} nodes - Nodes to wrap.
   * @param {number} startOffset - Absolute offset added to each node's relative offsets.
   * @param {string} rawText - Text the relative offsets index into.
   * @param {*} status - Status value copied onto every block.
   * @param {Function} generateBlockId - Called once per node to obtain the block id.
   * @returns {Array} One block per node. Nodes lacking a start offset use 0;
   *   nodes lacking an end offset use `rawText.length`.
   */
  nodesToBlocks(nodes, startOffset, rawText, status, generateBlockId) {
    const blocks = [];
    for (const node of nodes) {
      const relativeStart = node.position?.start?.offset ?? 0;
      const relativeEnd = node.position?.end?.offset ?? rawText.length;
      const nodeText = rawText.substring(relativeStart, relativeEnd);
      const absoluteStart = startOffset + relativeStart;
      const absoluteEnd = startOffset + relativeEnd;
      blocks.push({
        id: generateBlockId(),
        status,
        node,
        startOffset: absoluteStart,
        endOffset: absoluteEnd,
        rawText: nodeText
      });
    }
    return blocks;
  }
};
/** Alias of `MarkedAstBuilder`. */
var AstBuilder = MarkedAstBuilder;
1533
+
1534
+ // src/engines/marked/index.ts
1535
/**
 * Factory for the marked-based AST builder.
 *
 * @param {object} [options] - Options forwarded unchanged to the `MarkedAstBuilder` constructor.
 * @returns {MarkedAstBuilder} A new builder instance.
 */
function createMarkedBuilder(options = {}) {
  const builder = new MarkedAstBuilder(options);
  return builder;
}
1538
+
1539
+ export { AstBuilder, MarkedAstBuilder, createMarkedBuilder };
1540
+ //# sourceMappingURL=index.js.map
1541
+ //# sourceMappingURL=index.js.map