@incremark/core 0.2.7 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1161 @@
1
+ import { fromMarkdown } from 'mdast-util-from-markdown';
2
+ import { gfmFromMarkdown } from 'mdast-util-gfm';
3
+ import { gfm } from 'micromark-extension-gfm';
4
+ import { gfmFootnoteFromMarkdown } from 'mdast-util-gfm-footnote';
5
+ import { math } from 'micromark-extension-math';
6
+ import { mathFromMarkdown } from 'mdast-util-math';
7
+ import { directive } from 'micromark-extension-directive';
8
+ import { directiveFromMarkdown } from 'mdast-util-directive';
9
+ import { codes, constants, types } from 'micromark-util-symbol';
10
+ import { markdownLineEndingOrSpace } from 'micromark-util-character';
11
+ import { factoryDestination } from 'micromark-factory-destination';
12
+ import { factoryTitle } from 'micromark-factory-title';
13
+ import { factoryLabel } from 'micromark-factory-label';
14
+ import { factoryWhitespace } from 'micromark-factory-whitespace';
15
+ import { gfmFootnote } from 'micromark-extension-gfm-footnote';
16
+ import { normalizeIdentifier } from 'micromark-util-normalize-identifier';
17
+
18
+ // src/parser/ast/MicromarkAstBuilder.ts
19
+
20
+ // src/extensions/html-extension/index.ts
21
/** Tag names that are rejected by default when building htmlElement nodes. */
const DEFAULT_TAG_BLACKLIST = [
  "script", "style", "iframe", "object", "embed", "form",
  "input", "button", "textarea", "select", "meta", "link",
  "base", "frame", "frameset", "applet", "noscript", "template",
];

/**
 * Attribute names that are rejected by default.
 * Event-handler attributes are not listed here; they are matched by pattern.
 */
const DEFAULT_ATTR_BLACKLIST = ["formaction", "xlink:href", "xmlns", "srcdoc"];

/**
 * URL schemes that are rejected by default.
 * Note: `data:image/` URLs are special-cased and allowed.
 */
const DEFAULT_PROTOCOL_BLACKLIST = ["javascript:", "vbscript:", "data:"];

/** Attributes whose value is treated as a URL and checked against the protocol blacklist. */
const URL_ATTRS = ["href", "src", "action", "formaction", "poster", "background"];

/** Elements that never have a closing tag and are therefore always self-closing. */
const VOID_ELEMENTS = [
  "br", "hr", "img", "input", "meta", "link", "area",
  "base", "col", "embed", "source", "track", "wbr",
];
56
/**
 * Classify a raw HTML string.
 *
 * @param {string} html - Raw HTML text (surrounding whitespace is ignored).
 * @returns {"opening"|"closing"|"self-closing"|"fragment"|"unknown"}
 *   - "closing": a single `</tag>`.
 *   - "self-closing": a single tag ending in `/>` or naming a void element.
 *   - "opening": any other single tag.
 *   - "fragment": more than one `<`, or a single tag whose attribute text
 *     contains an unquoted `<`.
 *   - "unknown": empty input, input not starting with `<`, or anything else.
 */
function detectHtmlContentType(html) {
  const source = html.trim();
  if (source === "" || !source.startsWith("<")) {
    return "unknown";
  }

  if (/^<\/([a-zA-Z][a-zA-Z0-9-]*)\s*>$/.test(source)) {
    return "closing";
  }

  const single = /^<([a-zA-Z][a-zA-Z0-9-]*)(\s[^]*?)?(\/?)>$/.exec(source);
  if (single !== null) {
    const [, tagName, attrText, slash] = single;
    if (attrText) {
      // A '<' outside of quotes means the "attributes" actually contain another tag.
      let openQuote = "";
      for (const ch of attrText) {
        if (openQuote !== "") {
          if (ch === openQuote) {
            openQuote = "";
          }
          continue;
        }
        if (ch === '"' || ch === "'") {
          openQuote = ch;
        } else if (ch === "<") {
          return "fragment";
        }
      }
    }
    if (slash === "/" || VOID_ELEMENTS.includes(tagName.toLowerCase())) {
      return "self-closing";
    }
    return "opening";
  }

  const bracketCount = source.split("<").length - 1;
  return bracketCount > 1 ? "fragment" : "unknown";
}
98
/**
 * Parse a string that consists of exactly one HTML tag.
 *
 * @param {string} html - Raw HTML text.
 * @returns {{tagName: string, attrs: Object<string, string>, isClosing: boolean,
 *   isSelfClosing: boolean, rawHtml: string} | null}
 *   Tag info with a lower-cased tag name and lower-cased, entity-decoded
 *   attributes, or null when the input is not classified as a single
 *   opening, closing or self-closing tag. `rawHtml` is the untrimmed input.
 */
function parseHtmlTag(html) {
  const source = html.trim();
  const kind = detectHtmlContentType(source);
  const isSingleTag = kind === "opening" || kind === "closing" || kind === "self-closing";
  if (!isSingleTag) {
    return null;
  }

  if (kind === "closing") {
    const closing = /^<\/([a-zA-Z][a-zA-Z0-9-]*)\s*>$/.exec(source);
    if (closing === null) {
      return null;
    }
    return {
      tagName: closing[1].toLowerCase(),
      attrs: {},
      isClosing: true,
      isSelfClosing: false,
      rawHtml: html
    };
  }

  const opening = /^<([a-zA-Z][a-zA-Z0-9-]*)(\s[^]*?)?(\/?)>$/.exec(source);
  if (opening === null) {
    return null;
  }
  const [, rawName, attrText, slash] = opening;
  const isSelfClosing = slash === "/" || VOID_ELEMENTS.includes(rawName.toLowerCase());

  const attrs = {};
  if (attrText) {
    // name, optionally followed by ="double", ='single' or =unquoted
    const attrPattern = /([a-zA-Z_:][-a-zA-Z0-9_:.]*)\s*(?:=\s*(?:"([^"]*)"|'([^']*)'|([^\s"'=<>`]+)))?/g;
    for (const [, attrName, dq, sq, bare] of attrText.matchAll(attrPattern)) {
      attrs[attrName.toLowerCase()] = decodeHtmlEntities(dq ?? sq ?? bare ?? "");
    }
  }

  return {
    tagName: rawName.toLowerCase(),
    attrs,
    isClosing: false,
    isSelfClosing,
    rawHtml: html
  };
}
137
/**
 * Decode the HTML character references found in an attribute value.
 *
 * Handles decimal (`&#65;`) and hexadecimal (`&#x41;` / `&#X41;`) numeric
 * references plus a small table of named references. Unknown named references
 * are left untouched. Decoding is single-pass: `&amp;lt;` becomes `&lt;`.
 *
 * @param {string} text - Text that may contain character references.
 * @returns {string} The decoded text.
 */
function decodeHtmlEntities(text) {
  const namedEntities = new Map([
    ["amp", "&"],
    ["lt", "<"],
    ["gt", ">"],
    ["quot", '"'],
    ["apos", "'"],
    ["nbsp", " "],
  ]);

  // String.fromCharCode only understands UTF-16 code units and silently
  // truncates anything above U+FFFF, so use code points instead. NUL,
  // surrogates and out-of-range values become U+FFFD instead of producing
  // broken strings.
  const fromNumericReference = (digits, radix) => {
    const codePoint = Number.parseInt(digits, radix);
    const isScalarValue =
      Number.isInteger(codePoint) &&
      codePoint > 0 &&
      codePoint <= 0x10ffff &&
      !(codePoint >= 0xd800 && codePoint <= 0xdfff);
    return isScalarValue ? String.fromCodePoint(codePoint) : "\uFFFD";
  };

  return text.replace(
    /&(?:#(\d+)|#[xX]([a-fA-F0-9]+)|([a-zA-Z]+));/g,
    (match, dec, hex, name) => {
      if (dec !== undefined) {
        return fromNumericReference(dec, 10);
      }
      if (hex !== undefined) {
        return fromNumericReference(hex, 16);
      }
      return namedEntities.get(name) ?? match;
    },
  );
}
153
/**
 * Parse one tag token without classifying it first.
 *
 * @param {string} tag - A single `<...>` token.
 * @returns {{tagName: string, attrs: Object<string, string>, isClosing: boolean,
 *   isSelfClosing: boolean, rawHtml: string} | null}
 *   Tag info (lower-cased names, entity-decoded attribute values), or null
 *   when the token is neither a closing tag nor an opening/self-closing tag.
 *   `rawHtml` is the untrimmed input.
 */
function parseTagDirect(tag) {
  const source = tag.trim();

  const closing = /^<\/([a-zA-Z][a-zA-Z0-9-]*)\s*>$/.exec(source);
  if (closing !== null) {
    return {
      tagName: closing[1].toLowerCase(),
      attrs: {},
      isClosing: true,
      isSelfClosing: false,
      rawHtml: tag
    };
  }

  const opening = /^<([a-zA-Z][a-zA-Z0-9-]*)([\s\S]*?)(\/?)>$/.exec(source);
  if (opening === null) {
    return null;
  }
  const [, rawName, attrText, slash] = opening;
  const lowerName = rawName.toLowerCase();

  const attrs = {};
  if (attrText) {
    // name, optionally followed by ="double", ='single' or =unquoted
    const attrPattern = /([a-zA-Z_:][-a-zA-Z0-9_:.]*)\s*(?:=\s*(?:"([^"]*)"|'([^']*)'|([^\s"'=<>`]+)))?/g;
    for (const [, attrName, dq, sq, bare] of attrText.matchAll(attrPattern)) {
      attrs[attrName.toLowerCase()] = decodeHtmlEntities(dq ?? sq ?? bare ?? "");
    }
  }

  return {
    tagName: lowerName,
    attrs,
    isClosing: false,
    isSelfClosing: slash === "/" || VOID_ELEMENTS.includes(lowerName),
    rawHtml: tag
  };
}
187
/**
 * Parse an HTML fragment (several tags and text) into a tree of nodes.
 *
 * Tags are tokenized with a regular expression, parsed with parseTagDirect,
 * filtered through the tag blacklist and their attributes sanitized.
 * Blacklisted or unparsable tags are skipped; the text between them is kept.
 * Elements still open at the end of the input are closed implicitly.
 *
 * Fixes over the previous implementation:
 * - A closing tag that matches an element deeper in the stack now closes
 *   every element above it as well. Previously only the top element was
 *   popped, leaving the matched element open so that later siblings were
 *   nested inside it.
 * - Non-blank text outside of any element is kept as a top-level text node
 *   instead of being dropped.
 * - Comments, doctype/CDATA-like declarations and processing instructions are
 *   skipped as a unit instead of leaking into the output as text.
 *
 * @param {string} html - The fragment source.
 * @param {object} [options] - Sanitizer options (read here: `preserveRawHtml`;
 *   the whole object is forwarded to isTagBlacklisted and sanitizeAttrs).
 * @returns {Array<object>} Top-level `htmlElement` and `text` nodes.
 */
function parseHtmlFragment(html, options = {}) {
  const result = [];
  const stack = [];

  // Append a finished node to the innermost open element, or to the result.
  const attach = (node) => {
    const parent = stack[stack.length - 1];
    if (parent) {
      parent.children.push(node);
    } else {
      result.push(node);
    }
  };

  // 1: comment / declaration / processing instruction, 2: tag, 3: text run
  const tokenRegex = /(<!--[\s\S]*?(?:-->|$)|<[!?][^>]*>)|(<\/?[a-zA-Z][^>]*>)|([^<]+)/g;

  for (const [, markup, tag, text] of html.matchAll(tokenRegex)) {
    if (markup) {
      continue;
    }

    if (tag) {
      const parsed = parseTagDirect(tag);
      if (!parsed || isTagBlacklisted(parsed.tagName, options)) {
        continue;
      }

      if (parsed.isClosing) {
        let openIndex = stack.length - 1;
        while (openIndex >= 0 && stack[openIndex].tagName !== parsed.tagName) {
          openIndex--;
        }
        if (openIndex === -1) {
          // Stray closing tag with no matching open element: ignore it.
          continue;
        }
        // Close the matched element and everything opened after it.
        while (stack.length > openIndex) {
          attach(stack.pop());
        }
        continue;
      }

      const node = {
        type: "htmlElement",
        tagName: parsed.tagName,
        attrs: sanitizeAttrs(parsed.attrs, options),
        children: [],
        data: options.preserveRawHtml !== false ? {
          rawHtml: tag,
          parsed: true
        } : void 0
      };
      if (parsed.isSelfClosing) {
        attach(node);
      } else {
        stack.push(node);
      }
    } else if (text && text.trim()) {
      attach({ type: "text", value: text });
    }
  }

  // Implicitly close whatever is still open, innermost first.
  while (stack.length > 0) {
    attach(stack.pop());
  }
  return result;
}
257
/**
 * Tell whether a tag name is rejected by the tag blacklist.
 *
 * @param {string} tagName - Tag name in any letter case.
 * @param {object} options - Uses `options.tagBlacklist` when it is not
 *   null/undefined, otherwise DEFAULT_TAG_BLACKLIST.
 * @returns {boolean} True when the lower-cased name is in the blacklist.
 */
function isTagBlacklisted(tagName, options) {
  const forbiddenTags = options.tagBlacklist ?? DEFAULT_TAG_BLACKLIST;
  const lowered = tagName.toLowerCase();
  return forbiddenTags.includes(lowered);
}
261
/**
 * Tell whether an attribute must be removed.
 *
 * Every attribute whose lower-cased name starts with "on" is rejected, as is
 * any name listed in the attribute blacklist.
 *
 * @param {string} attrName - Attribute name in any letter case.
 * @param {object} options - Uses `options.attrBlacklist` when it is not
 *   null/undefined, otherwise DEFAULT_ATTR_BLACKLIST.
 * @returns {boolean} True when the attribute is rejected.
 */
function isAttrBlacklisted(attrName, options) {
  const lowered = attrName.toLowerCase();
  const forbiddenAttrs = options.attrBlacklist ?? DEFAULT_ATTR_BLACKLIST;
  return lowered.startsWith("on") || forbiddenAttrs.includes(lowered);
}
267
/**
 * Tell whether a URL uses a blacklisted scheme.
 *
 * Before the scheme is compared, C0 control characters, spaces and DEL are
 * removed from the URL. Browsers strip leading control characters and ignore
 * tabs/newlines anywhere in a URL, so `java\tscript:...` or
 * `\u0001javascript:...` would otherwise slip past a plain `trim()`.
 *
 * `data:` URLs are rejected unless they start with `data:image/`.
 *
 * @param {string} url - Attribute value to check.
 * @param {object} options - Uses `options.protocolBlacklist` when it is not
 *   null/undefined, otherwise DEFAULT_PROTOCOL_BLACKLIST. Entries are compared
 *   against the lower-cased URL, so they are expected to be lower-case.
 * @returns {boolean} True when the URL starts with a blacklisted scheme.
 */
function isProtocolDangerous(url, options) {
  const protocolBlacklist = options.protocolBlacklist ?? DEFAULT_PROTOCOL_BLACKLIST;
  const normalizedUrl = url
    .replace(/[\u0000-\u0020\u007f]+/g, "")
    .trim()
    .toLowerCase();

  for (const protocol of protocolBlacklist) {
    if (!normalizedUrl.startsWith(protocol)) {
      continue;
    }
    // Inline images are the one tolerated use of data: URLs.
    return !(protocol === "data:" && normalizedUrl.startsWith("data:image/"));
  }
  return false;
}
280
/**
 * Build a copy of an attribute map without rejected entries.
 *
 * An attribute is removed when isAttrBlacklisted rejects its name, or when
 * its lower-cased name is listed in URL_ATTRS and isProtocolDangerous rejects
 * its value. Kept attributes retain their original name and value.
 *
 * @param {Object<string, string>} attrs - Parsed attributes.
 * @param {object} options - Sanitizer options forwarded to both checks.
 * @returns {Object<string, string>} A new object with the surviving attributes.
 */
function sanitizeAttrs(attrs, options) {
  const kept = {};
  for (const [attrName, attrValue] of Object.entries(attrs)) {
    const isUrlAttr = !isAttrBlacklisted(attrName, options) && URL_ATTRS.includes(attrName.toLowerCase());
    if (isAttrBlacklisted(attrName, options)) {
      continue;
    }
    if (isUrlAttr && isProtocolDangerous(attrValue, options)) {
      continue;
    }
    kept[attrName] = attrValue;
  }
  return kept;
}
291
/**
 * Tell whether a node is a raw HTML node.
 *
 * @param {{type: string}} node - AST node.
 * @returns {boolean} True when `node.type` is "html".
 */
function isHtmlNode(node) {
  const { type } = node;
  return type === "html";
}
294
/**
 * Tell whether a node carries a `children` array.
 *
 * @param {object} node - AST node.
 * @returns {boolean} True when `children` exists on the node and is an array.
 */
function hasChildren(node) {
  if (!("children" in node)) {
    return false;
  }
  return Array.isArray(node.children);
}
297
/**
 * Merge runs of consecutive html nodes that together form one HTML structure.
 *
 * When an html node leaves tags unclosed, the html nodes that directly follow
 * it are appended (joined with "\n") until all of those tags have been closed
 * or a non-html node is reached. The merged node only has `type` and `value`.
 * Nodes that need no merging are passed through unchanged.
 *
 * @param {Array<object>} nodes - Sibling AST nodes.
 * @returns {Array<object>} A new array with fragmented html nodes merged.
 */
function mergeFragmentedHtmlNodes(nodes) {
  const output = [];
  let cursor = 0;

  while (cursor < nodes.length) {
    const current = nodes[cursor];
    const initiallyUnclosed = isHtmlNode(current) ? findUnclosedTags(current.value) : [];

    if (initiallyUnclosed.length === 0) {
      // Not html, or html that is already balanced: nothing to merge.
      output.push(current);
      cursor += 1;
      continue;
    }

    const parts = [current.value];
    let pending = [...initiallyUnclosed];
    let lookahead = cursor + 1;

    while (lookahead < nodes.length && pending.length > 0) {
      const candidate = nodes[lookahead];
      if (!isHtmlNode(candidate)) {
        break;
      }
      const { hasRelevantClosing, remainingUnclosed } = checkClosingTags(candidate.value, pending);
      parts.push(candidate.value);
      if (hasRelevantClosing) {
        pending = remainingUnclosed;
      }
      lookahead += 1;
    }

    if (parts.length === 1) {
      output.push(current);
      cursor += 1;
    } else {
      output.push({
        type: "html",
        value: parts.join("\n")
      });
      cursor = lookahead;
    }
  }

  return output;
}
350
/**
 * List the tags that an HTML string opens without closing.
 *
 * Void elements and tags ending in `/>` are ignored. A closing tag removes
 * the most recently opened tag of the same name, if there is one.
 *
 * @param {string} html - HTML source.
 * @returns {string[]} Lower-cased names of the still-open tags, in opening order.
 */
function findUnclosedTags(html) {
  const openTags = [];
  const tagPattern = /<\/?([a-zA-Z][a-zA-Z0-9-]*)[^>]*\/?>/g;

  for (const [wholeTag, rawName] of html.matchAll(tagPattern)) {
    const name = rawName.toLowerCase();
    const needsNoClosing = VOID_ELEMENTS.includes(name) || wholeTag.endsWith("/>");
    if (needsNoClosing) {
      continue;
    }

    if (!wholeTag.startsWith("</")) {
      openTags.push(name);
      continue;
    }

    const openedAt = openTags.lastIndexOf(name);
    if (openedAt !== -1) {
      openTags.splice(openedAt, 1);
    }
  }

  return openTags;
}
371
/**
 * Match the closing tags in an HTML string against a list of open tags.
 *
 * Each closing tag removes the last occurrence of its name from a copy of
 * `unclosedTags`; the input array itself is not modified.
 *
 * @param {string} html - HTML source to scan for `</tag>`.
 * @param {string[]} unclosedTags - Lower-cased names of currently open tags.
 * @returns {{hasRelevantClosing: boolean, remainingUnclosed: string[]}}
 *   Whether at least one open tag was closed, and the tags that remain open.
 */
function checkClosingTags(html, unclosedTags) {
  const stillOpen = unclosedTags.slice();
  let closedAny = false;

  for (const [, rawName] of html.matchAll(/<\/([a-zA-Z][a-zA-Z0-9-]*)\s*>/g)) {
    const position = stillOpen.lastIndexOf(rawName.toLowerCase());
    if (position === -1) {
      continue;
    }
    stillOpen.splice(position, 1);
    closedAny = true;
  }

  return {
    hasRelevantClosing: closedAny,
    remainingUnclosed: stillOpen
  };
}
389
/**
 * Convert the raw html nodes in a sibling list into structured htmlElement nodes.
 *
 * Fragmented html nodes are merged first. Each html node is then handled
 * according to detectHtmlContentType:
 * - "fragment": replaced by the nodes from parseHtmlFragment, or kept as-is
 *   when that yields nothing;
 * - "self-closing": replaced by a childless htmlElement (dropped when the tag
 *   is blacklisted or cannot be parsed);
 * - "closing": dropped (a closing tag with no opening tag before it);
 * - "opening": the following siblings up to the matching closing tag (nesting
 *   of the same tag name is counted) become its children, processed
 *   recursively; without a matching closing tag all remaining siblings are
 *   taken. Blacklisted or unparsable opening tags are dropped.
 * - anything else: kept as-is.
 * Non-html nodes with children are copied with their children processed
 * recursively.
 *
 * @param {Array<object>} nodes - Sibling AST nodes.
 * @param {object} options - Sanitizer options (`preserveRawHtml` is read here).
 * @returns {Array<object>} The transformed sibling list.
 */
function processHtmlNodesInArray(nodes, options) {
  const siblings = mergeFragmentedHtmlNodes(nodes);
  const output = [];

  // Build an htmlElement for `parsed`, processing `rawChildren` recursively.
  const toElement = (parsed, rawHtml, rawChildren) => ({
    type: "htmlElement",
    tagName: parsed.tagName,
    attrs: sanitizeAttrs(parsed.attrs, options),
    children: processHtmlNodesInArray(rawChildren, options),
    data: options.preserveRawHtml !== false ? {
      rawHtml,
      parsed: true,
      originalType: "html"
    } : void 0
  });

  let cursor = 0;
  while (cursor < siblings.length) {
    const current = siblings[cursor];

    if (!isHtmlNode(current)) {
      if (hasChildren(current)) {
        const processedChildren = processHtmlNodesInArray(current.children, options);
        output.push({ ...current, children: processedChildren });
      } else {
        output.push(current);
      }
      cursor += 1;
      continue;
    }

    switch (detectHtmlContentType(current.value)) {
      case "fragment": {
        const parsedNodes = parseHtmlFragment(current.value, options);
        if (parsedNodes.length > 0) {
          output.push(...parsedNodes);
        } else {
          output.push(current);
        }
        cursor += 1;
        break;
      }

      case "self-closing": {
        const tag = parseHtmlTag(current.value);
        if (tag && !isTagBlacklisted(tag.tagName, options)) {
          output.push(toElement(tag, current.value, []));
        }
        cursor += 1;
        break;
      }

      case "closing":
        // Closing tag without a preceding opening tag.
        cursor += 1;
        break;

      case "opening": {
        const tag = parseHtmlTag(current.value);
        if (!tag || isTagBlacklisted(tag.tagName, options)) {
          cursor += 1;
          break;
        }

        const inner = [];
        let depth = 1;
        let scan = cursor + 1;
        let closed = false;

        while (scan < siblings.length && depth > 0) {
          const candidate = siblings[scan];
          if (isHtmlNode(candidate)) {
            const candidateKind = detectHtmlContentType(candidate.value);
            if (candidateKind === "closing" || candidateKind === "opening") {
              const candidateTag = parseHtmlTag(candidate.value);
              const sameTag = Boolean(candidateTag) && candidateTag.tagName === tag.tagName;
              if (sameTag && candidateKind === "opening") {
                depth += 1;
              } else if (sameTag) {
                depth -= 1;
                if (depth === 0) {
                  closed = true;
                  break;
                }
              }
            }
          }
          inner.push(candidate);
          scan += 1;
        }

        output.push(toElement(tag, current.value, inner));
        // Skip the closing tag itself when one was found.
        cursor = closed ? scan + 1 : scan;
        break;
      }

      default:
        output.push(current);
        cursor += 1;
    }
  }

  return output;
}
493
/**
 * Return a copy of the AST root whose children have had their html nodes
 * converted by processHtmlNodesInArray.
 *
 * @param {{children: Array<object>}} ast - AST root.
 * @param {object} [options] - Sanitizer options, forwarded unchanged.
 * @returns {object} A shallow copy of `ast` with the processed children.
 */
function transformHtmlNodes(ast, options = {}) {
  const transformed = { ...ast };
  transformed.children = processHtmlNodesInArray(ast.children, options);
  return transformed;
}
499
/**
 * Create the micromark syntax extension that registers this file's label-end
 * construct for `]` in text.
 *
 * @returns {object} A micromark extension with a single `text` construct keyed
 *   by `codes.rightSquareBracket`.
 */
function micromarkReferenceExtension() {
  const labelEndConstruct = {
    name: "labelEnd",
    resolveAll: resolveAllLabelEnd,
    resolveTo: resolveToLabelEnd,
    tokenize: tokenizeLabelEnd,
    // 'before' so that this construct is attempted first
    add: "before"
  };

  // Keyed by `]` in text to override the labelEnd construct.
  return {
    text: {
      [codes.rightSquareBracket]: labelEndConstruct
    }
  };
}
514
/**
 * Turn every leftover label start/end into plain data once the whole text
 * has been tokenized.
 *
 * For each event whose token is a labelImage, labelLink or labelEnd, the
 * token type is changed to `data` and the events that directly follow are
 * skipped (4 after a labelImage, 2 otherwise), which drops the marker events.
 *
 * The compacted list is copied back into `events` element by element. The
 * previous `events.push(...newEvents)` passed every event as a separate call
 * argument and could throw a RangeError on very long event lists.
 *
 * @param {Array} events - micromark events; modified in place.
 * @returns {Array} The same `events` array.
 */
function resolveAllLabelEnd(events) {
  const kept = [];
  let index = -1;

  while (++index < events.length) {
    const token = events[index][1];
    kept.push(events[index]);
    if (token.type === types.labelImage || token.type === types.labelLink || token.type === types.labelEnd) {
      // Skip the marker events that follow the label token.
      const offset = token.type === types.labelImage ? 4 : 2;
      token.type = types.data;
      index += offset;
    }
  }

  if (events.length !== kept.length) {
    // `kept` is never longer than `events`, so an in-place copy is safe.
    for (let i = 0; i < kept.length; i++) {
      events[i] = kept[i];
    }
    events.length = kept.length;
  }
  return events;
}
532
/**
 * Rewrite the events of a just-closed label into a link or image group.
 *
 * Scanning backwards, the last labelEnd event and the nearest preceding
 * label start (labelImage or labelLink that is not `_balanced`) are located.
 * For a link, the scan continues further back and marks earlier labelLink
 * starts as `_inactive` until a link or an already inactive labelLink is met.
 * The events from the opening onwards are then replaced by
 * group > label > (markers, labelText, markers), followed by whatever came
 * after the label end, and the group exit.
 *
 * NOTE(review): the events between the markers are copied as they are; the
 * stock micromark label-end resolver appears to run them through the
 * insideSpan resolvers at this point — confirm the omission is intentional.
 *
 * @param {Array} events - micromark events; modified in place.
 * @param {object} context - Tokenize context stored on the created events.
 * @returns {Array} The same `events` array (unchanged when no opening or
 *   closing was found).
 */
function resolveToLabelEnd(events, context) {
  let open;
  let close;
  let offset = 0;

  for (let index = events.length - 1; index >= 0; index--) {
    const [kind, token] = events[index];

    if (open !== void 0) {
      // Opening already found: a link cannot contain another link.
      if (token.type === types.link || (token.type === types.labelLink && token._inactive)) {
        break;
      }
      if (kind === "enter" && token.type === types.labelLink) {
        token._inactive = true;
      }
    } else if (close !== void 0) {
      const startsLabel = token.type === types.labelImage || token.type === types.labelLink;
      if (kind === "enter" && startsLabel && !token._balanced) {
        open = index;
        if (token.type !== types.labelLink) {
          // Image starts carry two extra marker events.
          offset = 2;
          break;
        }
      }
    } else if (token.type === types.labelEnd) {
      close = index;
    }
  }

  if (open === void 0 || close === void 0) {
    return events;
  }

  const openToken = events[open][1];
  const group = {
    type: openToken.type === types.labelLink ? types.link : types.image,
    start: { ...openToken.start },
    end: { ...events[events.length - 1][1].end }
  };
  const label = {
    type: types.label,
    start: { ...openToken.start },
    end: { ...events[close][1].end }
  };
  const text = {
    type: types.labelText,
    start: { ...events[open + offset + 2][1].end },
    end: { ...events[close - 2][1].start }
  };

  const media = [
    ["enter", group, context],
    ["enter", label, context],
    // Opening marker(s).
    ...events.slice(open + 1, open + offset + 3),
    ["enter", text, context],
    // Everything between the opening and closing markers.
    ...events.slice(open + offset + 4, close - 3),
    ["exit", text, context],
    // Closing marker.
    events[close - 2],
    events[close - 1],
    ["exit", label, context],
    // Resource or reference that followed the label end, if any.
    ...events.slice(close + 1),
    ["exit", group, context]
  ];

  events.splice(open, events.length - open, ...media);
  return events;
}
596
/**
 * Tokenizer for a label end (`]`) that does not require the reference to be
 * defined.
 *
 * The nearest label start (labelImage or labelLink) that is not `_balanced`
 * is looked up in the events produced so far. The label end is rejected when
 * there is no such start, when the start is inactive, or when a link label's
 * text starts with `^`. After the `]`:
 * - `(` attempts a resource; if that fails the label end is rejected and the
 *   start is marked `_balanced`;
 * - `[` attempts a full reference, then a collapsed reference, and accepts
 *   either way;
 * - anything else accepts.
 *
 * @this {object} micromark tokenize context (`events`, `sliceSerialize`, `now`).
 * @param {object} effects - micromark effects.
 * @param {Function} ok - State to continue with on success.
 * @param {Function} nok - State to continue with on failure.
 * @returns {Function} The initial state.
 */
function tokenizeLabelEnd(effects, ok, nok) {
  const self = this;

  let labelStart;
  for (let position = self.events.length - 1; position >= 0; position--) {
    const candidate = self.events[position][1];
    const isLabelStart = candidate.type === types.labelImage || candidate.type === types.labelLink;
    if (isLabelStart && !candidate._balanced) {
      labelStart = candidate;
      break;
    }
  }

  return start;

  function start(code) {
    if (!labelStart) {
      return nok(code);
    }
    if (labelStart._inactive) {
      return labelEndNok(code);
    }
    if (labelStart.type === types.labelLink) {
      const labelText = self.sliceSerialize({ start: labelStart.end, end: self.now() });
      if (labelText.startsWith("^")) {
        return nok(code);
      }
    }

    effects.enter(types.labelEnd);
    effects.enter(types.labelMarker);
    effects.consume(code);
    effects.exit(types.labelMarker);
    effects.exit(types.labelEnd);
    return after;
  }

  function after(code) {
    if (code === codes.leftParenthesis) {
      // A resource that fails to parse rejects the label end.
      const resourceConstruct = { tokenize: tokenizeResource, partial: false };
      return effects.attempt(resourceConstruct, labelEndOk, labelEndNok)(code);
    }
    if (code === codes.leftSquareBracket) {
      // Not a full reference? Try a collapsed one next.
      const fullReferenceConstruct = { tokenize: tokenizeReferenceFull, partial: false };
      return effects.attempt(fullReferenceConstruct, labelEndOk, referenceNotFull)(code);
    }
    return labelEndOk(code);
  }

  function referenceNotFull(code) {
    // Accept even when the collapsed reference does not match.
    const collapsedConstruct = { tokenize: tokenizeReferenceCollapsed, partial: false };
    return effects.attempt(collapsedConstruct, labelEndOk, labelEndOk)(code);
  }

  function labelEndOk(code) {
    return ok(code);
  }

  function labelEndNok(code) {
    labelStart._balanced = true;
    return nok(code);
  }
}
671
/**
 * Tokenizer for a resource: `(`, optional whitespace, optional destination,
 * optional whitespace and title, optional whitespace, `)`.
 *
 * Destination, title and whitespace are delegated to the micromark factory
 * helpers imported by this file.
 *
 * @param {object} effects - micromark effects.
 * @param {Function} ok - State to continue with on success.
 * @param {Function} nok - State to continue with on failure.
 * @returns {Function} The initial state.
 */
function tokenizeResource(effects, ok, nok) {
  return resourceStart;

  function resourceStart(code) {
    if (code !== codes.leftParenthesis) {
      return nok(code);
    }
    effects.enter(types.resource);
    effects.enter(types.resourceMarker);
    effects.consume(code);
    effects.exit(types.resourceMarker);
    return resourceBefore;
  }

  function resourceBefore(code) {
    if (markdownLineEndingOrSpace(code)) {
      return factoryWhitespace(effects, resourceOpen)(code);
    }
    return resourceOpen(code);
  }

  function resourceOpen(code) {
    if (code === codes.rightParenthesis) {
      // Empty resource: `()`.
      return resourceEnd(code);
    }
    const parseDestination = factoryDestination(
      effects,
      resourceDestinationAfter,
      resourceDestinationMissing,
      types.resourceDestination,
      types.resourceDestinationLiteral,
      types.resourceDestinationLiteralMarker,
      types.resourceDestinationRaw,
      types.resourceDestinationString,
      constants.linkResourceDestinationBalanceMax
    );
    return parseDestination(code);
  }

  function resourceDestinationAfter(code) {
    if (markdownLineEndingOrSpace(code)) {
      return factoryWhitespace(effects, resourceBetween)(code);
    }
    return resourceEnd(code);
  }

  function resourceDestinationMissing(code) {
    return nok(code);
  }

  function resourceBetween(code) {
    const startsTitle =
      code === codes.quotationMark ||
      code === codes.apostrophe ||
      code === codes.leftParenthesis;
    if (!startsTitle) {
      return resourceEnd(code);
    }
    const parseTitle = factoryTitle(
      effects,
      resourceTitleAfter,
      nok,
      types.resourceTitle,
      types.resourceTitleMarker,
      types.resourceTitleString
    );
    return parseTitle(code);
  }

  function resourceTitleAfter(code) {
    if (markdownLineEndingOrSpace(code)) {
      return factoryWhitespace(effects, resourceEnd)(code);
    }
    return resourceEnd(code);
  }

  function resourceEnd(code) {
    if (code !== codes.rightParenthesis) {
      return nok(code);
    }
    effects.enter(types.resourceMarker);
    effects.consume(code);
    effects.exit(types.resourceMarker);
    effects.exit(types.resource);
    return ok;
  }
}
735
/**
 * Tokenizer for a full reference (`[label]`) following a label end.
 *
 * The label is parsed by factoryLabel; any label it accepts is accepted here.
 *
 * @this {object} micromark tokenize context, passed on to factoryLabel.
 * @param {object} effects - micromark effects.
 * @param {Function} ok - State to continue with on success.
 * @param {Function} nok - State to continue with on failure.
 * @returns {Function} The initial state.
 */
function tokenizeReferenceFull(effects, ok, nok) {
  const self = this;
  return referenceFull;

  function referenceFull(code) {
    if (code !== codes.leftSquareBracket) {
      return nok(code);
    }
    const parseLabel = factoryLabel.call(
      self,
      effects,
      referenceFullAfter,
      referenceFullMissing,
      types.reference,
      types.referenceMarker,
      types.referenceString
    );
    return parseLabel(code);
  }

  function referenceFullAfter(code) {
    return ok(code);
  }

  function referenceFullMissing(code) {
    return nok(code);
  }
}
759
/**
 * Tokenizer for a collapsed reference: exactly `[]`.
 *
 * @param {object} effects - micromark effects.
 * @param {Function} ok - State to continue with on success.
 * @param {Function} nok - State to continue with on failure.
 * @returns {Function} The initial state.
 */
function tokenizeReferenceCollapsed(effects, ok, nok) {
  // Emit one referenceMarker token around the current character.
  const consumeMarker = (code) => {
    effects.enter(types.referenceMarker);
    effects.consume(code);
    effects.exit(types.referenceMarker);
  };

  return referenceCollapsedStart;

  function referenceCollapsedStart(code) {
    if (code !== codes.leftSquareBracket) {
      return nok(code);
    }
    effects.enter(types.reference);
    consumeMarker(code);
    return referenceCollapsedOpen;
  }

  function referenceCollapsedOpen(code) {
    if (code !== codes.rightSquareBracket) {
      return nok(code);
    }
    consumeMarker(code);
    effects.exit(types.reference);
    return ok;
  }
}
782
/**
 * Create a variant of the gfmFootnote() syntax extension whose text
 * constructs for `[` and `]` use this file's tokenizers.
 *
 * Everything else from the original extension, including the other fields of
 * the two replaced constructs, is kept.
 *
 * @returns {object} The adjusted micromark extension.
 */
function gfmFootnoteIncremental() {
  const base = gfmFootnote();

  // `[` is where a footnote call starts.
  const callConstruct = {
    ...base.text[codes.leftSquareBracket],
    tokenize: tokenizeGfmFootnoteCallIncremental
  };
  // `]` handles cases such as `![^1]`.
  const potentialCallConstruct = {
    ...base.text[codes.rightSquareBracket],
    tokenize: tokenizePotentialGfmFootnoteCallIncremental
  };

  return {
    ...base,
    text: {
      ...base.text,
      [codes.leftSquareBracket]: callConstruct,
      [codes.rightSquareBracket]: potentialCallConstruct
    }
  };
}
801
/**
 * Tokenizer for a footnote call (`[^label]`).
 *
 * The label must contain at least one character and may not contain EOF,
 * line endings, whitespace or an unescaped `[`; `\[`, `\\` and `\]` are
 * accepted as escapes. The call is rejected once more than
 * `constants.linkReferenceSizeMax` label characters have been consumed.
 * No check is made here that the footnote is defined.
 *
 * @param {object} effects - micromark effects.
 * @param {Function} ok - State to continue with on success.
 * @param {Function} nok - State to continue with on failure.
 * @returns {Function} The initial state.
 */
function tokenizeGfmFootnoteCallIncremental(effects, ok, nok) {
  let size = 0;
  let data = false;

  return start;

  function start(code) {
    if (code !== codes.leftSquareBracket) {
      return nok(code);
    }
    effects.enter("gfmFootnoteCall");
    effects.enter("gfmFootnoteCallLabelMarker");
    effects.consume(code);
    effects.exit("gfmFootnoteCallLabelMarker");
    return callStart;
  }

  function callStart(code) {
    if (code !== codes.caret) {
      return nok(code);
    }
    effects.enter("gfmFootnoteCallMarker");
    effects.consume(code);
    effects.exit("gfmFootnoteCallMarker");
    effects.enter("gfmFootnoteCallString");
    const chunk = effects.enter("chunkString");
    chunk.contentType = "string";
    return callData;
  }

  function callData(code) {
    // Too long.
    const tooLong = size > constants.linkReferenceSizeMax;
    // Closing bracket without any label data.
    const closesEmptyLabel = code === codes.rightSquareBracket && !data;
    // EOF, line endings, spaces, tabs and `[` are not supported.
    const unsupported =
      code === codes.eof ||
      code === codes.leftSquareBracket ||
      markdownLineEndingOrSpace(code);
    if (tooLong || closesEmptyLabel || unsupported) {
      return nok(code);
    }

    if (code === codes.rightSquareBracket) {
      effects.exit("chunkString");
      effects.exit("gfmFootnoteCallString");
      effects.enter("gfmFootnoteCallLabelMarker");
      effects.consume(code);
      effects.exit("gfmFootnoteCallLabelMarker");
      effects.exit("gfmFootnoteCall");
      return ok;
    }

    if (!markdownLineEndingOrSpace(code)) {
      data = true;
    }
    size += 1;
    effects.consume(code);
    if (code === codes.backslash) {
      return callEscape;
    }
    return callData;
  }

  function callEscape(code) {
    const isEscapable =
      code === codes.leftSquareBracket ||
      code === codes.backslash ||
      code === codes.rightSquareBracket;
    if (!isEscapable) {
      return callData(code);
    }
    effects.consume(code);
    size += 1;
    return callData;
  }
}
861
/**
 * Tokenizer for the `]` that may end a footnote call written right after an
 * image start, as in `![^1]`.
 *
 * The events produced so far are searched backwards for a labelImage token;
 * the search stops early at a gfmFootnoteCall, labelLink, label, image or
 * link token. The `]` is accepted only when a labelImage was found, it is
 * marked `_balanced`, and the normalized text after it starts with `^`.
 * No check is made here that the footnote is defined.
 *
 * @this {object} micromark tokenize context (`events`, `sliceSerialize`, `now`).
 * @param {object} effects - micromark effects.
 * @param {Function} ok - State to continue with on success.
 * @param {Function} nok - State to continue with on failure.
 * @returns {Function} The initial state.
 */
function tokenizePotentialGfmFootnoteCallIncremental(effects, ok, nok) {
  const self = this;
  const stopTypes = ["gfmFootnoteCall", "labelLink", "label", "image", "link"];

  let labelStart;
  for (let position = self.events.length - 1; position >= 0; position--) {
    const candidate = self.events[position][1];
    if (candidate.type === "labelImage") {
      labelStart = candidate;
      break;
    }
    if (stopTypes.includes(candidate.type)) {
      break;
    }
  }

  return start;

  function start(code) {
    if (code !== codes.rightSquareBracket) {
      return nok(code);
    }
    if (!labelStart || !labelStart._balanced) {
      return nok(code);
    }

    const labelSource = self.sliceSerialize({
      start: labelStart.end,
      end: self.now()
    });
    const id = normalizeIdentifier(labelSource);
    if (id.codePointAt(0) !== codes.caret) {
      return nok(code);
    }

    effects.enter("gfmFootnoteCallLabelMarker");
    effects.consume(code);
    effects.exit("gfmFootnoteCallLabelMarker");
    return ok(code);
  }
}
898
+
899
+ // src/parser/ast/types.ts
900
/**
 * Collect the micromark and mdast extensions contributed by plugins.
 *
 * Only plugins whose `type` is "micromark" or "both" and that have a truthy
 * `micromark` property contribute; their `extensions` and `mdastExtensions`
 * are concatenated in plugin order.
 *
 * @param {Iterable<object>} plugins - Plugin descriptors.
 * @returns {{extensions: Array, mdastExtensions: Array}} The collected lists.
 */
function extractMicromarkExtensions(plugins) {
  const collected = { extensions: [], mdastExtensions: [] };

  for (const plugin of plugins) {
    const targetsMicromark = plugin.type === "micromark" || plugin.type === "both";
    if (!targetsMicromark || !plugin.micromark) {
      continue;
    }
    collected.extensions.push(...plugin.micromark.extensions);
    collected.mdastExtensions.push(...plugin.micromark.mdastExtensions);
  }

  return collected;
}
911
+
912
+ // src/parser/ast/MicromarkAstBuilder.ts
913
/** Node types whose children are handled as inline content. */
const INLINE_CONTAINER_TYPES = [
  "paragraph", "heading", "tableCell", "delete",
  "emphasis", "strong", "link", "linkReference",
];
923
/**
 * Tell whether a node's children are inline content.
 *
 * @param {{type: string}} node - AST node.
 * @returns {boolean} True when `node.type` is listed in INLINE_CONTAINER_TYPES.
 */
function isInlineContainer(node) {
  const { type } = node;
  return INLINE_CONTAINER_TYPES.includes(type);
}
926
/**
 * Builds mdast trees from Markdown with micromark, using extension lists
 * that are assembled once in the constructor and reused for every parse.
 */
class MicromarkAstBuilder {
  options;
  containerConfig;
  htmlTreeConfig;
  /** Cached extension instances, so they are not recreated on every parse. */
  cachedExtensions = [];
  cachedMdastExtensions = [];

  /**
   * @param {object} [options] - Builder options. Read here: `gfm`, `math`,
   *   `containers`, `htmlTree`, `plugins`, `extensions`, `mdastExtensions`.
   */
  constructor(options = {}) {
    this.options = options;
    this.containerConfig = this.computeContainerConfig(options);
    this.htmlTreeConfig = this.computeHtmlTreeConfig(options);
    this.initExtensions();
  }

  /**
   * Create and cache the extension instances.
   *
   * Order: gfm, math, directive (only when containers are configured),
   * plugin extensions, user extensions, then the incremental footnote
   * extension (gfm only) and finally the reference extension.
   */
  initExtensions() {
    const { options } = this;
    const syntax = this.cachedExtensions;
    const mdast = this.cachedMdastExtensions;

    if (options.gfm) {
      syntax.push(gfm());
      mdast.push(...gfmFromMarkdown(), gfmFootnoteFromMarkdown());
    }
    if (options.math) {
      syntax.push(math());
      mdast.push(mathFromMarkdown());
    }
    if (this.containerConfig !== void 0) {
      syntax.push(directive());
      mdast.push(directiveFromMarkdown());
    }
    if (options.plugins) {
      const fromPlugins = extractMicromarkExtensions(options.plugins);
      syntax.push(...fromPlugins.extensions);
      mdast.push(...fromPlugins.mdastExtensions);
    }
    if (options.extensions) {
      syntax.push(...options.extensions);
    }
    if (options.mdastExtensions) {
      mdast.push(...options.mdastExtensions);
    }
    if (options.gfm) {
      syntax.push(gfmFootnoteIncremental());
    }
    syntax.push(micromarkReferenceExtension());
  }

  /**
   * Compute the container configuration.
   *
   * @param {object} options - Builder options.
   * @returns {object|undefined} undefined when `containers` is falsy, `{}`
   *   when it is `true`, otherwise the given value.
   */
  computeContainerConfig(options) {
    const { containers } = options;
    if (!containers) {
      return void 0;
    }
    if (containers === true) {
      return {};
    }
    return containers;
  }

  /**
   * Compute the HTML tree configuration.
   *
   * @param {object} options - Builder options.
   * @returns {object|undefined} undefined when `htmlTree` is falsy, `{}`
   *   when it is `true`, otherwise the given value.
   */
  computeHtmlTreeConfig(options) {
    const { htmlTree } = options;
    if (!htmlTree) {
      return void 0;
    }
    if (htmlTree === true) {
      return {};
    }
    return htmlTree;
  }

  /**
   * Parse Markdown text into an AST.
   *
   * With an HTML tree configuration, html nodes are converted by
   * transformHtmlNodes; otherwise they are turned into plain text.
   *
   * @param {string} text - Markdown text.
   * @returns {object} The AST.
   */
  parse(text) {
    const ast = fromMarkdown(text, {
      extensions: this.cachedExtensions,
      mdastExtensions: this.cachedMdastExtensions
    });
    if (!this.htmlTreeConfig) {
      return this.convertHtmlToText(ast);
    }
    return transformHtmlNodes(ast, this.htmlTreeConfig);
  }

  /**
   * Replace html nodes by plain text (used when the HTML tree conversion is
   * not enabled).
   *
   * @param {object} ast - The AST.
   * @returns {object} A copy of the AST root with converted children.
   */
  convertHtmlToText(ast) {
    const converted = { ...ast };
    converted.children = this.processBlockChildren(ast.children);
    return converted;
  }

  /**
   * Process block-level nodes: html nodes become paragraphs, and the children
   * of other parents are processed as inline or block content depending on
   * the parent type.
   *
   * @param {Array<object>} children - Block-level nodes.
   * @returns {Array<object>} The converted nodes.
   */
  processBlockChildren(children) {
    return children.map((node) => {
      if (node.type === "html") {
        return this.convertBlockHtmlToParagraph(node);
      }
      const isParent = "children" in node && Array.isArray(node.children);
      if (!isParent) {
        return node;
      }
      const convertedChildren = isInlineContainer(node)
        ? this.processInlineChildren(node.children)
        : this.processBlockChildren(node.children);
      return { ...node, children: convertedChildren };
    });
  }

  /**
   * Process inline nodes: html nodes become text nodes, and the children of
   * other parents are processed recursively.
   *
   * @param {Array<object>} children - Inline nodes.
   * @returns {Array<object>} The converted nodes.
   */
  processInlineChildren(children) {
    return children.map((node) => {
      if (node.type === "html") {
        return this.convertInlineHtmlToText(node);
      }
      const isParent = "children" in node && Array.isArray(node.children);
      if (!isParent) {
        return node;
      }
      return { ...node, children: this.processInlineChildren(node.children) };
    });
  }

  /**
   * Convert a block-level html node into a paragraph holding its source text.
   *
   * @param {object} htmlNode - The html node.
   * @returns {object} A paragraph node with the html node's position.
   */
  convertBlockHtmlToParagraph(htmlNode) {
    return {
      type: "paragraph",
      children: [{ type: "text", value: htmlNode.value }],
      position: htmlNode.position
    };
  }

  /**
   * Convert an inline html node into a text node.
   *
   * @param {object} htmlNode - The html node.
   * @returns {object} A text node with the same value and position.
   */
  convertInlineHtmlToText(htmlNode) {
    const { value, position } = htmlNode;
    return { type: "text", value, position };
  }

  /**
   * Convert AST nodes into ParsedBlock records.
   *
   * Offsets missing from a node's position default to 0 (start) and 1 (end).
   *
   * @param {Iterable<object>} nodes - AST nodes.
   * @param {number} startOffset - Offset added to the node offsets.
   * @param {string} rawText - Source text the node offsets refer to.
   * @param {*} status - Block status stored on every block.
   * @param {Function} generateBlockId - Called once per node to get its id.
   * @returns {Array<object>} The blocks, in node order.
   */
  nodesToBlocks(nodes, startOffset, rawText, status, generateBlockId) {
    const blocks = [];
    for (const node of nodes) {
      const from = node.position?.start?.offset ?? 0;
      const to = node.position?.end?.offset ?? 1;
      const block = {
        id: generateBlockId(),
        status,
        node,
        startOffset: startOffset + from,
        endOffset: startOffset + to,
        rawText: rawText.substring(from, to)
      };
      blocks.push(block);
    }
    return blocks;
  }
}
1109
+
1110
+ // src/engines/micromark/index.ts
1111
/**
 * Create a MicromarkAstBuilder.
 *
 * @param {object} [options] - Builder options, passed to the constructor unchanged.
 * @returns {MicromarkAstBuilder} A new builder instance.
 */
function createMicromarkBuilder(options = {}) {
  const builder = new MicromarkAstBuilder(options);
  return builder;
}
1114
+ /**
1115
+ * @file Micromark 扩展:支持增量解析的 Reference 语法
1116
+ *
1117
+ * @description
1118
+ * 在增量解析场景中,引用式图片/链接(如 `![Alt][id]`)可能在定义(`[id]: url`)之前出现。
1119
+ * 标准 micromark 会检查 parser.defined,如果 id 未定义就解析为文本。
1120
+ *
1121
+ * 本扩展通过覆盖 labelEnd 构造,移除 parser.defined 检查,
1122
+ * 使得 reference 语法总是被解析为 reference token,
1123
+ * 由渲染层根据实际的 definitionMap 决定如何渲染。
1124
+ *
1125
+ * @module micromark-reference-extension
1126
+ *
1127
+ * @features
1128
+ * - ✅ 支持所有 resource 语法(带 title 的图片/链接)
1129
+ * - ✅ 支持所有 reference 语法(full, collapsed, shortcut)
1130
+ * - ✅ 延迟验证:解析时不检查定义是否存在
1131
+ * - ✅ 使用官方 factory 函数,保证与 CommonMark 标准一致
1132
+ *
1133
+ * @dependencies
1134
+ * - micromark-factory-destination: 解析 URL(支持尖括号、括号平衡)
1135
+ * - micromark-factory-title: 解析 title(支持三种引号,支持多行)
1136
+ * - micromark-factory-label: 解析 label(支持转义、长度限制)
1137
+ * - micromark-factory-whitespace: 解析空白符(正确生成 lineEnding/linePrefix token)
1138
+ * - micromark-util-character: 字符判断工具
1139
+ * - micromark-util-symbol: 常量(codes, types, constants)
1140
+ * - micromark-util-types: TypeScript 类型定义
1141
+ *
1142
+ * @see {@link https://github.com/micromark/micromark} - micromark 官方文档
1143
+ * @see {@link https://spec.commonmark.org/0.30/#images} - CommonMark 图片规范
1144
+ * @see {@link https://spec.commonmark.org/0.30/#links} - CommonMark 链接规范
1145
+ *
1146
+ * @example
1147
+ * ```typescript
1148
+ * import { micromarkReferenceExtension } from './micromark-reference-extension'
1149
+ * import { fromMarkdown } from 'mdast-util-from-markdown'
1150
+ *
1151
+ * const extensions = [micromarkReferenceExtension()]
1152
+ * const ast = fromMarkdown(text, { extensions })
1153
+ * ```
1154
+ *
1155
+ * @author Incremark Team
1156
+ * @license MIT
1157
+ */
1158
+
1159
+ export { MicromarkAstBuilder, createMicromarkBuilder };
1160
+ //# sourceMappingURL=index.js.map
1161
+ //# sourceMappingURL=index.js.map