@incremark/core 0.2.7 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.en.md +132 -23
- package/dist/MarkedAstBuildter-BsjxZko_.d.ts +72 -0
- package/dist/detector/index.d.ts +1 -1
- package/dist/engines/marked/index.d.ts +29 -0
- package/dist/engines/marked/index.js +1541 -0
- package/dist/engines/marked/index.js.map +1 -0
- package/dist/engines/micromark/index.d.ts +106 -0
- package/dist/engines/micromark/index.js +1161 -0
- package/dist/engines/micromark/index.js.map +1 -0
- package/dist/{index-CfgnWMWh.d.ts → index-mZ7yCqNH.d.ts} +1 -1
- package/dist/index.d.ts +59 -16
- package/dist/index.js +1170 -639
- package/dist/index.js.map +1 -1
- package/dist/types-C_EW5vfp.d.ts +123 -0
- package/package.json +17 -3
|
@@ -0,0 +1,1161 @@
|
|
|
1
|
+
import { fromMarkdown } from 'mdast-util-from-markdown';
|
|
2
|
+
import { gfmFromMarkdown } from 'mdast-util-gfm';
|
|
3
|
+
import { gfm } from 'micromark-extension-gfm';
|
|
4
|
+
import { gfmFootnoteFromMarkdown } from 'mdast-util-gfm-footnote';
|
|
5
|
+
import { math } from 'micromark-extension-math';
|
|
6
|
+
import { mathFromMarkdown } from 'mdast-util-math';
|
|
7
|
+
import { directive } from 'micromark-extension-directive';
|
|
8
|
+
import { directiveFromMarkdown } from 'mdast-util-directive';
|
|
9
|
+
import { codes, constants, types } from 'micromark-util-symbol';
|
|
10
|
+
import { markdownLineEndingOrSpace } from 'micromark-util-character';
|
|
11
|
+
import { factoryDestination } from 'micromark-factory-destination';
|
|
12
|
+
import { factoryTitle } from 'micromark-factory-title';
|
|
13
|
+
import { factoryLabel } from 'micromark-factory-label';
|
|
14
|
+
import { factoryWhitespace } from 'micromark-factory-whitespace';
|
|
15
|
+
import { gfmFootnote } from 'micromark-extension-gfm-footnote';
|
|
16
|
+
import { normalizeIdentifier } from 'micromark-util-normalize-identifier';
|
|
17
|
+
|
|
18
|
+
// src/parser/ast/MicromarkAstBuilder.ts
|
|
19
|
+
|
|
20
|
+
// src/extensions/html-extension/index.ts
|
|
21
|
+
// Default list of tag names checked by isTagBlacklisted.
var DEFAULT_TAG_BLACKLIST = [
  "script", "style", "iframe", "object", "embed", "form",
  "input", "button", "textarea", "select", "meta", "link",
  "base", "frame", "frameset", "applet", "noscript", "template"
];

// Default list of attribute names checked by isAttrBlacklisted.
// Event-handler attributes are not listed here; they are matched by pattern.
var DEFAULT_ATTR_BLACKLIST = ["formaction", "xlink:href", "xmlns", "srcdoc"];

// Default URL scheme prefixes checked by isProtocolDangerous.
// Note: data:image/ is special-cased and allowed.
var DEFAULT_PROTOCOL_BLACKLIST = ["javascript:", "vbscript:", "data:"];

// Attribute names whose values are checked as URLs by sanitizeAttrs.
var URL_ATTRS = ["href", "src", "action", "formaction", "poster", "background"];

// Tag names treated as self-closing even without a trailing slash.
var VOID_ELEMENTS = [
  "br", "hr", "img", "input", "meta", "link", "area",
  "base", "col", "embed", "source", "track", "wbr"
];
|
|
56
|
+
/**
 * Classify a raw HTML string.
 *
 * @param {string} html - Raw HTML text; surrounding whitespace is ignored.
 * @returns {"closing"|"self-closing"|"opening"|"fragment"|"unknown"}
 *   "closing" for a lone `</tag>`, "self-closing"/"opening" for a lone start
 *   tag, "fragment" when more than one `<` is involved, otherwise "unknown".
 */
function detectHtmlContentType(html) {
  const source = html.trim();
  if (source === "" || source[0] !== "<") {
    return "unknown";
  }
  if (/^<\/([a-zA-Z][a-zA-Z0-9-]*)\s*>$/.test(source)) {
    return "closing";
  }
  const single = /^<([a-zA-Z][a-zA-Z0-9-]*)(\s[^]*?)?(\/?)>$/.exec(source);
  if (single === null) {
    // Not a lone tag: more than one "<" means several tags are present.
    const openBrackets = source.split("<").length - 1;
    return openBrackets > 1 ? "fragment" : "unknown";
  }
  const [, tagName, attrText = "", slash] = single;
  // The lazy attribute group can swallow following markup, so an unquoted "<"
  // inside it means this is really several tags.
  let quote = "";
  for (const ch of attrText) {
    if (quote !== "") {
      if (ch === quote) quote = "";
    } else if (ch === '"' || ch === "'") {
      quote = ch;
    } else if (ch === "<") {
      return "fragment";
    }
  }
  if (slash === "/" || VOID_ELEMENTS.includes(tagName.toLowerCase())) {
    return "self-closing";
  }
  return "opening";
}
|
|
98
|
+
/**
 * Parse a string holding exactly one HTML tag.
 *
 * @param {string} html - Raw tag text.
 * @returns {{tagName: string, attrs: Object<string, string>, isClosing: boolean,
 *   isSelfClosing: boolean, rawHtml: string} | null} Parsed tag with lower-cased
 *   tag and attribute names and entity-decoded attribute values, or null when
 *   detectHtmlContentType does not classify the input as a single tag.
 */
function parseHtmlTag(html) {
  const source = html.trim();
  const kind = detectHtmlContentType(source);

  if (kind === "closing") {
    const closing = /^<\/([a-zA-Z][a-zA-Z0-9-]*)\s*>$/.exec(source);
    if (closing === null) return null;
    return {
      tagName: closing[1].toLowerCase(),
      attrs: {},
      isClosing: true,
      isSelfClosing: false,
      rawHtml: html
    };
  }
  if (kind !== "opening" && kind !== "self-closing") {
    return null;
  }

  const opening = /^<([a-zA-Z][a-zA-Z0-9-]*)(\s[^]*?)?(\/?)>$/.exec(source);
  if (opening === null) return null;
  const name = opening[1].toLowerCase();
  const attrText = opening[2];

  const attrs = {};
  if (attrText) {
    // name, then optionally = "double" | 'single' | bare value
    const attrPattern = /([a-zA-Z_:][-a-zA-Z0-9_:.]*)\s*(?:=\s*(?:"([^"]*)"|'([^']*)'|([^\s"'=<>`]+)))?/g;
    for (const found of attrText.matchAll(attrPattern)) {
      attrs[found[1].toLowerCase()] = decodeHtmlEntities(found[2] ?? found[3] ?? found[4] ?? "");
    }
  }

  return {
    tagName: name,
    attrs,
    isClosing: false,
    isSelfClosing: opening[3] === "/" || VOID_ELEMENTS.includes(name),
    rawHtml: html
  };
}
|
|
137
|
+
/**
 * Decode a small set of HTML character references in `text`.
 *
 * Handles decimal (`&#39;`) and hexadecimal (`&#x27;`) numeric references and
 * the named references listed below. Anything else is left untouched.
 *
 * @param {string} text - Text that may contain character references.
 * @returns {string} The text with recognised references replaced.
 */
function decodeHtmlEntities(text) {
  // Keys are the full reference text, including "&" and ";". "&#39;" needs no
  // entry because the numeric branch below already covers it.
  const entities = {
    "&amp;": "&",
    "&lt;": "<",
    "&gt;": ">",
    "&quot;": '"',
    "&apos;": "'",
    // NOTE(review): the listing this was recovered from does not show the
    // original value; U+00A0 is the standard decoding of &nbsp; - confirm.
    "&nbsp;": "\u00A0"
  };
  const MAX_CODE_POINT = 0x10ffff;
  return text.replace(/&(?:#(\d+)|#x([a-fA-F0-9]+)|([a-zA-Z]+));/g, (match, dec, hex, name) => {
    if (dec !== undefined || hex !== undefined) {
      const codePoint = dec !== undefined ? Number.parseInt(dec, 10) : Number.parseInt(hex, 16);
      // String.fromCodePoint throws above U+10FFFF; keep such references as written.
      if (codePoint > MAX_CODE_POINT) return match;
      // fromCodePoint (not fromCharCode) so astral characters are not truncated to 16 bits.
      return String.fromCodePoint(codePoint);
    }
    return entities[`&${name};`] || match;
  });
}
|
|
153
|
+
/**
 * Parse one tag token without first classifying it.
 *
 * Unlike parseHtmlTag, the attribute portion here does not have to start with
 * whitespace.
 *
 * @param {string} tag - Text of a single tag, e.g. `<a href="x">` or `</a>`.
 * @returns {{tagName: string, attrs: Object<string, string>, isClosing: boolean,
 *   isSelfClosing: boolean, rawHtml: string} | null} Parsed tag, or null when
 *   the text is not shaped like a tag.
 */
function parseTagDirect(tag) {
  const source = tag.trim();

  const closing = /^<\/([a-zA-Z][a-zA-Z0-9-]*)\s*>$/.exec(source);
  if (closing !== null) {
    return {
      tagName: closing[1].toLowerCase(),
      attrs: {},
      isClosing: true,
      isSelfClosing: false,
      rawHtml: tag
    };
  }

  const opening = /^<([a-zA-Z][a-zA-Z0-9-]*)([\s\S]*?)(\/?)>$/.exec(source);
  if (opening === null) return null;
  const name = opening[1].toLowerCase();
  const attrText = opening[2];

  const attrs = {};
  if (attrText) {
    // name, then optionally = "double" | 'single' | bare value
    const attrPattern = /([a-zA-Z_:][-a-zA-Z0-9_:.]*)\s*(?:=\s*(?:"([^"]*)"|'([^']*)'|([^\s"'=<>`]+)))?/g;
    for (const found of attrText.matchAll(attrPattern)) {
      attrs[found[1].toLowerCase()] = decodeHtmlEntities(found[2] ?? found[3] ?? found[4] ?? "");
    }
  }

  return {
    tagName: name,
    attrs,
    isClosing: false,
    isSelfClosing: opening[3] === "/" || VOID_ELEMENTS.includes(name),
    rawHtml: tag
  };
}
|
|
187
|
+
/**
 * Parse an HTML fragment (several tags and text) into a tree of
 * `htmlElement` / `text` nodes.
 *
 * Blacklisted tags are skipped (their text content is kept), attributes are
 * passed through sanitizeAttrs, and elements still open at the end of input
 * are closed implicitly.
 *
 * @param {string} html - Fragment source.
 * @param {object} [options] - Sanitizing options; `preserveRawHtml: false`
 *   omits the `data` payload on element nodes.
 * @returns {Array<object>} Top-level nodes of the fragment.
 */
function parseHtmlFragment(html, options = {}) {
  const result = [];
  const stack = [];

  // Attach a finished node to the innermost open element, or to the result
  // when nothing is open.
  const attach = (node) => {
    if (stack.length > 0) {
      stack[stack.length - 1].children.push(node);
    } else {
      result.push(node);
    }
  };

  const tokenRegex = /(<\/?[a-zA-Z][^>]*>)|([^<]+)/g;
  let match;
  while ((match = tokenRegex.exec(html)) !== null) {
    const [, tag, text] = match;
    if (tag) {
      const parsed = parseTagDirect(tag);
      if (!parsed) continue;
      if (isTagBlacklisted(parsed.tagName, options)) {
        continue;
      }
      if (parsed.isClosing) {
        // Find the nearest open element with this tag name.
        let openIndex = stack.length - 1;
        while (openIndex >= 0 && stack[openIndex].tagName !== parsed.tagName) {
          openIndex--;
        }
        // A closing tag with no matching opener is ignored.
        if (openIndex === -1) continue;
        // Close the matched element AND everything opened after it. Popping
        // only the top entry would leave the matched element open, so later
        // siblings would nest under the wrong parent.
        while (stack.length > openIndex) {
          attach(stack.pop());
        }
        continue;
      }
      const node = {
        type: "htmlElement",
        tagName: parsed.tagName,
        attrs: sanitizeAttrs(parsed.attrs, options),
        children: [],
        data: options.preserveRawHtml !== false ? {
          rawHtml: tag,
          parsed: true
        } : void 0
      };
      if (parsed.isSelfClosing) {
        attach(node);
      } else {
        stack.push(node);
      }
    } else if (text && text.trim()) {
      // NOTE(review): text outside any open element is dropped, as before -
      // confirm whether top-level text should be kept.
      if (stack.length > 0) {
        stack[stack.length - 1].children.push({
          type: "text",
          value: text
        });
      }
    }
  }

  // Implicitly close whatever is still open, innermost first.
  while (stack.length > 0) {
    attach(stack.pop());
  }
  return result;
}
|
|
257
|
+
/**
 * Tell whether a tag name is on the blacklist.
 *
 * @param {string} tagName - Tag name in any letter case.
 * @param {object} options - `tagBlacklist` overrides DEFAULT_TAG_BLACKLIST.
 * @returns {boolean} True when the lower-cased name is listed.
 */
function isTagBlacklisted(tagName, options) {
  const denied = options.tagBlacklist ?? DEFAULT_TAG_BLACKLIST;
  const lowered = tagName.toLowerCase();
  return denied.includes(lowered);
}
|
|
261
|
+
/**
 * Tell whether an attribute must be dropped.
 *
 * @param {string} attrName - Attribute name in any letter case.
 * @param {object} options - `attrBlacklist` overrides DEFAULT_ATTR_BLACKLIST.
 * @returns {boolean} True for any name starting with "on" or any listed name.
 */
function isAttrBlacklisted(attrName, options) {
  const lowered = attrName.toLowerCase();
  const denied = options.attrBlacklist ?? DEFAULT_ATTR_BLACKLIST;
  return lowered.startsWith("on") || denied.includes(lowered);
}
|
|
267
|
+
/**
 * Tell whether a URL attribute value uses a blacklisted scheme.
 *
 * Before matching, the value is normalised the way URL parsers pre-process
 * input: ASCII tab/newline characters are removed everywhere and leading
 * control characters/spaces are stripped. Without this, values such as
 * "java\tscript:..." slip past a plain prefix check.
 *
 * @param {string} url - Attribute value to check.
 * @param {object} options - `protocolBlacklist` overrides DEFAULT_PROTOCOL_BLACKLIST.
 * @returns {boolean} True when the normalised value starts with a blacklisted
 *   prefix; `data:image/` URLs are exempt from the `data:` entry.
 */
function isProtocolDangerous(url, options) {
  const protocolBlacklist = options.protocolBlacklist ?? DEFAULT_PROTOCOL_BLACKLIST;
  const normalizedUrl = url
    .replace(/[\t\n\r]/g, "")
    .replace(/^[\u0000-\u0020]+/, "")
    .trim()
    .toLowerCase();
  for (const protocol of protocolBlacklist) {
    if (normalizedUrl.startsWith(protocol)) {
      // The first matching entry decides; inline images are the one exception.
      if (protocol === "data:" && normalizedUrl.startsWith("data:image/")) {
        return false;
      }
      return true;
    }
  }
  return false;
}
|
|
280
|
+
/**
 * Copy an attribute map, leaving out unsafe entries.
 *
 * @param {Object<string, string>} attrs - Parsed attributes.
 * @param {object} options - Passed through to the blacklist checks.
 * @returns {Object<string, string>} New object without blacklisted attributes
 *   and without URL attributes whose value uses a dangerous scheme.
 */
function sanitizeAttrs(attrs, options) {
  const safe = {};
  Object.entries(attrs).forEach(([name, value]) => {
    if (isAttrBlacklisted(name, options)) return;
    const holdsUrl = URL_ATTRS.includes(name.toLowerCase());
    if (holdsUrl && isProtocolDangerous(value, options)) return;
    safe[name] = value;
  });
  return safe;
}
|
|
291
|
+
/**
 * @param {{type: string}} node - Node to inspect.
 * @returns {boolean} True when the node's type is "html".
 */
function isHtmlNode(node) {
  const { type } = node;
  return type === "html";
}
|
|
294
|
+
/**
 * @param {object} node - Node to inspect.
 * @returns {boolean} True when the node has a `children` property holding an array.
 */
function hasChildren(node) {
  if (!("children" in node)) {
    return false;
  }
  return Array.isArray(node.children);
}
|
|
297
|
+
/**
 * Join runs of adjacent `html` nodes that together form one piece of markup.
 *
 * When an html node leaves tags unclosed, the html nodes that follow it are
 * appended (joined with "\n") until those tags are closed, a non-html node is
 * reached, or the list ends.
 *
 * @param {Array<object>} nodes - Sibling nodes.
 * @returns {Array<object>} New list; merged runs become a single
 *   `{type: "html", value}` node, all other nodes are passed through.
 */
function mergeFragmentedHtmlNodes(nodes) {
  const output = [];
  let cursor = 0;
  while (cursor < nodes.length) {
    const current = nodes[cursor];
    const pending = isHtmlNode(current) ? findUnclosedTags(current.value) : [];
    if (pending.length === 0) {
      output.push(current);
      cursor += 1;
      continue;
    }

    const pieces = [current.value];
    let stillOpen = [...pending];
    let next = cursor + 1;
    // The loop header stops right after the node that closes the last open
    // tag; the explicit break stops ON a non-html node so it is not consumed.
    for (; next < nodes.length && stillOpen.length > 0; next += 1) {
      const candidate = nodes[next];
      if (!isHtmlNode(candidate)) break;
      const closing = checkClosingTags(candidate.value, stillOpen);
      pieces.push(candidate.value);
      if (closing.hasRelevantClosing) {
        stillOpen = closing.remainingUnclosed;
      }
    }

    if (pieces.length > 1) {
      output.push({ type: "html", value: pieces.join("\n") });
      cursor = next;
    } else {
      output.push(current);
      cursor += 1;
    }
  }
  return output;
}
|
|
350
|
+
/**
 * List the tags that `html` opens but does not close.
 *
 * Void elements and tags written as `<x/>` are ignored. A closing tag removes
 * the most recent open tag of the same name, if there is one.
 *
 * @param {string} html - Markup to scan.
 * @returns {string[]} Lower-cased names of still-open tags, in opening order.
 */
function findUnclosedTags(html) {
  const open = [];
  for (const [fullTag, rawName] of html.matchAll(/<\/?([a-zA-Z][a-zA-Z0-9-]*)[^>]*\/?>/g)) {
    const tagName = rawName.toLowerCase();
    if (VOID_ELEMENTS.includes(tagName) || fullTag.endsWith("/>")) {
      continue;
    }
    if (!fullTag.startsWith("</")) {
      open.push(tagName);
      continue;
    }
    const at = open.lastIndexOf(tagName);
    if (at !== -1) {
      open.splice(at, 1);
    }
  }
  return open;
}
|
|
371
|
+
/**
 * Match the closing tags found in `html` against a list of open tags.
 *
 * @param {string} html - Markup to scan for `</tag>` occurrences.
 * @param {string[]} unclosedTags - Lower-cased names of open tags; not modified.
 * @returns {{hasRelevantClosing: boolean, remainingUnclosed: string[]}}
 *   Whether any closing tag matched an open tag, and the open tags left over.
 */
function checkClosingTags(html, unclosedTags) {
  const remainingUnclosed = [...unclosedTags];
  let hasRelevantClosing = false;
  for (const [, rawName] of html.matchAll(/<\/([a-zA-Z][a-zA-Z0-9-]*)\s*>/g)) {
    const at = remainingUnclosed.lastIndexOf(rawName.toLowerCase());
    if (at === -1) continue;
    remainingUnclosed.splice(at, 1);
    hasRelevantClosing = true;
  }
  return { hasRelevantClosing, remainingUnclosed };
}
|
|
389
|
+
/**
 * Convert the raw `html` nodes in a sibling list into `htmlElement` nodes.
 *
 * Fragmented html nodes are merged first. Each html node is then handled by
 * its detected type: fragments are parsed as a tree; self-closing tags become
 * childless elements; an opening tag collects the following siblings up to its
 * matching closing tag as children (processed recursively); stray closing tags
 * are dropped; anything else is kept as is. Non-html nodes with children are
 * processed recursively.
 *
 * @param {Array<object>} nodes - Sibling nodes.
 * @param {object} options - Sanitizing options; `preserveRawHtml: false`
 *   omits the `data` payload on created elements.
 * @returns {Array<object>} New sibling list.
 */
function processHtmlNodesInArray(nodes, options) {
  // `data` payload for an element created from a raw html node.
  const rawData = (rawHtml) =>
    options.preserveRawHtml !== false ? { rawHtml, parsed: true, originalType: "html" } : void 0;

  const queue = mergeFragmentedHtmlNodes(nodes);
  const output = [];
  let cursor = 0;

  while (cursor < queue.length) {
    const node = queue[cursor];

    if (!isHtmlNode(node)) {
      if (hasChildren(node)) {
        output.push({ ...node, children: processHtmlNodesInArray(node.children, options) });
      } else {
        output.push(node);
      }
      cursor += 1;
      continue;
    }

    switch (detectHtmlContentType(node.value)) {
      case "fragment": {
        const parsedFragment = parseHtmlFragment(node.value, options);
        if (parsedFragment.length > 0) {
          output.push(...parsedFragment);
        } else {
          // Nothing usable came out of the fragment: keep the raw node.
          output.push(node);
        }
        cursor += 1;
        break;
      }
      case "self-closing": {
        const tag = parseHtmlTag(node.value);
        if (tag && !isTagBlacklisted(tag.tagName, options)) {
          output.push({
            type: "htmlElement",
            tagName: tag.tagName,
            attrs: sanitizeAttrs(tag.attrs, options),
            children: [],
            data: rawData(node.value)
          });
        }
        cursor += 1;
        break;
      }
      case "closing":
        // A closing tag with no opener in this list is dropped.
        cursor += 1;
        break;
      case "opening": {
        const tag = parseHtmlTag(node.value);
        if (!tag || isTagBlacklisted(tag.tagName, options)) {
          cursor += 1;
          break;
        }
        // Collect siblings until the matching closing tag; `depth` tracks
        // nested openers of the same tag name.
        const inner = [];
        let depth = 1;
        let scan = cursor + 1;
        let closed = false;
        while (scan < queue.length && depth > 0) {
          const candidate = queue[scan];
          if (isHtmlNode(candidate)) {
            const candidateKind = detectHtmlContentType(candidate.value);
            if (candidateKind === "closing" || candidateKind === "opening") {
              const candidateTag = parseHtmlTag(candidate.value);
              if (candidateTag && candidateTag.tagName === tag.tagName) {
                depth += candidateKind === "closing" ? -1 : 1;
                if (depth === 0) {
                  closed = true;
                  break;
                }
              }
            }
          }
          inner.push(candidate);
          scan += 1;
        }
        output.push({
          type: "htmlElement",
          tagName: tag.tagName,
          attrs: sanitizeAttrs(tag.attrs, options),
          children: processHtmlNodesInArray(inner, options),
          data: rawData(node.value)
        });
        // Skip the closing tag itself when one was found.
        cursor = closed ? scan + 1 : scan;
        break;
      }
      default:
        output.push(node);
        cursor += 1;
    }
  }
  return output;
}
|
|
493
|
+
/**
 * Return a shallow copy of `ast` whose children have had their raw html nodes
 * converted by processHtmlNodesInArray.
 *
 * @param {{children: Array<object>}} ast - Root node; not modified.
 * @param {object} [options] - Sanitizing options.
 * @returns {object} New root node.
 */
function transformHtmlNodes(ast, options = {}) {
  const root = { ...ast };
  root.children = processHtmlNodesInArray(ast.children, options);
  return root;
}
|
|
499
|
+
/**
 * Build a micromark syntax extension that registers this file's own
 * `labelEnd` construct for the `]` character in text.
 *
 * @returns {object} Extension object with a single `text` entry.
 */
function micromarkReferenceExtension() {
  const labelEnd = {
    name: "labelEnd",
    resolveAll: resolveAllLabelEnd,
    resolveTo: resolveToLabelEnd,
    tokenize: tokenizeLabelEnd,
    // add: 'before' makes sure this construct is tried first.
    add: "before"
  };
  // Registering under the codes.rightSquareBracket key in `text` overrides labelEnd.
  return {
    text: { [codes.rightSquareBracket]: labelEnd }
  };
}
|
|
514
|
+
/**
 * Turn leftover label tokens into plain data.
 *
 * Every event whose token is a labelImage, labelLink or labelEnd is kept with
 * its token retyped to `data`; the events that directly follow it (4 after a
 * labelImage, otherwise 2) are dropped.
 *
 * @param {Array} events - Event list; rewritten in place when events were dropped.
 * @returns {Array} The same `events` array.
 */
function resolveAllLabelEnd(events) {
  const kept = [];
  for (let at = 0; at < events.length; at += 1) {
    const event = events[at];
    const token = event[1];
    kept.push(event);
    const isImage = token.type === types.labelImage;
    if (isImage || token.type === types.labelLink || token.type === types.labelEnd) {
      token.type = types.data;
      at += isImage ? 4 : 2;
    }
  }
  if (kept.length !== events.length) {
    events.splice(0, events.length, ...kept);
  }
  return events;
}
|
|
532
|
+
/**
 * Regroup the events between a label opener and the label end into a
 * link/image group containing `label` and `labelText` tokens.
 *
 * @param {Array} events - Event list, modified in place.
 * @param {object} context - Context stored on the newly created events.
 * @returns {Array} The same `events` array; unchanged when no opener/closer
 *   pair is found.
 */
function resolveToLabelEnd(events, context) {
  let offset = 0;
  let open;
  let close;

  // Walk backwards: first find the label end, then its opener, then mark any
  // earlier link openers inactive.
  for (let at = events.length - 1; at >= 0; at -= 1) {
    const [phase, token] = events[at];
    if (open !== void 0) {
      if (token.type === types.link || (token.type === types.labelLink && token._inactive)) {
        break;
      }
      if (phase === "enter" && token.type === types.labelLink) {
        token._inactive = true;
      }
    } else if (close !== void 0) {
      const opensLabel = token.type === types.labelImage || token.type === types.labelLink;
      if (phase === "enter" && opensLabel && !token._balanced) {
        open = at;
        if (token.type !== types.labelLink) {
          // Image opener: two extra events to step over.
          offset = 2;
          break;
        }
      }
    } else if (token.type === types.labelEnd) {
      close = at;
    }
  }

  if (open === void 0 || close === void 0) {
    return events;
  }

  const opener = events[open][1];
  const group = {
    type: opener.type === types.labelLink ? types.link : types.image,
    start: { ...opener.start },
    end: { ...events[events.length - 1][1].end }
  };
  const label = {
    type: types.label,
    start: { ...opener.start },
    end: { ...events[close][1].end }
  };
  const text = {
    type: types.labelText,
    start: { ...events[open + offset + 2][1].end },
    end: { ...events[close - 2][1].start }
  };

  const media = [
    ["enter", group, context],
    ["enter", label, context],
    // Opening marker events.
    ...events.slice(open + 1, open + offset + 3),
    ["enter", text, context],
    // Everything between the opener and the label end.
    ...events.slice(open + offset + 4, close - 3),
    ["exit", text, context],
    // Closing marker events.
    events[close - 2],
    events[close - 1],
    ["exit", label, context],
    // Whatever follows the label end (resource / reference).
    ...events.slice(close + 1),
    ["exit", group, context]
  ];
  events.splice(open, events.length - open, ...media);
  return events;
}
|
|
596
|
+
/**
 * Tokenizer for the `]` that may end a link or image label.
 *
 * Must be called with the tokenize context as `this`. It looks back for the
 * nearest unbalanced label opener, refuses labels whose text starts with "^",
 * and after the `]` tries a resource `(...)`, then a full reference `[...]`,
 * then a collapsed reference `[]`.
 *
 * @param {object} effects - Tokenizer effects (enter/exit/consume/attempt).
 * @param {Function} ok - State to continue with on success.
 * @param {Function} nok - State to continue with on failure.
 * @returns {Function} Initial state.
 */
function tokenizeLabelEnd(effects, ok, nok) {
  const self = this;

  // Nearest label opener that has not been marked balanced yet.
  let labelStart;
  for (let at = self.events.length - 1; at >= 0; at -= 1) {
    const candidate = self.events[at][1];
    const opensLabel = candidate.type === types.labelImage || candidate.type === types.labelLink;
    if (opensLabel && !candidate._balanced) {
      labelStart = candidate;
      break;
    }
  }

  const accept = (code) => ok(code);
  const reject = (code) => {
    labelStart._balanced = true;
    return nok(code);
  };

  // Changed: return ok even when the collapsed reference fails.
  const tryCollapsed = (code) =>
    effects.attempt({ tokenize: tokenizeReferenceCollapsed, partial: false }, accept, accept)(code);

  const afterMarker = (code) => {
    switch (code) {
      case codes.leftParenthesis:
        // Fix: return nok when resource parsing fails.
        return effects.attempt({ tokenize: tokenizeResource, partial: false }, accept, reject)(code);
      case codes.leftSquareBracket:
        // Changed: even when this is not a full reference, still try collapsed.
        return effects.attempt({ tokenize: tokenizeReferenceFull, partial: false }, accept, tryCollapsed)(code);
      default:
        return accept(code);
    }
  };

  return function start(code) {
    if (!labelStart) {
      return nok(code);
    }
    if (labelStart._inactive) {
      return reject(code);
    }
    if (labelStart.type === types.labelLink) {
      const labelText = self.sliceSerialize({ start: labelStart.end, end: self.now() });
      // Link labels starting with "^" are not handled by this construct.
      if (labelText.startsWith("^")) {
        return nok(code);
      }
    }
    effects.enter(types.labelEnd);
    effects.enter(types.labelMarker);
    effects.consume(code);
    effects.exit(types.labelMarker);
    effects.exit(types.labelEnd);
    return afterMarker;
  };
}
|
|
671
|
+
/**
 * Tokenizer for a link/image resource: `(`, optional destination, optional
 * title, `)`, with optional whitespace between the parts.
 *
 * @param {object} effects - Tokenizer effects.
 * @param {Function} ok - State to continue with on success.
 * @param {Function} nok - State to continue with on failure.
 * @returns {Function} Initial state.
 */
function tokenizeResource(effects, ok, nok) {
  return openParen;

  function openParen(code) {
    if (code !== codes.leftParenthesis) {
      return nok(code);
    }
    effects.enter(types.resource);
    effects.enter(types.resourceMarker);
    effects.consume(code);
    effects.exit(types.resourceMarker);
    return beforeDestination;
  }

  function beforeDestination(code) {
    if (markdownLineEndingOrSpace(code)) {
      return factoryWhitespace(effects, destinationOrClose)(code);
    }
    return destinationOrClose(code);
  }

  function destinationOrClose(code) {
    // `()` - empty resource.
    if (code === codes.rightParenthesis) {
      return closeParen(code);
    }
    const parseDestination = factoryDestination(
      effects,
      afterDestination,
      (failed) => nok(failed),
      types.resourceDestination,
      types.resourceDestinationLiteral,
      types.resourceDestinationLiteralMarker,
      types.resourceDestinationRaw,
      types.resourceDestinationString,
      constants.linkResourceDestinationBalanceMax
    );
    return parseDestination(code);
  }

  function afterDestination(code) {
    // A title is only looked for after whitespace.
    if (markdownLineEndingOrSpace(code)) {
      return factoryWhitespace(effects, titleOrClose)(code);
    }
    return closeParen(code);
  }

  function titleOrClose(code) {
    const startsTitle =
      code === codes.quotationMark || code === codes.apostrophe || code === codes.leftParenthesis;
    if (!startsTitle) {
      return closeParen(code);
    }
    const parseTitle = factoryTitle(
      effects,
      afterTitle,
      nok,
      types.resourceTitle,
      types.resourceTitleMarker,
      types.resourceTitleString
    );
    return parseTitle(code);
  }

  function afterTitle(code) {
    if (markdownLineEndingOrSpace(code)) {
      return factoryWhitespace(effects, closeParen)(code);
    }
    return closeParen(code);
  }

  function closeParen(code) {
    if (code !== codes.rightParenthesis) {
      return nok(code);
    }
    effects.enter(types.resourceMarker);
    effects.consume(code);
    effects.exit(types.resourceMarker);
    effects.exit(types.resource);
    return ok;
  }
}
|
|
735
|
+
/**
 * Tokenizer for a full reference: a `[label]` directly after a label end.
 *
 * Must be called with the tokenize context as `this`; it is forwarded to
 * factoryLabel.
 *
 * @param {object} effects - Tokenizer effects.
 * @param {Function} ok - State to continue with on success.
 * @param {Function} nok - State to continue with on failure.
 * @returns {Function} Initial state.
 */
function tokenizeReferenceFull(effects, ok, nok) {
  const self = this;
  const onLabel = (code) => ok(code);
  const onMissing = (code) => nok(code);

  return function referenceFull(code) {
    if (code !== codes.leftSquareBracket) {
      return nok(code);
    }
    const parseLabel = factoryLabel.call(
      self,
      effects,
      onLabel,
      onMissing,
      types.reference,
      types.referenceMarker,
      types.referenceString
    );
    return parseLabel(code);
  };
}
|
|
759
|
+
/**
 * Tokenizer for a collapsed reference: exactly `[]`.
 *
 * @param {object} effects - Tokenizer effects.
 * @param {Function} ok - State to continue with on success.
 * @param {Function} nok - State to continue with on failure.
 * @returns {Function} Initial state.
 */
function tokenizeReferenceCollapsed(effects, ok, nok) {
  // Emit one bracket as a referenceMarker token.
  const consumeMarker = (code) => {
    effects.enter(types.referenceMarker);
    effects.consume(code);
    effects.exit(types.referenceMarker);
  };

  const expectClose = (code) => {
    if (code !== codes.rightSquareBracket) {
      return nok(code);
    }
    consumeMarker(code);
    effects.exit(types.reference);
    return ok;
  };

  return function referenceCollapsedStart(code) {
    if (code !== codes.leftSquareBracket) {
      return nok(code);
    }
    effects.enter(types.reference);
    consumeMarker(code);
    return expectClose;
  };
}
|
|
782
|
+
/**
 * Build a variant of the `gfmFootnote()` extension in which the text
 * constructs for `[` and `]` use this file's own tokenizers. Everything else
 * is taken over from the original extension.
 *
 * @returns {object} Extension object.
 */
function gfmFootnoteIncremental() {
  const base = gfmFootnote();
  const withTokenizer = (construct, tokenize) => ({ ...construct, tokenize });

  const text = {
    ...base.text,
    // Override text[91] (handling of `[`): the starting point of footnote-reference parsing.
    [codes.leftSquareBracket]: withTokenizer(
      base.text[codes.leftSquareBracket],
      tokenizeGfmFootnoteCallIncremental
    ),
    // Override text[93] (handling of `]`): used for cases like ![^1].
    [codes.rightSquareBracket]: withTokenizer(
      base.text[codes.rightSquareBracket],
      tokenizePotentialGfmFootnoteCallIncremental
    )
  };
  return { ...base, text };
}
|
|
801
|
+
/**
 * Tokenizer for a footnote call `[^label]`.
 *
 * Accepts any non-empty label without whitespace or `[`, up to
 * `constants.linkReferenceSizeMax` characters. No lookup of the label is done
 * here.
 *
 * @param {object} effects - Tokenizer effects.
 * @param {Function} ok - State to continue with on success.
 * @param {Function} nok - State to continue with on failure.
 * @returns {Function} Initial state.
 */
function tokenizeGfmFootnoteCallIncremental(effects, ok, nok) {
  let size = 0;
  let data = false;
  return start;

  function start(code) {
    if (code !== codes.leftSquareBracket) {
      return nok(code);
    }
    effects.enter("gfmFootnoteCall");
    effects.enter("gfmFootnoteCallLabelMarker");
    effects.consume(code);
    effects.exit("gfmFootnoteCallLabelMarker");
    return callStart;
  }

  function callStart(code) {
    if (code !== codes.caret) {
      return nok(code);
    }
    effects.enter("gfmFootnoteCallMarker");
    effects.consume(code);
    effects.exit("gfmFootnoteCallMarker");
    effects.enter("gfmFootnoteCallString");
    effects.enter("chunkString").contentType = "string";
    return callData;
  }

  function callData(code) {
    const closing = code === codes.rightSquareBracket;
    // Too long.
    if (size > constants.linkReferenceSizeMax) return nok(code);
    // Right bracket but no data yet.
    if (closing && !data) return nok(code);
    // EOF, line ending, space, tab and left bracket are not supported.
    if (code === codes.eof || code === codes.leftSquareBracket || markdownLineEndingOrSpace(code)) {
      return nok(code);
    }
    if (closing) {
      effects.exit("chunkString");
      effects.exit("gfmFootnoteCallString");
      effects.enter("gfmFootnoteCallLabelMarker");
      effects.consume(code);
      effects.exit("gfmFootnoteCallLabelMarker");
      effects.exit("gfmFootnoteCall");
      return ok;
    }
    if (!markdownLineEndingOrSpace(code)) {
      data = true;
    }
    size += 1;
    effects.consume(code);
    return code === codes.backslash ? callEscape : callData;
  }

  function callEscape(code) {
    const escapable =
      code === codes.leftSquareBracket || code === codes.backslash || code === codes.rightSquareBracket;
    if (!escapable) {
      return callData(code);
    }
    effects.consume(code);
    size += 1;
    return callData;
  }
}
|
|
861
|
+
/**
 * Tokenizer tried at a `]` that may close a footnote call whose opening was
 * tokenized as an image label start (`labelImage`).
 *
 * When the tokenizer is created it scans the events backwards for the nearest
 * `labelImage` token. At `]` it accepts only if that token exists, is marked
 * `_balanced`, and the normalized text between it and the current position
 * starts with `^`. No footnote-definition lookup is performed.
 *
 * @this The tokenize context (`events`, `sliceSerialize`, `now` are used).
 * @param effects Tokenizer effects (`enter`, `exit`, `consume`).
 * @param ok State invoked with the code after the `]` marker was consumed.
 * @param nok State invoked with the code when the input is rejected.
 * @returns The initial state function.
 */
function tokenizePotentialGfmFootnoteCallIncremental(effects, ok, nok) {
  const context = this;
  // Resolved eagerly, before any code is fed to the state function.
  const opening = findOpeningLabelImage(context.events);

  return atClosingBracket;

  /**
   * Walks the events from newest to oldest and returns the first `labelImage`
   * token, or `undefined` when a token that ends the search is met first.
   */
  function findOpeningLabelImage(events) {
    for (let position = events.length - 1; position >= 0; position -= 1) {
      const candidate = events[position][1];
      switch (candidate.type) {
        case "labelImage":
          return candidate;
        case "gfmFootnoteCall":
        case "labelLink":
        case "label":
        case "image":
        case "link":
          // Walked far enough: stop searching.
          return undefined;
        default:
      }
    }
    return undefined;
  }

  /** Validates the candidate and, on success, consumes the closing `]`. */
  function atClosingBracket(code) {
    const usable = code === codes.rightSquareBracket && opening && opening._balanced;
    if (!usable) {
      return nok(code);
    }

    const rawLabel = context.sliceSerialize({
      start: opening.end,
      end: context.now()
    });
    const identifier = normalizeIdentifier(rawLabel);
    if (identifier.codePointAt(0) !== codes.caret) {
      return nok(code);
    }

    effects.enter("gfmFootnoteCallLabelMarker");
    effects.consume(code);
    effects.exit("gfmFootnoteCallLabelMarker");
    return ok(code);
  }
}
|
|
898
|
+
|
|
899
|
+
// src/parser/ast/types.ts
|
|
900
|
+
/**
 * Collects the micromark syntax extensions and mdast extensions contributed
 * by a list of plugins.
 *
 * Only plugins whose `type` is `"micromark"` or `"both"` and that carry a
 * `micromark` object are considered. A plugin may supply just one of the two
 * lists: a missing `extensions` or `mdastExtensions` entry is treated as an
 * empty list rather than causing a spread of `undefined` (a TypeError).
 * A nullish `plugins` argument and nullish entries are ignored as well.
 *
 * @param plugins Plugin descriptors (may be `null`/`undefined`).
 * @returns `{ extensions, mdastExtensions }`, each a new array in plugin order.
 */
function extractMicromarkExtensions(plugins) {
  const extensions = [];
  const mdastExtensions = [];
  for (const plugin of plugins ?? []) {
    const targetsMicromark = plugin?.type === "micromark" || plugin?.type === "both";
    if (!targetsMicromark || !plugin.micromark) {
      continue;
    }
    extensions.push(...(plugin.micromark.extensions ?? []));
    mdastExtensions.push(...(plugin.micromark.mdastExtensions ?? []));
  }
  return { extensions, mdastExtensions };
}
|
|
911
|
+
|
|
912
|
+
// src/parser/ast/MicromarkAstBuilder.ts
|
|
913
|
+
// Node types that `isInlineContainer` reports as inline containers.
const INLINE_CONTAINER_TYPES = [
  "paragraph",
  "heading",
  "tableCell",
  "delete",
  "emphasis",
  "strong",
  "link",
  "linkReference"
];

/**
 * Tells whether a node's `type` is one of `INLINE_CONTAINER_TYPES`.
 *
 * @param node Node exposing a `type` property.
 * @returns `true` when the type is listed, `false` otherwise.
 */
function isInlineContainer(node) {
  const { type } = node;
  return INLINE_CONTAINER_TYPES.some((candidate) => candidate === type);
}
|
|
926
|
+
/**
 * Builds syntax trees from Markdown text with `fromMarkdown`, using a set of
 * extensions assembled once at construction time, and converts top-level
 * nodes into block descriptors.
 */
class MicromarkAstBuilder {
  /** Options object passed to the constructor. */
  options;
  /** Result of `computeContainerConfig`; `undefined` when containers are off. */
  containerConfig;
  /** Result of `computeHtmlTreeConfig`; `undefined` when the HTML tree is off. */
  htmlTreeConfig;
  /** Syntax extensions, cached so that `parse` does not rebuild them on each call. */
  cachedExtensions = [];
  /** mdast extensions, cached alongside `cachedExtensions`. */
  cachedMdastExtensions = [];

  /**
   * @param options Builder options. The fields read by this class are `gfm`,
   *   `math`, `containers`, `htmlTree`, `plugins`, `extensions` and
   *   `mdastExtensions`.
   */
  constructor(options = {}) {
    this.options = options;
    this.containerConfig = this.computeContainerConfig(options);
    this.htmlTreeConfig = this.computeHtmlTreeConfig(options);
    this.initExtensions();
  }

  /**
   * Fills `cachedExtensions` and `cachedMdastExtensions` according to the
   * options. Push order: gfm, math, directive, plugins, user-supplied
   * extensions, then the incremental footnote and reference extensions.
   */
  initExtensions() {
    const { gfm: gfmEnabled, math: mathEnabled, plugins, extensions, mdastExtensions } = this.options;
    const syntax = this.cachedExtensions;
    const tree = this.cachedMdastExtensions;

    if (gfmEnabled) {
      syntax.push(gfm());
      tree.push(...gfmFromMarkdown(), gfmFootnoteFromMarkdown());
    }
    if (mathEnabled) {
      syntax.push(math());
      tree.push(mathFromMarkdown());
    }
    // The directive syntax is enabled whenever a container config exists.
    if (this.containerConfig !== undefined) {
      syntax.push(directive());
      tree.push(directiveFromMarkdown());
    }
    if (plugins) {
      const fromPlugins = extractMicromarkExtensions(plugins);
      syntax.push(...fromPlugins.extensions);
      tree.push(...fromPlugins.mdastExtensions);
    }
    if (extensions) {
      syntax.push(...extensions);
    }
    if (mdastExtensions) {
      tree.push(...mdastExtensions);
    }
    // NOTE(review): these two are pushed last, presumably so that they take
    // effect over the constructs registered earlier - confirm against how
    // micromark merges extensions.
    if (gfmEnabled) {
      syntax.push(gfmFootnoteIncremental());
    }
    syntax.push(micromarkReferenceExtension());
  }

  /**
   * Derives the container configuration from `options.containers`.
   *
   * @returns `undefined` for a falsy value, `{}` for `true`, otherwise the
   *   value itself.
   */
  computeContainerConfig(options) {
    const { containers } = options;
    if (!containers) return undefined;
    if (containers === true) return {};
    return containers;
  }

  /**
   * Derives the HTML tree configuration from `options.htmlTree`.
   *
   * @returns `undefined` for a falsy value, `{}` for `true`, otherwise the
   *   value itself.
   */
  computeHtmlTreeConfig(options) {
    const { htmlTree } = options;
    if (!htmlTree) return undefined;
    if (htmlTree === true) return {};
    return htmlTree;
  }

  /**
   * Parses Markdown text into a syntax tree.
   *
   * With an HTML tree config the result of `fromMarkdown` is passed through
   * `transformHtmlNodes`; otherwise `html` nodes are turned into plain text.
   *
   * @param text Markdown text.
   * @returns The resulting tree.
   */
  parse(text) {
    const tree = fromMarkdown(text, {
      extensions: this.cachedExtensions,
      mdastExtensions: this.cachedMdastExtensions
    });
    return this.htmlTreeConfig
      ? transformHtmlNodes(tree, this.htmlTreeConfig)
      : this.convertHtmlToText(tree);
  }

  /**
   * Returns a shallow copy of the tree whose `html` nodes are replaced by
   * text (used when HTML tree conversion is disabled).
   *
   * @param ast Tree to convert.
   * @returns The converted tree.
   */
  convertHtmlToText(ast) {
    return {
      ...ast,
      children: this.processBlockChildren(ast.children)
    };
  }

  /**
   * Converts a list of block-level nodes: `html` nodes become paragraphs,
   * parents are copied with converted children, other nodes are returned
   * unchanged.
   */
  processBlockChildren(children) {
    return children.map((node) => {
      if (node.type === "html") {
        return this.convertBlockHtmlToParagraph(node);
      }
      const hasChildList = "children" in node && Array.isArray(node.children);
      if (!hasChildList) {
        return node;
      }
      // Inline containers hold inline content; everything else holds blocks.
      let converted;
      if (isInlineContainer(node)) {
        converted = this.processInlineChildren(node.children);
      } else {
        converted = this.processBlockChildren(node.children);
      }
      return { ...node, children: converted };
    });
  }

  /**
   * Converts a list of inline nodes: `html` nodes become text nodes, parents
   * are copied with converted children, other nodes are returned unchanged.
   */
  processInlineChildren(children) {
    return children.map((node) => {
      if (node.type === "html") {
        return this.convertInlineHtmlToText(node);
      }
      const hasChildList = "children" in node && Array.isArray(node.children);
      if (!hasChildList) {
        return node;
      }
      return {
        ...node,
        children: this.processInlineChildren(node.children)
      };
    });
  }

  /**
   * Wraps the raw value of a block-level `html` node in a paragraph that
   * keeps the node's position. The inner text node carries no position.
   */
  convertBlockHtmlToParagraph(htmlNode) {
    const { value, position } = htmlNode;
    return {
      type: "paragraph",
      children: [{ type: "text", value }],
      position
    };
  }

  /**
   * Turns an inline `html` node into a text node with the same value and
   * position.
   */
  convertInlineHtmlToText(htmlNode) {
    const { value, position } = htmlNode;
    return { type: "text", value, position };
  }

  /**
   * Converts nodes into block descriptors.
   *
   * @param nodes Nodes to convert.
   * @param startOffset Offset added to each node's own offsets.
   * @param rawText Text the node offsets refer to.
   * @param status Status stored on every block.
   * @param generateBlockId Called once per node to produce the block id.
   * @returns One `{ id, status, node, startOffset, endOffset, rawText }` per node.
   */
  nodesToBlocks(nodes, startOffset, rawText, status, generateBlockId) {
    const blocks = [];
    for (const node of nodes) {
      // Nodes without position data fall back to the range [0, 1).
      const from = node.position?.start?.offset ?? 0;
      const to = node.position?.end?.offset ?? 1;
      const slice = rawText.substring(from, to);
      blocks.push({
        id: generateBlockId(),
        status,
        node,
        startOffset: startOffset + from,
        endOffset: startOffset + to,
        rawText: slice
      });
    }
    return blocks;
  }
}
|
|
1109
|
+
|
|
1110
|
+
// src/engines/micromark/index.ts
|
|
1111
|
+
/**
 * Factory for `MicromarkAstBuilder`.
 *
 * @param options Options forwarded unchanged to the builder constructor;
 *   defaults to an empty object.
 * @returns A new `MicromarkAstBuilder` instance.
 */
function createMicromarkBuilder(options = {}) {
  const builder = new MicromarkAstBuilder(options);
  return builder;
}
|
|
1114
|
+
/**
|
|
1115
|
+
* @file Micromark 扩展:支持增量解析的 Reference 语法
|
|
1116
|
+
*
|
|
1117
|
+
* @description
|
|
1118
|
+
* 在增量解析场景中,引用式图片/链接(如 `![Alt][id]`)可能在定义(`[id]: url`)之前出现。
|
|
1119
|
+
* 标准 micromark 会检查 parser.defined,如果 id 未定义就解析为文本。
|
|
1120
|
+
*
|
|
1121
|
+
* 本扩展通过覆盖 labelEnd 构造,移除 parser.defined 检查,
|
|
1122
|
+
* 使得 reference 语法总是被解析为 reference token,
|
|
1123
|
+
* 由渲染层根据实际的 definitionMap 决定如何渲染。
|
|
1124
|
+
*
|
|
1125
|
+
* @module micromark-reference-extension
|
|
1126
|
+
*
|
|
1127
|
+
* @features
|
|
1128
|
+
* - ✅ 支持所有 resource 语法(带 title 的图片/链接)
|
|
1129
|
+
* - ✅ 支持所有 reference 语法(full, collapsed, shortcut)
|
|
1130
|
+
* - ✅ 延迟验证:解析时不检查定义是否存在
|
|
1131
|
+
* - ✅ 使用官方 factory 函数,保证与 CommonMark 标准一致
|
|
1132
|
+
*
|
|
1133
|
+
* @dependencies
|
|
1134
|
+
* - micromark-factory-destination: 解析 URL(支持尖括号、括号平衡)
|
|
1135
|
+
* - micromark-factory-title: 解析 title(支持三种引号,支持多行)
|
|
1136
|
+
* - micromark-factory-label: 解析 label(支持转义、长度限制)
|
|
1137
|
+
* - micromark-factory-whitespace: 解析空白符(正确生成 lineEnding/linePrefix token)
|
|
1138
|
+
* - micromark-util-character: 字符判断工具
|
|
1139
|
+
* - micromark-util-symbol: 常量(codes, types, constants)
|
|
1140
|
+
* - micromark-util-types: TypeScript 类型定义
|
|
1141
|
+
*
|
|
1142
|
+
* @see {@link https://github.com/micromark/micromark} - micromark 官方文档
|
|
1143
|
+
* @see {@link https://spec.commonmark.org/0.30/#images} - CommonMark 图片规范
|
|
1144
|
+
* @see {@link https://spec.commonmark.org/0.30/#links} - CommonMark 链接规范
|
|
1145
|
+
*
|
|
1146
|
+
* @example
|
|
1147
|
+
* ```typescript
|
|
1148
|
+
* import { micromarkReferenceExtension } from './micromark-reference-extension'
|
|
1149
|
+
* import { fromMarkdown } from 'mdast-util-from-markdown'
|
|
1150
|
+
*
|
|
1151
|
+
* const extensions = [micromarkReferenceExtension()]
|
|
1152
|
+
* const ast = fromMarkdown(text, { extensions })
|
|
1153
|
+
* ```
|
|
1154
|
+
*
|
|
1155
|
+
* @author Incremark Team
|
|
1156
|
+
* @license MIT
|
|
1157
|
+
*/
|
|
1158
|
+
|
|
1159
|
+
export { MicromarkAstBuilder, createMicromarkBuilder };
|
|
1160
|
+
//# sourceMappingURL=index.js.map
|
|
1161
|
+
//# sourceMappingURL=index.js.map
|