@incremark/core 0.2.6 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.en.md +132 -23
- package/dist/MarkedAstBuildter-BsjxZko_.d.ts +72 -0
- package/dist/detector/index.d.ts +118 -1
- package/dist/detector/index.js +196 -118
- package/dist/detector/index.js.map +1 -1
- package/dist/engines/marked/index.d.ts +29 -0
- package/dist/engines/marked/index.js +1541 -0
- package/dist/engines/marked/index.js.map +1 -0
- package/dist/engines/micromark/index.d.ts +106 -0
- package/dist/engines/micromark/index.js +1161 -0
- package/dist/engines/micromark/index.js.map +1 -0
- package/dist/index-mZ7yCqNH.d.ts +225 -0
- package/dist/index.d.ts +68 -54
- package/dist/index.js +1908 -1198
- package/dist/index.js.map +1 -1
- package/dist/types-C_EW5vfp.d.ts +123 -0
- package/dist/utils/index.d.ts +17 -1
- package/dist/utils/index.js +21 -1
- package/dist/utils/index.js.map +1 -1
- package/package.json +18 -3
- package/dist/index-BMUkM7mT.d.ts +0 -422
|
@@ -0,0 +1,1541 @@
|
|
|
1
|
+
import { Lexer, lexer } from 'marked';
|
|
2
|
+
|
|
3
|
+
// src/parser/ast/MarkedAstBuildter.ts
|
|
4
|
+
|
|
5
|
+
// src/extensions/html-extension/index.ts
|
|
6
|
+
// Tag names that are rejected by default when raw HTML is turned into element nodes.
var DEFAULT_TAG_BLACKLIST = [
  "script", "style", "iframe", "object", "embed", "form",
  "input", "button", "textarea", "select", "meta", "link",
  "base", "frame", "frameset", "applet", "noscript", "template"
];

// Attribute names that are rejected by default.
// Event attributes are matched by pattern instead of being listed here.
var DEFAULT_ATTR_BLACKLIST = ["formaction", "xlink:href", "xmlns", "srcdoc"];

// URL scheme prefixes that are rejected by default.
// Note: data:image/ is special-cased and allowed.
var DEFAULT_PROTOCOL_BLACKLIST = ["javascript:", "vbscript:", "data:"];
|
|
39
|
+
// Attributes whose value is a URL and therefore subject to the protocol check.
var URL_ATTRS = ["href", "src", "action", "formaction", "poster", "background"];

// Elements that never have a closing tag. "param" is included so this list agrees
// with SELF_CLOSING_TAGS used by the inline HTML extension; without it a bare
// <param> is classified as an opening tag.
var VOID_ELEMENTS = [
  "area", "base", "br", "col", "embed", "hr", "img",
  "input", "link", "meta", "param", "source", "track", "wbr"
];
|
|
41
|
+
/**
 * Classifies a raw HTML string.
 *
 * @param {string} html - Raw HTML text (surrounding whitespace is ignored).
 * @returns {"closing"|"self-closing"|"opening"|"fragment"|"unknown"}
 *   "closing" for a lone `</tag>`, "self-closing"/"opening" for a lone tag,
 *   "fragment" when more than one `<` is involved, otherwise "unknown".
 */
function detectHtmlContentType(html) {
  const source = html.trim();
  if (source === "" || !source.startsWith("<")) return "unknown";

  if (/^<\/([a-zA-Z][a-zA-Z0-9-]*)\s*>$/.test(source)) return "closing";

  const single = /^<([a-zA-Z][a-zA-Z0-9-]*)(\s[^]*?)?(\/?)>$/.exec(source);
  if (single === null) {
    // Not a single tag: more than one "<" means several tags are present.
    const ltCount = source.split("<").length - 1;
    return ltCount > 1 ? "fragment" : "unknown";
  }

  const tagName = single[1];
  const attrsString = single[2];
  const selfClosingSlash = single[3];

  if (attrsString) {
    // A "<" outside quoted attribute values means the regex swallowed a second tag.
    let openQuote = null;
    for (const ch of attrsString) {
      if (openQuote !== null) {
        if (ch === openQuote) openQuote = null;
        continue;
      }
      if (ch === '"' || ch === "'") {
        openQuote = ch;
      } else if (ch === "<") {
        return "fragment";
      }
    }
  }

  if (selfClosingSlash === "/" || VOID_ELEMENTS.includes(tagName.toLowerCase())) {
    return "self-closing";
  }
  return "opening";
}
|
|
83
|
+
/**
 * Parses a string that holds exactly one HTML tag.
 *
 * @param {string} html - Raw tag text.
 * @returns {{tagName: string, attrs: Object, isClosing: boolean, isSelfClosing: boolean, rawHtml: string}|null}
 *   Tag description with lower-cased tag/attribute names and entity-decoded
 *   attribute values, or null when `html` is not a single opening, closing or
 *   self-closing tag.
 */
function parseHtmlTag(html) {
  const source = html.trim();
  const kind = detectHtmlContentType(source);

  if (kind === "closing") {
    const closing = /^<\/([a-zA-Z][a-zA-Z0-9-]*)\s*>$/.exec(source);
    if (closing === null) return null;
    return {
      tagName: closing[1].toLowerCase(),
      attrs: {},
      isClosing: true,
      isSelfClosing: false,
      rawHtml: html
    };
  }
  if (kind !== "opening" && kind !== "self-closing") return null;

  const opening = /^<([a-zA-Z][a-zA-Z0-9-]*)(\s[^]*?)?(\/?)>$/.exec(source);
  if (opening === null) return null;
  const rawName = opening[1];
  const attrsString = opening[2];
  const isSelfClosing = opening[3] === "/" || VOID_ELEMENTS.includes(rawName.toLowerCase());

  const attrs = {};
  if (attrsString) {
    // name, then optionally = "double" | 'single' | bare value
    const attrPattern = /([a-zA-Z_:][-a-zA-Z0-9_:.]*)\s*(?:=\s*(?:"([^"]*)"|'([^']*)'|([^\s"'=<>`]+)))?/g;
    for (const [, name, doubleQuoted, singleQuoted, unquoted] of attrsString.matchAll(attrPattern)) {
      attrs[name.toLowerCase()] = decodeHtmlEntities(doubleQuoted ?? singleQuoted ?? unquoted ?? "");
    }
  }

  return {
    tagName: rawName.toLowerCase(),
    attrs,
    isClosing: false,
    isSelfClosing,
    rawHtml: html
  };
}
|
|
122
|
+
/**
 * Decodes the HTML character references found in an attribute value.
 *
 * Handles decimal (`&#39;`) and hexadecimal (`&#x27;`) numeric references plus a
 * small set of named references. Anything unrecognised is left untouched.
 *
 * @param {string} text - Text that may contain character references.
 * @returns {string} Text with recognised references replaced.
 */
function decodeHtmlEntities(text) {
  // The table keys in the extracted source had themselves been entity-decoded
  // (leaving invalid literals); they are restored here. "&#39;" needs no entry
  // because numeric references are handled below.
  // NOTE(review): nbsp is mapped to U+00A0 per the HTML named-reference table;
  // the extracted literal was ambiguous - confirm against the upstream source.
  const named = new Map([
    ["amp", "&"],
    ["lt", "<"],
    ["gt", ">"],
    ["quot", '"'],
    ["apos", "'"],
    ["nbsp", "\u00A0"]
  ]);
  return text.replace(/&(?:#(\d+)|#x([a-fA-F0-9]+)|([a-zA-Z]+));/g, (match, dec, hex, name) => {
    if (name !== undefined) return named.get(name) ?? match;
    const codePoint = dec !== undefined ? parseInt(dec, 10) : parseInt(hex, 16);
    // fromCodePoint keeps astral characters intact (fromCharCode truncates to
    // 16 bits) but throws above U+10FFFF, so out-of-range input stays as written.
    return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : match;
  });
}
|
|
138
|
+
/**
 * Parses one tag string without first classifying it.
 *
 * @param {string} tag - Text of a single tag, e.g. `<a href="x">` or `</a>`.
 * @returns {{tagName: string, attrs: Object, isClosing: boolean, isSelfClosing: boolean, rawHtml: string}|null}
 *   Tag description, or null when the text matches neither tag pattern.
 */
function parseTagDirect(tag) {
  const source = tag.trim();

  const closing = /^<\/([a-zA-Z][a-zA-Z0-9-]*)\s*>$/.exec(source);
  if (closing !== null) {
    return {
      tagName: closing[1].toLowerCase(),
      attrs: {},
      isClosing: true,
      isSelfClosing: false,
      rawHtml: tag
    };
  }

  const opening = /^<([a-zA-Z][a-zA-Z0-9-]*)([\s\S]*?)(\/?)>$/.exec(source);
  if (opening === null) return null;
  const rawName = opening[1];
  const attrsString = opening[2];
  const isSelfClosing = opening[3] === "/" || VOID_ELEMENTS.includes(rawName.toLowerCase());

  const attrs = {};
  if (attrsString) {
    const attrPattern = /([a-zA-Z_:][-a-zA-Z0-9_:.]*)\s*(?:=\s*(?:"([^"]*)"|'([^']*)'|([^\s"'=<>`]+)))?/g;
    for (const [, name, doubleQuoted, singleQuoted, unquoted] of attrsString.matchAll(attrPattern)) {
      attrs[name.toLowerCase()] = decodeHtmlEntities(doubleQuoted ?? singleQuoted ?? unquoted ?? "");
    }
  }

  return {
    tagName: rawName.toLowerCase(),
    attrs,
    isClosing: false,
    isSelfClosing,
    rawHtml: tag
  };
}
|
|
172
|
+
/**
 * Parses an HTML fragment (several tags and text) into a tree of nodes.
 *
 * Blacklisted tags are skipped (their text content is still attached to the
 * enclosing element), attributes are sanitized, and elements left open at the
 * end of the input are closed implicitly.
 *
 * @param {string} html - Raw HTML fragment.
 * @param {Object} [options] - Passed to isTagBlacklisted/sanitizeAttrs;
 *   `preserveRawHtml === false` omits the `data` field on element nodes.
 * @returns {Array<Object>} Top-level `htmlElement` nodes.
 */
function parseHtmlFragment(html, options = {}) {
  const result = [];
  const stack = [];

  // Attach a finished node to the innermost open element, or to the root list.
  const attach = (node) => {
    const parent = stack[stack.length - 1];
    (parent ? parent.children : result).push(node);
  };

  const tokenRegex = /(<\/?[a-zA-Z][^>]*>)|([^<]+)/g;
  let match;
  while ((match = tokenRegex.exec(html)) !== null) {
    const [, tag, text] = match;

    if (tag) {
      const parsed = parseTagDirect(tag);
      if (!parsed || isTagBlacklisted(parsed.tagName, options)) continue;

      if (parsed.isClosing) {
        // Find the nearest open element with this name; ignore stray closers.
        let openIndex = stack.length - 1;
        while (openIndex >= 0 && stack[openIndex].tagName !== parsed.tagName) openIndex--;
        if (openIndex === -1) continue;
        // Close the matched element and every unclosed descendant above it.
        // Popping only the top node would leave the matched element open and
        // nest later siblings inside it.
        while (stack.length > openIndex) attach(stack.pop());
        continue;
      }

      const node = {
        type: "htmlElement",
        tagName: parsed.tagName,
        attrs: sanitizeAttrs(parsed.attrs, options),
        children: [],
        data: options.preserveRawHtml !== false ? {
          rawHtml: tag,
          parsed: true
        } : void 0
      };
      if (parsed.isSelfClosing) {
        attach(node);
      } else {
        stack.push(node);
      }
    } else if (text && text.trim() && stack.length > 0) {
      // NOTE(review): text outside any open element is dropped, as in the
      // original implementation - confirm this is intended.
      stack[stack.length - 1].children.push({
        type: "text",
        value: text
      });
    }
  }

  // Implicitly close whatever is still open, innermost first.
  while (stack.length > 0) attach(stack.pop());
  return result;
}
|
|
242
|
+
/**
 * Tells whether a tag name is on the blacklist (case-insensitive on the tag name).
 *
 * @param {string} tagName - Tag name to check.
 * @param {Object} options - `tagBlacklist` overrides DEFAULT_TAG_BLACKLIST.
 * @returns {boolean} True when the tag must be rejected.
 */
function isTagBlacklisted(tagName, options) {
  const blocked = options.tagBlacklist ?? DEFAULT_TAG_BLACKLIST;
  const normalized = tagName.toLowerCase();
  return blocked.includes(normalized);
}
|
|
246
|
+
/**
 * Tells whether an attribute must be removed.
 *
 * Any attribute whose lower-cased name starts with "on" is rejected regardless
 * of the configured list.
 *
 * @param {string} attrName - Attribute name to check.
 * @param {Object} options - `attrBlacklist` overrides DEFAULT_ATTR_BLACKLIST.
 * @returns {boolean} True when the attribute must be rejected.
 */
function isAttrBlacklisted(attrName, options) {
  const normalized = attrName.toLowerCase();
  const blocked = options.attrBlacklist ?? DEFAULT_ATTR_BLACKLIST;
  return normalized.startsWith("on") ? true : blocked.includes(normalized);
}
|
|
252
|
+
/**
 * Tells whether a URL starts with a blacklisted scheme.
 *
 * `data:image/...` URLs are allowed even when `data:` is blacklisted.
 *
 * @param {string} url - Attribute value to inspect.
 * @param {Object} options - `protocolBlacklist` overrides DEFAULT_PROTOCOL_BLACKLIST.
 * @returns {boolean} True when the URL must be rejected.
 */
function isProtocolDangerous(url, options) {
  const protocolBlacklist = options.protocolBlacklist ?? DEFAULT_PROTOCOL_BLACKLIST;
  // URL parsers remove tab/newline characters anywhere in the input and strip
  // leading C0 control characters and spaces, so "java\tscript:" and
  // "\x01javascript:" still resolve to the javascript: scheme. Normalize the
  // same way before comparing; trim() alone misses both cases.
  const normalizedUrl = url
    .replace(/[\t\n\r]/g, "")
    .trim()
    .replace(/^[\u0000-\u0020]+/, "")
    .toLowerCase();
  for (const protocol of protocolBlacklist) {
    if (!normalizedUrl.startsWith(protocol)) continue;
    // Inline images stay usable even though data: is otherwise rejected.
    if (protocol === "data:" && normalizedUrl.startsWith("data:image/")) {
      return false;
    }
    return true;
  }
  return false;
}
|
|
265
|
+
/**
 * Returns a copy of `attrs` without blacklisted attributes and without URL
 * attributes whose value uses a dangerous scheme.
 *
 * @param {Object} attrs - Attribute name/value map.
 * @param {Object} options - Passed to isAttrBlacklisted/isProtocolDangerous.
 * @returns {Object} New object holding only the attributes that passed.
 */
function sanitizeAttrs(attrs, options) {
  const kept = {};
  for (const name of Object.keys(attrs)) {
    const value = attrs[name];
    if (isAttrBlacklisted(name, options)) continue;
    const isUrlAttr = URL_ATTRS.includes(name.toLowerCase());
    if (isUrlAttr && isProtocolDangerous(value, options)) continue;
    kept[name] = value;
  }
  return kept;
}
|
|
276
|
+
/**
 * Tells whether a node is a raw HTML node.
 *
 * @param {Object} node - AST node.
 * @returns {boolean} True when `node.type` is "html".
 */
function isHtmlNode(node) {
  const { type } = node;
  return type === "html";
}
|
|
279
|
+
/**
 * Tells whether a node carries a `children` array.
 *
 * @param {Object} node - AST node.
 * @returns {boolean} True when `children` exists and is an array.
 */
function hasChildren(node) {
  if (!("children" in node)) return false;
  return Array.isArray(node.children);
}
|
|
282
|
+
/**
 * Joins consecutive `html` nodes when an earlier one leaves tags unclosed.
 *
 * Starting from an html node with unclosed tags, the following html nodes are
 * appended (separated by "\n") until every pending tag has been closed, a
 * non-html node is reached, or the list ends. Other nodes pass through as-is.
 *
 * @param {Array<Object>} nodes - Sibling AST nodes.
 * @returns {Array<Object>} New list with merged `html` nodes.
 */
function mergeFragmentedHtmlNodes(nodes) {
  const output = [];
  let cursor = 0;

  while (cursor < nodes.length) {
    const current = nodes[cursor];
    const pendingAtStart = isHtmlNode(current) ? findUnclosedTags(current.value) : [];

    if (pendingAtStart.length === 0) {
      // Non-html node, or html with balanced tags: nothing to merge.
      output.push(current);
      cursor++;
      continue;
    }

    const parts = [current.value];
    let pending = pendingAtStart;
    let lookahead = cursor + 1;
    while (lookahead < nodes.length && pending.length > 0 && isHtmlNode(nodes[lookahead])) {
      const candidate = nodes[lookahead];
      const closingInfo = checkClosingTags(candidate.value, pending);
      // The candidate is absorbed whether or not it closes one of the pending tags.
      parts.push(candidate.value);
      if (closingInfo.hasRelevantClosing) {
        pending = closingInfo.remainingUnclosed;
      }
      lookahead++;
    }

    if (parts.length > 1) {
      output.push({
        type: "html",
        value: parts.join("\n")
      });
      cursor = lookahead;
    } else {
      output.push(current);
      cursor++;
    }
  }

  return output;
}
|
|
335
|
+
/**
 * Lists the tags that are opened but not closed inside `html`.
 *
 * Void elements and tags written as `<x/>` are ignored. A closing tag removes
 * the most recent open tag of the same name, if there is one.
 *
 * @param {string} html - Raw HTML text.
 * @returns {string[]} Lower-cased names of the still-open tags, in opening order.
 */
function findUnclosedTags(html) {
  const open = [];
  for (const [fullTag, rawName] of html.matchAll(/<\/?([a-zA-Z][a-zA-Z0-9-]*)[^>]*\/?>/g)) {
    const name = rawName.toLowerCase();
    if (VOID_ELEMENTS.includes(name) || fullTag.endsWith("/>")) continue;

    if (!fullTag.startsWith("</")) {
      open.push(name);
      continue;
    }
    const at = open.lastIndexOf(name);
    if (at !== -1) open.splice(at, 1);
  }
  return open;
}
|
|
356
|
+
/**
 * Matches the closing tags in `html` against a list of pending open tags.
 *
 * @param {string} html - Raw HTML text.
 * @param {string[]} unclosedTags - Lower-cased names of pending tags (not mutated).
 * @returns {{hasRelevantClosing: boolean, remainingUnclosed: string[]}}
 *   Whether any pending tag was closed, and the tags still pending afterwards.
 */
function checkClosingTags(html, unclosedTags) {
  const remainingUnclosed = unclosedTags.slice();
  let hasRelevantClosing = false;

  for (const [, rawName] of html.matchAll(/<\/([a-zA-Z][a-zA-Z0-9-]*)\s*>/g)) {
    // Each closer cancels the most recently opened tag of the same name.
    const at = remainingUnclosed.lastIndexOf(rawName.toLowerCase());
    if (at === -1) continue;
    remainingUnclosed.splice(at, 1);
    hasRelevantClosing = true;
  }

  return { hasRelevantClosing, remainingUnclosed };
}
|
|
374
|
+
/**
 * Converts the raw `html` nodes in a sibling list into structured
 * `htmlElement` nodes, recursing into nodes that have children.
 *
 * - fragments are parsed with parseHtmlFragment (kept raw if nothing was parsed);
 * - lone self-closing tags become childless elements;
 * - lone closing tags are dropped;
 * - a lone opening tag collects the following siblings up to its matching
 *   closing tag (or to the end of the list) as its children;
 * - blacklisted or unparsable lone tags are dropped.
 *
 * @param {Array<Object>} nodes - Sibling AST nodes.
 * @param {Object} options - Sanitizer options; `preserveRawHtml === false`
 *   omits the `data` field on produced elements.
 * @returns {Array<Object>} New sibling list.
 */
function processHtmlNodesInArray(nodes, options) {
  const siblings = mergeFragmentedHtmlNodes(nodes);
  const output = [];

  // Builds an htmlElement; `innerNodes === null` means "no children to process".
  const toElement = (parsed, rawHtml, innerNodes) => ({
    type: "htmlElement",
    tagName: parsed.tagName,
    attrs: sanitizeAttrs(parsed.attrs, options),
    children: innerNodes === null ? [] : processHtmlNodesInArray(innerNodes, options),
    data: options.preserveRawHtml !== false ? {
      rawHtml,
      parsed: true,
      originalType: "html"
    } : void 0
  });

  let index = 0;
  while (index < siblings.length) {
    const node = siblings[index];

    if (!isHtmlNode(node)) {
      output.push(
        hasChildren(node)
          ? { ...node, children: processHtmlNodesInArray(node.children, options) }
          : node
      );
      index++;
      continue;
    }

    switch (detectHtmlContentType(node.value)) {
      case "fragment": {
        const fragmentNodes = parseHtmlFragment(node.value, options);
        if (fragmentNodes.length > 0) {
          output.push(...fragmentNodes);
        } else {
          output.push(node);
        }
        index++;
        break;
      }
      case "self-closing": {
        const parsed = parseHtmlTag(node.value);
        if (parsed && !isTagBlacklisted(parsed.tagName, options)) {
          output.push(toElement(parsed, node.value, null));
        }
        index++;
        break;
      }
      case "closing":
        // A closing tag with no opener in this pass is discarded.
        index++;
        break;
      case "opening": {
        const parsed = parseHtmlTag(node.value);
        if (!parsed || isTagBlacklisted(parsed.tagName, options)) {
          index++;
          break;
        }
        const tagName = parsed.tagName;
        const inner = [];
        let depth = 1;
        let scan = index + 1;
        let foundClosing = false;
        // Only tags with the same name change the nesting depth.
        while (scan < siblings.length && depth > 0) {
          const candidate = siblings[scan];
          if (isHtmlNode(candidate)) {
            const candidateType = detectHtmlContentType(candidate.value);
            if (candidateType === "closing" || candidateType === "opening") {
              const candidateTag = parseHtmlTag(candidate.value);
              if (candidateTag && candidateTag.tagName === tagName) {
                depth += candidateType === "opening" ? 1 : -1;
                if (depth === 0) {
                  foundClosing = true;
                  break;
                }
              }
            }
          }
          inner.push(candidate);
          scan++;
        }
        output.push(toElement(parsed, node.value, inner));
        // Skip past the matching closing node when one was found.
        index = foundClosing ? scan + 1 : scan;
        break;
      }
      default:
        output.push(node);
        index++;
    }
  }

  return output;
}
|
|
478
|
+
/**
 * Returns a shallow copy of `ast` whose children had their raw HTML nodes
 * processed by processHtmlNodesInArray.
 *
 * @param {Object} ast - Root node with a `children` array.
 * @param {Object} [options] - Sanitizer options.
 * @returns {Object} New root node.
 */
function transformHtmlNodes(ast, options = {}) {
  const root = { ...ast };
  root.children = processHtmlNodesInArray(ast.children, options);
  return root;
}
|
|
484
|
+
|
|
485
|
+
// src/parser/ast/types.ts
|
|
486
|
+
/**
 * Collects the marked extensions contributed by a list of plugins.
 *
 * @param {Iterable<Object>} plugins - Plugins; only those with `type` "marked"
 *   or "both" and a truthy `marked` field contribute.
 * @returns {Array} All `plugin.marked.extensions` entries, in plugin order.
 */
function extractMarkedExtensions(plugins) {
  const collected = [];
  for (const plugin of plugins) {
    if (plugin.type !== "marked" && plugin.type !== "both") continue;
    if (!plugin.marked) continue;
    collected.push(...plugin.marked.extensions);
  }
  return collected;
}
|
|
495
|
+
|
|
496
|
+
// src/extensions/marked-extensions/explicitDefinitionExtension.ts
|
|
497
|
+
/**
 * Creates the block-level marked extension for link reference definitions
 * such as `[id]: url "title"`.
 *
 * @returns {Object} Extension object with `name`, `level`, `start`,
 *   `tokenizer` and `renderer`.
 */
function createExplicitDefinitionExtension() {
  return {
    name: "explicitDefinition",
    level: "block",
    // Key fix: start must match the complete definition pattern "[id]:".
    // Matching only "[" would mistake the "[alt]" of "![alt][id]" for the start
    // of a definition. Footnote definitions "[^id]:" are excluded as well.
    start(src) {
      return src.match(/^ {0,3}\[(?!\^)[^\]]+\]:/m)?.index;
    },
    tokenizer(src) {
      const line = /^ {0,3}\[(?!\^)[^\]]+\]:.*?(?:\n+|$)/.exec(src);
      if (!line) return void 0;

      const raw = line[0];
      const parts = raw.match(
        /^ {0,3}\[([^\]]+)\]:\s*(\S+)(?:\s+["'(](.*?)["')])?/
      );
      if (!parts) {
        // The line looks like a definition but has no destination: consume it anyway.
        return { type: "explicitDefinition", raw, identifier: "", url: "" };
      }

      const identifier = parts[1].toLowerCase();
      const url = parts[2];
      const title = parts[3];
      // Register the definition in the lexer's link table when one is available.
      if (this.lexer?.tokens?.links) {
        this.lexer.tokens.links[identifier] = { href: url, title };
      }
      return {
        type: "explicitDefinition",
        raw,
        identifier,
        url,
        title
      };
    },
    renderer() {
      return "";
    }
  };
}
|
|
540
|
+
|
|
541
|
+
// src/extensions/marked-extensions/optimisticReferenceExtension.ts
|
|
542
|
+
/**
 * Creates the inline marked extension that tokenizes reference-style links
 * and images (`[text][id]`, `[text][]`, `[text]`, and the `!`-prefixed image
 * forms). The tokenizer itself does not look the definition up.
 *
 * @returns {Object} Extension object with `name`, `level`, `start`,
 *   `tokenizer` and `renderer`.
 */
function createOptimisticReferenceExtension() {
  return {
    name: "optimisticReference",
    level: "inline",
    start(src) {
      return src.match(/!?\[/)?.index;
    },
    tokenizer(src) {
      const found = /^(!?)\[((?:\[[^\]]*\]|[^\[\]]|\](?=[^\[]*\]))*)\](?:\s*\[((?:\[[^\]]*\]|[^\[\]]|\](?=[^\[]*\]))*)\])?/.exec(src);
      if (!found) return void 0;

      const [raw, bang, text, refRaw] = found;
      // "(" right after means an inline link, ":" a definition: not handled here.
      const following = src[raw.length];
      if (following === "(" || following === ":") return void 0;
      // "[^id]" is a footnote reference.
      if (text.startsWith("^")) return void 0;

      let referenceType;
      let label;
      if (refRaw === void 0) {
        // "[ ]", "[x]" and "[X]" are task-list checkboxes, not shortcut references.
        if (text.match(/^[ xX]$/)) return void 0;
        referenceType = "shortcut";
        label = text;
      } else if (refRaw === "") {
        referenceType = "collapsed";
        label = text;
      } else {
        referenceType = "full";
        label = refRaw;
      }

      return {
        type: "optimisticReference",
        raw,
        isImage: bang === "!",
        text,
        identifier: label.toLowerCase(),
        label,
        referenceType
      };
    },
    renderer() {
      return "";
    }
  };
}
|
|
600
|
+
|
|
601
|
+
// src/extensions/marked-extensions/mathExtension.ts
|
|
602
|
+
/**
 * Creates the block-level marked extension for `$$ ... $$` math.
 *
 * @returns {Object} Extension object with `name`, `level`, `start`,
 *   `tokenizer` and `renderer`.
 */
function createBlockMathExtension() {
  return {
    name: "blockMath",
    level: "block",
    start(src) {
      return src.match(/^ {0,3}\$\$/m)?.index;
    },
    tokenizer(src) {
      const found = /^ {0,3}\$\$([\s\S]*?)\$\$ *(?:\n+|$)/.exec(src);
      if (!found) return void 0;
      const [raw, body] = found;
      return { type: "blockMath", raw, text: body.trim() };
    },
    renderer() {
      return "";
    }
  };
}
|
|
627
|
+
/**
 * Creates the inline marked extension for `$ ... $` math.
 *
 * @returns {Object} Extension object with `name`, `level`, `start`,
 *   `tokenizer` and `renderer`.
 */
function createInlineMathExtension() {
  return {
    name: "inlineMath",
    level: "inline",
    /**
     * Returns the index of the first "$" that is not followed by another "$".
     * Runs starting with "$$" are skipped and the search continues after them:
     * giving up at the first "$$" would hide a later "$x$" from marked, which
     * relies on this hint to know where plain text must stop.
     */
    start(src) {
      let index = src.indexOf("$");
      while (index !== -1) {
        if (src[index + 1] !== "$") return index;
        let runEnd = index;
        while (src[runEnd] === "$") runEnd++;
        index = src.indexOf("$", runEnd);
      }
      return void 0;
    },
    tokenizer(src) {
      // One "$", a non-empty body without newline or bare "$", then a closing
      // "$" that is not followed by a digit.
      const rule = /^\$(?!\$)((?:\\.|[^\\\n$])+?)\$(?!\d)/;
      const match = rule.exec(src);
      if (!match) return void 0;
      return {
        type: "inlineMath",
        raw: match[0],
        text: match[1].trim()
      };
    },
    renderer() {
      return "";
    }
  };
}
|
|
654
|
+
|
|
655
|
+
// src/extensions/marked-extensions/footnoteDefinitionExtension.ts
|
|
656
|
+
/**
 * Creates the block-level marked extension for footnote definitions
 * (`[^id]: text` followed by indented continuation lines).
 *
 * @returns {Object} Extension object with `name`, `level`, `start`,
 *   `tokenizer` and `renderer`.
 */
function createFootnoteDefinitionExtension() {
  // NOTE(review): as it appears in the source this accepts a single leading
  // space or a tab; the space run may have been collapsed during extraction
  // (four spaces would be usual) - confirm against the upstream file.
  const isIndented = (line) => /^( |\t)/.test(line);

  return {
    name: "footnoteDefinitionBlock",
    level: "block",
    start(src) {
      return src.match(/^ {0,3}\[\^[^\]]+\]:/m)?.index;
    },
    tokenizer(src) {
      const head = /^ {0,3}\[\^([a-zA-Z0-9_-]+)\]:\s*(.*)/.exec(src);
      if (!head) return void 0;

      const identifier = head[1];
      let raw = head[0];
      let content = head[2];

      const rest = src.slice(raw.length);
      const lines = rest.split("\n");
      let cursor = 0;
      if (lines[0] === "" && rest.startsWith("\n")) {
        // Consume the newline that ends the first line.
        cursor = 1;
        raw += "\n";
        content += "\n";
      }

      for (; cursor < lines.length; cursor++) {
        const line = lines[cursor];
        if (line.trim() === "") {
          // A blank line belongs to the footnote only if the next non-blank
          // line is indented.
          const nextFilled = lines.slice(cursor + 1).find((later) => later.trim() !== "");
          if (nextFilled === undefined || !isIndented(nextFilled)) break;
        } else if (!isIndented(line)) {
          // Any unindented line (including another footnote definition) ends it.
          break;
        }
        raw += line + (cursor < lines.length - 1 ? "\n" : "");
        content += "\n" + line;
      }

      return {
        type: "footnoteDefinitionBlock",
        raw,
        identifier,
        content: content.replace(/\n+$/, "")
      };
    },
    renderer() {
      return "";
    }
  };
}
|
|
724
|
+
|
|
725
|
+
// src/extensions/marked-extensions/inlineHtmlExtension.ts
|
|
726
|
+
// Tag names the inline HTML extension treats as complete without a closing tag.
var SELF_CLOSING_TAGS = /* @__PURE__ */ new Set([
  "area", "base", "br", "col", "embed", "hr", "img",
  "input", "link", "meta", "param", "source", "track", "wbr"
]);
|
|
742
|
+
/**
 * Creates the inline marked extension that captures a complete inline HTML
 * element (opening tag through matching closing tag) or a self-closing tag as
 * one token.
 *
 * @returns {Object} Extension object with `name`, `level`, `start`,
 *   `tokenizer` and `renderer`.
 */
function createInlineHtmlExtension() {
  return {
    name: "inlineHtml",
    level: "inline",
    start(src) {
      const index = src.indexOf("<");
      if (index === -1) return void 0;
      // Only a "<" followed by a letter or "/" can begin a tag.
      const next = src.charAt(index + 1);
      return /^[a-zA-Z\/]/.test(next) ? index : void 0;
    },
    tokenizer(src) {
      const html = matchCompleteHtmlElement(src) || matchSelfClosingTag(src);
      if (!html) return void 0;
      return {
        type: "inlineHtml",
        raw: html,
        text: html
      };
    },
    renderer() {
      return "";
    }
  };
}
|
|
777
|
+
/**
 * Matches a complete HTML element at the start of `src`: an opening tag, its
 * content (with nested same-name elements balanced) and the closing tag.
 *
 * @param {string} src - Text that should begin with an opening tag.
 * @returns {string|null} The matched prefix of `src`; just the opening tag for
 *   names in SELF_CLOSING_TAGS; null when no opening tag starts `src` or the
 *   element is never closed.
 */
function matchCompleteHtmlElement(src) {
  const opening = /^<([a-zA-Z][a-zA-Z0-9]*)((?:\s+[a-zA-Z_:][a-zA-Z0-9_.:-]*(?:\s*=\s*(?:"[^"]*"|'[^']*'|[^\s"'=<>`]+))?)*)\s*>/.exec(src);
  if (opening === null) return null;

  const openTag = opening[0];
  const tagName = opening[1].toLowerCase();
  if (SELF_CLOSING_TAGS.has(tagName)) return openTag;

  const rest = src.slice(openTag.length);
  const openPattern = new RegExp(`<${tagName}(?:\\s[^>]*)?>`, "gi");
  const closePattern = new RegExp(`</${tagName}>`, "gi");

  let depth = 1;
  let cursor = 0;
  while (depth > 0 && cursor < rest.length) {
    openPattern.lastIndex = cursor;
    closePattern.lastIndex = cursor;
    const nestedOpen = openPattern.exec(rest);
    const nearestClose = closePattern.exec(rest);
    // Without any further closing tag the element cannot be completed.
    if (nearestClose === null) return null;

    const opensFirst = nestedOpen !== null && nestedOpen.index < nearestClose.index;
    const consumed = opensFirst ? nestedOpen : nearestClose;
    depth += opensFirst ? 1 : -1;
    cursor = consumed.index + consumed[0].length;
  }

  return depth === 0 ? src.slice(0, openTag.length + cursor) : null;
}
|
|
811
|
+
/**
 * Matches a self-closing tag at the start of `src`: either written with an
 * explicit `/>` or a bare tag whose name is in SELF_CLOSING_TAGS.
 *
 * @param {string} src - Text that should begin with a tag.
 * @returns {string|null} The matched tag text, or null.
 */
function matchSelfClosingTag(src) {
  const explicit = /^<([a-zA-Z][a-zA-Z0-9]*)((?:\s+[a-zA-Z_:][a-zA-Z0-9_.:-]*(?:\s*=\s*(?:"[^"]*"|'[^']*'|[^\s"'=<>`]+))?)*)\s*\/>/.exec(src);
  if (explicit !== null) return explicit[0];

  const bare = /^<([a-zA-Z][a-zA-Z0-9]*)((?:\s+[a-zA-Z_:][a-zA-Z0-9_.:-]*(?:\s*=\s*(?:"[^"]*"|'[^']*'|[^\s"'=<>`]+))?)*)\s*>/.exec(src);
  const isVoid = bare !== null && SELF_CLOSING_TAGS.has(bare[1].toLowerCase());
  return isVoid ? bare[0] : null;
}
|
|
822
|
+
/**
 * Converts a blockMath token into a `math` node.
 *
 * @param {Object} token - Token with a `text` field.
 * @returns {{type: string, value: *, meta: null}} The `math` node.
 */
function transformBlockMath(token) {
  const { text } = token;
  return { type: "math", value: text, meta: null };
}
|
|
829
|
+
/**
 * Converts a footnoteDefinitionBlock token into a `footnoteDefinition` node.
 *
 * @param {Object} token - Token with `identifier` and `content`.
 * @param {Object} ctx - Context providing `parseFootnoteContent(content)`.
 * @returns {Object} Node whose children are what `parseFootnoteContent` returned.
 */
function transformFootnoteDefinitionBlock(token, ctx) {
  const parsedContent = ctx.parseFootnoteContent(token.content);
  const { identifier } = token;
  return {
    type: "footnoteDefinition",
    identifier,
    label: identifier,
    children: parsedContent
  };
}
|
|
838
|
+
/**
 * Converts an explicitDefinition token into a `definition` node.
 *
 * @param {Object} token - Token with `identifier`, `url` and optional `title`.
 * @returns {Object|null} The node, or null when identifier or url is empty.
 */
function transformExplicitDefinition(token) {
  const { identifier, url } = token;
  const isUsable = Boolean(identifier) && Boolean(url);
  if (!isUsable) return null;
  return {
    type: "definition",
    identifier,
    label: identifier,
    url,
    title: token.title ?? null
  };
}
|
|
848
|
+
/**
 * Converts a `def` token into a node.
 *
 * A tag starting with "^" yields a `footnoteDefinition` whose single paragraph
 * holds the token's href as text; any other tag yields a `definition`.
 *
 * @param {Object} token - Token with `tag`, `href` and optional `title`.
 * @returns {Object} The resulting node.
 */
function transformDef(token) {
  const { tag, href } = token;

  if (!tag.startsWith("^")) {
    return {
      type: "definition",
      identifier: tag,
      label: tag,
      url: href,
      title: token.title ?? null
    };
  }

  const footnoteId = tag.slice(1);
  const paragraph = {
    type: "paragraph",
    children: [{ type: "text", value: href }]
  };
  return {
    type: "footnoteDefinition",
    identifier: footnoteId,
    label: footnoteId,
    children: [paragraph]
  };
}
|
|
871
|
+
/**
 * Converts a container token into a `containerDirective` node.
 *
 * @param {Object} token - Token with `name`, `tokens` and an optional `attrs`
 *   string such as `title="Hi" id=x`.
 * @param {Object} ctx - Context providing `transformTokensWithPosition(tokens)`.
 * @returns {Object} Node with the parsed `attributes` map and transformed children.
 */
function transformContainer(token, ctx) {
  const attributes = {};
  // key, optional "=", then "double" | 'single' | bare value
  const attrRegex = /([a-zA-Z0-9_-]+)=?("([^"]*)"|'([^']*)'|([^ ]*))?/g;
  // exec() stringifies its argument, so a missing attrs field would be parsed
  // as the text "undefined" and produce a bogus attribute; treat it as empty.
  const attrSource = token.attrs ?? "";
  let match;
  while ((match = attrRegex.exec(attrSource)) !== null) {
    attributes[match[1]] = match[3] || match[4] || match[5] || "";
  }
  const children = ctx.transformTokensWithPosition(token.tokens);
  return {
    type: "containerDirective",
    name: token.name,
    attributes,
    children
  };
}
|
|
886
|
+
/**
 * Convert a `footnoteDefinition` token (carrying inline tokens) into a
 * `footnoteDefinition` node with a single paragraph child.
 *
 * @param {{ identifier: string, tokens?: Array }} token
 * @param {{ transformInline: Function }} ctx - Transform context used for the inline tokens.
 * @returns {object} The footnote definition node.
 */
function transformFootnoteDefToken(token, ctx) {
  const noteId = token.identifier;
  const paragraph = {
    type: "paragraph",
    children: ctx.transformInline(token.tokens)
  };
  return {
    type: "footnoteDefinition",
    identifier: noteId,
    label: noteId,
    children: [paragraph]
  };
}
|
|
899
|
+
/**
 * Convert a `heading` token into a `heading` node.
 *
 * @param {{ depth: number, tokens?: Array }} token
 * @param {{ transformInline: Function }} ctx - Transform context used for the inline tokens.
 * @returns {{ type: "heading", depth: number, children: Array }}
 */
function transformHeading(token, ctx) {
  const { depth } = token;
  const inlineChildren = ctx.transformInline(token.tokens);
  return { type: "heading", depth, children: inlineChildren };
}
|
|
906
|
+
/**
 * Convert a `paragraph` token into a `paragraph` node.
 *
 * @param {{ tokens?: Array }} token
 * @param {{ transformInline: Function }} ctx - Transform context used for the inline tokens.
 * @returns {{ type: "paragraph", children: Array }}
 */
function transformParagraph(token, ctx) {
  const inlineChildren = ctx.transformInline(token.tokens);
  return { type: "paragraph", children: inlineChildren };
}
|
|
912
|
+
/**
 * Convert a `code` token into a `code` node.
 *
 * To match micromark/mdast output, the fence info string carried in
 * `token.lang` is split at the first run of whitespace: the first word becomes
 * `lang`, the remainder (if any) becomes `meta`. Previously the whole info
 * string was stored in `lang` and `meta` was always null.
 *
 * @param {{ lang?: string | null, text: string }} token
 * @returns {{ type: "code", lang: string | null, meta: string | null, value: string }}
 */
function transformCode(token) {
  const info = typeof token.lang === "string" ? token.lang.trim() : "";
  const parts = info.match(/^(\S+)\s*([\s\S]*)$/);
  return {
    type: "code",
    lang: parts ? parts[1] : null,
    // Empty remainder means the fence carried a language only.
    meta: parts && parts[2] ? parts[2] : null,
    value: token.text
  };
}
|
|
921
|
+
/**
 * Convert a `blockquote` token into a `blockquote` node.
 *
 * @param {{ tokens?: Array }} token
 * @param {{ transformTokens: Function }} ctx - Transform context used for the nested block tokens.
 * @returns {{ type: "blockquote", children: Array }}
 */
function transformBlockquote(token, ctx) {
  return {
    type: "blockquote",
    children: ctx.transformTokens(token.tokens)
  };
}
|
|
928
|
+
/**
 * Convert a `list` token into a `list` node with `listItem` children.
 *
 * @param {{ ordered: boolean, start?: number | string, loose: boolean, items: Array }} token
 * @param {{ transformTokens: Function }} ctx - Transform context used for each item's block tokens.
 * @returns {{ type: "list", ordered: boolean, start: number | null, spread: boolean, children: Array }}
 */
function transformList(token, ctx) {
  const children = token.items.map((item) => ({
    type: "listItem",
    spread: item.loose,
    // Aligned with micromark output (GFM task lists): null when not a task item.
    checked: item.checked ?? null,
    children: ctx.transformTokens(item.tokens)
  }));

  // Aligned with micromark: ordered lists carry `start`, unordered lists use null.
  // A list may legitimately start at 0, so 0 must not fall back to 1.
  let start = null;
  if (token.ordered) {
    start = token.start === 0 ? 0 : token.start || 1;
  }

  return {
    type: "list",
    ordered: token.ordered,
    start,
    spread: token.loose,
    children
  };
}
|
|
945
|
+
/**
 * Convert a `table` token into a `table` node: the header becomes the first
 * `tableRow`, followed by one `tableRow` per body row.
 *
 * @param {{ header: Array, rows: Array<Array>, align: Array }} token
 * @param {{ transformInline: Function }} ctx - Transform context used for each cell's inline tokens.
 * @returns {{ type: "table", align: Array, children: Array }}
 */
function transformTable(token, ctx) {
  const toCell = (cell) => ({
    type: "tableCell",
    children: ctx.transformInline(cell.tokens)
  });
  const toRow = (cells) => ({
    type: "tableRow",
    children: cells.map((cell) => toCell(cell))
  });

  const headRow = toRow(token.header);
  const bodyRows = token.rows.map((cells) => toRow(cells));

  return {
    type: "table",
    align: token.align,
    children: [headRow, ...bodyRows]
  };
}
|
|
963
|
+
/**
 * Produce a `thematicBreak` node for an `hr` token.
 *
 * @returns {{ type: "thematicBreak" }}
 */
function transformHr() {
  const node = { type: "thematicBreak" };
  return node;
}
|
|
966
|
+
/**
 * Convert a block `html` token into an `html` node carrying the token text.
 *
 * @param {{ text: string }} token
 * @returns {{ type: "html", value: string }}
 */
function transformHtml(token) {
  const { text } = token;
  return { type: "html", value: text };
}
|
|
972
|
+
/**
 * Convert a block-level `text` token into a `paragraph` node.
 *
 * When the token carries inline tokens they are transformed; otherwise the
 * raw text becomes a single `text` child.
 *
 * @param {{ text: string, tokens?: Array }} token
 * @param {{ transformInline: Function }} ctx - Transform context used for the inline tokens.
 * @returns {{ type: "paragraph", children: Array }}
 */
function transformTextBlock(token, ctx) {
  const children = token.tokens
    ? ctx.transformInline(token.tokens)
    : [{ type: "text", value: token.text }];
  return { type: "paragraph", children };
}
|
|
984
|
+
/**
 * Convert an `inlineMath` token into an `inlineMath` node.
 *
 * @param {{ text: string }} token
 * @returns {{ type: "inlineMath", value: string }}
 */
function transformInlineMath(token) {
  const { text } = token;
  return { type: "inlineMath", value: text };
}
|
|
990
|
+
/**
 * Convert an `optimisticReference` token into an `imageReference` or
 * `linkReference` node, depending on `token.isImage`.
 *
 * For link references the label text is re-lexed as inline Markdown; when
 * that yields no nodes the raw text is used as a single `text` child.
 *
 * @param {{ isImage?: boolean, identifier: string, label: string, referenceType: string, text: string }} token
 * @param {{ transformInline: Function }} ctx - Transform context used for the re-lexed label.
 * @returns {object} The reference node.
 */
function transformOptimisticReference(token, ctx) {
  const { identifier, label, referenceType, text } = token;

  if (!token.isImage) {
    const inlineTokens = new Lexer().inlineTokens(text);
    const labelNodes = ctx.transformInline(inlineTokens);
    const children = labelNodes.length
      ? labelNodes
      : [{ type: "text", value: text }];
    return { type: "linkReference", identifier, label, referenceType, children };
  }

  return { type: "imageReference", identifier, label, referenceType, alt: text };
}
|
|
1009
|
+
/**
 * Convert a `link` token into a `link` node, or into a `footnoteReference`
 * when the link text is `^` followed by at least one character.
 *
 * @param {{ text: string, href: string, title?: string | null, tokens?: Array }} token
 * @param {{ transformInline: Function }} ctx - Transform context used for the link's inline tokens.
 * @returns {object} The link or footnote reference node.
 */
function transformLink(token, ctx) {
  const { text } = token;
  const looksLikeFootnote = text.startsWith("^") && text.length > 1;

  if (looksLikeFootnote) {
    const noteId = text.slice(1);
    return { type: "footnoteReference", identifier: noteId, label: noteId };
  }

  const node = {
    type: "link",
    url: token.href,
    // Aligned with micromark output: a missing/empty title is null.
    title: token.title || null,
    children: ctx.transformInline(token.tokens)
  };
  return node;
}
|
|
1026
|
+
/**
 * Convert an `image` token into an `image` node.
 *
 * @param {{ href: string, title?: string | null, text: string }} token
 * @returns {{ type: "image", url: string, title: string | null, alt: string }}
 */
function transformImage(token) {
  const { href, text } = token;
  return {
    type: "image",
    url: href,
    // Aligned with micromark output: a missing/empty title is null.
    title: token.title || null,
    alt: text
  };
}
|
|
1035
|
+
/**
 * Split a text token into `text` and `footnoteReference` nodes.
 *
 * Every `[^id]` occurrence (id made of letters, digits, `_` or `-`) becomes a
 * footnote reference; the text between occurrences is kept as `text` nodes.
 *
 * @param {{ text: string }} token
 * @returns {Array<object>} Nodes in source order (empty for empty text).
 */
function transformText(token) {
  const source = token.text;
  const pieces = [];
  const pattern = /\[\^([a-zA-Z0-9_-]+)\]/g;
  let cursor = 0;

  for (let hit = pattern.exec(source); hit !== null; hit = pattern.exec(source)) {
    const [whole, noteId] = hit;
    // Plain text preceding this reference, if any.
    if (hit.index > cursor) {
      pieces.push({ type: "text", value: source.substring(cursor, hit.index) });
    }
    pieces.push({ type: "footnoteReference", identifier: noteId, label: noteId });
    cursor = hit.index + whole.length;
  }

  // Trailing text after the last reference.
  if (cursor < source.length) {
    pieces.push({ type: "text", value: source.substring(cursor) });
  }
  return pieces;
}
|
|
1063
|
+
/**
 * Convert a `strong` token into a `strong` node.
 *
 * @param {{ tokens?: Array }} token
 * @param {{ transformInline: Function }} ctx - Transform context used for the inline tokens.
 * @returns {{ type: "strong", children: Array }}
 */
function transformStrong(token, ctx) {
  const inlineChildren = ctx.transformInline(token.tokens);
  return { type: "strong", children: inlineChildren };
}
|
|
1069
|
+
/**
 * Convert an `em` token into an `emphasis` node.
 *
 * @param {{ tokens?: Array }} token
 * @param {{ transformInline: Function }} ctx - Transform context used for the inline tokens.
 * @returns {{ type: "emphasis", children: Array }}
 */
function transformEmphasis(token, ctx) {
  const inlineChildren = ctx.transformInline(token.tokens);
  return { type: "emphasis", children: inlineChildren };
}
|
|
1075
|
+
/**
 * Convert a `codespan` token into an `inlineCode` node.
 *
 * @param {{ text: string }} token
 * @returns {{ type: "inlineCode", value: string }}
 */
function transformCodespan(token) {
  const { text } = token;
  return { type: "inlineCode", value: text };
}
|
|
1081
|
+
/**
 * Produce a `break` node for a `br` token.
 *
 * @returns {{ type: "break" }}
 */
function transformBreak() {
  const node = { type: "break" };
  return node;
}
|
|
1084
|
+
/**
 * Convert a `del` token into a `delete` node.
 *
 * @param {{ tokens?: Array }} token
 * @param {{ transformInline: Function }} ctx - Transform context used for the inline tokens.
 * @returns {{ type: "delete", children: Array }}
 */
function transformDelete(token, ctx) {
  const inlineChildren = ctx.transformInline(token.tokens);
  return { type: "delete", children: inlineChildren };
}
|
|
1090
|
+
/**
 * Convert an `inlineHtml` token by parsing its text with `parseHtmlFragment`.
 *
 * @param {{ text: string }} token
 * @returns {Array<object> | { type: "text", value: string }} The parsed nodes
 *   when the parse produced any, otherwise a plain `text` node with the raw text.
 */
function transformInlineHtml(token) {
  const fragment = parseHtmlFragment(token.text);
  return fragment.length > 0 ? fragment : { type: "text", value: token.text };
}
|
|
1097
|
+
/**
 * Check whether a token's `type` is strictly equal to the given type.
 *
 * @param {{ type: string }} token
 * @param {string} type
 * @returns {boolean}
 */
function isTokenType(token, type) {
  const { type: actual } = token;
  return actual === type;
}
|
|
1100
|
+
/**
 * Built-in block transformers, keyed by token type.
 *
 * Each entry re-checks the token type before delegating to the matching
 * transform function and returns null when the type does not match.
 * `hr` always yields a thematic break; `space` always yields null.
 */
var builtinBlockTransformers = {
  blockMath(token) {
    return isTokenType(token, "blockMath") ? transformBlockMath(token) : null;
  },
  footnoteDefinitionBlock(token, ctx) {
    return isTokenType(token, "footnoteDefinitionBlock")
      ? transformFootnoteDefinitionBlock(token, ctx)
      : null;
  },
  explicitDefinition(token) {
    return isTokenType(token, "explicitDefinition")
      ? transformExplicitDefinition(token)
      : null;
  },
  def(token) {
    return isTokenType(token, "def") ? transformDef(token) : null;
  },
  container(token, ctx) {
    return isTokenType(token, "container") ? transformContainer(token, ctx) : null;
  },
  footnoteDefinition(token, ctx) {
    return isTokenType(token, "footnoteDefinition")
      ? transformFootnoteDefToken(token, ctx)
      : null;
  },
  heading(token, ctx) {
    return isTokenType(token, "heading") ? transformHeading(token, ctx) : null;
  },
  paragraph(token, ctx) {
    return isTokenType(token, "paragraph") ? transformParagraph(token, ctx) : null;
  },
  code(token) {
    return isTokenType(token, "code") ? transformCode(token) : null;
  },
  blockquote(token, ctx) {
    return isTokenType(token, "blockquote") ? transformBlockquote(token, ctx) : null;
  },
  list(token, ctx) {
    return isTokenType(token, "list") ? transformList(token, ctx) : null;
  },
  table(token, ctx) {
    return isTokenType(token, "table") ? transformTable(token, ctx) : null;
  },
  hr() {
    return transformHr();
  },
  html(token) {
    return isTokenType(token, "html") ? transformHtml(token) : null;
  },
  space() {
    return null;
  },
  text(token, ctx) {
    return isTokenType(token, "text") ? transformTextBlock(token, ctx) : null;
  }
};
|
|
1163
|
+
/**
 * Built-in inline transformers, keyed by token type.
 *
 * Each entry re-checks the token type before delegating to the matching
 * transform function and returns null when the type does not match.
 * `escape` tokens go through the same path as `text` tokens; `br` always
 * yields a break node.
 */
var builtinInlineTransformers = {
  inlineMath(token) {
    return isTokenType(token, "inlineMath") ? transformInlineMath(token) : null;
  },
  optimisticReference(token, ctx) {
    return isTokenType(token, "optimisticReference")
      ? transformOptimisticReference(token, ctx)
      : null;
  },
  link(token, ctx) {
    return isTokenType(token, "link") ? transformLink(token, ctx) : null;
  },
  image(token) {
    return isTokenType(token, "image") ? transformImage(token) : null;
  },
  text(token) {
    return isTokenType(token, "text") ? transformText(token) : null;
  },
  escape(token) {
    return isTokenType(token, "escape") ? transformText(token) : null;
  },
  strong(token, ctx) {
    return isTokenType(token, "strong") ? transformStrong(token, ctx) : null;
  },
  em(token, ctx) {
    return isTokenType(token, "em") ? transformEmphasis(token, ctx) : null;
  },
  codespan(token) {
    return isTokenType(token, "codespan") ? transformCodespan(token) : null;
  },
  br() {
    return transformBreak();
  },
  del(token, ctx) {
    return isTokenType(token, "del") ? transformDelete(token, ctx) : null;
  },
  inlineHtml(token) {
    return isTokenType(token, "inlineHtml") ? transformInlineHtml(token) : null;
  }
};
|
|
1211
|
+
/**
 * Transform a single block token into a node.
 *
 * Lookup order: `ctx.customBlockTransformers`, then the built-in block
 * transformers. The first transformer that returns anything other than
 * `undefined` wins (a `null` result means "drop this token"). When no
 * transformer handles the token and it has a string `text`, it falls back to
 * a paragraph holding that text; otherwise null is returned.
 *
 * @param {{ type: string, text?: string }} token - Block token to transform.
 * @param {object} ctx - Transform context; may carry `customBlockTransformers`.
 * @returns {object | null} The node, or null when the token yields nothing.
 */
function transformBlockToken(token, ctx) {
  const tokenType = token.type;
  const tables = [ctx.customBlockTransformers, builtinBlockTransformers];

  for (const table of tables) {
    const transformer = table?.[tokenType];
    // Ignore members inherited from Object.prototype: a token type such as
    // "toString" or "constructor" must not dispatch to a built-in method.
    if (typeof transformer !== "function" || transformer === Object.prototype[tokenType]) {
      continue;
    }
    const result = transformer.call(table, token, ctx);
    if (result !== void 0) return result;
  }

  if ("text" in token && typeof token.text === "string") {
    return {
      type: "paragraph",
      children: [{ type: "text", value: token.text }]
    };
  }
  return null;
}
|
|
1230
|
+
/**
 * Transform a single inline token into a node (or an array of nodes).
 *
 * Lookup order: `ctx.customInlineTransformers`, then the built-in inline
 * transformers. The first transformer that returns anything other than
 * `undefined` wins (a `null` result means "drop this token"). When no
 * transformer handles the token and it has a string `text`, it falls back to
 * a `text` node holding that text; otherwise null is returned.
 *
 * @param {{ type: string, text?: string }} token - Inline token to transform.
 * @param {object} ctx - Transform context; may carry `customInlineTransformers`.
 * @returns {object | Array<object> | null} The node(s), or null when the token yields nothing.
 */
function transformInlineToken(token, ctx) {
  const tokenType = token.type;
  const tables = [ctx.customInlineTransformers, builtinInlineTransformers];

  for (const table of tables) {
    const transformer = table?.[tokenType];
    // Ignore members inherited from Object.prototype: a token type such as
    // "toString" or "constructor" must not dispatch to a built-in method.
    if (typeof transformer !== "function" || transformer === Object.prototype[tokenType]) {
      continue;
    }
    const result = transformer.call(table, token, ctx);
    if (result !== void 0) return result;
  }

  if ("text" in token && typeof token.text === "string") {
    return { type: "text", value: token.text };
  }
  return null;
}
|
|
1246
|
+
|
|
1247
|
+
// src/parser/ast/MarkedAstBuildter.ts
|
|
1248
|
+
/**
 * AST builder backed by the `marked` lexer.
 *
 * `parse` lexes Markdown text with a set of built-in and user-supplied marked
 * extensions, post-processes the token stream (footnote definitions and
 * `:::` containers found inside paragraphs), and converts the tokens into an
 * MDAST-style `root` node.
 */
var MarkedAstBuilder = class {
  containerConfig;
  htmlTreeOptions;
  /** Link definitions carried over between successive `parse` calls. */
  globalLinks = {};
  /** marked extensions supplied by the user. */
  userExtensions = [];
  /** Transform context (used for recursive transformation). */
  transformContext;

  /**
   * @param {object} [options] - Builder options. `containers` and `htmlTree`
   *   may each be an options object or `true` (treated as `{}`); `plugins` and
   *   `markedExtensions` contribute user extensions; the whole object is also
   *   spread into the marked lexer options in `parse`.
   */
  constructor(options = {}) {
    this.options = options;
    this.containerConfig = typeof options.containers === "object" ? options.containers : options.containers === true ? {} : void 0;
    this.htmlTreeOptions = typeof options.htmlTree === "object" ? options.htmlTree : options.htmlTree === true ? {} : void 0;
    if (options.plugins) {
      this.userExtensions.push(...extractMarkedExtensions(options.plugins));
    }
    if (options.markedExtensions) {
      this.userExtensions.push(...options.markedExtensions);
    }
    this.transformContext = {
      transformTokens: this.transformTokens.bind(this),
      transformTokensWithPosition: this.transformTokensWithPosition.bind(this),
      transformInline: this.transformInline.bind(this),
      parseFootnoteContent: this.parseFootnoteContent.bind(this)
    };
  }

  /**
   * Parse Markdown text into a `root` node.
   *
   * @param {string} text - Markdown source.
   * @returns {{ type: "root", children: Array<object> }}
   */
  parse(text) {
    // Replace NBSP, zero-width space and narrow NBSP with a plain space.
    const normalizedText = text.replace(/[\u00A0\u200b\u202f]/g, " ");
    const optimisticRefExt = createOptimisticReferenceExtension();
    const explicitDefExt = createExplicitDefinitionExtension();
    const footnoteDefExt = createFootnoteDefinitionExtension();

    // Sort user extensions into block/inline tokenizers and start hooks.
    const userBlockExts = [];
    const userBlockStartExts = [];
    const userInlineExts = [];
    const userInlineStartExts = [];
    for (const ext of this.userExtensions) {
      if (ext.level === "block") {
        if (ext.tokenizer) userBlockExts.push(ext.tokenizer);
        if (ext.start) userBlockStartExts.push(ext.start);
      } else if (ext.level === "inline") {
        if (ext.tokenizer) userInlineExts.push(ext.tokenizer);
        if (ext.start) userInlineStartExts.push(ext.start);
      }
    }

    const blockExts = [
      footnoteDefExt.tokenizer,
      explicitDefExt.tokenizer,
      ...userBlockExts
    ];
    const blockStartExts = [
      footnoteDefExt.start,
      explicitDefExt.start,
      ...userBlockStartExts
    ];
    const inlineExts = [optimisticRefExt.tokenizer, ...userInlineExts];
    const inlineStartExts = [optimisticRefExt.start, ...userInlineStartExts];

    // Optional extensions are placed in front of the ones above.
    if (this.options.math) {
      const blockMathExt = createBlockMathExtension();
      const inlineMathExt = createInlineMathExtension();
      blockExts.unshift(blockMathExt.tokenizer);
      blockStartExts.unshift(blockMathExt.start);
      inlineExts.unshift(inlineMathExt.tokenizer);
      inlineStartExts.unshift(inlineMathExt.start);
    }
    if (this.htmlTreeOptions) {
      const inlineHtmlExt = createInlineHtmlExtension();
      inlineExts.unshift(inlineHtmlExt.tokenizer);
      inlineStartExts.unshift(inlineHtmlExt.start);
    }

    const lexerOptions = {
      gfm: true,
      // Do not turn soft line breaks into break nodes, consistent with micromark.
      breaks: false,
      ...this.options,
      extensions: {
        inline: inlineExts,
        startInline: inlineStartExts,
        block: blockExts,
        startBlock: blockStartExts
      }
    };
    const lexerInstance = new Lexer(lexerOptions);

    // Seed the lexer with previously seen link definitions, then collect
    // whatever definitions this run discovered.
    if (lexerInstance.tokens && lexerInstance.tokens.links) {
      Object.assign(lexerInstance.tokens.links, this.globalLinks);
    }
    let tokens = lexerInstance.lex(normalizedText);
    if (lexerInstance.tokens && lexerInstance.tokens.links) {
      Object.assign(this.globalLinks, lexerInstance.tokens.links);
    }

    tokens = this.preprocessTokens(tokens);
    let children = this.transformTokensWithPosition(tokens);
    if (this.htmlTreeOptions) {
      children = this.processHtmlNodes(children);
    }
    return {
      type: "root",
      children
    };
  }

  /**
   * Pre-process tokens.
   *
   * Handles container directives and leftover footnote definitions by
   * extracting them from `paragraph` tokens.
   *
   * @param {Array<object>} tokens - Block tokens from the lexer.
   * @returns {Array<object>} New token list with `footnoteDefinition` and
   *   `container` tokens substituted where detected.
   */
  preprocessTokens(tokens) {
    const result = [];
    let i = 0;
    while (i < tokens.length) {
      const token = tokens[i];
      if (token.type === "paragraph") {
        const text = token.text;

        // A paragraph of the form `[^id]: content` is a footnote definition.
        const footnoteMatch = text.match(/^\[\^([a-zA-Z0-9_-]+)\]:\s+([\s\S]*)$/);
        if (footnoteMatch) {
          const defToken = {
            type: "footnoteDefinition",
            identifier: footnoteMatch[1],
            text: footnoteMatch[2],
            tokens: new Lexer().inlineTokens(footnoteMatch[2]),
            raw: token.raw
          };
          result.push(defToken);
          i++;
          continue;
        }

        // A paragraph starting with `:::name ...` opens a container.
        const containerStartMatch = text.match(/^:::(\s*)([a-zA-Z0-9_-]+)(.*?)(\n|$)/);
        if (containerStartMatch) {
          const name = containerStartMatch[2];
          const attrs = containerStartMatch[3].trim();
          let rawAccumulator = "";
          let j = i;
          let foundEnd = false;

          // Keep appending following tokens' raw text until the matching
          // closing `:::` line is found (nesting is tracked by depth).
          while (j < tokens.length) {
            const currentToken = tokens[j];
            rawAccumulator += currentToken.raw;
            const lines = rawAccumulator.split("\n");
            let depth = 0;
            let startLineIndex = -1;
            let endLineIndex = -1;
            for (let k = 0; k < lines.length; k++) {
              const line = lines[k];
              if (line.match(/^:::(\s*)([a-zA-Z0-9_-]+)/)) {
                if (depth === 0 && startLineIndex === -1) startLineIndex = k;
                depth++;
              } else if (line.trim() === ":::") {
                depth--;
                if (depth === 0) {
                  endLineIndex = k;
                  foundEnd = true;
                  break;
                }
              }
            }
            if (foundEnd) {
              const contentRaw = lines.slice(startLineIndex + 1, endLineIndex).join("\n");
              const remainingText = lines.slice(endLineIndex + 1).join("\n");
              // NOTE(review): container content and trailing text are re-lexed
              // with the plain `lexer`, not the extension-configured one used
              // in `parse` — confirm that is intended.
              const containerToken = {
                type: "container",
                name,
                attrs,
                tokens: this.preprocessTokens(lexer(contentRaw)),
                raw: rawAccumulator
              };
              result.push(containerToken);
              if (remainingText.trim()) {
                const remainingTokens = this.preprocessTokens(lexer(remainingText));
                result.push(...remainingTokens);
              }
              i = j + 1;
              break;
            }
            j++;
          }
          if (foundEnd) continue;
          // No closing fence: fall through and keep the paragraph as-is.
        }
      }
      result.push(token);
      i++;
    }
    return result;
  }

  /**
   * Transform tokens into MDAST nodes (with position information).
   *
   * Offsets are accumulated from each token's `raw` length; `line` and
   * `column` are always 0.
   *
   * @param {Array<object> | undefined} tokens
   * @returns {Array<object>}
   */
  transformTokensWithPosition(tokens) {
    if (!tokens) return [];
    const results = [];
    let currentOffset = 0;
    for (const token of tokens) {
      const rawLength = token.raw?.length ?? 0;
      const node = transformBlockToken(token, this.transformContext);
      if (node) {
        node.position = {
          start: { line: 0, column: 0, offset: currentOffset },
          end: { line: 0, column: 0, offset: currentOffset + rawLength }
        };
        results.push(node);
      }
      // Dropped tokens still advance the offset.
      currentOffset += rawLength;
    }
    return results;
  }

  /**
   * Transform tokens into MDAST nodes (without position information).
   *
   * @param {Array<object> | undefined} tokens
   * @returns {Array<object>} Nodes, with falsy results filtered out.
   */
  transformTokens(tokens) {
    if (!tokens) return [];
    return tokens.map((t) => transformBlockToken(t, this.transformContext)).filter(Boolean);
  }

  /**
   * Transform inline tokens. Array results from a transformer are flattened
   * into the output; falsy results are skipped.
   *
   * @param {Array<object> | undefined} tokens
   * @returns {Array<object>}
   */
  transformInline(tokens) {
    if (!tokens) return [];
    const results = [];
    for (const token of tokens) {
      const result = transformInlineToken(token, this.transformContext);
      if (result) {
        if (Array.isArray(result)) {
          results.push(...result);
        } else {
          results.push(result);
        }
      }
    }
    return results;
  }

  /**
   * Parse footnote content into AST nodes.
   *
   * Continuation lines (every line after the first) have one level of
   * indentation removed — four leading spaces or one leading tab — before
   * the content is lexed.
   *
   * @param {string} content - Raw footnote body.
   * @returns {Array<object>} Block nodes; empty for blank content.
   */
  parseFootnoteContent(content) {
    if (!content.trim()) {
      return [];
    }
    const normalizedContent = content.split("\n").map((line, index) => {
      if (index === 0) return line;
      // Both checks previously tested a single space, which made the second
      // branch unreachable and cut 4 characters off any line that started
      // with just one space.
      if (line.startsWith("    ")) return line.slice(4);
      if (line.startsWith("\t")) return line.slice(1);
      return line;
    }).join("\n");
    const contentLexer = new Lexer({ gfm: true, breaks: true });
    const tokens = contentLexer.lex(normalizedContent);
    return this.transformTokens(tokens);
  }

  /**
   * Process HTML nodes.
   *
   * Uses the html-extension's `transformHtmlNodes` to:
   * - merge HTML nodes that were split by blank lines
   * - parse HTML into an HtmlElementNode tree structure
   *
   * @param {Array<object>} nodes - Top-level nodes.
   * @returns {Array<object>} Children of the transformed temporary root.
   */
  processHtmlNodes(nodes) {
    const tempRoot = {
      type: "root",
      children: nodes
    };
    const transformed = transformHtmlNodes(tempRoot, this.htmlTreeOptions);
    return transformed.children;
  }

  /**
   * Convert AST nodes into ParsedBlock objects.
   *
   * @param {Array<object>} nodes - Nodes, ideally carrying `position` offsets.
   * @param {number} startOffset - Absolute offset of `rawText` in the full document.
   * @param {string} rawText - Source text the node offsets are relative to.
   * @param {*} status - Status value copied onto every block.
   * @param {() => *} generateBlockId - Called once per node to obtain its id.
   * @returns {Array<object>} One block per node; a node without position
   *   information spans the whole of `rawText`.
   */
  nodesToBlocks(nodes, startOffset, rawText, status, generateBlockId) {
    const blocks = [];
    for (const node of nodes) {
      const relativeStart = node.position?.start?.offset ?? 0;
      const relativeEnd = node.position?.end?.offset ?? rawText.length;
      const nodeText = rawText.substring(relativeStart, relativeEnd);
      const absoluteStart = startOffset + relativeStart;
      const absoluteEnd = startOffset + relativeEnd;
      blocks.push({
        id: generateBlockId(),
        status,
        node,
        startOffset: absoluteStart,
        endOffset: absoluteEnd,
        rawText: nodeText
      });
    }
    return blocks;
  }
};
|
|
1532
|
+
// Alias of MarkedAstBuilder, exported under the name `AstBuilder`.
var AstBuilder = MarkedAstBuilder;
|
|
1533
|
+
|
|
1534
|
+
// src/engines/marked/index.ts
|
|
1535
|
+
/**
 * Factory for a marked-based AST builder.
 *
 * @param {object} [options] - Options forwarded unchanged to `MarkedAstBuilder`.
 * @returns {MarkedAstBuilder} A new builder instance.
 */
function createMarkedBuilder(options = {}) {
  const builder = new MarkedAstBuilder(options);
  return builder;
}
|
|
1538
|
+
|
|
1539
|
+
export { AstBuilder, MarkedAstBuilder, createMarkedBuilder };
|
|
1540
|
+
//# sourceMappingURL=index.js.map
|
|
1541
|
+
//# sourceMappingURL=index.js.map
|