@incremark/core 0.2.6 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.en.md +132 -23
- package/dist/MarkedAstBuildter-BsjxZko_.d.ts +72 -0
- package/dist/detector/index.d.ts +118 -1
- package/dist/detector/index.js +196 -118
- package/dist/detector/index.js.map +1 -1
- package/dist/engines/marked/index.d.ts +29 -0
- package/dist/engines/marked/index.js +1541 -0
- package/dist/engines/marked/index.js.map +1 -0
- package/dist/engines/micromark/index.d.ts +106 -0
- package/dist/engines/micromark/index.js +1161 -0
- package/dist/engines/micromark/index.js.map +1 -0
- package/dist/index-mZ7yCqNH.d.ts +225 -0
- package/dist/index.d.ts +68 -54
- package/dist/index.js +1908 -1198
- package/dist/index.js.map +1 -1
- package/dist/types-C_EW5vfp.d.ts +123 -0
- package/dist/utils/index.d.ts +17 -1
- package/dist/utils/index.js +21 -1
- package/dist/utils/index.js.map +1 -1
- package/package.json +18 -3
- package/dist/index-BMUkM7mT.d.ts +0 -422
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
import { Root, RootContent } from 'mdast';
|
|
2
|
+
import { Extension } from 'micromark-util-types';
|
|
3
|
+
import { Extension as Extension$1 } from 'mdast-util-from-markdown';
|
|
4
|
+
import { MarkedExtension } from 'marked';
|
|
5
|
+
import { b as ParserOptions, C as ContainerConfig, B as BlockStatus, P as ParsedBlock } from './index-mZ7yCqNH.js';
|
|
6
|
+
|
|
7
|
+
/**
|
|
8
|
+
* AST 构建器统一接口和类型定义
|
|
9
|
+
*
|
|
10
|
+
* 支持两种引擎:
|
|
11
|
+
* - marked: 极速模式,速度更快
|
|
12
|
+
* - micromark: 稳定模式,更可靠,支持 div 内嵌 markdown
|
|
13
|
+
*/
|
|
14
|
+
|
|
15
|
+
/**
|
|
16
|
+
* 引擎类型
|
|
17
|
+
*/
|
|
18
|
+
type EngineType = 'marked' | 'micromark';
|
|
19
|
+
/**
|
|
20
|
+
* AST 构建器接口
|
|
21
|
+
* 所有引擎实现必须遵循此接口
|
|
22
|
+
*/
|
|
23
|
+
interface IAstBuilder {
|
|
24
|
+
/** 容器配置(用于边界检测) */
|
|
25
|
+
readonly containerConfig: ContainerConfig | undefined;
|
|
26
|
+
/**
|
|
27
|
+
* 解析文本为 AST
|
|
28
|
+
* @param text Markdown 文本
|
|
29
|
+
* @returns AST
|
|
30
|
+
*/
|
|
31
|
+
parse(text: string): Root;
|
|
32
|
+
/**
|
|
33
|
+
* 将 AST 节点转换为 ParsedBlock
|
|
34
|
+
*/
|
|
35
|
+
nodesToBlocks(nodes: RootContent[], startOffset: number, rawText: string, status: BlockStatus, generateBlockId: () => string): ParsedBlock[];
|
|
36
|
+
}
|
|
37
|
+
/**
|
|
38
|
+
* Marked 引擎扩展配置
|
|
39
|
+
*/
|
|
40
|
+
interface MarkedEngineExtension {
|
|
41
|
+
/** marked 扩展列表 */
|
|
42
|
+
extensions: MarkedExtension[];
|
|
43
|
+
}
|
|
44
|
+
/**
|
|
45
|
+
* Micromark 引擎扩展配置
|
|
46
|
+
*/
|
|
47
|
+
interface MicromarkEngineExtension {
|
|
48
|
+
/** micromark 语法扩展 */
|
|
49
|
+
extensions: Extension[];
|
|
50
|
+
/** mdast 转换扩展 */
|
|
51
|
+
mdastExtensions: Extension$1[];
|
|
52
|
+
}
|
|
53
|
+
/**
|
|
54
|
+
* 统一插件格式
|
|
55
|
+
*
|
|
56
|
+
* 插件可以同时支持多个引擎,运行时会根据当前引擎选择对应配置
|
|
57
|
+
*
|
|
58
|
+
* @example
|
|
59
|
+
* ```ts
|
|
60
|
+
* const myPlugin: IncremarkPlugin = {
|
|
61
|
+
* name: 'my-plugin',
|
|
62
|
+
* type: 'both', // 支持两种引擎
|
|
63
|
+
* marked: {
|
|
64
|
+
* extensions: [myMarkedExtension]
|
|
65
|
+
* },
|
|
66
|
+
* micromark: {
|
|
67
|
+
* extensions: [myMicromarkExt],
|
|
68
|
+
* mdastExtensions: [myMdastExt]
|
|
69
|
+
* }
|
|
70
|
+
* }
|
|
71
|
+
* ```
|
|
72
|
+
*/
|
|
73
|
+
interface IncremarkPlugin {
|
|
74
|
+
/** 插件名称 */
|
|
75
|
+
name: string;
|
|
76
|
+
/**
|
|
77
|
+
* 插件支持的引擎类型
|
|
78
|
+
* - 'marked': 仅支持 marked 引擎
|
|
79
|
+
* - 'micromark': 仅支持 micromark 引擎
|
|
80
|
+
* - 'both': 同时支持两种引擎
|
|
81
|
+
*/
|
|
82
|
+
type: 'marked' | 'micromark' | 'both';
|
|
83
|
+
/**
|
|
84
|
+
* Marked 引擎配置
|
|
85
|
+
* 当 type 为 'marked' 或 'both' 时必须提供
|
|
86
|
+
*/
|
|
87
|
+
marked?: MarkedEngineExtension;
|
|
88
|
+
/**
|
|
89
|
+
* Micromark 引擎配置
|
|
90
|
+
* 当 type 为 'micromark' 或 'both' 时必须提供
|
|
91
|
+
*/
|
|
92
|
+
micromark?: MicromarkEngineExtension;
|
|
93
|
+
}
|
|
94
|
+
/**
|
|
95
|
+
* 引擎特定的解析器选项
|
|
96
|
+
*
|
|
97
|
+
* 注意:不再包含 engine 选项,引擎切换通过注入 astBuilder 类实现
|
|
98
|
+
* 这样可以确保 tree-shaking 正常工作
|
|
99
|
+
*/
|
|
100
|
+
interface EngineParserOptions extends Omit<ParserOptions, 'extensions' | 'mdastExtensions'> {
|
|
101
|
+
/**
|
|
102
|
+
* 统一插件列表
|
|
103
|
+
* 插件会根据当前引擎自动选择对应的扩展配置
|
|
104
|
+
*/
|
|
105
|
+
plugins?: IncremarkPlugin[];
|
|
106
|
+
/**
|
|
107
|
+
* Micromark 扩展(仅 micromark 引擎使用)
|
|
108
|
+
* @deprecated 建议使用 plugins 统一配置
|
|
109
|
+
*/
|
|
110
|
+
extensions?: Extension[];
|
|
111
|
+
/**
|
|
112
|
+
* Mdast 扩展(仅 micromark 引擎使用)
|
|
113
|
+
* @deprecated 建议使用 plugins 统一配置
|
|
114
|
+
*/
|
|
115
|
+
mdastExtensions?: Extension$1[];
|
|
116
|
+
/**
|
|
117
|
+
* Marked 扩展(仅 marked 引擎使用)
|
|
118
|
+
* @deprecated 建议使用 plugins 统一配置
|
|
119
|
+
*/
|
|
120
|
+
markedExtensions?: MarkedExtension[];
|
|
121
|
+
}
|
|
122
|
+
|
|
123
|
+
export type { EngineParserOptions as E, IAstBuilder as I, MarkedEngineExtension as M, EngineType as a, IncremarkPlugin as b, MicromarkEngineExtension as c };
|
package/dist/utils/index.d.ts
CHANGED
|
@@ -23,5 +23,21 @@ declare function splitLines(text: string): string[];
|
|
|
23
23
|
declare function joinLines(lines: string[], start: number, end: number): string;
|
|
24
24
|
declare function isDefinitionNode(node: RootContent): node is Definition;
|
|
25
25
|
declare function isFootnoteDefinitionNode(node: RootContent): node is FootnoteDefinition;
|
|
26
|
+
/**
|
|
27
|
+
* AST 节点遍历器
|
|
28
|
+
* 深度优先遍历 AST 节点
|
|
29
|
+
*
|
|
30
|
+
* @param node 起始节点
|
|
31
|
+
* @param visitor 访问者函数,返回 true 可以提前终止遍历
|
|
32
|
+
*/
|
|
33
|
+
declare function traverseAst(node: RootContent, visitor: (node: RootContent) => boolean | void): void;
|
|
34
|
+
/**
|
|
35
|
+
* 从 AST 节点中收集指定类型的节点
|
|
36
|
+
*
|
|
37
|
+
* @param node 起始节点
|
|
38
|
+
* @param predicate 匹配谓词
|
|
39
|
+
* @returns 匹配的节点列表
|
|
40
|
+
*/
|
|
41
|
+
declare function collectAstNodes<T extends RootContent>(node: RootContent, predicate: (node: RootContent) => node is T): T[];
|
|
26
42
|
|
|
27
|
-
export { calculateLineOffset, generateId, isDefinitionNode, isFootnoteDefinitionNode, joinLines, resetIdCounter, splitLines };
|
|
43
|
+
export { calculateLineOffset, collectAstNodes, generateId, isDefinitionNode, isFootnoteDefinitionNode, joinLines, resetIdCounter, splitLines, traverseAst };
|
package/dist/utils/index.js
CHANGED
|
@@ -25,7 +25,27 @@ function isDefinitionNode(node) {
|
|
|
25
25
|
function isFootnoteDefinitionNode(node) {
|
|
26
26
|
return node.type === "footnoteDefinition";
|
|
27
27
|
}
|
|
28
|
+
function traverseAst(node, visitor) {
|
|
29
|
+
const stopEarly = visitor(node);
|
|
30
|
+
if (stopEarly === true) {
|
|
31
|
+
return;
|
|
32
|
+
}
|
|
33
|
+
if ("children" in node && Array.isArray(node.children)) {
|
|
34
|
+
for (const child of node.children) {
|
|
35
|
+
traverseAst(child, visitor);
|
|
36
|
+
}
|
|
37
|
+
}
|
|
38
|
+
}
|
|
39
|
+
function collectAstNodes(node, predicate) {
|
|
40
|
+
const results = [];
|
|
41
|
+
traverseAst(node, (node2) => {
|
|
42
|
+
if (predicate(node2)) {
|
|
43
|
+
results.push(node2);
|
|
44
|
+
}
|
|
45
|
+
});
|
|
46
|
+
return results;
|
|
47
|
+
}
|
|
28
48
|
|
|
29
|
-
export { calculateLineOffset, generateId, isDefinitionNode, isFootnoteDefinitionNode, joinLines, resetIdCounter, splitLines };
|
|
49
|
+
export { calculateLineOffset, collectAstNodes, generateId, isDefinitionNode, isFootnoteDefinitionNode, joinLines, resetIdCounter, splitLines, traverseAst };
|
|
30
50
|
//# sourceMappingURL=index.js.map
|
|
31
51
|
//# sourceMappingURL=index.js.map
|
package/dist/utils/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../../src/utils/index.ts"],"names":[],"mappings":";AASA,IAAI,SAAA,GAAY,CAAA;AACT,SAAS,UAAA,CAAW,SAAS,OAAA,EAAiB;AACnD,EAAA,OAAO,CAAA,EAAG,MAAM,CAAA,CAAA,EAAI,EAAE,SAAS,CAAA,CAAA;AACjC;AAKO,SAAS,cAAA,GAAuB;AACrC,EAAA,SAAA,GAAY,CAAA;AACd;AAKO,SAAS,mBAAA,CAAoB,OAAiB,SAAA,EAA2B;AAC9E,EAAA,IAAI,MAAA,GAAS,CAAA;AACb,EAAA,KAAA,IAAS,IAAI,CAAA,EAAG,CAAA,GAAI,aAAa,CAAA,GAAI,KAAA,CAAM,QAAQ,CAAA,EAAA,EAAK;AACtD,IAAA,MAAA,IAAU,KAAA,CAAM,CAAC,CAAA,CAAE,MAAA,GAAS,CAAA;AAAA,EAC9B;AACA,EAAA,OAAO,MAAA;AACT;AAKO,SAAS,WAAW,IAAA,EAAwB;AACjD,EAAA,OAAO,IAAA,CAAK,MAAM,IAAI,CAAA;AACxB;AAKO,SAAS,SAAA,CAAU,KAAA,EAAiB,KAAA,EAAe,GAAA,EAAqB;AAC7E,EAAA,OAAO,MAAM,KAAA,CAAM,KAAA,EAAO,MAAM,CAAC,CAAA,CAAE,KAAK,IAAI,CAAA;AAC9C;AAEO,SAAS,iBAAiB,IAAA,EAAuC;AACtE,EAAA,OAAO,KAAK,IAAA,KAAS,YAAA;AACvB;AAEO,SAAS,yBAAyB,IAAA,EAA+C;AACtF,EAAA,OAAO,KAAK,IAAA,KAAS,oBAAA;AACvB","file":"index.js","sourcesContent":["/**\n * 工具函数\n */\n\nimport type { Definition, FootnoteDefinition, RootContent } from \"mdast\"\n\n/**\n * 生成唯一 ID\n */\nlet idCounter = 0\nexport function generateId(prefix = 'block'): string {\n return `${prefix}-${++idCounter}`\n}\n\n/**\n * 重置 ID 计数器(用于测试)\n */\nexport function resetIdCounter(): void {\n idCounter = 0\n}\n\n/**\n * 计算行的偏移量\n */\nexport function calculateLineOffset(lines: string[], lineIndex: number): number {\n let offset = 0\n for (let i = 0; i < lineIndex && i < lines.length; i++) {\n offset += lines[i].length + 1 // +1 for newline\n }\n return offset\n}\n\n/**\n * 将文本按行分割\n */\nexport function splitLines(text: string): string[] {\n return text.split('\\n')\n}\n\n/**\n * 合并行为文本\n */\nexport function joinLines(lines: string[], start: number, end: number): string {\n return lines.slice(start, end + 1).join('\\n')\n}\n\nexport function isDefinitionNode(node: RootContent): node is Definition {\n return node.type === 'definition'\n}\n\nexport function isFootnoteDefinitionNode(node: RootContent): node is FootnoteDefinition {\n return node.type === 
'footnoteDefinition'\n}"]}
|
|
1
|
+
{"version":3,"sources":["../../src/utils/index.ts"],"names":["node"],"mappings":";AASA,IAAI,SAAA,GAAY,CAAA;AACT,SAAS,UAAA,CAAW,SAAS,OAAA,EAAiB;AACnD,EAAA,OAAO,CAAA,EAAG,MAAM,CAAA,CAAA,EAAI,EAAE,SAAS,CAAA,CAAA;AACjC;AAKO,SAAS,cAAA,GAAuB;AACrC,EAAA,SAAA,GAAY,CAAA;AACd;AAKO,SAAS,mBAAA,CAAoB,OAAiB,SAAA,EAA2B;AAC9E,EAAA,IAAI,MAAA,GAAS,CAAA;AACb,EAAA,KAAA,IAAS,IAAI,CAAA,EAAG,CAAA,GAAI,aAAa,CAAA,GAAI,KAAA,CAAM,QAAQ,CAAA,EAAA,EAAK;AACtD,IAAA,MAAA,IAAU,KAAA,CAAM,CAAC,CAAA,CAAE,MAAA,GAAS,CAAA;AAAA,EAC9B;AACA,EAAA,OAAO,MAAA;AACT;AAKO,SAAS,WAAW,IAAA,EAAwB;AACjD,EAAA,OAAO,IAAA,CAAK,MAAM,IAAI,CAAA;AACxB;AAKO,SAAS,SAAA,CAAU,KAAA,EAAiB,KAAA,EAAe,GAAA,EAAqB;AAC7E,EAAA,OAAO,MAAM,KAAA,CAAM,KAAA,EAAO,MAAM,CAAC,CAAA,CAAE,KAAK,IAAI,CAAA;AAC9C;AAEO,SAAS,iBAAiB,IAAA,EAAuC;AACtE,EAAA,OAAO,KAAK,IAAA,KAAS,YAAA;AACvB;AAEO,SAAS,yBAAyB,IAAA,EAA+C;AACtF,EAAA,OAAO,KAAK,IAAA,KAAS,oBAAA;AACvB;AASO,SAAS,WAAA,CACd,MACA,OAAA,EACM;AAEN,EAAA,MAAM,SAAA,GAAY,QAAQ,IAAI,CAAA;AAC9B,EAAA,IAAI,cAAc,IAAA,EAAM;AACtB,IAAA;AAAA,EACF;AAGA,EAAA,IAAI,cAAc,IAAA,IAAQ,KAAA,CAAM,OAAA,CAAQ,IAAA,CAAK,QAAQ,CAAA,EAAG;AACtD,IAAA,KAAA,MAAW,KAAA,IAAS,KAAK,QAAA,EAAU;AACjC,MAAA,WAAA,CAAY,OAAsB,OAAO,CAAA;AAAA,IAC3C;AAAA,EACF;AACF;AASO,SAAS,eAAA,CACd,MACA,SAAA,EACK;AACL,EAAA,MAAM,UAAe,EAAC;AAEtB,EAAA,WAAA,CAAY,IAAA,EAAM,CAACA,KAAAA,KAAS;AAC1B,IAAA,IAAI,SAAA,CAAUA,KAAI,CAAA,EAAG;AACnB,MAAA,OAAA,CAAQ,KAAKA,KAAI,CAAA;AAAA,IACnB;AAAA,EACF,CAAC,CAAA;AAED,EAAA,OAAO,OAAA;AACT","file":"index.js","sourcesContent":["/**\n * 工具函数\n */\n\nimport type { Definition, FootnoteDefinition, RootContent } from \"mdast\"\n\n/**\n * 生成唯一 ID\n */\nlet idCounter = 0\nexport function generateId(prefix = 'block'): string {\n return `${prefix}-${++idCounter}`\n}\n\n/**\n * 重置 ID 计数器(用于测试)\n */\nexport function resetIdCounter(): void {\n idCounter = 0\n}\n\n/**\n * 计算行的偏移量\n */\nexport function calculateLineOffset(lines: string[], lineIndex: number): number {\n let offset = 0\n for (let i = 0; i < lineIndex && i < lines.length; i++) {\n offset += 
lines[i].length + 1 // +1 for newline\n }\n return offset\n}\n\n/**\n * 将文本按行分割\n */\nexport function splitLines(text: string): string[] {\n return text.split('\\n')\n}\n\n/**\n * 合并行为文本\n */\nexport function joinLines(lines: string[], start: number, end: number): string {\n return lines.slice(start, end + 1).join('\\n')\n}\n\nexport function isDefinitionNode(node: RootContent): node is Definition {\n return node.type === 'definition'\n}\n\nexport function isFootnoteDefinitionNode(node: RootContent): node is FootnoteDefinition {\n return node.type === 'footnoteDefinition'\n}\n\n/**\n * AST 节点遍历器\n * 深度优先遍历 AST 节点\n *\n * @param node 起始节点\n * @param visitor 访问者函数,返回 true 可以提前终止遍历\n */\nexport function traverseAst(\n node: RootContent,\n visitor: (node: RootContent) => boolean | void\n): void {\n // 访问当前节点\n const stopEarly = visitor(node)\n if (stopEarly === true) {\n return\n }\n\n // 递归遍历子节点\n if ('children' in node && Array.isArray(node.children)) {\n for (const child of node.children) {\n traverseAst(child as RootContent, visitor)\n }\n }\n}\n\n/**\n * 从 AST 节点中收集指定类型的节点\n *\n * @param node 起始节点\n * @param predicate 匹配谓词\n * @returns 匹配的节点列表\n */\nexport function collectAstNodes<T extends RootContent>(\n node: RootContent,\n predicate: (node: RootContent) => node is T\n): T[] {\n const results: T[] = []\n\n traverseAst(node, (node) => {\n if (predicate(node)) {\n results.push(node)\n }\n })\n\n return results\n}"]}
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@incremark/core",
|
|
3
|
-
"version": "0.2.6",
|
|
4
|
-
"description": "
|
|
3
|
+
"version": "0.3.0",
|
|
4
|
+
"description": "High-performance incremental markdown parser specifically designed for AI streaming output scenarios.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./dist/index.js",
|
|
7
7
|
"module": "./dist/index.js",
|
|
@@ -18,6 +18,14 @@
|
|
|
18
18
|
"./utils": {
|
|
19
19
|
"types": "./dist/utils/index.d.ts",
|
|
20
20
|
"import": "./dist/utils/index.js"
|
|
21
|
+
},
|
|
22
|
+
"./engines/marked": {
|
|
23
|
+
"types": "./dist/engines/marked/index.d.ts",
|
|
24
|
+
"import": "./dist/engines/marked/index.js"
|
|
25
|
+
},
|
|
26
|
+
"./engines/micromark": {
|
|
27
|
+
"types": "./dist/engines/micromark/index.d.ts",
|
|
28
|
+
"import": "./dist/engines/micromark/index.js"
|
|
21
29
|
}
|
|
22
30
|
},
|
|
23
31
|
"files": [
|
|
@@ -27,6 +35,7 @@
|
|
|
27
35
|
"@types/lodash-es": "^4.17.12",
|
|
28
36
|
"@types/mdast": "^4.0.0",
|
|
29
37
|
"lodash-es": "^4.17.22",
|
|
38
|
+
"marked": "^17.0.1",
|
|
30
39
|
"mdast-util-directive": "^3.0.0",
|
|
31
40
|
"mdast-util-from-markdown": "^2.0.0",
|
|
32
41
|
"mdast-util-gfm": "^3.0.0",
|
|
@@ -55,8 +64,13 @@
|
|
|
55
64
|
"incremental",
|
|
56
65
|
"streaming",
|
|
57
66
|
"ai",
|
|
67
|
+
"chatgpt",
|
|
68
|
+
"llm",
|
|
58
69
|
"mdast",
|
|
59
|
-
"
|
|
70
|
+
"high-performance",
|
|
71
|
+
"typewriter",
|
|
72
|
+
"micromark",
|
|
73
|
+
"marked"
|
|
60
74
|
],
|
|
61
75
|
"license": "MIT",
|
|
62
76
|
"repository": {
|
|
@@ -70,6 +84,7 @@
|
|
|
70
84
|
"dev": "tsup --watch",
|
|
71
85
|
"test": "vitest",
|
|
72
86
|
"test:run": "vitest run",
|
|
87
|
+
"test:coverage": "vitest run -- --coverage",
|
|
73
88
|
"benchmark": "npx tsx src/benchmark/run.ts"
|
|
74
89
|
}
|
|
75
90
|
}
|
package/dist/index-BMUkM7mT.d.ts
DELETED
|
@@ -1,422 +0,0 @@
|
|
|
1
|
-
import { Parent, RootContent, Root, Definition, FootnoteDefinition } from 'mdast';
|
|
2
|
-
import { Extension as Extension$1 } from 'micromark-util-types';
|
|
3
|
-
import { Extension } from 'mdast-util-from-markdown';
|
|
4
|
-
|
|
5
|
-
declare module 'mdast' {
|
|
6
|
-
interface RootContentMap {
|
|
7
|
-
htmlElement: HtmlElementNode;
|
|
8
|
-
}
|
|
9
|
-
interface PhrasingContentMap {
|
|
10
|
-
htmlElement: HtmlElementNode;
|
|
11
|
-
}
|
|
12
|
-
}
|
|
13
|
-
/**
|
|
14
|
-
* 自定义 HTML 元素节点类型
|
|
15
|
-
*/
|
|
16
|
-
interface HtmlElementNode extends Parent {
|
|
17
|
-
type: 'htmlElement';
|
|
18
|
-
tagName: string;
|
|
19
|
-
attrs: Record<string, string>;
|
|
20
|
-
children: RootContent[];
|
|
21
|
-
data?: {
|
|
22
|
-
rawHtml?: string;
|
|
23
|
-
parsed?: boolean;
|
|
24
|
-
originalType?: string;
|
|
25
|
-
};
|
|
26
|
-
}
|
|
27
|
-
/**
|
|
28
|
-
* HTML 属性信息
|
|
29
|
-
*/
|
|
30
|
-
interface HtmlAttrInfo {
|
|
31
|
-
name: string;
|
|
32
|
-
value: string;
|
|
33
|
-
}
|
|
34
|
-
/**
|
|
35
|
-
* 解析后的 HTML 标签信息
|
|
36
|
-
*/
|
|
37
|
-
interface ParsedHtmlTag {
|
|
38
|
-
tagName: string;
|
|
39
|
-
attrs: Record<string, string>;
|
|
40
|
-
isClosing: boolean;
|
|
41
|
-
isSelfClosing: boolean;
|
|
42
|
-
rawHtml: string;
|
|
43
|
-
}
|
|
44
|
-
/**
|
|
45
|
-
* HTML 树扩展配置
|
|
46
|
-
*/
|
|
47
|
-
interface HtmlTreeExtensionOptions {
|
|
48
|
-
/**
|
|
49
|
-
* 标签黑名单 - 这些标签会被过滤掉(XSS 防护)
|
|
50
|
-
* 默认包含危险标签:script, style, iframe, object, embed, form, input, button, textarea, select
|
|
51
|
-
*/
|
|
52
|
-
tagBlacklist?: string[];
|
|
53
|
-
/**
|
|
54
|
-
* 属性黑名单 - 这些属性会被过滤掉(XSS 防护)
|
|
55
|
-
* 默认包含所有 on* 事件属性和 javascript: 协议
|
|
56
|
-
*/
|
|
57
|
-
attrBlacklist?: string[];
|
|
58
|
-
/**
|
|
59
|
-
* 协议黑名单 - URL 属性中禁止的协议
|
|
60
|
-
* 默认包含 javascript:, vbscript:, data: (允许 data:image/)
|
|
61
|
-
*/
|
|
62
|
-
protocolBlacklist?: string[];
|
|
63
|
-
/**
|
|
64
|
-
* 是否保留原始 HTML 在 data 中
|
|
65
|
-
* 默认为 true
|
|
66
|
-
*/
|
|
67
|
-
preserveRawHtml?: boolean;
|
|
68
|
-
/**
|
|
69
|
-
* 自定义标签处理器
|
|
70
|
-
* 可以对特定标签进行自定义处理
|
|
71
|
-
*/
|
|
72
|
-
tagHandlers?: Record<string, (node: HtmlElementNode) => HtmlElementNode | null>;
|
|
73
|
-
}
|
|
74
|
-
/**
|
|
75
|
-
* 危险标签黑名单(XSS 防护)
|
|
76
|
-
*/
|
|
77
|
-
declare const DEFAULT_TAG_BLACKLIST: string[];
|
|
78
|
-
/**
|
|
79
|
-
* 危险属性黑名单(XSS 防护)
|
|
80
|
-
* 包含所有 on* 事件属性
|
|
81
|
-
*/
|
|
82
|
-
declare const DEFAULT_ATTR_BLACKLIST: string[];
|
|
83
|
-
/**
|
|
84
|
-
* 危险协议黑名单
|
|
85
|
-
*/
|
|
86
|
-
declare const DEFAULT_PROTOCOL_BLACKLIST: string[];
|
|
87
|
-
/**
|
|
88
|
-
* HTML 内容类型
|
|
89
|
-
*/
|
|
90
|
-
type HtmlContentType = 'opening' | 'closing' | 'self-closing' | 'fragment' | 'unknown';
|
|
91
|
-
/**
|
|
92
|
-
* 判断 HTML 内容的类型
|
|
93
|
-
* - opening: 单个开标签,如 <span class="foo">
|
|
94
|
-
* - closing: 单个闭标签,如 </span>
|
|
95
|
-
* - self-closing: 自闭合标签,如 <br /> 或 <img src="...">
|
|
96
|
-
* - fragment: 完整的 HTML 片段,包含多个标签
|
|
97
|
-
* - unknown: 无法识别
|
|
98
|
-
*/
|
|
99
|
-
declare function detectHtmlContentType(html: string): HtmlContentType;
|
|
100
|
-
/**
|
|
101
|
-
* 解析单个 HTML 标签(开标签、闭标签或自闭合标签)
|
|
102
|
-
* 只处理单个标签,不处理完整的 HTML 片段
|
|
103
|
-
*/
|
|
104
|
-
declare function parseHtmlTag(html: string): ParsedHtmlTag | null;
|
|
105
|
-
/**
|
|
106
|
-
* 解析完整的 HTML 片段为 AST
|
|
107
|
-
*/
|
|
108
|
-
declare function parseHtmlFragment(html: string, options?: HtmlTreeExtensionOptions): HtmlElementNode[];
|
|
109
|
-
/**
|
|
110
|
-
* 转换整个 AST,处理所有 HTML 节点
|
|
111
|
-
*/
|
|
112
|
-
declare function transformHtmlNodes(ast: Root, options?: HtmlTreeExtensionOptions): Root;
|
|
113
|
-
/**
|
|
114
|
-
* 创建 HTML 树转换器
|
|
115
|
-
* 这是一个 unified 兼容的转换器
|
|
116
|
-
*/
|
|
117
|
-
declare function createHtmlTreeTransformer(options?: HtmlTreeExtensionOptions): (tree: Root) => Root;
|
|
118
|
-
/**
|
|
119
|
-
* mdast-util-from-markdown 扩展
|
|
120
|
-
* 注意:此扩展主要用于类型声明,实际转换在后处理阶段完成
|
|
121
|
-
*/
|
|
122
|
-
declare const htmlTreeExtension: Extension;
|
|
123
|
-
/**
|
|
124
|
-
* 判断节点是否是 HtmlElementNode
|
|
125
|
-
*/
|
|
126
|
-
declare function isHtmlElementNode(node: RootContent): node is HtmlElementNode;
|
|
127
|
-
/**
|
|
128
|
-
* 遍历所有 HTML 元素节点
|
|
129
|
-
*/
|
|
130
|
-
declare function walkHtmlElements(node: RootContent | Root, callback: (node: HtmlElementNode, parent: Parent | Root | null) => void, parent?: Parent | Root | null): void;
|
|
131
|
-
/**
|
|
132
|
-
* 查找特定标签的所有节点
|
|
133
|
-
*/
|
|
134
|
-
declare function findHtmlElementsByTag(root: Root, tagName: string): HtmlElementNode[];
|
|
135
|
-
/**
|
|
136
|
-
* 将 HtmlElementNode 转回 HTML 字符串
|
|
137
|
-
*/
|
|
138
|
-
declare function htmlElementToString(node: HtmlElementNode): string;
|
|
139
|
-
|
|
140
|
-
/**
|
|
141
|
-
* Definition 映射类型
|
|
142
|
-
*/
|
|
143
|
-
interface DefinitionMap {
|
|
144
|
-
[identifier: string]: Definition;
|
|
145
|
-
}
|
|
146
|
-
interface FootnoteDefinitionMap {
|
|
147
|
-
[identifier: string]: FootnoteDefinition;
|
|
148
|
-
}
|
|
149
|
-
/**
|
|
150
|
-
* 解析块的状态
|
|
151
|
-
*/
|
|
152
|
-
type BlockStatus = 'pending' | 'stable' | 'completed';
|
|
153
|
-
/**
|
|
154
|
-
* AST 节点的通用接口(用于遍历)
|
|
155
|
-
* 统一定义,避免各模块重复声明
|
|
156
|
-
*/
|
|
157
|
-
interface AstNode {
|
|
158
|
-
type: string;
|
|
159
|
-
value?: string;
|
|
160
|
-
children?: AstNode[];
|
|
161
|
-
[key: string]: unknown;
|
|
162
|
-
}
|
|
163
|
-
/**
|
|
164
|
-
* 解析出的块
|
|
165
|
-
*/
|
|
166
|
-
interface ParsedBlock {
|
|
167
|
-
/** 块的唯一 ID */
|
|
168
|
-
id: string;
|
|
169
|
-
/** 块状态 */
|
|
170
|
-
status: BlockStatus;
|
|
171
|
-
/** AST 节点 */
|
|
172
|
-
node: RootContent;
|
|
173
|
-
/** 原始文本起始位置(相对于完整文档) */
|
|
174
|
-
startOffset: number;
|
|
175
|
-
/** 原始文本结束位置 */
|
|
176
|
-
endOffset: number;
|
|
177
|
-
/** 原始文本内容 */
|
|
178
|
-
rawText: string;
|
|
179
|
-
}
|
|
180
|
-
/**
|
|
181
|
-
* 增量更新事件
|
|
182
|
-
*/
|
|
183
|
-
interface IncrementalUpdate {
|
|
184
|
-
/** 新完成的块 */
|
|
185
|
-
completed: ParsedBlock[];
|
|
186
|
-
/** 更新的块(内容变化) */
|
|
187
|
-
updated: ParsedBlock[];
|
|
188
|
-
/** 当前正在解析中的块(可能不完整) */
|
|
189
|
-
pending: ParsedBlock[];
|
|
190
|
-
/** 完整的 AST(包含所有已解析的内容) */
|
|
191
|
-
ast: Root;
|
|
192
|
-
/** Definition 映射表(用于引用式图片和链接) */
|
|
193
|
-
definitions: DefinitionMap;
|
|
194
|
-
/** Footnote Definition 映射表 */
|
|
195
|
-
footnoteDefinitions: FootnoteDefinitionMap;
|
|
196
|
-
/** 脚注引用的出现顺序(用于渲染时排序) */
|
|
197
|
-
footnoteReferenceOrder: string[];
|
|
198
|
-
}
|
|
199
|
-
/**
|
|
200
|
-
* 容器语法配置
|
|
201
|
-
*/
|
|
202
|
-
interface ContainerConfig {
|
|
203
|
-
/** 容器标记字符,默认 ':' */
|
|
204
|
-
marker?: string;
|
|
205
|
-
/** 最小标记长度,默认 3 */
|
|
206
|
-
minMarkerLength?: number;
|
|
207
|
-
/** 允许的容器名称(如 ['warning', 'info', 'youtube']),undefined 表示允许所有 */
|
|
208
|
-
allowedNames?: string[];
|
|
209
|
-
}
|
|
210
|
-
/**
|
|
211
|
-
* 解析器状态变化事件
|
|
212
|
-
*/
|
|
213
|
-
interface ParserState {
|
|
214
|
-
/** 已完成的块 */
|
|
215
|
-
completedBlocks: ParsedBlock[];
|
|
216
|
-
/** 待处理的块 */
|
|
217
|
-
pendingBlocks: ParsedBlock[];
|
|
218
|
-
/** 完整的 Markdown 内容 */
|
|
219
|
-
markdown: string;
|
|
220
|
-
/** 完整的 AST */
|
|
221
|
-
ast: Root;
|
|
222
|
-
definitions: DefinitionMap;
|
|
223
|
-
footnoteDefinitions: FootnoteDefinitionMap;
|
|
224
|
-
}
|
|
225
|
-
/**
|
|
226
|
-
* 解析器配置
|
|
227
|
-
*/
|
|
228
|
-
interface ParserOptions {
|
|
229
|
-
/** 启用 GFM 扩展(表格、任务列表等) */
|
|
230
|
-
gfm?: boolean;
|
|
231
|
-
/**
|
|
232
|
-
* 启用数学公式支持($..$ 行内公式和 $$...$$ 块级公式)
|
|
233
|
-
* - false/undefined: 禁用(默认)
|
|
234
|
-
* - true: 启用数学公式解析
|
|
235
|
-
*/
|
|
236
|
-
math?: boolean;
|
|
237
|
-
/**
|
|
238
|
-
* 启用 ::: 容器语法支持(用于边界检测)
|
|
239
|
-
* - false: 禁用(默认)
|
|
240
|
-
* - true: 使用默认配置启用
|
|
241
|
-
* - ContainerConfig: 使用自定义配置启用
|
|
242
|
-
*/
|
|
243
|
-
containers?: boolean | ContainerConfig;
|
|
244
|
-
/**
|
|
245
|
-
* 启用 HTML 树转换
|
|
246
|
-
* - false/undefined: 禁用(默认),HTML 节点保持原始 type: 'html' 格式
|
|
247
|
-
* - true: 使用默认配置启用,将 HTML 节点转换为结构化的 htmlElement 节点
|
|
248
|
-
* - HtmlTreeExtensionOptions: 使用自定义配置启用(可配置黑名单等)
|
|
249
|
-
*/
|
|
250
|
-
htmlTree?: boolean | HtmlTreeExtensionOptions;
|
|
251
|
-
/** 自定义块边界检测函数 */
|
|
252
|
-
blockBoundaryDetector?: (content: string, position: number) => boolean;
|
|
253
|
-
/** 自定义 micromark 扩展(如 directive) */
|
|
254
|
-
extensions?: Extension$1[];
|
|
255
|
-
/** 自定义 mdast 扩展(如 directiveFromMarkdown) */
|
|
256
|
-
mdastExtensions?: Extension[];
|
|
257
|
-
/** 状态变化回调 */
|
|
258
|
-
onChange?: (state: ParserState) => void;
|
|
259
|
-
}
|
|
260
|
-
/**
|
|
261
|
-
* 块上下文
|
|
262
|
-
*/
|
|
263
|
-
interface BlockContext {
|
|
264
|
-
/** 当前是否在代码块中 */
|
|
265
|
-
inFencedCode: boolean;
|
|
266
|
-
/** 代码块的 fence 字符(` 或 ~) */
|
|
267
|
-
fenceChar?: string;
|
|
268
|
-
/** 代码块的 fence 长度 */
|
|
269
|
-
fenceLength?: number;
|
|
270
|
-
/** 当前列表嵌套深度 */
|
|
271
|
-
listDepth: number;
|
|
272
|
-
/** 当前引用嵌套深度 */
|
|
273
|
-
blockquoteDepth: number;
|
|
274
|
-
/** 当前是否在容器块中 */
|
|
275
|
-
inContainer: boolean;
|
|
276
|
-
/** 容器的标记长度 */
|
|
277
|
-
containerMarkerLength?: number;
|
|
278
|
-
/** 容器名称 */
|
|
279
|
-
containerName?: string;
|
|
280
|
-
/** 容器嵌套深度(支持嵌套容器) */
|
|
281
|
-
containerDepth: number;
|
|
282
|
-
/** 当前是否在列表中 */
|
|
283
|
-
inList: boolean;
|
|
284
|
-
/** 当前列表是否是有序列表 */
|
|
285
|
-
listOrdered?: boolean;
|
|
286
|
-
/** 当前列表的基础缩进 */
|
|
287
|
-
listIndent?: number;
|
|
288
|
-
/** 遇到空行后,列表可能结束(等待下一行确认) */
|
|
289
|
-
listMayEnd?: boolean;
|
|
290
|
-
/** 当前是否在脚注定义中 */
|
|
291
|
-
inFootnote?: boolean;
|
|
292
|
-
/** 脚注标识符 */
|
|
293
|
-
footnoteIdentifier?: string;
|
|
294
|
-
}
|
|
295
|
-
/**
|
|
296
|
-
* 容器检测结果
|
|
297
|
-
*/
|
|
298
|
-
interface ContainerMatch {
|
|
299
|
-
/** 容器名称 */
|
|
300
|
-
name: string;
|
|
301
|
-
/** 标记长度(冒号数量) */
|
|
302
|
-
markerLength: number;
|
|
303
|
-
/** 是否是结束标记 */
|
|
304
|
-
isEnd: boolean;
|
|
305
|
-
}
|
|
306
|
-
/**
|
|
307
|
-
* 块类型检测结果
|
|
308
|
-
*/
|
|
309
|
-
interface BlockTypeInfo {
|
|
310
|
-
type: string;
|
|
311
|
-
/** 是否是容器节点(可以包含其他块) */
|
|
312
|
-
isContainer: boolean;
|
|
313
|
-
/** 是否需要显式关闭(如代码块) */
|
|
314
|
-
requiresClosing: boolean;
|
|
315
|
-
/** 关闭模式 */
|
|
316
|
-
closingPattern?: RegExp;
|
|
317
|
-
}
|
|
318
|
-
|
|
319
|
-
/**
|
|
320
|
-
* 块类型检测与边界判断
|
|
321
|
-
*
|
|
322
|
-
* Markdown 块级元素的识别规则
|
|
323
|
-
*/
|
|
324
|
-
|
|
325
|
-
/**
|
|
326
|
-
* 检测行是否是代码块 fence 开始
|
|
327
|
-
*/
|
|
328
|
-
declare function detectFenceStart(line: string): {
|
|
329
|
-
char: string;
|
|
330
|
-
length: number;
|
|
331
|
-
} | null;
|
|
332
|
-
/**
|
|
333
|
-
* 检测行是否是代码块 fence 结束
|
|
334
|
-
*/
|
|
335
|
-
declare function detectFenceEnd(line: string, context: BlockContext): boolean;
|
|
336
|
-
/**
|
|
337
|
-
* 检测是否是空行或仅包含空白字符
|
|
338
|
-
*/
|
|
339
|
-
declare function isEmptyLine(line: string): boolean;
|
|
340
|
-
/**
|
|
341
|
-
* 检测是否是标题行
|
|
342
|
-
*/
|
|
343
|
-
declare function isHeading(line: string): boolean;
|
|
344
|
-
/**
|
|
345
|
-
* 检测是否是 thematic break(水平线)
|
|
346
|
-
*/
|
|
347
|
-
declare function isThematicBreak(line: string): boolean;
|
|
348
|
-
/**
|
|
349
|
-
* 检测是否是列表项开始
|
|
350
|
-
*
|
|
351
|
-
* CommonMark 规范:列表项可以是以下形式:
|
|
352
|
-
* - `- text`(无缩进)
|
|
353
|
-
* - `1. text`(有序列表)
|
|
354
|
-
* - ` - text`(缩进4个空格,作为上一个列表项的延续)
|
|
355
|
-
*
|
|
356
|
-
* 注意:` - text` 这种形式,虽然 `-` 后面没有空格,
|
|
357
|
-
* 但因为前面有4个空格的缩进,所以是列表项的有效形式。
|
|
358
|
-
*/
|
|
359
|
-
declare function isListItemStart(line: string): {
|
|
360
|
-
ordered: boolean;
|
|
361
|
-
indent: number;
|
|
362
|
-
} | null;
|
|
363
|
-
/**
|
|
364
|
-
* 检测是否是引用块开始
|
|
365
|
-
*/
|
|
366
|
-
declare function isBlockquoteStart(line: string): boolean;
|
|
367
|
-
/**
|
|
368
|
-
* 检测是否是 HTML 块
|
|
369
|
-
*/
|
|
370
|
-
declare function isHtmlBlock(line: string): boolean;
|
|
371
|
-
/**
|
|
372
|
-
* 检测表格分隔行
|
|
373
|
-
*/
|
|
374
|
-
declare function isTableDelimiter(line: string): boolean;
|
|
375
|
-
/**
|
|
376
|
-
* 检测是否是脚注定义的起始行
|
|
377
|
-
* 格式: [^id]: content
|
|
378
|
-
*
|
|
379
|
-
* @example
|
|
380
|
-
* isFootnoteDefinitionStart('[^1]: 脚注内容') // true
|
|
381
|
-
* isFootnoteDefinitionStart('[^note]: 内容') // true
|
|
382
|
-
* isFootnoteDefinitionStart(' 缩进内容') // false
|
|
383
|
-
*/
|
|
384
|
-
declare function isFootnoteDefinitionStart(line: string): boolean;
|
|
385
|
-
/**
|
|
386
|
-
* 检测是否是脚注定义的延续行(缩进行)
|
|
387
|
-
* 至少4个空格或1个tab
|
|
388
|
-
*
|
|
389
|
-
* @example
|
|
390
|
-
* isFootnoteContinuation(' 第二行') // true
|
|
391
|
-
* isFootnoteContinuation('\t第二行') // true
|
|
392
|
-
* isFootnoteContinuation(' 两个空格') // false
|
|
393
|
-
*/
|
|
394
|
-
declare function isFootnoteContinuation(line: string): boolean;
|
|
395
|
-
/**
|
|
396
|
-
* 检测容器开始或结束
|
|
397
|
-
*
|
|
398
|
-
* 支持格式:
|
|
399
|
-
* - ::: name 开始
|
|
400
|
-
* - ::: name attr 开始(带属性)
|
|
401
|
-
* - ::: 结束
|
|
402
|
-
* - :::::: name 开始(更长的标记,用于嵌套)
|
|
403
|
-
*/
|
|
404
|
-
declare function detectContainer(line: string, config?: ContainerConfig): ContainerMatch | null;
|
|
405
|
-
/**
|
|
406
|
-
* 检测容器结束
|
|
407
|
-
*/
|
|
408
|
-
declare function detectContainerEnd(line: string, context: BlockContext, config?: ContainerConfig): boolean;
|
|
409
|
-
/**
|
|
410
|
-
* 判断两行之间是否构成块边界
|
|
411
|
-
*/
|
|
412
|
-
declare function isBlockBoundary(prevLine: string, currentLine: string, context: BlockContext): boolean;
|
|
413
|
-
/**
|
|
414
|
-
* 创建初始上下文
|
|
415
|
-
*/
|
|
416
|
-
declare function createInitialContext(): BlockContext;
|
|
417
|
-
/**
|
|
418
|
-
* 更新上下文(处理一行后)
|
|
419
|
-
*/
|
|
420
|
-
declare function updateContext(line: string, context: BlockContext, containerConfig?: ContainerConfig | boolean): BlockContext;
|
|
421
|
-
|
|
422
|
-
export { type AstNode as A, type BlockStatus as B, type ContainerConfig as C, type DefinitionMap as D, findHtmlElementsByTag as E, type FootnoteDefinitionMap as F, htmlElementToString as G, DEFAULT_TAG_BLACKLIST as H, type IncrementalUpdate as I, DEFAULT_ATTR_BLACKLIST as J, DEFAULT_PROTOCOL_BLACKLIST as K, htmlTreeExtension as L, type HtmlElementNode as M, type HtmlAttrInfo as N, type ParsedHtmlTag as O, type ParserOptions as P, type HtmlTreeExtensionOptions as Q, type HtmlContentType as R, isFootnoteDefinitionStart as S, isFootnoteContinuation as T, type ParsedBlock as a, type ParserState as b, type BlockContext as c, type ContainerMatch as d, type BlockTypeInfo as e, detectFenceStart as f, detectFenceEnd as g, isHeading as h, isEmptyLine as i, isThematicBreak as j, isListItemStart as k, isBlockquoteStart as l, isHtmlBlock as m, isTableDelimiter as n, detectContainer as o, detectContainerEnd as p, isBlockBoundary as q, createInitialContext as r, createHtmlTreeTransformer as s, transformHtmlNodes as t, updateContext as u, parseHtmlTag as v, parseHtmlFragment as w, detectHtmlContentType as x, isHtmlElementNode as y, walkHtmlElements as z };
|