@incremark/core 0.2.6 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.en.md +132 -23
- package/dist/MarkedAstBuildter-BsjxZko_.d.ts +72 -0
- package/dist/detector/index.d.ts +118 -1
- package/dist/detector/index.js +196 -118
- package/dist/detector/index.js.map +1 -1
- package/dist/engines/marked/index.d.ts +29 -0
- package/dist/engines/marked/index.js +1541 -0
- package/dist/engines/marked/index.js.map +1 -0
- package/dist/engines/micromark/index.d.ts +106 -0
- package/dist/engines/micromark/index.js +1161 -0
- package/dist/engines/micromark/index.js.map +1 -0
- package/dist/index-mZ7yCqNH.d.ts +225 -0
- package/dist/index.d.ts +68 -54
- package/dist/index.js +1908 -1198
- package/dist/index.js.map +1 -1
- package/dist/types-C_EW5vfp.d.ts +123 -0
- package/dist/utils/index.d.ts +17 -1
- package/dist/utils/index.js +21 -1
- package/dist/utils/index.js.map +1 -1
- package/package.json +18 -3
- package/dist/index-BMUkM7mT.d.ts +0 -422
package/README.en.md
CHANGED
|
@@ -1,15 +1,18 @@
|
|
|
1
1
|
# @incremark/core
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
High-performance incremental Markdown parser core library, designed specifically for AI streaming output scenarios.
|
|
4
4
|
|
|
5
5
|
**[🇨🇳 中文](./README.md)** | 🇺🇸 English
|
|
6
6
|
|
|
7
|
-
##
|
|
7
|
+
## Core Advantages
|
|
8
8
|
|
|
9
|
-
- 🚀 **
|
|
10
|
-
-
|
|
11
|
-
-
|
|
12
|
-
-
|
|
9
|
+
- 🚀 **O(n) Complexity** - Incremental parsing, each character parsed at most once
|
|
10
|
+
- ⚡ **Dual-Engine Architecture** - Choice between Marked (fast) and Micromark (stable)
|
|
11
|
+
- 🔄 **Stream-Friendly** - Designed for AI streaming output scenarios
|
|
12
|
+
- ⌨️ **Typewriter Effect** - Built-in BlockTransformer for character-by-character display
|
|
13
|
+
- 🎯 **Smart Boundary Detection** - Accurately identifies Markdown block boundaries
|
|
14
|
+
- 📦 **Tree-shaking** - Only the Marked engine is bundled by default; other engines are loaded on demand
|
|
15
|
+
- 🔌 **Rich Extensions** - Support for footnotes, math formulas, custom containers, HTML parsing
|
|
13
16
|
|
|
14
17
|
## Installation
|
|
15
18
|
|
|
@@ -22,7 +25,11 @@ pnpm add @incremark/core
|
|
|
22
25
|
```ts
|
|
23
26
|
import { createIncremarkParser } from '@incremark/core'
|
|
24
27
|
|
|
25
|
-
const parser = createIncremarkParser({
|
|
28
|
+
const parser = createIncremarkParser({
|
|
29
|
+
gfm: true,
|
|
30
|
+
math: true,
|
|
31
|
+
containers: true
|
|
32
|
+
})
|
|
26
33
|
|
|
27
34
|
// Simulate streaming input
|
|
28
35
|
parser.append('# Hello\n')
|
|
@@ -34,6 +41,45 @@ console.log(parser.getCompletedBlocks())
|
|
|
34
41
|
console.log(parser.getAst())
|
|
35
42
|
```
|
|
36
43
|
|
|
44
|
+
## Dual-Engine Architecture
|
|
45
|
+
|
|
46
|
+
### Marked Engine (Default)
|
|
47
|
+
|
|
48
|
+
Fast mode, optimized for streaming scenarios:
|
|
49
|
+
|
|
50
|
+
```ts
|
|
51
|
+
import { createIncremarkParser } from '@incremark/core'
|
|
52
|
+
|
|
53
|
+
// Uses Marked engine by default
|
|
54
|
+
const parser = createIncremarkParser({ gfm: true, math: true })
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
**Features:**
|
|
58
|
+
- 🚀 Ultra-fast parsing, ideal for real-time AI chat
|
|
59
|
+
- 🔧 Custom extensions for footnotes, math, containers, inline HTML
|
|
60
|
+
- 📦 Tree-shaking friendly, default bundle only includes Marked
|
|
61
|
+
|
|
62
|
+
### Micromark Engine
|
|
63
|
+
|
|
64
|
+
Stable mode, strict CommonMark compliance:
|
|
65
|
+
|
|
66
|
+
```ts
|
|
67
|
+
import { createIncremarkParser } from '@incremark/core'
|
|
68
|
+
import { MicromarkAstBuilder } from '@incremark/core/engines/micromark'
|
|
69
|
+
|
|
70
|
+
// Use Micromark engine
|
|
71
|
+
const parser = createIncremarkParser({
|
|
72
|
+
astBuilder: MicromarkAstBuilder,
|
|
73
|
+
gfm: true,
|
|
74
|
+
math: true
|
|
75
|
+
})
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
**Features:**
|
|
79
|
+
- ✅ 100% CommonMark compatible
|
|
80
|
+
- 🔌 Rich micromark/mdast plugin ecosystem
|
|
81
|
+
- 🛡️ Battle-tested stability
|
|
82
|
+
|
|
37
83
|
## API
|
|
38
84
|
|
|
39
85
|
### createIncremarkParser(options)
|
|
@@ -42,10 +88,11 @@ Create a parser instance.
|
|
|
42
88
|
|
|
43
89
|
```ts
|
|
44
90
|
interface ParserOptions {
|
|
45
|
-
gfm?: boolean
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
91
|
+
gfm?: boolean // Enable GFM (tables, task lists, etc.)
|
|
92
|
+
math?: boolean // Enable math formulas ($..$ and $$..$$)
|
|
93
|
+
containers?: boolean // Enable ::: container syntax
|
|
94
|
+
htmlTree?: boolean // Enable HTML structured parsing
|
|
95
|
+
astBuilder?: AstBuilderClass // Custom AST builder (for engine switching)
|
|
49
96
|
}
|
|
50
97
|
```
|
|
51
98
|
|
|
@@ -55,56 +102,118 @@ Append content, returns incremental update.
|
|
|
55
102
|
|
|
56
103
|
### parser.finalize()
|
|
57
104
|
|
|
58
|
-
Complete parsing.
|
|
105
|
+
Complete parsing and mark any remaining pending content as completed.
|
|
59
106
|
|
|
60
107
|
### parser.reset()
|
|
61
108
|
|
|
62
|
-
Reset state.
|
|
109
|
+
Reset parser state.
|
|
63
110
|
|
|
64
111
|
### parser.render(content)
|
|
65
112
|
|
|
66
|
-
|
|
113
|
+
Render a complete Markdown document in one call (equivalent to reset + append + finalize).
|
|
67
114
|
|
|
68
115
|
```ts
|
|
69
116
|
const update = parser.render('# Hello World')
|
|
70
|
-
console.log(update.completed) //
|
|
117
|
+
console.log(update.completed) // Completed blocks
|
|
71
118
|
```
|
|
72
119
|
|
|
73
|
-
### parser.
|
|
120
|
+
### parser.getAst()
|
|
74
121
|
|
|
75
|
-
Get
|
|
122
|
+
Get complete AST (mdast format).
|
|
76
123
|
|
|
77
124
|
### parser.getCompletedBlocks()
|
|
78
125
|
|
|
79
126
|
Get completed blocks.
|
|
80
127
|
|
|
81
|
-
### parser.
|
|
128
|
+
### parser.getBuffer()
|
|
82
129
|
|
|
83
|
-
Get
|
|
130
|
+
Get current buffer content.
|
|
84
131
|
|
|
85
|
-
|
|
132
|
+
## BlockTransformer
|
|
86
133
|
|
|
87
|
-
|
|
134
|
+
Typewriter-effect controller that serves as middleware between the parser and the renderer.
|
|
135
|
+
|
|
136
|
+
### Basic Usage
|
|
137
|
+
|
|
138
|
+
```ts
|
|
139
|
+
import { createBlockTransformer, defaultPlugins } from '@incremark/core'
|
|
140
|
+
|
|
141
|
+
const transformer = createBlockTransformer({
|
|
142
|
+
charsPerTick: 2, // Display 2 characters per tick
|
|
143
|
+
tickInterval: 50, // Every 50ms
|
|
144
|
+
effect: 'fade-in', // Animation effect: none | fade-in | typing
|
|
145
|
+
plugins: defaultPlugins,
|
|
146
|
+
onChange: (displayBlocks) => {
|
|
147
|
+
render(displayBlocks)
|
|
148
|
+
}
|
|
149
|
+
})
|
|
150
|
+
|
|
151
|
+
// Push source blocks
|
|
152
|
+
transformer.push(sourceBlocks)
|
|
153
|
+
|
|
154
|
+
// Skip animation
|
|
155
|
+
transformer.skip()
|
|
156
|
+
|
|
157
|
+
// Reset
|
|
158
|
+
transformer.reset()
|
|
159
|
+
|
|
160
|
+
// Destroy
|
|
161
|
+
transformer.destroy()
|
|
162
|
+
```
|
|
163
|
+
|
|
164
|
+
### Configuration Options
|
|
165
|
+
|
|
166
|
+
```ts
|
|
167
|
+
interface TransformerOptions {
|
|
168
|
+
charsPerTick?: number | [number, number] // Characters per tick (supports random range)
|
|
169
|
+
tickInterval?: number // Interval in ms
|
|
170
|
+
effect?: 'none' | 'fade-in' | 'typing' // Animation effect
|
|
171
|
+
cursor?: string // Cursor character (typing effect)
|
|
172
|
+
plugins?: TransformerPlugin[] // Plugin list
|
|
173
|
+
onChange?: (blocks: DisplayBlock[]) => void
|
|
174
|
+
}
|
|
175
|
+
```
|
|
88
176
|
|
|
89
177
|
## Type Definitions
|
|
90
178
|
|
|
91
179
|
```ts
|
|
92
180
|
interface ParsedBlock {
|
|
93
181
|
id: string
|
|
94
|
-
status: 'pending' | '
|
|
182
|
+
status: 'pending' | 'completed'
|
|
95
183
|
node: RootContent
|
|
96
184
|
startOffset: number
|
|
97
185
|
endOffset: number
|
|
98
186
|
rawText: string
|
|
99
187
|
}
|
|
188
|
+
|
|
189
|
+
interface IncrementalUpdate {
|
|
190
|
+
completed: ParsedBlock[]
|
|
191
|
+
pending: ParsedBlock[]
|
|
192
|
+
ast: Root
|
|
193
|
+
definitions: DefinitionMap
|
|
194
|
+
footnoteDefinitions: FootnoteDefinitionMap
|
|
195
|
+
footnoteReferenceOrder: string[]
|
|
196
|
+
}
|
|
100
197
|
```
|
|
101
198
|
|
|
199
|
+
## Performance Comparison
|
|
200
|
+
|
|
201
|
+
Based on benchmark tests with 38 real Markdown documents:
|
|
202
|
+
|
|
203
|
+
| Comparison | Average Advantage |
|
|
204
|
+
|------------|-------------------|
|
|
205
|
+
| vs Streamdown | ~**6.1x faster** |
|
|
206
|
+
| vs ant-design-x | ~**7.2x faster** |
|
|
207
|
+
| vs markstream-vue | ~**28.3x faster** |
|
|
208
|
+
|
|
209
|
+
The longer the document, the greater the advantage (O(n) vs O(n²)).
|
|
210
|
+
|
|
102
211
|
## Framework Integration
|
|
103
212
|
|
|
104
213
|
- Vue: [@incremark/vue](../vue)
|
|
105
214
|
- React: [@incremark/react](../react)
|
|
215
|
+
- Svelte: [@incremark/svelte](../svelte)
|
|
106
216
|
|
|
107
217
|
## License
|
|
108
218
|
|
|
109
219
|
MIT
|
|
110
|
-
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
import { Root, RootContent } from 'mdast';
|
|
2
|
+
import { C as ContainerConfig, H as HtmlTreeExtensionOptions, B as BlockStatus, P as ParsedBlock } from './index-mZ7yCqNH.js';
|
|
3
|
+
import { I as IAstBuilder, E as EngineParserOptions } from './types-C_EW5vfp.js';
|
|
4
|
+
|
|
5
|
+
/**
|
|
6
|
+
* Marked AST 构建器(极速模式)
|
|
7
|
+
*
|
|
8
|
+
* 基于 marked 解析器
|
|
9
|
+
* 特点:
|
|
10
|
+
* - 速度更快
|
|
11
|
+
* - 适合流式渲染场景
|
|
12
|
+
*
|
|
13
|
+
*/
|
|
14
|
+
|
|
15
|
+
/**
|
|
16
|
+
* Marked AST 构建器
|
|
17
|
+
*
|
|
18
|
+
* 使用 marked 解析 Markdown,速度更快
|
|
19
|
+
* 适用于流式渲染和高性能场景
|
|
20
|
+
*/
|
|
21
|
+
declare class MarkedAstBuilder implements IAstBuilder {
|
|
22
|
+
private options;
|
|
23
|
+
readonly containerConfig: ContainerConfig | undefined;
|
|
24
|
+
readonly htmlTreeOptions: HtmlTreeExtensionOptions | undefined;
|
|
25
|
+
private globalLinks;
|
|
26
|
+
/** 用户传入的 marked 扩展 */
|
|
27
|
+
private readonly userExtensions;
|
|
28
|
+
/** 转换上下文(用于递归转换) */
|
|
29
|
+
private readonly transformContext;
|
|
30
|
+
constructor(options?: EngineParserOptions);
|
|
31
|
+
parse(text: string): Root;
|
|
32
|
+
/**
|
|
33
|
+
* 预处理 tokens
|
|
34
|
+
*
|
|
35
|
+
* 处理容器指令和遗留的脚注定义(从 paragraph 中提取)
|
|
36
|
+
*/
|
|
37
|
+
private preprocessTokens;
|
|
38
|
+
/**
|
|
39
|
+
* 转换 tokens 为 MDAST 节点(带位置信息)
|
|
40
|
+
*/
|
|
41
|
+
private transformTokensWithPosition;
|
|
42
|
+
/**
|
|
43
|
+
* 转换 tokens 为 MDAST 节点(不带位置信息)
|
|
44
|
+
*/
|
|
45
|
+
private transformTokens;
|
|
46
|
+
/**
|
|
47
|
+
* 转换行内 tokens
|
|
48
|
+
*/
|
|
49
|
+
private transformInline;
|
|
50
|
+
/**
|
|
51
|
+
* 解析脚注内容为 AST 节点
|
|
52
|
+
*/
|
|
53
|
+
private parseFootnoteContent;
|
|
54
|
+
/**
|
|
55
|
+
* 处理 HTML 节点
|
|
56
|
+
*
|
|
57
|
+
* 使用 html-extension 的 transformHtmlNodes 来处理:
|
|
58
|
+
* - 合并被空行分割的 HTML 节点
|
|
59
|
+
* - 将 HTML 解析为 HtmlElementNode 树结构
|
|
60
|
+
*/
|
|
61
|
+
private processHtmlNodes;
|
|
62
|
+
/**
|
|
63
|
+
* 将 AST 节点转换为 ParsedBlock
|
|
64
|
+
*/
|
|
65
|
+
nodesToBlocks(nodes: RootContent[], startOffset: number, rawText: string, status: BlockStatus, generateBlockId: () => string): ParsedBlock[];
|
|
66
|
+
}
|
|
67
|
+
/**
|
|
68
|
+
* AstBuilder 别名(向后兼容)
|
|
69
|
+
*/
|
|
70
|
+
declare const AstBuilder: typeof MarkedAstBuilder;
|
|
71
|
+
|
|
72
|
+
export { AstBuilder as A, MarkedAstBuilder as M };
|
package/dist/detector/index.d.ts
CHANGED
|
@@ -1,4 +1,121 @@
|
|
|
1
|
-
|
|
1
|
+
import { c as BlockContext, C as ContainerConfig, d as ContainerMatch } from '../index-mZ7yCqNH.js';
|
|
2
2
|
import 'mdast';
|
|
3
3
|
import 'micromark-util-types';
|
|
4
4
|
import 'mdast-util-from-markdown';
|
|
5
|
+
|
|
6
|
+
/**
|
|
7
|
+
* 块类型检测与边界判断
|
|
8
|
+
*
|
|
9
|
+
* Markdown 块级元素的识别规则
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
/**
|
|
13
|
+
* 检测行是否是代码块 fence 开始
|
|
14
|
+
*/
|
|
15
|
+
declare function detectFenceStart(line: string): {
|
|
16
|
+
char: string;
|
|
17
|
+
length: number;
|
|
18
|
+
} | null;
|
|
19
|
+
/**
|
|
20
|
+
* 检测行是否是代码块 fence 结束
|
|
21
|
+
*/
|
|
22
|
+
declare function detectFenceEnd(line: string, context: BlockContext): boolean;
|
|
23
|
+
/**
|
|
24
|
+
* 检测是否是空行或仅包含空白字符
|
|
25
|
+
*/
|
|
26
|
+
declare function isEmptyLine(line: string): boolean;
|
|
27
|
+
/**
|
|
28
|
+
* 检测是否是 Setext 标题下划线(=== 或 ---)
|
|
29
|
+
* @param line 当前行
|
|
30
|
+
* @param prevLine 前一行
|
|
31
|
+
* @returns 是否是 Setext 标题下划线
|
|
32
|
+
*/
|
|
33
|
+
declare function isSetextHeadingUnderline(line: string, prevLine?: string): boolean;
|
|
34
|
+
/**
|
|
35
|
+
* 检测是否是标题行
|
|
36
|
+
*/
|
|
37
|
+
declare function isHeading(line: string): boolean;
|
|
38
|
+
/**
|
|
39
|
+
* 检测是否是 thematic break(水平线)
|
|
40
|
+
*/
|
|
41
|
+
declare function isThematicBreak(line: string): boolean;
|
|
42
|
+
/**
|
|
43
|
+
* 检测是否是列表项开始
|
|
44
|
+
*
|
|
45
|
+
* CommonMark 规范:列表项可以是以下形式:
|
|
46
|
+
* - `- text`(无缩进)
|
|
47
|
+
* - `1. text`(有序列表)
|
|
48
|
+
* - `    - text`(缩进4个空格,作为上一个列表项的延续)
|
|
49
|
+
*
|
|
50
|
+
* 注意:` - text` 这种形式,虽然 `-` 后面没有空格,
|
|
51
|
+
* 但因为前面有4个空格的缩进,所以是列表项的有效形式。
|
|
52
|
+
*/
|
|
53
|
+
declare function isListItemStart(line: string): {
|
|
54
|
+
ordered: boolean;
|
|
55
|
+
indent: number;
|
|
56
|
+
} | null;
|
|
57
|
+
/**
|
|
58
|
+
* 检测是否是引用块开始
|
|
59
|
+
*/
|
|
60
|
+
declare function isBlockquoteStart(line: string): boolean;
|
|
61
|
+
/**
|
|
62
|
+
* 检测是否是 HTML 块
|
|
63
|
+
*/
|
|
64
|
+
declare function isHtmlBlock(line: string): boolean;
|
|
65
|
+
/**
|
|
66
|
+
* 检测表格分隔行
|
|
67
|
+
*/
|
|
68
|
+
declare function isTableDelimiter(line: string): boolean;
|
|
69
|
+
/**
|
|
70
|
+
* 检测是否是脚注定义的起始行
|
|
71
|
+
* 格式: [^id]: content
|
|
72
|
+
*
|
|
73
|
+
* @example
|
|
74
|
+
* isFootnoteDefinitionStart('[^1]: 脚注内容') // true
|
|
75
|
+
* isFootnoteDefinitionStart('[^note]: 内容') // true
|
|
76
|
+
* isFootnoteDefinitionStart(' 缩进内容') // false
|
|
77
|
+
*/
|
|
78
|
+
declare function isFootnoteDefinitionStart(line: string): boolean;
|
|
79
|
+
/**
|
|
80
|
+
* 检测是否是脚注定义的延续行(缩进行)
|
|
81
|
+
* 至少4个空格或1个tab
|
|
82
|
+
*
|
|
83
|
+
* @example
|
|
84
|
+
* isFootnoteContinuation('    第二行') // true
|
|
85
|
+
* isFootnoteContinuation('\t第二行') // true
|
|
86
|
+
* isFootnoteContinuation('  两个空格') // false
|
|
87
|
+
*/
|
|
88
|
+
declare function isFootnoteContinuation(line: string): boolean;
|
|
89
|
+
/**
|
|
90
|
+
* 检测容器开始或结束
|
|
91
|
+
*
|
|
92
|
+
* 支持格式:
|
|
93
|
+
* - ::: name 开始
|
|
94
|
+
* - ::: name attr 开始(带属性)
|
|
95
|
+
* - ::: 结束
|
|
96
|
+
* - :::::: name 开始(更长的标记,用于嵌套)
|
|
97
|
+
*/
|
|
98
|
+
declare function detectContainer(line: string, config?: ContainerConfig): ContainerMatch | null;
|
|
99
|
+
/**
|
|
100
|
+
* 检测容器结束
|
|
101
|
+
*/
|
|
102
|
+
declare function detectContainerEnd(line: string, context: BlockContext, config?: ContainerConfig): boolean;
|
|
103
|
+
/**
|
|
104
|
+
* 判断两行之间是否构成块边界
|
|
105
|
+
*/
|
|
106
|
+
declare function isBlockBoundary(prevLine: string, currentLine: string, context: BlockContext): boolean;
|
|
107
|
+
/**
|
|
108
|
+
* 创建初始上下文
|
|
109
|
+
*/
|
|
110
|
+
declare function createInitialContext(): BlockContext;
|
|
111
|
+
/**
|
|
112
|
+
* 更新上下文(处理一行后)
|
|
113
|
+
*
|
|
114
|
+
* @param line 当前行
|
|
115
|
+
* @param context 当前上下文
|
|
116
|
+
* @param containerConfig 容器配置
|
|
117
|
+
* @returns 更新后的上下文
|
|
118
|
+
*/
|
|
119
|
+
declare function updateContext(line: string, context: BlockContext, containerConfig?: ContainerConfig | boolean): BlockContext;
|
|
120
|
+
|
|
121
|
+
export { createInitialContext, detectContainer, detectContainerEnd, detectFenceEnd, detectFenceStart, isBlockBoundary, isBlockquoteStart, isEmptyLine, isFootnoteContinuation, isFootnoteDefinitionStart, isHeading, isHtmlBlock, isListItemStart, isSetextHeadingUnderline, isTableDelimiter, isThematicBreak, updateContext };
|