@incremark/core 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,476 @@
1
+ /**
2
+ * 增量 Markdown 解析器
3
+ *
4
+ * 设计思路:
5
+ * 1. 维护一个文本缓冲区,接收流式输入
6
+ * 2. 识别"稳定边界"(如空行、标题等),将已完成的块标记为 completed
7
+ * 3. 对于正在接收的块,每次重新解析,但只解析该块的内容
8
+ * 4. 复杂嵌套节点(如列表、引用)作为整体处理,直到确认完成
9
+ */
10
+
11
+ import { fromMarkdown } from 'mdast-util-from-markdown'
12
+ import { gfmFromMarkdown } from 'mdast-util-gfm'
13
+ import { gfm } from 'micromark-extension-gfm'
14
+ import type { Extension as MicromarkExtension } from 'micromark-util-types'
15
+ import type { Extension as MdastExtension } from 'mdast-util-from-markdown'
16
+
17
+ import type {
18
+ Root,
19
+ RootContent,
20
+ ParsedBlock,
21
+ IncrementalUpdate,
22
+ ParserOptions,
23
+ BlockStatus,
24
+ BlockContext,
25
+ ContainerConfig
26
+ } from '../types'
27
+
28
+ import {
29
+ createInitialContext,
30
+ updateContext,
31
+ isEmptyLine,
32
+ detectFenceStart,
33
+ isHeading,
34
+ isThematicBreak,
35
+ isBlockquoteStart,
36
+ isListItemStart,
37
+ detectContainer
38
+ } from '../detector'
39
+
40
+ // ============ 解析器类 ============
41
+
42
/**
 * Incremental Markdown parser for streamed input.
 *
 * Chunks are appended to an internal buffer. Lines up to the last detected
 * "stable boundary" are parsed once and kept as completed blocks; the remaining
 * tail is re-parsed on every `append` call and reported as pending blocks.
 */
export class IncremarkParser {
  /** All text received so far. */
  private buffer = ''
  /** `buffer` split on '\n', maintained incrementally by `updateLines`. */
  private lines: string[] = []
  /** Prefix sums of line lengths: lineOffsets[i] is the offset in `buffer` where line i starts. */
  private lineOffsets: number[] = [0]
  /** Blocks that have been finalized, in document order. */
  private completedBlocks: ParsedBlock[] = []
  /** Index of the first line that is not yet part of a completed block. */
  private pendingStartLine = 0
  /** Counter backing `generateBlockId`. */
  private blockIdCounter = 0
  /** Block context as of the last accepted stable boundary. */
  private context: BlockContext
  /** Effective options (`gfm` defaults to true unless overridden by the caller). */
  private options: ParserOptions
  /** Cached container configuration so it is not recomputed on every scan. */
  private cachedContainerConfig: ContainerConfig | undefined | null = null
  /** Pending blocks returned by the last `append`, reused by `getAst`. */
  private lastPendingBlocks: ParsedBlock[] = []

  /**
   * @param options Parser options; `gfm` is enabled unless the caller overrides it.
   */
  constructor(options: ParserOptions = {}) {
    this.options = {
      gfm: true,
      ...options
    }
    this.context = createInitialContext()
    // Initialize the container configuration cache.
    this.cachedContainerConfig = this.computeContainerConfig()
  }

  /** Returns the next block id of the form `block-<n>`, with n starting at 1. */
  private generateBlockId(): string {
    return `block-${++this.blockIdCounter}`
  }

  /**
   * Normalizes `options.containers`: a falsy value yields undefined, `true`
   * yields an empty config object, and a config object is returned as-is.
   */
  private computeContainerConfig(): ContainerConfig | undefined {
    const containers = this.options.containers
    if (!containers) return undefined
    return containers === true ? {} : containers
  }

  /** Returns the cached container configuration, mapping null to undefined. */
  private getContainerConfig(): ContainerConfig | undefined {
    return this.cachedContainerConfig ?? undefined
  }

  /**
   * Parses `text` into an mdast root, using the GFM extensions when
   * `options.gfm` is truthy plus any user-supplied extensions.
   */
  private parse(text: string): Root {
    const extensions: MicromarkExtension[] = []
    const mdastExtensions: MdastExtension[] = []

    if (this.options.gfm) {
      extensions.push(gfm())
      mdastExtensions.push(...gfmFromMarkdown())
    }

    // Append user-supplied extensions after the built-in ones.
    if (this.options.extensions) {
      extensions.push(...this.options.extensions)
    }
    if (this.options.mdastExtensions) {
      mdastExtensions.push(...this.options.mdastExtensions)
    }

    return fromMarkdown(text, { extensions, mdastExtensions })
  }

  /**
   * Incrementally refreshes `lines` and `lineOffsets` from `buffer`.
   * Only the last known line and whatever follows it are re-split, which
   * avoids splitting the whole buffer on every chunk.
   */
  private updateLines(): void {
    const prevLineCount = this.lines.length

    if (prevLineCount === 0) {
      // First input: split the whole buffer.
      this.lines = this.buffer.split('\n')
      this.lineOffsets = [0]
      for (let i = 0; i < this.lines.length; i++) {
        this.lineOffsets.push(this.lineOffsets[i] + this.lines[i].length + 1)
      }
      return
    }

    // The last line may be incomplete and continued by the new chunk.
    const lastLineStart = this.lineOffsets[prevLineCount - 1]
    const textFromLastLine = this.buffer.slice(lastLineStart)

    // Re-split the last line together with everything after it.
    const newLines = textFromLastLine.split('\n')

    // Drop the old last line (and its end offset), then append the new lines.
    this.lines.length = prevLineCount - 1
    this.lineOffsets.length = prevLineCount

    for (let i = 0; i < newLines.length; i++) {
      this.lines.push(newLines[i])
      const prevOffset = this.lineOffsets[this.lineOffsets.length - 1]
      this.lineOffsets.push(prevOffset + newLines[i].length + 1)
    }
  }

  /**
   * O(1) lookup of the buffer offset at which line `lineIndex` starts.
   * Returns 0 when the index has no recorded offset.
   */
  private getLineOffset(lineIndex: number): number {
    return this.lineOffsets[lineIndex] ?? 0
  }

  /**
   * Scans the pending lines for the last stable boundary.
   *
   * @returns The boundary line index (-1 when none was found) and the context
   *   captured when that boundary was recorded, so the caller can adopt it
   *   without rescanning.
   */
  private findStableBoundary(): { line: number; contextAtLine: BlockContext } {
    let stableLine = -1
    let stableContext: BlockContext = this.context
    let tempContext = { ...this.context }
    const containerConfig = this.getContainerConfig()

    for (let i = this.pendingStartLine; i < this.lines.length; i++) {
      const line = this.lines[i]
      const wasInFencedCode = tempContext.inFencedCode
      const wasInContainer = tempContext.inContainer
      const wasContainerDepth = tempContext.containerDepth

      tempContext = updateContext(line, tempContext, containerConfig)

      // A fenced code block just closed: stable unless this is the last line.
      if (wasInFencedCode && !tempContext.inFencedCode) {
        if (i < this.lines.length - 1) {
          stableLine = i
          stableContext = { ...tempContext }
        }
        continue
      }

      // Still inside a fenced code block: nothing here can be a boundary.
      if (tempContext.inFencedCode) {
        continue
      }

      // The outermost container just closed: stable unless this is the last line.
      if (wasInContainer && wasContainerDepth === 1 && !tempContext.inContainer) {
        if (i < this.lines.length - 1) {
          stableLine = i
          stableContext = { ...tempContext }
        }
        continue
      }

      // Still inside a container: nothing here can be a boundary.
      if (tempContext.inContainer) {
        continue
      }

      const stablePoint = this.checkStability(i, containerConfig)
      if (stablePoint >= 0) {
        stableLine = stablePoint
        // NOTE(review): when checkStability returns i - 1, the stored context is
        // still the one computed after line i, and line i is fed to updateContext
        // again on the next scan. Confirm updateContext is idempotent for such lines.
        stableContext = { ...tempContext }
      }
    }

    return { line: stableLine, contextAtLine: stableContext }
  }

  /**
   * Decides whether the line at `lineIndex` proves that an earlier block is complete.
   *
   * @returns The index of the last line of the completed block, or -1 if
   *   nothing can be considered stable at this line.
   */
  private checkStability(
    lineIndex: number,
    containerConfig: ContainerConfig | undefined
  ): number {
    // The first line is never stable.
    if (lineIndex === 0) {
      return -1
    }

    const line = this.lines[lineIndex]
    const prevLine = this.lines[lineIndex - 1]

    // The previous line is a standalone block (heading, thematic break): it is complete.
    if (isHeading(prevLine) || isThematicBreak(prevLine)) {
      return lineIndex - 1
    }

    // The last line is not stable (more content may still arrive).
    if (lineIndex >= this.lines.length - 1) {
      return -1
    }

    // Previous line is non-empty and the current line starts a new block:
    // the previous block is complete.
    if (!isEmptyLine(prevLine)) {
      // A new heading starts.
      if (isHeading(line)) {
        return lineIndex - 1
      }

      // A new fenced code block starts.
      if (detectFenceStart(line)) {
        return lineIndex - 1
      }

      // A new blockquote starts (excluding a continued blockquote).
      if (isBlockquoteStart(line) && !isBlockquoteStart(prevLine)) {
        return lineIndex - 1
      }

      // A new list starts (excluding consecutive list items).
      if (isListItemStart(line) && !isListItemStart(prevLine)) {
        return lineIndex - 1
      }

      // A new container starts.
      if (containerConfig !== undefined) {
        const container = detectContainer(line, containerConfig)
        if (container && !container.isEnd) {
          const prevContainer = detectContainer(prevLine, containerConfig)
          if (!prevContainer || prevContainer.isEnd) {
            return lineIndex - 1
          }
        }
      }
    }

    // An empty line after a non-empty line ends the paragraph.
    if (isEmptyLine(line) && !isEmptyLine(prevLine)) {
      return lineIndex
    }

    return -1
  }

  /**
   * Wraps parsed top-level nodes into `ParsedBlock`s.
   *
   * Node positions produced by the parser are relative to the text that was
   * parsed (`rawText`), not to the whole document. They are therefore used
   * directly to slice `rawText`, and shifted by `startOffset` to obtain the
   * document-relative `startOffset` / `endOffset` stored on each block.
   * The nodes themselves are not modified.
   *
   * @param nodes Top-level nodes obtained by parsing `rawText`.
   * @param startOffset Offset of `rawText` within the full buffer.
   * @param rawText The text segment the nodes were parsed from.
   * @param status Status assigned to every produced block.
   * @returns One block per node, in the same order.
   */
  private nodesToBlocks(
    nodes: RootContent[],
    startOffset: number,
    rawText: string,
    status: BlockStatus
  ): ParsedBlock[] {
    const blocks: ParsedBlock[] = []
    // Position inside `rawText`; used only when a node carries no position info.
    let cursor = 0

    for (const node of nodes) {
      const relativeStart = node.position?.start?.offset ?? cursor
      const relativeEnd = node.position?.end?.offset ?? relativeStart + 1

      blocks.push({
        id: this.generateBlockId(),
        status,
        node,
        startOffset: startOffset + relativeStart,
        endOffset: startOffset + relativeEnd,
        rawText: rawText.substring(relativeStart, relativeEnd)
      })

      cursor = relativeEnd
    }

    return blocks
  }

  /**
   * Appends a chunk and returns the resulting incremental update.
   *
   * Lines up to a newly found stable boundary are parsed and moved to the
   * completed blocks; the remaining lines are parsed as pending blocks.
   * `updated` is always returned empty by this implementation.
   *
   * @param chunk Text to append to the buffer.
   */
  append(chunk: string): IncrementalUpdate {
    this.buffer += chunk
    this.updateLines()

    const { line: stableBoundary, contextAtLine } = this.findStableBoundary()

    const update: IncrementalUpdate = {
      completed: [],
      updated: [],
      pending: [],
      ast: { type: 'root', children: [] }
    }

    if (stableBoundary >= this.pendingStartLine && stableBoundary >= 0) {
      const stableText = this.lines.slice(this.pendingStartLine, stableBoundary + 1).join('\n')
      const stableOffset = this.getLineOffset(this.pendingStartLine)

      const ast = this.parse(stableText)
      const newBlocks = this.nodesToBlocks(ast.children, stableOffset, stableText, 'completed')

      this.completedBlocks.push(...newBlocks)
      update.completed = newBlocks

      // Reuse the context computed by findStableBoundary to avoid a second scan.
      this.context = contextAtLine
      this.pendingStartLine = stableBoundary + 1
    }

    if (this.pendingStartLine < this.lines.length) {
      const pendingText = this.lines.slice(this.pendingStartLine).join('\n')

      // Whitespace-only tails produce no pending blocks.
      if (pendingText.trim()) {
        const pendingOffset = this.getLineOffset(this.pendingStartLine)
        const ast = this.parse(pendingText)

        update.pending = this.nodesToBlocks(ast.children, pendingOffset, pendingText, 'pending')
      }
    }

    // Cache the pending blocks for getAst.
    this.lastPendingBlocks = update.pending

    update.ast = {
      type: 'root',
      children: [...this.completedBlocks.map((b) => b.node), ...update.pending.map((b) => b.node)]
    }

    // Notify the state-change callback.
    this.emitChange(update.pending)

    return update
  }

  /**
   * Invokes `options.onChange`, if set, with the current state.
   * `completedBlocks` is passed as the internal array, not a copy.
   *
   * @param pendingBlocks Pending blocks to report; defaults to none.
   */
  private emitChange(pendingBlocks: ParsedBlock[] = []): void {
    if (this.options.onChange) {
      this.options.onChange({
        completedBlocks: this.completedBlocks,
        pendingBlocks,
        markdown: this.buffer,
        ast: {
          type: 'root',
          children: [
            ...this.completedBlocks.map((b) => b.node),
            ...pendingBlocks.map((b) => b.node)
          ]
        }
      })
    }
  }

  /**
   * Marks parsing as finished and processes the remaining content.
   * Can also be used on a forced stop (e.g. the user clicks "stop") to turn
   * the pending content into completed blocks.
   */
  finalize(): IncrementalUpdate {
    const update: IncrementalUpdate = {
      completed: [],
      updated: [],
      pending: [],
      ast: { type: 'root', children: [] }
    }

    if (this.pendingStartLine < this.lines.length) {
      const remainingText = this.lines.slice(this.pendingStartLine).join('\n')

      if (remainingText.trim()) {
        const remainingOffset = this.getLineOffset(this.pendingStartLine)
        const ast = this.parse(remainingText)

        const finalBlocks = this.nodesToBlocks(
          ast.children,
          remainingOffset,
          remainingText,
          'completed'
        )

        this.completedBlocks.push(...finalBlocks)
        update.completed = finalBlocks
      }
    }

    // Clear the pending cache; every line is now consumed.
    this.lastPendingBlocks = []
    this.pendingStartLine = this.lines.length

    update.ast = {
      type: 'root',
      children: this.completedBlocks.map((b) => b.node)
    }

    // Notify the state-change callback.
    this.emitChange([])

    return update
  }

  /**
   * Forcibly stops parsing and marks all remaining content as completed.
   * Delegates to `finalize()`; the name only makes the intent clearer.
   */
  abort(): IncrementalUpdate {
    return this.finalize()
  }

  /**
   * Returns the current full AST: completed nodes followed by the pending
   * nodes cached by the last `append`, so nothing is re-parsed.
   */
  getAst(): Root {
    return {
      type: 'root',
      children: [
        ...this.completedBlocks.map((b) => b.node),
        ...this.lastPendingBlocks.map((b) => b.node)
      ]
    }
  }

  /** Returns a shallow copy of the list of completed blocks. */
  getCompletedBlocks(): ParsedBlock[] {
    return [...this.completedBlocks]
  }

  /** Returns the full text received so far. */
  getBuffer(): string {
    return this.buffer
  }

  /**
   * Sets or clears the state-change callback (used by DevTools and similar).
   *
   * @param callback New callback, or undefined to remove it.
   */
  setOnChange(callback: ((state: import('../types').ParserState) => void) | undefined): void {
    this.options.onChange = callback
  }

  /**
   * Resets the parser state (buffer, lines, blocks, counters, context) and
   * notifies the state-change callback. Options are left untouched.
   */
  reset(): void {
    this.buffer = ''
    this.lines = []
    this.lineOffsets = [0]
    this.completedBlocks = []
    this.pendingStartLine = 0
    this.blockIdCounter = 0
    this.context = createInitialContext()
    this.lastPendingBlocks = []

    // Notify the state-change callback.
    this.emitChange([])
  }
}
470
+
471
/**
 * Factory for `IncremarkParser`.
 *
 * @param options Optional parser options, forwarded unchanged to the constructor.
 * @returns A new parser instance.
 */
export function createIncremarkParser(options?: ParserOptions): IncremarkParser {
  const parser = new IncremarkParser(options)
  return parser
}
@@ -0,0 +1,2 @@
1
+ export { IncremarkParser, createIncremarkParser } from './IncremarkParser'
2
+
@@ -0,0 +1,144 @@
1
+ import type { Root, RootContent } from 'mdast'
2
+ import type { Extension as MicromarkExtension } from 'micromark-util-types'
3
+ import type { Extension as MdastExtension } from 'mdast-util-from-markdown'
4
+
5
+ /**
6
+ * Status of a parsed block
7
+ */
8
+ export type BlockStatus =
9
+ | 'pending' // still being received; may be incomplete
10
+ | 'stable' // possibly complete, but the next chunk may still change it
11
+ | 'completed' // confirmed complete; will not change again
12
+
13
+ /**
14
+ * A block produced by the parser
15
+ */
16
+ export interface ParsedBlock {
17
+ /** Unique ID of the block */
18
+ id: string
19
+ /** Status of the block */
20
+ status: BlockStatus
21
+ /** AST node */
22
+ node: RootContent
23
+ /** Start position of the raw text (relative to the full document) */
24
+ startOffset: number
25
+ /** End position of the raw text */
26
+ endOffset: number
27
+ /** Raw text content */
28
+ rawText: string
29
+ }
30
+
31
+ /**
32
+ * Incremental update event
33
+ */
34
+ export interface IncrementalUpdate {
35
+ /** Newly completed blocks */
36
+ completed: ParsedBlock[]
37
+ /** Updated blocks (content changed) */
38
+ updated: ParsedBlock[]
39
+ /** Blocks currently being parsed (may be incomplete) */
40
+ pending: ParsedBlock[]
41
+ /** Full AST (contains all content parsed so far) */
42
+ ast: Root
43
+ }
44
+
45
+ /**
46
+ * Container syntax configuration
47
+ */
48
+ export interface ContainerConfig {
49
+ /** Container marker character; documented default is ':' */
50
+ marker?: string
51
+ /** Minimum marker length; documented default is 3 */
52
+ minMarkerLength?: number
53
+ /** Allowed container names (e.g. ['warning', 'info', 'youtube']); undefined means all names are allowed */
54
+ allowedNames?: string[]
55
+ }
56
+
57
+ /**
58
+ * Parser state-change event
59
+ */
60
+ export interface ParserState {
61
+ /** Completed blocks */
62
+ completedBlocks: ParsedBlock[]
63
+ /** Pending blocks */
64
+ pendingBlocks: ParsedBlock[]
65
+ /** Full Markdown content */
66
+ markdown: string
67
+ /** Full AST */
68
+ ast: Root
69
+ }
70
+
71
+ /**
72
+ * Parser configuration
73
+ */
74
+ export interface ParserOptions {
75
+ /** Enable GFM extensions (tables, task lists, etc.) */
76
+ gfm?: boolean
77
+ /**
78
+ * Enable ::: container syntax support (used for boundary detection)
79
+ * - false: disabled (default)
80
+ * - true: enabled with the default configuration
81
+ * - ContainerConfig: enabled with a custom configuration
82
+ */
83
+ containers?: boolean | ContainerConfig
84
+ /** Custom block boundary detection function. NOTE(review): not referenced by the parser code visible in this diff; confirm where it is consumed */
85
+ blockBoundaryDetector?: (content: string, position: number) => boolean
86
+ /** Custom micromark extensions (e.g. directive) */
87
+ extensions?: MicromarkExtension[]
88
+ /** Custom mdast extensions (e.g. directiveFromMarkdown) */
89
+ mdastExtensions?: MdastExtension[]
90
+ /** State-change callback */
91
+ onChange?: (state: ParserState) => void
92
+ }
93
+
94
+ /**
95
+ * Block context
96
+ */
97
+ export interface BlockContext {
98
+ /** Whether the scan is currently inside a fenced code block */
99
+ inFencedCode: boolean
100
+ /** Fence character of the code block (` or ~) */
101
+ fenceChar?: string
102
+ /** Fence length of the code block */
103
+ fenceLength?: number
104
+ /** Current list nesting depth */
105
+ listDepth: number
106
+ /** Current blockquote nesting depth */
107
+ blockquoteDepth: number
108
+ /** Whether the scan is currently inside a container block */
109
+ inContainer: boolean
110
+ /** Marker length of the container */
111
+ containerMarkerLength?: number
112
+ /** Container name */
113
+ containerName?: string
114
+ /** Container nesting depth (supports nested containers) */
115
+ containerDepth: number
116
+ }
117
+
118
+ /**
119
+ * Container detection result
120
+ */
121
+ export interface ContainerMatch {
122
+ /** Container name */
123
+ name: string
124
+ /** Marker length (number of colons) */
125
+ markerLength: number
126
+ /** Whether this is an end marker */
127
+ isEnd: boolean
128
+ }
129
+
130
+ /**
131
+ * Block type detection result
132
+ */
133
+ export interface BlockTypeInfo {
134
+ type: string
135
+ /** Whether this is a container node (can contain other blocks) */
136
+ isContainer: boolean
137
+ /** Whether it requires explicit closing (e.g. a code block) */
138
+ requiresClosing: boolean
139
+ /** Closing pattern */
140
+ closingPattern?: RegExp
141
+ }
142
+
143
+ export type { Root, RootContent }
144
+
@@ -0,0 +1,44 @@
1
+ /**
2
+ * 工具函数
3
+ */
4
+
5
/** Module-level counter shared by `generateId` and `resetIdCounter`. */
let idCounter = 0

/**
 * Produces the next ID of the form `<prefix>-<n>`.
 *
 * @param prefix Text placed before the dash; defaults to 'block'.
 * @returns The ID built from the incremented counter (first call yields n = 1).
 */
export function generateId(prefix = 'block'): string {
  idCounter += 1
  const sequence = idCounter
  return `${prefix}-${sequence}`
}

/**
 * Sets the ID counter back to zero so numbering restarts at 1 (intended for tests).
 */
export function resetIdCounter(): void {
  idCounter = 0
}
19
+
20
/**
 * Computes the character offset at which a given line starts.
 *
 * Sums the length of every line before `lineIndex`, counting one extra
 * character per line for its newline. Indices beyond the end are clamped to
 * the number of lines; indices at or below zero yield 0.
 *
 * @param lines The document split into lines.
 * @param lineIndex Index of the line whose start offset is wanted.
 * @returns The accumulated offset.
 */
export function calculateLineOffset(lines: string[], lineIndex: number): number {
  const stop = Math.min(lineIndex, lines.length)
  let total = 0
  let cursor = 0
  while (cursor < stop) {
    // One extra character per line accounts for the newline separator.
    total += lines[cursor].length + 1
    cursor += 1
  }
  return total
}
30
+
31
/**
 * Splits text into lines on the '\n' character.
 *
 * @param text The text to split.
 * @returns The pieces between newlines; an empty string yields a single empty line.
 */
export function splitLines(text: string): string[] {
  const separator = '\n'
  const pieces = text.split(separator)
  return pieces
}
37
+
38
/**
 * Joins an inclusive range of lines back into text separated by '\n'.
 *
 * @param lines The available lines.
 * @param start Index of the first line to include.
 * @param end Index of the last line to include (inclusive).
 * @returns The selected lines joined with newlines.
 */
export function joinLines(lines: string[], start: number, end: number): string {
  const stop = end + 1
  const selected = lines.slice(start, stop)
  return selected.join('\n')
}
44
+