@incremark/core 0.2.2 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,93 +0,0 @@
1
- import { runBenchmark } from './index'
2
-
3
/** One row of the summary table: the measurements from a single benchmark run. */
type BenchmarkRow = {
  docSize: string
  docLength: number
  chunkSize: number
  timeSaved: number
  charsSaved: number
  speedup: number
}

/** Averages one numeric field across the given rows, summing in row order. */
const meanOf = (rows: BenchmarkRow[], pick: (row: BenchmarkRow) => number): number =>
  rows.reduce((total, row) => total + pick(row), 0) / rows.length

// Banner
console.log('')
console.log('🔬 Running Incremark Benchmark Suite')
console.log('')

// Document lengths to benchmark
const sizes = [
  { name: 'Short (~1KB)', length: 1000 },
  { name: 'Medium (~5KB)', length: 5000 },
  { name: 'Long (~10KB)', length: 10000 },
  { name: 'Very Long (~20KB)', length: 20000 },
]

// Chunk sizes exercised for every document length
const chunkSizes = [10, 50]

const rows: BenchmarkRow[] = []

sizes.forEach(({ name, length }) => {
  console.log(`\n${'='.repeat(60)}`)
  console.log(`📄 Document Size: ${name} (${length} chars)`)
  console.log('='.repeat(60))

  chunkSizes.forEach((chunkSize) => {
    console.log(`\n📦 Chunk size: ${chunkSize} chars\n`)
    const { timeSaved, charsSaved, speedup } = runBenchmark({
      chunkSize,
      iterations: 20,
      markdownLength: length
    })
    rows.push({ docSize: name, docLength: length, chunkSize, timeSaved, charsSaved, speedup })
  })
})

// Summary report: one table row per (document size, chunk size) run
console.log('\n')
console.log('='.repeat(80))
console.log('📈 Complete Benchmark Summary')
console.log('='.repeat(80))
console.log('')
console.log('| Document Size | Chunk | Time Saved | Chars Saved | Speedup |')
console.log('|------------------|-------|------------|-------------|---------|')
rows.forEach((row) => {
  const sizeCell = row.docSize.padEnd(16)
  const chunkCell = row.chunkSize.toString().padEnd(5)
  const timeCell = row.timeSaved.toFixed(1).padStart(9)
  const charsCell = row.charsSaved.toFixed(1).padStart(10)
  const speedCell = row.speedup.toFixed(2).padStart(6)
  console.log(`| ${sizeCell} | ${chunkCell} | ${timeCell}% | ${charsCell}% | ${speedCell}x |`)
})
console.log('')

// Overall averages across every run
const overallTimeSaved = meanOf(rows, (row) => row.timeSaved)
const overallCharsSaved = meanOf(rows, (row) => row.charsSaved)
const overallSpeedup = meanOf(rows, (row) => row.speedup)

// Group the runs by document size, keeping first-seen order
const rowsBySize = new Map<string, BenchmarkRow[]>()
for (const row of rows) {
  const bucket = rowsBySize.get(row.docSize)
  if (bucket) {
    bucket.push(row)
  } else {
    rowsBySize.set(row.docSize, [row])
  }
}

console.log('-'.repeat(80))
console.log('')
console.log('📊 Average by Document Size:')
console.log('')
rowsBySize.forEach((group, size) => {
  const avgSpeed = meanOf(group, (row) => row.speedup)
  const avgTime = meanOf(group, (row) => row.timeSaved)
  console.log(` ${size}: ${avgSpeed.toFixed(2)}x faster, ${avgTime.toFixed(1)}% time saved`)
})
console.log('')

console.log('-'.repeat(80))
console.log('')
console.log(`🎯 Overall Average:`)
console.log(` Time Saved: ${overallTimeSaved.toFixed(1)}%`)
console.log(` Chars Saved: ${overallCharsSaved.toFixed(1)}%`)
console.log(` Speedup: ${overallSpeedup.toFixed(2)}x`)
console.log('')
console.log('='.repeat(80))
@@ -1,150 +0,0 @@
1
- import { describe, it, expect } from 'vitest'
2
- import {
3
- detectFenceStart,
4
- detectFenceEnd,
5
- isEmptyLine,
6
- isHeading,
7
- isThematicBreak,
8
- isListItemStart,
9
- isBlockquoteStart,
10
- detectContainer,
11
- createInitialContext
12
- } from './index'
13
-
14
// Unit tests for the block detector helpers.
// Inputs whose expectations depend on leading indentation spell that
// indentation out explicitly (two spaces for `indent: 2`, four spaces for the
// over-indented blockquote) so the inputs agree with the asserted results.
describe('块检测器', () => {
  describe('detectFenceStart', () => {
    it('检测反引号 fence', () => {
      const result = detectFenceStart('```js')
      expect(result).toEqual({ char: '`', length: 3 })
    })

    it('检测波浪号 fence', () => {
      const result = detectFenceStart('~~~python')
      expect(result).toEqual({ char: '~', length: 3 })
    })

    it('检测更长的 fence', () => {
      const result = detectFenceStart('`````')
      expect(result).toEqual({ char: '`', length: 5 })
    })

    it('非 fence 返回 null', () => {
      expect(detectFenceStart('普通文本')).toBeNull()
      expect(detectFenceStart('``不够长')).toBeNull()
    })
  })

  describe('detectFenceEnd', () => {
    it('匹配相同长度', () => {
      const context = { ...createInitialContext(), inFencedCode: true, fenceChar: '`', fenceLength: 3 }
      expect(detectFenceEnd('```', context)).toBe(true)
    })

    it('匹配更长的结束', () => {
      const context = { ...createInitialContext(), inFencedCode: true, fenceChar: '`', fenceLength: 3 }
      expect(detectFenceEnd('`````', context)).toBe(true)
    })

    it('不匹配更短的', () => {
      const context = { ...createInitialContext(), inFencedCode: true, fenceChar: '`', fenceLength: 5 }
      expect(detectFenceEnd('```', context)).toBe(false)
    })
  })

  describe('isEmptyLine', () => {
    it('空行', () => {
      expect(isEmptyLine('')).toBe(true)
      expect(isEmptyLine(' ')).toBe(true)
      expect(isEmptyLine('\t')).toBe(true)
    })

    it('非空行', () => {
      expect(isEmptyLine('内容')).toBe(false)
      expect(isEmptyLine(' a ')).toBe(false)
    })
  })

  describe('isHeading', () => {
    it('有效标题', () => {
      expect(isHeading('# H1')).toBe(true)
      expect(isHeading('## H2')).toBe(true)
      expect(isHeading('###### H6')).toBe(true)
    })

    it('无效标题', () => {
      expect(isHeading('####### H7')).toBe(false) // deeper than level 6
      expect(isHeading('#没有空格')).toBe(false)
      expect(isHeading('普通文本')).toBe(false)
    })
  })

  describe('isThematicBreak', () => {
    it('有效水平线', () => {
      expect(isThematicBreak('---')).toBe(true)
      expect(isThematicBreak('***')).toBe(true)
      expect(isThematicBreak('___')).toBe(true)
      expect(isThematicBreak('-----')).toBe(true)
    })

    it('无效水平线', () => {
      expect(isThematicBreak('--')).toBe(false)
      expect(isThematicBreak('- - -')).toBe(false) // contains spaces
    })
  })

  describe('isListItemStart', () => {
    it('无序列表', () => {
      expect(isListItemStart('- item')).toEqual({ ordered: false, indent: 0 })
      // Two leading spaces, matching the expected indent of 2.
      expect(isListItemStart('  * item')).toEqual({ ordered: false, indent: 2 })
    })

    it('有序列表', () => {
      expect(isListItemStart('1. item')).toEqual({ ordered: true, indent: 0 })
      // Two leading spaces, matching the expected indent of 2.
      expect(isListItemStart('  99. item')).toEqual({ ordered: true, indent: 2 })
    })

    it('非列表', () => {
      expect(isListItemStart('普通文本')).toBeNull()
    })
  })

  describe('isBlockquoteStart', () => {
    it('有效引用', () => {
      expect(isBlockquoteStart('> quote')).toBe(true)
      expect(isBlockquoteStart(' > quote')).toBe(true)
    })

    it('无效引用', () => {
      // Four leading spaces: one more than the three a blockquote marker allows.
      expect(isBlockquoteStart('    > 太多缩进')).toBe(false)
    })
  })

  describe('detectContainer', () => {
    it('检测容器开始', () => {
      const result = detectContainer('::: warning')
      expect(result).toEqual({ name: 'warning', markerLength: 3, isEnd: false })
    })

    it('检测容器结束', () => {
      const result = detectContainer(':::')
      expect(result).toEqual({ name: '', markerLength: 3, isEnd: true })
    })

    it('检测更长的标记', () => {
      const result = detectContainer('::::: outer')
      expect(result).toEqual({ name: 'outer', markerLength: 5, isEnd: false })
    })

    it('名称白名单', () => {
      const config = { allowedNames: ['warning', 'info'] }
      expect(detectContainer('::: warning', config)).not.toBeNull()
      expect(detectContainer('::: danger', config)).toBeNull()
    })

    it('非容器语法', () => {
      expect(detectContainer('普通文本')).toBeNull()
      expect(detectContainer(':: 太短')).toBeNull()
    })
  })
})
150
-
@@ -1,330 +0,0 @@
1
- /**
2
- * 块类型检测与边界判断
3
- *
4
- * Markdown 块级元素的识别规则
5
- */
6
-
7
- import type { BlockContext, ContainerConfig, ContainerMatch } from '../types'
8
-
9
// ============ Precompiled regular expressions (performance optimization) ============

/** Opening code fence: optional leading whitespace, then 3+ backticks or 3+ tildes. */
const RE_FENCE_START = /^(\s*)((`{3,})|(~{3,}))/
/** A line that is empty or contains only whitespace. */
const RE_EMPTY_LINE = /^\s*$/
/** Heading: one to six `#` at the start of the line followed by whitespace. */
const RE_HEADING = /^#{1,6}\s/
/** Thematic break: 3+ of the same `*`, `-` or `_`, followed only by whitespace. */
const RE_THEMATIC_BREAK = /^(\*{3,}|-{3,}|_{3,})\s*$/
/** Unordered list item: captured indentation, a `-`, `*` or `+` bullet, then whitespace. */
const RE_UNORDERED_LIST = /^(\s*)([-*+])\s/
/** Ordered list item: captured indentation, 1-9 digits, `.` or `)`, then whitespace. */
const RE_ORDERED_LIST = /^(\s*)(\d{1,9})[.)]\s/
/** Blockquote marker `>` preceded by at most three whitespace characters. */
const RE_BLOCKQUOTE = /^\s{0,3}>/
/** HTML block opened by script/pre/style/textarea, a comment, DOCTYPE, `<?` or CDATA (case-insensitive). */
const RE_HTML_BLOCK_1 = /^\s{0,3}<(script|pre|style|textarea|!--|!DOCTYPE|\?|!\[CDATA\[)/i
/** HTML block opened by any opening or closing tag name followed by whitespace, `>` or end of line. */
const RE_HTML_BLOCK_2 = /^\s{0,3}<\/?[a-zA-Z][a-zA-Z0-9-]*(\s|>|$)/
/** Table delimiter row: cells of 3+ dashes with optional alignment colons, separated by pipes. */
const RE_TABLE_DELIMITER = /^\|?\s*:?-{3,}:?\s*(\|\s*:?-{3,}:?\s*)*\|?$/
/** Characters that must be escaped before being embedded in a RegExp source. */
const RE_ESCAPE_SPECIAL = /[.*+?^${}()|[\]\\]/g
/** Footnote definition start: `[^id]:` followed by whitespace. */
const RE_FOOTNOTE_DEFINITION = /^\[\^[^\]]+\]:\s/
/**
 * Footnote continuation line: indented by at least four spaces or one tab.
 * The four-space run is written as ` {4}` so the required width is explicit
 * and cannot be silently shortened to a single space.
 */
const RE_FOOTNOTE_CONTINUATION = /^(?: {4}|\t)/

/** Cache of compiled fence-end patterns */
const fenceEndPatternCache = new Map<string, RegExp>()

/** Cache of compiled container patterns */
const containerPatternCache = new Map<string, RegExp>()
30
-
31
- // ============ 代码块检测 ============
32
-
33
/**
 * Detects whether a line opens a fenced code block.
 *
 * A backtick fence is rejected when the text after the fence contains another
 * backtick: such a line (for example a code span written with three backticks
 * on one line) is inline code, not an opening fence. Tilde fences have no such
 * restriction.
 *
 * @param line - A single line of Markdown source.
 * @returns The fence character and the length of the fence run, or `null`
 *          when the line does not open a fence.
 */
export function detectFenceStart(line: string): { char: string; length: number } | null {
  const match = line.match(RE_FENCE_START)
  if (!match) {
    return null
  }

  const fence = match[2]
  const char = fence[0]

  // match[0] is indentation + fence, so everything from that offset on is the
  // info string; a backtick there disqualifies a backtick fence.
  if (char === '`' && line.indexOf('`', match[0].length) !== -1) {
    return null
  }

  return { char, length: fence.length }
}
45
-
46
/**
 * Detects whether a line closes the fenced code block described by `context`.
 *
 * The closing pattern (up to three whitespace characters, at least
 * `fenceLength` repetitions of `fenceChar`, then only whitespace) is compiled
 * once per fence character/length pair and reused from the cache.
 *
 * @param line - A single line of Markdown source.
 * @param context - Current block context; must describe an open fence.
 * @returns `true` when the line closes the open fence, otherwise `false`.
 */
export function detectFenceEnd(line: string, context: BlockContext): boolean {
  const { inFencedCode, fenceChar, fenceLength } = context
  if (!(inFencedCode && fenceChar && fenceLength)) {
    return false
  }

  const key = `${fenceChar}-${fenceLength}`
  const cached = fenceEndPatternCache.get(key)
  if (cached) {
    return cached.test(line)
  }

  const compiled = new RegExp(`^\\s{0,3}${fenceChar}{${fenceLength},}\\s*$`)
  fenceEndPatternCache.set(key, compiled)
  return compiled.test(line)
}
63
-
64
- // ============ 行类型检测 ============
65
-
66
/**
 * Tells whether a line is empty or consists solely of whitespace.
 *
 * @param line - A single line of Markdown source.
 */
export function isEmptyLine(line: string): boolean {
  return RE_EMPTY_LINE.exec(line) !== null
}
72
-
73
/**
 * Tells whether a line starts with a heading marker, as matched by RE_HEADING.
 *
 * @param line - A single line of Markdown source.
 */
export function isHeading(line: string): boolean {
  return RE_HEADING.exec(line) !== null
}
79
-
80
/**
 * Tells whether a line is a thematic break (horizontal rule).
 * Surrounding whitespace is stripped before the line is tested.
 *
 * @param line - A single line of Markdown source.
 */
export function isThematicBreak(line: string): boolean {
  const trimmed = line.trim()
  return RE_THEMATIC_BREAK.exec(trimmed) !== null
}
86
-
87
/**
 * Detects whether a line starts a list item.
 *
 * Bullet markers (`-`, `*`, `+`) are checked first, then numbered markers
 * (`1.`, `2)` and so on).
 *
 * @param line - A single line of Markdown source.
 * @returns Whether the list is ordered plus the length of the leading
 *          whitespace, or `null` when the line is not a list item.
 */
export function isListItemStart(line: string): { ordered: boolean; indent: number } | null {
  const bullet = RE_UNORDERED_LIST.exec(line)
  if (bullet !== null) {
    return { ordered: false, indent: bullet[1].length }
  }

  const numbered = RE_ORDERED_LIST.exec(line)
  return numbered !== null ? { ordered: true, indent: numbered[1].length } : null
}
105
-
106
/**
 * Tells whether a line starts a blockquote, as matched by RE_BLOCKQUOTE.
 *
 * @param line - A single line of Markdown source.
 */
export function isBlockquoteStart(line: string): boolean {
  return RE_BLOCKQUOTE.exec(line) !== null
}
112
-
113
/**
 * Tells whether a line starts an HTML block, i.e. matches either of the two
 * HTML block patterns.
 *
 * @param line - A single line of Markdown source.
 */
export function isHtmlBlock(line: string): boolean {
  if (RE_HTML_BLOCK_1.exec(line) !== null) {
    return true
  }
  return RE_HTML_BLOCK_2.exec(line) !== null
}
119
-
120
/**
 * Tells whether a line is a table delimiter row.
 * Surrounding whitespace is stripped before the line is tested.
 *
 * @param line - A single line of Markdown source.
 */
export function isTableDelimiter(line: string): boolean {
  const trimmed = line.trim()
  return RE_TABLE_DELIMITER.exec(trimmed) !== null
}
126
-
127
- // ============ 脚注检测 ============
128
-
129
/**
 * Tells whether a line is the first line of a footnote definition.
 * Expected shape: `[^id]: content`
 *
 * @param line - A single line of Markdown source.
 *
 * @example
 * isFootnoteDefinitionStart('[^1]: footnote text') // true
 * isFootnoteDefinitionStart('[^note]: text') // true
 * isFootnoteDefinitionStart('    indented text') // false
 */
export function isFootnoteDefinitionStart(line: string): boolean {
  return RE_FOOTNOTE_DEFINITION.exec(line) !== null
}
141
-
142
/**
 * Tells whether a line continues a footnote definition, i.e. is an indented
 * line as matched by RE_FOOTNOTE_CONTINUATION.
 * Intended contract: at least four leading spaces or one leading tab.
 *
 * @param line - A single line of Markdown source.
 *
 * @example
 * isFootnoteContinuation('\tsecond line') // true
 * isFootnoteContinuation('no indent') // false
 */
export function isFootnoteContinuation(line: string): boolean {
  return RE_FOOTNOTE_CONTINUATION.exec(line) !== null
}
154
-
155
- // ============ 容器检测 ============
156
-
157
/**
 * Detects a container opening or closing line.
 *
 * Recognized forms (shown with the default `:` marker):
 * - `::: name`        opens a container
 * - `::: name attr`   opens a container with attributes
 * - `:::`             closes a container
 * - `:::::: name`     opens with a longer marker (used for nesting)
 *
 * The pattern is compiled once per marker / minimum-length pair and cached.
 *
 * @param line - A single line of Markdown source.
 * @param config - Optional marker, minimum marker length and name whitelist.
 * @returns The container name, marker length and whether the line is a
 *          closing marker; `null` when the line is not container syntax or an
 *          opening name is not in a non-empty `allowedNames` list.
 */
export function detectContainer(line: string, config?: ContainerConfig): ContainerMatch | null {
  const markerChar = config?.marker || ':'
  const minRun = config?.minMarkerLength || 3
  const key = `${markerChar}-${minRun}`

  let compiled = containerPatternCache.get(key)
  if (compiled === undefined) {
    const safeMarker = markerChar.replace(RE_ESCAPE_SPECIAL, '\\$&')
    compiled = new RegExp(
      `^(\\s*)(${safeMarker}{${minRun},})(?:\\s+(\\w[\\w-]*))?(?:\\s+(.*))?\\s*$`
    )
    containerPatternCache.set(key, compiled)
  }

  const found = compiled.exec(line)
  if (found === null) {
    return null
  }

  const name = found[3] || ''
  // A marker followed by neither a name nor any trailing text is a closer.
  const isEnd = !name && !found[4]

  const whitelist = config?.allowedNames
  if (!isEnd && whitelist && whitelist.length > 0 && !whitelist.includes(name)) {
    return null
  }

  return { name, markerLength: found[2].length, isEnd }
}
198
-
199
/**
 * Detects whether a line closes the container tracked by `context`.
 *
 * The line must be a closing marker at least as long as the marker recorded
 * in `context.containerMarkerLength`.
 *
 * @param line - A single line of Markdown source.
 * @param context - Current block context; must describe an open container.
 * @param config - Optional container configuration passed to detectContainer.
 * @returns `true` when the line closes the open container, otherwise `false`.
 */
export function detectContainerEnd(
  line: string,
  context: BlockContext,
  config?: ContainerConfig
): boolean {
  const { inContainer, containerMarkerLength: openLength } = context
  if (!inContainer || !openLength) {
    return false
  }

  const found = detectContainer(line, config)
  return found !== null && found.isEnd && found.markerLength >= openLength
}
218
-
219
- // ============ 边界检测 ============
220
-
221
/**
 * Decides whether a block boundary lies between two consecutive lines.
 *
 * Inside a fenced code block the only boundary is the closing fence.
 * Otherwise a boundary exists when any of the following holds:
 * - the previous line is blank and the current one is not
 * - the current line is a heading and the previous line is not blank
 * - the current line is a thematic break
 * - the current line opens a code fence
 *
 * @param prevLine - The line preceding `currentLine`.
 * @param currentLine - The line being examined.
 * @param context - Current block context.
 */
export function isBlockBoundary(
  prevLine: string,
  currentLine: string,
  context: BlockContext
): boolean {
  if (context.inFencedCode) {
    return detectFenceEnd(currentLine, context)
  }

  const prevIsBlank = isEmptyLine(prevLine)
  return (
    (prevIsBlank && !isEmptyLine(currentLine)) ||
    (!prevIsBlank && isHeading(currentLine)) ||
    isThematicBreak(currentLine) ||
    detectFenceStart(currentLine) !== null
  )
}
251
-
252
- // ============ 上下文管理 ============
253
-
254
/**
 * Builds a fresh block context: not inside a fence or container, with all
 * depth counters at zero. A new object is returned on every call.
 */
export function createInitialContext(): BlockContext {
  const initial: BlockContext = {
    inFencedCode: false,
    listDepth: 0,
    blockquoteDepth: 0,
    inContainer: false,
    containerDepth: 0
  }
  return initial
}
266
-
267
/**
 * Computes the block context that results from processing one line.
 *
 * The input context is never mutated; a shallow copy is updated and returned.
 * Fenced code takes priority: while a fence is open, only its closing fence
 * changes the context. Container tracking runs only when `containerConfig`
 * enables it.
 *
 * Nested container openers only raise `containerDepth` when their marker is at
 * least as long as the tracked container's marker. A shorter nested opener is
 * treated as content, because its closing marker could never satisfy
 * detectContainerEnd (which requires a marker at least as long as the tracked
 * one); counting it would leave the depth permanently above zero and the
 * container would never close.
 *
 * @param line - The line that was just processed.
 * @param context - The context before the line.
 * @param containerConfig - `true` for default container settings, a config
 *        object for custom settings, `false` or omitted to disable containers.
 * @returns The context after the line.
 */
export function updateContext(
  line: string,
  context: BlockContext,
  containerConfig?: ContainerConfig | boolean
): BlockContext {
  const newContext = { ...context }

  // Normalize the boolean shorthand: true → default config, false → disabled.
  const containerCfg =
    containerConfig === true ? {} : containerConfig === false ? undefined : containerConfig

  // Fenced code has the highest priority
  if (context.inFencedCode) {
    if (detectFenceEnd(line, context)) {
      newContext.inFencedCode = false
      newContext.fenceChar = undefined
      newContext.fenceLength = undefined
    }
    return newContext
  }

  const fence = detectFenceStart(line)
  if (fence) {
    newContext.inFencedCode = true
    newContext.fenceChar = fence.char
    newContext.fenceLength = fence.length
    return newContext
  }

  // Container handling
  if (containerCfg !== undefined) {
    if (context.inContainer) {
      if (detectContainerEnd(line, context, containerCfg)) {
        newContext.containerDepth = context.containerDepth - 1
        if (newContext.containerDepth === 0) {
          newContext.inContainer = false
          newContext.containerMarkerLength = undefined
          newContext.containerName = undefined
        }
        return newContext
      }

      const nested = detectContainer(line, containerCfg)
      // Only count nested openers whose closer can later be recognized.
      if (
        nested &&
        !nested.isEnd &&
        nested.markerLength >= (context.containerMarkerLength ?? 0)
      ) {
        newContext.containerDepth = context.containerDepth + 1
        return newContext
      }
    } else {
      const container = detectContainer(line, containerCfg)
      if (container && !container.isEnd) {
        newContext.inContainer = true
        newContext.containerMarkerLength = container.markerLength
        newContext.containerName = container.name
        newContext.containerDepth = 1
        return newContext
      }
    }
  }

  return newContext
}
330
-