@incremark/core 0.1.2 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,409 @@
1
+ import { describe, it, expect } from 'vitest'
2
+ import { fromMarkdown } from 'mdast-util-from-markdown'
3
+ import { gfm } from 'micromark-extension-gfm'
4
+ import { gfmFromMarkdown } from 'mdast-util-gfm'
5
+ import type { Root, RootContent } from 'mdast'
6
+ import {
7
+ parseHtmlTag,
8
+ parseHtmlFragment,
9
+ transformHtmlNodes,
10
+ createHtmlTreeTransformer,
11
+ isHtmlElementNode,
12
+ walkHtmlElements,
13
+ findHtmlElementsByTag,
14
+ htmlElementToString,
15
+ detectHtmlContentType,
16
+ HTML_TAG_BLACKLIST,
17
+ type HtmlElementNode
18
+ } from './index'
19
+
20
/**
 * Checks how `detectHtmlContentType` classifies raw HTML snippets:
 * 'opening', 'closing', 'self-closing', 'fragment', or 'unknown'.
 */
describe('detectHtmlContentType', () => {
  // Asserts that every sample in `inputs` is classified as `kind`.
  const expectAll = (inputs: readonly string[], kind: string): void => {
    for (const input of inputs) {
      expect(detectHtmlContentType(input)).toBe(kind)
    }
  }

  it('应该识别开标签', () => {
    expectAll(
      ['<span>', '<div class="test">', '<span style="color: red;">'],
      'opening'
    )
  })

  it('应该识别闭标签', () => {
    // The last sample has whitespace before the closing bracket.
    expectAll(['</span>', '</div>', '</p >'], 'closing')
  })

  it('应该识别自闭合标签', () => {
    // Both explicit (`<br />`) and implicit (`<br>`, `<img ...>`) forms are expected
    // to be reported as self-closing.
    expectAll(
      ['<br />', '<br>', '<img src="test.png">', '<hr />'],
      'self-closing'
    )
  })

  it('应该识别 HTML 片段', () => {
    expectAll(
      [
        '<div><span>text</span></div>',
        '<p>text</p>',
        '<div>\n <p>nested</p>\n</div>'
      ],
      'fragment'
    )
  })

  it('应该处理多行属性的开标签', () => {
    // An attribute value that spans several lines must still count as one opening tag.
    const multiLineTag = `<div style="
display: flex;
padding: 20px;
">`
    expectAll([multiLineTag], 'opening')
  })

  it('应该返回 unknown 对于无效内容', () => {
    expectAll(['', 'plain text', 'not a tag'], 'unknown')
  })
})
60
+
61
/**
 * Checks the descriptor `parseHtmlTag` produces for a single tag string:
 * `tagName`, `attrs`, `isClosing`, `isSelfClosing` and `rawHtml`.
 */
describe('parseHtmlTag', () => {
  it('应该解析简单的开始标签', () => {
    const raw = '<span>'
    expect(parseHtmlTag(raw)).toEqual({
      tagName: 'span',
      attrs: {},
      isClosing: false,
      isSelfClosing: false,
      rawHtml: raw
    })
  })

  it('应该解析带属性的标签', () => {
    const raw = '<div class="container" id="main">'
    expect(parseHtmlTag(raw)).toEqual({
      tagName: 'div',
      attrs: { class: 'container', id: 'main' },
      isClosing: false,
      isSelfClosing: false,
      rawHtml: raw
    })
  })

  it('应该解析结束标签', () => {
    const raw = '</span>'
    expect(parseHtmlTag(raw)).toEqual({
      tagName: 'span',
      attrs: {},
      isClosing: true,
      isSelfClosing: false,
      rawHtml: raw
    })
  })

  it('应该解析自闭合标签', () => {
    const parsed = parseHtmlTag('<br />')
    expect(parsed).toBeTruthy()
    expect(parsed?.tagName).toBe('br')
    expect(parsed?.isSelfClosing).toBe(true)
  })

  it('应该解析 img 标签(隐式自闭合)', () => {
    // `<img>` has no trailing slash here but is still expected to be self-closing.
    const parsed = parseHtmlTag('<img src="test.png" alt="test">')
    expect(parsed).toBeTruthy()
    expect(parsed?.tagName).toBe('img')
    expect(parsed?.isSelfClosing).toBe(true)
    expect(parsed?.attrs).toEqual({ src: 'test.png', alt: 'test' })
  })

  it('应该处理单引号属性值', () => {
    expect(parseHtmlTag("<div class='test'>")?.attrs).toEqual({ class: 'test' })
  })

  it('应该处理无引号属性值', () => {
    // Unquoted values are expected to come back as strings.
    expect(parseHtmlTag('<div data-id=123>')?.attrs).toEqual({ 'data-id': '123' })
  })

  it('应该处理布尔属性', () => {
    // Attributes without a value are expected to map to the empty string.
    expect(parseHtmlTag('<input disabled readonly>')?.attrs).toEqual({
      disabled: '',
      readonly: ''
    })
  })
})
125
+
126
/**
 * Checks the element list `parseHtmlFragment` returns for an HTML string:
 * nesting, self-closing tags, blacklist filtering and unclosed tags.
 *
 * Child nodes are checked with `toMatchObject` instead of `as any` /
 * `as HtmlElementNode` casts, so a missing or differently shaped child shows up
 * as an assertion failure rather than a TypeError on property access.
 */
describe('parseHtmlFragment', () => {
  it('应该解析简单的 HTML 片段', () => {
    const result = parseHtmlFragment('<span>hello</span>')
    expect(result).toHaveLength(1)
    expect(result[0].tagName).toBe('span')
    expect(result[0].children).toHaveLength(1)
    // The only child is expected to be a text-like node carrying the inner text.
    expect(result[0].children[0]).toMatchObject({ value: 'hello' })
  })

  it('应该解析嵌套的 HTML', () => {
    const result = parseHtmlFragment('<div><span>text</span></div>')
    expect(result).toHaveLength(1)
    expect(result[0].tagName).toBe('div')
    expect(result[0].children).toHaveLength(1)
    expect(result[0].children[0]).toMatchObject({ tagName: 'span' })
  })

  it('应该处理自闭合标签', () => {
    const result = parseHtmlFragment('<br /><hr />')
    expect(result).toHaveLength(2)
    expect(result[0].tagName).toBe('br')
    expect(result[1].tagName).toBe('hr')
  })

  it('应该过滤黑名单标签', () => {
    const result = parseHtmlFragment('<script>alert(1)</script>')
    expect(result).toHaveLength(0)
  })

  it('应该处理未闭合的标签', () => {
    const result = parseHtmlFragment('<div><span>text</div>')
    expect(result).toHaveLength(1)
    expect(result[0].tagName).toBe('div')
    // The span is never closed; it should still end up inside the div.
    expect(
      result[0].children.some(
        (child) => isHtmlElementNode(child) && child.tagName === 'span'
      )
    ).toBe(true)
  })

  it('应该使用自定义黑名单', () => {
    const result = parseHtmlFragment('<custom>text</custom>', {
      tagBlacklist: ['custom']
    })
    expect(result).toHaveLength(0)
  })
})
172
+
173
/**
 * Security-oriented checks on `parseHtmlFragment`: blacklisted tags produce no
 * nodes, event-handler attributes and `javascript:` URLs are absent from
 * `attrs`, and `data:image/` URLs are kept.
 */
describe('XSS 防护', () => {
  it('应该过滤 script 标签', () => {
    const result = parseHtmlFragment('<script>alert("xss")</script>')
    expect(result).toHaveLength(0)
  })

  it('应该过滤 iframe 标签', () => {
    const result = parseHtmlFragment('<iframe src="evil.com"></iframe>')
    expect(result).toHaveLength(0)
  })

  it('应该过滤 onclick 等事件属性', () => {
    // The element itself is kept; only the handler attribute must be gone.
    const result = parseHtmlFragment('<div onclick="alert(1)">text</div>')
    expect(result).toHaveLength(1)
    expect(result[0].attrs.onclick).toBeUndefined()
  })

  it('应该过滤 javascript: 协议', () => {
    const result = parseHtmlFragment('<a href="javascript:alert(1)">link</a>')
    expect(result).toHaveLength(1)
    expect(result[0].attrs.href).toBeUndefined()
  })

  it('应该允许 data:image/', () => {
    const result = parseHtmlFragment('<img src="data:image/png;base64,abc">')
    expect(result).toHaveLength(1)
    expect(result[0].attrs.src).toBe('data:image/png;base64,abc')
  })

  it('应该过滤 on* 事件属性', () => {
    const events = ['onload', 'onerror', 'onmouseover', 'onfocus', 'onblur']
    for (const event of events) {
      const result = parseHtmlFragment(`<img ${event}="alert(1)">`)
      // Assert the element exists before indexing, so a dropped <img> is reported
      // as a length mismatch instead of a TypeError on `result[0]`.
      expect(result).toHaveLength(1)
      expect(result[0].attrs[event]).toBeUndefined()
    }
  })
})
210
+
211
/**
 * Checks that `transformHtmlNodes` turns raw HTML in a parsed markdown tree
 * into HTML element nodes that the tree helpers can find.
 */
describe('transformHtmlNodes', () => {
  it('应该转换 markdown 中的简单 HTML', () => {
    const tree = transformHtmlNodes(fromMarkdown('Hello <span>world</span>!'))

    // Collect every visited tag name, then check that a span element was reached.
    const seenTags: string[] = []
    walkHtmlElements(tree, (node) => {
      seenTags.push(node.tagName)
    })
    expect(seenTags.includes('span')).toBe(true)
  })

  it('应该处理块级 HTML', () => {
    const markdown = `
<div class="container">
内容
</div>
`
    const tree = transformHtmlNodes(fromMarkdown(markdown))

    const containers = findHtmlElementsByTag(tree, 'div')
    expect(containers.length).toBeGreaterThan(0)
    expect(containers[0].attrs.class).toBe('container')
  })
})
241
+
242
/**
 * Checks that `createHtmlTreeTransformer` returns a function that can be applied
 * to a parsed markdown tree and that honours the options it was created with.
 */
describe('createHtmlTreeTransformer', () => {
  it('应该创建可复用的转换器', () => {
    const transform = createHtmlTreeTransformer({
      preserveRawHtml: false
    })

    const output = transform(fromMarkdown('<span>test</span>'))

    expect(output.type).toBe('root')
  })

  it('应该支持自定义黑名单', () => {
    // Extend the default blacklist with one extra tag name.
    const transform = createHtmlTreeTransformer({
      tagBlacklist: [...HTML_TAG_BLACKLIST, 'custom']
    })

    const output = transform(fromMarkdown('<custom>content</custom>'))

    expect(findHtmlElementsByTag(output, 'custom')).toHaveLength(0)
  })
})
266
+
267
/**
 * Checks the `isHtmlElementNode` guard against an HTML element node and an
 * ordinary mdast node.
 */
describe('isHtmlElementNode', () => {
  it('应该正确识别 HtmlElementNode', () => {
    const node: HtmlElementNode = {
      type: 'htmlElement',
      tagName: 'span',
      attrs: {},
      children: []
    }
    expect(isHtmlElementNode(node)).toBe(true)
  })

  it('应该拒绝其他节点类型', () => {
    // A typed mdast paragraph replaces the former `as any` fixture, so the
    // compiler checks the fixture's shape.
    const paragraph: RootContent = { type: 'paragraph', children: [] }
    expect(isHtmlElementNode(paragraph)).toBe(false)
  })
})
283
+
284
/**
 * Checks that `walkHtmlElements` calls its callback for nested elements as well
 * as for the top-level one.
 */
describe('walkHtmlElements', () => {
  it('应该遍历所有 HTML 元素', () => {
    const elements = parseHtmlFragment('<div><span>a</span><span>b</span></div>')
    // Wrap the parsed elements in an mdast root so the walker has a tree to traverse.
    const tree: Root = { type: 'root', children: elements as RootContent[] }

    const tagNames: string[] = []
    walkHtmlElements(tree, (node) => {
      tagNames.push(node.tagName)
    })

    expect(tagNames).toContain('div')
    expect(tagNames).toContain('span')
    const spanCount = tagNames.filter((tag) => tag === 'span')
    expect(spanCount).toHaveLength(2)
  })
})
299
+
300
/**
 * Checks that `findHtmlElementsByTag` returns the matching elements of a tree,
 * including when the source markup uses upper-case tag names.
 */
describe('findHtmlElementsByTag', () => {
  // Parses `html` and wraps the resulting elements in an mdast root node.
  const toRoot = (html: string): Root => {
    const elements = parseHtmlFragment(html)
    return { type: 'root', children: elements as RootContent[] }
  }

  it('应该查找特定标签', () => {
    const tree = toRoot('<div><span>a</span><p>b</p><span>c</span></div>')

    expect(findHtmlElementsByTag(tree, 'span')).toHaveLength(2)
  })

  it('应该不区分大小写', () => {
    // Upper-case markup must still be found with a lower-case query.
    const tree = toRoot('<DIV>test</DIV>')

    expect(findHtmlElementsByTag(tree, 'div')).toHaveLength(1)
  })
})
317
+
318
/**
 * Checks the HTML string `htmlElementToString` produces for hand-built element
 * nodes: attributes, nesting, self-closing output and attribute escaping.
 */
describe('htmlElementToString', () => {
  it('应该将节点转回 HTML 字符串', () => {
    const span: HtmlElementNode = {
      type: 'htmlElement',
      tagName: 'span',
      attrs: { class: 'test' },
      children: [{ type: 'text', value: 'hello' } as RootContent]
    }

    expect(htmlElementToString(span)).toBe('<span class="test">hello</span>')
  })

  it('应该处理嵌套节点', () => {
    const outer: HtmlElementNode = {
      type: 'htmlElement',
      tagName: 'div',
      attrs: {},
      children: [{
        type: 'htmlElement',
        tagName: 'span',
        attrs: {},
        children: [{ type: 'text', value: 'nested' } as RootContent]
      } as RootContent]
    }

    expect(htmlElementToString(outer)).toBe('<div><span>nested</span></div>')
  })

  it('应该处理自闭合标签', () => {
    const lineBreak: HtmlElementNode = {
      type: 'htmlElement',
      tagName: 'br',
      attrs: {},
      children: []
    }

    // A childless <br> is expected to serialise in self-closing form.
    expect(htmlElementToString(lineBreak)).toBe('<br />')
  })

  it('应该转义属性值', () => {
    const withSpecialChars: HtmlElementNode = {
      type: 'htmlElement',
      tagName: 'div',
      attrs: { 'data-value': '<"test">' },
      children: []
    }

    // Angle brackets and double quotes in attribute values must come out as entities.
    const serialized = htmlElementToString(withSpecialChars)
    expect(serialized).toContain('&lt;&quot;test&quot;&gt;')
  })
})
372
+
373
/**
 * End-to-end checks: `transformHtmlNodes` applied to markdown documents that
 * mix HTML with headings, paragraphs and GFM tables.
 */
describe('复杂场景', () => {
  it('应该处理 markdown 与 HTML 混合内容', () => {
    // Note: inside an HTML block, markdown does not parse the content as markdown,
    // so something like **bold** would stay as written.
    const markdown = `# 标题

<div class="info">
这是一段文字
</div>

普通段落`
    const tree = transformHtmlNodes(fromMarkdown(markdown))
    const topLevelTypes = tree.children.map((child) => child.type)

    // The heading must survive the transformation.
    expect(topLevelTypes.some((type) => type === 'heading')).toBe(true)
    // The HTML block is expected to become an htmlElement (a raw html node is also accepted).
    expect(
      topLevelTypes.some((type) => type === 'htmlElement' || type === 'html')
    ).toBe(true)
  })

  it('应该处理表格中的 HTML', () => {
    const markdown = `| 列1 | 列2 |
|-----|-----|
| <span>内容</span> | 普通文本 |`

    // Tables are only parsed when the GFM extensions are enabled.
    const parsed = fromMarkdown(markdown, {
      extensions: [gfm()],
      mdastExtensions: [gfmFromMarkdown()]
    })
    const tree = transformHtmlNodes(parsed)

    // The table structure must be preserved.
    const hasTable = tree.children.some((child) => child.type === 'table')
    expect(hasTable).toBe(true)
  })
})
409
+