safe-mdx 1.7.0 → 1.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +56 -0
- package/dist/html/html-and-md.test.js +14 -41
- package/dist/html/html-and-md.test.js.map +1 -1
- package/dist/html/html-to-mdx-ast.d.ts +26 -1
- package/dist/html/html-to-mdx-ast.d.ts.map +1 -1
- package/dist/html/html-to-mdx-ast.js +40 -0
- package/dist/html/html-to-mdx-ast.js.map +1 -1
- package/dist/incremental-parse.d.ts +41 -0
- package/dist/incremental-parse.d.ts.map +1 -0
- package/dist/incremental-parse.js +139 -0
- package/dist/incremental-parse.js.map +1 -0
- package/dist/incremental-parse.test.d.ts +2 -0
- package/dist/incremental-parse.test.d.ts.map +1 -0
- package/dist/incremental-parse.test.js +299 -0
- package/dist/incremental-parse.test.js.map +1 -0
- package/dist/markdown-html.test.d.ts +2 -0
- package/dist/markdown-html.test.d.ts.map +1 -0
- package/dist/markdown-html.test.js +129 -0
- package/dist/markdown-html.test.js.map +1 -0
- package/dist/markdown.d.ts +3 -0
- package/dist/markdown.d.ts.map +1 -0
- package/dist/markdown.js +4 -0
- package/dist/markdown.js.map +1 -0
- package/dist/parse.d.ts +9 -2
- package/dist/parse.d.ts.map +1 -1
- package/dist/parse.js +24 -12
- package/dist/parse.js.map +1 -1
- package/dist/safe-mdx.d.ts.map +1 -1
- package/dist/safe-mdx.js +6 -24
- package/dist/safe-mdx.js.map +1 -1
- package/package.json +9 -1
- package/src/html/html-and-md.test.ts +15 -47
- package/src/html/html-to-mdx-ast.ts +53 -1
- package/src/incremental-parse.test.ts +315 -0
- package/src/incremental-parse.ts +219 -0
- package/src/markdown-html.test.tsx +144 -0
- package/src/markdown.ts +4 -0
- package/src/parse.ts +36 -13
- package/src/safe-mdx.test.tsx +2 -0
- package/src/safe-mdx.tsx +6 -26
|
@@ -0,0 +1,315 @@
|
|
|
1
|
+
// Verifies segment-cached parsing for streaming MDX without tying the API to React rendering.
|
|
2
|
+
import { describe, expect, test } from 'vitest'
|
|
3
|
+
|
|
4
|
+
import { mdxParse } from './parse.ts'
|
|
5
|
+
import { createMdxProcessor, parseMarkdownIncremental, type SegmentCache } from './incremental-parse.ts'
|
|
6
|
+
|
|
7
|
+
/**
 * Reduces every top-level child of `ast` to the fields the snapshots compare:
 * node type, value, name and the start/end points of its position.
 */
function nodeSummary(ast: any) {
    const summaries = []
    for (const child of ast.children) {
        const { type, value, name, position } = child
        summaries.push({
            type,
            value,
            name,
            start: position?.start,
            end: position?.end,
        })
    }
    return summaries
}
|
|
16
|
+
|
|
17
|
+
describe('parseMarkdownIncremental', () => {
|
|
18
|
+
test('parses MDX into an mdast root and reports no errors', () => {
|
|
19
|
+
const cache: SegmentCache = new Map()
|
|
20
|
+
const result = parseMarkdownIncremental({
|
|
21
|
+
markdown: '# Hello\n\n<Alert>streaming</Alert>\n\nTail',
|
|
22
|
+
cache,
|
|
23
|
+
trailingNodes: 1,
|
|
24
|
+
})
|
|
25
|
+
|
|
26
|
+
expect(result.errors).toMatchInlineSnapshot(`[]`)
|
|
27
|
+
expect(nodeSummary(result.mdast)).toMatchInlineSnapshot(`
|
|
28
|
+
[
|
|
29
|
+
{
|
|
30
|
+
"end": {
|
|
31
|
+
"column": 8,
|
|
32
|
+
"line": 1,
|
|
33
|
+
"offset": 7,
|
|
34
|
+
},
|
|
35
|
+
"name": undefined,
|
|
36
|
+
"start": {
|
|
37
|
+
"column": 1,
|
|
38
|
+
"line": 1,
|
|
39
|
+
"offset": 0,
|
|
40
|
+
},
|
|
41
|
+
"type": "heading",
|
|
42
|
+
"value": undefined,
|
|
43
|
+
},
|
|
44
|
+
{
|
|
45
|
+
"end": {
|
|
46
|
+
"column": 25,
|
|
47
|
+
"line": 3,
|
|
48
|
+
"offset": 33,
|
|
49
|
+
},
|
|
50
|
+
"name": "Alert",
|
|
51
|
+
"start": {
|
|
52
|
+
"column": 1,
|
|
53
|
+
"line": 3,
|
|
54
|
+
"offset": 9,
|
|
55
|
+
},
|
|
56
|
+
"type": "mdxJsxFlowElement",
|
|
57
|
+
"value": undefined,
|
|
58
|
+
},
|
|
59
|
+
{
|
|
60
|
+
"end": {
|
|
61
|
+
"column": 5,
|
|
62
|
+
"line": 5,
|
|
63
|
+
"offset": 39,
|
|
64
|
+
},
|
|
65
|
+
"name": undefined,
|
|
66
|
+
"start": {
|
|
67
|
+
"column": 1,
|
|
68
|
+
"line": 5,
|
|
69
|
+
"offset": 35,
|
|
70
|
+
},
|
|
71
|
+
"type": "paragraph",
|
|
72
|
+
"value": undefined,
|
|
73
|
+
},
|
|
74
|
+
]
|
|
75
|
+
`)
|
|
76
|
+
expect(Array.from(cache.keys())).toMatchInlineSnapshot(`
|
|
77
|
+
[
|
|
78
|
+
0,
|
|
79
|
+
7,
|
|
80
|
+
]
|
|
81
|
+
`)
|
|
82
|
+
})
|
|
83
|
+
|
|
84
|
+
test('reuses cached stable segments', () => {
|
|
85
|
+
const cache: SegmentCache = new Map()
|
|
86
|
+
let parseCalls = 0
|
|
87
|
+
const parse = (markdown: string) => {
|
|
88
|
+
parseCalls++
|
|
89
|
+
return mdxParse(markdown)
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
parseMarkdownIncremental({
|
|
93
|
+
markdown: '# Title\n\nFirst paragraph\n\nSecond paragraph',
|
|
94
|
+
cache,
|
|
95
|
+
trailingNodes: 0,
|
|
96
|
+
parse,
|
|
97
|
+
})
|
|
98
|
+
parseMarkdownIncremental({
|
|
99
|
+
markdown: '# Title\n\nFirst paragraph\n\nSecond paragraph',
|
|
100
|
+
cache,
|
|
101
|
+
trailingNodes: 0,
|
|
102
|
+
parse,
|
|
103
|
+
})
|
|
104
|
+
|
|
105
|
+
expect(parseCalls).toMatchInlineSnapshot(`1`)
|
|
106
|
+
expect(Array.from(cache.entries()).map(([start, entry]) => ({ start, end: entry.end }))).toMatchInlineSnapshot(`
|
|
107
|
+
[
|
|
108
|
+
{
|
|
109
|
+
"end": 7,
|
|
110
|
+
"start": 0,
|
|
111
|
+
},
|
|
112
|
+
{
|
|
113
|
+
"end": 24,
|
|
114
|
+
"start": 7,
|
|
115
|
+
},
|
|
116
|
+
{
|
|
117
|
+
"end": 42,
|
|
118
|
+
"start": 24,
|
|
119
|
+
},
|
|
120
|
+
]
|
|
121
|
+
`)
|
|
122
|
+
})
|
|
123
|
+
|
|
124
|
+
test('keeps positions correct after cached prefixes', () => {
|
|
125
|
+
const cache: SegmentCache = new Map()
|
|
126
|
+
|
|
127
|
+
parseMarkdownIncremental({
|
|
128
|
+
markdown: '# Title\n\nFirst paragraph\n\nLive',
|
|
129
|
+
cache,
|
|
130
|
+
trailingNodes: 1,
|
|
131
|
+
})
|
|
132
|
+
const result = parseMarkdownIncremental({
|
|
133
|
+
markdown: '# Title\n\nFirst paragraph\n\nLive tail\n\nNew paragraph',
|
|
134
|
+
cache,
|
|
135
|
+
trailingNodes: 1,
|
|
136
|
+
})
|
|
137
|
+
|
|
138
|
+
expect(result.errors).toMatchInlineSnapshot(`[]`)
|
|
139
|
+
expect(nodeSummary(result.mdast)).toMatchInlineSnapshot(`
|
|
140
|
+
[
|
|
141
|
+
{
|
|
142
|
+
"end": {
|
|
143
|
+
"column": 8,
|
|
144
|
+
"line": 1,
|
|
145
|
+
"offset": 7,
|
|
146
|
+
},
|
|
147
|
+
"name": undefined,
|
|
148
|
+
"start": {
|
|
149
|
+
"column": 1,
|
|
150
|
+
"line": 1,
|
|
151
|
+
"offset": 0,
|
|
152
|
+
},
|
|
153
|
+
"type": "heading",
|
|
154
|
+
"value": undefined,
|
|
155
|
+
},
|
|
156
|
+
{
|
|
157
|
+
"end": {
|
|
158
|
+
"column": 16,
|
|
159
|
+
"line": 3,
|
|
160
|
+
"offset": 24,
|
|
161
|
+
},
|
|
162
|
+
"name": undefined,
|
|
163
|
+
"start": {
|
|
164
|
+
"column": 1,
|
|
165
|
+
"line": 3,
|
|
166
|
+
"offset": 9,
|
|
167
|
+
},
|
|
168
|
+
"type": "paragraph",
|
|
169
|
+
"value": undefined,
|
|
170
|
+
},
|
|
171
|
+
{
|
|
172
|
+
"end": {
|
|
173
|
+
"column": 10,
|
|
174
|
+
"line": 5,
|
|
175
|
+
"offset": 35,
|
|
176
|
+
},
|
|
177
|
+
"name": undefined,
|
|
178
|
+
"start": {
|
|
179
|
+
"column": 1,
|
|
180
|
+
"line": 5,
|
|
181
|
+
"offset": 26,
|
|
182
|
+
},
|
|
183
|
+
"type": "paragraph",
|
|
184
|
+
"value": undefined,
|
|
185
|
+
},
|
|
186
|
+
{
|
|
187
|
+
"end": {
|
|
188
|
+
"column": 14,
|
|
189
|
+
"line": 7,
|
|
190
|
+
"offset": 50,
|
|
191
|
+
},
|
|
192
|
+
"name": undefined,
|
|
193
|
+
"start": {
|
|
194
|
+
"column": 1,
|
|
195
|
+
"line": 7,
|
|
196
|
+
"offset": 37,
|
|
197
|
+
},
|
|
198
|
+
"type": "paragraph",
|
|
199
|
+
"value": undefined,
|
|
200
|
+
},
|
|
201
|
+
]
|
|
202
|
+
`)
|
|
203
|
+
})
|
|
204
|
+
|
|
205
|
+
test('returns partial cached AST instead of throwing on invalid live tail', () => {
|
|
206
|
+
const cache: SegmentCache = new Map()
|
|
207
|
+
|
|
208
|
+
parseMarkdownIncremental({
|
|
209
|
+
markdown: '# Stable\n\nDone\n\nTail',
|
|
210
|
+
cache,
|
|
211
|
+
trailingNodes: 1,
|
|
212
|
+
})
|
|
213
|
+
const result = parseMarkdownIncremental({
|
|
214
|
+
markdown: '# Stable\n\nDone\n\n<Card',
|
|
215
|
+
cache,
|
|
216
|
+
trailingNodes: 1,
|
|
217
|
+
})
|
|
218
|
+
|
|
219
|
+
expect(result.errors.map((error) => ({
|
|
220
|
+
message: error.message,
|
|
221
|
+
line: error.line,
|
|
222
|
+
column: error.column,
|
|
223
|
+
offset: error.offset,
|
|
224
|
+
}))).toMatchInlineSnapshot(`
|
|
225
|
+
[
|
|
226
|
+
{
|
|
227
|
+
"column": 6,
|
|
228
|
+
"line": 5,
|
|
229
|
+
"message": "Unexpected end of file in name, expected a name character such as letters, digits, \`$\`, or \`_\`; whitespace before attributes; or the end of the tag",
|
|
230
|
+
"offset": 21,
|
|
231
|
+
},
|
|
232
|
+
]
|
|
233
|
+
`)
|
|
234
|
+
expect(nodeSummary(result.mdast)).toMatchInlineSnapshot(`
|
|
235
|
+
[
|
|
236
|
+
{
|
|
237
|
+
"end": {
|
|
238
|
+
"column": 9,
|
|
239
|
+
"line": 1,
|
|
240
|
+
"offset": 8,
|
|
241
|
+
},
|
|
242
|
+
"name": undefined,
|
|
243
|
+
"start": {
|
|
244
|
+
"column": 1,
|
|
245
|
+
"line": 1,
|
|
246
|
+
"offset": 0,
|
|
247
|
+
},
|
|
248
|
+
"type": "heading",
|
|
249
|
+
"value": undefined,
|
|
250
|
+
},
|
|
251
|
+
{
|
|
252
|
+
"end": {
|
|
253
|
+
"column": 5,
|
|
254
|
+
"line": 3,
|
|
255
|
+
"offset": 14,
|
|
256
|
+
},
|
|
257
|
+
"name": undefined,
|
|
258
|
+
"start": {
|
|
259
|
+
"column": 1,
|
|
260
|
+
"line": 3,
|
|
261
|
+
"offset": 10,
|
|
262
|
+
},
|
|
263
|
+
"type": "paragraph",
|
|
264
|
+
"value": undefined,
|
|
265
|
+
},
|
|
266
|
+
]
|
|
267
|
+
`)
|
|
268
|
+
})
|
|
269
|
+
|
|
270
|
+
test('uses custom processor plugins', () => {
|
|
271
|
+
const cache: SegmentCache = new Map()
|
|
272
|
+
const processor = createMdxProcessor({
|
|
273
|
+
remarkPlugins: [remarkUppercaseText],
|
|
274
|
+
})
|
|
275
|
+
|
|
276
|
+
const result = parseMarkdownIncremental({
|
|
277
|
+
markdown: 'hello **world**',
|
|
278
|
+
cache,
|
|
279
|
+
processor,
|
|
280
|
+
})
|
|
281
|
+
|
|
282
|
+
expect(result.errors).toMatchInlineSnapshot(`[]`)
|
|
283
|
+
expect(textValues(result.mdast)).toMatchInlineSnapshot(`
|
|
284
|
+
[
|
|
285
|
+
"HELLO ",
|
|
286
|
+
"WORLD",
|
|
287
|
+
]
|
|
288
|
+
`)
|
|
289
|
+
})
|
|
290
|
+
})
|
|
291
|
+
|
|
292
|
+
/**
 * Minimal remark plugin used by the tests: the returned transformer upper-cases
 * the `value` of every `text` node in the tree, mutating the tree in place.
 */
function remarkUppercaseText() {
    const uppercase = (node: any) => {
        if (node.type !== 'text') return
        node.value = node.value.toUpperCase()
    }

    return (tree: any) => {
        walk(tree, uppercase)
    }
}
|
|
301
|
+
|
|
302
|
+
/** Collects the `value` of every `text` node under `node`, in traversal order. */
function textValues(node: any): string[] {
    const collected: string[] = []
    const collect = (current: any) => {
        if (current.type !== 'text') return
        collected.push(current.value)
    }
    walk(node, collect)
    return collected
}
|
|
309
|
+
|
|
310
|
+
/**
 * Depth-first, pre-order traversal: `visit` runs on `node` first, then on each
 * entry of `node.children` (when that property is an array), recursively.
 */
function walk(node: any, visit: (node: any) => void) {
    visit(node)

    const kids = node.children
    if (!Array.isArray(kids)) return

    for (let i = 0; i < kids.length; i++) {
        walk(kids[i], visit)
    }
}
|
|
@@ -0,0 +1,219 @@
|
|
|
1
|
+
// Segment-cached markdown parsing for streaming MDX input. It reuses stable mdast nodes and reparses only the live tail.
|
|
2
|
+
import type { Root, RootContent } from 'mdast'
|
|
3
|
+
|
|
4
|
+
import { createMdxProcessor, mdxParse, type MdxProcessor } from './parse.ts'
|
|
5
|
+
|
|
6
|
+
export { createMdxProcessor }
|
|
7
|
+
|
|
8
|
+
/**
 * One cached segment of the source markdown together with the top-level nodes parsed from it.
 * Entries are stored in a `SegmentCache` under the offset at which the segment starts.
 */
export interface SegmentEntry {
    /** Length of the source slice the entry was built from; compared with the current slice before reuse. */
    len: number
    /** `quickHash` of that source slice; compared with the current slice before reuse. */
    hash: number
    /** Absolute offset in the markdown at which the segment ends (taken from the cached node's end position). */
    end: number
    /** Top-level mdast nodes produced for this segment. */
    nodes: RootContent[]
}
|
|
14
|
+
|
|
15
|
+
export type SegmentCache = Map<number, SegmentEntry>
|
|
16
|
+
|
|
17
|
+
export interface IncrementalParseError {
|
|
18
|
+
message: string
|
|
19
|
+
line?: number
|
|
20
|
+
column?: number
|
|
21
|
+
offset?: number
|
|
22
|
+
cause: ParseErrorCause
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
type ParseErrorCause = string | {
|
|
26
|
+
message?: string
|
|
27
|
+
place?: {
|
|
28
|
+
line?: number
|
|
29
|
+
column?: number
|
|
30
|
+
offset?: number
|
|
31
|
+
}
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
type MutableAstNode = {
|
|
35
|
+
position?: {
|
|
36
|
+
start?: Point
|
|
37
|
+
end?: Point
|
|
38
|
+
}
|
|
39
|
+
children?: MutableAstNode[]
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
type Point = {
|
|
43
|
+
line?: number
|
|
44
|
+
column?: number
|
|
45
|
+
offset?: number
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
export interface IncrementalParseOptions {
|
|
49
|
+
markdown: string
|
|
50
|
+
cache: SegmentCache
|
|
51
|
+
/** Number of unstable top-level nodes to keep out of the cache. Defaults to 2. */
|
|
52
|
+
trailingNodes?: number
|
|
53
|
+
/** Custom unified processor. Use `createMdxProcessor({ remarkPlugins })` for plugin customization. */
|
|
54
|
+
processor?: MdxProcessor
|
|
55
|
+
/** Fully custom parse hook. Takes precedence over `processor`. */
|
|
56
|
+
parse?: (markdown: string) => Root
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
export interface IncrementalParseResult {
|
|
60
|
+
mdast: Root
|
|
61
|
+
errors: IncrementalParseError[]
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
/**
 * Parses streaming markdown/MDX while reusing previously parsed top-level nodes.
 *
 * Starting at offset 0, every cache entry whose recorded length and hash still match the
 * current text contributes its nodes unchanged. The first offset without a valid entry
 * starts the live tail, which is parsed in a single call; the resulting node positions are
 * shifted so they are absolute within `markdown`. A failure while parsing the tail is
 * collected in `errors` instead of being thrown, so nodes recovered from the cache are
 * still returned.
 *
 * Afterwards every child except the last `trailingNodes` is written back to `cache`
 * (the map is mutated in place), keyed by the offset at which its segment starts.
 *
 * Options:
 * - `markdown`: the full document text received so far.
 * - `cache`: segment cache shared between calls for the same document.
 * - `trailingNodes`: number of trailing top-level nodes kept out of the cache (default 2).
 * - `processor`: custom processor used when `parse` is not given.
 * - `parse`: custom parse hook; takes precedence over `processor`.
 *
 * @returns the combined mdast root and the list of parse errors (empty on success).
 */
export function parseMarkdownIncremental({
    markdown,
    cache,
    trailingNodes = 2,
    processor,
    parse,
}: IncrementalParseOptions): IncrementalParseResult {
    const children: RootContent[] = []
    const errors: IncrementalParseError[] = []
    const parseSlice = parse ?? ((code: string) => parseWithProcessor(code, processor))

    for (let offset = 0; offset < markdown.length;) {
        const entry = cache.get(offset)
        // An entry that does not end after its own start would never advance `offset`
        // and would spin this loop forever, so it is treated as a cache miss.
        if (entry && entry.end > offset) {
            const slice = markdown.slice(offset, entry.end)
            if (slice.length === entry.len && quickHash(slice) === entry.hash) {
                children.push(...entry.nodes)
                offset = entry.end
                continue
            }
        }

        // Cache miss: everything from here to the end of the input is the live tail.
        const rest = markdown.slice(offset)
        if (!rest) break
        const basePoint = pointAtOffset(markdown, offset)

        try {
            const ast = parseSlice(rest)
            // The slice was parsed on its own, so its positions are relative to `offset`.
            children.push(...ast.children.map((node) => adjustNodePositions(node, basePoint)))
        } catch (cause: unknown) {
            // Anything can be thrown by a parser or plugin; the helper only reads
            // `message` and `place` from it.
            errors.push(parseErrorFromCause(cause as ParseErrorCause, basePoint))
        }
        break
    }

    const stableChildren = trailingNodes > 0 ? children.slice(0, -trailingNodes) : children
    let segmentStart = 0
    // Nodes that cannot close a segment (no end offset, or an end that does not move past
    // the current segment start) ride along with the next node that can, so they are not
    // lost when the prefix is later served from the cache.
    let pending: RootContent[] = []
    for (const node of stableChildren) {
        const end = node.position?.end.offset
        if (typeof end !== 'number' || end <= segmentStart) {
            pending.push(node)
            continue
        }

        const slice = markdown.slice(segmentStart, end)
        cache.set(segmentStart, {
            len: slice.length,
            hash: quickHash(slice),
            end,
            nodes: [...pending, node],
        })
        pending = []
        segmentStart = end
    }

    trimCache(cache)

    return {
        mdast: {
            type: 'root',
            children,
            position: {
                start: { line: 1, column: 1, offset: 0 },
                end: pointAtOffset(markdown, markdown.length),
            },
        },
        errors,
    }
}
|
|
129
|
+
|
|
130
|
+
/**
 * Parses `markdown` into an mdast root.
 *
 * Without a processor this defers to `mdxParse`. With one, the processor is run
 * synchronously and the tree is read from `file.data.ast`.
 *
 * @throws Error when the processor did not leave a root node at `file.data.ast`.
 */
function parseWithProcessor(markdown: string, processor?: MdxProcessor): Root {
    if (processor) {
        const exposed = processor.processSync(markdown).data.ast
        const isMdastRoot = (value: any): value is Root =>
            value?.type === 'root' && Array.isArray(value?.children)

        if (isMdastRoot(exposed)) return exposed
        throw new Error('Processor did not expose mdast at file.data.ast')
    }

    return mdxParse(markdown)
}
|
|
143
|
+
|
|
144
|
+
/**
 * 32-bit FNV-1a hash of `s`, computed over its UTF-16 code units and returned as an
 * unsigned integer. For ASCII input this matches the standard byte-wise FNV-1a values.
 *
 * The hash is only used to cheaply verify that a cached slice is unchanged; it is not
 * cryptographic.
 */
function quickHash(s: string): number {
    let h = 0x811c9dc5 // FNV-1a 32-bit offset basis
    for (let i = 0; i < s.length; i++) {
        h ^= s.charCodeAt(i)
        // Math.imul performs the exact 32-bit multiply. A plain `h * prime` can exceed
        // 2^53, where doubles round away the low bits before `>>> 0` truncates.
        h = Math.imul(h, 0x01000193) >>> 0
    }
    return h
}
|
|
152
|
+
|
|
153
|
+
/**
 * Converts a character offset into a 1-based `{ line, column, offset }` point by
 * scanning `text` from the start and counting `\n` characters before `offset`.
 */
function pointAtOffset(text: string, offset: number) {
    const LINE_FEED = 10
    let line = 1
    let column = 1
    let index = 0

    while (index < offset) {
        const isLineBreak = text.charCodeAt(index) === LINE_FEED
        line = isLineBreak ? line + 1 : line
        column = isLineBreak ? 1 : column + 1
        index += 1
    }

    return { line, column, offset }
}
|
|
168
|
+
|
|
169
|
+
/**
 * Shifts the start and end points of `node` and of all its descendants by `basePoint`,
 * mutating the nodes in place. `node.children` is replaced with a new array holding the
 * same (adjusted) child objects. Returns the node that was passed in.
 */
function adjustNodePositions<T extends MutableAstNode>(node: T, basePoint: { line: number, column: number, offset: number }): T {
    const { position, children } = node

    if (position) {
        adjustPoint(position.start, basePoint)
        adjustPoint(position.end, basePoint)
    }

    if (Array.isArray(children)) {
        const shiftChild = (child: MutableAstNode) => adjustNodePositions(child, basePoint)
        node.children = children.map(shiftChild)
    }

    return node
}
|
|
182
|
+
|
|
183
|
+
/**
 * Shifts `point` in place so that a position relative to a parsed slice becomes absolute.
 *
 * - `offset` grows by `basePoint.offset`.
 * - `line` grows by `basePoint.line - 1`.
 * - `column` grows by `basePoint.column - 1`, but only when the shifted line equals
 *   `basePoint.line`, i.e. the point sits on the line where the slice began.
 *
 * Fields that are not numbers are left untouched; a missing point is ignored.
 */
function adjustPoint(point: Point | undefined, basePoint: { line: number, column: number, offset: number }) {
    if (point) {
        const lineShift = basePoint.line - 1
        const columnShift = basePoint.column - 1

        if (typeof point.offset === 'number') point.offset = point.offset + basePoint.offset
        if (typeof point.line === 'number') point.line = point.line + lineShift

        // Evaluated after the line shift on purpose: only the slice's first line is offset horizontally.
        const onBaseLine = point.line === basePoint.line
        if (onBaseLine && typeof point.column === 'number') point.column = point.column + columnShift
    }
}
|
|
196
|
+
|
|
197
|
+
/**
 * Normalises whatever a parser threw into an `IncrementalParseError`.
 *
 * - `message` is the string itself for string causes, otherwise the cause's `message`,
 *   falling back to `String(cause)`.
 * - `line`, `column` and `offset` come from the cause's `place`, shifted by `basePoint` so
 *   they are absolute within the full document. `place` may be a single point or a
 *   `{ start, end }` position; for a position the start point is reported.
 * - The original thrown value is preserved in `cause`.
 *
 * Never throws, even when the thrown value is `null` or `undefined`, so the caller's
 * catch block cannot fail.
 */
function parseErrorFromCause(cause: ParseErrorCause, basePoint: { line: number, column: number, offset: number }): IncrementalParseError {
    const details = typeof cause === 'object' && cause !== null ? cause : undefined
    const place: (Point & { start?: Point }) | undefined = details?.place
    const origin = place?.start ?? place
    // Copy before shifting so the thrown error object itself is not mutated.
    const point = origin
        ? { line: origin.line, column: origin.column, offset: origin.offset }
        : undefined
    if (point) adjustPoint(point, basePoint)

    return {
        message: typeof cause === 'string' ? cause : details?.message ?? String(cause),
        line: point?.line,
        column: point?.column,
        offset: point?.offset,
        cause,
    }
}
|
|
210
|
+
|
|
211
|
+
/**
 * Caps the cache at 300 entries by evicting the segments with the highest start offsets.
 *
 * Lookups always begin at offset 0 and follow each entry's `end` to the next key, so only
 * an unbroken prefix of the document can ever be served from the cache. Dropping the
 * earliest entries would make the very first lookup miss and force a full reparse on
 * every call; dropping the latest ones keeps the longest reusable prefix and merely
 * lengthens the tail that is reparsed.
 */
function trimCache(cache: SegmentCache) {
    const maxCacheSize = 300
    if (cache.size <= maxCacheSize) return

    const keys = Array.from(cache.keys()).sort((a, b) => a - b)
    for (const key of keys.slice(maxCacheSize)) {
        cache.delete(key)
    }
}
|
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
// Tests for rendering markdown (not MDX) that contains raw HTML blocks.
|
|
2
|
+
// The remarkHtmlToMdx plugin is used as a pre-processing step to convert
|
|
3
|
+
// `html` AST nodes (produced by plain remark) into mdxJsx nodes before
|
|
4
|
+
// MdastToJsx ever sees them. This way linkedom stays out of the main bundle.
|
|
5
|
+
import dedent from 'dedent'
|
|
6
|
+
import React from 'react'
|
|
7
|
+
import { renderToStaticMarkup } from 'react-dom/server'
|
|
8
|
+
import { remark } from 'remark'
|
|
9
|
+
import remarkGfm from 'remark-gfm'
|
|
10
|
+
import { expect, test, describe } from 'vitest'
|
|
11
|
+
import type { Root } from 'mdast'
|
|
12
|
+
import { MdastToJsx } from './safe-mdx.tsx'
|
|
13
|
+
import { remarkHtmlToMdx } from './markdown.ts'
|
|
14
|
+
import { validHtmlElements } from './html/valid-html-elements.ts'
|
|
15
|
+
|
|
16
|
+
// Component map handed to MdastToJsx in these tests: `Heading` renders as a plain <h1>.
const components = {
    Heading: ({ children, ...props }) => React.createElement('h1', props, children),
}
|
|
21
|
+
|
|
22
|
+
// convertTagName that only keeps standard HTML elements, drops everything else.
|
|
23
|
+
// This mirrors what the old case 'html' did inside MdastToJsx.
|
|
24
|
+
const onlyValidHtml = ({ tagName }: { tagName: string }) => {
    // Known HTML elements are returned lower-cased; anything else maps to the empty string.
    const lowered = tagName.toLowerCase()
    return validHtmlElements.has(lowered) ? lowered : ''
}
|
|
26
|
+
|
|
27
|
+
// Parse with plain remark + remarkHtmlToMdx pre-processor, then render via MdastToJsx.
|
|
28
|
+
// No html nodes remain by the time MdastToJsx sees the AST.
|
|
29
|
+
/**
 * Test helper: parses `markdown` with remark + GFM, runs the `remarkHtmlToMdx` transform
 * (restricted to valid HTML tag names), renders the tree through `MdastToJsx` and returns
 * the static HTML together with any errors the visitor collected.
 */
function render(markdown: string) {
    const pipeline = remark()
        .use(remarkGfm)
        .use(remarkHtmlToMdx, { convertTagName: onlyValidHtml })

    const mdast = pipeline.parse(markdown) as Root
    // The return value is ignored: the same `mdast` object is rendered below.
    pipeline.runSync(mdast)

    const visitor = new MdastToJsx({ markdown, mdast, components })
    const html = renderToStaticMarkup(visitor.run())

    return { errors: visitor.errors || [], html }
}
|
|
40
|
+
|
|
41
|
+
describe('remarkHtmlToMdx pre-processor with plain remark', () => {
|
|
42
|
+
test('renders a block-level HTML div', () => {
|
|
43
|
+
const { html, errors } = render(dedent`
|
|
44
|
+
# Title
|
|
45
|
+
|
|
46
|
+
<div class="box">hello world</div>
|
|
47
|
+
|
|
48
|
+
Some paragraph after.
|
|
49
|
+
`)
|
|
50
|
+
expect(errors).toMatchInlineSnapshot(`[]`)
|
|
51
|
+
expect(html).toMatchInlineSnapshot(`"<h1>Title</h1><div class="box">hello world</div><p>Some paragraph after.</p>"`)
|
|
52
|
+
})
|
|
53
|
+
|
|
54
|
+
test('inline HTML in a paragraph is split by remark into per-tag html nodes (known limitation)', () => {
|
|
55
|
+
// remark parses inline HTML at tag boundaries, so <strong>bold</strong> becomes:
|
|
56
|
+
// html("<strong>") → empty element, text("bold"), html("</strong>") → dropped
|
|
57
|
+
// Block-level HTML (own line + blank lines) is the only reliably handled case.
|
|
58
|
+
const { html, errors } = render(
|
|
59
|
+
'Some text with <strong>bold</strong> and <em>italic</em> inline.'
|
|
60
|
+
)
|
|
61
|
+
expect(errors).toMatchInlineSnapshot(`[]`)
|
|
62
|
+
expect(html).toMatchInlineSnapshot(`"<p>Some text with <strong></strong>bold and <em></em>italic inline.</p>"`)
|
|
63
|
+
})
|
|
64
|
+
|
|
65
|
+
test('strips unknown custom elements but keeps their text children', () => {
|
|
66
|
+
// <callout> is not in validHtmlElements, so the tag wrapper is dropped
|
|
67
|
+
const { html, errors } = render(dedent`
|
|
68
|
+
<callout icon="💡">
|
|
69
|
+
Important note
|
|
70
|
+
</callout>
|
|
71
|
+
`)
|
|
72
|
+
expect(errors).toMatchInlineSnapshot(`[]`)
|
|
73
|
+
expect(html).toMatchInlineSnapshot(`"Important note"`)
|
|
74
|
+
})
|
|
75
|
+
|
|
76
|
+
test('renders self-closing void element hr', () => {
|
|
77
|
+
const { html, errors } = render(dedent`
|
|
78
|
+
Before
|
|
79
|
+
|
|
80
|
+
<hr>
|
|
81
|
+
|
|
82
|
+
After
|
|
83
|
+
`)
|
|
84
|
+
expect(errors).toMatchInlineSnapshot(`[]`)
|
|
85
|
+
expect(html).toMatchInlineSnapshot(`"<p>Before</p><hr/><p>After</p>"`)
|
|
86
|
+
})
|
|
87
|
+
|
|
88
|
+
test('renders HTML anchor with name attribute', () => {
|
|
89
|
+
const { html, errors } = render(dedent`
|
|
90
|
+
<a name="section-one"></a>
|
|
91
|
+
|
|
92
|
+
## Section One
|
|
93
|
+
`)
|
|
94
|
+
expect(errors).toMatchInlineSnapshot(`[]`)
|
|
95
|
+
expect(html).toMatchInlineSnapshot(`"<p><a name="section-one"></a></p><h2>Section One</h2>"`)
|
|
96
|
+
})
|
|
97
|
+
|
|
98
|
+
test('converts class attribute to className', () => {
|
|
99
|
+
// Note: span without blank-line separation is treated as inline HTML by remark —
|
|
100
|
+
// the opening tag and text content become separate nodes (see inline limitation above).
|
|
101
|
+
const { html, errors } = render(dedent`
|
|
102
|
+
<span class="highlight">highlighted text</span>
|
|
103
|
+
`)
|
|
104
|
+
expect(errors).toMatchInlineSnapshot(`[]`)
|
|
105
|
+
expect(html).toMatchInlineSnapshot(`"<p><span class="highlight"></span>highlighted text</p>"`)
|
|
106
|
+
})
|
|
107
|
+
|
|
108
|
+
test('renders a table from raw HTML', () => {
|
|
109
|
+
const { html, errors } = render(dedent`
|
|
110
|
+
<table>
|
|
111
|
+
<tr><th>Name</th><th>Value</th></tr>
|
|
112
|
+
<tr><td>foo</td><td>bar</td></tr>
|
|
113
|
+
</table>
|
|
114
|
+
`)
|
|
115
|
+
expect(errors).toMatchInlineSnapshot(`[]`)
|
|
116
|
+
expect(html).toMatchInlineSnapshot(`"<table><tr><th>Name</th><th>Value</th></tr><tr><td>foo</td><td>bar</td></tr></table>"`)
|
|
117
|
+
})
|
|
118
|
+
|
|
119
|
+
test('mixed markdown and HTML block', () => {
|
|
120
|
+
const { html, errors } = render(dedent`
|
|
121
|
+
## My Section
|
|
122
|
+
|
|
123
|
+
<div class="card">
|
|
124
|
+
Card content here
|
|
125
|
+
</div>
|
|
126
|
+
|
|
127
|
+
Back to **markdown**.
|
|
128
|
+
`)
|
|
129
|
+
expect(errors).toMatchInlineSnapshot(`[]`)
|
|
130
|
+
expect(html).toMatchInlineSnapshot(`"<h2>My Section</h2><div class="card">Card content here</div><p>Back to <strong>markdown</strong>.</p>"`)
|
|
131
|
+
})
|
|
132
|
+
|
|
133
|
+
test('without the plugin, html nodes are silently skipped by MdastToJsx', () => {
|
|
134
|
+
// Verify the old case 'html' path is gone — without the plugin, html nodes are dropped
|
|
135
|
+
const processor = remark().use(remarkGfm) // no remarkHtmlToMdx
|
|
136
|
+
const mdast = processor.parse(dedent`
|
|
137
|
+
<div class="box">hello world</div>
|
|
138
|
+
`) as Root
|
|
139
|
+
const visitor = new MdastToJsx({ markdown: '', mdast, components })
|
|
140
|
+
const result = visitor.run()
|
|
141
|
+
const html = renderToStaticMarkup(result)
|
|
142
|
+
expect(html).toMatchInlineSnapshot(`""`)
|
|
143
|
+
})
|
|
144
|
+
})
|
package/src/markdown.ts
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
1
|
+
// Pre-processing utilities for rendering plain markdown (not MDX) that contains raw HTML.
|
|
2
|
+
// Import from 'safe-mdx/markdown' to keep linkedom out of the main bundle.
|
|
3
|
+
export { remarkHtmlToMdx, parseHtmlToMdxAst } from './html/html-to-mdx-ast.ts'
|
|
4
|
+
export type { RemarkHtmlToMdxOptions } from './html/html-to-mdx-ast.ts'
|