safe-mdx 1.3.1 → 1.3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +14 -14
- package/dist/assets/HtmlToJsxConverter-Ds0bTjpw.js +24 -0
- package/dist/assets/_commonjsHelpers-CqkleIqs.js +1 -0
- package/dist/assets/index-B5fPOjPt.css +1 -0
- package/dist/assets/index-B7ATSoRE.js +9 -0
- package/dist/assets/index-BwZ2FTRd.js +146 -0
- package/dist/assets/index-R1UqLMGJ.js +1 -0
- package/dist/assets/index-c0qeY2gs.js +9 -0
- package/dist/assets/jsx-runtime-BhZZLbvw.js +9 -0
- package/dist/assets/jsx-runtime-NArryeSM.js +1 -0
- package/dist/assets/react-Ca6JzGpx.js +1 -0
- package/dist/assets/react-dom-BYRHYqYl.js +1 -0
- package/dist/dynamic-esm-component.d.ts.map +1 -1
- package/dist/dynamic-esm-component.js +16 -1
- package/dist/dynamic-esm-component.js.map +1 -1
- package/dist/html/attributes.d.ts +19 -0
- package/dist/html/attributes.d.ts.map +1 -0
- package/dist/html/attributes.js +289 -0
- package/dist/html/attributes.js.map +1 -0
- package/dist/html/convert-attributes.d.ts +6 -0
- package/dist/html/convert-attributes.d.ts.map +1 -0
- package/dist/html/convert-attributes.js +43 -0
- package/dist/html/convert-attributes.js.map +1 -0
- package/dist/html/domparser-browser.d.ts +4 -0
- package/dist/html/domparser-browser.d.ts.map +1 -0
- package/dist/html/domparser-browser.js +7 -0
- package/dist/html/domparser-browser.js.map +1 -0
- package/dist/html/domparser.d.ts +2 -0
- package/dist/html/domparser.d.ts.map +1 -0
- package/dist/html/domparser.js +5 -0
- package/dist/html/domparser.js.map +1 -0
- package/dist/html/html-to-mdx-ast.d.ts +23 -0
- package/dist/html/html-to-mdx-ast.d.ts.map +1 -0
- package/dist/html/html-to-mdx-ast.js +227 -0
- package/dist/html/html-to-mdx-ast.js.map +1 -0
- package/dist/html/html-to-mdx-ast.test.d.ts +2 -0
- package/dist/html/html-to-mdx-ast.test.d.ts.map +1 -0
- package/dist/html/html-to-mdx-ast.test.js +324 -0
- package/dist/html/html-to-mdx-ast.test.js.map +1 -0
- package/dist/html/remark-mdx-jsx-normalize.d.ts +10 -0
- package/dist/html/remark-mdx-jsx-normalize.d.ts.map +1 -0
- package/dist/html/remark-mdx-jsx-normalize.js +117 -0
- package/dist/html/remark-mdx-jsx-normalize.js.map +1 -0
- package/dist/html/valid-html-elements.d.ts +10 -0
- package/dist/html/valid-html-elements.d.ts.map +1 -0
- package/dist/html/valid-html-elements.js +50 -0
- package/dist/html/valid-html-elements.js.map +1 -0
- package/dist/index.html +19 -0
- package/dist/parse.d.ts +2 -0
- package/dist/parse.d.ts.map +1 -1
- package/dist/parse.js +2 -0
- package/dist/parse.js.map +1 -1
- package/dist/safe-mdx.d.ts +2 -2
- package/dist/safe-mdx.d.ts.map +1 -1
- package/dist/safe-mdx.js +39 -77
- package/dist/safe-mdx.js.map +1 -1
- package/dist/safe-mdx.test.js +161 -8
- package/dist/safe-mdx.test.js.map +1 -1
- package/package.json +27 -6
- package/src/dynamic-esm-component.tsx +40 -10
- package/src/html/README +17 -0
- package/src/html/attributes.ts +297 -0
- package/src/html/convert-attributes.ts +59 -0
- package/src/html/domparser-browser.ts +6 -0
- package/src/html/domparser.ts +5 -0
- package/src/html/html-to-mdx-ast.test.ts +365 -0
- package/src/html/html-to-mdx-ast.ts +304 -0
- package/src/html/remark-mdx-jsx-normalize.ts +128 -0
- package/src/html/valid-html-elements.ts +65 -0
- package/src/parse.ts +3 -0
- package/src/safe-mdx.test.tsx +178 -12
- package/src/safe-mdx.tsx +61 -93
- package/dist/HtmlToJsxConverter.d.ts +0 -10
- package/dist/HtmlToJsxConverter.d.ts.map +0 -1
- package/dist/HtmlToJsxConverter.js +0 -22
- package/dist/HtmlToJsxConverter.js.map +0 -1
- package/dist/plugins.d.ts +0 -12
- package/dist/plugins.d.ts.map +0 -1
- package/dist/plugins.js +0 -68
- package/dist/plugins.js.map +0 -1
- package/src/HtmlToJsxConverter.tsx +0 -37
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
import type { Root, RootContent, PhrasingContent } from 'mdast'
|
|
2
|
+
import type { MdxJsxTextElement, MdxJsxFlowElement } from 'mdast-util-mdx-jsx'
|
|
3
|
+
import { visitParents } from 'unist-util-visit-parents'
|
|
4
|
+
import type { Node, Parent } from 'unist'
|
|
5
|
+
|
|
6
|
+
// Type definitions for MDX and MDAST content types
|
|
7
|
+
// type FlowContent = Blockquote | Code | Heading | Html | List | ThematicBreak | Content
|
|
8
|
+
// type PhrasingContent = Break | Emphasis | Html | Image | ImageReference | InlineCode | Link | LinkReference | Strong | Text
|
|
9
|
+
// type MdxJsxFlowContent = MdxJsxFlowElement | FlowContent
|
|
10
|
+
// type MdxJsxPhrasingContent = MdxJsxTextElement | PhrasingContent
|
|
11
|
+
|
|
12
|
+
/**
 * Parent node types whose children are phrasing (inline) content.
 * The normalize plugin turns an MDX JSX element sitting directly inside one
 * of these into an `mdxJsxTextElement`, unless a higher-priority rule
 * forces it to flow.
 */
const PHRASE_CONTAINERS = new Set<string>([
    // Plain mdast inline containers
    'paragraph', 'heading', 'emphasis', 'strong', 'delete',
    'link', 'linkReference', 'tableCell',
    // An inline MDX JSX element holds phrasing content itself
    'mdxJsxTextElement',
])

/**
 * Parent node types whose children are flow (block) content.
 * The normalize plugin turns an MDX JSX element sitting directly inside one
 * of these into an `mdxJsxFlowElement` when the parent is not also a
 * phrasing container.
 */
const FLOW_CONTAINERS = new Set<string>([
    // Plain mdast block containers
    'root', 'listItem', 'blockquote', 'footnoteDefinition',
    // A block MDX JSX element holds flow content itself
    'mdxJsxFlowElement',
])
|
|
33
|
+
|
|
34
|
+
/**
 * Node types treated as phrasing (inline) content.
 *
 * Covers the mdast phrasing nodes plus the inline nodes MDX adds.
 * `footnoteReference` and `mdxTextExpression` are included so that an inline
 * element such as `<span>{value}</span>` or one holding a footnote marker is
 * not mistaken for a container of block content.
 *
 * Built once at module load instead of on every `isPhrasing` call.
 */
const PHRASING_TYPES = new Set<string>([
    'text',
    'emphasis',
    'strong',
    'delete',
    'html',
    'image',
    'imageReference',
    'inlineCode',
    'link',
    'linkReference',
    'break',
    'footnoteReference',
    'mdxJsxTextElement',
    'mdxTextExpression',
])

/**
 * Check if a node represents phrasing content.
 *
 * @param node - Any syntax-tree node; only its `type` is inspected.
 * @returns `true` when `node.type` is one of the phrasing types, else `false`.
 */
function isPhrasing(node: Node): boolean {
    return PHRASING_TYPES.has(node.type)
}
|
|
52
|
+
|
|
53
|
+
/**
 * Lower-case tag names that are treated as block-level.
 * The normalize plugin looks up an MDX JSX element's lower-cased name here;
 * a match makes the element an `mdxJsxFlowElement` whatever its parent is.
 */
const blockLevelTags = new Set<string>([
    // Generic containers and text blocks
    'div',
    'p',
    'blockquote',
    // Headings
    'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
    // Lists
    'ul', 'ol', 'li',
    // Preformatted text and rules
    'pre',
    'hr',
    // Tables
    'table', 'thead', 'tbody', 'tfoot', 'tr', 'th', 'td',
    // Sectioning
    'section', 'article', 'aside', 'nav', 'header', 'footer', 'main',
    // Figures
    'figure', 'figcaption',
    // Notion-specific block elements
    'callout',
    'columns', 'column',
    'page',
    'database',
    'data-source',
    'audio', 'video', 'file', 'pdf', 'embed',
    'synced_block', 'synced_block_reference',
    'meeting-notes', 'summary', 'notes', 'transcript',
    'table_of_contents',
    'unknown',
    // Images can be block-level in Notion
    'image',
])
|
|
78
|
+
|
|
79
|
+
/**
 * remark plugin that makes every MDX JSX element's node type
 * (`mdxJsxTextElement` vs `mdxJsxFlowElement`) agree with where it sits and
 * what it contains.
 *
 * Rules, highest priority first:
 *  1. The lower-cased element name is in `blockLevelTags`  → flow
 *  2. Any child is not phrasing content                    → flow
 *  3. The parent is a phrasing container                   → text
 *  4. The parent is a flow container                       → flow
 *  5. None of the above                                    → type left as-is
 *
 * @returns A transformer that rewrites `type` on matching nodes in place.
 */
export default function remarkMdxJsxNormalize() {
    return function transform(tree: Root) {
        visitParents(tree, isMdxJsx, (node, ancestors) => {
            // The closest ancestor is the direct parent; without one there is
            // no context to normalize against.
            const parent = ancestors[ancestors.length - 1] as Parent | undefined
            if (!parent) return

            const element = node as MdxJsxTextElement | MdxJsxFlowElement
            const kids = (element.children || []) as RootContent[]

            // Rules 1 and 2: properties of the element itself beat the parent context.
            const mustBeFlow =
                (element.name ? blockLevelTags.has(element.name.toLowerCase()) : false) ||
                kids.some((kid) => !isPhrasing(kid))

            let target: 'mdxJsxTextElement' | 'mdxJsxFlowElement'
            if (mustBeFlow) {
                target = 'mdxJsxFlowElement'
            } else if (PHRASE_CONTAINERS.has(parent.type)) {
                target = 'mdxJsxTextElement'
            } else if (FLOW_CONTAINERS.has(parent.type)) {
                target = 'mdxJsxFlowElement'
            } else {
                // Unrecognized parent: keep whatever the element already is.
                target = element.type
            }

            // Only write when something actually changes.
            if (target !== element.type) {
                element.type = target
            }
        })
    }
}
|
|
124
|
+
|
|
125
|
+
/**
 * Tell whether a node is an MDX JSX element of either kind.
 *
 * @param node - Any syntax-tree node; only its `type` is inspected.
 * @returns `true` for `mdxJsxTextElement` and `mdxJsxFlowElement`, else `false`.
 */
function isMdxJsx(node: Node): boolean {
    switch (node.type) {
        case 'mdxJsxTextElement':
        case 'mdxJsxFlowElement':
            return true
        default:
            return false
    }
}
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
// Allow-list of lower-case HTML tag names that are preserved.
// A tag name that is not in this set is filtered out (converted to an empty string).
// NOTE(review): the list includes `script`, `style`, `iframe`, `object` and
// `embed` — confirm that letting these through for raw HTML is intended.
export const validHtmlElements = new Set<string>([
    // Document metadata
    'base', 'head', 'link', 'meta', 'style', 'title',

    // Content sectioning
    'address', 'article', 'aside', 'footer', 'header',
    'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
    'main', 'nav', 'section',

    // Text content
    'blockquote', 'dd', 'div', 'dl', 'dt', 'figcaption', 'figure',
    'hr', 'li', 'ol', 'p', 'pre', 'ul',

    // Inline text semantics
    'a', 'abbr', 'b', 'bdi', 'bdo', 'br', 'cite', 'code', 'data', 'dfn',
    'em', 'i', 'kbd', 'mark', 'q', 'rp', 'rt', 'ruby', 's', 'samp',
    'small', 'span', 'strong', 'sub', 'sup', 'time', 'u', 'var', 'wbr',

    // Image and multimedia
    'area', 'audio', 'img', 'map', 'track', 'video',

    // Embedded content
    'embed', 'iframe', 'object', 'param', 'picture', 'portal', 'source',

    // SVG and MathML ('path' carried over from nativeTags)
    'svg', 'math', 'path',

    // Scripting
    'canvas', 'noscript', 'script',

    // Demarcating edits
    'del', 'ins',

    // Table content
    'caption', 'col', 'colgroup', 'table', 'tbody', 'td', 'tfoot',
    'th', 'thead', 'tr',

    // Forms
    'button', 'datalist', 'fieldset', 'form', 'input', 'label', 'legend',
    'meter', 'optgroup', 'option', 'output', 'progress', 'select', 'textarea',

    // Interactive elements
    'details', 'dialog', 'menu', 'summary',

    // Web Components
    'slot', 'template',
])

// Array form of the same tag names, in set insertion order, kept for
// backward compatibility with code that used `nativeTags`.
export const nativeTags = [...validHtmlElements] as readonly string[]
|
|
50
|
+
|
|
51
|
+
/**
 * Map an HTML tag name to the name used for the JSX element.
 *
 * The lookup is case-insensitive: the tag name is lower-cased before it is
 * checked against `validHtmlElements`.
 *
 * @param tagName - Tag name as it appeared in the HTML.
 * @returns The lower-cased tag name when it is a valid HTML element,
 *          otherwise an empty string.
 */
export function htmlTagNameConverter({ tagName }: { tagName: string }): string {
    const normalized = tagName.toLowerCase()
    return validHtmlElements.has(normalized) ? normalized : ''
}
|
package/src/parse.ts
CHANGED
|
@@ -5,6 +5,9 @@ import { Root, RootContent } from 'mdast'
|
|
|
5
5
|
import { remark } from 'remark'
|
|
6
6
|
import remarkGfm from 'remark-gfm'
|
|
7
7
|
import remarkMdx from 'remark-mdx'
|
|
8
|
+
import { parseHtmlToMdxAst } from './html/html-to-mdx-ast.js'
|
|
9
|
+
|
|
10
|
+
export { parseHtmlToMdxAst }
|
|
8
11
|
|
|
9
12
|
export function mdxParse(code: string) {
|
|
10
13
|
const file = mdxProcessor.processSync(code)
|
package/src/safe-mdx.test.tsx
CHANGED
|
@@ -1,5 +1,4 @@
|
|
|
1
1
|
import dedent from 'dedent'
|
|
2
|
-
import { htmlToJsx } from 'html-to-jsx-transform'
|
|
3
2
|
import React from 'react'
|
|
4
3
|
import { renderToStaticMarkup } from 'react-dom/server'
|
|
5
4
|
import { expect, test } from 'vitest'
|
|
@@ -29,16 +28,6 @@ function render(code, componentPropsSchema?: ComponentPropsSchema, allowClientEs
|
|
|
29
28
|
return { result, errors: visitor.errors || [], html }
|
|
30
29
|
}
|
|
31
30
|
|
|
32
|
-
test('htmlToJsx', () => {
|
|
33
|
-
expect(htmlToJsx('<p x="y">')).toMatchInlineSnapshot(`"<p x="y" />"`)
|
|
34
|
-
expect(htmlToJsx('<p>text</p>')).toMatchInlineSnapshot(`"<p>text</p>"`)
|
|
35
|
-
expect(htmlToJsx('before <p>text</p>')).toMatchInlineSnapshot(
|
|
36
|
-
`"<>before <p>text</p></>"`,
|
|
37
|
-
)
|
|
38
|
-
expect(htmlToJsx('<nonexisting>text</nonexisting>')).toMatchInlineSnapshot(
|
|
39
|
-
`"<nonexisting>text</nonexisting>"`,
|
|
40
|
-
)
|
|
41
|
-
})
|
|
42
31
|
|
|
43
32
|
test('reference links with titles', () => {
|
|
44
33
|
const code = dedent`
|
|
@@ -2269,6 +2258,66 @@ test('kitchen sink', () => {
|
|
|
2269
2258
|
`)
|
|
2270
2259
|
})
|
|
2271
2260
|
|
|
2261
|
+
test('mdx jsx with unknown components are ignored', () => {
|
|
2262
|
+
// Note: In MDX, <custom-element> is treated as MDX JSX, not raw HTML
|
|
2263
|
+
// Unknown JSX components are ignored completely (including their content)
|
|
2264
|
+
const code = dedent`
|
|
2265
|
+
# Heading with JSX
|
|
2266
|
+
|
|
2267
|
+
This is a paragraph with some JSX components.
|
|
2268
|
+
|
|
2269
|
+
<div>This is a valid div</div>
|
|
2270
|
+
|
|
2271
|
+
<CustomElement>This unknown component should be ignored</CustomElement>
|
|
2272
|
+
|
|
2273
|
+
<span className="highlight">This span is valid</span>
|
|
2274
|
+
|
|
2275
|
+
<AnotherUnknown>Another unknown component content</AnotherUnknown>
|
|
2276
|
+
|
|
2277
|
+
More text after JSX.
|
|
2278
|
+
`
|
|
2279
|
+
|
|
2280
|
+
const { html, result, errors } = render(code)
|
|
2281
|
+
|
|
2282
|
+
// Check that valid HTML elements are present
|
|
2283
|
+
expect(html).toContain('<div>This is a valid div</div>')
|
|
2284
|
+
expect(html).toContain('<span')
|
|
2285
|
+
expect(html).toContain('This span is valid</span>')
|
|
2286
|
+
|
|
2287
|
+
// Check that unknown components are completely ignored
|
|
2288
|
+
expect(html).not.toContain('CustomElement')
|
|
2289
|
+
expect(html).not.toContain('AnotherUnknown')
|
|
2290
|
+
expect(html).not.toContain('This unknown component should be ignored')
|
|
2291
|
+
expect(html).not.toContain('Another unknown component content')
|
|
2292
|
+
|
|
2293
|
+
// Check that errors were generated for unknown components
|
|
2294
|
+
expect(errors).toHaveLength(2)
|
|
2295
|
+
expect(errors[0].message).toContain('Unsupported jsx component CustomElement')
|
|
2296
|
+
expect(errors[1].message).toContain('Unsupported jsx component AnotherUnknown')
|
|
2297
|
+
|
|
2298
|
+
expect(result).toMatchInlineSnapshot(`
|
|
2299
|
+
<React.Fragment>
|
|
2300
|
+
<h1>
|
|
2301
|
+
Heading with JSX
|
|
2302
|
+
</h1>
|
|
2303
|
+
<p>
|
|
2304
|
+
This is a paragraph with some JSX components.
|
|
2305
|
+
</p>
|
|
2306
|
+
<div>
|
|
2307
|
+
This is a valid div
|
|
2308
|
+
</div>
|
|
2309
|
+
<span
|
|
2310
|
+
className="highlight"
|
|
2311
|
+
>
|
|
2312
|
+
This span is valid
|
|
2313
|
+
</span>
|
|
2314
|
+
<p>
|
|
2315
|
+
More text after JSX.
|
|
2316
|
+
</p>
|
|
2317
|
+
</React.Fragment>
|
|
2318
|
+
`)
|
|
2319
|
+
})
|
|
2320
|
+
|
|
2272
2321
|
test('code block rendering', () => {
|
|
2273
2322
|
const code = dedent`
|
|
2274
2323
|
`
|
|
@@ -2904,7 +2953,7 @@ test('ESM imports from https URLs', () => {
|
|
|
2904
2953
|
|
|
2905
2954
|
// Since these are dynamic imports that only work on client, the server render should return null
|
|
2906
2955
|
const html = renderToStaticMarkup(result)
|
|
2907
|
-
expect(html).toMatchInlineSnapshot(`"<h1>Hello</h1>"`)
|
|
2956
|
+
expect(html).toMatchInlineSnapshot(`"<link href="https://esm.sh" rel="dns-prefetch"/><link rel="preconnect" href="https://esm.sh"/><h1>Hello</h1>"`)
|
|
2908
2957
|
|
|
2909
2958
|
expect(visitor.errors).toEqual([])
|
|
2910
2959
|
})
|
|
@@ -3334,3 +3383,120 @@ test('jsx component with complex array props should show clear error message', (
|
|
|
3334
3383
|
expect(expressionError!.message).toContain('Functions are not supported')
|
|
3335
3384
|
expect(expressionError!.line).toBe(1)
|
|
3336
3385
|
})
|
|
3386
|
+
|
|
3387
|
+
test('override renderNode to wrap bold text in colored span', () => {
|
|
3388
|
+
const code = dedent`
|
|
3389
|
+
This is **bold text** and this is regular text.
|
|
3390
|
+
|
|
3391
|
+
Another line with **more bold** content.
|
|
3392
|
+
`
|
|
3393
|
+
|
|
3394
|
+
const mdast = mdxParse(code)
|
|
3395
|
+
const visitor = new MdastToJsx({
|
|
3396
|
+
markdown: code,
|
|
3397
|
+
mdast,
|
|
3398
|
+
components,
|
|
3399
|
+
renderNode: (node, transform) => {
|
|
3400
|
+
if (node.type === 'strong') {
|
|
3401
|
+
return (
|
|
3402
|
+
<span style={{ color: 'red', fontWeight: 'bold' }}>
|
|
3403
|
+
{node.children?.map(child => transform(child))}
|
|
3404
|
+
</span>
|
|
3405
|
+
)
|
|
3406
|
+
}
|
|
3407
|
+
// Return undefined to use default rendering
|
|
3408
|
+
return undefined
|
|
3409
|
+
}
|
|
3410
|
+
})
|
|
3411
|
+
|
|
3412
|
+
const result = visitor.run()
|
|
3413
|
+
const html = renderToStaticMarkup(result)
|
|
3414
|
+
|
|
3415
|
+
expect(html).toMatchInlineSnapshot(`"<p>This is <span style="color:red;font-weight:bold">bold text</span> and this is regular text.</p><p>Another line with <span style="color:red;font-weight:bold">more bold</span> content.</p>"`)
|
|
3416
|
+
})
|
|
3417
|
+
|
|
3418
|
+
test("skip unknown elements in raw HTML content", () => {
|
|
3419
|
+
const { html } = render(`
|
|
3420
|
+
Some text before
|
|
3421
|
+
<html><body>
|
|
3422
|
+
<p>Valid paragraph</p>
|
|
3423
|
+
<unknonw>This content should be preserved</unknonw>
|
|
3424
|
+
<div>Valid div</div>
|
|
3425
|
+
<faketag>Another preserved content</faketag>
|
|
3426
|
+
</body></html>
|
|
3427
|
+
Some text after
|
|
3428
|
+
`);
|
|
3429
|
+
expect(html).toMatchInlineSnapshot(`"<p>Some text before</p><p>Some text after</p>"`);
|
|
3430
|
+
});
|
|
3431
|
+
|
|
3432
|
+
test("skip unknown elements in complex nested HTML structures", () => {
|
|
3433
|
+
const { html } = render(`
|
|
3434
|
+
# Main Title
|
|
3435
|
+
|
|
3436
|
+
<article>
|
|
3437
|
+
<header>
|
|
3438
|
+
<h1>Article Title</h1>
|
|
3439
|
+
<customheader>
|
|
3440
|
+
<p>This paragraph should be preserved</p>
|
|
3441
|
+
<time>2024-01-01</time>
|
|
3442
|
+
</customheader>
|
|
3443
|
+
</header>
|
|
3444
|
+
|
|
3445
|
+
<section>
|
|
3446
|
+
<blockquote>
|
|
3447
|
+
<p>A famous quote</p>
|
|
3448
|
+
<customcite>
|
|
3449
|
+
<strong>- Author Name</strong>
|
|
3450
|
+
<unknowntag>
|
|
3451
|
+
<em>from a book</em>
|
|
3452
|
+
<span>published in <b>2024</b></span>
|
|
3453
|
+
</unknowntag>
|
|
3454
|
+
</customcite>
|
|
3455
|
+
</blockquote>
|
|
3456
|
+
|
|
3457
|
+
<fakesection>
|
|
3458
|
+
<h2>Nested Heading</h2>
|
|
3459
|
+
<ul>
|
|
3460
|
+
<li>First item with <invalidtag>nested <code>code</code></invalidtag></li>
|
|
3461
|
+
<li>Second item</li>
|
|
3462
|
+
<customli>Third item should show</customli>
|
|
3463
|
+
</ul>
|
|
3464
|
+
</fakesection>
|
|
3465
|
+
</section>
|
|
3466
|
+
|
|
3467
|
+
<footer>
|
|
3468
|
+
<customfooter>
|
|
3469
|
+
<nav>
|
|
3470
|
+
<a href="#top">Back to top</a>
|
|
3471
|
+
<fakelink>
|
|
3472
|
+
<span>Contact us</span>
|
|
3473
|
+
</fakelink>
|
|
3474
|
+
</nav>
|
|
3475
|
+
</customfooter>
|
|
3476
|
+
</footer>
|
|
3477
|
+
</article>
|
|
3478
|
+
|
|
3479
|
+
## Another Section
|
|
3480
|
+
|
|
3481
|
+
<div>
|
|
3482
|
+
<customelement>
|
|
3483
|
+
<table>
|
|
3484
|
+
<thead>
|
|
3485
|
+
<tr>
|
|
3486
|
+
<th>Header 1</th>
|
|
3487
|
+
<customth>Header 2 content</customth>
|
|
3488
|
+
</tr>
|
|
3489
|
+
</thead>
|
|
3490
|
+
<tbody>
|
|
3491
|
+
<customrow>
|
|
3492
|
+
<td>Cell 1</td>
|
|
3493
|
+
<td>Cell 2</td>
|
|
3494
|
+
</customrow>
|
|
3495
|
+
</tbody>
|
|
3496
|
+
</table>
|
|
3497
|
+
</customelement>
|
|
3498
|
+
</div>
|
|
3499
|
+
`);
|
|
3500
|
+
|
|
3501
|
+
expect(html).toMatchInlineSnapshot(`"<h1>Main Title</h1><article><header><h1>Article Title</h1></header><section><blockquote><p>A famous quote</p></blockquote></section><footer></footer></article><h2>Another Section</h2><div></div>"`);
|
|
3502
|
+
});
|
package/src/safe-mdx.tsx
CHANGED
|
@@ -1,25 +1,16 @@
|
|
|
1
|
-
import React, {
|
|
1
|
+
import React, { cloneElement } from 'react'
|
|
2
2
|
|
|
3
3
|
import type { StandardSchemaV1 } from '@standard-schema/spec'
|
|
4
|
+
import type { JSXElement } from 'estree-jsx'
|
|
5
|
+
import Evaluate from 'eval-estree-expression'
|
|
4
6
|
import type { Node, Parent, Root, RootContent } from 'mdast'
|
|
5
7
|
import type { MdxJsxFlowElement, MdxJsxTextElement } from 'mdast-util-mdx-jsx'
|
|
6
|
-
import type {
|
|
7
|
-
JSXElement,
|
|
8
|
-
JSXAttribute,
|
|
9
|
-
JSXText,
|
|
10
|
-
JSXExpressionContainer,
|
|
11
|
-
} from 'estree-jsx'
|
|
12
|
-
import Evaluate from 'eval-estree-expression'
|
|
13
8
|
|
|
14
9
|
import { Fragment, ReactNode } from 'react'
|
|
15
10
|
import { DynamicEsmComponent } from './dynamic-esm-component.js'
|
|
16
|
-
import {
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
import('./HtmlToJsxConverter.js').then((module) => ({
|
|
20
|
-
default: module.HtmlToJsxConverter,
|
|
21
|
-
})),
|
|
22
|
-
)
|
|
11
|
+
import { extractComponentInfo, parseEsmImports } from './esm-parser.js'
|
|
12
|
+
import { htmlToMdxAst } from './html/html-to-mdx-ast.js'
|
|
13
|
+
import { validHtmlElements, nativeTags } from './html/valid-html-elements.js'
|
|
23
14
|
|
|
24
15
|
export type MyRootContent = RootContent | Root
|
|
25
16
|
|
|
@@ -258,7 +249,10 @@ export class MdastToJsx {
|
|
|
258
249
|
|
|
259
250
|
return this.createElement(
|
|
260
251
|
Component,
|
|
261
|
-
this.addLineNumberToProps(
|
|
252
|
+
this.addLineNumberToProps(
|
|
253
|
+
{ ...attrs, importUrl, componentName },
|
|
254
|
+
node,
|
|
255
|
+
),
|
|
262
256
|
this.mapJsxChildren(node),
|
|
263
257
|
)
|
|
264
258
|
} else {
|
|
@@ -444,7 +438,11 @@ export class MdastToJsx {
|
|
|
444
438
|
onError({
|
|
445
439
|
message: `Failed to evaluate expression attribute: ${attr.value
|
|
446
440
|
.replace(/\n+/g, ' ')
|
|
447
|
-
.replace(/ +/g, ' ')}. ${
|
|
441
|
+
.replace(/ +/g, ' ')}. ${
|
|
442
|
+
error instanceof Error
|
|
443
|
+
? error.message
|
|
444
|
+
: String(error)
|
|
445
|
+
}`,
|
|
448
446
|
line: attr.position?.start?.line,
|
|
449
447
|
})
|
|
450
448
|
}
|
|
@@ -453,7 +451,11 @@ export class MdastToJsx {
|
|
|
453
451
|
onError({
|
|
454
452
|
message: `Failed to evaluate expression attribute: ${attr.value
|
|
455
453
|
.replace(/\n+/g, ' ')
|
|
456
|
-
.replace(/ +/g, ' ')}. ${
|
|
454
|
+
.replace(/ +/g, ' ')}. ${
|
|
455
|
+
error instanceof Error
|
|
456
|
+
? error.message
|
|
457
|
+
: String(error)
|
|
458
|
+
}`,
|
|
457
459
|
line: attr.position?.start?.line,
|
|
458
460
|
})
|
|
459
461
|
}
|
|
@@ -536,7 +538,13 @@ export class MdastToJsx {
|
|
|
536
538
|
continue
|
|
537
539
|
} catch (error) {
|
|
538
540
|
onError({
|
|
539
|
-
message: `Failed to evaluate expression attribute: ${
|
|
541
|
+
message: `Failed to evaluate expression attribute: ${
|
|
542
|
+
attr.name
|
|
543
|
+
}={${v.value}}. ${
|
|
544
|
+
error instanceof Error
|
|
545
|
+
? error.message
|
|
546
|
+
: String(error)
|
|
547
|
+
}`,
|
|
540
548
|
line: attr.position?.start?.line,
|
|
541
549
|
})
|
|
542
550
|
}
|
|
@@ -634,14 +642,26 @@ export class MdastToJsx {
|
|
|
634
642
|
return result
|
|
635
643
|
} catch (error) {
|
|
636
644
|
this.errors.push({
|
|
637
|
-
message: `Failed to evaluate expression: ${
|
|
645
|
+
message: `Failed to evaluate expression: ${
|
|
646
|
+
node.value
|
|
647
|
+
}. ${
|
|
648
|
+
error instanceof Error
|
|
649
|
+
? error.message
|
|
650
|
+
: String(error)
|
|
651
|
+
}`,
|
|
638
652
|
line: node.position?.start?.line,
|
|
639
653
|
})
|
|
640
654
|
}
|
|
641
655
|
}
|
|
642
656
|
} catch (error) {
|
|
643
657
|
this.errors.push({
|
|
644
|
-
message: `Failed to evaluate expression: ${
|
|
658
|
+
message: `Failed to evaluate expression: ${
|
|
659
|
+
node.value
|
|
660
|
+
}. ${
|
|
661
|
+
error instanceof Error
|
|
662
|
+
? error.message
|
|
663
|
+
: String(error)
|
|
664
|
+
}`,
|
|
645
665
|
line: node.position?.start?.line,
|
|
646
666
|
})
|
|
647
667
|
}
|
|
@@ -908,15 +928,26 @@ export class MdastToJsx {
|
|
|
908
928
|
return []
|
|
909
929
|
}
|
|
910
930
|
|
|
911
|
-
|
|
912
|
-
|
|
913
|
-
|
|
914
|
-
|
|
915
|
-
|
|
916
|
-
|
|
917
|
-
|
|
918
|
-
|
|
919
|
-
|
|
931
|
+
// Parse HTML to MDX AST using the new approach
|
|
932
|
+
const mdxAst = htmlToMdxAst({
|
|
933
|
+
html: text,
|
|
934
|
+
convertTagName: ({ tagName }) => {
|
|
935
|
+
const lowerTag = tagName.toLowerCase()
|
|
936
|
+
// Only keep valid HTML elements
|
|
937
|
+
if (validHtmlElements.has(lowerTag)) {
|
|
938
|
+
return lowerTag
|
|
939
|
+
}
|
|
940
|
+
// Return empty string for non-HTML elements
|
|
941
|
+
return ''
|
|
942
|
+
}
|
|
943
|
+
})
|
|
944
|
+
|
|
945
|
+
// Process the MDX AST nodes
|
|
946
|
+
if (Array.isArray(mdxAst)) {
|
|
947
|
+
return mdxAst.map(child => this.mdastTransformer(child))
|
|
948
|
+
} else {
|
|
949
|
+
return this.mdastTransformer(mdxAst)
|
|
950
|
+
}
|
|
920
951
|
}
|
|
921
952
|
case 'imageReference': {
|
|
922
953
|
return []
|
|
@@ -978,69 +1009,6 @@ function safeJsonParse(str: string) {
|
|
|
978
1009
|
}
|
|
979
1010
|
}
|
|
980
1011
|
|
|
981
|
-
const nativeTags = [
|
|
982
|
-
'blockquote',
|
|
983
|
-
'strong',
|
|
984
|
-
'em',
|
|
985
|
-
'del',
|
|
986
|
-
'hr',
|
|
987
|
-
'a',
|
|
988
|
-
'b',
|
|
989
|
-
'br',
|
|
990
|
-
'button',
|
|
991
|
-
'div',
|
|
992
|
-
'form',
|
|
993
|
-
'h1',
|
|
994
|
-
'h2',
|
|
995
|
-
'h3',
|
|
996
|
-
'h4',
|
|
997
|
-
'head',
|
|
998
|
-
'iframe',
|
|
999
|
-
'img',
|
|
1000
|
-
'input',
|
|
1001
|
-
'label',
|
|
1002
|
-
'li',
|
|
1003
|
-
'link',
|
|
1004
|
-
'ol',
|
|
1005
|
-
'p',
|
|
1006
|
-
'path',
|
|
1007
|
-
'picture',
|
|
1008
|
-
'script',
|
|
1009
|
-
'section',
|
|
1010
|
-
'source',
|
|
1011
|
-
'span',
|
|
1012
|
-
'sub',
|
|
1013
|
-
'sup',
|
|
1014
|
-
'svg',
|
|
1015
|
-
'table',
|
|
1016
|
-
'tbody',
|
|
1017
|
-
'td',
|
|
1018
|
-
'tfoot',
|
|
1019
|
-
'th',
|
|
1020
|
-
'thead',
|
|
1021
|
-
'tr',
|
|
1022
|
-
'ul',
|
|
1023
|
-
'video',
|
|
1024
|
-
'code',
|
|
1025
|
-
'pre',
|
|
1026
|
-
'figure',
|
|
1027
|
-
'canvas',
|
|
1028
|
-
'details',
|
|
1029
|
-
'dl',
|
|
1030
|
-
'dt',
|
|
1031
|
-
'dd',
|
|
1032
|
-
'fieldset',
|
|
1033
|
-
'footer',
|
|
1034
|
-
'header',
|
|
1035
|
-
'legend',
|
|
1036
|
-
'main',
|
|
1037
|
-
'mark',
|
|
1038
|
-
'nav',
|
|
1039
|
-
'progress',
|
|
1040
|
-
'summary',
|
|
1041
|
-
'time',
|
|
1042
|
-
] as const
|
|
1043
|
-
|
|
1044
1012
|
type ComponentsMap = { [k in (typeof nativeTags)[number]]?: any } & {
|
|
1045
1013
|
[key: string]: any
|
|
1046
1014
|
}
|
|
@@ -1,10 +0,0 @@
|
|
|
1
|
-
import React from 'react';
|
|
2
|
-
import type { MdastToJsx, MyRootContent } from './safe-mdx.js';
|
|
3
|
-
interface HtmlToJsxConverterProps {
|
|
4
|
-
htmlText: string;
|
|
5
|
-
instance: MdastToJsx;
|
|
6
|
-
node: MyRootContent;
|
|
7
|
-
}
|
|
8
|
-
export declare function HtmlToJsxConverter({ htmlText, instance, node, }: HtmlToJsxConverterProps): string | number | bigint | true | React.ReactElement<unknown, string | React.JSXElementConstructor<any>> | Iterable<React.ReactNode> | Promise<string | number | bigint | boolean | React.ReactPortal | React.ReactElement<unknown, string | React.JSXElementConstructor<any>> | Iterable<React.ReactNode>>;
|
|
9
|
-
export {};
|
|
10
|
-
//# sourceMappingURL=HtmlToJsxConverter.d.ts.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"HtmlToJsxConverter.d.ts","sourceRoot":"","sources":["../src/HtmlToJsxConverter.tsx"],"names":[],"mappings":"AAAA,OAAO,KAAK,MAAM,OAAO,CAAA;AAEzB,OAAO,KAAK,EAAE,UAAU,EAAE,aAAa,EAAE,MAAM,eAAe,CAAA;AAI9D,UAAU,uBAAuB;IAC7B,QAAQ,EAAE,MAAM,CAAA;IAChB,QAAQ,EAAE,UAAU,CAAA;IACpB,IAAI,EAAE,aAAa,CAAA;CACtB;AAED,wBAAgB,kBAAkB,CAAC,EAC/B,QAAQ,EACR,QAAQ,EACR,IAAI,GACP,EAAE,uBAAuB,+SAoBzB"}
|
|
@@ -1,22 +0,0 @@
|
|
|
1
|
-
import React from 'react';
|
|
2
|
-
import { htmlToJsx } from 'html-to-jsx-transform';
|
|
3
|
-
React;
|
|
4
|
-
export function HtmlToJsxConverter({ htmlText, instance, node, }) {
|
|
5
|
-
try {
|
|
6
|
-
const jsx = htmlToJsx(htmlText);
|
|
7
|
-
const originalJsxStr = instance.jsxStr;
|
|
8
|
-
instance.jsxStr = jsx;
|
|
9
|
-
const result = instance.jsxTransformer(node);
|
|
10
|
-
instance.jsxStr = originalJsxStr;
|
|
11
|
-
if (Array.isArray(result)) {
|
|
12
|
-
console.log(`Unexpected array result`);
|
|
13
|
-
return null;
|
|
14
|
-
}
|
|
15
|
-
return result || null;
|
|
16
|
-
}
|
|
17
|
-
catch (error) {
|
|
18
|
-
console.error('Error converting HTML to JSX:', error);
|
|
19
|
-
return null;
|
|
20
|
-
}
|
|
21
|
-
}
|
|
22
|
-
//# sourceMappingURL=HtmlToJsxConverter.js.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"HtmlToJsxConverter.js","sourceRoot":"","sources":["../src/HtmlToJsxConverter.tsx"],"names":[],"mappings":"AAAA,OAAO,KAAK,MAAM,OAAO,CAAA;AACzB,OAAO,EAAE,SAAS,EAAE,MAAM,uBAAuB,CAAA;AAGjD,KAAK,CAAA;AAQL,MAAM,UAAU,kBAAkB,CAAC,EAC/B,QAAQ,EACR,QAAQ,EACR,IAAI,GACkB;IACtB,IAAI;QACA,MAAM,GAAG,GAAG,SAAS,CAAC,QAAQ,CAAC,CAAA;QAC/B,MAAM,cAAc,GAAG,QAAQ,CAAC,MAAM,CAAA;QACtC,QAAQ,CAAC,MAAM,GAAG,GAAG,CAAA;QAErB,MAAM,MAAM,GAAG,QAAQ,CAAC,cAAc,CAAC,IAAI,CAAC,CAAA;QAE5C,QAAQ,CAAC,MAAM,GAAG,cAAc,CAAA;QAEhC,IAAI,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE;YACvB,OAAO,CAAC,GAAG,CAAC,yBAAyB,CAAC,CAAA;YACtC,OAAO,IAAI,CAAA;SACd;QAED,OAAO,MAAM,IAAI,IAAI,CAAA;KACxB;IAAC,OAAO,KAAK,EAAE;QACZ,OAAO,CAAC,KAAK,CAAC,+BAA+B,EAAE,KAAK,CAAC,CAAA;QACrD,OAAO,IAAI,CAAA;KACd;AACL,CAAC"}
|