safe-mdx 1.3.2 → 1.3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +14 -14
- package/dist/assets/HtmlToJsxConverter-Ds0bTjpw.js +24 -0
- package/dist/assets/_commonjsHelpers-CqkleIqs.js +1 -0
- package/dist/assets/index-B5fPOjPt.css +1 -0
- package/dist/assets/index-B7ATSoRE.js +9 -0
- package/dist/assets/index-BwZ2FTRd.js +146 -0
- package/dist/assets/index-R1UqLMGJ.js +1 -0
- package/dist/assets/index-c0qeY2gs.js +9 -0
- package/dist/assets/jsx-runtime-BhZZLbvw.js +9 -0
- package/dist/assets/jsx-runtime-NArryeSM.js +1 -0
- package/dist/assets/react-Ca6JzGpx.js +1 -0
- package/dist/assets/react-dom-BYRHYqYl.js +1 -0
- package/dist/html/attributes.d.ts +19 -0
- package/dist/html/attributes.d.ts.map +1 -0
- package/dist/html/attributes.js +289 -0
- package/dist/html/attributes.js.map +1 -0
- package/dist/html/convert-attributes.d.ts +6 -0
- package/dist/html/convert-attributes.d.ts.map +1 -0
- package/dist/html/convert-attributes.js +43 -0
- package/dist/html/convert-attributes.js.map +1 -0
- package/dist/html/domparser-browser.d.ts +4 -0
- package/dist/html/domparser-browser.d.ts.map +1 -0
- package/dist/html/domparser-browser.js +7 -0
- package/dist/html/domparser-browser.js.map +1 -0
- package/dist/html/domparser.d.ts +2 -0
- package/dist/html/domparser.d.ts.map +1 -0
- package/dist/html/domparser.js +5 -0
- package/dist/html/domparser.js.map +1 -0
- package/dist/html/html-to-mdx-ast.d.ts +23 -0
- package/dist/html/html-to-mdx-ast.d.ts.map +1 -0
- package/dist/html/html-to-mdx-ast.js +227 -0
- package/dist/html/html-to-mdx-ast.js.map +1 -0
- package/dist/html/html-to-mdx-ast.test.d.ts +2 -0
- package/dist/html/html-to-mdx-ast.test.d.ts.map +1 -0
- package/dist/html/html-to-mdx-ast.test.js +324 -0
- package/dist/html/html-to-mdx-ast.test.js.map +1 -0
- package/dist/html/remark-mdx-jsx-normalize.d.ts +10 -0
- package/dist/html/remark-mdx-jsx-normalize.d.ts.map +1 -0
- package/dist/html/remark-mdx-jsx-normalize.js +117 -0
- package/dist/html/remark-mdx-jsx-normalize.js.map +1 -0
- package/dist/html/valid-html-elements.d.ts +10 -0
- package/dist/html/valid-html-elements.d.ts.map +1 -0
- package/dist/html/valid-html-elements.js +50 -0
- package/dist/html/valid-html-elements.js.map +1 -0
- package/dist/index.html +19 -0
- package/dist/parse.d.ts +2 -0
- package/dist/parse.d.ts.map +1 -1
- package/dist/parse.js +2 -0
- package/dist/parse.js.map +1 -1
- package/dist/safe-mdx.d.ts +1 -1
- package/dist/safe-mdx.d.ts.map +1 -1
- package/dist/safe-mdx.js +23 -71
- package/dist/safe-mdx.js.map +1 -1
- package/dist/safe-mdx.test.js +161 -8
- package/dist/safe-mdx.test.js.map +1 -1
- package/package.json +27 -6
- package/src/html/README +17 -0
- package/src/html/attributes.ts +297 -0
- package/src/html/convert-attributes.ts +59 -0
- package/src/html/domparser-browser.ts +6 -0
- package/src/html/domparser.ts +5 -0
- package/src/html/html-to-mdx-ast.test.ts +365 -0
- package/src/html/html-to-mdx-ast.ts +304 -0
- package/src/html/remark-mdx-jsx-normalize.ts +128 -0
- package/src/html/valid-html-elements.ts +65 -0
- package/src/parse.ts +3 -0
- package/src/safe-mdx.test.tsx +178 -12
- package/src/safe-mdx.tsx +23 -79
- package/dist/HtmlToJsxConverter.d.ts +0 -10
- package/dist/HtmlToJsxConverter.d.ts.map +0 -1
- package/dist/HtmlToJsxConverter.js +0 -22
- package/dist/HtmlToJsxConverter.js.map +0 -1
- package/dist/plugins.d.ts +0 -12
- package/dist/plugins.d.ts.map +0 -1
- package/dist/plugins.js +0 -68
- package/dist/plugins.js.map +0 -1
- package/src/HtmlToJsxConverter.tsx +0 -37
|
@@ -0,0 +1,304 @@
|
|
|
1
|
+
import type { Root, RootContent, Text as MdastText } from 'mdast'
|
|
2
|
+
import type {
|
|
3
|
+
MdxJsxAttribute,
|
|
4
|
+
MdxJsxAttributeValueExpression,
|
|
5
|
+
MdxJsxTextElement,
|
|
6
|
+
} from 'mdast-util-mdx-jsx'
|
|
7
|
+
import type { Processor } from 'unified'
|
|
8
|
+
import { convertAttributeNameToJSX } from './convert-attributes.js'
|
|
9
|
+
import { parseHTML } from './domparser.js'
|
|
10
|
+
|
|
11
|
+
// Re-export the normalize plugin
|
|
12
|
+
export { default as remarkMdxJsxNormalize } from './remark-mdx-jsx-normalize.js'
|
|
13
|
+
|
|
14
|
+
// Type for converting tag names
|
|
15
|
+
/**
 * Callback that maps an element's tag name to the JSX element name to emit.
 * `htmlNodeToMdxAst` treats an empty-string result as "drop this element but
 * keep its converted children".
 */
export type ConvertTagName = (args: { tagName: string }) => string
|
|
16
|
+
|
|
17
|
+
// Type for converting text to mdast nodes - now returns AST nodes directly
|
|
18
|
+
/**
 * Callback that turns the content of one DOM text node into mdast content.
 * It may return a single node or a list of nodes; lists are spliced into the
 * surrounding children by the converter.
 */
export type TextToMdast = (args: {
|
|
19
|
+
// Raw text of the DOM text node (empty string when the node has none).
text: string
|
|
20
|
+
}) => RootContent | RootContent[]
|
|
21
|
+
|
|
22
|
+
// Type for converting attribute values
|
|
23
|
+
/**
 * Callback that rewrites an attribute value before it is stored on the
 * generated `mdxJsxAttribute`. The returned string replaces the DOM value.
 */
export type ConvertAttributeValue = (args: {
|
|
24
|
+
// Attribute name exactly as found on the DOM attribute (before JSX renaming).
name: string
|
|
25
|
+
// Attribute value as found on the DOM attribute.
value: string
|
|
26
|
+
// Tag name of the owning element, as passed in by the converter.
tagName: string
|
|
27
|
+
}) => string
|
|
28
|
+
|
|
29
|
+
// Options for parsing HTML to MDX AST
|
|
30
|
+
/**
 * Options accepted by `htmlToMdxAst` / `parseHtmlToMdxAst`.
 * Only `html` is required; every callback falls back to a built-in default.
 */
export interface ParseHtmlToMdxAstOptions {
|
|
31
|
+
// HTML source to convert; it is trimmed before being parsed.
html: string
|
|
32
|
+
// Called with the thrown error and the offending text when `textToMdast`
// throws. When omitted, the failure is reported through console.error.
onError?: (error: unknown, text: string) => void
|
|
33
|
+
// Tag-name mapper; the default lowercases the name.
convertTagName?: ConvertTagName
|
|
34
|
+
// Text-node converter; without it text nodes become plain mdast `text` nodes.
textToMdast?: TextToMdast
|
|
35
|
+
// Attribute-value mapper; the default returns the value unchanged.
convertAttributeValue?: ConvertAttributeValue
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
// Type guard functions for DOM nodes
|
|
39
|
+
/** Type guard: true when `node.nodeType` is 8 (Node.COMMENT_NODE). */
function isCommentNode(node: Node): node is Comment {
    const COMMENT_NODE = 8
    return node.nodeType === COMMENT_NODE
}
|
|
42
|
+
|
|
43
|
+
/** Type guard: true when `node.nodeType` is 3 (Node.TEXT_NODE). */
function isTextNode(node: Node): node is Text {
    const TEXT_NODE = 3
    return node.nodeType === TEXT_NODE
}
|
|
46
|
+
|
|
47
|
+
/** Type guard: true when `node.nodeType` is 1 (Node.ELEMENT_NODE). */
function isElementNode(node: Node): node is Element {
    const ELEMENT_NODE = 1
    return node.nodeType === ELEMENT_NODE
}
|
|
50
|
+
|
|
51
|
+
/**
 * Default tag name converter: returns the tag name lowercased.
 * Used when no `convertTagName` option is supplied.
 */
function defaultConvertTagName(args: { tagName: string }): string {
    const lowered = args.tagName.toLowerCase()
    return lowered
}
|
|
55
|
+
|
|
56
|
+
/**
 * Default attribute value converter: hands the value back untouched.
 * `name` and `tagName` are accepted for signature compatibility and ignored.
 */
function defaultConvertAttributeValue(args: {
    name: string
    value: string
    tagName: string
}): string {
    return args.value
}
|
|
66
|
+
|
|
67
|
+
/** JSX attribute names whose numeric values are emitted as number expressions. */
const NUMERIC_JSX_ATTRIBUTES: ReadonlySet<string> = new Set([
    'tabIndex',
    'cols',
    'rows',
    'size',
    'span',
    'colSpan',
    'rowSpan',
    'border',
])

/**
 * Convert one DOM attribute into an `mdxJsxAttribute` node.
 *
 * The name is passed through `convertAttributeNameToJSX`; the value is passed
 * through `options.convertAttributeValue` (or returned unchanged by default).
 * The converted value then decides the output shape:
 *  - `''` or a value equal to the original attribute name → boolean
 *    attribute (`value: null`);
 *  - a finite number on one of `NUMERIC_JSX_ATTRIBUTES` → expression
 *    attribute carrying a numeric ESTree literal;
 *  - anything else → plain string attribute (this includes `style`, which is
 *    kept as a string rather than parsed into an object).
 *
 * @param attr DOM attribute to convert.
 * @param tagName Tag name of the owning element, forwarded to the value converter.
 * @param options Conversion options; only `convertAttributeValue` is read here.
 * @returns The MDX JSX attribute node.
 */
function convertAttribute(
    attr: Attr,
    tagName: string,
    options?: ParseHtmlToMdxAstOptions,
): MdxJsxAttribute {
    const jsxName = convertAttributeNameToJSX(attr.name)

    // The converter sees the original (HTML) attribute name, not the JSX one.
    const convertAttrValue =
        options?.convertAttributeValue ?? defaultConvertAttributeValue
    const value = convertAttrValue({
        name: attr.name,
        value: attr.value,
        tagName,
    })

    // Boolean attributes: `<input disabled>` or `<input disabled="disabled">`.
    if (value === '' || value === attr.name) {
        return {
            type: 'mdxJsxAttribute',
            name: jsxName,
            value: null, // boolean true
        }
    }

    // Numeric attributes. Whitespace-only strings coerce to 0 under Number(),
    // and "Infinity" is not representable as a numeric literal, so both are
    // excluded here and fall through to the string branch.
    const numericValue = Number(value)
    if (
        NUMERIC_JSX_ATTRIBUTES.has(jsxName) &&
        value.trim() !== '' &&
        Number.isFinite(numericValue)
    ) {
        return {
            type: 'mdxJsxAttribute',
            name: jsxName,
            value: {
                type: 'mdxJsxAttributeValueExpression',
                value: value,
                data: {
                    estree: {
                        type: 'Program',
                        sourceType: 'module',
                        body: [
                            {
                                type: 'ExpressionStatement',
                                expression: {
                                    type: 'Literal',
                                    value: numericValue,
                                },
                            },
                        ],
                    },
                },
            } satisfies MdxJsxAttributeValueExpression,
        }
    }

    // String value
    return {
        type: 'mdxJsxAttribute',
        name: jsxName,
        value: value,
    }
}
|
|
153
|
+
|
|
154
|
+
/** Convert every child of `parent`, splicing array results into one flat list. */
function convertChildNodes(
    parent: Node,
    options?: ParseHtmlToMdxAstOptions,
): RootContent[] {
    return Array.from(parent.childNodes).flatMap((child) =>
        htmlNodeToMdxAst(child, options),
    )
}

/**
 * Convert a DOM node (and its subtree) to MDX AST nodes.
 *
 *  - Comment nodes and any node that is neither text nor element produce `[]`.
 *  - Text nodes go through `options.textToMdast` when provided; if that
 *    callback throws, the error is reported (via `options.onError`, or
 *    console.error when absent) and a plain `text` node is returned instead.
 *  - Elements become `mdxJsxTextElement` nodes. When the tag-name converter
 *    returns `''` the element wrapper is dropped and only its converted
 *    children are returned.
 *
 * @param node DOM node to convert.
 * @param options Conversion options and callbacks.
 * @returns A single node, or an array of nodes (possibly empty).
 */
function htmlNodeToMdxAst(
    node: Node,
    options?: ParseHtmlToMdxAstOptions,
): RootContent | RootContent[] {
    // Comments are dropped entirely.
    if (isCommentNode(node)) {
        return []
    }

    if (isTextNode(node)) {
        const textValue = node.textContent || ''
        const plainText: MdastText = { type: 'text', value: textValue }

        if (!options?.textToMdast) {
            return plainText
        }

        try {
            return options.textToMdast({ text: textValue })
        } catch (error) {
            // Report the failure, then degrade to a plain text node so one bad
            // text run does not abort the whole conversion.
            if (options.onError) {
                options.onError(error, textValue)
            } else {
                console.error('Failed to convert text to mdast:', error)
                console.error('Text content:', textValue)
            }
            return plainText
        }
    }

    if (!isElementNode(node)) {
        return []
    }

    const convertTagNameFn = options?.convertTagName || defaultConvertTagName
    // Use localName which is always lowercase in both browser and linkedom
    const componentName = convertTagNameFn({ tagName: node.localName })

    // Empty name means "unwrap": keep the children, discard the element.
    if (componentName === '') {
        return convertChildNodes(node, options)
    }

    // NOTE(review): attribute conversion receives node.tagName while the tag
    // converter above receives node.localName; the two may differ in casing.
    // Confirm this difference is intended before relying on `tagName` in
    // convertAttributeValue callbacks.
    const attributes: MdxJsxAttribute[] = Array.from(node.attributes).map(
        (attr) => convertAttribute(attr, node.tagName, options),
    )

    const children = convertChildNodes(node, options)

    // Always create MdxJsxTextElement initially
    // The conversion to MdxJsxFlowElement will be handled by a separate plugin
    const element: MdxJsxTextElement = {
        type: 'mdxJsxTextElement',
        name: componentName,
        attributes,
        // Children may temporarily include non-phrasing nodes; see note above.
        children: children as MdxJsxTextElement['children'],
    }
    return element
}
|
|
250
|
+
|
|
251
|
+
/**
 * Parse `options.html` (trimmed) and convert its top-level nodes to MDX AST.
 *
 * Only the parsed document's direct children are read. Per the notes that
 * accompanied the original implementation, linkedom places fragment content
 * directly under the document, and touching `document.body` on a fragment
 * auto-creates HEAD and BODY children — so `document.body` is never accessed.
 *
 * @param options Conversion options; `html` is the input.
 * @returns `[]` when nothing convertible is found, the direct conversion
 *          result when there is exactly one top-level node, otherwise a
 *          flattened array of all converted nodes.
 */
export function htmlToMdxAst(
    options: ParseHtmlToMdxAstOptions,
): RootContent | RootContent[] {
    const { document } = parseHTML(options.html.trim())

    // Keep element (1), text (3) and comment (8) nodes only.
    const topLevel = Array.from(document.childNodes).filter((node) => {
        const kind = node.nodeType
        return kind === 1 || kind === 3 || kind === 8
    })

    // A lone node is returned as-is (it may be a single node, not an array).
    if (topLevel.length === 1) {
        return htmlNodeToMdxAst(topLevel[0]!, options)
    }

    // Zero nodes yields []; several nodes are converted and flattened.
    return topLevel.flatMap((node) => htmlNodeToMdxAst(node, options))
}
|
|
297
|
+
|
|
298
|
+
/**
 * Same as `htmlToMdxAst`, but the result is always an array: a single-node
 * result is wrapped, an array result is returned unchanged.
 */
export function parseHtmlToMdxAst(
    options: ParseHtmlToMdxAstOptions,
): RootContent[] {
    const converted = htmlToMdxAst(options)
    if (Array.isArray(converted)) {
        return converted
    }
    return [converted]
}
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
import type { Root, RootContent, PhrasingContent } from 'mdast'
|
|
2
|
+
import type { MdxJsxTextElement, MdxJsxFlowElement } from 'mdast-util-mdx-jsx'
|
|
3
|
+
import { visitParents } from 'unist-util-visit-parents'
|
|
4
|
+
import type { Node, Parent } from 'unist'
|
|
5
|
+
|
|
6
|
+
// Type definitions for MDX and MDAST content types
|
|
7
|
+
// type FlowContent = Blockquote | Code | Heading | Html | List | ThematicBreak | Content
|
|
8
|
+
// type PhrasingContent = Break | Emphasis | Html | Image | ImageReference | InlineCode | Link | LinkReference | Strong | Text
|
|
9
|
+
// type MdxJsxFlowContent = MdxJsxFlowElement | FlowContent
|
|
10
|
+
// type MdxJsxPhrasingContent = MdxJsxTextElement | PhrasingContent
|
|
11
|
+
|
|
12
|
+
/** Node types whose children are expected to be phrasing (inline) content. */
const PHRASE_CONTAINERS = new Set<string>([
    // block nodes holding inline text
    'paragraph', 'heading',
    // inline wrappers
    'emphasis', 'strong', 'delete',
    'link', 'linkReference',
    'tableCell',
    // MDX JSX text elements should contain phrasing
    'mdxJsxTextElement',
])
|
|
24
|
+
|
|
25
|
+
/** Node types whose children are expected to be flow (block) content. */
const FLOW_CONTAINERS = new Set<string>([
    'root', 'listItem', 'blockquote', 'footnoteDefinition',
    // MDX JSX flow elements should contain flow
    'mdxJsxFlowElement',
])
|
|
33
|
+
|
|
34
|
+
/**
 * Node types this module treats as phrasing (inline) content.
 * Built once at module load instead of on every `isPhrasing` call.
 *
 * NOTE(review): mdast/MDX also define other phrasing types (for example
 * `footnoteReference` and `mdxTextExpression`) that are not listed here —
 * confirm whether their omission is intentional.
 */
const PHRASING_NODE_TYPES: ReadonlySet<string> = new Set([
    'text',
    'emphasis',
    'strong',
    'delete',
    'html',
    'image',
    'imageReference',
    'inlineCode',
    'link',
    'linkReference',
    'break',
    'mdxJsxTextElement',
])

/**
 * Check if a node represents phrasing content.
 *
 * @param node Any unist node; only its `type` is inspected.
 * @returns `true` when `node.type` is listed in `PHRASING_NODE_TYPES`.
 */
function isPhrasing(node: Node): boolean {
    return PHRASING_NODE_TYPES.has(node.type)
}
|
|
52
|
+
|
|
53
|
+
/**
 * Lowercase tag names that force an MDX JSX element to be a flow (block)
 * element, regardless of where it appears.
 */
const blockLevelTags = new Set<string>([
    // Generic HTML blocks
    'div', 'p', 'blockquote',
    // Headings
    'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
    // Lists
    'ul', 'ol', 'li',
    'pre', 'hr',
    // Tables
    'table', 'thead', 'tbody', 'tfoot', 'tr', 'th', 'td',
    // Sectioning
    'section', 'article', 'aside', 'nav', 'header', 'footer', 'main',
    'figure', 'figcaption',
    // Notion-specific block elements
    'callout',
    'columns', 'column',
    'page', 'database', 'data-source',
    'audio', 'video', 'file', 'pdf', 'embed',
    'synced_block', 'synced_block_reference',
    'meeting-notes', 'summary', 'notes', 'transcript',
    'table_of_contents',
    'unknown',
    'image', // Images can be block-level in Notion
])
|
|
78
|
+
|
|
79
|
+
/**
 * remark plugin: make each mdxJsx* element's kind match its tag and context.
 *
 * For every `mdxJsxTextElement` / `mdxJsxFlowElement` that has a parent, the
 * first matching rule decides the type:
 *  1. block-level tag name, or at least one non-phrasing child → flow
 *  2. parent is a phrasing container → text
 *  3. parent is a flow container → flow
 *  4. otherwise the element keeps its current type
 *
 * NOTE(review): children are judged by the `type` they carry when the element
 * is visited. Assuming `visitParents` visits a parent before its children, a
 * child that is itself retyped to flow afterwards does not cause its parent
 * to be re-evaluated — confirm this is acceptable.
 */
export default function remarkMdxJsxNormalize() {
    return function transform(tree: Root) {
        visitParents(tree, isMdxJsx, (node, ancestors) => {
            // The nearest ancestor is the direct parent; nothing to do without one.
            const parent = ancestors[ancestors.length - 1] as Parent | undefined
            if (!parent) return

            const element = node as MdxJsxTextElement | MdxJsxFlowElement

            const isBlockTag = element.name
                ? blockLevelTags.has(element.name.toLowerCase())
                : false
            const kids = (element.children || []) as RootContent[]
            const hasNonPhrasingChild = kids.some((kid) => !isPhrasing(kid))

            const desired: 'mdxJsxTextElement' | 'mdxJsxFlowElement' =
                isBlockTag || hasNonPhrasingChild
                    ? 'mdxJsxFlowElement'
                    : PHRASE_CONTAINERS.has(parent.type)
                      ? 'mdxJsxTextElement'
                      : FLOW_CONTAINERS.has(parent.type)
                        ? 'mdxJsxFlowElement'
                        : element.type

            element.type = desired
        })
    }
}
|
|
124
|
+
|
|
125
|
+
/** True when `node.type` is `mdxJsxTextElement` or `mdxJsxFlowElement`. */
function isMdxJsx(node: Node): boolean {
    switch (node.type) {
        case 'mdxJsxTextElement':
        case 'mdxJsxFlowElement':
            return true
        default:
            return false
    }
}
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
/**
 * Lowercase names of the elements that are preserved during conversion.
 * `htmlTagNameConverter` maps every name not listed here to the empty string.
 * Insertion order is significant: `nativeTags` is derived from it.
 */
export const validHtmlElements = new Set<string>([
    // Document metadata
    'base', 'head', 'link', 'meta', 'style', 'title',

    // Content sectioning
    'address', 'article', 'aside', 'footer', 'header',
    'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
    'main', 'nav', 'section',

    // Text content
    'blockquote', 'dd', 'div', 'dl', 'dt', 'figcaption', 'figure',
    'hr', 'li', 'ol', 'p', 'pre', 'ul',

    // Inline text semantics
    'a', 'abbr', 'b', 'bdi', 'bdo', 'br', 'cite', 'code', 'data', 'dfn',
    'em', 'i', 'kbd', 'mark', 'q', 'rp', 'rt', 'ruby', 's', 'samp',
    'small', 'span', 'strong', 'sub', 'sup', 'time', 'u', 'var', 'wbr',

    // Image and multimedia
    'area', 'audio', 'img', 'map', 'track', 'video',

    // Embedded content
    'embed', 'iframe', 'object', 'param', 'picture', 'portal', 'source',

    // SVG and MathML ('path' added from nativeTags)
    'svg', 'math', 'path',

    // Scripting
    'canvas', 'noscript', 'script',

    // Demarcating edits
    'del', 'ins',

    // Table content
    'caption', 'col', 'colgroup', 'table', 'tbody',
    'td', 'tfoot', 'th', 'thead', 'tr',

    // Forms
    'button', 'datalist', 'fieldset', 'form', 'input', 'label', 'legend',
    'meter', 'optgroup', 'option', 'output', 'progress', 'select', 'textarea',

    // Interactive elements
    'details', 'dialog', 'menu', 'summary',

    // Web Components
    'slot', 'template',
])
|
|
47
|
+
|
|
48
|
+
// Export as an array for backward compatibility with nativeTags
|
|
49
|
+
/** Array snapshot of `validHtmlElements`, in insertion order. */
export const nativeTags = [...validHtmlElements] as readonly string[]
|
|
50
|
+
|
|
51
|
+
/**
 * Tag-name converter that keeps only known HTML elements.
 *
 * @returns The lowercased tag name when it is listed in `validHtmlElements`,
 *          otherwise the empty string.
 */
export function htmlTagNameConverter(args: { tagName: string }): string {
    const normalized = args.tagName.toLowerCase()
    return validHtmlElements.has(normalized) ? normalized : ''
}
|
package/src/parse.ts
CHANGED
|
@@ -5,6 +5,9 @@ import { Root, RootContent } from 'mdast'
|
|
|
5
5
|
import { remark } from 'remark'
|
|
6
6
|
import remarkGfm from 'remark-gfm'
|
|
7
7
|
import remarkMdx from 'remark-mdx'
|
|
8
|
+
import { parseHtmlToMdxAst } from './html/html-to-mdx-ast.js'
|
|
9
|
+
|
|
10
|
+
export { parseHtmlToMdxAst }
|
|
8
11
|
|
|
9
12
|
export function mdxParse(code: string) {
|
|
10
13
|
const file = mdxProcessor.processSync(code)
|