safe-mdx 1.3.2 → 1.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77) hide show
  1. package/README.md +14 -14
  2. package/dist/assets/HtmlToJsxConverter-Ds0bTjpw.js +24 -0
  3. package/dist/assets/_commonjsHelpers-CqkleIqs.js +1 -0
  4. package/dist/assets/index-B5fPOjPt.css +1 -0
  5. package/dist/assets/index-B7ATSoRE.js +9 -0
  6. package/dist/assets/index-BwZ2FTRd.js +146 -0
  7. package/dist/assets/index-R1UqLMGJ.js +1 -0
  8. package/dist/assets/index-c0qeY2gs.js +9 -0
  9. package/dist/assets/jsx-runtime-BhZZLbvw.js +9 -0
  10. package/dist/assets/jsx-runtime-NArryeSM.js +1 -0
  11. package/dist/assets/react-Ca6JzGpx.js +1 -0
  12. package/dist/assets/react-dom-BYRHYqYl.js +1 -0
  13. package/dist/html/attributes.d.ts +19 -0
  14. package/dist/html/attributes.d.ts.map +1 -0
  15. package/dist/html/attributes.js +289 -0
  16. package/dist/html/attributes.js.map +1 -0
  17. package/dist/html/convert-attributes.d.ts +6 -0
  18. package/dist/html/convert-attributes.d.ts.map +1 -0
  19. package/dist/html/convert-attributes.js +43 -0
  20. package/dist/html/convert-attributes.js.map +1 -0
  21. package/dist/html/domparser-browser.d.ts +4 -0
  22. package/dist/html/domparser-browser.d.ts.map +1 -0
  23. package/dist/html/domparser-browser.js +7 -0
  24. package/dist/html/domparser-browser.js.map +1 -0
  25. package/dist/html/domparser.d.ts +2 -0
  26. package/dist/html/domparser.d.ts.map +1 -0
  27. package/dist/html/domparser.js +5 -0
  28. package/dist/html/domparser.js.map +1 -0
  29. package/dist/html/html-to-mdx-ast.d.ts +23 -0
  30. package/dist/html/html-to-mdx-ast.d.ts.map +1 -0
  31. package/dist/html/html-to-mdx-ast.js +227 -0
  32. package/dist/html/html-to-mdx-ast.js.map +1 -0
  33. package/dist/html/html-to-mdx-ast.test.d.ts +2 -0
  34. package/dist/html/html-to-mdx-ast.test.d.ts.map +1 -0
  35. package/dist/html/html-to-mdx-ast.test.js +324 -0
  36. package/dist/html/html-to-mdx-ast.test.js.map +1 -0
  37. package/dist/html/remark-mdx-jsx-normalize.d.ts +10 -0
  38. package/dist/html/remark-mdx-jsx-normalize.d.ts.map +1 -0
  39. package/dist/html/remark-mdx-jsx-normalize.js +117 -0
  40. package/dist/html/remark-mdx-jsx-normalize.js.map +1 -0
  41. package/dist/html/valid-html-elements.d.ts +10 -0
  42. package/dist/html/valid-html-elements.d.ts.map +1 -0
  43. package/dist/html/valid-html-elements.js +50 -0
  44. package/dist/html/valid-html-elements.js.map +1 -0
  45. package/dist/index.html +19 -0
  46. package/dist/parse.d.ts +2 -0
  47. package/dist/parse.d.ts.map +1 -1
  48. package/dist/parse.js +2 -0
  49. package/dist/parse.js.map +1 -1
  50. package/dist/safe-mdx.d.ts +1 -1
  51. package/dist/safe-mdx.d.ts.map +1 -1
  52. package/dist/safe-mdx.js +23 -71
  53. package/dist/safe-mdx.js.map +1 -1
  54. package/dist/safe-mdx.test.js +161 -8
  55. package/dist/safe-mdx.test.js.map +1 -1
  56. package/package.json +27 -6
  57. package/src/html/README +17 -0
  58. package/src/html/attributes.ts +297 -0
  59. package/src/html/convert-attributes.ts +59 -0
  60. package/src/html/domparser-browser.ts +6 -0
  61. package/src/html/domparser.ts +5 -0
  62. package/src/html/html-to-mdx-ast.test.ts +365 -0
  63. package/src/html/html-to-mdx-ast.ts +304 -0
  64. package/src/html/remark-mdx-jsx-normalize.ts +128 -0
  65. package/src/html/valid-html-elements.ts +65 -0
  66. package/src/parse.ts +3 -0
  67. package/src/safe-mdx.test.tsx +178 -12
  68. package/src/safe-mdx.tsx +23 -79
  69. package/dist/HtmlToJsxConverter.d.ts +0 -10
  70. package/dist/HtmlToJsxConverter.d.ts.map +0 -1
  71. package/dist/HtmlToJsxConverter.js +0 -22
  72. package/dist/HtmlToJsxConverter.js.map +0 -1
  73. package/dist/plugins.d.ts +0 -12
  74. package/dist/plugins.d.ts.map +0 -1
  75. package/dist/plugins.js +0 -68
  76. package/dist/plugins.js.map +0 -1
  77. package/src/HtmlToJsxConverter.tsx +0 -37
@@ -0,0 +1,304 @@
1
+ import type { Root, RootContent, Text as MdastText } from 'mdast'
2
+ import type {
3
+ MdxJsxAttribute,
4
+ MdxJsxAttributeValueExpression,
5
+ MdxJsxTextElement,
6
+ } from 'mdast-util-mdx-jsx'
7
+ import type { Processor } from 'unified'
8
+ import { convertAttributeNameToJSX } from './convert-attributes.js'
9
+ import { parseHTML } from './domparser.js'
10
+
11
+ // Re-export the normalize plugin
12
+ export { default as remarkMdxJsxNormalize } from './remark-mdx-jsx-normalize.js'
13
+
14
+ // Type for converting tag names
15
+ export type ConvertTagName = (args: { tagName: string }) => string // returning '' drops the element wrapper; its children are still converted and kept
16
+
17
+ // Type for converting text to mdast nodes - now returns AST nodes directly
18
+ export type TextToMdast = (args: {
19
+ text: string // textContent of a single DOM text node ('' when it is null/empty)
20
+ }) => RootContent | RootContent[] // the returned node(s) are emitted in place of the text node
21
+
22
+ // Type for converting attribute values
23
+ export type ConvertAttributeValue = (args: {
24
+ name: string // attribute name as found on the DOM Attr (before conversion to a JSX name)
25
+ value: string // attribute value as found on the DOM Attr
26
+ tagName: string // the owning element's DOM `tagName` (not the converted component name)
27
+ }) => string // the returned string is what gets classified as boolean / number / string
28
+
29
+ // Options for parsing HTML to MDX AST
30
+ export interface ParseHtmlToMdxAstOptions {
31
+ html: string // HTML source; it is trimmed before being parsed
32
+ onError?: (error: unknown, text: string) => void // invoked when textToMdast throws, with the error and the text that failed; without it the failure is logged via console.error
33
+ convertTagName?: ConvertTagName // default: lowercases the tag name
34
+ textToMdast?: TextToMdast // default: every text node becomes a plain mdast `text` node
35
+ convertAttributeValue?: ConvertAttributeValue // default: value is used unchanged
36
+ }
37
+
38
+ // Type guard functions for DOM nodes
39
/** Narrows a DOM node to `Comment` by comparing its numeric node type. */
function isCommentNode(node: Node): node is Comment {
    const COMMENT_NODE = 8 // numeric value of Node.COMMENT_NODE
    return COMMENT_NODE === node.nodeType
}
42
+
43
/** Narrows a DOM node to `Text` by comparing its numeric node type. */
function isTextNode(node: Node): node is Text {
    const TEXT_NODE = 3 // numeric value of Node.TEXT_NODE
    return TEXT_NODE === node.nodeType
}
46
+
47
/** Narrows a DOM node to `Element` by comparing its numeric node type. */
function isElementNode(node: Node): node is Element {
    const ELEMENT_NODE = 1 // numeric value of Node.ELEMENT_NODE
    return ELEMENT_NODE === node.nodeType
}
50
+
51
/**
 * Fallback tag-name converter used when no `convertTagName` option is given.
 * The only thing it does is lowercase the tag name.
 */
function defaultConvertTagName({ tagName }: { tagName: string }): string {
    const lowered = tagName.toLowerCase()
    return lowered
}
55
+
56
/**
 * Fallback attribute-value converter used when no `convertAttributeValue`
 * option is given. It ignores `name` and `tagName` and hands the value back
 * untouched.
 */
function defaultConvertAttributeValue(args: {
    name: string
    value: string
    tagName: string
}): string {
    return args.value
}
66
+
67
/**
 * JSX attribute names whose numeric values are emitted as number expressions
 * (e.g. `cols={3}`) rather than strings. Matched against the converted JSX name.
 */
const NUMERIC_JSX_ATTRIBUTES: ReadonlySet<string> = new Set([
    'tabIndex',
    'cols',
    'rows',
    'size',
    'span',
    'colSpan',
    'rowSpan',
    'border',
])

/**
 * HTML boolean attributes, lowercase. Only for these does the legacy
 * `name="name"` spelling (e.g. `disabled="disabled"`) mean "true".
 */
const HTML_BOOLEAN_ATTRIBUTES: ReadonlySet<string> = new Set([
    'allowfullscreen',
    'async',
    'autofocus',
    'autoplay',
    'checked',
    'controls',
    'default',
    'defer',
    'disabled',
    'formnovalidate',
    'hidden',
    'inert',
    'ismap',
    'itemscope',
    'loop',
    'multiple',
    'muted',
    'nomodule',
    'novalidate',
    'open',
    'playsinline',
    'readonly',
    'required',
    'reversed',
    'selected',
])

/**
 * Convert one DOM attribute into an MDX JSX attribute.
 *
 * The raw value first goes through `options.convertAttributeValue` (identity
 * when the option is absent) and is then classified:
 *
 * 1. `''` → valueless attribute (`value: null`, i.e. boolean true).
 *    NOTE(review): this also maps meaningful empty strings such as `alt=""`
 *    or `value=""` to `true`. The DOM cannot distinguish `<input disabled>`
 *    from `<input disabled="">`, so the behaviour is kept; confirm it is intended.
 * 2. `name="name"` on a known HTML boolean attribute → valueless attribute.
 *    Other attributes that merely happen to equal their own name
 *    (`name="name"`, `id="id"`, `title="title"`) stay strings.
 * 3. A finite numeric value on one of the listed numeric attributes → a
 *    `mdxJsxAttributeValueExpression` carrying a numeric estree literal.
 *    Blank and non-finite values are not treated as numbers.
 * 4. Anything else → plain string value (this includes `style`, which is
 *    passed through as a string, not parsed into an object).
 *
 * @param attr    DOM attribute to convert.
 * @param tagName Tag name of the owning element; only forwarded to
 *                `convertAttributeValue`.
 * @param options Parse options; only `convertAttributeValue` is read here.
 * @returns The MDX JSX attribute node.
 */
function convertAttribute(
    attr: Attr,
    tagName: string,
    options?: ParseHtmlToMdxAstOptions,
): MdxJsxAttribute {
    const jsxName = convertAttributeNameToJSX(attr.name)

    // Apply attribute value transformation
    const convertAttrValue =
        options?.convertAttributeValue || defaultConvertAttributeValue
    const value = convertAttrValue({
        name: attr.name,
        value: attr.value,
        tagName,
    })

    // Boolean attributes: either no value at all, or the legacy
    // self-named form on an attribute that really is boolean in HTML.
    const htmlName = attr.name.toLowerCase()
    const isValueless =
        value === '' ||
        (HTML_BOOLEAN_ATTRIBUTES.has(htmlName) &&
            value.toLowerCase() === htmlName)
    if (isValueless) {
        return {
            type: 'mdxJsxAttribute',
            name: jsxName,
            value: null, // boolean true
        }
    }

    // Numeric attributes. `Number('  ')` is 0, so blank strings must be
    // excluded explicitly; Infinity/NaN have no numeric literal form.
    if (NUMERIC_JSX_ATTRIBUTES.has(jsxName) && value.trim() !== '') {
        const numeric = Number(value)
        if (Number.isFinite(numeric)) {
            return {
                type: 'mdxJsxAttribute',
                name: jsxName,
                value: {
                    type: 'mdxJsxAttributeValueExpression',
                    value: value,
                    data: {
                        estree: {
                            type: 'Program',
                            sourceType: 'module',
                            body: [
                                {
                                    type: 'ExpressionStatement',
                                    expression: {
                                        type: 'Literal',
                                        value: numeric,
                                    },
                                },
                            ],
                        },
                    },
                } satisfies MdxJsxAttributeValueExpression,
            }
        }
    }

    // String value
    return {
        type: 'mdxJsxAttribute',
        name: jsxName,
        value: value,
    }
}
153
+
154
/**
 * Convert every child of `parent` and flatten the results into one list,
 * in document order.
 */
function convertChildNodes(
    parent: Element,
    options?: ParseHtmlToMdxAstOptions,
): RootContent[] {
    const collected: RootContent[] = []
    for (const child of Array.from(parent.childNodes)) {
        const converted = htmlNodeToMdxAst(child, options)
        collected.push(...(Array.isArray(converted) ? converted : [converted]))
    }
    return collected
}

/**
 * Convert a DOM text node. Uses `options.textToMdast` when present; if that
 * callback throws, the error is reported (via `options.onError`, or
 * `console.error` when no handler is set) and a plain text node is returned.
 */
function textNodeToMdxAst(
    node: Text,
    options?: ParseHtmlToMdxAstOptions,
): RootContent | RootContent[] {
    const textValue = node.textContent || ''
    const plainText: MdastText = { type: 'text', value: textValue }

    if (!options?.textToMdast) {
        return plainText
    }

    try {
        return options.textToMdast({ text: textValue })
    } catch (error) {
        if (options.onError) {
            options.onError(error, textValue)
        } else {
            console.error('Failed to convert text to mdast:', error)
            console.error('Text content:', textValue)
        }
        return plainText
    }
}

/**
 * Convert a single DOM node (and its subtree) into MDX AST node(s).
 *
 * - Comment nodes and any node that is neither text nor element yield `[]`.
 * - Text nodes are handled by `textNodeToMdxAst`.
 * - Elements are named through `options.convertTagName` (lowercasing by
 *   default). An empty name means "unwrap": only the converted children are
 *   returned. Otherwise an `mdxJsxTextElement` is produced; a separate plugin
 *   is expected to promote it to a flow element where needed.
 */
function htmlNodeToMdxAst(
    node: Node,
    options?: ParseHtmlToMdxAstOptions,
): RootContent | RootContent[] {
    if (isCommentNode(node)) {
        return []
    }
    if (isTextNode(node)) {
        return textNodeToMdxAst(node, options)
    }
    if (!isElementNode(node)) {
        return []
    }

    // localName is used because it is always lowercase in both browser and linkedom
    const convertTagNameFn = options?.convertTagName || defaultConvertTagName
    const componentName = convertTagNameFn({ tagName: node.localName })

    if (componentName === '') {
        // Skip the element wrapper, keep what is inside it
        return convertChildNodes(node, options)
    }

    // Attributes are converted before children.
    // NOTE(review): the attribute converter receives `node.tagName`, whereas
    // the tag converter above receives `node.localName`; confirm the
    // differing casing is intended.
    const attributes: MdxJsxAttribute[] = Array.from(node.attributes).map(
        (attr) => convertAttribute(attr, node.tagName, options),
    )
    const children = convertChildNodes(node, options)

    const element: MdxJsxTextElement = {
        type: 'mdxJsxTextElement',
        name: componentName,
        attributes,
        children: children as MdxJsxTextElement['children'],
    }
    return element
}
250
+
251
/**
 * Parse an HTML string and convert its top-level nodes to MDX AST.
 *
 * The return shape depends on the input: exactly one top-level node yields
 * whatever that node converts to (a single node or an array); zero or several
 * top-level nodes yield a flat array.
 *
 * @param options Parse options; `options.html` is trimmed before parsing.
 */
export function htmlToMdxAst(
    options: ParseHtmlToMdxAstOptions,
): RootContent | RootContent[] {
    const { document } = parseHTML(options.html.trim())

    // Only the document's direct children are read. Per the original author's
    // notes on linkedom: fragment input such as "<div>Hello</div>" ends up as
    // direct children of the document, and touching `document.body` on a
    // fragment would auto-create HEAD and BODY children, so it is avoided.
    const relevantNodeTypes = new Set([
        1, // Element nodes
        3, // Text nodes
        8, // Comment nodes
    ])
    const topLevel = Array.from(document.childNodes).filter((node) =>
        relevantNodeTypes.has(node.nodeType),
    )

    if (topLevel.length === 1) {
        return htmlNodeToMdxAst(topLevel[0]!, options)
    }

    // Zero or many nodes: flatten everything into one array
    // (`concat` appends single nodes and spreads arrays).
    return topLevel.reduce<RootContent[]>(
        (flattened, node) => flattened.concat(htmlNodeToMdxAst(node, options)),
        [],
    )
}
297
+
298
/**
 * Same conversion as `htmlToMdxAst`, but the result is always an array:
 * a single returned node is wrapped in a one-element array.
 */
export function parseHtmlToMdxAst(
    options: ParseHtmlToMdxAstOptions,
): RootContent[] {
    const converted = htmlToMdxAst(options)
    if (Array.isArray(converted)) {
        return converted
    }
    return [converted]
}
@@ -0,0 +1,128 @@
1
+ import type { Root, RootContent, PhrasingContent } from 'mdast'
2
+ import type { MdxJsxTextElement, MdxJsxFlowElement } from 'mdast-util-mdx-jsx'
3
+ import { visitParents } from 'unist-util-visit-parents'
4
+ import type { Node, Parent } from 'unist'
5
+
6
+ // Type definitions for MDX and MDAST content types
7
+ // type FlowContent = Blockquote | Code | Heading | Html | List | ThematicBreak | Content
8
+ // type PhrasingContent = Break | Emphasis | Html | Image | ImageReference | InlineCode | Link | LinkReference | Strong | Text
9
+ // type MdxJsxFlowContent = MdxJsxFlowElement | FlowContent
10
+ // type MdxJsxPhrasingContent = MdxJsxTextElement | PhrasingContent
11
+
12
/** Node types that, as a parent, require phrasing (inline) children. */
const PHRASE_CONTAINERS = new Set<string>([
    'paragraph', 'heading',
    'emphasis', 'strong', 'delete',
    'link', 'linkReference',
    'tableCell',
    // MDX JSX text elements should contain phrasing
    'mdxJsxTextElement',
])
24
+
25
/** Node types that, as a parent, accept or expect flow (block) children. */
const FLOW_CONTAINERS = new Set<string>([
    'root', 'listItem', 'blockquote', 'footnoteDefinition',
    // MDX JSX flow elements should contain flow
    'mdxJsxFlowElement',
])
33
+
34
/**
 * Node types counted as phrasing (inline) content. Built once at module load
 * instead of on every `isPhrasing` call.
 *
 * Besides the core mdast phrasing types this includes the GFM additions
 * (`delete`, `footnoteReference`) and the MDX inline types
 * (`mdxJsxTextElement`, `mdxTextExpression`).
 */
const PHRASING_NODE_TYPES: ReadonlySet<string> = new Set([
    'text',
    'emphasis',
    'strong',
    'delete',
    'html',
    'image',
    'imageReference',
    'inlineCode',
    'link',
    'linkReference',
    'break',
    'footnoteReference',
    'mdxJsxTextElement',
    'mdxTextExpression',
])

/**
 * Check if a node represents phrasing content.
 *
 * @param node Any syntax-tree node; only its `type` is inspected.
 * @returns `true` when `node.type` is one of the phrasing node types.
 */
function isPhrasing(node: Node): boolean {
    return PHRASING_NODE_TYPES.has(node.type)
}
52
+
53
/** Tag names treated as block-level; elements with these names become flow elements. */
const blockLevelTags = new Set<string>([
    // Standard HTML block elements
    'div',
    'p',
    'blockquote',
    'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
    'ul', 'ol', 'li',
    'pre',
    'hr',
    'table', 'thead', 'tbody', 'tfoot', 'tr', 'th', 'td',
    'section', 'article', 'aside', 'nav', 'header', 'footer', 'main',
    'figure', 'figcaption',

    // Notion-specific block elements
    'callout',
    'columns', 'column',
    'page',
    'database',
    'data-source',
    'audio', 'video', 'file', 'pdf', 'embed',
    'synced_block', 'synced_block_reference',
    'meeting-notes', 'summary', 'notes', 'transcript',
    'table_of_contents',
    'unknown',
    'image', // Images can be block-level in Notion
])
78
+
79
/**
 * remark plugin that makes each MDX JSX element's kind agree with its context.
 *
 * For every `mdxJsxTextElement` / `mdxJsxFlowElement` that has a parent, the
 * kind is chosen in this order:
 * 1. block-level tag name, or at least one non-phrasing child → flow element
 * 2. parent requires phrasing children → text element
 * 3. parent expects flow children → flow element
 * 4. otherwise the current kind is kept
 *
 * The tree is modified in place.
 */
export default function remarkMdxJsxNormalize() {
    return function transform(tree: Root) {
        visitParents(tree, isMdxJsx, (node, ancestors) => {
            const parent = ancestors[ancestors.length - 1] as Parent | undefined
            if (!parent) return

            const element = node as MdxJsxTextElement | MdxJsxFlowElement
            const tag = element.name
            const kids = (element.children || []) as RootContent[]

            const mustBeFlow =
                (tag ? blockLevelTags.has(tag.toLowerCase()) : false) ||
                kids.some((kid) => !isPhrasing(kid))

            let desired: 'mdxJsxTextElement' | 'mdxJsxFlowElement'
            if (mustBeFlow) {
                desired = 'mdxJsxFlowElement'
            } else if (PHRASE_CONTAINERS.has(parent.type)) {
                desired = 'mdxJsxTextElement'
            } else if (FLOW_CONTAINERS.has(parent.type)) {
                desired = 'mdxJsxFlowElement'
            } else {
                // Unknown parent kind: leave the element as it is
                desired = element.type
            }

            // Write only when the kind actually changes
            if (desired !== element.type) {
                element.type = desired
            }
        })
    }
}
124
+
125
/** True for both kinds of MDX JSX element node (text and flow). */
function isMdxJsx(node: Node): boolean {
    switch (node.type) {
        case 'mdxJsxTextElement':
        case 'mdxJsxFlowElement':
            return true
        default:
            return false
    }
}
@@ -0,0 +1,65 @@
1
+ // List of valid HTML elements that should be preserved
2
+ // All other elements will be filtered out (return empty string)
3
+ export const validHtmlElements = new Set([ // allowlist of lowercase tag names; a Set iterates in insertion order, which is the order nativeTags is built in
4
+ // Document metadata
5
+ 'base', 'head', 'link', 'meta', 'style', 'title',
6
+
7
+ // Content sectioning
8
+ 'address', 'article', 'aside', 'footer', 'header', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
9
+ 'main', 'nav', 'section',
10
+
11
+ // Text content
12
+ 'blockquote', 'dd', 'div', 'dl', 'dt', 'figcaption', 'figure', 'hr', 'li', 'ol', 'p', 'pre', 'ul',
13
+
14
+ // Inline text semantics
15
+ 'a', 'abbr', 'b', 'bdi', 'bdo', 'br', 'cite', 'code', 'data', 'dfn', 'em', 'i', 'kbd',
16
+ 'mark', 'q', 'rp', 'rt', 'ruby', 's', 'samp', 'small', 'span', 'strong', 'sub', 'sup',
17
+ 'time', 'u', 'var', 'wbr',
18
+
19
+ // Image and multimedia
20
+ 'area', 'audio', 'img', 'map', 'track', 'video',
21
+
22
+ // Embedded content
23
+ 'embed', 'iframe', 'object', 'param', 'picture', 'portal', 'source',
24
+
25
+ // SVG and MathML
26
+ 'svg', 'math', 'path', // Added 'path' from nativeTags
27
+
28
+ // Scripting
29
+ 'canvas', 'noscript', 'script', // NOTE(review): 'script' (like 'iframe'/'object' above) passes this allowlist; presumably sanitising happens elsewhere, confirm
30
+
31
+ // Demarcating edits
32
+ 'del', 'ins',
33
+
34
+ // Table content
35
+ 'caption', 'col', 'colgroup', 'table', 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr',
36
+
37
+ // Forms
38
+ 'button', 'datalist', 'fieldset', 'form', 'input', 'label', 'legend', 'meter', 'optgroup',
39
+ 'option', 'output', 'progress', 'select', 'textarea',
40
+
41
+ // Interactive elements
42
+ 'details', 'dialog', 'menu', 'summary',
43
+
44
+ // Web Components
45
+ 'slot', 'template',
46
+ ])
47
+
48
/**
 * Array form of `validHtmlElements`, kept for backward compatibility with
 * code that consumes `nativeTags`. Built once at module load, in the set's
 * insertion order.
 */
export const nativeTags: readonly string[] = Array.from(validHtmlElements)
50
+
51
/**
 * Tag-name converter that keeps only known HTML elements.
 *
 * The tag name is lowercased and looked up in `validHtmlElements`.
 *
 * @returns the lowercased tag name when it is in the set, otherwise `''`.
 */
export function htmlTagNameConverter({ tagName }: { tagName: string }): string {
    const normalized = tagName.toLowerCase()
    return validHtmlElements.has(normalized) ? normalized : ''
}
package/src/parse.ts CHANGED
@@ -5,6 +5,9 @@ import { Root, RootContent } from 'mdast'
5
5
  import { remark } from 'remark'
6
6
  import remarkGfm from 'remark-gfm'
7
7
  import remarkMdx from 'remark-mdx'
8
+ import { parseHtmlToMdxAst } from './html/html-to-mdx-ast.js'
9
+
10
+ export { parseHtmlToMdxAst }
8
11
 
9
12
  export function mdxParse(code: string) {
10
13
  const file = mdxProcessor.processSync(code)