safe-mdx 1.3.3 → 1.3.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,11 +5,13 @@ import type {
5
5
  MdxJsxTextElement,
6
6
  } from 'mdast-util-mdx-jsx'
7
7
  import type { Processor } from 'unified'
8
+ import { unified } from 'unified'
8
9
  import { convertAttributeNameToJSX } from './convert-attributes.js'
9
10
  import { parseHTML } from './domparser.js'
11
+ import { remarkMdxJsxNormalize } from './remark-mdx-jsx-normalize.js'
10
12
 
11
13
  // Re-export the normalize plugin
12
- export { default as remarkMdxJsxNormalize } from './remark-mdx-jsx-normalize.js'
14
+ export { remarkMdxJsxNormalize }
13
15
 
14
16
  // Type for converting tag names
15
17
  export type ConvertTagName = (args: { tagName: string }) => string
@@ -29,6 +31,7 @@ export type ConvertAttributeValue = (args: {
29
31
  // Options for parsing HTML to MDX AST
30
32
  export interface ParseHtmlToMdxAstOptions {
31
33
  html: string
34
+ parentType?: string
32
35
  onError?: (error: unknown, text: string) => void
33
36
  convertTagName?: ConvertTagName
34
37
  textToMdast?: TextToMdast
@@ -64,6 +67,32 @@ function defaultConvertAttributeValue({
64
67
  return value
65
68
  }
66
69
 
70
+ // Remove common indentation from multi-line text while preserving relative indentation
71
+ function deindent(text: string): string {
72
+ const lines = text.split('\n')
73
+
74
+ // Find minimum indentation (excluding empty lines)
75
+ let minIndent = Infinity
76
+ for (const line of lines) {
77
+ if (line.trim()) {
78
+ const match = line.match(/^(\s*)/)
79
+ if (match) {
80
+ minIndent = Math.min(minIndent, match[1].length)
81
+ }
82
+ }
83
+ }
84
+
85
+ // If no indentation found, return as is
86
+ if (minIndent === 0 || minIndent === Infinity) {
87
+ return text
88
+ }
89
+
90
+ // Remove common indentation from each line, preserving relative indentation
91
+ return lines
92
+ .map(line => line.slice(minIndent))
93
+ .join('\n')
94
+ }
95
+
67
96
  // Convert HTML attribute to MDX JSX attribute
68
97
  function convertAttribute(
69
98
  attr: Attr,
@@ -151,29 +180,51 @@ function convertAttribute(
151
180
  }
152
181
  }
153
182
 
154
- // Convert DOM node to MDX AST nodes
183
+ // Convert DOM node to MDX AST nodes - always returns an array
155
184
  function htmlNodeToMdxAst(
156
185
  node: Node,
157
186
  options?: ParseHtmlToMdxAstOptions,
158
- ): RootContent | RootContent[] {
187
+ ): RootContent[] {
159
188
  if (isCommentNode(node)) {
160
189
  // Convert comments to MDX JSX expression with comment
161
- // For now, return as HTML node
162
- // return {
190
+ // For now, return empty array
191
+ // return [{
163
192
  // type: 'html',
164
193
  // value: `<!-- ${node.data} -->`
165
- // } as Html
194
+ // }] as Html[]
166
195
  return []
167
196
  }
168
197
 
169
198
  if (isTextNode(node)) {
170
- const textValue = node.textContent || ''
199
+ let textValue = node.textContent || ''
200
+
201
+ // Skip whitespace-only nodes between elements
202
+ if (!textValue.trim()) {
203
+ const prevSibling = node.previousSibling
204
+ const nextSibling = node.nextSibling
205
+
206
+ // If between elements and contains newlines, it's likely formatting
207
+ if (textValue.includes('\n') &&
208
+ ((prevSibling && isElementNode(prevSibling)) ||
209
+ (nextSibling && isElementNode(nextSibling)))) {
210
+ return []
211
+ }
212
+ // Otherwise preserve the whitespace (could be intentional space)
213
+ }
214
+
215
+ // Always deindent text content
216
+ textValue = deindent(textValue).trim()
217
+
218
+ // Skip empty text after processing
219
+ if (!textValue) {
220
+ return []
221
+ }
171
222
 
172
223
  // If we have a textToMdast converter, use it
173
224
  if (options?.textToMdast) {
174
225
  try {
175
226
  const result = options.textToMdast({ text: textValue })
176
- return result
227
+ return Array.isArray(result) ? result : [result]
177
228
  } catch (error) {
178
229
  // Call onError callback if provided, otherwise log
179
230
  if (options.onError) {
@@ -183,18 +234,22 @@ function htmlNodeToMdxAst(
183
234
  console.error('Text content:', textValue)
184
235
  }
185
236
  // Fallback to simple text node
186
- return {
187
- type: 'text',
188
- value: textValue,
189
- } satisfies MdastText
237
+ return [
238
+ {
239
+ type: 'text',
240
+ value: textValue,
241
+ } satisfies MdastText,
242
+ ]
190
243
  }
191
244
  }
192
245
 
193
246
  // Default: return simple text node
194
- return {
195
- type: 'text',
196
- value: textValue,
197
- } satisfies MdastText
247
+ return [
248
+ {
249
+ type: 'text',
250
+ value: textValue,
251
+ } satisfies MdastText,
252
+ ]
198
253
  }
199
254
 
200
255
  if (!isElementNode(node)) {
@@ -210,12 +265,7 @@ function htmlNodeToMdxAst(
210
265
  // Process children but skip the element wrapper
211
266
  const children: RootContent[] = []
212
267
  for (const child of Array.from(node.childNodes)) {
213
- const result = htmlNodeToMdxAst(child, options)
214
- if (Array.isArray(result)) {
215
- children.push(...result)
216
- } else {
217
- children.push(result)
218
- }
268
+ children.push(...htmlNodeToMdxAst(child, options))
219
269
  }
220
270
  return children
221
271
  }
@@ -229,12 +279,7 @@ function htmlNodeToMdxAst(
229
279
  // Process children
230
280
  const children: RootContent[] = []
231
281
  for (const child of Array.from(node.childNodes)) {
232
- const result = htmlNodeToMdxAst(child, options)
233
- if (Array.isArray(result)) {
234
- children.push(...result)
235
- } else {
236
- children.push(result)
237
- }
282
+ children.push(...htmlNodeToMdxAst(child, options))
238
283
  }
239
284
 
240
285
  // Always create MdxJsxTextElement initially
@@ -245,13 +290,11 @@ function htmlNodeToMdxAst(
245
290
  attributes,
246
291
  children: children as any,
247
292
  }
248
- return element
293
+ return [element]
249
294
  }
250
295
 
251
- // Main function to parse HTML and return MDX AST
252
- export function htmlToMdxAst(
253
- options: ParseHtmlToMdxAstOptions,
254
- ): RootContent | RootContent[] {
296
+ // Main function to parse HTML and return MDX AST - always returns an array
297
+ export function htmlToMdxAst(options: ParseHtmlToMdxAstOptions): RootContent[] {
255
298
  // Parse HTML with linkedom
256
299
  const { document } = parseHTML(options.html.trim())
257
300
 
@@ -273,22 +316,50 @@ export function htmlToMdxAst(
273
316
  node.nodeType === 8, // Comment nodes
274
317
  )
275
318
 
276
- if (childNodes.length === 0) {
277
- return []
278
- }
319
+ let results: RootContent[] = []
279
320
 
280
- if (childNodes.length === 1) {
281
- return htmlNodeToMdxAst(childNodes[0]!, options)
321
+ for (const node of childNodes) {
322
+ results.push(...htmlNodeToMdxAst(node, options))
282
323
  }
283
324
 
284
- // Multiple nodes - return as array
285
- const results: RootContent[] = []
286
- for (const node of childNodes) {
287
- const result = htmlNodeToMdxAst(node, options)
288
- if (Array.isArray(result)) {
289
- results.push(...result)
325
+ // Apply the normalize plugin if we have a parentType
326
+ if (options.parentType && results.length > 0) {
327
+ // Create a temporary AST node with the same parent type
328
+ const parentType = options.parentType
329
+ const tempRoot: Root = {
330
+ type: 'root',
331
+ children: results,
332
+ }
333
+
334
+ // If we have a specific parent type, wrap the content in that parent
335
+ // to provide proper context for the normalize plugin
336
+ let astToProcess: Root
337
+ if (parentType !== 'root') {
338
+ // Create a parent node of the specified type with our content as children
339
+ const parentNode: any = {
340
+ type: parentType,
341
+ children: tempRoot.children,
342
+ }
343
+ astToProcess = {
344
+ type: 'root',
345
+ children: [parentNode],
346
+ }
347
+ } else {
348
+ astToProcess = tempRoot
349
+ }
350
+
351
+ // Create a simple processor and run the normalize plugin
352
+ const processor = unified().use(remarkMdxJsxNormalize)
353
+ processor.runSync(astToProcess)
354
+
355
+ // Extract the result back
356
+ if (parentType !== 'root') {
357
+ // Get the children from the parent node we created
358
+ const processedParent = astToProcess.children[0] as any
359
+ results = processedParent.children as RootContent[]
290
360
  } else {
291
- results.push(result)
361
+ // Get children directly from root
362
+ results = astToProcess.children
292
363
  }
293
364
  }
294
365
 
@@ -296,9 +367,9 @@ export function htmlToMdxAst(
296
367
  }
297
368
 
298
369
  // Export a wrapper that always returns an array for consistency
370
+ // Note: htmlToMdxAst now already returns an array, so this is just an alias
299
371
  export function parseHtmlToMdxAst(
300
372
  options: ParseHtmlToMdxAstOptions,
301
373
  ): RootContent[] {
302
- const result = htmlToMdxAst(options)
303
- return Array.isArray(result) ? result : [result]
374
+ return htmlToMdxAst(options)
304
375
  }
@@ -22,6 +22,40 @@ const PHRASE_CONTAINERS = new Set([
22
22
  'mdxJsxTextElement', // MDX JSX text elements should contain phrasing
23
23
  ])
24
24
 
25
+ /** HTML tags that require phrasing/inline children */
26
+ const PHRASE_HTML_CONTAINERS = new Set([
27
+ 'p',
28
+ 'h1',
29
+ 'h2',
30
+ 'h3',
31
+ 'h4',
32
+ 'h5',
33
+ 'h6',
34
+ 'em',
35
+ 'strong',
36
+ 'b',
37
+ 'i',
38
+ 'u',
39
+ 's',
40
+ 'del',
41
+ 'ins',
42
+ 'mark',
43
+ 'small',
44
+ 'sub',
45
+ 'sup',
46
+ 'a',
47
+ 'abbr',
48
+ 'cite',
49
+ 'code',
50
+ 'dfn',
51
+ 'kbd',
52
+ 'q',
53
+ 'samp',
54
+ 'span',
55
+ 'time',
56
+ 'var',
57
+ ])
58
+
25
59
  /** Parents that accept/expect flow (block) content */
26
60
  const FLOW_CONTAINERS = new Set([
27
61
  'root',
@@ -55,22 +89,51 @@ const blockLevelTags = new Set([
55
89
  'div',
56
90
  'p',
57
91
  'blockquote',
58
- 'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
59
- 'ul', 'ol', 'li',
92
+ 'h1',
93
+ 'h2',
94
+ 'h3',
95
+ 'h4',
96
+ 'h5',
97
+ 'h6',
98
+ 'ul',
99
+ 'ol',
100
+ 'li',
60
101
  'pre',
61
102
  'hr',
62
- 'table', 'thead', 'tbody', 'tfoot', 'tr', 'th', 'td',
63
- 'section', 'article', 'aside', 'nav', 'header', 'footer', 'main',
64
- 'figure', 'figcaption',
103
+ 'table',
104
+ 'thead',
105
+ 'tbody',
106
+ 'tfoot',
107
+ 'tr',
108
+ 'th',
109
+ 'td',
110
+ 'section',
111
+ 'article',
112
+ 'aside',
113
+ 'nav',
114
+ 'header',
115
+ 'footer',
116
+ 'main',
117
+ 'figure',
118
+ 'figcaption',
65
119
  // Notion-specific block elements
66
120
  'callout',
67
- 'columns', 'column',
121
+ 'columns',
122
+ 'column',
68
123
  'page',
69
124
  'database',
70
125
  'data-source',
71
- 'audio', 'video', 'file', 'pdf', 'embed',
72
- 'synced_block', 'synced_block_reference',
73
- 'meeting-notes', 'summary', 'notes', 'transcript',
126
+ 'audio',
127
+ 'video',
128
+ 'file',
129
+ 'pdf',
130
+ 'embed',
131
+ 'synced_block',
132
+ 'synced_block_reference',
133
+ 'meeting-notes',
134
+ 'summary',
135
+ 'notes',
136
+ 'transcript',
74
137
  'table_of_contents',
75
138
  'unknown',
76
139
  'image', // Images can be block-level in Notion
@@ -83,24 +146,40 @@ const blockLevelTags = new Set([
83
146
  * - Elements with block-level tag names → mdxJsxFlowElement
84
147
  * - Elements containing non-phrasing children → mdxJsxFlowElement
85
148
  */
86
- export default function remarkMdxJsxNormalize() {
149
+ export function remarkMdxJsxNormalize() {
87
150
  return function transform(tree: Root) {
88
151
  visitParents(tree, isMdxJsx, (node, ancestors) => {
89
152
  const element = node as MdxJsxTextElement | MdxJsxFlowElement
90
153
  const parent = ancestors[ancestors.length - 1] as Parent | undefined
91
154
  if (!parent) return
92
155
 
93
- const parentType = parent.type
94
- const parentExpectsPhrasing = PHRASE_CONTAINERS.has(parentType)
95
- const parentExpectsFlow = FLOW_CONTAINERS.has(parentType)
156
+ // Check if parent expects phrasing or flow content
157
+ let parentExpectsPhrasing = false
158
+ let parentExpectsFlow = false
159
+
160
+ if ((parent.type === 'mdxJsxFlowElement' || parent.type === 'mdxJsxTextElement') &&
161
+ (parent as any).name) {
162
+ // For MDX JSX elements, check the tag name
163
+ const parentTagName = (parent as any).name.toLowerCase()
164
+ parentExpectsPhrasing = PHRASE_HTML_CONTAINERS.has(parentTagName)
165
+ // If not phrasing and is a block-level tag, it expects flow
166
+ parentExpectsFlow = !parentExpectsPhrasing && blockLevelTags.has(parentTagName)
167
+ } else {
168
+ // For mdast nodes, check the type
169
+ parentExpectsPhrasing = PHRASE_CONTAINERS.has(parent.type)
170
+ parentExpectsFlow = FLOW_CONTAINERS.has(parent.type)
171
+ }
96
172
 
97
173
  // Check element properties
98
- const hasBlockTag = element.name ? blockLevelTags.has(element.name.toLowerCase()) : false
174
+ const hasBlockTag = element.name
175
+ ? blockLevelTags.has(element.name.toLowerCase())
176
+ : false
99
177
  const children = (element.children || []) as RootContent[]
100
178
  const containsNonPhrasing = children.some((c) => !isPhrasing(c))
101
179
 
102
180
  // Determine desired type
103
- let desired: 'mdxJsxTextElement' | 'mdxJsxFlowElement' = element.type
181
+ let desired: 'mdxJsxTextElement' | 'mdxJsxFlowElement' =
182
+ element.type
104
183
 
105
184
  // Priority rules:
106
185
  // 1. If it has a block-level tag name, it should be flow
@@ -124,5 +203,7 @@ export default function remarkMdxJsxNormalize() {
124
203
 
125
204
  /** Check if a node is an MDX JSX element */
126
205
  function isMdxJsx(node: Node): boolean {
127
- return node.type === 'mdxJsxTextElement' || node.type === 'mdxJsxFlowElement'
128
- }
206
+ return (
207
+ node.type === 'mdxJsxTextElement' || node.type === 'mdxJsxFlowElement'
208
+ )
209
+ }
package/src/parse.ts CHANGED
@@ -5,9 +5,9 @@ import { Root, RootContent } from 'mdast'
5
5
  import { remark } from 'remark'
6
6
  import remarkGfm from 'remark-gfm'
7
7
  import remarkMdx from 'remark-mdx'
8
- import { parseHtmlToMdxAst } from './html/html-to-mdx-ast.js'
8
+ import { parseHtmlToMdxAst, remarkMdxJsxNormalize } from './html/html-to-mdx-ast.js'
9
9
 
10
- export { parseHtmlToMdxAst }
10
+ export { parseHtmlToMdxAst, remarkMdxJsxNormalize }
11
11
 
12
12
  export function mdxParse(code: string) {
13
13
  const file = mdxProcessor.processSync(code)
package/src/safe-mdx.tsx CHANGED
@@ -186,7 +186,7 @@ export class MdastToJsx {
186
186
 
187
187
  mapMdastChildren(node: any) {
188
188
  const res = node.children
189
- ?.flatMap((child) => this.mdastTransformer(child))
189
+ ?.flatMap((child) => this.mdastTransformer(child, node.type))
190
190
  .filter(Boolean)
191
191
  if (Array.isArray(res)) {
192
192
  if (!res.length) {
@@ -287,7 +287,7 @@ export class MdastToJsx {
287
287
  )
288
288
  }
289
289
  default: {
290
- return this.mdastTransformer(node)
290
+ return this.mdastTransformer(node, 'mdxJsxElement')
291
291
  }
292
292
  }
293
293
  }
@@ -564,14 +564,14 @@ export class MdastToJsx {
564
564
  }
565
565
 
566
566
  run() {
567
- const res = this.mdastTransformer(this.mdast) as ReactNode
567
+ const res = this.mdastTransformer(this.mdast, 'root') as ReactNode
568
568
  if (Array.isArray(res) && res.length === 1) {
569
569
  return res[0]
570
570
  }
571
571
  return res
572
572
  }
573
573
 
574
- mdastTransformer(node: MyRootContent): ReactNode {
574
+ mdastTransformer(node: MyRootContent, parentType?: string): ReactNode {
575
575
  if (!node) {
576
576
  return []
577
577
  }
@@ -580,7 +580,7 @@ export class MdastToJsx {
580
580
  if (this.renderNode) {
581
581
  const customResult = this.renderNode(
582
582
  node,
583
- this.mdastTransformer.bind(this),
583
+ (n: MyRootContent) => this.mdastTransformer(n, node.type),
584
584
  )
585
585
  if (customResult !== undefined) {
586
586
  return customResult
@@ -928,9 +928,10 @@ export class MdastToJsx {
928
928
  return []
929
929
  }
930
930
 
931
- // Parse HTML to MDX AST using the new approach
931
+ // Parse HTML to MDX AST using the new approach - always returns an array
932
932
  const mdxAst = htmlToMdxAst({
933
933
  html: text,
934
+ parentType: parentType || 'root',
934
935
  convertTagName: ({ tagName }) => {
935
936
  const lowerTag = tagName.toLowerCase()
936
937
  // Only keep valid HTML elements
@@ -943,11 +944,7 @@ export class MdastToJsx {
943
944
  })
944
945
 
945
946
  // Process the MDX AST nodes
946
- if (Array.isArray(mdxAst)) {
947
- return mdxAst.map(child => this.mdastTransformer(child))
948
- } else {
949
- return this.mdastTransformer(mdxAst)
950
- }
947
+ return mdxAst.map(child => this.mdastTransformer(child, parentType))
951
948
  }
952
949
  case 'imageReference': {
953
950
  return []