notro-loader 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,501 +1,501 @@
1
- /**
2
- * MDX plugin pipeline for Notion Enhanced Markdown.
3
- *
4
- * Parser layer — configures the remark (markdown → mdast) and rehype
5
- * (hast → HTML) plugin chains. Astro runtime binding lives in compile-mdx.ts.
6
- *
7
- * Responsibility layers:
8
- * - remarkNfm: always active, required for Notion content
9
- * - NOTION_CORE_REHYPE_PLUGINS (internal): always active, Notion-specific
10
- * - User-provided plugins via notro({ remarkPlugins, rehypePlugins }):
11
- * math (remark-math + rehype-katex), diagrams (rehype-beautiful-mermaid), etc.
12
- * - Built-in Shiki support via notro({ shikiConfig }): injected last so user
13
- * plugins (rehypeMermaid, rehypeKatex) run before syntax highlighting
14
- */
15
-
16
- import rehypeRaw from 'rehype-raw';
17
- import rehypeSlug from 'rehype-slug';
18
- import { remarkNfm } from 'remark-nfm';
19
- import { getNotroPlugins } from './notro-config.ts';
20
-
21
- import type { Plugin, PluggableList } from 'unified';
22
- import type { Root, Element } from 'hast';
23
- import { visit } from 'unist-util-visit';
24
- import type { LinkToPages } from '../types.ts';
25
-
26
/**
 * Concatenates the text of a hast node and all of its descendants.
 *
 * A `text` node contributes its `value` (empty string when the value is
 * missing); any other node contributes the joined text of its `children`,
 * visited depth-first in document order.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any
function hastNodeToString(node: any): string {
  if (node.type !== 'text') {
    const descendants = node.children ?? [];
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    return descendants.map((child: any) => hastNodeToString(child)).join('');
  }
  return node.value ?? '';
}
32
-
33
// Names handed to rehype-raw's `passThrough` option (see buildMdxPlugins).
//
// rehype-raw documents `passThrough` as a list of node *types* to keep. The MDX
// node types must be listed or rehype-raw throws
// "Cannot compile `mdxJsxFlowElement` node" at build time.
// NOTE(review): the Notion tag names below are kept for backward
// compatibility; since `passThrough` is documented as matching node types,
// they are presumably not what preserves those elements — confirm before
// relying on (or removing) them.
const NOTION_CUSTOM_ELEMENTS = [
  // MDX AST node types (the same set @mdx-js/mdx exports as `nodeTypes`).
  'mdxJsxFlowElement',
  'mdxJsxTextElement',
  'mdxFlowExpression',
  'mdxTextExpression',
  // ESM (import/export) node type; without it rehype-raw cannot compile
  // documents that contain import/export statements.
  'mdxjsEsm',
  // Legacy names, retained so existing behaviour is unchanged.
  'mdxJsImport',
  'mdxJsExport',
  // Notion block-level element names.
  'callout',
  'columns',
  'column',
  'audio',
  'video',
  'file',
  'pdf',
  'page',
  'database',
  'table_of_contents',
  'empty-block',
  // Notion inline mention element names.
  'mention-user',
  'mention-page',
  'mention-database',
  'mention-data-source',
  'mention-agent',
  'mention-date',
];
63
-
64
- // ── Notion color attribute → CSS class conversion ─────────────────────────
65
-
66
// Base colour names Notion uses; each one exists as a text colour and, with a
// background suffix, as a background colour.
const NOTION_COLOR_NAMES = new Set([
  'gray',
  'brown',
  'orange',
  'yellow',
  'green',
  'blue',
  'purple',
  'pink',
  'red',
]);

/**
 * Translates a Notion `color` attribute value into a notro CSS class name.
 *
 * - `<name>`                      → `notro-text-<name>`
 * - `<name>_bg` (current format)  → `notro-bg-<name>`
 * - `<name>_background` (legacy)  → `notro-bg-<name>`
 *
 * @param color Raw attribute value.
 * @returns The class name, or `''` for an empty value, `default`, or any
 *          value whose base name is not a known Notion colour.
 */
function notionColorToClass(color: string): string {
  if (!color || color === 'default') return '';

  const backgroundSuffix = ['_bg', '_background'].find((candidate) => color.endsWith(candidate));

  if (backgroundSuffix === undefined) {
    return NOTION_COLOR_NAMES.has(color) ? `notro-text-${color}` : '';
  }

  const baseName = color.slice(0, color.length - backgroundSuffix.length);
  return NOTION_COLOR_NAMES.has(baseName) ? `notro-bg-${baseName}` : '';
}
89
-
90
/**
 * Adds `cls` to `properties.className`, mutating `properties` in place.
 *
 * The result is always an array of class tokens:
 * - an existing array is copied and extended;
 * - an existing string is split on whitespace, so `'a b'` becomes
 *   `['a', 'b']` rather than the single malformed token `'a b'`;
 * - a missing/falsy value starts a new list.
 *
 * A class that is already present is not added a second time.
 *
 * @param properties hast element properties object to update.
 * @param cls Class name to add; an empty string is a no-op.
 */
function appendClass(properties: Record<string, unknown>, cls: string): void {
  if (!cls) return;

  const existing = properties.className;
  let tokens: unknown[];
  if (Array.isArray(existing)) {
    tokens = [...existing];
  } else if (existing) {
    tokens = String(existing).split(/\s+/).filter(Boolean);
  } else {
    tokens = [];
  }

  if (!tokens.includes(cls)) tokens.push(cls);
  properties.className = tokens;
}
97
-
98
/**
 * Rehype plugin: converts Notion `color` (and, on spans, `underline`)
 * attributes into CSS classes, removing the original attributes.
 *
 * Two node shapes are handled:
 * - hast `element` nodes (`tagName` + `properties`);
 * - `mdxJsxFlowElement` / `mdxJsxTextElement` nodes (`name` + `attributes[]`
 *   of `{ type: 'mdxJsxAttribute', name, value }`).
 *
 * Only `<p>`, `<h1>`–`<h6>` and `<span>` are touched:
 * - `color="gray_bg"` → class from `notionColorToClass` (attribute is removed
 *   even when the value maps to no class, e.g. `default`);
 * - `<span underline="true">` / `<span underline>` → class `underline`.
 *
 * Attribute values that are MDX expressions (objects, not strings) cannot be
 * evaluated here and are left untouched.
 */
const rehypeNotionColorPlugin: Plugin<[], Root> = () => {
  // Compiled once per plugin instance instead of once per visited node.
  const blockTagPattern = /^(p|h[1-6])$/;

  return (tree) => {
    // ── hast element nodes ────────────────────────────────────────────────
    visit(tree, 'element', (node: Element) => {
      const isSpan = node.tagName === 'span';
      if (!isSpan && !blockTagPattern.test(node.tagName)) return;

      const props = node.properties ?? {};
      const color = props.color;

      if (typeof color === 'string') {
        const cls = notionColorToClass(color);
        delete props.color;
        appendClass(props, cls);
        node.properties = props;
      }

      if (isSpan && (props.underline === 'true' || props.underline === true)) {
        delete props.underline;
        appendClass(props, 'underline');
        node.properties = props;
      }
    });

    // ── MDX JSX nodes ─────────────────────────────────────────────────────
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    visit(tree, (node: any) => {
      if (node.type !== 'mdxJsxFlowElement' && node.type !== 'mdxJsxTextElement') return;

      const name: string = node.name ?? '';
      const isSpan = name === 'span';
      if (!isSpan && !blockTagPattern.test(name)) return;

      // eslint-disable-next-line @typescript-eslint/no-explicit-any
      const attrs: any[] = Array.isArray(node.attributes) ? node.attributes : [];
      const classesToAdd: string[] = [];

      // Drop the attributes we translate, collecting the classes they map to.
      // eslint-disable-next-line @typescript-eslint/no-explicit-any
      const keptAttrs = attrs.filter((attr: any) => {
        if (attr.type !== 'mdxJsxAttribute') return true;

        if (attr.name === 'color') {
          // Expression-valued colours cannot be resolved statically; keep them.
          if (attr.value != null && typeof attr.value !== 'string') return true;
          const cls = notionColorToClass(attr.value ?? '');
          if (cls) classesToAdd.push(cls);
          return false;
        }

        // A bare `underline` attribute has a null value, which JSX treats as true.
        if (isSpan && attr.name === 'underline' && (attr.value === 'true' || attr.value == null)) {
          classesToAdd.push('underline');
          return false;
        }

        return true;
      });

      // Nothing was consumed: leave the node exactly as it was.
      if (keptAttrs.length === attrs.length) return;

      if (classesToAdd.length > 0) {
        // eslint-disable-next-line @typescript-eslint/no-explicit-any
        const classAttr = keptAttrs.find((attr: any) =>
          attr.type === 'mdxJsxAttribute' && (attr.name === 'class' || attr.name === 'className'),
        );

        if (!classAttr) {
          keptAttrs.push({ type: 'mdxJsxAttribute', name: 'class', value: classesToAdd.join(' ') });
        } else if (classAttr.value == null || typeof classAttr.value === 'string') {
          classAttr.value = [classAttr.value ?? '', ...classesToAdd].filter(Boolean).join(' ');
        } else {
          // The existing class is an MDX expression; stringifying it would
          // produce "[object Object]". Leave the node untouched instead.
          return;
        }
      }

      // Assigned even when no class was produced so that attributes such as
      // color="default" do not leak into the rendered HTML.
      node.attributes = keptAttrs;
    });
  };
};
197
-
198
- // ── Notion element name → PascalCase component name mapping ──────────────
199
- //
200
- // MDX's component-substitution rule:
201
- // - PascalCase names → component variable: _jsx(Video, ...) ← components map IS consulted
202
- // - lowercase names → HTML string: _jsx("video", ...) ← components map is IGNORED
203
- //
204
- // This applies to ALL elements in the MDX compile tree, whether they come
205
- // from the MDX source, remark plugins, or raw HTML processed by rehype-raw.
206
- // Elements from raw HTML (Notion markdown) end up as `mdxJsxFlowElement`
207
- // nodes with their original lowercase names. Renaming them to PascalCase here
208
- // enables the `components` prop to substitute them with Astro components.
209
- //
210
- // There are two sets of renames:
211
- // 1. NOTION_BLOCK_RENAMES — block-level elements (mdxJsxFlowElement)
212
- // 2. NOTION_MENTION_RENAMES — inline mention elements (mdxJsxTextElement)
213
-
214
// Lowercase Notion block element name → PascalCase component name.
// Consumed by rehypeBlockElementsPlugin; the PascalCase values have to match
// the keys used in defaultComponents / notroComponents.
const NOTION_BLOCK_RENAMES = new Map<string, string>(
  Object.entries({
    table_of_contents: 'TableOfContents',
    video: 'Video',
    audio: 'Audio',
    file: 'FileBlock',
    pdf: 'PdfBlock',
    columns: 'Columns',
    column: 'Column',
    page: 'PageRef',
    database: 'DatabaseRef',
    details: 'Details',
    summary: 'Summary',
    'empty-block': 'EmptyBlock',
  }),
);
230
-
231
/**
 * Rehype plugin: gives Notion block elements their PascalCase component name.
 *
 * Every `mdxJsxFlowElement` / `mdxJsxTextElement` whose `name` is a key of
 * NOTION_BLOCK_RENAMES is renamed to the mapped value (e.g. `video` →
 * `Video`); all other nodes are left alone. Text elements are included
 * because block tags written without blank lines between them can end up
 * grouped inside a paragraph.
 *
 * Has to run before rehypeTocPlugin, which searches for the renamed
 * `TableOfContents` node.
 */
const rehypeBlockElementsPlugin: Plugin<[], Root> = () => (tree) => {
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  visit(tree, (node: any) => {
    const isMdxJsxNode =
      node.type === 'mdxJsxFlowElement' || node.type === 'mdxJsxTextElement';
    if (!isMdxJsxNode) return;

    const componentName = NOTION_BLOCK_RENAMES.get(node.name);
    if (componentName) {
      node.name = componentName;
    }
  });
};
259
-
260
// Hyphenated Notion mention element name → PascalCase component name.
// Consumed by rehypeInlineMentionsPlugin; the PascalCase values have to match
// the keys used in defaultComponents / notroComponents.
const NOTION_MENTION_RENAMES = new Map<string, string>()
  .set('mention-user', 'MentionUser')
  .set('mention-page', 'MentionPage')
  .set('mention-database', 'MentionDatabase')
  .set('mention-data-source', 'MentionDataSource')
  .set('mention-agent', 'MentionAgent')
  .set('mention-date', 'MentionDate');
270
-
271
/**
 * Rehype plugin: gives Notion inline mentions their PascalCase component name.
 *
 * Every `mdxJsxTextElement` / `mdxJsxFlowElement` whose `name` is a key of
 * NOTION_MENTION_RENAMES (`mention-user`, `mention-date`, …) is renamed to
 * the mapped value (`MentionUser`, `MentionDate`, …) so that MDX resolves it
 * through the components map instead of emitting a plain HTML tag name.
 *
 * Has to run before resolvePageLinksPlugin, which matches on the renamed
 * `MentionPage` / `MentionDatabase` names.
 */
const rehypeInlineMentionsPlugin: Plugin<[], Root> = () => (tree) => {
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  visit(tree, (node: any) => {
    switch (node.type) {
      case 'mdxJsxTextElement':
      case 'mdxJsxFlowElement': {
        const componentName = NOTION_MENTION_RENAMES.get(node.name);
        if (componentName) node.name = componentName;
        return;
      }
      default:
        return;
    }
  });
};
293
-
294
-
295
-
296
/**
 * Maps a Notion URL to a site-internal path when it points at a known page.
 *
 * Matching compares dash-stripped strings: the URL must END with a page ID
 * from `linkToPages`. `endsWith` (not `includes`) prevents an ID from
 * matching a URL that merely contains it somewhere in the middle.
 *
 * Two forms of the URL are tried, in this order:
 * 1. the URL exactly as given (so `…?p=<id>` style links keep working);
 * 2. the URL without its query string, fragment and trailing slashes, so
 *    links such as `…-<id>?pvs=4` or `…-<id>#heading` resolve as well.
 *
 * Empty page IDs are ignored; an empty string would otherwise "match" every
 * URL because every string ends with `''`.
 *
 * @param url URL found in the document.
 * @param linkToPages Page-ID → page-info lookup; `info.url` is used as the
 *                    internal path (prefixed with `/`).
 * @returns `{ href: '/<info.url>', isExternal: false }` on a match, otherwise
 *          the original URL with `isExternal: true`.
 */
function resolveNotionUrl(
  url: string,
  linkToPages: LinkToPages,
): { href: string; isExternal: boolean } {
  const stripDashes = (value: string): string => value.replace(/-/g, '');

  const fullNoDash = stripDashes(url);
  const suffixStart = url.search(/[?#]/);
  const bareUrl = (suffixStart === -1 ? url : url.slice(0, suffixStart)).replace(/\/+$/, '');
  const bareNoDash = stripDashes(bareUrl);
  const candidates = bareNoDash === fullNoDash ? [fullNoDash] : [fullNoDash, bareNoDash];

  const knownPages = Object.entries(linkToPages)
    .map(([pageId, info]) => ({ idNoDash: stripDashes(pageId), info }))
    .filter(({ idNoDash }) => idNoDash !== '');

  for (const candidate of candidates) {
    for (const { idNoDash, info } of knownPages) {
      if (candidate.endsWith(idNoDash)) {
        return { href: `/${info.url}`, isExternal: false };
      }
    }
  }
  return { href: url, isExternal: true };
}

type ResolveOptions = { linkToPages: LinkToPages };
317
-
318
/**
 * Returns the string value of the first `url` attribute on an MDX JSX node
 * (mdxJsxFlowElement / mdxJsxTextElement).
 *
 * @returns The value when it is a string; `undefined` when the node has no
 *          attributes, no `url` attribute, or a non-string value.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any
function getUrlFromMdxJsx(node: any): string | undefined {
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  const isUrlAttribute = (candidate: any): boolean =>
    candidate.type === 'mdxJsxAttribute' && candidate.name === 'url';

  const urlAttribute = node.attributes?.find(isUrlAttribute);
  if (typeof urlAttribute?.value !== 'string') return undefined;
  return urlAttribute.value;
}
325
-
326
/**
 * Writes `href` into the `url` attribute of an MDX JSX node
 * (mdxJsxFlowElement / mdxJsxTextElement).
 *
 * The first existing `url` attribute is updated in place; when there is
 * none, a new attribute is appended and `node.attributes` is replaced with a
 * new array.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any
function setUrlOnMdxJsx(node: any, href: string): void {
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  const existingAttr = node.attributes?.find((candidate: any) => {
    return candidate.type === 'mdxJsxAttribute' && candidate.name === 'url';
  });

  if (!existingAttr) {
    const createdAttr = { type: 'mdxJsxAttribute', name: 'url', value: href };
    node.attributes = [...(node.attributes ?? []), createdAttr];
    return;
  }

  existingAttr.value = href;
}
337
-
338
/**
 * Rehype plugin: rewrites Notion page/database URLs to site-internal paths
 * using `resolveNotionUrl` and the supplied `linkToPages` lookup.
 *
 * Handles three kinds of nodes:
 * - hast `<a href>` elements whose href contains `notion.so`;
 * - MDX JSX `<a href>` nodes (an anchor written as raw HTML with attributes),
 *   treated the same way;
 * - MDX JSX nodes named `PageRef`, `DatabaseRef`, `MentionPage` or
 *   `MentionDatabase`, whose `url` attribute is rewritten. These are the
 *   names produced by rehypeBlockElementsPlugin / rehypeInlineMentionsPlugin,
 *   so this plugin has to run after both.
 *
 * Anchors are only modified when the URL resolves to a known page; unresolved
 * links keep their original href.
 *
 * @param options `{ linkToPages }` — page-ID → page-info lookup.
 */
const resolvePageLinksPlugin: Plugin<[ResolveOptions], Root> = (options) => {
  const { linkToPages } = options;
  const referenceComponentNames = new Set([
    'PageRef',
    'DatabaseRef',
    'MentionPage',
    'MentionDatabase',
  ]);

  return (tree) => {
    // Standard markdown links compiled to hast <a> elements.
    visit(tree, 'element', (node: Element) => {
      if (node.tagName !== 'a') return;
      const rawHref = node.properties?.href;
      if (typeof rawHref !== 'string' || !rawHref.includes('notion.so')) return;

      const { href: resolved, isExternal } = resolveNotionUrl(rawHref, linkToPages);
      if (!isExternal) {
        node.properties = { ...node.properties, href: resolved };
      }
    });

    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    visit(tree, (node: any) => {
      if (node.type !== 'mdxJsxTextElement' && node.type !== 'mdxJsxFlowElement') return;

      // Anchors that were parsed as JSX carry `href` in attributes[], not properties.
      if (node.name === 'a') {
        // eslint-disable-next-line @typescript-eslint/no-explicit-any
        const hrefAttr = node.attributes?.find((a: any) =>
          a.type === 'mdxJsxAttribute' && a.name === 'href',
        );
        if (typeof hrefAttr?.value === 'string' && hrefAttr.value.includes('notion.so')) {
          const { href: resolved, isExternal } = resolveNotionUrl(hrefAttr.value, linkToPages);
          if (!isExternal) hrefAttr.value = resolved;
        }
        return;
      }

      if (!referenceComponentNames.has(node.name)) return;

      const url = getUrlFromMdxJsx(node);
      if (url) {
        // For unresolved URLs `href` equals the input, so this is a no-op.
        const { href } = resolveNotionUrl(url, linkToPages);
        setUrlOnMdxJsx(node, href);
      }
    });
  };
};
386
-
387
- // ── TOC population ─────────────────────────────────────────────────────────
388
-
389
/**
 * Rehype plugin: fills every `TableOfContents` node with a list of anchor
 * links to the document's h1–h4 headings.
 *
 * Must run AFTER rehype-slug (headings need their `id`) and AFTER
 * rehypeBlockElementsPlugin (which renames `table_of_contents` to
 * `TableOfContents`).
 *
 * Two passes:
 * 1. Collect each hast `h1`–`h4` element that has a non-empty string `id`.
 * 2. Replace the children of each MDX JSX `TableOfContents` node with
 *    `<ul class="notro-toc-list">` containing one
 *    `<li class="notro-toc-item notro-toc-level-N"><a href="#id">text</a></li>`
 *    per heading. The heading level is exposed through the
 *    `notro-toc-level-N` class.
 *
 * When no heading qualifies, TOC nodes are left unchanged.
 */
const rehypeTocPlugin: Plugin<[], Root> = () => {
  return (tree) => {
    // Pass 1: collect headings with IDs.
    const headings: Array<{ level: number; id: string; text: string }> = [];
    visit(tree, 'element', (node: Element) => {
      const match = /^h([1-4])$/.exec(node.tagName);
      if (!match) return;
      const id = node.properties?.id;
      if (typeof id !== 'string' || !id) return;
      headings.push({
        level: parseInt(match[1], 10),
        id,
        text: hastNodeToString(node),
      });
    });

    if (headings.length === 0) return;

    // Builds a fresh subtree on every call so that several TOC nodes never
    // share (and accidentally co-mutate) the same child objects.
    const buildTocChildren = () => [
      {
        type: 'element' as const,
        tagName: 'ul',
        properties: { className: ['notro-toc-list'] },
        children: headings.map((h) => ({
          type: 'element' as const,
          tagName: 'li',
          properties: { className: [`notro-toc-item`, `notro-toc-level-${h.level}`] },
          children: [
            {
              type: 'element' as const,
              tagName: 'a',
              properties: { href: `#${h.id}` },
              children: [{ type: 'text' as const, value: h.text }],
            },
          ],
        })),
      },
    ];

    // Pass 2: inject the list. Both MDX JSX node types are accepted because
    // rehypeBlockElementsPlugin renames the element in either form.
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    visit(tree, (node: any) => {
      if (node.type !== 'mdxJsxFlowElement' && node.type !== 'mdxJsxTextElement') return;
      if (node.name !== 'TableOfContents') return;
      node.children = buildTocChildren();
    });
  };
};
453
-
454
- // ── Plugin bundle factory ──────────────────────────────────────────────────
455
-
456
/** Remark and rehype plugin lists produced by `buildMdxPlugins`. */
export type MdxPlugins = {
  remarkPlugins: PluggableList;
  rehypePlugins: PluggableList;
};

/**
 * Assembles the remark and rehype plugin chains for Notion MDX.
 *
 * Remark: `remarkNfm` followed by the user's remark plugins.
 *
 * Rehype, in execution order:
 * 1. `rehypeRaw` (with `passThrough: NOTION_CUSTOM_ELEMENTS`)
 * 2. `rehypeNotionColorPlugin` — color/underline attributes → CSS classes
 * 3. `rehypeBlockElementsPlugin` — block element names → PascalCase
 * 4. `rehypeInlineMentionsPlugin` — mention element names → PascalCase
 * 5. the user's rehype plugins, as returned by `getNotroPlugins()`
 * 6. `rehypeSlug` — adds heading ids, which step 7 reads
 * 7. `rehypeTocPlugin` — fills `TableOfContents` nodes
 * 8. `resolvePageLinksPlugin` — rewrites Notion URLs via `linkToPages`
 *
 * @param linkToPages Page-ID → page-info lookup passed to the link resolver.
 */
export function buildMdxPlugins(linkToPages: LinkToPages): MdxPlugins {
  const userPlugins = getNotroPlugins();

  const remarkPlugins: PluggableList = [remarkNfm, ...userPlugins.remarkPlugins];

  const rehypePlugins: PluggableList = [
    // First: turn raw HTML into hast nodes so the plugins below can see it.
    [rehypeRaw, { passThrough: NOTION_CUSTOM_ELEMENTS }],
    rehypeNotionColorPlugin,
    // Renames come before the user plugins and before the TOC / link
    // plugins, which look for the PascalCase names.
    rehypeBlockElementsPlugin,
    rehypeInlineMentionsPlugin,
    ...userPlugins.rehypePlugins,
    // Slug before TOC: the TOC plugin only lists headings that have an id.
    rehypeSlug,
    rehypeTocPlugin,
    [resolvePageLinksPlugin, { linkToPages }] as const,
  ];

  return { remarkPlugins, rehypePlugins };
}
1
+ /**
2
+ * MDX plugin pipeline for Notion Enhanced Markdown.
3
+ *
4
+ * Parser layer — configures the remark (markdown → mdast) and rehype
5
+ * (hast → HTML) plugin chains. Astro runtime binding lives in compile-mdx.ts.
6
+ *
7
+ * Responsibility layers:
8
+ * - remarkNfm: always active, required for Notion content
9
+ * - NOTION_CORE_REHYPE_PLUGINS (internal): always active, Notion-specific
10
+ * - User-provided plugins via notro({ remarkPlugins, rehypePlugins }):
11
+ * math (remark-math + rehype-katex), diagrams (rehype-beautiful-mermaid), etc.
12
+ * - Built-in Shiki support via notro({ shikiConfig }): injected last so user
13
+ * plugins (rehypeMermaid, rehypeKatex) run before syntax highlighting
14
+ */
15
+
16
+ import rehypeRaw from 'rehype-raw';
17
+ import rehypeSlug from 'rehype-slug';
18
+ import { remarkNfm } from 'remark-notro';
19
+ import { getNotroPlugins } from './notro-config.ts';
20
+
21
+ import type { Plugin, PluggableList } from 'unified';
22
+ import type { Root, Element } from 'hast';
23
+ import { visit } from 'unist-util-visit';
24
+ import type { LinkToPages } from '../types.ts';
25
+
26
/**
 * Concatenates the text of a hast node and all of its descendants.
 *
 * A `text` node contributes its `value` (empty string when the value is
 * missing); any other node contributes the joined text of its `children`,
 * visited depth-first in document order.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any
function hastNodeToString(node: any): string {
  if (node.type !== 'text') {
    const descendants = node.children ?? [];
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    return descendants.map((child: any) => hastNodeToString(child)).join('');
  }
  return node.value ?? '';
}
32
+
33
// Names handed to rehype-raw's `passThrough` option (see buildMdxPlugins).
//
// rehype-raw documents `passThrough` as a list of node *types* to keep. The MDX
// node types must be listed or rehype-raw throws
// "Cannot compile `mdxJsxFlowElement` node" at build time.
// NOTE(review): the Notion tag names below are kept for backward
// compatibility; since `passThrough` is documented as matching node types,
// they are presumably not what preserves those elements — confirm before
// relying on (or removing) them.
const NOTION_CUSTOM_ELEMENTS = [
  // MDX AST node types (the same set @mdx-js/mdx exports as `nodeTypes`).
  'mdxJsxFlowElement',
  'mdxJsxTextElement',
  'mdxFlowExpression',
  'mdxTextExpression',
  // ESM (import/export) node type; without it rehype-raw cannot compile
  // documents that contain import/export statements.
  'mdxjsEsm',
  // Legacy names, retained so existing behaviour is unchanged.
  'mdxJsImport',
  'mdxJsExport',
  // Notion block-level element names.
  'callout',
  'columns',
  'column',
  'audio',
  'video',
  'file',
  'pdf',
  'page',
  'database',
  'table_of_contents',
  'empty-block',
  // Notion inline mention element names.
  'mention-user',
  'mention-page',
  'mention-database',
  'mention-data-source',
  'mention-agent',
  'mention-date',
];
63
+
64
+ // ── Notion color attribute → CSS class conversion ─────────────────────────
65
+
66
// Base colour names Notion uses; each one exists as a text colour and, with a
// background suffix, as a background colour.
const NOTION_COLOR_NAMES = new Set([
  'gray',
  'brown',
  'orange',
  'yellow',
  'green',
  'blue',
  'purple',
  'pink',
  'red',
]);

/**
 * Translates a Notion `color` attribute value into a notro CSS class name.
 *
 * - `<name>`                      → `notro-text-<name>`
 * - `<name>_bg` (current format)  → `notro-bg-<name>`
 * - `<name>_background` (legacy)  → `notro-bg-<name>`
 *
 * @param color Raw attribute value.
 * @returns The class name, or `''` for an empty value, `default`, or any
 *          value whose base name is not a known Notion colour.
 */
function notionColorToClass(color: string): string {
  if (!color || color === 'default') return '';

  const backgroundSuffix = ['_bg', '_background'].find((candidate) => color.endsWith(candidate));

  if (backgroundSuffix === undefined) {
    return NOTION_COLOR_NAMES.has(color) ? `notro-text-${color}` : '';
  }

  const baseName = color.slice(0, color.length - backgroundSuffix.length);
  return NOTION_COLOR_NAMES.has(baseName) ? `notro-bg-${baseName}` : '';
}
89
+
90
/**
 * Adds `cls` to `properties.className`, mutating `properties` in place.
 *
 * The result is always an array of class tokens:
 * - an existing array is copied and extended;
 * - an existing string is split on whitespace, so `'a b'` becomes
 *   `['a', 'b']` rather than the single malformed token `'a b'`;
 * - a missing/falsy value starts a new list.
 *
 * A class that is already present is not added a second time.
 *
 * @param properties hast element properties object to update.
 * @param cls Class name to add; an empty string is a no-op.
 */
function appendClass(properties: Record<string, unknown>, cls: string): void {
  if (!cls) return;

  const existing = properties.className;
  let tokens: unknown[];
  if (Array.isArray(existing)) {
    tokens = [...existing];
  } else if (existing) {
    tokens = String(existing).split(/\s+/).filter(Boolean);
  } else {
    tokens = [];
  }

  if (!tokens.includes(cls)) tokens.push(cls);
  properties.className = tokens;
}
97
+
98
/**
 * Rehype plugin: converts Notion `color` (and, on spans, `underline`)
 * attributes into CSS classes, removing the original attributes.
 *
 * Two node shapes are handled:
 * - hast `element` nodes (`tagName` + `properties`);
 * - `mdxJsxFlowElement` / `mdxJsxTextElement` nodes (`name` + `attributes[]`
 *   of `{ type: 'mdxJsxAttribute', name, value }`).
 *
 * Only `<p>`, `<h1>`–`<h6>` and `<span>` are touched:
 * - `color="gray_bg"` → class from `notionColorToClass` (attribute is removed
 *   even when the value maps to no class, e.g. `default`);
 * - `<span underline="true">` / `<span underline>` → class `underline`.
 *
 * Attribute values that are MDX expressions (objects, not strings) cannot be
 * evaluated here and are left untouched.
 */
const rehypeNotionColorPlugin: Plugin<[], Root> = () => {
  // Compiled once per plugin instance instead of once per visited node.
  const blockTagPattern = /^(p|h[1-6])$/;

  return (tree) => {
    // ── hast element nodes ────────────────────────────────────────────────
    visit(tree, 'element', (node: Element) => {
      const isSpan = node.tagName === 'span';
      if (!isSpan && !blockTagPattern.test(node.tagName)) return;

      const props = node.properties ?? {};
      const color = props.color;

      if (typeof color === 'string') {
        const cls = notionColorToClass(color);
        delete props.color;
        appendClass(props, cls);
        node.properties = props;
      }

      if (isSpan && (props.underline === 'true' || props.underline === true)) {
        delete props.underline;
        appendClass(props, 'underline');
        node.properties = props;
      }
    });

    // ── MDX JSX nodes ─────────────────────────────────────────────────────
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    visit(tree, (node: any) => {
      if (node.type !== 'mdxJsxFlowElement' && node.type !== 'mdxJsxTextElement') return;

      const name: string = node.name ?? '';
      const isSpan = name === 'span';
      if (!isSpan && !blockTagPattern.test(name)) return;

      // eslint-disable-next-line @typescript-eslint/no-explicit-any
      const attrs: any[] = Array.isArray(node.attributes) ? node.attributes : [];
      const classesToAdd: string[] = [];

      // Drop the attributes we translate, collecting the classes they map to.
      // eslint-disable-next-line @typescript-eslint/no-explicit-any
      const keptAttrs = attrs.filter((attr: any) => {
        if (attr.type !== 'mdxJsxAttribute') return true;

        if (attr.name === 'color') {
          // Expression-valued colours cannot be resolved statically; keep them.
          if (attr.value != null && typeof attr.value !== 'string') return true;
          const cls = notionColorToClass(attr.value ?? '');
          if (cls) classesToAdd.push(cls);
          return false;
        }

        // A bare `underline` attribute has a null value, which JSX treats as true.
        if (isSpan && attr.name === 'underline' && (attr.value === 'true' || attr.value == null)) {
          classesToAdd.push('underline');
          return false;
        }

        return true;
      });

      // Nothing was consumed: leave the node exactly as it was.
      if (keptAttrs.length === attrs.length) return;

      if (classesToAdd.length > 0) {
        // eslint-disable-next-line @typescript-eslint/no-explicit-any
        const classAttr = keptAttrs.find((attr: any) =>
          attr.type === 'mdxJsxAttribute' && (attr.name === 'class' || attr.name === 'className'),
        );

        if (!classAttr) {
          keptAttrs.push({ type: 'mdxJsxAttribute', name: 'class', value: classesToAdd.join(' ') });
        } else if (classAttr.value == null || typeof classAttr.value === 'string') {
          classAttr.value = [classAttr.value ?? '', ...classesToAdd].filter(Boolean).join(' ');
        } else {
          // The existing class is an MDX expression; stringifying it would
          // produce "[object Object]". Leave the node untouched instead.
          return;
        }
      }

      // Assigned even when no class was produced so that attributes such as
      // color="default" do not leak into the rendered HTML.
      node.attributes = keptAttrs;
    });
  };
};
197
+
198
+ // ── Notion element name → PascalCase component name mapping ──────────────
199
+ //
200
+ // MDX's component-substitution rule:
201
+ // - PascalCase names → component variable: _jsx(Video, ...) ← components map IS consulted
202
+ // - lowercase names → HTML string: _jsx("video", ...) ← components map is IGNORED
203
+ //
204
+ // This applies to ALL elements in the MDX compile tree, whether they come
205
+ // from the MDX source, remark plugins, or raw HTML processed by rehype-raw.
206
+ // Elements from raw HTML (Notion markdown) end up as `mdxJsxFlowElement`
207
+ // nodes with their original lowercase names. Renaming them to PascalCase here
208
+ // enables the `components` prop to substitute them with Astro components.
209
+ //
210
+ // There are two sets of renames:
211
+ // 1. NOTION_BLOCK_RENAMES — block-level elements (mdxJsxFlowElement)
212
+ // 2. NOTION_MENTION_RENAMES — inline mention elements (mdxJsxTextElement)
213
+
214
// Lowercase Notion block element name → PascalCase component name.
// Consumed by rehypeBlockElementsPlugin; the PascalCase values have to match
// the keys used in defaultComponents / notroComponents.
const NOTION_BLOCK_RENAMES = new Map<string, string>(
  Object.entries({
    table_of_contents: 'TableOfContents',
    video: 'Video',
    audio: 'Audio',
    file: 'FileBlock',
    pdf: 'PdfBlock',
    columns: 'Columns',
    column: 'Column',
    page: 'PageRef',
    database: 'DatabaseRef',
    details: 'Details',
    summary: 'Summary',
    'empty-block': 'EmptyBlock',
  }),
);
230
+
231
/**
 * Rehype plugin: gives Notion block elements their PascalCase component name.
 *
 * Every `mdxJsxFlowElement` / `mdxJsxTextElement` whose `name` is a key of
 * NOTION_BLOCK_RENAMES is renamed to the mapped value (e.g. `video` →
 * `Video`); all other nodes are left alone. Text elements are included
 * because block tags written without blank lines between them can end up
 * grouped inside a paragraph.
 *
 * Has to run before rehypeTocPlugin, which searches for the renamed
 * `TableOfContents` node.
 */
const rehypeBlockElementsPlugin: Plugin<[], Root> = () => (tree) => {
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  visit(tree, (node: any) => {
    const isMdxJsxNode =
      node.type === 'mdxJsxFlowElement' || node.type === 'mdxJsxTextElement';
    if (!isMdxJsxNode) return;

    const componentName = NOTION_BLOCK_RENAMES.get(node.name);
    if (componentName) {
      node.name = componentName;
    }
  });
};
259
+
260
// Rename table for inline mention elements found in Notion markdown.
// Key: the hyphenated-lowercase element name. Value: the PascalCase name it
// is renamed to. Hyphenated-lowercase names would otherwise compile as plain
// HTML strings in MDX.
const NOTION_MENTION_RENAMES = new Map<string, string>(
  Object.entries({
    'mention-user': 'MentionUser',
    'mention-page': 'MentionPage',
    'mention-database': 'MentionDatabase',
    'mention-data-source': 'MentionDataSource',
    'mention-agent': 'MentionAgent',
    'mention-date': 'MentionDate',
  }),
);
270
+
271
/**
 * Rehype plugin: gives Notion inline mention elements their PascalCase names
 * (mention-user → MentionUser, mention-date → MentionDate, …) so that MDX
 * generates a components-map lookup instead of a plain HTML string.
 *
 * Every MDX JSX node whose name is a key of NOTION_MENTION_RENAMES is
 * renamed in place; all other nodes are left untouched.
 *
 * Ordering: must run before hast-util-to-estree, i.e. before @mdx-js/mdx
 * compiles the tree. The PascalCase names must match the component keys in
 * defaultComponents / notroComponents.
 */
const rehypeInlineMentionsPlugin: Plugin<[], Root> = () => (tree) => {
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  visit(tree, (node: any) => {
    // Mentions normally arrive as mdxJsxTextElement nodes, because MDX's JSX
    // parser handles inline HTML such as <mention-user url="...">. Flow
    // elements are accepted as well.
    const isMdxJsxNode =
      node.type === 'mdxJsxTextElement' || node.type === 'mdxJsxFlowElement';
    if (!isMdxJsxNode) return;

    const pascalName = NOTION_MENTION_RENAMES.get(node.name);
    if (pascalName) {
      node.name = pascalName;
    }
  });
};
293
+
294
+
295
+
296
/**
 * Resolve a Notion URL to a site-local link when it points at a known page.
 *
 * Notion URLs end with the page ID (32-char hex, with or without dashes),
 * e.g. https://www.notion.so/My-Page-Title-abc123def456...
 * Matching works on dash-free strings: dashes are removed from both the URL
 * and each candidate ID, and the URL must END with the ID. Using endsWith()
 * rather than includes() prevents a shorter ID from matching a different,
 * longer ID that merely contains it (e.g. "abc" matching "abc123").
 *
 * A query string or fragment (`?pvs=4`, `?v=…`, `#block-id`) is ignored for
 * matching, since it would otherwise sit after the ID and defeat endsWith().
 *
 * @param url          The URL as found in the document.
 * @param linkToPages  Map of page ID → page info; `info.url` is the
 *                     site-relative path, to which a leading "/" is added.
 * @returns `{ href: "/<info.url>", isExternal: false }` for the first entry
 *          whose ID matches; otherwise the input URL unchanged with
 *          `isExternal: true`.
 */
function resolveNotionUrl(
  url: string,
  linkToPages: LinkToPages,
): { href: string; isExternal: boolean } {
  // Drop everything from the first "?" or "#" so the page ID is the tail of
  // the string, then remove dashes.
  const urlNoDash = url.replace(/[?#].*$/, '').replace(/-/g, '');

  for (const [pageId, info] of Object.entries(linkToPages)) {
    const idNoDash = pageId.replace(/-/g, '');
    // Every string ends with '', so an empty (or dash-only) key would
    // otherwise claim every URL.
    if (idNoDash === '') continue;
    if (urlNoDash.endsWith(idNoDash)) {
      return { href: `/${info.url}`, isExternal: false };
    }
  }

  return { href: url, isExternal: true };
}
315
+
316
/** Options accepted by resolvePageLinksPlugin. */
type ResolveOptions = {
  /** Page lookup that is handed to resolveNotionUrl when resolving links. */
  linkToPages: LinkToPages;
};
317
+
318
/**
 * Read the `url` attribute value from an mdxJsxFlowElement/mdxJsxTextElement.
 *
 * Only the first attribute of type `mdxJsxAttribute` named `url` is
 * considered. Returns its value when that value is a string, and `undefined`
 * when there is no such attribute or its value is not a string.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any
function getUrlFromMdxJsx(node: any): string | undefined {
  for (const candidate of node.attributes ?? []) {
    if (candidate.type === 'mdxJsxAttribute' && candidate.name === 'url') {
      // The first match decides; later `url` attributes are never inspected.
      return typeof candidate.value === 'string' ? candidate.value : undefined;
    }
  }
  return undefined;
}
325
+
326
/**
 * Set the `url` attribute on an mdxJsxFlowElement/mdxJsxTextElement.
 *
 * If the node already has an attribute of type `mdxJsxAttribute` named `url`,
 * the first such attribute is updated in place. Otherwise a new attribute is
 * appended; in that case `node.attributes` is replaced by a new array rather
 * than mutated.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any
function setUrlOnMdxJsx(node: any, href: string): void {
  const current = node.attributes ?? [];

  for (const candidate of current) {
    if (candidate.type === 'mdxJsxAttribute' && candidate.name === 'url') {
      candidate.value = href;
      return;
    }
  }

  node.attributes = [...current, { type: 'mdxJsxAttribute', name: 'url', value: href }];
}
337
+
338
/**
 * Rehype plugin: rewrites Notion page/database URLs to site-local links.
 *
 * Two kinds of nodes are processed:
 *   - hast `<a>` elements whose `href` is a string containing "notion.so".
 *     The href is replaced only when resolveNotionUrl reports an internal
 *     page; otherwise the element is left as it was.
 *   - MDX JSX nodes named PageRef, DatabaseRef, MentionPage or
 *     MentionDatabase that carry a non-empty string `url` attribute. The
 *     attribute is always rewritten with the href returned by
 *     resolveNotionUrl (which is the original URL when no page matches).
 *
 * The JSX names are the PascalCase forms, so this plugin relies on
 * rehypeBlockElementsPlugin (page → PageRef, database → DatabaseRef) and
 * rehypeInlineMentionsPlugin (mention-page → MentionPage,
 * mention-database → MentionDatabase) having run earlier.
 */
const resolvePageLinksPlugin: Plugin<[ResolveOptions], Root> = ({ linkToPages }) => {
  // Names of the MDX JSX nodes whose `url` attribute gets resolved.
  const linkBearingNames = new Set(['PageRef', 'DatabaseRef', 'MentionPage', 'MentionDatabase']);

  return (tree) => {
    // Standard links: <a href="https://www.notion.so/...">.
    visit(tree, 'element', (node: Element) => {
      if (node.tagName !== 'a') return;

      const rawHref = node.properties?.href;
      if (typeof rawHref !== 'string' || !rawHref.includes('notion.so')) return;

      const target = resolveNotionUrl(rawHref, linkToPages);
      if (target.isExternal) return;

      node.properties = { ...node.properties, href: target.href };
    });

    // Page/database references and inline mentions (MDX JSX nodes).
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    visit(tree, (node: any) => {
      const isMdxJsxNode =
        node.type === 'mdxJsxTextElement' || node.type === 'mdxJsxFlowElement';
      if (!isMdxJsxNode || !linkBearingNames.has(node.name)) return;

      const url = getUrlFromMdxJsx(node);
      if (!url) return;

      setUrlOnMdxJsx(node, resolveNotionUrl(url, linkToPages).href);
    });
  };
};
386
+
387
+ // ── TOC population ─────────────────────────────────────────────────────────
388
+
389
/**
 * Rehype plugin: populates TableOfContents nodes with anchor links generated
 * from the h1–h4 headings in the document.
 *
 * Must run AFTER rehype-slug (headings need their id attributes) and after
 * rehypeBlockElementsPlugin (which renames <table_of_contents> to
 * TableOfContents).
 *
 * Two passes over the tree:
 *   1. Collect every h1–h4 hast element that has a non-empty string id.
 *   2. Replace the children of each TableOfContents MDX JSX node with
 *      <ul class="notro-toc-list"> containing one
 *      <li class="notro-toc-item notro-toc-level-N"><a href="#id">text</a></li>
 *      per heading, where N is the heading level (for CSS indentation).
 *
 * When no heading qualifies, the tree is left untouched.
 */
const rehypeTocPlugin: Plugin<[], Root> = () => {
  const headingTag = /^h([1-4])$/;

  return (tree) => {
    // Pass 1: collect headings with IDs.
    const headings: Array<{ level: number; id: string; text: string }> = [];
    visit(tree, 'element', (node: Element) => {
      const match = headingTag.exec(node.tagName);
      if (!match) return;
      const id = node.properties?.id;
      if (typeof id !== 'string' || !id) return;
      headings.push({
        level: parseInt(match[1], 10),
        id,
        text: hastNodeToString(node),
      });
    });

    if (headings.length === 0) return;

    // Builds a fresh list subtree on every call, so that each TableOfContents
    // node owns its children and a later in-place edit of one TOC cannot leak
    // into another.
    const buildTocChildren = () => [
      {
        type: 'element' as const,
        tagName: 'ul',
        properties: { className: ['notro-toc-list'] },
        children: headings.map((h) => ({
          type: 'element' as const,
          tagName: 'li',
          properties: { className: [`notro-toc-item`, `notro-toc-level-${h.level}`] },
          children: [
            {
              type: 'element' as const,
              tagName: 'a',
              properties: { href: `#${h.id}` },
              children: [{ type: 'text' as const, value: h.text }],
            },
          ],
        })),
      },
    ];

    // Pass 2: inject the list into every TableOfContents node.
    // rehypeBlockElementsPlugin renames both mdxJsxFlowElement and
    // mdxJsxTextElement nodes (the latter occurs when consecutive blocks are
    // grouped into a <p>), so both node types are populated here.
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    visit(tree, (node: any) => {
      if (node.type !== 'mdxJsxFlowElement' && node.type !== 'mdxJsxTextElement') return;
      if (node.name !== 'TableOfContents') return;
      node.children = buildTocChildren();
    });
  };
};
453
+
454
+ // ── Plugin bundle factory ──────────────────────────────────────────────────
455
+
456
/** Plugin bundle returned by buildMdxPlugins. */
export type MdxPlugins = {
  /** Remark plugin list, in execution order. */
  remarkPlugins: PluggableList;
  /** Rehype plugin list, in execution order. */
  rehypePlugins: PluggableList;
};
460
+
461
/**
 * Returns the remark and rehype plugin configuration for Notion MDX.
 *
 * User plugins obtained from getNotroPlugins() are spliced into fixed
 * positions: user remark plugins after remarkNfm, and user rehype plugins
 * after the Notion rename plugins but before rehypeSlug.
 *
 * @param linkToPages  Page lookup forwarded to resolvePageLinksPlugin.
 */
export function buildMdxPlugins(linkToPages: LinkToPages): MdxPlugins {
  const userPlugins = getNotroPlugins();

  const remarkPlugins: PluggableList = [remarkNfm, ...userPlugins.remarkPlugins];

  const rehypePlugins: PluggableList = [
    // 1. rehypeRaw goes first: it turns raw HTML strings in the mdast into
    //    hast element nodes, so later plugins and component mapping can work
    //    on them (e.g. <table>, <h2 color="...">). `passThrough` keeps the
    //    Notion-specific custom elements, which are not valid HTML and would
    //    otherwise be stripped by the HTML parser.
    [rehypeRaw, { passThrough: NOTION_CUSTOM_ELEMENTS }],
    // 2. Notion color/underline attributes → CSS classes. MDX component
    //    substitution does not reach HTML elements created by rehypeRaw, so
    //    the classes are applied directly in the hast tree.
    rehypeNotionColorPlugin,
    // 3. Block-level Notion elements (video, audio, table_of_contents,
    //    columns, …) → PascalCase, so MDX emits a components-map lookup
    //    (_jsx(Video, ...)) rather than a string literal (_jsx("video", ...)).
    rehypeBlockElementsPlugin,
    // 4. Inline mention elements (mention-user, mention-date, …) → PascalCase,
    //    for the same reason.
    rehypeInlineMentionsPlugin,
    // 5. User-provided plugins: math, diagrams, syntax highlighting, etc.
    //    e.g. notro({ rehypePlugins: [rehypeKatex, [rehypeMermaid, { theme: 'github-dark' }]] })
    //    notro({ shikiConfig: { theme: 'github-dark' } }) injects @shikijs/rehype automatically.
    ...userPlugins.rehypePlugins,
    // 6. rehype-slug gives headings their id attributes. It has to precede
    //    rehypeTocPlugin, which reads those ids.
    rehypeSlug,
    // 7. Fill TableOfContents with anchor links to the headings.
    rehypeTocPlugin,
    // 8. Rewrite Notion page/database URLs to site-local links.
    [resolvePageLinksPlugin, { linkToPages }] as const,
  ];

  return { remarkPlugins, rehypePlugins };
}