@dogsbay/format-starlight 0.2.0-beta.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,554 @@
1
+ /**
2
+ * Starlight MDX parser — v2 using markdown-it-mdx-jsx plugin.
3
+ *
4
+ * Replaces the regex-based segmentMdx approach with proper JSX token handling.
5
+ * The markdown-it-mdx-jsx plugin emits jsx_open/jsx_close/jsx_self_closing
6
+ * tokens that are handled alongside standard markdown tokens.
7
+ *
8
+ * Keeps:
9
+ * - Directive conversion (:::note → <Aside>)
10
+ * - Inline transforms (adapter.inlineTransforms)
11
+ * - Steps unwrapping (steps > ordered-list > list-item → steps > step)
12
+ * - Card grouping (consecutive cards → cards container)
13
+ * - The adapter pattern for site-specific component mappings
14
+ */
15
+ import MarkdownIt from "markdown-it";
16
+ import mdxJsx, { normalizeJsxLines } from "@dogsbay/markdown-it-mdx-jsx";
17
+ import { extractFrontmatter } from "./parse.js";
18
+ // ── Component mapping ───────────────────────────────────
19
+ // Starlight built-in components (always available)
20
/**
 * Built-in Starlight components, keyed by JSX tag name.
 *
 * Each entry describes the tree node the tag becomes:
 * - `type`: node type emitted into the tree
 * - `container`: whether the tag wraps child content
 * - `propsFromAttrs`: optional translation from JSX attributes to node props
 */
const STARLIGHT_COMPONENTS = {
  Tabs: { type: "tabs", container: true },
  TabItem: {
    type: "tab",
    container: true,
    propsFromAttrs(a) {
      return { title: a.label || "" };
    },
  },
  Steps: { type: "steps", container: true },
  Aside: {
    type: "callout",
    container: true,
    propsFromAttrs(a) {
      // An Aside without an explicit type is treated as a note.
      return { variant: a.type || "note", title: a.title };
    },
  },
  Card: {
    type: "card",
    container: true,
    propsFromAttrs(a) {
      return { title: a.title || "", icon: a.icon, href: a.href };
    },
  },
  CardGrid: { type: "cards", container: true },
  LinkCard: {
    type: "card",
    container: false,
    propsFromAttrs(a) {
      return { title: a.title || "", description: a.description, href: a.href };
    },
  },
  LinkButton: {
    type: "link-button",
    container: true,
    propsFromAttrs(a) {
      return { href: a.href || "", variant: a.variant };
    },
  },
  Badge: {
    type: "badge",
    container: false,
    propsFromAttrs(a) {
      return { text: a.text || "", variant: a.variant };
    },
  },
  Code: {
    type: "code",
    container: false,
    propsFromAttrs(a) {
      return { code: a.code || "", lang: a.lang || "plaintext", title: a.title };
    },
  },
};

/** Built-in tags that are always written in self-closing form (`<Badge />`). */
const CORE_SELF_CLOSING = ["LinkCard", "Badge", "Code"];
82
/**
 * Combine the built-in Starlight component table with an adapter's additions.
 *
 * @param adapter Optional site adapter. Its `components` are layered over the
 *   built-ins (same-named entries replace them) and its `selfClosingTags` are
 *   appended after the core self-closing tags.
 * @returns `components` (every known tag name), `selfClosing` (tags treated as
 *   self-closing) and `map` (tag name → mapping).
 */
function buildComponentMap(adapter) {
  const map = adapter
    ? Object.assign({ ...STARLIGHT_COMPONENTS }, adapter.components)
    : { ...STARLIGHT_COMPONENTS };
  const selfClosing = adapter
    ? [...CORE_SELF_CLOSING, ...(adapter.selfClosingTags ?? [])]
    : [...CORE_SELF_CLOSING];
  return { components: Object.keys(map), selfClosing, map };
}
97
+ // ── Pre-processing ──────────────────────────────────────
98
/**
 * Run the adapter's inline transforms over the source, in order.
 *
 * @param source Raw MDX text.
 * @param adapter Optional adapter; `inlineTransforms` is a list of
 *   `[pattern, replacement]` pairs passed to `String.prototype.replace`.
 * @returns The transformed text, or `source` unchanged when there is nothing to apply.
 */
function applyInlineTransforms(source, adapter) {
  const transforms = adapter?.inlineTransforms;
  if (!transforms) {
    return source;
  }
  // Each transform sees the output of the previous one.
  return Array.from(transforms).reduce(
    (text, [pattern, substitute]) => text.replace(pattern, substitute),
    source,
  );
}
107
/**
 * Replace every occurrence of the `~/assets/` Vite alias with the relative
 * prefix `assets/`.
 *
 * @param source MDX text.
 * @returns The text with all alias occurrences rewritten.
 */
function rewriteAssetPaths(source) {
  const ALIAS = "~/assets/";
  return source.split(ALIAS).join("assets/");
}
113
/**
 * Convert Starlight `:::note` / `:::tip` / `:::caution` / `:::danger` /
 * `:::warning` / `:::info` directive blocks to `<Aside>` component syntax
 * before parsing. `info` is mapped to the `note` variant; an optional
 * `[Title]` after the directive name becomes the `title` attribute.
 *
 * Fenced code blocks (``` or ~~~) are passed through untouched, so a
 * directive shown inside a code sample is not rewritten, and a `:::` line
 * inside a code sample within an aside does not terminate the aside.
 *
 * A directive that is never closed is closed at the end of the input.
 *
 * @param source MDX text.
 * @returns The text with directive blocks replaced by `<Aside>` tags.
 */
function convertDirectives(source) {
  // Leading whitespace is captured so the emitted tags keep the indentation
  // of the directive (for indented contexts such as list items).
  const OPEN_DIRECTIVE = /^(\s*):::(note|tip|caution|danger|warning|info)(?:\[([^\]]*)\])?\s*$/;
  // [\s\S] instead of "." so a trailing "\r" (CRLF input) is still matched.
  const FENCE = /^\s*(`{3,}|~{3,})([\s\S]*)$/;

  const lines = source.split("\n");
  const result = [];
  let openFence = null; // { char, length } while inside a fenced code block

  /**
   * Track fenced-code state. Returns true when `line` belongs to a fenced
   * code block (including the opening and closing fence lines).
   */
  const isFencedLine = (line) => {
    const m = FENCE.exec(line);
    if (openFence === null) {
      if (!m)
        return false;
      // A backtick fence may not have backticks in its info string.
      if (m[1][0] === "`" && m[2].includes("`"))
        return false;
      openFence = { char: m[1][0], length: m[1].length };
      return true;
    }
    // A closing fence uses the same character, is at least as long as the
    // opener, and carries no info string.
    if (m &&
      m[1][0] === openFence.char &&
      m[1].length >= openFence.length &&
      m[2].trim() === "") {
      openFence = null;
    }
    return true;
  };

  let i = 0;
  while (i < lines.length) {
    const line = lines[i];
    if (isFencedLine(line)) {
      result.push(line);
      i++;
      continue;
    }
    const directiveMatch = OPEN_DIRECTIVE.exec(line);
    if (!directiveMatch) {
      result.push(line);
      i++;
      continue;
    }
    const indent = directiveMatch[1] || "";
    const variant = directiveMatch[2] === "info" ? "note" : directiveMatch[2];
    const title = directiveMatch[3] || "";
    const titleAttr = title ? ` title="${title}"` : "";
    result.push(`${indent}<Aside type="${variant}"${titleAttr}>`);
    i++;
    // Copy the body until the closing ":::" that is not inside a code fence.
    while (i < lines.length &&
      (isFencedLine(lines[i]) || lines[i].trim() !== ":::")) {
      result.push(lines[i]);
      i++;
    }
    result.push(`${indent}</Aside>`);
    if (i < lines.length)
      i++; // skip closing :::
  }
  return result.join("\n");
}
147
+ // ── Token → TreeNode conversion ─────────────────────────
148
/**
 * Convert markdown-it tokens (including jsx_* tokens) to TreeNode[].
 * This replaces both segmentsToTree and tokensToTree from the old parser.
 *
 * Handled token kinds: `jsx_self_closing`, `jsx_open`/`jsx_close`, headings,
 * paragraphs, fenced and indented code, bullet/ordered lists, tables,
 * thematic breaks, blockquotes and `html_block`. Anything else is skipped.
 *
 * JSX components are resolved through `componentMap`:
 * - mapped, not stripped → a node of `mapping.type` with props from
 *   `mapping.propsFromAttrs` and recursively parsed children;
 * - mapped with `strip` → the tag is dropped; children are kept only when
 *   the mapping is a container;
 * - unmapped → the tag is dropped and its children are spliced in place.
 *
 * @param tokens Block-level token array from `md.parse`.
 * @param componentMap Tag name → component mapping.
 * @returns The tree nodes for `tokens`, in document order.
 */
function tokensToTree(tokens, componentMap) {
  const tree = [];
  let i = 0;
  while (i < tokens.length) {
    const token = tokens[i];

    // ── JSX tokens ─────────────────────────────────────
    if (token.type === "jsx_self_closing") {
      const mapping = componentMap[token.tag];
      if (mapping && !mapping.strip) {
        const attrs = token.meta?.attrs || {};
        const props = mapping.propsFromAttrs?.(attrs) || {};
        tree.push({ type: mapping.type, props, children: [] });
      }
      i++;
      continue;
    }
    if (token.type === "jsx_open") {
      const tag = token.tag;
      const mapping = componentMap[tag];
      // Collect children until the matching jsx_close; depth only counts
      // same-named tags so nested <Tabs> inside <Tabs> pair up correctly.
      const children = [];
      let depth = 1;
      i++;
      while (i < tokens.length && depth > 0) {
        if (tokens[i].type === "jsx_open" && tokens[i].tag === tag) {
          depth++;
        }
        else if (tokens[i].type === "jsx_close" && tokens[i].tag === tag) {
          depth--;
          if (depth === 0) {
            i++; // skip close token
            break;
          }
        }
        children.push(tokens[i]);
        i++;
      }
      if (!mapping) {
        // Unknown component — parse inner content as markdown
        tree.push(...tokensToTree(children, componentMap));
      }
      else if (mapping.strip) {
        // Strip but keep inner content parsed
        if (mapping.container && children.length > 0) {
          tree.push(...tokensToTree(children, componentMap));
        }
      }
      else {
        const attrs = token.meta?.attrs || {};
        const props = mapping.propsFromAttrs?.(attrs) || {};
        const childNodes = tokensToTree(children, componentMap);
        tree.push({ type: mapping.type, props, children: childNodes });
      }
      continue;
    }
    if (token.type === "jsx_close") {
      // Orphan close token — skip (shouldn't happen with proper nesting)
      i++;
      continue;
    }

    // ── Standard markdown tokens ───────────────────────
    if (token.type === "heading_open") {
      const level = parseInt(token.tag.slice(1), 10); // "h2" → 2
      const inline = tokens[i + 1];
      const text = inline?.content || "";
      const slug = text
        .toLowerCase()
        .replace(/[^a-z0-9]+/g, "-")
        .replace(/^-|-$/g, "");
      tree.push({
        type: "heading",
        props: { level, text, slug },
        inline: inline ? inlineTokensToInline(inline.children || []) : [],
      });
      i += 3; // heading_open, inline, heading_close
      continue;
    }
    if (token.type === "paragraph_open") {
      const inline = tokens[i + 1];
      // Paragraphs with no inline children produce no node.
      if (inline?.children?.length) {
        tree.push({
          type: "paragraph",
          children: [
            {
              type: "prose",
              inline: inlineTokensToInline(inline.children),
            },
          ],
        });
      }
      i += 3; // paragraph_open, inline, paragraph_close
      continue;
    }
    if (token.type === "fence") {
      tree.push({
        type: "code",
        props: {
          code: token.content.replace(/\n$/, ""),
          // First word of the info string is the language.
          lang: token.info?.split(/\s/)[0] || "plaintext",
          title: token.info?.match(/title="([^"]*)"/)?.[1],
        },
      });
      i++;
      continue;
    }
    if (token.type === "code_block") {
      tree.push({
        type: "code",
        props: { code: token.content.replace(/\n$/, ""), lang: "plaintext" },
      });
      i++;
      continue;
    }
    if (token.type === "bullet_list_open" ||
      token.type === "ordered_list_open") {
      const ordered = token.type === "ordered_list_open";
      const listType = ordered ? "ordered-list" : "unordered-list";
      let start;
      if (ordered) {
        // markdown-it only sets `start` when it differs from 1, and may
        // store it as a number — so 0 must not be treated as "missing".
        const rawStart = token.attrGet("start");
        start = rawStart == null ? 1 : parseInt(rawStart, 10);
      }
      const items = [];
      i++;
      while (i < tokens.length &&
        tokens[i].type !== "bullet_list_close" &&
        tokens[i].type !== "ordered_list_close") {
        if (tokens[i].type === "list_item_open") {
          i++;
          // Collect all tokens until matching list_item_close (track depth for nested lists)
          const itemTokens = [];
          let liDepth = 1;
          while (i < tokens.length && liDepth > 0) {
            if (tokens[i].type === "list_item_open")
              liDepth++;
            if (tokens[i].type === "list_item_close") {
              liDepth--;
              if (liDepth === 0)
                break; // This is OUR close, don't include it
            }
            itemTokens.push(tokens[i]);
            i++;
          }
          // Recursively parse the list item content using tokensToTree
          // This handles nested lists, JSX components, paragraphs, etc.
          const itemChildren = tokensToTree(itemTokens, componentMap);
          items.push({ type: "list-item", children: itemChildren });
          i++; // skip list_item_close
        }
        else {
          i++;
        }
      }
      i++; // skip list close
      const listProps = Number.isFinite(start) && start !== 1 ? { start } : undefined;
      tree.push({ type: listType, props: listProps, children: items });
      continue;
    }
    if (token.type === "table_open") {
      const tableNode = parseTable(tokens, i);
      tree.push(tableNode.node);
      i = tableNode.endIndex;
      continue;
    }
    if (token.type === "hr") {
      tree.push({ type: "thematic-break" });
      i++;
      continue;
    }
    if (token.type === "blockquote_open") {
      // Gather everything up to the MATCHING blockquote_close and parse it as
      // one sequence. Multi-token constructs (paragraph_open/inline/
      // paragraph_close, lists, nested blockquotes) only parse correctly when
      // they are handed over together.
      const inner = [];
      let bqDepth = 1;
      i++;
      while (i < tokens.length) {
        const current = tokens[i];
        if (current.type === "blockquote_open") {
          bqDepth++;
        }
        else if (current.type === "blockquote_close") {
          bqDepth--;
          if (bqDepth === 0)
            break; // our own close — not part of the content
        }
        inner.push(current);
        i++;
      }
      i++; // skip close
      tree.push({ type: "blockquote", children: tokensToTree(inner, componentMap) });
      continue;
    }
    if (token.type === "html_block") {
      // Check if this is a leftover JSX component that wasn't caught
      const trimmed = token.content.trim();
      if (/^<[A-Z]/.test(trimmed)) {
        // Likely a JSX component — skip it (already handled or unrecognized)
        i++;
        continue;
      }
      tree.push({ type: "html", html: token.content });
      i++;
      continue;
    }
    // Skip unhandled tokens
    i++;
  }
  return tree;
}
355
/**
 * Build a table node from the tokens following a `table_open`.
 *
 * @param tokens Block-level token array.
 * @param start Index of the `table_open` token.
 * @returns `node` — `{ type: "table", children: rows }` where each row is a
 *   `tr` holding `th`/`td` cells — and `endIndex`, the index just past the
 *   `table_close` token.
 */
function parseTable(tokens, start) {
  const rows = [];
  let insideHead = false;
  let pos = start + 1; // first token after table_open

  while (pos < tokens.length && tokens[pos].type !== "table_close") {
    switch (tokens[pos].type) {
      case "thead_open":
        insideHead = true;
        pos++;
        break;
      case "thead_close":
        insideHead = false;
        pos++;
        break;
      case "tr_open": {
        const cells = [];
        pos++;
        while (pos < tokens.length && tokens[pos].type !== "tr_close") {
          const kind = tokens[pos].type;
          if (kind !== "th_open" && kind !== "td_open") {
            pos++;
            continue;
          }
          // A cell is the triple: open, inline, close.
          const content = tokens[pos + 1];
          const inline = content?.type === "inline"
            ? inlineTokensToInline(content.children || [])
            : [];
          cells.push({
            // Header-ness is decided by the enclosing thead, not the tag.
            type: insideHead ? "th" : "td",
            children: [{ type: "prose", inline }],
          });
          pos += 3;
        }
        rows.push({ type: "tr", children: cells });
        pos++; // step over tr_close
        break;
      }
      default:
        // tbody_open / tbody_close and anything unexpected
        pos++;
    }
  }

  return {
    node: { type: "table", children: rows },
    endIndex: pos + 1,
  };
}
406
+ // ── Inline token conversion ─────────────────────────────
407
/**
 * Convert the children of a markdown-it `inline` token into inline nodes.
 *
 * Produces `text` (with `bold`/`italic` flags), `code`, `link` (with nested
 * children) and `image` nodes. Soft and hard breaks become a `"\n"` text
 * node, `html_inline` is kept as literal text, and inline JSX tokens are
 * dropped.
 *
 * Emphasis is tracked by nesting depth rather than a boolean, so closing an
 * inner `strong`/`em` does not switch formatting off for the remainder of
 * the enclosing one.
 *
 * @param tokens Children of an `inline` token.
 * @returns Inline nodes in document order.
 */
function inlineTokensToInline(tokens) {
  const result = [];
  let boldDepth = 0;
  let italicDepth = 0;
  let link = null; // { href, children } while between link_open and link_close

  const push = (node) => {
    (link ? link.children : result).push(node);
  };
  const closeLink = () => {
    result.push({ type: "link", href: link.href, children: link.children });
    link = null;
  };

  for (const token of tokens) {
    switch (token.type) {
      case "text":
        push({
          type: "text",
          text: token.content,
          bold: boldDepth > 0,
          italic: italicDepth > 0,
        });
        break;
      case "code_inline":
        push({ type: "code", text: token.content });
        break;
      case "softbreak":
      case "hardbreak":
        push({ type: "text", text: "\n" });
        break;
      case "strong_open":
        boldDepth++;
        break;
      case "strong_close":
        boldDepth = Math.max(0, boldDepth - 1);
        break;
      case "em_open":
        italicDepth++;
        break;
      case "em_close":
        italicDepth = Math.max(0, italicDepth - 1);
        break;
      case "link_open":
        // Links do not nest; if one is somehow still open, emit it rather
        // than discarding what it collected.
        if (link)
          closeLink();
        link = { href: token.attrGet("href") || "", children: [] };
        break;
      case "link_close":
        if (link)
          closeLink();
        break;
      case "image":
        push({
          type: "image",
          src: token.attrGet("src") || "",
          alt: token.content || token.attrGet("alt") || "",
        });
        break;
      case "html_inline":
        push({ type: "text", text: token.content });
        break;
      default:
        // Inline JSX tokens (jsx_inline_self_closing / jsx_inline_open /
        // jsx_inline_close) and any other token types are skipped.
        break;
    }
  }
  // A link that was never closed still contributes its content.
  if (link)
    closeLink();
  return result;
}
477
+ // ── Post-processing ─────────────────────────────────────
478
/**
 * Flatten `steps > ordered-list > list-item` into `steps > step`.
 *
 * Walks the whole tree. A `steps` node whose only child is an ordered list
 * has that list replaced by one `step` node per list item (each step takes
 * the item's children). All other nodes are kept as they are.
 *
 * The input is not modified: nodes with children are shallow-copied with
 * their new children, nodes without children are returned as-is.
 *
 * @param nodes Tree nodes to process.
 * @returns A new array with the steps lists unwrapped.
 */
function unwrapStepsOrderedList(nodes) {
  return nodes.map((node) => {
    if (!node.children) {
      return node;
    }
    // Process descendants first so nested <Steps> are unwrapped too.
    const children = unwrapStepsOrderedList(node.children);
    const onlyChild = children.length === 1 ? children[0] : undefined;
    if (node.type === "steps" &&
      onlyChild &&
      onlyChild.type === "ordered-list" &&
      onlyChild.children) {
      return {
        ...node,
        children: onlyChild.children.map((listItem) => ({
          type: "step",
          props: {},
          children: listItem.children,
        })),
      };
    }
    return { ...node, children };
  });
}
498
/**
 * Wrap each run of adjacent top-level `card` nodes in a `cards` container.
 *
 * Only the given level is examined; children are not descended into. A run
 * of a single card is wrapped as well.
 *
 * @param tree Top-level tree nodes.
 * @returns A new array in which every card run is replaced by one
 *   `{ type: "cards", children }` node; other nodes keep their position.
 */
function groupConsecutiveCards(tree) {
  const grouped = [];
  let run = null; // the `cards` container currently being filled, if any
  for (const node of tree) {
    if (node.type !== "card") {
      run = null; // a non-card ends the current run
      grouped.push(node);
      continue;
    }
    if (run === null) {
      run = { type: "cards", children: [] };
      grouped.push(run);
    }
    run.children.push(node);
  }
  return grouped;
}
519
/**
 * Parse Starlight MDX source into a tree of nodes.
 *
 * Pipeline, in order: strip frontmatter → normalize tabs → resolve adapter
 * includes → adapter inline transforms → asset-path rewrite → directive
 * conversion → strip `<div>` wrappers → normalize JSX lines → markdown-it
 * parse (with the MDX JSX plugin) → token-to-tree conversion → steps
 * unwrapping → card grouping.
 *
 * @param source Full `.mdx` file content, frontmatter included.
 * @param options Optional settings; `adapter` supplies site-specific component
 *   mappings and transforms, `partialsDir` is forwarded to
 *   `adapter.resolveIncludes`.
 * @returns The parsed tree.
 */
export function starlightToTree(source, options) {
  const adapter = options?.adapter;
  const registry = buildComponentMap(adapter);

  // Drop the frontmatter block first: left in place, markdown-it would read
  // `---...---` as thematic-break + paragraph + thematic-break and the YAML
  // would leak into the body. The frontmatter values themselves are handled
  // at the CLI level.
  // MDX source often mixes tabs and spaces, so tabs are normalized to spaces.
  let text = extractFrontmatter(source).body.replace(/\t/g, " ");

  // Include/partial tags (e.g. Cloudflare's <Render>) are resolved by the adapter.
  if (adapter?.resolveIncludes) {
    text = adapter.resolveIncludes(text, options?.partialsDir);
  }

  text = convertDirectives(rewriteAssetPaths(applyInlineTransforms(text, adapter)));

  // Remove raw HTML div wrappers (opening tags, then closing tags).
  text = text
    .replace(/<div[^>]*>\s*\n?/g, "")
    .replace(/<\/div>\s*\n?/g, "");

  // The JSX plugin is line-based, so multi-tag lines are split beforehand.
  text = normalizeJsxLines(text);

  // The plugin takes care of import stripping, JSX comments and component
  // tokenization.
  const parser = new MarkdownIt({ html: true, linkify: true }).use(mdxJsx, {
    components: registry.components,
    selfClosing: registry.selfClosing,
  });

  const tokens = parser.parse(text, {}); // includes jsx_* tokens
  const tree = tokensToTree(tokens, registry.map);
  return groupConsecutiveCards(unwrapStepsOrderedList(tree));
}
@@ -0,0 +1,34 @@
1
+ /**
2
+ * Starlight MDX parser — parse Starlight source .mdx files into TreeNode[].
3
+ *
4
+ * Handles:
5
+ * - YAML frontmatter extraction (title, sidebar.order, sidebar.label)
6
+ * - Import line stripping (import { ... } from "~/components")
7
+ * - Starlight directive admonitions (:::note, :::caution, :::tip, :::danger)
8
+ * - Starlight JSX components (Tabs, TabItem, Steps, Card, etc.)
9
+ * - Cloudflare custom components (Feature, Plan, Description, etc.) via customization layer
10
+ * - Standard CommonMark (headings, paragraphs, lists, tables, code, links)
11
+ */
12
+ import type { TreeNode } from "@dogsbay/types";
13
+ import type { SiteAdapter } from "./adapter.js";
14
/**
 * Typed subset of a page's YAML frontmatter. The complete YAML is exposed
 * separately as `raw` by `extractFrontmatter`.
 */
+ export interface StarlightFrontmatter {
15
/** Page title. */
+ title: string;
16
/** Presumably taken from `sidebar.label` — confirm against the implementation. */
+ sidebarLabel?: string;
17
/** Presumably taken from `sidebar.order` — confirm against the implementation. */
+ sidebarOrder?: number;
18
/** Presumably taken from `sidebar.hidden` — confirm against the implementation. */
+ sidebarHidden?: boolean;
19
+ /** sidebar.group.hideIndex — when true, index page is not shown as child in group */
20
+ hideIndex?: boolean;
21
/** Page description, when present in the frontmatter. */
+ description?: string;
22
/** NOTE(review): source frontmatter key not visible here — confirm against the implementation. */
+ contentType?: string;
23
+ }
24
/**
 * Split an `.mdx` source string into its frontmatter and its body.
 *
 * @param source Full file content.
 * @returns `frontmatter` (typed subset), `raw` (all YAML fields) and `body`
 *   (presumably the source with the frontmatter block removed — confirm
 *   against the implementation).
 */
+ export declare function extractFrontmatter(source: string): {
25
/** Typed subset of the frontmatter. */
+ frontmatter: StarlightFrontmatter;
26
+ /** Full YAML as a flat record — includes all fields, not just the typed subset. */
27
+ raw: Record<string, unknown>;
28
/** Document content to hand to the markdown parser. */
+ body: string;
29
+ };
30
/** Options accepted by `starlightToTree`. */
+ export interface ParseOptions {
31
+ /** Site-specific adapter for custom component mappings */
32
+ adapter?: SiteAdapter;
33
+ }
34
/**
 * Parse a Starlight source `.mdx` file into `TreeNode[]`.
 *
 * @param source Full file content.
 * @param options Optional parse settings (see `ParseOptions`).
 * @returns The parsed tree nodes.
 */
+ export declare function starlightToTree(source: string, options?: ParseOptions): TreeNode[];