@dogsbay/format-starlight 0.2.0-beta.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/parse.js ADDED
@@ -0,0 +1,729 @@
1
+ /**
2
+ * Starlight MDX parser — parse Starlight source .mdx files into TreeNode[].
3
+ *
4
+ * Handles:
5
+ * - YAML frontmatter extraction (title, sidebar.order, sidebar.label)
6
+ * - Import line stripping (import { ... } from "~/components")
7
+ * - Starlight directive admonitions (:::note, :::caution, :::tip, :::danger)
8
+ * - Starlight JSX components (Tabs, TabItem, Steps, Card, etc.)
9
+ * - Cloudflare custom components (Feature, Plan, Description, etc.) via customization layer
10
+ * - Standard CommonMark (headings, paragraphs, lists, tables, code, links)
11
+ */
12
+ import MarkdownIt from "markdown-it";
13
+ import matter from "gray-matter";
14
// Shared markdown-it instance used by markdownToTree; configured with the
// `html` and `linkify` options switched on.
const md = new MarkdownIt({
  html: true,
  linkify: true,
});
15
/**
 * Split a source file into its YAML frontmatter and the remaining body.
 *
 * When the first line is not a `---` fence the whole source is returned as
 * the body with an empty title. Otherwise the YAML is parsed with gray-matter
 * and, for every typed field gray-matter did not provide, a regex lookup on
 * the raw YAML text is used as a fallback.
 *
 * @param {string} source - Full file contents.
 * @returns {{ frontmatter: object, raw: object, body: string }} The typed
 *   subset, the full parsed record (empty when YAML parsing failed), and the
 *   text after the closing fence.
 */
export function extractFrontmatter(source) {
  const allLines = source.split("\n");
  const startsWithFence = allLines[0]?.trim() === "---";
  if (!startsWithFence) {
    return { frontmatter: { title: "" }, raw: {}, body: source };
  }

  // Index of the closing fence, or one past the last line when it is missing.
  let closeAt = allLines.findIndex((text, idx) => idx >= 1 && text?.trim() === "---");
  if (closeAt === -1) {
    closeAt = allLines.length;
  }
  const yamlBlock = allLines.slice(1, closeAt).join("\n");
  const body = allLines.slice(closeAt + 1).join("\n");

  // Full record from gray-matter so custom fields survive.
  let raw = {};
  try {
    raw = matter(source).data ?? {};
  } catch {
    // Malformed YAML: keep `raw` empty and rely on the regex lookups below.
  }

  // Legacy regex lookups against the raw YAML text.
  const unquote = (value) => value?.replace(/^["']|["']$/g, "");
  const firstCapture = (pattern) => unquote(yamlBlock.match(pattern)?.[1]?.trim());
  const topLevel = (key) => firstCapture(new RegExp(`^${key}:\\s*(.+)$`, "m"));
  const nested = (parent, child) =>
    firstCapture(
      new RegExp(`^${parent}:\\s*\\n(?:[ \\t]+\\S.*\\n)*?[ \\t]+${child}:\\s*(.+)$`, "m"),
    );
  const fallbackOrder = () => {
    const text = nested("sidebar", "order");
    return text ? parseInt(text, 10) : undefined;
  };

  const sidebar = raw.sidebar;
  const frontmatter = {
    title: raw.title ?? topLevel("title") ?? "",
    description: raw.description ?? topLevel("description"),
    sidebarLabel: sidebar?.label ?? nested("sidebar", "label"),
    sidebarOrder: sidebar?.order ?? fallbackOrder(),
    sidebarHidden: sidebar?.hidden ?? nested("sidebar", "hidden") === "true",
    hideIndex: extractHideIndex(raw) ?? yamlBlock.includes("hideIndex: true"),
    contentType: raw.pcx_content_type ?? topLevel("pcx_content_type"),
  };
  return { frontmatter, raw, body };
}
63
/**
 * Read `sidebar.group.hideIndex` from a parsed frontmatter record.
 *
 * @param {object} raw - Parsed frontmatter data.
 * @returns {*} The stored value, or `undefined` when `sidebar` or
 *   `sidebar.group` is absent.
 */
function extractHideIndex(raw) {
  return raw.sidebar?.group?.hideIndex;
}
68
// ── Component mapping ───────────────────────────────────
/**
 * Built-in Starlight components, keyed by JSX tag name.
 *
 * Each entry gives the tree node `type`, whether the tag wraps content
 * (`container`), and optionally `propsFromAttrs`, which turns the parsed
 * attribute record into node props.
 */
const STARLIGHT_COMPONENTS = {
  Tabs: { type: "tabs", container: true },
  TabItem: {
    type: "tab",
    container: true,
    propsFromAttrs: ({ label }) => ({ title: label || "" }),
  },
  Steps: { type: "steps", container: true },
  Aside: {
    type: "callout",
    container: true,
    propsFromAttrs: ({ type, title }) => ({ variant: type || "note", title }),
  },
  Card: {
    type: "card",
    container: true,
    propsFromAttrs: ({ title, icon, href }) => ({ title: title || "", icon, href }),
  },
  CardGrid: { type: "cards", container: true },
  LinkCard: {
    type: "card",
    container: false,
    propsFromAttrs: ({ title, description, href }) => ({
      title: title || "",
      description,
      href,
    }),
  },
  Badge: {
    type: "badge",
    container: false,
    propsFromAttrs: ({ text, variant }) => ({ text: text || "", variant }),
  },
  Code: {
    type: "code",
    container: false,
    propsFromAttrs: ({ code, lang, title }) => ({
      code: code || "",
      lang: lang || "plaintext",
      title,
    }),
  },
};

// Built-in tags that never have a closing tag.
const CORE_SELF_CLOSING = new Set(["LinkCard", "Badge", "Code"]);
125
/**
 * Merge the built-in component table and self-closing tag set with whatever
 * the adapter contributes.
 *
 * @param {object} [adapter] - Optional customization layer; its `components`
 *   entries override built-ins with the same tag name and its
 *   `selfClosingTags` are added to the built-in set.
 * @returns {{ map: object, selfClosing: Set<string> }} Fresh copies; the
 *   built-in tables are not modified.
 */
function buildComponentMap(adapter) {
  const adapterComponents = adapter ? adapter.components : undefined;
  const adapterTags = adapter ? adapter.selfClosingTags ?? [] : [];
  return {
    map: Object.assign({}, STARLIGHT_COMPONENTS, adapterComponents),
    selfClosing: new Set([...CORE_SELF_CLOSING, ...adapterTags]),
  };
}
139
// ── Import stripping ────────────────────────────────────
/**
 * Remove MDX-only syntax that must not reach the Markdown parser.
 *
 * - Named imports (`import { X } from "y";`, single- or multi-line) are
 *   deleted, but only outside fenced code blocks so that code samples keep
 *   their own import lines.
 * - The `~/assets/` alias is rewritten to the relative `assets/` path.
 * - MDX comments (`{/* ... *\/}`) are removed.
 * - Runs of three or more newlines are collapsed to a single blank line.
 *
 * @param {string} source - MDX source text.
 * @returns {string} The cleaned text.
 */
function stripImports(source) {
  // `[^}]*` already spans newlines, so one pattern covers both the
  // single-line and the multi-line import form.
  const importStatement = /^import\s+\{[^}]*\}\s+from\s+["'][^"']*["']\s*;?\s*$/gm;
  // A backtick fence may not have another backtick later on its opening line.
  const fenceOpen = /^\s*(`{3,}(?!.*`)|~{3,})/;
  const fenceOnly = /^\s*(`{3,}|~{3,})\s*$/;

  const chunks = [];
  let prose = [];
  let openFence = null;
  const flushProse = () => {
    if (prose.length > 0) {
      chunks.push(prose.join("\n").replace(importStatement, ""));
      prose = [];
    }
  };

  for (const line of source.split("\n")) {
    if (openFence) {
      // Inside a fence: copy verbatim until a matching closing fence.
      chunks.push(line);
      const run = line.match(fenceOnly)?.[1];
      if (run && run[0] === openFence[0] && run.length >= openFence.length) {
        openFence = null;
      }
      continue;
    }
    const run = line.match(fenceOpen)?.[1];
    if (run) {
      flushProse();
      chunks.push(line);
      openFence = run;
    } else {
      prose.push(line);
    }
  }
  flushProse();

  return chunks
    .join("\n")
    .replace(/~\/assets\//g, "assets/")
    .replace(/\{\/\*[\s\S]*?\*\/\}/g, "")
    .replace(/\n{3,}/g, "\n\n");
}
151
/**
 * Run the adapter's inline component rewrites over the source text.
 *
 * Each entry of `adapter.inlineTransforms` is a `[pattern, replacement]` pair
 * passed to `String.prototype.replace`, applied in order so that later pairs
 * see the output of earlier ones.
 *
 * @param {string} source - Text to rewrite.
 * @param {object} [adapter] - Optional adapter; without `inlineTransforms`
 *   the source is returned unchanged.
 * @returns {string} The rewritten text.
 */
function applyInlineTransforms(source, adapter) {
  const transforms = adapter?.inlineTransforms;
  if (!transforms) {
    return source;
  }
  return [...transforms].reduce(
    (text, [pattern, replacement]) => text.replace(pattern, replacement),
    source,
  );
}
165
// ── Directive parsing ───────────────────────────────────
/**
 * Convert Starlight `:::note` / `:::tip` / `:::caution` / `:::danger` /
 * `:::warning` directives to `<Aside>` component syntax before the main
 * parser runs.
 *
 * Input:
 *   :::note[Custom title]
 *   Content here
 *   :::
 *
 * Output:
 *   <Aside type="note" title="Custom title">
 *   Content here
 *   </Aside>
 *
 * Lines inside fenced code blocks are copied verbatim: a directive shown in a
 * code sample is not converted, and a `:::` line inside a code block does not
 * close the surrounding aside. A directive that is never closed runs to the
 * end of the input.
 *
 * @param {string} source - Source text.
 * @returns {string} Text with directives replaced by `<Aside>` tags.
 */
function convertDirectives(source) {
  const directiveOpen = /^:::(note|tip|caution|danger|warning)(?:\[([^\]]*)\])?\s*$/;
  // A backtick fence may not have another backtick later on its opening line.
  const fenceOpen = /^\s*(`{3,}(?!.*`)|~{3,})/;
  const fenceOnly = /^\s*(`{3,}|~{3,})\s*$/;

  // Pick a delimiter the title does not contain, so the generated attribute
  // stays parseable; if it contains both kinds, double quotes are downgraded.
  const quoteAttr = (value) => {
    if (!value.includes('"')) {
      return `"${value}"`;
    }
    if (!value.includes("'")) {
      return `'${value}'`;
    }
    return `"${value.replace(/"/g, "'")}"`;
  };

  const out = [];
  let openFence = null;
  let inAside = false;

  for (const line of source.split("\n")) {
    if (openFence) {
      out.push(line);
      const run = line.match(fenceOnly)?.[1];
      if (run && run[0] === openFence[0] && run.length >= openFence.length) {
        openFence = null;
      }
      continue;
    }
    const run = line.match(fenceOpen)?.[1];
    if (run) {
      openFence = run;
      out.push(line);
      continue;
    }
    if (inAside) {
      if (line.trim() === ":::") {
        out.push("</Aside>");
        inAside = false;
      } else {
        out.push(line);
      }
      continue;
    }
    const directive = line.match(directiveOpen);
    if (directive) {
      const [, variant, title = ""] = directive;
      const titleAttr = title ? ` title=${quoteAttr(title)}` : "";
      out.push(`<Aside type="${variant}"${titleAttr}>`);
      inAside = true;
      continue;
    }
    out.push(line);
  }

  if (inAside) {
    out.push("</Aside>");
  }
  return out.join("\n");
}
209
/**
 * Split source into alternating markdown and component segments.
 *
 * Only tags present in `componentMap` become component segments; every other
 * line is buffered as markdown. Lines inside fenced code blocks (indented or
 * not) are always markdown. A fence closes only on a line holding nothing but
 * a run of the same character at least as long as the opening run.
 *
 * Recognised component forms, checked in this order:
 *   1. `<Tag attrs>content</Tag>` on one line (tag not in `selfClosing`)
 *   2. `<Tag attrs />`, or any tag listed in `selfClosing`
 *   3. `<Tag attrs>` followed by lines up to the matching `</Tag>`
 *   4. an opening tag whose attributes continue on following lines
 *
 * @param {string} source - Preprocessed MDX text.
 * @param {object} componentMap - Tag name → component mapping.
 * @param {Set<string>} selfClosing - Tags that never have a closing tag.
 * @returns {Array<object>} Segments of shape
 *   `{ kind: "markdown", content }` or
 *   `{ kind: "component", content, tag, attrs, inner? }`.
 */
function segmentMdx(source, componentMap, selfClosing) {
  const segments = [];
  const lines = source.split("\n");
  let markdownBuf = [];
  let openFence = null;

  const flushMarkdown = () => {
    const text = markdownBuf.join("\n").trim();
    if (text) {
      segments.push({ kind: "markdown", content: text });
    }
    markdownBuf = [];
  };

  let i = 0;
  while (i < lines.length) {
    const line = lines[i];

    // Track fenced code blocks so tags inside code samples stay markdown.
    if (openFence) {
      const run = line.match(/^\s*(`{3,}|~{3,})\s*$/)?.[1];
      if (run && run[0] === openFence[0] && run.length >= openFence.length) {
        openFence = null;
      }
      markdownBuf.push(line);
      i++;
      continue;
    }
    const fenceRun = line.match(/^\s*(`{3,}(?!.*`)|~{3,})/)?.[1];
    if (fenceRun) {
      openFence = fenceRun;
      markdownBuf.push(line);
      i++;
      continue;
    }

    // Use the trimmed line to handle indented tags (e.g. inside list items).
    const trimmed = line.trim();

    // Complete component on one line: <Tag ...>content</Tag>. This must be
    // tested before the opening-tag form, which matches the same prefix and
    // would otherwise go looking for the closing tag on later lines.
    const singleLineMatch = trimmed.match(/^<(\w+)(\s[^>]*)?>(.+?)<\/\1>\s*$/);
    if (
      singleLineMatch &&
      componentMap[singleLineMatch[1]] &&
      !selfClosing.has(singleLineMatch[1])
    ) {
      flushMarkdown();
      segments.push({
        kind: "component",
        content: line,
        tag: singleLineMatch[1],
        attrs: parseAttrs(singleLineMatch[2] || ""),
        inner: singleLineMatch[3],
      });
      i++;
      continue;
    }

    // Opening tag closed on this line: <Tag attrs> or <Tag attrs />. The
    // attribute group is lazy so a trailing "/" lands in the self-close group
    // instead of being absorbed into the attribute text.
    const tagMatch = trimmed.match(/^<(\w+)(\s[^>]*?)?\s*(\/?)>/);
    // Opening tag whose attributes continue on following lines.
    const multiLineOpen = tagMatch ? null : trimmed.match(/^<(\w+)(\s.*)?$/);
    const opener = tagMatch || multiLineOpen;

    if (opener && componentMap[opener[1]]) {
      const tag = opener[1];
      let attrsStr = opener[2] || "";
      let selfClose = tagMatch ? tagMatch[3] === "/" : false;
      let endLine = i;

      if (multiLineOpen) {
        // Gather attribute lines until one ends with ">" or "/>".
        for (let j = i + 1; j < lines.length; j++) {
          const part = lines[j].trim();
          attrsStr = `${attrsStr} ${part}`;
          if (part.endsWith("/>")) {
            selfClose = true;
            endLine = j;
            break;
          }
          if (part.endsWith(">")) {
            endLine = j;
            break;
          }
        }
        // Drop the trailing > or /> from the collected attribute text.
        attrsStr = attrsStr.replace(/\s*\/?>$/, "");
      }

      flushMarkdown();
      const attrs = parseAttrs(attrsStr);

      if (selfClose || selfClosing.has(tag)) {
        segments.push({ kind: "component", content: line, tag, attrs });
        i = endLine + 1;
        continue;
      }

      // Container: everything up to the matching closing tag is its content.
      const inner = collectUntilClose(lines, endLine + 1, tag);
      segments.push({
        kind: "component",
        content: line,
        tag,
        attrs,
        inner: inner.content,
      });
      i = inner.endIndex;
      continue;
    }

    markdownBuf.push(line);
    i++;
  }

  flushMarkdown();
  return segments;
}
325
/**
 * Parse a JSX-style attribute string into a flat record of strings.
 *
 * Supported value forms: `name="value"`, `name='value'`, `name={expr}`
 * (the text between the braces, allowing one nested `{...}` level such as
 * `style={{ ... }}`), and bare `name`, which is stored as the string "true".
 * Attribute names may contain hyphens (`data-id`, `aria-label`).
 *
 * @param {string} str - Attribute text taken from an opening tag.
 * @returns {Object<string, string>} Attribute name → raw string value.
 */
function parseAttrs(str) {
  const attrPattern =
    /(\w[\w-]*)(?:=(?:"([^"]*)"|'([^']*)'|\{((?:[^{}]|\{[^{}]*\})*)\}))?/g;
  const attrs = {};
  for (const [, name, doubleQuoted, singleQuoted, braced] of str.matchAll(attrPattern)) {
    attrs[name] = doubleQuoted ?? singleQuoted ?? braced ?? "true";
  }
  return attrs;
}
334
/**
 * Collect the lines that form the content of a container component.
 *
 * Scanning starts at `start` (the line after the opening tag) and stops at
 * the closing tag that balances it. Nested openings of the same tag raise the
 * depth, unless the nested element is self-closing or is closed again on the
 * same line. If no balancing closing tag exists, everything up to the end of
 * `lines` is collected.
 *
 * The collected lines are dedented by their common leading whitespace.
 *
 * @param {string[]} lines - All source lines.
 * @param {number} start - Index of the first content line.
 * @param {string} tag - Component tag name.
 * @returns {{ content: string, endIndex: number }} The content and the index
 *   of the first line after the closing tag.
 */
function collectUntilClose(lines, start, tag) {
  // Built once per call rather than once per line.
  const opensAtStart = new RegExp(`^<${tag}(\\s|>)`);
  const closesAtStart = new RegExp(`^</${tag}\\s*>`);
  const closesAnywhere = new RegExp(`</${tag}\\s*>`);

  const collected = [];
  let depth = 1;
  let i = start;
  while (i < lines.length && depth > 0) {
    const line = lines[i];
    const trimmed = line.trim();

    // A nested element that is opened and closed on this very line leaves
    // the depth unchanged.
    const opensNested =
      opensAtStart.test(trimmed) &&
      !trimmed.endsWith("/>") &&
      !closesAnywhere.test(trimmed);
    if (opensNested) {
      depth++;
    }

    if (closesAtStart.test(trimmed)) {
      depth--;
      if (depth === 0) {
        i++;
        break;
      }
    }

    collected.push(line);
    i++;
  }

  // Dedent: strip the leading whitespace shared by all non-blank lines.
  const minIndent = collected
    .filter((text) => text.trim().length > 0)
    .reduce(
      (smallest, text) => Math.min(smallest, text.length - text.trimStart().length),
      Infinity,
    );
  const dedented =
    Number.isFinite(minIndent) && minIndent > 0
      ? collected.map((text) => text.slice(minIndent))
      : collected;

  return { content: dedented.join("\n"), endIndex: i };
}
369
// ── Segments → TreeNode[] ───────────────────────────────
/**
 * Convert markdown/component segments into tree nodes.
 *
 * - Markdown segments are parsed with `markdownToTree`.
 * - Component segments whose tag has no mapping are dropped.
 * - Mappings flagged `strip` contribute no node of their own; when they are
 *   containers with content, that content is parsed as markdown and inlined.
 * - Every other component becomes `{ type, props, children }`, with the
 *   children produced by parsing the inner text with `starlightToTree`.
 *
 * `starlightToTree` groups consecutive cards into an auto-generated `cards`
 * node. When the component itself maps to `cards`, those generated groups
 * (recognisable by having no `props`) are flattened so the result is
 * `cards > card` rather than `cards > cards > card`.
 *
 * @param {Array<object>} segments - Output of `segmentMdx`.
 * @param {object} componentMap - Tag name → component mapping.
 * @param {object} [adapter] - Optional adapter forwarded to nested parsing.
 * @returns {Array<object>} Tree nodes in document order.
 */
function segmentsToTree(segments, componentMap, adapter) {
  const tree = [];
  for (const seg of segments) {
    if (seg.kind === "markdown") {
      tree.push(...markdownToTree(seg.content, adapter));
      continue;
    }
    if (seg.kind !== "component" || !seg.tag) {
      continue;
    }
    const mapping = componentMap[seg.tag];
    if (!mapping) {
      continue;
    }
    if (mapping.strip) {
      // Wrapper-only component: keep its content, drop the wrapper.
      if (seg.inner && mapping.container) {
        tree.push(...markdownToTree(seg.inner, adapter));
      }
      continue;
    }

    const props = mapping.propsFromAttrs?.(seg.attrs || {}) || {};
    let children = seg.inner ? starlightToTree(seg.inner, { adapter }) : [];
    if (mapping.type === "cards") {
      children = children.flatMap((child) =>
        child.type === "cards" && child.props === undefined ? child.children : [child],
      );
    }
    tree.push({ type: mapping.type, props, children });
  }
  return tree;
}
396
// ── Markdown → TreeNode[] (via markdown-it) ─────────────
/**
 * Parse a markdown fragment into tree nodes.
 *
 * The adapter's inline transforms are applied first (this also catches
 * components inside collected inner content). Any remaining self-closing
 * component whose name starts with an uppercase letter is then removed —
 * but only outside fenced code blocks, so JSX in code samples such as
 * `<App />` is left intact. The result is tokenised with markdown-it and
 * converted by `tokensToTree`.
 *
 * @param {string} source - Markdown text.
 * @param {object} [adapter] - Optional adapter providing inline transforms.
 * @returns {Array<object>} Tree nodes.
 */
function markdownToTree(source, adapter) {
  const unknownSelfClosing = /<[A-Z]\w+\s[^>]*\/>\s*\n?/g;
  // A backtick fence may not have another backtick later on its opening line.
  const fenceOpen = /^\s*(`{3,}(?!.*`)|~{3,})/;
  const fenceOnly = /^\s*(`{3,}|~{3,})\s*$/;

  const chunks = [];
  let prose = [];
  let openFence = null;
  const flushProse = () => {
    if (prose.length > 0) {
      chunks.push(prose.join("\n").replace(unknownSelfClosing, ""));
      prose = [];
    }
  };

  for (const line of applyInlineTransforms(source, adapter).split("\n")) {
    if (openFence) {
      // Inside a fence: copy verbatim until a matching closing fence.
      chunks.push(line);
      const run = line.match(fenceOnly)?.[1];
      if (run && run[0] === openFence[0] && run.length >= openFence.length) {
        openFence = null;
      }
      continue;
    }
    const run = line.match(fenceOpen)?.[1];
    if (run) {
      flushProse();
      chunks.push(line);
      openFence = run;
    } else {
      prose.push(line);
    }
  }
  flushProse();

  const tokens = md.parse(chunks.join("\n"), {});
  return tokensToTree(tokens);
}
405
/**
 * Convert a flat markdown-it block token stream into tree nodes.
 *
 * Handled tokens: headings, paragraphs, fenced and indented code, bullet and
 * ordered lists, tables (via `parseTable`), thematic breaks, blockquotes and
 * HTML blocks. Anything else is skipped.
 *
 * Lists and blockquotes are converted recursively over the token range
 * between their opening token and its matching closing token, so nested
 * lists, paragraphs inside blockquotes, and code blocks inside list items
 * keep their structure.
 *
 * @param {Array<object>} tokens - markdown-it block tokens.
 * @returns {Array<object>} Tree nodes in document order.
 */
function tokensToTree(tokens) {
  // Index of the `*_close` token balancing the `*_open` token at `openIdx`,
  // or `tokens.length` when the stream is unbalanced.
  const closeIndexOf = (openIdx) => {
    const openType = tokens[openIdx].type;
    const closeType = openType.replace(/_open$/, "_close");
    let depth = 0;
    for (let k = openIdx; k < tokens.length; k++) {
      if (tokens[k].type === openType) {
        depth++;
      } else if (tokens[k].type === closeType) {
        depth--;
        if (depth === 0) {
          return k;
        }
      }
    }
    return tokens.length;
  };

  const tree = [];
  let i = 0;
  while (i < tokens.length) {
    const token = tokens[i];
    switch (token.type) {
      case "heading_open": {
        const level = parseInt(token.tag.slice(1), 10);
        const inline = tokens[i + 1];
        const text = inline?.content || "";
        const slug = text
          .toLowerCase()
          .replace(/[^a-z0-9]+/g, "-")
          .replace(/^-|-$/g, "");
        tree.push({
          type: "heading",
          props: { level, text, slug },
          inline: inline ? inlineTokensToInline(inline.children || []) : [],
        });
        i += 3; // heading_open, inline, heading_close
        break;
      }
      case "paragraph_open": {
        const inline = tokens[i + 1];
        if (inline?.children?.length) {
          tree.push({
            type: "paragraph",
            children: [{ type: "prose", inline: inlineTokensToInline(inline.children) }],
          });
        }
        i += 3; // paragraph_open, inline, paragraph_close
        break;
      }
      case "fence": {
        tree.push({
          type: "code",
          props: {
            code: token.content.replace(/\n$/, ""),
            lang: token.info?.split(/\s/)[0] || "plaintext",
            title: token.info?.match(/title="([^"]*)"/)?.[1],
          },
        });
        i++;
        break;
      }
      case "code_block": {
        tree.push({
          type: "code",
          props: { code: token.content.replace(/\n$/, ""), lang: "plaintext" },
        });
        i++;
        break;
      }
      case "bullet_list_open":
      case "ordered_list_open": {
        const listType = token.type === "ordered_list_open" ? "ordered-list" : "unordered-list";
        const listClose = closeIndexOf(i);
        const items = [];
        let k = i + 1;
        while (k < listClose) {
          if (tokens[k].type === "list_item_open") {
            const itemClose = Math.min(closeIndexOf(k), listClose);
            items.push({
              type: "list-item",
              children: tokensToTree(tokens.slice(k + 1, itemClose)),
            });
            k = itemClose + 1;
          } else {
            k++;
          }
        }
        tree.push({ type: listType, children: items });
        i = listClose + 1;
        break;
      }
      case "table_open": {
        const tableNode = parseTable(tokens, i);
        tree.push(tableNode.node);
        i = tableNode.endIndex;
        break;
      }
      case "hr": {
        tree.push({ type: "thematic-break" });
        i++;
        break;
      }
      case "blockquote_open": {
        const quoteClose = closeIndexOf(i);
        tree.push({
          type: "blockquote",
          children: tokensToTree(tokens.slice(i + 1, quoteClose)),
        });
        i = quoteClose + 1;
        break;
      }
      case "html_block": {
        tree.push({ type: "html", html: token.content });
        i++;
        break;
      }
      default:
        // Skip unhandled tokens.
        i++;
    }
  }
  return tree;
}
534
/**
 * Convert the tokens of one table into a `table` node.
 *
 * Rows become `tr` nodes. Cells are `th` while inside the table head and
 * `td` otherwise; each cell holds a single `prose` child built from the
 * inline token that follows the cell's opening token.
 *
 * @param {Array<object>} tokens - Token stream containing the table.
 * @param {number} start - Index of the `table_open` token.
 * @returns {{ node: object, endIndex: number }} The table node and the index
 *   just past the position where scanning stopped.
 */
function parseTable(tokens, start) {
  const rows = [];
  let inHeader = false;
  let pos = start + 1; // first token after table_open

  for (; pos < tokens.length && tokens[pos].type !== "table_close"; pos++) {
    const kind = tokens[pos].type;
    if (kind === "thead_open") {
      inHeader = true;
    } else if (kind === "thead_close") {
      inHeader = false;
    } else if (kind === "tr_open") {
      const cells = [];
      pos++;
      while (pos < tokens.length && tokens[pos].type !== "tr_close") {
        const isCell = tokens[pos].type === "th_open" || tokens[pos].type === "td_open";
        if (!isCell) {
          pos++;
          continue;
        }
        const content = tokens[pos + 1];
        let inlineNodes = [];
        if (content?.type === "inline") {
          inlineNodes = inlineTokensToInline(content.children || []);
        }
        cells.push({
          type: inHeader ? "th" : "td",
          children: [{ type: "prose", inline: inlineNodes }],
        });
        pos += 3; // open, inline, close
      }
      rows.push({ type: "tr", children: cells });
      // `pos` now sits on tr_close; the loop increment steps over it.
    }
    // tbody_open / tbody_close and anything else: just advance.
  }

  return {
    node: { type: "table", children: rows },
    endIndex: pos + 1,
  };
}
584
// ── Inline token conversion ─────────────────────────────
/**
 * Convert markdown-it inline tokens into inline nodes.
 *
 * Produces `text` (with `bold`/`italic` flags), `code`, `link` and `image`
 * nodes; soft and hard breaks become a `"\n"` text node and inline HTML is
 * kept as raw text. Everything between `link_open` and `link_close` —
 * including images and inline HTML — is collected as children of the link,
 * so document order is preserved. Emphasis is tracked by depth so nested
 * strong/em runs stay flagged until the outermost one closes.
 *
 * @param {Array<object>} tokens - Children of a markdown-it inline token.
 * @returns {Array<object>} Inline nodes.
 */
function inlineTokensToInline(tokens) {
  const result = [];
  let boldDepth = 0;
  let italicDepth = 0;
  let link = null; // { href, children } while inside a link

  const push = (node) => {
    (link ? link.children : result).push(node);
  };

  for (const token of tokens) {
    switch (token.type) {
      case "text":
        push({
          type: "text",
          text: token.content,
          bold: boldDepth > 0,
          italic: italicDepth > 0,
        });
        break;
      case "code_inline":
        push({ type: "code", text: token.content });
        break;
      case "softbreak":
      case "hardbreak":
        push({ type: "text", text: "\n" });
        break;
      case "strong_open":
        boldDepth++;
        break;
      case "strong_close":
        boldDepth = Math.max(0, boldDepth - 1);
        break;
      case "em_open":
        italicDepth++;
        break;
      case "em_close":
        italicDepth = Math.max(0, italicDepth - 1);
        break;
      case "link_open":
        link = { href: token.attrGet("href") || "", children: [] };
        break;
      case "link_close":
        if (link) {
          result.push({ type: "link", href: link.href, children: link.children });
          link = null;
        }
        break;
      case "image":
        push({
          type: "image",
          src: token.attrGet("src") || "",
          alt: token.content || token.attrGet("alt") || "",
        });
        break;
      case "html_inline":
        push({ type: "text", text: token.content });
        break;
      default:
        // Other inline token types are ignored.
        break;
    }
  }
  return result;
}
647
+ // ── Main entry point ────────────────────────────────────
648
+ /**
649
+ * Parse Starlight MDX source into TreeNode[].
650
+ *
651
+ * Pipeline:
652
+ * 1. Strip import lines
653
+ * 2. Convert :::directive admonitions to <Aside> component syntax
654
+ * 3. Segment into markdown + component blocks
655
+ * 4. Convert segments to TreeNode[]
656
+ */
657
/**
 * Wrap every run of adjacent `card` nodes in a single `cards` container.
 *
 * Nodes of any other type are passed through unchanged and end the current
 * run, so cards separated by other content land in separate containers.
 *
 * @param {Array<object>} tree - Sibling nodes.
 * @returns {Array<object>} A new array with card runs grouped.
 */
function groupConsecutiveCards(tree) {
  const grouped = [];
  let currentRun = null; // the `cards` node being filled, if any

  for (const node of tree) {
    if (node.type !== "card") {
      currentRun = null;
      grouped.push(node);
      continue;
    }
    if (currentRun === null) {
      currentRun = { type: "cards", children: [] };
      grouped.push(currentRun);
    }
    currentRun.children.push(node);
  }
  return grouped;
}
683
/**
 * Parse Starlight MDX source into TreeNode[].
 *
 * Pipeline:
 *   1. Strip import lines, MDX comments and the `~/assets/` alias
 *   2. Apply the adapter's inline component transforms
 *   3. Convert :::directive admonitions to <Aside> component syntax
 *   4. Remove raw `<div ...>` / `</div>` wrappers (outside fenced code only,
 *      so HTML code samples keep their markup)
 *   5. Segment into markdown + component blocks
 *   6. Convert segments to tree nodes
 *   7. Turn `steps > ordered-list` into `steps > step`
 *   8. Group consecutive cards into a `cards` container
 *
 * @param {string} source - MDX body (frontmatter already removed).
 * @param {{ adapter?: object }} [options] - Optional customization layer.
 * @returns {Array<object>} The parsed tree.
 */
export function starlightToTree(source, options) {
  const adapter = options?.adapter;
  const { map: componentMap, selfClosing } = buildComponentMap(adapter);

  // Strip div wrappers (e.g. <div style="font-size:87%">). The lookahead
  // keeps the pattern from matching other tags that merely start with "div".
  const stripDivs = (text) =>
    text.replace(/<div(?=[\s>])[^>]*>\s*\n?/g, "").replace(/<\/div>\s*\n?/g, "");

  // Apply `stripDivs` to the text between fenced code blocks only.
  const stripDivsOutsideFences = (text) => {
    // A backtick fence may not have another backtick later on its opening line.
    const fenceOpen = /^\s*(`{3,}(?!.*`)|~{3,})/;
    const fenceOnly = /^\s*(`{3,}|~{3,})\s*$/;
    const chunks = [];
    let prose = [];
    let openFence = null;
    const flushProse = () => {
      if (prose.length > 0) {
        chunks.push(stripDivs(prose.join("\n")));
        prose = [];
      }
    };
    for (const line of text.split("\n")) {
      if (openFence) {
        chunks.push(line);
        const run = line.match(fenceOnly)?.[1];
        if (run && run[0] === openFence[0] && run.length >= openFence.length) {
          openFence = null;
        }
        continue;
      }
      const run = line.match(fenceOpen)?.[1];
      if (run) {
        flushProse();
        chunks.push(line);
        openFence = run;
      } else {
        prose.push(line);
      }
    }
    flushProse();
    return chunks.join("\n");
  };

  let cleaned = stripImports(source);
  cleaned = applyInlineTransforms(cleaned, adapter);
  cleaned = convertDirectives(cleaned);
  cleaned = stripDivsOutsideFences(cleaned);

  const segments = segmentMdx(cleaned, componentMap, selfClosing);
  const tree = segmentsToTree(segments, componentMap, adapter);
  const unwrapped = unwrapStepsOrderedList(tree);
  return groupConsecutiveCards(unwrapped);
}
697
/**
 * Starlight's <Steps> wraps a markdown ordered list, which parses as:
 *   steps > ordered-list > list-item
 *
 * Serializers expect:
 *   steps > step
 *
 * When a `steps` node has an ordered list as its only child, each list item
 * is promoted to a `step` node carrying the item's children. The walk covers
 * all descendants first, so steps nested in other containers (e.g. tabs) are
 * handled too. Nodes are updated in place and returned in a new array.
 *
 * @param {Array<object>} nodes - Sibling nodes to process.
 * @returns {Array<object>} The same node objects, with `children` rewritten
 *   where applicable.
 */
function unwrapStepsOrderedList(nodes) {
  const toStep = (listItem) => ({
    type: "step",
    props: {},
    children: listItem.children,
  });

  const visit = (node) => {
    // Descendants first.
    if (node.children) {
      node.children = unwrapStepsOrderedList(node.children);
    }
    if (node.type === "steps") {
      const kids = node.children ?? [];
      const isLoneOrderedList =
        kids.length === 1 && kids[0].type === "ordered-list" && kids[0].children;
      if (isLoneOrderedList) {
        node.children = kids[0].children.map(toStep);
      }
    }
    return node;
  };

  return nodes.map(visit);
}