@jxsuite/compiler 0.1.0 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,942 @@
1
+ /**
2
+ * Compile-markdown.js — Clean markdown export target
3
+ *
4
+ * Converts a fully-resolved Jx document tree to pure markdown, stripping all Jx-specific decoration
5
+ * (styles, attributes, custom element wrappers). Components are inlined by resolving their
6
+ * definitions with instance props.
7
+ */
8
+
9
+ import { unified } from "unified";
10
+ import remarkStringify from "remark-stringify";
11
+ import remarkGfm from "remark-gfm";
12
+ import { buildInitialScope, evaluateStaticTemplate, isTemplateString } from "../shared.js";
13
+
14
+ // ─── Tag classification ────────────────────────────────────────────────────
15
+
16
/**
 * Lookup table from an HTML tag name to the node type used when converting it.
 *
 * Most values are mdast node types. `thead` and `tbody` are internal markers only: nodeToMdast
 * unwraps them so that their rows are promoted to the enclosing table.
 */
const TAG_MDAST_MAP = /** @type {Record<string, string>} */ ({
  // Headings — the depth is taken from the digit in the tag name
  h1: "heading",
  h2: "heading",
  h3: "heading",
  h4: "heading",
  h5: "heading",
  h6: "heading",

  // Paragraphs and inline (phrasing) content
  p: "paragraph",
  em: "emphasis",
  strong: "strong",
  del: "delete",
  code: "inlineCode",
  a: "link",
  img: "image",

  // Block containers, lists and leaf blocks
  blockquote: "blockquote",
  ul: "list",
  ol: "list",
  li: "listItem",
  pre: "code",
  hr: "thematicBreak",
  br: "break",

  // Tables
  table: "table",
  thead: "thead",
  tbody: "tbody",
  tr: "tableRow",
  th: "tableCell",
  td: "tableCell",
});
45
+
46
/**
 * Structural container tags with no markdown equivalent. Elements with one of these tags are
 * dropped from the output and their children take their place.
 */
const WRAPPER_TAGS = new Set([
  // Generic containers
  "div",
  "section",
  "span",
  // Page landmarks
  "nav",
  "header",
  "footer",
  "main",
  "article",
  "aside",
  // Figures
  "figure",
  "figcaption",
  // Component slot placeholder
  "slot",
]);
61
+
62
+ // ─── Core conversion ────────────────────────────────────────────────────────
63
+
64
/**
 * Convert a Jx node to an array of mdast nodes. An array is returned because unwrapping wrappers,
 * inlining components and expanding mapped arrays can each yield zero, one or many nodes.
 *
 * Dispatch order: primitives → `$prototype: "Array"` descriptors → `innerHTML` → custom elements
 * (tag names containing "-") → wrapper/unknown tags → tags listed in TAG_MDAST_MAP.
 *
 * The current scope is forwarded to inlineComponent so that slot content and children of
 * components without a definition are resolved in the scope in which they were written.
 *
 * @param {any} node - String, number, or Jx element object
 * @param {Map<string, any>} componentDefs - Component definitions keyed by tag name
 * @param {any} [scope] - Current resolution scope
 * @returns {any[]}
 */
function nodeToMdast(node, componentDefs, scope) {
  // Bare text — whitespace-only strings produce nothing
  if (typeof node === "string") {
    return node.trim() ? [{ type: "text", value: node }] : [];
  }
  if (typeof node === "number") {
    return [{ type: "text", value: String(node) }];
  }
  if (!node || typeof node !== "object") return [];

  // Array descriptor — expand mapped arrays
  if (node.$prototype === "Array") {
    return expandArray(node, componentDefs, scope);
  }

  const tag = node.tagName ?? "div";
  const text = resolveText(node.textContent, scope);

  // innerHTML — if it yields any nodes they replace this element entirely
  if (typeof node.innerHTML === "string" && node.innerHTML.trim()) {
    const htmlNodes = htmlToMdast(node.innerHTML);
    if (htmlNodes.length > 0) return htmlNodes;
  }

  // Custom elements — inline component content, keeping the current scope for slot content
  if (tag.includes("-")) {
    return inlineComponent(node, tag, componentDefs, scope);
  }

  const mdastType = TAG_MDAST_MAP[tag];

  // Wrapper tags and unknown tags are treated alike: text becomes a paragraph (to keep block
  // structure), otherwise the children are promoted.
  if (WRAPPER_TAGS.has(tag) || !mdastType) {
    if (text != null) {
      return text.trim() ? [{ type: "paragraph", children: [{ type: "text", value: text }] }] : [];
    }
    return convertChildren(node, componentDefs, scope);
  }

  // Children for elements holding inline content: own text wins over child nodes
  const inlineChildren = () =>
    text != null
      ? [{ type: "text", value: text }]
      : convertChildrenInline(node, componentDefs, scope);

  // Standard markdown elements
  switch (mdastType) {
    case "heading": {
      // "h3" → 3
      const depth = parseInt(tag.slice(1), 10);
      return [{ type: "heading", depth, children: inlineChildren() }];
    }

    case "paragraph": {
      const children = inlineChildren();
      if (children.length === 0) return [];
      return [{ type: "paragraph", children }];
    }

    case "emphasis":
    case "strong":
    case "delete":
      return [{ type: mdastType, children: inlineChildren() }];

    case "inlineCode":
      return [{ type: "inlineCode", value: text ?? "" }];

    case "link": {
      const href = node.attributes?.href ?? "";
      const title = node.attributes?.title ?? null;
      return [{ type: "link", url: href, title, children: inlineChildren() }];
    }

    case "image": {
      const src = node.attributes?.src ?? "";
      const alt = node.attributes?.alt ?? "";
      const title = node.attributes?.title ?? null;
      return [{ type: "image", url: src, alt, title }];
    }

    case "blockquote": {
      const children = convertChildren(node, componentDefs, scope);
      // Wrap bare text in paragraph if needed
      const wrapped = children.map((/** @type {any} */ c) =>
        c.type === "text" ? { type: "paragraph", children: [c] } : c,
      );
      return [{ type: "blockquote", children: wrapped }];
    }

    case "list": {
      const ordered = tag === "ol";
      // Only keep listItem children
      const items = convertChildren(node, componentDefs, scope).filter(
        (/** @type {any} */ c) => c.type === "listItem",
      );
      if (items.length === 0) return [];
      return [{ type: "list", ordered, spread: false, children: items }];
    }

    case "listItem": {
      let children = convertChildren(node, componentDefs, scope);
      // Wrap bare text/inline nodes in paragraph
      if (
        children.length > 0 &&
        children.every((/** @type {any} */ c) => c.type === "text" || isInlineType(c.type))
      ) {
        children = [{ type: "paragraph", children }];
      }
      return [{ type: "listItem", spread: false, children }];
    }

    case "code": {
      // pre > code → fenced code block; language comes from the code child's className
      const codeChild = Array.isArray(node.children)
        ? node.children.find((/** @type {any} */ c) => c?.tagName === "code")
        : null;
      const value = codeChild?.textContent ?? text ?? "";
      const lang = codeChild?.className?.replace("language-", "") ?? null;
      return [{ type: "code", lang, value }];
    }

    case "thematicBreak":
      return [{ type: "thematicBreak" }];

    case "break":
      return [{ type: "break" }];

    case "table":
      return convertTable(node, componentDefs, scope);

    case "thead":
    case "tbody":
      // Unwrap — promote rows
      return convertChildren(node, componentDefs, scope);

    case "tableRow": {
      const cells = convertChildren(node, componentDefs, scope);
      return [
        {
          type: "tableRow",
          children: cells.filter((/** @type {any} */ c) => c.type === "tableCell"),
        },
      ];
    }

    case "tableCell":
      return [{ type: "tableCell", children: inlineChildren() }];
  }

  return [];
}
239
+
240
/**
 * Tell whether an mdast node type is one of the inline types this module produces.
 *
 * @param {string} type - mdast node type
 * @returns {boolean} True for text, emphasis, strong, delete, inlineCode, link, image and break
 */
function isInlineType(type) {
  switch (type) {
    case "text":
    case "emphasis":
    case "strong":
    case "delete":
    case "inlineCode":
    case "link":
    case "image":
    case "break":
      return true;
    default:
      return false;
  }
}
249
+
250
/**
 * Produce the mdast nodes for the content of a node (block context).
 *
 * A non-null `textContent` takes precedence over `children`: it yields a single text node, or
 * nothing when it resolves to an empty string. Otherwise every entry of the `children` array is
 * converted and the results are concatenated.
 *
 * @param {any} node
 * @param {Map<string, any>} componentDefs
 * @param {any} [scope]
 * @returns {any[]}
 */
function convertChildren(node, componentDefs, scope) {
  const { textContent, children } = node;

  if (textContent != null) {
    const resolved = resolveText(textContent, scope);
    return resolved ? [{ type: "text", value: resolved }] : [];
  }

  return Array.isArray(children)
    ? children.flatMap((/** @type {any} */ child) => nodeToMdast(child, componentDefs, scope))
    : [];
}
267
+
268
/**
 * Produce the mdast nodes for the content of a node that holds inline content (headings,
 * paragraphs, links, table cells, ...). Currently performs the same steps as convertChildren.
 *
 * @param {any} node
 * @param {Map<string, any>} componentDefs
 * @param {any} [scope]
 * @returns {any[]}
 */
function convertChildrenInline(node, componentDefs, scope) {
  const ownText = node.textContent;

  // Own text wins over child nodes
  if (ownText != null) {
    const value = resolveText(ownText, scope);
    if (!value) return [];
    return [{ type: "text", value }];
  }

  const kids = node.children;
  if (!Array.isArray(kids)) return [];

  /** @type {any[]} */
  const out = [];
  kids.forEach((/** @type {any} */ kid) => {
    out.push(...nodeToMdast(kid, componentDefs, scope));
  });
  return out;
}
285
+
286
+ // ─── Component inlining ─────────────────────────────────────────────────────
287
+
288
/**
 * Inline a custom element by resolving its component definition.
 *
 * Two scopes are involved:
 *
 * - The component scope, built from the definition's `state` with the instance `$props` merged in;
 *   it is used for the definition's own children.
 * - The parent scope (`parentScope`), i.e. the scope in which the instance was written; it is used
 *   for the instance's own children, both when they replace a top-level `slot` and when the tag has
 *   no registered definition.
 *
 * @param {any} node - The element instance (with $props, children, etc.)
 * @param {string} tag - The tagName
 * @param {Map<string, any>} componentDefs
 * @param {any} [parentScope] - Scope of the document/component that contains the instance
 * @returns {any[]}
 */
function inlineComponent(node, tag, componentDefs, parentScope) {
  const def = componentDefs.get(tag);
  if (!def) {
    // No definition — unwrap any children the instance has, in the scope they were written in
    return convertChildren(node, componentDefs, parentScope);
  }

  // Nothing to render; skip building a scope that would go unused
  if (!Array.isArray(def.children)) return [];

  // Merge instance $props into component state. A state entry shaped like `{ default: ... }`
  // keeps its other fields and only has its default replaced; anything else is overwritten.
  const stateDefs = { ...def.state };
  for (const [key, value] of Object.entries(node.$props ?? {})) {
    const existing = stateDefs[key];
    const hasDefault =
      existing && typeof existing === "object" && !Array.isArray(existing) && "default" in existing;
    stateDefs[key] = hasDefault ? { ...existing, default: value } : value;
  }

  const scope = buildInitialScope(stateDefs, null);

  // Deep-resolve template expressions in the component's children
  const resolved = deepResolve(def.children, scope);

  const instanceChildren = node.children;
  return resolved.flatMap((/** @type {any} */ child) => {
    // Replace top-level slot elements with the instance's children
    if (child?.tagName === "slot" && Array.isArray(instanceChildren)) {
      return instanceChildren.flatMap((/** @type {any} */ c) =>
        nodeToMdast(c, componentDefs, parentScope),
      );
    }
    return nodeToMdast(child, componentDefs, scope);
  });
}
342
+
343
/**
 * Resolve template expressions in every node of a list, recursing through resolveNode.
 *
 * @param {any} nodes - Expected to be an array of nodes; anything else yields an empty array
 * @param {any} scope
 * @returns {any[]} A new array; the input is not modified
 */
function deepResolve(nodes, scope) {
  return Array.isArray(nodes)
    ? nodes.map((/** @type {any} */ entry) => resolveNode(entry, scope))
    : [];
}
354
+
355
/**
 * Return a copy of a node with template strings evaluated against `scope`.
 *
 * Evaluated positions: the node itself when it is a string, its `textContent`, its `innerHTML`,
 * each string value in `attributes`, and (recursively) its `children`. When evaluation yields
 * null/undefined the original string is kept. Non-string primitives and null are returned as-is.
 *
 * @param {any} node
 * @param {any} scope
 * @returns {any}
 */
function resolveNode(node, scope) {
  /** Evaluate a string known to be a template, falling back to the raw text. */
  const evaluate = (/** @type {string} */ raw) => evaluateStaticTemplate(raw, scope) ?? raw;

  if (typeof node === "string") {
    return isTemplateString(node) ? evaluate(node) : node;
  }
  if (!node || typeof node !== "object") return node;

  const copy = { ...node };

  // Only touch a field when it actually holds a template, so absent keys stay absent
  for (const field of ["textContent", "innerHTML"]) {
    const raw = copy[field];
    if (typeof raw === "string" && isTemplateString(raw)) {
      copy[field] = evaluate(raw);
    }
  }

  if (copy.attributes) {
    const attrs = { ...copy.attributes };
    for (const name of Object.keys(attrs)) {
      const raw = attrs[name];
      if (typeof raw === "string" && isTemplateString(raw)) {
        attrs[name] = evaluate(raw);
      }
    }
    copy.attributes = attrs;
  }

  if (Array.isArray(copy.children)) {
    copy.children = deepResolve(copy.children, scope);
  }

  return copy;
}
390
+
391
+ // ─── Array expansion ────────────────────────────────────────────────────────
392
+
393
/**
 * Expand a `$prototype: "Array"` descriptor into concrete mdast nodes.
 *
 * The descriptor's `items.$ref` is looked up in `scope`; for each element of the resulting array
 * the `map` template is instantiated and converted. Each instance is converted with a child scope
 * (prototype-linked to `scope`) that additionally exposes `item` and `index`, so template strings
 * inside the mapped template can refer to the current element.
 *
 * @param {any} arrayDef - Descriptor with `items: { $ref }` and a `map` template
 * @param {Map<string, any>} componentDefs
 * @param {any} [scope] - Required; without a scope nothing can be resolved and [] is returned
 * @returns {any[]}
 */
function expandArray(arrayDef, componentDefs, scope) {
  const itemsRef = arrayDef.items?.$ref;
  if (!itemsRef || !scope) return [];

  const mapTemplate = arrayDef.map;
  if (!mapTemplate) return [];

  // Resolve the items array from scope
  const items = resolveRef(itemsRef, scope);
  if (!Array.isArray(items)) return [];

  return items.flatMap((/** @type {any} */ item, /** @type {number} */ index) => {
    // Per-item scope: inherits everything from `scope`, adds the $map values
    const mapScope = Object.create(scope);
    mapScope.item = item;
    mapScope.index = index;

    // Substitute $map/ references, then convert using the per-item scope
    const resolved = resolveMapNode(mapTemplate, item);
    return nodeToMdast(resolved, componentDefs, mapScope);
  });
}
423
+
424
/**
 * Resolve `$map/` references in a map template node, returning a copy.
 *
 * Handled positions: any property whose value is `{ $ref: "$map/..." }`, a `textContent` string
 * starting with `$map/`, nested `$props`, and (recursively) `children`.
 *
 * Path rules after the `$map/` prefix, applied identically to `$ref` and `textContent`:
 *
 * - `item` → the current item itself
 * - `item/<path>` → `<path>` looked up on the item
 * - anything else → looked up on the item as-is
 *
 * @param {any} node - Template node (strings and other primitives are returned unchanged)
 * @param {any} item - Current array element
 * @returns {any}
 */
function resolveMapNode(node, item) {
  if (typeof node === "string") return node;
  if (!node || typeof node !== "object") return node;

  const MAP_PREFIX = "$map/";
  const ITEM_PREFIX = "item/";

  /** Resolve the part of a reference that follows "$map/". */
  const lookup = (/** @type {string} */ mapPath) => {
    if (mapPath === "item") return item;
    const relative = mapPath.startsWith(ITEM_PREFIX) ? mapPath.slice(ITEM_PREFIX.length) : mapPath;
    return resolvePath(item, relative);
  };

  const result = { ...node };

  // Resolve { $ref: "$map/..." } values; other $ref targets are left for later stages
  for (const [key, value] of Object.entries(result)) {
    const ref = value && typeof value === "object" ? value.$ref : undefined;
    if (typeof ref === "string" && ref.startsWith(MAP_PREFIX)) {
      result[key] = lookup(ref.slice(MAP_PREFIX.length));
    }
  }

  if (result.$props) {
    result.$props = resolveMapNode(result.$props, item);
  }

  if (typeof result.textContent === "string" && result.textContent.startsWith(MAP_PREFIX)) {
    result.textContent = lookup(result.textContent.slice(MAP_PREFIX.length));
  }

  if (Array.isArray(result.children)) {
    result.children = result.children.map((/** @type {any} */ c) => resolveMapNode(c, item));
  }

  return result;
}
465
+
466
/**
 * Walk a path on an object, where segments are separated by "/" or ".".
 *
 * Returns undefined as soon as a segment has to be read from null/undefined; the value found at
 * the final segment is returned unchanged (so it may itself be null).
 *
 * @param {any} obj - Root value to start from
 * @param {string} path - e.g. "user/name" or "user.name"
 * @returns {any}
 */
function resolvePath(obj, path) {
  return path
    .split(/[/.]/)
    .reduce(
      (/** @type {any} */ cursor, /** @type {string} */ segment) =>
        cursor == null ? undefined : cursor[segment],
      obj,
    );
}
482
+
483
/**
 * Look up a `$ref` string in a scope. A leading "#/state/" is dropped first; the remainder (or
 * the whole ref when the prefix is absent) is resolved as a path on the scope.
 *
 * @param {string} ref
 * @param {any} scope
 * @returns {any}
 */
function resolveRef(ref, scope) {
  const statePrefix = "#/state/";
  const path = ref.startsWith(statePrefix) ? ref.slice(statePrefix.length) : ref;
  return resolvePath(scope, path);
}
496
+
497
+ // ─── Table conversion ───────────────────────────────────────────────────────
498
+
499
/**
 * Convert a table element to an mdast table node.
 *
 * The element's children are converted first; only the resulting `tableRow` nodes are kept. A
 * table without any rows produces no output.
 *
 * @param {any} node
 * @param {Map<string, any>} componentDefs
 * @param {any} [scope]
 * @returns {any[]} Either a single-element array holding the table, or []
 */
function convertTable(node, componentDefs, scope) {
  /** @type {any[]} */
  const tableRows = [];
  for (const child of convertChildren(node, componentDefs, scope)) {
    if (child.type === "tableRow") tableRows.push(child);
  }
  return tableRows.length > 0 ? [{ type: "table", children: tableRows }] : [];
}
513
+
514
+ // ─── Text resolution ────────────────────────────────────────────────────────
515
+
516
/**
 * Turn a `textContent` value into a string.
 *
 * - null/undefined → null
 * - non-strings → `String(value)`
 * - strings → evaluated as a template when a scope is given and the string is a template; if
 *   evaluation yields null/undefined the original string is returned
 *
 * @param {any} value
 * @param {any} [scope]
 * @returns {string | null}
 */
function resolveText(value, scope) {
  if (value == null) return null;
  if (typeof value !== "string") return String(value);

  // Without a scope there is nothing to evaluate against
  const needsEvaluation = Boolean(scope) && isTemplateString(value);
  if (!needsEvaluation) return value;

  const evaluated = evaluateStaticTemplate(value, scope);
  return evaluated == null ? value : String(evaluated);
}
534
+
535
+ // ─── HTML → mdast conversion ──────────────────────────────────────────────
536
+
537
/**
 * Convert an HTML string to mdast nodes. The HTML is split into top-level chunks, and each
 * non-blank chunk is parsed on its own; chunks that parse to nothing are skipped.
 *
 * @param {string} html
 * @returns {any[]}
 */
function htmlToMdast(html) {
  return splitHtmlBlocks(html).flatMap((/** @type {string} */ chunk) => {
    const candidate = chunk.trim();
    if (!candidate) return [];
    return parseHtmlElement(candidate) ?? [];
  });
}
559
+
560
/**
 * Find the end of a block element whose opening tag has already been consumed, counting nested
 * elements of the same name.
 *
 * @param {string} source
 * @param {number} from - Index just past the opening tag
 * @param {string} name - Lower-case tag name (letters/digits only)
 * @returns {number} Index just past the matching closing tag, or -1 when the element is unclosed
 */
function findBlockEnd(source, from, name) {
  const tokenRe = new RegExp(`<${name}(?=[\\s>])[^>]*>|</${name}>`, "gi");
  tokenRe.lastIndex = from;
  let depth = 1;
  let token;
  while ((token = tokenRe.exec(source)) !== null) {
    if (token[0][1] === "/") {
      depth -= 1;
      if (depth === 0) return tokenRe.lastIndex;
    } else {
      depth += 1;
    }
  }
  return -1;
}

/**
 * Split HTML into top-level chunks: complete block elements, `<hr>` tags, and the runs of text or
 * inline markup between them.
 *
 * Each block element is matched from its opening tag to its own closing tag, with same-name
 * nesting taken into account, so containers such as `<blockquote><p>…</p></blockquote>`,
 * nested `<div>`s and lists come out as one chunk. `<hr>` is a void element and is always a chunk
 * by itself. An opening tag without a matching closing tag is left in the surrounding text run.
 *
 * @param {string} html
 * @returns {string[]} Non-empty chunks in document order
 */
function splitHtmlBlocks(html) {
  /** @type {string[]} */
  const blocks = [];
  const source = html.trim();

  // First alternative: void <hr>. Second: opening tag of a paired block element (name in group 1).
  const openRe =
    /<hr\s*\/?>|<(h[1-6]|p|blockquote|pre|ul|ol|table|div|section|article|aside|figure|nav|header|footer|main)(?=[\s>])[^>]*>/gi;

  /** Push the trimmed text between two indices, if any. */
  const pushText = (/** @type {number} */ from, /** @type {number} */ to) => {
    const between = source.slice(from, to).trim();
    if (between) blocks.push(between);
  };

  let cursor = 0;
  let open;
  while ((open = openRe.exec(source)) !== null) {
    const end =
      open[1] === undefined
        ? openRe.lastIndex
        : findBlockEnd(source, openRe.lastIndex, open[1].toLowerCase());

    // Unclosed element: keep scanning after its opening tag; the tag stays part of the text run
    if (end === -1) continue;

    pushText(cursor, open.index);
    blocks.push(source.slice(open.index, end));
    cursor = end;
    openRe.lastIndex = end;
  }
  pushText(cursor, source.length);

  return blocks;
}
592
+
593
/**
 * Parse one top-level HTML chunk into mdast node(s).
 *
 * The chunk is tested, in order, against: heading, paragraph, horizontal rule, `pre > code`,
 * blockquote, unordered list, ordered list, table, and wrapper elements (which are unwrapped).
 * Anything else that still contains text once tags are stripped becomes a paragraph of inline
 * content.
 *
 * @param {string} html - A single, trimmed chunk
 * @returns {any[] | null} The resulting nodes, or null when the chunk yields nothing
 */
function parseHtmlElement(html) {
  // Heading
  const heading = html.match(/^<(h[1-6])(?:\s[^>]*)?>(.+?)<\/\1>$/is);
  if (heading) {
    const [, tagName, body] = heading;
    return [{ type: "heading", depth: parseInt(tagName.slice(1), 10), children: parseInlineHtml(body) }];
  }

  // Paragraph
  const paragraph = html.match(/^<p(?:\s[^>]*)?>(.+?)<\/p>$/is);
  if (paragraph) {
    const inlineNodes = parseInlineHtml(paragraph[1]);
    return inlineNodes.length === 0 ? null : [{ type: "paragraph", children: inlineNodes }];
  }

  // Horizontal rule
  if (/^<hr\s*\/?>$/i.test(html)) {
    return [{ type: "thematicBreak" }];
  }

  // Code block (pre > code); the language comes from a leading class="language-xxx"
  const fenced = html.match(
    /^<pre(?:\s[^>]*)?>\s*<code(?:\s+class="language-(\w+)")?(?:\s[^>]*)?>([^]*?)<\/code>\s*<\/pre>$/is,
  );
  if (fenced) {
    return [{ type: "code", lang: fenced[1] ?? null, value: decodeHtmlEntities(fenced[2]) }];
  }

  // Blockquote — bare text children are wrapped in paragraphs
  const quote = html.match(/^<blockquote(?:\s[^>]*)?>([^]*?)<\/blockquote>$/is);
  if (quote) {
    const quoted = htmlToMdast(quote[1]).map((/** @type {any} */ entry) =>
      entry.type === "text" ? { type: "paragraph", children: [entry] } : entry,
    );
    return [{ type: "blockquote", children: quoted }];
  }

  // Unordered, then ordered list
  const listKinds = [
    { pattern: /^<ul(?:\s[^>]*)?>([^]*?)<\/ul>$/is, ordered: false },
    { pattern: /^<ol(?:\s[^>]*)?>([^]*?)<\/ol>$/is, ordered: true },
  ];
  for (const { pattern, ordered } of listKinds) {
    const list = html.match(pattern);
    if (!list) continue;
    const entries = parseListItems(list[1]);
    return entries.length === 0
      ? null
      : [{ type: "list", ordered, spread: false, children: entries }];
  }

  // Table
  const table = html.match(/^<table(?:\s[^>]*)?>([^]*?)<\/table>$/is);
  if (table) {
    return parseHtmlTable(table[1]);
  }

  // Wrapper elements (div, section, etc.) — unwrap
  const wrapper = html.match(
    /^<(?:div|section|article|aside|figure|nav|header|footer|main)(?:\s[^>]*)?>([^]*?)<\/(?:div|section|article|aside|figure|nav|header|footer|main)>$/is,
  );
  if (wrapper) {
    return htmlToMdast(wrapper[1]);
  }

  // Bare text / inline content
  if (stripHtmlTags(html).trim()) {
    return [{ type: "paragraph", children: parseInlineHtml(html) }];
  }

  return null;
}
677
+
678
/**
 * Parse inline HTML content to mdast inline nodes.
 *
 * Recognised markup: `<br>`, `<img>`, and the paired tags a, em/i, strong/b, del/s and code (tag
 * names are matched case-insensitively). Any other tag is skipped while its text content is kept.
 *
 * Whitespace handling: text with visible characters is kept verbatim. A whitespace-only run that
 * sits between two pieces of content is kept as a single space so neighbouring elements are not
 * glued together (`<em>a</em> <strong>b</strong>`); whitespace-only runs at the start, at the end,
 * or next to a line break are dropped.
 *
 * @param {string} html
 * @returns {any[]}
 */
function parseInlineHtml(html) {
  /** @type {any[]} */
  const nodes = [];
  let pos = 0;

  const isBlankText = (/** @type {any} */ n) => n?.type === "text" && !n.value.trim();

  /** Remove separator spaces that ended up at the tail of the node list. */
  const dropTrailingBlank = () => {
    while (nodes.length > 0 && isBlankText(nodes[nodes.length - 1])) nodes.pop();
  };

  /** Read a double-quoted attribute; the leading \s keeps `src` from matching `data-src`. */
  const readAttr = (/** @type {string} */ attrs, /** @type {string} */ name) =>
    attrs.match(new RegExp(`\\s${name}="([^"]*)"`, "i"))?.[1];

  while (pos < html.length) {
    const tagStart = html.indexOf("<", pos);
    if (tagStart === -1) {
      // Remaining text
      const text = decodeHtmlEntities(html.slice(pos));
      if (text.trim()) nodes.push({ type: "text", value: text });
      break;
    }

    // Text before tag
    if (tagStart > pos) {
      const text = decodeHtmlEntities(html.slice(pos, tagStart));
      const last = nodes[nodes.length - 1];
      if (text.trim()) {
        nodes.push({ type: "text", value: text });
      } else if (text && last && last.type !== "break" && !isBlankText(last)) {
        // Whitespace between two pieces of content: keep one separating space
        nodes.push({ type: "text", value: " " });
      }
    }

    const rest = html.slice(tagStart);

    // Self-closing tags
    const brMatch = rest.match(/^<br\s*\/?>/i);
    if (brMatch) {
      dropTrailingBlank();
      nodes.push({ type: "break" });
      pos = tagStart + brMatch[0].length;
      continue;
    }

    const imgMatch = rest.match(/^<img(\s[^>]*?)\/?>/i);
    if (imgMatch) {
      const attrs = imgMatch[1] ?? "";
      const src = readAttr(attrs, "src") ?? "";
      const alt = readAttr(attrs, "alt") ?? "";
      nodes.push({ type: "image", url: decodeHtmlEntities(src), alt: decodeHtmlEntities(alt) });
      pos = tagStart + imgMatch[0].length;
      continue;
    }

    // Paired inline tags
    const openMatch = rest.match(/^<(a|em|strong|del|code|b|i|s)(\s[^>]*)?>/i);
    if (openMatch) {
      const tag = openMatch[1].toLowerCase();
      const attrs = openMatch[2] ?? "";
      const innerStart = tagStart + openMatch[0].length;
      const closeTag = `</${tag}>`;
      const closeIdx = findMatchingClose(html, innerStart, tag);
      if (closeIdx === -1) {
        // No matching close — treat as text
        pos = tagStart + 1;
        continue;
      }
      const inner = html.slice(innerStart, closeIdx);
      pos = closeIdx + closeTag.length;

      switch (tag) {
        case "a": {
          const href = readAttr(attrs, "href") ?? "";
          const title = readAttr(attrs, "title") ?? null;
          const children = parseInlineHtml(inner);
          // A link needs some content; fall back to the raw inner text
          if (children.length === 0) {
            children.push({ type: "text", value: decodeHtmlEntities(inner) });
          }
          nodes.push({ type: "link", url: decodeHtmlEntities(href), title, children });
          break;
        }
        case "em":
        case "i":
          nodes.push({ type: "emphasis", children: parseInlineHtml(inner) });
          break;
        case "strong":
        case "b":
          nodes.push({ type: "strong", children: parseInlineHtml(inner) });
          break;
        case "del":
        case "s":
          nodes.push({ type: "delete", children: parseInlineHtml(inner) });
          break;
        case "code":
          nodes.push({ type: "inlineCode", value: decodeHtmlEntities(inner) });
          break;
      }
      continue;
    }

    // Unknown tag — skip it
    const skipMatch = rest.match(/^<[^>]*>/);
    pos = skipMatch ? tagStart + skipMatch[0].length : tagStart + 1;
  }

  dropTrailingBlank();
  return nodes;
}
778
+
779
/**
 * Find the closing tag that matches an already-consumed opening tag, taking nested elements of
 * the same name into account.
 *
 * Opening (`<tag` followed by whitespace or `>`) and closing (`</tag>`) tags are visited in
 * document order by a single scan starting at `start`; the depth starts at 1 for the tag that was
 * already opened, and the first closing tag that brings it back to 0 is the match.
 *
 * @param {string} html
 * @param {number} start - Position after the opening tag
 * @param {string} tag - Tag name to match (matched case-insensitively)
 * @returns {number} Position of the matching closing tag, or -1
 */
function findMatchingClose(html, start, tag) {
  const tokenRe = new RegExp(`<${tag}[\\s>]|</${tag}>`, "gi");
  tokenRe.lastIndex = start;

  let depth = 1;
  let token;
  while ((token = tokenRe.exec(html)) !== null) {
    if (token[0].startsWith("</")) {
      depth -= 1;
      if (depth === 0) return token.index;
    } else {
      depth += 1;
    }
  }
  return -1;
}
811
+
812
/**
 * Parse the direct `<li>` elements of a list's inner HTML into mdast listItem nodes.
 *
 * `<li>` / `</li>` tags are depth-counted, so an item that contains a nested list is captured up
 * to its own closing tag rather than the first `</li>` encountered. Items containing block markup
 * (p, ul, ol, blockquote, pre) are parsed as blocks; all other items become a single paragraph of
 * inline content.
 *
 * @param {string} html - Inner HTML of a `<ul>` or `<ol>`
 * @returns {any[]}
 */
function parseListItems(html) {
  /** @type {any[]} */
  const items = [];
  const tokenRe = /<li(?=[\s>])[^>]*>|<\/li>/gi;

  let depth = 0;
  let innerStart = 0;
  let token;
  while ((token = tokenRe.exec(html)) !== null) {
    const isClosing = token[0][1] === "/";

    if (!isClosing) {
      // Only the outermost <li> starts a new item; deeper ones belong to nested lists
      if (depth === 0) innerStart = tokenRe.lastIndex;
      depth += 1;
      continue;
    }

    // Ignore a stray closing tag with no open item
    if (depth === 0) continue;
    depth -= 1;
    if (depth > 0) continue;

    const inner = html.slice(innerStart, token.index).trim();
    // Check for nested block content
    const innerNodes = /<(?:p|ul|ol|blockquote|pre)[\s>]/i.test(inner)
      ? htmlToMdast(inner)
      : [{ type: "paragraph", children: parseInlineHtml(inner) }];
    items.push({ type: "listItem", spread: false, children: innerNodes });
  }

  return items;
}
833
+
834
/**
 * Parse the inner HTML of a `<table>` into an mdast table node.
 *
 * Every `<tr>` contributes one row made of its `<th>`/`<td>` cells; rows without cells are
 * skipped, and a table without rows produces no output.
 *
 * @param {string} html - Inner HTML of the table element
 * @returns {any[]} Either a single-element array holding the table, or []
 */
function parseHtmlTable(html) {
  /** @type {any[]} */
  const tableRows = [];

  for (const rowMatch of html.matchAll(/<tr(?:\s[^>]*)?>([\s\S]*?)<\/tr>/gi)) {
    const rowCells = Array.from(
      rowMatch[1].matchAll(/<(?:th|td)(?:\s[^>]*)?>([\s\S]*?)<\/(?:th|td)>/gi),
      (cellMatch) => ({ type: "tableCell", children: parseInlineHtml(cellMatch[1]) }),
    );
    if (rowCells.length > 0) {
      tableRows.push({ type: "tableRow", children: rowCells });
    }
  }

  return tableRows.length === 0 ? [] : [{ type: "table", children: tableRows }];
}
858
+
859
/**
 * Remove everything that looks like an HTML tag (`<` … `>`) from a string, leaving the text
 * between tags untouched. Entities are not decoded.
 *
 * @param {string} html
 * @returns {string}
 */
function stripHtmlTags(html) {
  const tagPattern = /<[^>]+>/g;
  return html.replace(tagPattern, "");
}
868
+
869
/**
 * Decode HTML character references in a single pass.
 *
 * Supported: the named entities `&amp;`, `&lt;`, `&gt;`, `&quot;`, `&apos;` and `&nbsp;` (decoded
 * to a regular space), plus decimal (`&#39;`) and hexadecimal (`&#x27;`) numeric references.
 * Unknown names and out-of-range code points are left untouched.
 *
 * Decoding in one pass matters: each reference is decoded exactly once, so `&amp;lt;` becomes the
 * text `&lt;` and not `<`.
 *
 * @param {string} str
 * @returns {string}
 */
function decodeHtmlEntities(str) {
  /** @type {Record<string, string>} */
  const named = { amp: "&", lt: "<", gt: ">", quot: '"', apos: "'", nbsp: " " };

  return str.replace(
    /&(?:#(\d+)|#[xX]([0-9a-fA-F]+)|([a-zA-Z]+));/g,
    (match, /** @type {string} */ dec, /** @type {string} */ hex, /** @type {string} */ name) => {
      if (name !== undefined) {
        return Object.prototype.hasOwnProperty.call(named, name) ? named[name] : match;
      }
      const codePoint = dec !== undefined ? Number.parseInt(dec, 10) : Number.parseInt(hex, 16);
      // String.fromCodePoint throws outside the Unicode range; keep such references verbatim
      if (!Number.isFinite(codePoint) || codePoint <= 0 || codePoint > 0x10ffff) return match;
      return String.fromCodePoint(codePoint);
    },
  );
}
885
+
886
+ // ─── Public API ─────────────────────────────────────────────────────────────
887
+
888
/**
 * Compile a fully-resolved Jx document to clean markdown.
 *
 * Steps: convert the document's top-level children to mdast, gather consecutive inline nodes at
 * the root into paragraphs (the root may only hold block nodes), serialise with remark-stringify
 * (GFM enabled), then collapse runs of three or more newlines and end the output with exactly one
 * newline.
 *
 * @param {any} doc - Resolved Jx document (post layout, context, prototypes, templates)
 * @param {Map<string, any>} [componentDefs] - Component definitions for inlining
 * @returns {{ content: string }} Empty content when the document has no children
 */
export function compileMarkdown(doc, componentDefs = new Map()) {
  const topLevel = doc.children;
  if (!Array.isArray(topLevel) || topLevel.length === 0) {
    return { content: "" };
  }

  // Scope for any template expressions that are still unresolved
  const scope = doc.state ? buildInitialScope(doc.state, null) : null;

  const converted = topLevel.flatMap((/** @type {any} */ child) =>
    nodeToMdast(child, componentDefs, scope),
  );

  // Ensure block-level structure: runs of inline nodes at the root become paragraphs
  /** @type {any[]} */
  const blocks = [];
  /** @type {any[]} */
  let pendingInline = [];

  for (const entry of converted) {
    if (isInlineType(entry.type)) {
      pendingInline.push(entry);
      continue;
    }
    if (pendingInline.length > 0) {
      blocks.push({ type: "paragraph", children: pendingInline });
      pendingInline = [];
    }
    blocks.push(entry);
  }
  if (pendingInline.length > 0) {
    blocks.push({ type: "paragraph", children: pendingInline });
  }

  const root = /** @type {any} */ ({ type: "root", children: blocks });

  const processor = unified()
    .use(remarkGfm)
    .use(remarkStringify, { bullet: "-", emphasis: "*", strong: "*", setext: false });
  const rawMarkdown = processor.stringify(root);

  // Clean up excessive whitespace
  const collapsed = rawMarkdown.replace(/\n{3,}/g, "\n\n").trim();

  return { content: collapsed + "\n" };
}