mnfst-render 0.5.23 → 0.5.24

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. package/manifest.render.mjs +944 -39
  2. package/package.json +2 -2
@@ -130,6 +130,37 @@ async function waitForManifestRenderReady(page, { allLocales, currentLocale, tim
130
130
  }));
131
131
  window.dispatchEvent(new PopStateEvent('popstate'));
132
132
 
133
+ // 5b. Eagerly warm up declared local data sources for the current locale.
134
+ //
135
+ // Without this, sources are loaded lazily — only when a `$x.foo` access
136
+ // triggers the proxy. For static `<template x-for="group in $x.docs">`
137
+ // patterns the iterator may not run early enough for the load to be
138
+ // in-flight before checkAndDispatchRenderReady's debounced timer fires,
139
+ // and the snapshot captures an empty template (no clones for SEO).
140
+ //
141
+ // Warming up here forces every declared local source into the loading
142
+ // state synchronously (loadDataSource sets _<name>_state.loading = true
143
+ // and registers a promise in loadingPromises before returning), which
144
+ // gates the render-ready dispatch until all loads settle. Cloud
145
+ // sources (Appwrite collections, object-form API URLs) are skipped —
146
+ // those are typically auth-gated or intentionally dynamic and not
147
+ // appropriate for SEO-baking; lazy access still works for them.
148
+ try {
149
+ const cfg = window.ManifestDataConfig;
150
+ const main = window.ManifestDataMain;
151
+ const manifest = await cfg?.ensureManifest?.();
152
+ if (manifest?.data && typeof main?.loadDataSource === 'function') {
153
+ const isAppwrite = cfg.isAppwriteCollection;
154
+ for (const [name, source] of Object.entries(manifest.data)) {
155
+ if (isAppwrite && isAppwrite(source)) continue;
156
+ if (source && typeof source === 'object' && source.url) continue;
157
+ // Fire-and-forget: we just need the loading flag set and the
158
+ // promise registered. Failures fall back to lazy behaviour.
159
+ main.loadDataSource(name, loc).catch(() => { });
160
+ }
161
+ }
162
+ } catch { /* warmup is best-effort; existing lazy access is the fallback */ }
163
+
133
164
  // 6. Run component swapping explicitly so components tied to this route render
134
165
  // and trigger any $x accesses that start on-demand data loads.
135
166
  if (window.ManifestComponentsSwapping?.processAll) {
@@ -251,6 +282,27 @@ function resolveConfig() {
251
282
  // fall back to the timeout. 10s gives slow data plugin pipelines a
252
283
  // chance while bounding worst-case per-path overhead.
253
284
  pipelineTimeout: 10000,
285
+ // SEO / AEO meta injection — see metaInjection() and the prerender.meta
286
+ // section of manifest.json. Layered precedence (highest first):
287
+ // 1. <template data-head> per-route (already in DOM at snapshot time)
288
+ // 2. <head> in index.html (already in DOM at snapshot time)
289
+ // 3. prerender.meta.* expressions (Alpine-evaluated per route)
290
+ // 4. prerender.meta.fallback.* (static strings if expression empty)
291
+ // 5. Smart defaults derived from the rendered DOM (h1, first p, etc.)
292
+ // 6. PWA-style manifest.json fields (name, description, author, icons)
293
+ //
294
+ // Each layer only fills slots not yet present. An empty <title></title>
295
+ // or one matching manifest.json "name" counts as missing (placeholder rule).
296
+ seo: {
297
+ siteName: manifest.name || null,
298
+ siteDescription: manifest.description || null,
299
+ siteAuthor: manifest.author || null,
300
+ icons: Array.isArray(manifest.icons) ? manifest.icons : [],
301
+ meta: pre.meta || null,
302
+ structuredData: pre.structuredData || null,
303
+ imageSnapshots: pre.meta?.imageSnapshots !== false, // default true
304
+ defaults: pre.meta?.defaults !== false, // default true
305
+ },
254
306
  };
255
307
  }
256
308
 
@@ -676,14 +728,14 @@ function stripDataTailwindAttr(html) {
676
728
  * Puppeteer applies `<html class="light">` or `<html class="dark">` based on
677
729
  * the build host's system preference at prerender time. Shipping that baked
678
730
  * class to users in the OPPOSITE preference causes a visible flash on every
679
- * page load (dark→light or light→dark) until the themes plugin re-evaluates.
731
+ * page load (dark→light or light→dark) until the colors plugin re-evaluates.
680
732
  *
681
733
  * Fix: strip `light`/`dark` from the baked `<html class>` and inject a tiny
682
734
  * synchronous `<script>` at the top of `<head>` that sets the correct class
683
735
  * BEFORE the first paint — based on the user's `localStorage.theme` (their
684
736
  * saved preference) or `prefers-color-scheme` (their system preference).
685
737
  *
686
- * The themes plugin (`manifest.themes.js`) still runs later for reactivity
738
+ * The colors plugin (`manifest.colors.js`) still runs later for reactivity
687
739
  * (Alpine bindings, click handlers, system-preference change listener), but
688
740
  * the initial paint already has the correct class so there's no flash.
689
741
  */
@@ -1686,13 +1738,591 @@ function resolveHeadXBindings(html, xData) {
1686
1738
  });
1687
1739
  }
1688
1740
 
1689
- // --- SEO: robots.txt and sitemap.xml (written to output, use liveUrl for crawlers) ---
1741
+ // --- SEO: per-route OG image auto-snapshot --------------------------------
1742
+ //
1743
+ // When prerender.meta.imageSnapshots is true (the default) and no other source
1744
+ // has provided an og:image (data-head, prerender.meta.image, or prerender.meta
1745
+ // .fallback.image), capture a 1200×630 PNG of the rendered page and use that as
1746
+ // the og:image / twitter:image. Saved to <output>/og/<sanitized-path>.png.
1747
+ //
1748
+ // 1200×630 is the OpenGraph / Twitter / LinkedIn recommended dimension. The
1749
+ // viewport is left at its page-creation default; the card is produced by
1750
+ // clipping a 1200×630 region from the top of the rendered page.
1751
/**
 * Capture a 1200×630 PNG of the top of the rendered page for use as the
 * route's og:image / twitter:image.
 *
 * @param {object} page - Page handle exposing `evaluate()` and `screenshot()`.
 * @param {string} outputDir - Prerender output directory; the PNG is written to `<outputDir>/og/`.
 * @param {string} pathSeg - Route path without a leading slash ('' for the root).
 * @returns {Promise<string|null>} Site-relative URL (`/og/<name>.png`), or null
 *   when the snapshot failed or looks blank.
 */
async function takeOgSnapshot(page, outputDir, pathSeg) {
  // A blank 1200×630 PNG (header only, white body) is ~8–10KB; a content-rich
  // page is 50KB+. Anything under this floor is treated as a failed snapshot.
  const MIN_SNAPSHOT_BYTES = 15 * 1024;

  // NOTE(review): routeHtmlPath() in this file checks '__prerender_404__', not
  // '__404__' — confirm which sentinel callers actually pass here.
  const fileSeg = pathSeg === '' || pathSeg === '__404__'
    ? 'index'
    : pathSeg.replace(/\//g, '-').replace(/[^a-zA-Z0-9_-]/g, '_');
  const ogDir = join(outputDir, 'og');
  const filePath = join(ogDir, `${fileSeg}.png`);

  try {
    // `recursive: true` does not throw when the directory already exists, so
    // any error raised here is a real one and is reported below.
    mkdirSync(ogDir, { recursive: true });
    // The viewport is left untouched: clipping a 1200×630 region from the top
    // gives the card aspect ratio without forcing a layout reflow, which can
    // otherwise produce a blank capture for viewport-height-based layouts.
    await page.evaluate(() => window.scrollTo(0, 0));
    await page.screenshot({
      path: filePath,
      type: 'png',
      clip: { x: 0, y: 0, width: 1200, height: 630 },
      omitBackground: false,
      captureBeyondViewport: false,
    });
  } catch (e) {
    // Non-fatal — the caller falls back to other og:image sources.
    console.error(`prerender: og snapshot failed for /${pathSeg || ''}: ${e?.message || e}`);
    return null;
  }

  let size;
  try {
    size = statSync(filePath).size;
  } catch {
    // Stat failure is non-fatal: assume the screenshot is usable.
    return `/og/${fileSeg}.png`;
  }

  if (size < MIN_SNAPSHOT_BYTES) {
    // Best-effort cleanup. The snapshot is rejected even if deletion fails,
    // so a blank image is never advertised as the og:image.
    try { unlinkSync(filePath); } catch { /* leave the stray file behind */ }
    return null;
  }
  return `/og/${fileSeg}.png`;
}
1794
+
1795
+ // --- SEO: per-route meta + structured data injection ----------------------
1796
+ //
1797
+ // Runs in the live page right before HTML serialization. Layers (highest
1798
+ // precedence first; each layer only fills slots not yet present):
1799
+ //
1800
+ // 1. <template data-head> per-route — already in the head by snapshot time
1801
+ // 2. <head> in index.html — already in the head by snapshot time
1802
+ // 3. prerender.meta.* expressions — Alpine-evaluated against the live page
1803
+ // 4. prerender.meta.fallback.* — static strings used when expressions are empty
1804
+ // 5. Smart defaults from the rendered DOM (h1, first p, first img, etc.)
1805
+ // 6. PWA-style manifest.json fields (name, description, author, icons)
1806
+ //
1807
+ // "Slot taken" detection is by selector: <title>, <meta name=>, <meta property=>.
1808
+ // An empty <title></title> or one matching manifest.json "name" counts as
1809
+ // missing (placeholder rule), so smart defaults can fill route-specific titles
1810
+ // without the author having to clear the static <title> in index.html.
1811
+ //
1812
+ // JSON-LD blocks (WebSite, Article, BreadcrumbList) follow the same pattern:
1813
+ // only inject if no <script type="application/ld+json"> already covers that
1814
+ // schema type for the route.
1815
/**
 * Fill missing SEO slots (title, description, author, OpenGraph / Twitter
 * tags, JSON-LD) in the live page's <head> before it is serialized.
 *
 * Everything runs inside `page.evaluate`, so the callback must be fully
 * self-contained: it can only see its `ctx` argument and browser globals.
 *
 * @param {object} page - Page handle exposing `evaluate(fn, arg)`.
 * @param {object} ctx - Values read here: `seo` (siteName, siteDescription,
 *   siteAuthor, icons, meta, structuredData, defaults), `liveUrl`, `pathSeg`,
 *   `snapshotUrl`.
 * @returns {Promise<void>}
 */
async function injectMetaInDom(page, ctx) {
  await page.evaluate((ctx) => {
    const head = document.head;
    if (!head) return;

    // --- Helpers ---------------------------------------------------------

    // Keys with these prefixes are written as <meta property=…>, others as <meta name=…>.
    const SOCIAL_PREFIXES = /^(og:|twitter:|article:|fb:)/;

    // Look the key up under both attribute styles.
    const findMeta = (key) => head.querySelector(`meta[name="${key}"], meta[property="${key}"]`);

    // A slot is "open" when the tag is missing, empty, or still holds a known
    // site-wide placeholder (manifest.json's description), so hardcoded
    // site-default meta can be replaced by route-specific values.
    const PLACEHOLDER = {
      description: ctx.seo.siteDescription,
    };
    const slotIsOpen = (key, existingEl) => {
      if (!existingEl) return true;
      const current = (existingEl.getAttribute('content') || '').trim();
      if (!current) return true;
      const placeholder = PLACEHOLDER[key];
      return Boolean(placeholder) && current === placeholder;
    };

    // Write `content` into the slot for `key` if the slot is open.
    // Returns true when something was written.
    const setMeta = (key, content) => {
      if (content == null) return false;
      const str = String(content).trim();
      if (!str) return false;
      const existing = findMeta(key);
      if (!slotIsOpen(key, existing)) return false;
      if (existing) {
        existing.setAttribute('content', str);
      } else {
        const m = document.createElement('meta');
        m.setAttribute(SOCIAL_PREFIXES.test(key) ? 'property' : 'name', key);
        m.setAttribute('content', str);
        head.appendChild(m);
      }
      return true;
    };

    const getCurrentTitle = () => {
      const el = head.querySelector('title');
      return { el, text: el ? (el.textContent || '').trim() : '' };
    };

    // An empty <title> or one equal to manifest.name counts as a placeholder,
    // so a route-specific title can replace the static starter-template tag.
    const titleSlotIsOpen = () => {
      const { text } = getCurrentTitle();
      if (!text) return true;
      if (ctx.seo.siteName && text === ctx.seo.siteName) return true;
      return false;
    };

    const setTitle = (text) => {
      if (!text) return false;
      if (!titleSlotIsOpen()) return false;
      const trimmed = String(text).trim();
      if (!trimmed) return false;
      const { el } = getCurrentTitle();
      if (el) {
        el.textContent = trimmed;
      } else {
        const t = document.createElement('title');
        t.textContent = trimmed;
        head.appendChild(t);
      }
      return true;
    };

    // Evaluate an expression with Alpine against <body>. Returns a trimmed,
    // non-empty string, or null when Alpine is unavailable, the expression
    // throws, or the result is empty.
    const evalAlpine = (expr) => {
      if (typeof expr !== 'string' || !expr.trim()) return null;
      try {
        const A = window.Alpine;
        if (!A || typeof A.evaluate !== 'function') return null;
        const v = A.evaluate(document.body, expr);
        if (v == null) return null;
        const s = typeof v === 'string' ? v : String(v);
        return s.trim() || null;
      } catch { return null; }
    };

    // Collapse whitespace and shorten to at most `max` characters.
    const truncate = (s, max) => {
      const t = String(s).replace(/\s+/g, ' ').trim();
      if (t.length <= max) return t;
      // Reserve one character for the single-character ellipsis. Prefer cutting
      // at the last word boundary, unless that would discard more than ~40% of
      // the budget.
      const sliced = t.slice(0, max - 1);
      const lastSpace = sliced.lastIndexOf(' ');
      const base = lastSpace > max * 0.6 ? sliced.slice(0, lastSpace) : sliced;
      return base + '…';
    };

    // --- Smart defaults (DOM derivation) ---------------------------------

    const smartDefaults = (() => {
      if (!ctx.seo.defaults) return {};
      // Title source: first <h1> inside <main>/<article>, then any <h1>.
      const h1El = document.querySelector('main h1, article h1') || document.querySelector('h1');
      const h1 = h1El ? (h1El.textContent || '').trim() : '';
      const composedTitle = (() => {
        if (!h1) return ctx.seo.siteName || null;
        if (!ctx.seo.siteName || h1 === ctx.seo.siteName) return h1;
        return `${h1} — ${ctx.seo.siteName}`;
      })();

      // Description: first <p> of at least 30 characters in main/article/.prose.
      const descCandidates = document.querySelectorAll('main p, article p, .prose p');
      let desc = '';
      for (const p of descCandidates) {
        const text = (p.textContent || '').trim();
        if (text.length >= 30) { desc = truncate(text, 160); break; }
      }

      // Image: snapshot URL if one was taken; else first content <img> with a
      // non-data src; else the largest manifest icon. The snapshot wins because
      // it is sized for OG/Twitter cards, whereas a content image could have
      // any aspect ratio.
      let imgSrc = ctx.snapshotUrl || '';
      if (!imgSrc) {
        const imgCandidates = document.querySelectorAll('main img[src], article img[src]');
        for (const img of imgCandidates) {
          const src = img.getAttribute('src') || '';
          if (src && !src.startsWith('data:')) { imgSrc = src; break; }
        }
      }
      if (!imgSrc && Array.isArray(ctx.seo.icons) && ctx.seo.icons.length) {
        // Largest icon by area; icons without a parsable "WxH" sort last.
        const area = (icon) => {
          const m = String(icon?.sizes || '').match(/(\d+)x(\d+)/);
          return m ? parseInt(m[1], 10) * parseInt(m[2], 10) : 0;
        };
        const sorted = ctx.seo.icons.slice().sort((a, b) => area(b) - area(a));
        imgSrc = sorted[0]?.src || '';
      }

      // Type heuristic: 'article' if the page renders an <article> or its path
      // starts with a content section (/docs/, /blog/, …); 'website' otherwise.
      const looksLikeArticle = !!document.querySelector('article')
        || /^\/(?:docs|blog|articles|posts|guides)\//i.test(location.pathname);
      const ogType = looksLikeArticle ? 'article' : 'website';

      return {
        title: composedTitle,
        description: desc || ctx.seo.siteDescription || null,
        image: imgSrc || null,
        ogType,
      };
    })();

    // --- Resolve a single meta value through the precedence chain --------

    const resolve = (key) => {
      // Layer 3: prerender.meta expression
      const exprMap = ctx.seo.meta || {};
      const expr = exprMap[key];
      if (typeof expr === 'string') {
        const v = evalAlpine(expr);
        if (v) return v;
      } else if (typeof expr === 'boolean' || typeof expr === 'number') {
        return String(expr);
      }
      // Layer 4: explicit fallback
      const fallback = exprMap.fallback?.[key];
      if (fallback) return String(fallback);
      // Layer 5: smart defaults from the DOM (page-specific — beats generic PWA fields).
      if (smartDefaults[key]) return smartDefaults[key];
      // Layer 6: PWA-style manifest.json fields — last-resort generic fallback
      if (key === 'title' && ctx.seo.siteName) return ctx.seo.siteName;
      if (key === 'description' && ctx.seo.siteDescription) return ctx.seo.siteDescription;
      if (key === 'author' && ctx.seo.siteAuthor) return ctx.seo.siteAuthor;
      return null;
    };

    // --- Title -----------------------------------------------------------

    setTitle(resolve('title'));

    // --- Description / author -------------------------------------------

    const description = resolve('description');
    setMeta('description', description);
    setMeta('author', resolve('author'));

    // --- Canonical URL (skip — already injected later by buildCanonicalAndHreflang) ---

    // --- OpenGraph / Twitter --------------------------------------------

    const liveBase = (ctx.liveUrl || '').replace(/\/$/, '');
    // NOTE(review): routeHtmlPath() in this file checks '__prerender_404__',
    // not '__404__' — confirm which sentinel reaches this function.
    const pageUrl = ctx.pathSeg === '' || ctx.pathSeg === '__404__'
      ? (liveBase ? liveBase + '/' : null)
      : (liveBase ? `${liveBase}/${ctx.pathSeg}` : null);
    const finalTitle = getCurrentTitle().text || resolve('title');
    const ogType = resolve('ogType') || smartDefaults.ogType || 'website';
    // NOTE(review): `image` may be a site-relative path (e.g. the snapshot's
    // "/og/….png"). Social crawlers generally expect absolute URLs — confirm
    // whether a later step absolutizes it.
    const image = resolve('image');

    setMeta('og:title', finalTitle);
    setMeta('og:description', description);
    setMeta('og:type', ogType);
    setMeta('og:url', pageUrl);
    setMeta('og:site_name', ctx.seo.siteName);
    if (image) setMeta('og:image', image);

    setMeta('twitter:card', image ? 'summary_large_image' : 'summary');
    setMeta('twitter:title', finalTitle);
    setMeta('twitter:description', description);
    if (image) setMeta('twitter:image', image);

    // --- JSON-LD structured data ----------------------------------------

    const sd = ctx.seo.structuredData;
    if (sd && typeof sd === 'object') {
      // Collect every schema type the page already declares so it is not
      // duplicated. Scans the whole document (JSON-LD is valid in <body> too)
      // and understands top-level arrays, @graph containers and array @type.
      const existingTypes = new Set();
      const collectTypes = (node) => {
        if (Array.isArray(node)) {
          node.forEach(collectTypes);
          return;
        }
        if (!node || typeof node !== 'object') return;
        const declared = node['@type'];
        const list = Array.isArray(declared) ? declared : [declared];
        list.forEach((t) => {
          if (typeof t === 'string' && t) existingTypes.add(t);
        });
        if (node['@graph']) collectTypes(node['@graph']);
      };
      document.querySelectorAll('script[type="application/ld+json"]').forEach((s) => {
        try {
          collectTypes(JSON.parse(s.textContent || '{}'));
        } catch { /* skip malformed */ }
      });

      // String fields are tried as Alpine expressions; when evaluation yields
      // nothing the literal is kept, so plain strings work too.
      const resolveSdField = (v) => {
        if (typeof v === 'string') {
          const evaled = evalAlpine(v);
          return evaled ?? v;
        }
        return v;
      };
      // Shallow: only top-level fields of the schema definition are resolved.
      const resolveSchema = (obj) => {
        if (obj == null || typeof obj !== 'object') return obj;
        const out = {};
        for (const k of Object.keys(obj)) {
          out[k] = resolveSdField(obj[k]);
        }
        return out;
      };

      const blocks = [];
      for (const [type, def] of Object.entries(sd)) {
        if (existingTypes.has(type)) continue;
        if (def === false) continue;
        if (type === 'BreadcrumbList' && def === true) {
          // Auto-derive from URL path segments.
          const parts = location.pathname.split('/').filter(Boolean);
          const items = [{
            '@type': 'ListItem',
            position: 1,
            name: ctx.seo.siteName || 'Home',
            item: liveBase ? liveBase + '/' : '/',
          }];
          parts.forEach((seg, i) => {
            items.push({
              '@type': 'ListItem',
              position: i + 2,
              name: seg.replace(/-/g, ' ').replace(/\b\w/g, (c) => c.toUpperCase()),
              item: liveBase ? `${liveBase}/${parts.slice(0, i + 1).join('/')}` : '/' + parts.slice(0, i + 1).join('/'),
            });
          });
          blocks.push({ '@context': 'https://schema.org', '@type': 'BreadcrumbList', itemListElement: items });
          continue;
        }
        if (def === true) {
          // Bare-true for known schemas: minimal auto-fill. Unknown types are ignored.
          if (type === 'WebSite') {
            blocks.push({
              '@context': 'https://schema.org',
              '@type': 'WebSite',
              name: ctx.seo.siteName || finalTitle || '',
              url: liveBase || '',
            });
          } else if (type === 'Article') {
            blocks.push({
              '@context': 'https://schema.org',
              '@type': 'Article',
              headline: finalTitle || '',
              description: description || '',
              ...(image ? { image } : {}),
              ...(pageUrl ? { url: pageUrl } : {}),
              ...(ctx.seo.siteAuthor ? { author: { '@type': 'Person', name: ctx.seo.siteAuthor } } : {}),
            });
          }
          continue;
        }
        if (typeof def === 'object') {
          const resolved = resolveSchema(def);
          blocks.push({ '@context': 'https://schema.org', '@type': type, ...resolved });
        }
      }

      for (const block of blocks) {
        const s = document.createElement('script');
        s.setAttribute('type', 'application/ld+json');
        s.textContent = JSON.stringify(block);
        head.appendChild(s);
      }
    }
  }, ctx);
}
2125
+
2126
+ // --- SEO: robots.txt, sitemap.xml, llms.txt, llms-full.txt ---------------
2127
+ //
2128
+ // Written to the prerender output directory. liveUrl is the canonical public
2129
+ // host (https://...), used for absolute URLs in sitemap entries and the llms.txt
2130
+ // page index. llms.txt and llms-full.txt follow the llmstxt.org convention —
2131
+ // a plain-markdown index and full-content concatenation specifically for LLM
2132
+ // crawlers (ChatGPT, Claude, Perplexity, etc.) that prefer structured plaintext
2133
+ // over scraping rendered HTML.
1690
2134
 
1691
- function writeSeoFiles(outputDir, pathList, liveUrl, locales, defaultLocale) {
2135
/**
 * Reduce an HTML string to whitespace-collapsed plain text.
 *
 * Drops <script>, <style>, <svg> and <template> elements together with their
 * contents, removes comments and all remaining tags, then decodes a small set
 * of entities (&nbsp; &amp; &lt; &gt; &quot; &#39;). Crude but sufficient for
 * meta description / llms-full content extraction.
 *
 * Entities are decoded in a single pass so an escaped entity such as
 * `&amp;lt;` yields the literal text `&lt;` instead of being unescaped twice
 * into `<`.
 *
 * @param {*} html - HTML source; falsy values are treated as an empty string.
 * @returns {string} Plain text with runs of whitespace collapsed to one space.
 */
function htmlToText(html) {
  const entityText = { nbsp: ' ', amp: '&', lt: '<', gt: '>', quot: '"', '#39': "'" };
  return String(html || '')
    .replace(/<script[\s\S]*?<\/script>/gi, ' ')
    .replace(/<style[\s\S]*?<\/style>/gi, ' ')
    .replace(/<svg[\s\S]*?<\/svg>/gi, ' ')
    .replace(/<template[\s\S]*?<\/template>/gi, ' ')
    .replace(/<!--[\s\S]*?-->/g, ' ')
    .replace(/<[^>]+>/g, ' ')
    // One pass over all supported entities: each source entity is decoded
    // exactly once, whatever the decoded text happens to look like.
    .replace(/&(nbsp|amp|lt|gt|quot|#39);/g, (_, name) => entityText[name])
    .replace(/\s+/g, ' ')
    .trim();
}
2157
+
2158
/**
 * Read a prerendered HTML file and extract its <title>, its
 * <meta name="description"> value, and the plain text of the route's main
 * content region.
 *
 * Region selection (first hit wins):
 *   1. The first element whose class list contains `prose` (depth-tracked, so
 *      nested same-name elements are handled).
 *   2. `<article>`.
 *   3. `<main>`, or `<body>` when there is no `<main>`. At this layer `<nav>`,
 *      `<footer>` and any `[data-static]` container are removed. `<header>`
 *      and `<aside>` are kept — `<aside>` deliberately, because articles use
 *      `<aside class="frame">` for example boxes.
 *
 * @param {string} filePath - Path to the prerendered HTML file.
 * @returns {{title: string, description: string, bodyText: string}|null}
 *   null when the file does not exist.
 */
function extractRouteContent(filePath) {
  if (!existsSync(filePath)) return null;
  const html = readFileSync(filePath, 'utf8');

  // Elements that never have a closing tag; a data-static marker on one of
  // these must remove only the tag itself.
  const VOID_TAGS = new Set([
    'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'link', 'meta', 'source', 'track', 'wbr',
  ]);

  // Single-pass decode of the entities that occur in serialized attribute values.
  const ATTR_ENTITIES = { amp: '&', lt: '<', gt: '>', quot: '"', '#39': "'", nbsp: ' ' };
  const decodeAttr = (value) =>
    value.replace(/&(amp|lt|gt|quot|#39|nbsp);/g, (_, name) => ATTR_ENTITIES[name]);

  const titleMatch = html.match(/<title[^>]*>([\s\S]*?)<\/title>/i);
  // The value ends at the SAME quote character that opened it, so an apostrophe
  // inside a double-quoted description no longer truncates the match.
  const descMatch = html.match(/<meta\s+name=["']description["']\s+content=(["'])([\s\S]*?)\1/i);

  // Walk forward from `from` (just after an opening <tagName …>), counting
  // nested open/close pairs of the same tag. Returns the offsets of the
  // matching close tag, or null when the element is never closed. The
  // (?![\w-]) guard keeps `<nav-bar>` from being counted as a nested `<nav>`.
  const findElementEnd = (source, tagName, from) => {
    const open = new RegExp(`<${tagName}(?![\\w-])[^>]*>`, 'gi');
    const close = new RegExp(`</${tagName}\\s*>`, 'gi');
    let depth = 1;
    let cursor = from;
    while (depth > 0) {
      open.lastIndex = cursor;
      close.lastIndex = cursor;
      const nextOpen = open.exec(source);
      const nextClose = close.exec(source);
      if (!nextClose) return null;
      if (nextOpen && nextOpen.index < nextClose.index) {
        depth += 1;
        cursor = nextOpen.index + nextOpen[0].length;
      } else {
        depth -= 1;
        cursor = nextClose.index + nextClose[0].length;
        if (depth === 0) return { innerEnd: nextClose.index, outerEnd: cursor };
      }
    }
    return null;
  };

  // Inner HTML of the first element matched by `openerRx` (capture group 1 is
  // the tag name). An unclosed element yields everything after its opener.
  const extractByOpener = (source, openerRx) => {
    const m = openerRx.exec(source);
    if (!m) return null;
    const start = m.index + m[0].length;
    const end = findElementEnd(source, m[1], start);
    return end ? source.slice(start, end.innerEnd) : source.slice(start);
  };

  // Remove every element matched by `openerRx` (non-global; group 1 is the tag
  // name), including its contents. An unclosed container is removed through
  // the end of the string. Each pass deletes at least the opener, so the loop
  // terminates.
  const stripContainers = (source, openerRx) => {
    let out = source;
    let m;
    while ((m = openerRx.exec(out))) {
      const innerStart = m.index + m[0].length;
      let endIdx;
      if (VOID_TAGS.has(m[1].toLowerCase()) || m[0].endsWith('/>')) {
        endIdx = innerStart;
      } else {
        const end = findElementEnd(out, m[1], innerStart);
        endIdx = end ? end.outerEnd : out.length;
      }
      out = `${out.slice(0, m.index)} ${out.slice(endIdx)}`;
    }
    return out;
  };

  // Tag names may contain hyphens (custom elements), so the whole name is captured.
  const proseRegion = extractByOpener(
    html,
    /<([a-z][a-z0-9-]*)(?![\w-])[^>]*\bclass=["'][^"']*\bprose\b[^"']*["'][^>]*>/i
  );

  let region = '';
  if (proseRegion) {
    region = proseRegion;
  } else {
    const articleMatch = html.match(/<article\b[^>]*>([\s\S]*?)<\/article>/i);
    if (articleMatch) {
      region = articleMatch[1];
    } else {
      const mainMatch = html.match(/<main\b[^>]*>([\s\S]*?)<\/main>/i);
      const bodyMatch = mainMatch ? null : html.match(/<body\b[^>]*>([\s\S]*?)<\/body>/i);
      let candidate = mainMatch ? mainMatch[1] : (bodyMatch ? bodyMatch[1] : '');
      // Strip site chrome. <aside> is intentionally NOT stripped — articles use
      // <aside class="frame"> for example boxes that should appear in llms-full.
      candidate = candidate.replace(/<nav\b[\s\S]*?<\/nav>/gi, ' ');
      candidate = candidate.replace(/<footer\b[\s\S]*?<\/footer>/gi, ' ');
      // data-static containers are removed depth-tracked because nav lists nest.
      candidate = stripContainers(candidate, /<([a-z][a-z0-9-]*)(?![\w-])[^>]*\bdata-static\b[^>]*>/i);
      region = candidate;
    }
  }

  return {
    title: titleMatch ? htmlToText(titleMatch[1]) : '',
    description: descMatch ? decodeAttr(descMatch[2]) : '',
    bodyText: region ? htmlToText(region) : '',
  };
}
2280
+
2281
/**
 * Map a route path segment to the HTML file written for it in the output dir.
 *
 * - ''                    → <outputDir>/index.html
 * - '__prerender_404__'   → <outputDir>/404.html
 * - anything else ('a/b') → <outputDir>/a/b/index.html
 *
 * @param {string} outputDir - Prerender output directory.
 * @param {string} pathSeg - Route path without a leading slash.
 * @returns {string} Path of the route's HTML file.
 */
function routeHtmlPath(outputDir, pathSeg) {
  switch (pathSeg) {
    case '':
      return join(outputDir, 'index.html');
    case '__prerender_404__':
      return join(outputDir, '404.html');
    default: {
      const segments = pathSeg.split('/');
      return join(outputDir, ...segments, 'index.html');
    }
  }
}
2287
+
2288
/**
 * Best-effort last-modified date (YYYY-MM-DD, from the UTC ISO timestamp) for
 * a route.
 *
 * Lookup order:
 *   1. The mtime of the first existing regular file among these source
 *      conventions: articles/<path minus section prefix>.md,
 *      articles/<path>.md, pages/<path>.html, <path>.md (all under rootDir).
 *      A source file's mtime reflects content changes rather than the
 *      prerender run.
 *   2. The mtime of the prerendered HTML file for the route.
 *   3. Today's date.
 *
 * @param {string} rootDir - Project root used to locate source files.
 * @param {string} outputDir - Prerender output directory.
 * @param {string} pathSeg - Route path without a leading slash.
 * @returns {string} Date formatted as YYYY-MM-DD.
 */
function routeLastModDate(rootDir, outputDir, pathSeg) {
  // Returns the file's mtime as YYYY-MM-DD, or null when it cannot be stat'ed
  // (or, with `mustBeFile`, when it is not a regular file).
  const mtimeDay = (file, mustBeFile) => {
    try {
      const info = statSync(file);
      if (mustBeFile && !info.isFile()) return null;
      return info.mtime.toISOString().slice(0, 10);
    } catch {
      return null;
    }
  };

  // Markdown sources typically live under articles/ keyed by the path that
  // remains after the leading section ("docs/", "blog/", …) is removed.
  const withoutSection = pathSeg.replace(/^(?:docs|blog|articles|posts|guides)\//, '');
  const sourceCandidates = [
    join(rootDir, 'articles', `${withoutSection}.md`),
    join(rootDir, 'articles', `${pathSeg}.md`),
    join(rootDir, 'pages', `${pathSeg}.html`),
    join(rootDir, `${pathSeg}.md`),
  ];
  for (const candidate of sourceCandidates) {
    const sourceDay = mtimeDay(candidate, true);
    if (sourceDay) return sourceDay;
  }

  // Fall back to the prerendered output's mtime.
  try {
    const htmlDay = mtimeDay(routeHtmlPath(outputDir, pathSeg || ''), false);
    if (htmlDay) return htmlDay;
  } catch { /* ignore */ }

  return new Date().toISOString().slice(0, 10);
}
2320
+
2321
+ function writeSeoFiles(outputDir, pathList, liveUrl, locales, defaultLocale, ctx = {}) {
1692
2322
  const base = liveUrl.replace(/\/$/, '');
1693
- const today = new Date().toISOString().slice(0, 10);
1694
2323
  const localeList = Array.isArray(locales) ? locales : [];
1695
2324
  const multiLocale = localeList.length > 1;
2325
+ const rootDir = ctx.rootDir || '';
1696
2326
 
1697
2327
  writeFileSync(
1698
2328
  join(outputDir, 'robots.txt'),
@@ -1718,7 +2348,8 @@ Sitemap: ${base}/sitemap.xml
1718
2348
  body += `\n <xhtml:link rel="alternate" hreflang="${escapeXmlText(hreflang)}" href="${escapeXmlText(href)}" />`;
1719
2349
  }
1720
2350
  }
1721
- body += `\n <lastmod>${today}</lastmod>
2351
+ const lastmod = routeLastModDate(rootDir, outputDir, pathSeg);
2352
+ body += `\n <lastmod>${lastmod}</lastmod>
1722
2353
  <changefreq>monthly</changefreq>
1723
2354
  <priority>${path === '' ? '1.0' : '0.8'}</priority>`;
1724
2355
  return ` <url>
@@ -1732,6 +2363,155 @@ ${body}
1732
2363
  ${urlsetNs}
1733
2364
  ${urlEntries.join('\n')}
1734
2365
  </urlset>
2366
+ `,
2367
+ 'utf8'
2368
+ );
2369
+
2370
+ writeLlmsFiles(outputDir, pathList, base, ctx);
2371
+ }
2372
+
2373
/**
 * Write llms.txt (curated index) and llms-full.txt (concatenated full content)
 * per the llmstxt.org convention.
 *
 * Each route in pathList is resolved to its prerendered HTML file with
 * routeHtmlPath() and handed to extractRouteContent(); routes for which the
 * extractor returns a falsy value are skipped. The extracted title /
 * description / body text were already filled by injectMetaInDom + smart
 * defaults, so the output reflects the same layered precedence
 * (data-head → prerender.meta → smart defaults) without re-deriving it.
 *
 * Pages are grouped into sections by their first URL segment (e.g. "Docs" for
 * /docs/*) so the index is browseable. The root page ('') goes into an
 * "Overview" section.
 *
 * Titles, descriptions, the site name and the site description are collapsed
 * to a single line before being written: they are interpolated into Markdown
 * list items, headings and blockquotes, all of which break if the value
 * contains a newline.
 *
 * @param {string} outputDir - Directory that receives llms.txt and llms-full.txt.
 * @param {string[]} pathList - Route path segments; '' denotes the root page.
 * @param {string} liveBase - Public site base URL; the visible caller passes it
 *   with the trailing slash already stripped.
 * @param {{ siteName?: string, siteDescription?: string }} [ctx] - Optional
 *   site-level labels; siteName falls back to 'Site'.
 * @returns {void}
 */
function writeLlmsFiles(outputDir, pathList, liveBase, ctx = {}) {
  // Collapse any whitespace run (including newlines) so the value is safe to
  // embed in a single-line Markdown construct.
  const oneLine = (value) => String(value ?? '').replace(/\s+/g, ' ').trim();
  const titleCase = (s) => s.replace(/-/g, ' ').replace(/\b\w/g, (c) => c.toUpperCase());

  const siteName = oneLine(ctx.siteName) || 'Site';
  const siteDescription = oneLine(ctx.siteDescription);

  // Extract content for every route up front so both files are built from the
  // same data.
  const entries = [];
  for (const pathSeg of pathList) {
    const extracted = extractRouteContent(routeHtmlPath(outputDir, pathSeg));
    if (!extracted) continue;
    entries.push({
      pathSeg,
      url: pathSeg === '' ? `${liveBase}/` : `${liveBase}/${pathSeg}`,
      // A blank / whitespace-only title falls back to the path, then the site name.
      title: oneLine(extracted.title) || pathSeg || siteName,
      description: oneLine(extracted.description),
      bodyText: extracted.bodyText,
    });
  }

  // Group entries by section. For /a/b/c the section is "a"; for the root,
  // "Overview". A Map keeps sections in first-encounter order, preserving
  // whatever order pathList was given in.
  const sections = new Map();
  for (const entry of entries) {
    const first = entry.pathSeg.split('/')[0] || '';
    const sectionKey = first || 'Overview';
    if (!sections.has(sectionKey)) {
      sections.set(sectionKey, { label: first ? titleCase(first) : 'Overview', entries: [] });
    }
    sections.get(sectionKey).entries.push(entry);
  }

  // Both files open with the same H1 + optional blockquote summary.
  const header = siteDescription
    ? `# ${siteName}\n\n> ${siteDescription}\n`
    : `# ${siteName}\n`;

  // --- llms.txt: short curated index ---
  let llms = header;
  for (const { label, entries: items } of sections.values()) {
    llms += `\n## ${label}\n\n`;
    for (const e of items) {
      const desc = e.description ? `: ${e.description}` : '';
      llms += `- [${e.title}](${e.url})${desc}\n`;
    }
  }
  writeFileSync(join(outputDir, 'llms.txt'), llms, 'utf8');

  // --- llms-full.txt: full concatenated text content ---
  // Description is intentionally omitted per entry — bodyText typically opens
  // with the same sentence (the smart-default description came from the first
  // paragraph), so printing both would duplicate the first line. llms.txt
  // already carries descriptions for the curated index.
  let llmsFull = header;
  for (const { label, entries: items } of sections.values()) {
    llmsFull += `\n\n# ${label}\n`;
    for (const e of items) {
      llmsFull += `\n\n## ${e.title}\n`;
      llmsFull += `\nSource: ${e.url}\n`;
      if (e.bodyText) llmsFull += `\n${e.bodyText}\n`;
    }
  }
  writeFileSync(join(outputDir, 'llms-full.txt'), llmsFull, 'utf8');
}
2445
+
2446
+ // --- Output protection: keep editors/formatters from rewriting generated HTML ---
2447
+ //
2448
+ // Prerendered HTML embeds highlight.js spans inside <pre><code>, where
2449
+ // whitespace IS significant. Most HTML formatters (Prettier, VS Code's
2450
+ // html-language-features, biome) only respect "preserve <pre> content" when
2451
+ // <pre> sits at the top level — when it's nested inside an unrecognised custom
2452
+ // element like <x-code>, they recurse in and reformat the spans, breaking the
2453
+ // indentation in every code block. These four files tell common tools to
2454
+ // leave the output alone, so the corruption can't happen in any dev's
2455
+ // environment regardless of their global editor config.
2456
+ function writeOutputProtectionFiles(outputDir) {
2457
+ // Prettier: hierarchical, walks up the tree from the file being formatted.
2458
+ writeFileSync(
2459
+ join(outputDir, '.prettierignore'),
2460
+ `# Generated by Manifest prerender. Do not edit; re-run \`mnfst-render\`.
2461
+ *
2462
+ `,
2463
+ 'utf8'
2464
+ );
2465
+
2466
+ // Git: hide from PR diffs by default and skip text normalisation that could
2467
+ // touch <pre> whitespace.
2468
+ writeFileSync(
2469
+ join(outputDir, '.gitattributes'),
2470
+ `# Generated by Manifest prerender. Do not edit; re-run \`mnfst-render\`.
2471
+ * linguist-generated=true
2472
+ *.html -text
2473
+ `,
2474
+ 'utf8'
2475
+ );
2476
+
2477
+ // EditorConfig: hierarchical (editors walk up from the file). \`root = true\`
2478
+ // stops the walk at this folder so a parent .editorconfig can't override us.
2479
+ // We can't disable formatters via EditorConfig, but pinning indent/charset
2480
+ // matches what the renderer emits, so format-on-type doesn't churn the file.
2481
+ writeFileSync(
2482
+ join(outputDir, '.editorconfig'),
2483
+ `# Generated by Manifest prerender. Do not edit; re-run \`mnfst-render\`.
2484
+ root = true
2485
+
2486
+ [*]
2487
+ charset = utf-8
2488
+ end_of_line = lf
2489
+ insert_final_newline = false
2490
+ trim_trailing_whitespace = false
2491
+ indent_style = space
2492
+ indent_size = 2
2493
+ `,
2494
+ 'utf8'
2495
+ );
2496
+
2497
+ // VS Code: applies when this folder is opened directly as a workspace root.
2498
+ // (A nested .vscode/settings.json is NOT picked up automatically by a
2499
+ // parent workspace; for that case the dev needs to add a pattern to their
2500
+ // own settings.) \`files.readonlyInclude\` is the cleanest defence: VS Code
2501
+ // refuses to save the file, so format-on-save can't fire.
2502
+ // VS Code settings.json is JSONC — // comments are allowed.
2503
+ const vscodeDir = join(outputDir, '.vscode');
2504
+ mkdirSync(vscodeDir, { recursive: true });
2505
+ writeFileSync(
2506
+ join(vscodeDir, 'settings.json'),
2507
+ `// Generated by Manifest prerender. Do not edit; re-run mnfst-render.
2508
+ {
2509
+ "files.readonlyInclude": { "**": true },
2510
+ "editor.formatOnSave": false,
2511
+ "editor.formatOnPaste": false,
2512
+ "editor.formatOnType": false,
2513
+ "html.format.enable": false
2514
+ }
1735
2515
  `,
1736
2516
  'utf8'
1737
2517
  );
@@ -2103,6 +2883,13 @@ async function runPrerender(config) {
2103
2883
  // here instead of throwing "browser not ready".
2104
2884
  await browserReadyPromise;
2105
2885
  const page = await browser.newPage();
2886
+ // Render at a typical desktop viewport so layouts dependent on viewport
2887
+ // width (responsive flex/grid, container queries, media queries) settle
2888
+ // into their desktop variant. Without this the headless default (often
2889
+ // 800×600) leaves narrower layouts baked into the prerendered HTML and
2890
+ // also produces blank OG screenshots for hero sections that rely on
2891
+ // viewport-driven flex distribution.
2892
+ await page.setViewport({ width: 1200, height: 800, deviceScaleFactor: 1 });
2106
2893
  try {
2107
2894
  // Align <html lang> with the URL being prerendered before any app script runs.
2108
2895
  // initializeDataSourcesPlugin picks locale from document.documentElement.lang first; a mismatch
@@ -2314,6 +3101,20 @@ async function runPrerender(config) {
2314
3101
  // Flush any remaining Alpine microtask effects after the render-ready signal.
2315
3102
  await flushAlpineEffects(page);
2316
3103
 
3104
+ // OG image auto-snapshot — captured here, BEFORE the heavy DOM-transform
3105
+ // passes (template removal, hydration contract, route-hidden cleanup)
3106
+ // perturb the rendered visual state. Skip if og:image is already set
3107
+ // by data-head, prerender.meta config, or an explicit fallback.
3108
+ let earlySnapshotUrl = null;
3109
+ if (config.seo.imageSnapshots) {
3110
+ const ogImageHandled = !!config.seo.meta?.image
3111
+ || !!config.seo.meta?.fallback?.image
3112
+ || await page.evaluate(() => !!document.head.querySelector('meta[property="og:image"]'));
3113
+ if (!ogImageHandled) {
3114
+ earlySnapshotUrl = await takeOgSnapshot(page, config.output, is404 ? '__404__' : pathSeg);
3115
+ }
3116
+ }
3117
+
2317
3118
  if (config.debugPrerender) {
2318
3119
  const before = await page.evaluate(() => {
2319
3120
  const templates = Array.from(document.querySelectorAll('template[x-for]'));
@@ -2527,14 +3328,14 @@ async function runPrerender(config) {
2527
3328
  // Interactive Manifest-registered directives that attach click/hover/
2528
3329
  // observer state at runtime and therefore need the live Alpine scope.
2529
3330
  const INTERACTIVE_DIRECTIVES = new Set([
2530
- 'x-theme', 'x-dropdown', 'x-tooltip', 'x-tab', 'x-tabpanel',
3331
+ 'x-colors', 'x-dropdown', 'x-tooltip', 'x-tab', 'x-tabpanel',
2531
3332
  'x-toast', 'x-carousel', 'x-resize', 'x-anchors', 'x-model',
2532
3333
  'x-files', 'x-data-files',
2533
3334
  ]);
2534
3335
  // Runtime-only Alpine magics whose values change after the prerender
2535
3336
  // snapshot (e.g. via media query, route change, auth state). Bindings
2536
3337
  // referencing these must re-evaluate in the live page.
2537
- const RUNTIME_MAGIC_RX = /(?<!['"])\$(theme|locale|url|auth|search|query|toast)\b/;
3338
+ const RUNTIME_MAGIC_RX = /(?<!['"])\$(colors|locale|url|auth|search|query|toast)\b/;
2538
3339
 
2539
3340
  const isDiffBindingAttr = (name) =>
2540
3341
  name === ':class' || name === 'x-bind:class' ||
@@ -2550,6 +3351,13 @@ async function runPrerender(config) {
2550
3351
  // Explicit data-hydrate — subtree-wide restoration.
2551
3352
  if (el.hasAttribute('data-hydrate')) return 'explicit';
2552
3353
 
3354
+ // data-static: the author has frozen this subtree post-bake — Alpine
3355
+ // is not re-rendering iteration here, and the baked class/style/etc.
3356
+ // represent the intended final state. Including these elements in
3357
+ // the hydration contract would null out their baked class (per the
3358
+ // diff-binding rule below), undoing the SEO-baked styling. Skip.
3359
+ if (el.hasAttribute('data-static') || el.closest('[data-static]')) return null;
3360
+
2553
3361
  const list = el.attributes;
2554
3362
  for (let i = 0; i < list.length; i++) {
2555
3363
  const name = list[i].name;
@@ -2692,11 +3500,25 @@ async function runPrerender(config) {
2692
3500
  const inferred = xFor.includes('$search') || xFor.includes('$query') ||
2693
3501
  xFor.includes('$url') || xFor.includes('$auth') ||
2694
3502
  /\bin\s+(filtered\w*|results|searchResults)\b/.test(xFor);
2695
- const forceCollapse = explicit || inferred;
3503
+ // data-static (on template or ancestor) opts the list out of dynamic
3504
+ // collapse and pins it to the static-bake path, even if the x-for
3505
+ // expression looks dynamic. Mirrors data-hydrate as the alternative:
3506
+ // data-hydrate keeps a subtree live for runtime hydration; data-static
3507
+ // freezes baked clones into the HTML for SEO with no further re-render.
3508
+ const isStatic = tpl.hasAttribute('data-static') || !!tpl.closest('[data-static]');
3509
+ const forceCollapse = !isStatic && (explicit || inferred);
2696
3510
  if (!forceCollapse) {
2697
3511
  tpl.removeAttribute('data-prerender-collapsed');
2698
3512
  tpl.removeAttribute('data-prerender-static-generated');
2699
3513
  // Static mode: if prerender produced concrete siblings, mark template for removal later.
3514
+ //
3515
+ // Default sibling-match check is strict (tag + class) to avoid
3516
+ // capturing unrelated elements that happen to share a tag. Under
3517
+ // data-static the user has explicitly opted in to baking, so we
3518
+ // relax to tag-only — Alpine's :class evaluation on clones often
3519
+ // differs from the template's static class (e.g. template has no
3520
+ // `class=` and clones have an evaluated string), and the strict
3521
+ // check would miss the clones and leave the template unmarked.
2700
3522
  const first = tpl.content?.firstElementChild;
2701
3523
  if (first) {
2702
3524
  const tag = first.tagName;
@@ -2705,8 +3527,10 @@ async function runPrerender(config) {
2705
3527
  let generatedCount = 0;
2706
3528
  while (next) {
2707
3529
  if (next.tagName !== tag) break;
2708
- const sameClass = (next.getAttribute('class') || '') === cls;
2709
- if (!sameClass) break;
3530
+ if (!isStatic) {
3531
+ const sameClass = (next.getAttribute('class') || '') === cls;
3532
+ if (!sameClass) break;
3533
+ }
2710
3534
  generatedCount++;
2711
3535
  next = next.nextElementSibling;
2712
3536
  }
@@ -2809,7 +3633,16 @@ async function runPrerender(config) {
2809
3633
  // Strip loop-scope bindings from x-for clones while <template> nodes still exist.
2810
3634
  // (If we remove static templates first, querySelectorAll('template[x-for]') misses them and clones
2811
3635
  // keep x-text/x-bind referencing card/item — Alpine then mutates or errors on the static HTML.)
3636
+ //
3637
+ // Wrapped in Alpine.mutateDom so attribute removals (e.g. removing :class)
3638
+ // don't trigger Alpine's reactive teardown — without this, Alpine sees
3639
+ // the :class attribute disappear, runs its unbind effect, and clears the
3640
+ // bound attribute (class) back to its pre-binding value (empty for clones
3641
+ // whose template had no static class). mutateDom suppresses the observer
3642
+ // for the duration of the callback.
2812
3643
  await page.evaluate(() => {
3644
+ const A = window.Alpine;
3645
+ const runBatch = typeof A?.mutateDom === 'function' ? (fn) => A.mutateDom(fn) : (fn) => fn();
2813
3646
  const loopVarRegex = /^\s*(?:\(\s*([A-Za-z_$][\w$]*)(?:\s*,\s*([A-Za-z_$][\w$]*))?\s*\)|([A-Za-z_$][\w$]*))\s+in\s+/;
2814
3647
  // Include x-init: expanded clones still had x-init="getDescription(article)" etc.; Alpine then throws (article undefined).
2815
3648
  const bindingAttrRegex = /^(?:x-bind:|:|x-text|x-html|x-show|x-if|x-model|x-effect|x-init|x-icon|x-on:|@)/;
@@ -2845,7 +3678,13 @@ async function runPrerender(config) {
2845
3678
  if (boundAttr) {
2846
3679
  const concrete = node.getAttribute(boundAttr);
2847
3680
  if (concrete != null && String(concrete).trim() !== '') {
3681
+ // Removing :foo triggers Alpine's binding teardown, which
3682
+ // restores the bound attribute to its pre-binding value
3683
+ // (empty for clones whose template had no static class).
3684
+ // Snapshot the eval'd value and re-set it after removal so
3685
+ // the baked attribute survives the unbind.
2848
3686
  node.removeAttribute(name);
3687
+ node.setAttribute(boundAttr, concrete);
2849
3688
  }
2850
3689
  continue;
2851
3690
  }
@@ -2855,24 +3694,26 @@ async function runPrerender(config) {
2855
3694
  }
2856
3695
  };
2857
3696
 
2858
- document.querySelectorAll('template[x-for]').forEach((tpl) => {
2859
- if (tpl.hasAttribute('data-hydrate') || tpl.closest('[data-hydrate]')) return;
2860
- const xFor = (tpl.getAttribute('x-for') || '').trim();
2861
- const m = xFor.match(loopVarRegex);
2862
- const itemVar = m ? (m[1] || m[3] || '') : '';
2863
- const indexVar = m ? (m[2] || '') : '';
2864
- if (!itemVar && !indexVar) return;
3697
+ runBatch(() => {
3698
+ document.querySelectorAll('template[x-for]').forEach((tpl) => {
3699
+ if (tpl.hasAttribute('data-hydrate') || tpl.closest('[data-hydrate]')) return;
3700
+ const xFor = (tpl.getAttribute('x-for') || '').trim();
3701
+ const m = xFor.match(loopVarRegex);
3702
+ const itemVar = m ? (m[1] || m[3] || '') : '';
3703
+ const indexVar = m ? (m[2] || '') : '';
3704
+ if (!itemVar && !indexVar) return;
2865
3705
 
2866
- const first = tpl.content?.firstElementChild;
2867
- if (!first) return;
2868
- const tag = first.tagName;
3706
+ const first = tpl.content?.firstElementChild;
3707
+ if (!first) return;
3708
+ const tag = first.tagName;
2869
3709
 
2870
- let next = tpl.nextElementSibling;
2871
- while (next) {
2872
- if (next.tagName !== tag) break;
2873
- stripLoopBindings(next, itemVar, indexVar);
2874
- next = next.nextElementSibling;
2875
- }
3710
+ let next = tpl.nextElementSibling;
3711
+ while (next) {
3712
+ if (next.tagName !== tag) break;
3713
+ stripLoopBindings(next, itemVar, indexVar);
3714
+ next = next.nextElementSibling;
3715
+ }
3716
+ });
2876
3717
  });
2877
3718
  });
2878
3719
 
@@ -2880,20 +3721,42 @@ async function runPrerender(config) {
2880
3721
  // Alpine registers a cleanup on <template x-for> that removes every node in _x_lookup when the
2881
3722
  // template is detached — so tpl.remove() alone deletes all sibling clones (empty grids in output).
2882
3723
  // Replace each clone with a deep cloneNode first so teardown targets detached nodes; copies stay in DOM.
3724
+ //
3725
+ // Iterate until quiet: when an outer template's siblings are deep-cloned,
3726
+ // any nested templates inside those clones become FRESH DOM nodes that
3727
+ // weren't in the original querySelectorAll snapshot. We re-query and
3728
+ // re-process until no marked templates remain, so nested static lists
3729
+ // (e.g. <template x-for="group in $x.docs"> with an inner
3730
+ // <template x-for="item in group.items">) are fully baked and removed.
2883
3731
  await page.evaluate(() => {
2884
3732
  const A = window.Alpine;
2885
3733
  const runBatch = typeof A?.mutateDom === 'function' ? (fn) => A.mutateDom(fn) : (fn) => fn();
2886
- runBatch(() => {
2887
- document.querySelectorAll('template[x-for][data-prerender-static-generated="1"]').forEach((tpl) => {
3734
+ const SAFETY_PASSES = 8;
3735
+ for (let pass = 0; pass < SAFETY_PASSES; pass++) {
3736
+ const remaining = document.querySelectorAll('template[x-for][data-prerender-static-generated="1"]');
3737
+ if (remaining.length === 0) break;
3738
+ let processed = 0;
3739
+ runBatch(() => {
3740
+ remaining.forEach((tpl) => {
2888
3741
  if (tpl.hasAttribute('data-hydrate') || tpl.closest('[data-hydrate]')) return;
2889
- // $x-driven x-for: keep the template so Alpine can re-render the
2890
- // list at runtime (locale switching, filtering, etc.), but remove
2891
- // the static clones — Alpine creates fresh clones on init and does
2892
- // NOT adopt existing DOM nodes, so leaving them produces duplicates.
2893
- // Individual article/pricing pages still have full baked content
2894
- // (via x-text/x-html); the x-for list is only the index/grid view.
3742
+ // $x-driven x-for: by default, keep the template so Alpine can
3743
+ // re-render the list at runtime (locale switching, filtering, etc.)
3744
+ // and remove the static clones — Alpine creates fresh clones on
3745
+ // init and does NOT adopt existing DOM nodes, so leaving them
3746
+ // produces duplicates. Individual article/pricing pages still
3747
+ // have full baked content (via x-text/x-html); the x-for list is
3748
+ // only the index/grid view.
3749
+ //
3750
+ // Opt-in via data-static (on template or ancestor) reverses this:
3751
+ // we keep the baked clones for SEO and remove the template instead,
3752
+ // which freezes the list (Alpine has nothing left to iterate, so
3753
+ // no duplicates and no runtime re-render). Use this for static
3754
+ // navigation lists or any $x-driven list whose source data does
3755
+ // not change after first paint. Loop-scope bindings on the kept
3756
+ // clones are stripped earlier in the pipeline.
2895
3757
  const xFor = (tpl.getAttribute('x-for') || '');
2896
- if (xFor.includes('$x')) {
3758
+ const isStatic = tpl.hasAttribute('data-static') || !!tpl.closest('[data-static]');
3759
+ if (xFor.includes('$x') && !isStatic) {
2897
3760
  const first = tpl.content?.firstElementChild;
2898
3761
  if (first) {
2899
3762
  const tag = first.tagName;
@@ -2922,14 +3785,20 @@ async function runPrerender(config) {
2922
3785
  const cls = first.getAttribute('class') || '';
2923
3786
  let n = tpl.nextElementSibling;
2924
3787
  while (n && n.tagName === tag) {
2925
- if ((n.getAttribute('class') || '') !== cls) break;
3788
+ // Same rationale as the marking pass: under data-static, relax
3789
+ // class match (Alpine's :class evaluation on clones often differs
3790
+ // from the template's static class).
3791
+ if (!isStatic && (n.getAttribute('class') || '') !== cls) break;
2926
3792
  const next = n.nextElementSibling;
2927
3793
  n.replaceWith(n.cloneNode(true));
2928
3794
  n = next;
2929
3795
  }
2930
3796
  tpl.remove();
3797
+ processed++;
2931
3798
  });
2932
- });
3799
+ });
3800
+ if (processed === 0) break;
3801
+ }
2933
3802
  });
2934
3803
 
2935
3804
  // Remove orphan x-for clones that still reference loop-scope vars (e.g. image/index)
@@ -2980,6 +3849,23 @@ async function runPrerender(config) {
2980
3849
  });
2981
3850
  });
2982
3851
 
3852
+ // data-static cleanup: any <template> still inside a [data-static] subtree
3853
+ // is removed. Plugin-driven templates (x-anchors, custom directives that
3854
+ // insert their rendered output as siblings) leave the rendered DOM behind
3855
+ // and the template intact — at runtime the plugin would re-run and
3856
+ // duplicate the output. Removing the template here is the equivalent of
3857
+ // the x-for static path: bake the rendered content, drop the source.
3858
+ // x-for templates have their own staged removal earlier in the pipeline;
3859
+ // this catch-all cleans up everything else.
3860
+ await page.evaluate(() => {
3861
+ document.querySelectorAll('[data-static] template, template[data-static]').forEach((tpl) => {
3862
+ // Don't remove templates explicitly marked data-hydrate (those are an
3863
+ // opt-out from any prerender transforms within the data-static subtree).
3864
+ if (tpl.hasAttribute('data-hydrate') || tpl.closest('[data-hydrate]')) return;
3865
+ tpl.remove();
3866
+ });
3867
+ });
3868
+
2983
3869
  const visibilityNormalizedPath = logicalPathToVisibilityNormalizedPath(pathSeg, locales);
2984
3870
  await page.evaluate((np) => {
2985
3871
  try {
@@ -2999,6 +3885,19 @@ async function runPrerender(config) {
2999
3885
  toRemove.forEach((el) => { if (document.contains(el)) el.remove(); });
3000
3886
  });
3001
3887
 
3888
+ // SEO / AEO meta injection — see resolveConfig().seo for precedence layers.
3889
+ // Runs in the live page so prerender.meta expressions can use Alpine context
3890
+ // (real $x.* evaluation, not yaml-only paths). Each pass only fills
3891
+ // slots that are still missing; data-head and index.html static head wins.
3892
+ // The og:image snapshot was captured earlier (post-Alpine, pre-transforms);
3893
+ // this pass uses it as the highest smart-default for the image slot.
3894
+ await injectMetaInDom(page, {
3895
+ seo: config.seo,
3896
+ liveUrl: (config.liveUrl || '').replace(/\/$/, ''),
3897
+ pathSeg: is404 ? '__404__' : pathSeg,
3898
+ snapshotUrl: earlySnapshotUrl,
3899
+ });
3900
+
3002
3901
  let html = await page.evaluate(() => document.documentElement.outerHTML);
3003
3902
  // Inject the hydration contract blob into the raw HTML *before* caching
3004
3903
  // it for locale variant generation, so every locale variant inherits the
@@ -3305,8 +4204,14 @@ async function runPrerender(config) {
3305
4204
  pathList.filter((p) => p !== NOT_FOUND_PATH),
3306
4205
  config.liveUrl,
3307
4206
  locales,
3308
- defaultLocale
4207
+ defaultLocale,
4208
+ {
4209
+ rootDir: config.root,
4210
+ siteName: config.seo?.siteName,
4211
+ siteDescription: config.seo?.siteDescription,
4212
+ }
3309
4213
  );
4214
+ writeOutputProtectionFiles(config.output);
3310
4215
  validatePrerenderedOutput(config.output, pathList.filter((p) => p !== NOT_FOUND_PATH));
3311
4216
 
3312
4217
  if (config.redirects.length > 0) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "mnfst-render",
3
- "version": "0.5.23",
3
+ "version": "0.5.24",
4
4
  "description": "Render Manifest sites to static HTML for SEO",
5
5
  "type": "module",
6
6
  "bin": {
@@ -35,4 +35,4 @@
35
35
  "url": "git+https://github.com/andrewmatlock/Manifest.git",
36
36
  "directory": "packages/render"
37
37
  }
38
- }
38
+ }