mnfst-render 0.5.23 → 0.5.25

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -9,6 +9,7 @@ import { createServer } from 'node:http';
9
9
  import { cpus } from 'node:os';
10
10
  import { createRequire } from 'node:module';
11
11
  import { fileURLToPath } from 'node:url';
12
+ import { createHash } from 'node:crypto';
12
13
 
13
14
  const __dirname = dirname(fileURLToPath(import.meta.url));
14
15
  const require = createRequire(import.meta.url);
@@ -130,6 +131,37 @@ async function waitForManifestRenderReady(page, { allLocales, currentLocale, tim
130
131
  }));
131
132
  window.dispatchEvent(new PopStateEvent('popstate'));
132
133
 
134
+ // 5b. Eagerly warm up declared local data sources for the current locale.
135
+ //
136
+ // Without this, sources are loaded lazily — only when a `$x.foo` access
137
+ // triggers the proxy. For static `<template x-for="group in $x.docs">`
138
+ // patterns the iterator may not run early enough for the load to be
139
+ // in-flight before checkAndDispatchRenderReady's debounced timer fires,
140
+ // and the snapshot captures an empty template (no clones for SEO).
141
+ //
142
+ // Warming up here forces every declared local source into the loading
143
+ // state synchronously (loadDataSource sets _<name>_state.loading = true
144
+ // and registers a promise in loadingPromises before returning), which
145
+ // gates the render-ready dispatch until all loads settle. Cloud
146
+ // sources (Appwrite collections, object-form API URLs) are skipped —
147
+ // those are typically auth-gated or intentionally dynamic and not
148
+ // appropriate for SEO-baking; lazy access still works for them.
149
+ try {
150
+ const cfg = window.ManifestDataConfig;
151
+ const main = window.ManifestDataMain;
152
+ const manifest = await cfg?.ensureManifest?.();
153
+ if (manifest?.data && typeof main?.loadDataSource === 'function') {
154
+ const isAppwrite = cfg.isAppwriteCollection;
155
+ for (const [name, source] of Object.entries(manifest.data)) {
156
+ if (isAppwrite && isAppwrite(source)) continue;
157
+ if (source && typeof source === 'object' && source.url) continue;
158
+ // Fire-and-forget: we just need the loading flag set and the
159
+ // promise registered. Failures fall back to lazy behaviour.
160
+ main.loadDataSource(name, loc).catch(() => { });
161
+ }
162
+ }
163
+ } catch { /* warmup is best-effort; existing lazy access is the fallback */ }
164
+
133
165
  // 6. Run component swapping explicitly so components tied to this route render
134
166
  // and trigger any $x accesses that start on-demand data loads.
135
167
  if (window.ManifestComponentsSwapping?.processAll) {
@@ -182,15 +214,99 @@ function parseArgs() {
182
214
  function loadConfig(rootDir) {
183
215
  const manifestPath = join(rootDir, 'manifest.json');
184
216
  if (!existsSync(manifestPath)) {
185
- return { prerender: {} };
217
+ return { render: {} };
186
218
  }
187
219
  const raw = readFileSync(manifestPath, 'utf8');
188
220
  let manifest;
189
221
  try {
190
222
  manifest = JSON.parse(raw);
191
223
  } catch {
192
- return { prerender: {} };
224
+ return { render: {} };
225
+ }
226
+ return manifest;
227
+ }
228
+
229
/**
 * Move credential-shaped values out of manifest.json into .env.
 *
 * Anything inlined into manifest.json ships verbatim into dist/ (the file is
 * copied to the output unchanged), so a literal devKey there is a production
 * leak. Both the source file AND the in-memory object are rewritten: future
 * renders see the placeholder, and the dist copy this run produces does too.
 * Idempotent — values already shaped like ${VAR} are skipped.
 *
 * Order of operations matters: .env is updated BEFORE manifest.json is
 * rewritten, so a failed .env write can never leave the credential replaced
 * by a placeholder with no copy of the real value anywhere.
 *
 * @param {string} rootDir - Project root containing manifest.json / .env.
 * @param {object} manifest - Parsed manifest; mutated in place when secrets are found.
 * @returns {object} The same `manifest` reference that was passed in.
 *   Exits the process with code 1 when .env or manifest.json cannot be written.
 */
function relocateSecretsToEnv(rootDir, manifest) {
  // Nothing to scan on a non-object manifest; hand it back untouched.
  if (!manifest || typeof manifest !== 'object') return manifest;

  const moves = [];

  function maybeMove(obj, key, envVar, displayPath) {
    if (!obj || typeof obj !== 'object') return;
    const v = obj[key];
    if (typeof v !== 'string' || !v) return;
    if (/^\$\{[^}]+\}$/.test(v)) return; // already a placeholder
    moves.push({ obj, key, value: v, envVar, displayPath });
  }

  maybeMove(manifest.appwrite, 'devKey', 'APPWRITE_DEV_KEY', 'appwrite.devKey');
  if (manifest.data && typeof manifest.data === 'object') {
    for (const [srcName, src] of Object.entries(manifest.data)) {
      maybeMove(src, 'appwriteDevKey', 'APPWRITE_DEV_KEY', `data.${srcName}.appwriteDevKey`);
    }
  }

  if (moves.length === 0) return manifest;

  // --- 1. Read the current .env (if any) -----------------------------------
  const envPath = join(rootDir, '.env');
  let envText = '';
  let envReadFailed = false;
  try {
    if (existsSync(envPath)) envText = readFileSync(envPath, 'utf8');
  } catch {
    // Existing content is unknown. The write below appends, so whatever is
    // in the file is preserved rather than truncated.
    envReadFailed = true;
  }

  // Strip one pair of matching surrounding quotes so `KEY="abc"` compares
  // equal to a manifest value of `abc`.
  const unquote = (rawValue) => {
    const t = rawValue.trim();
    const first = t[0];
    const last = t[t.length - 1];
    if (t.length >= 2 && (first === '"' || first === "'") && last === first) {
      return t.slice(1, -1);
    }
    return t;
  };

  // envVar -> value currently in effect (from .env, or from an earlier move).
  const knownValues = new Map();
  for (const line of envText.split(/\r?\n/)) {
    const m = line.match(/^([A-Z_][A-Z0-9_]*)\s*=(.*)$/);
    if (m) knownValues.set(m[1], unquote(m[2]));
  }

  const additions = [];
  const conflicts = [];
  for (const m of moves) {
    if (!knownValues.has(m.envVar)) {
      additions.push(`${m.envVar}=${m.value}`);
      knownValues.set(m.envVar, m.value);
    } else if (knownValues.get(m.envVar) !== m.value) {
      // The variable already exists with a different value; the existing
      // one is kept (unchanged behaviour) but must not be reported as a match.
      conflicts.push(m);
    }
  }

  // --- 2. Persist the secrets to .env first --------------------------------
  if (additions.length) {
    const needsLeadingNewline = envReadFailed || (envText !== '' && !envText.endsWith('\n'));
    const appended = `${needsLeadingNewline ? '\n' : ''}${additions.join('\n')}\n`;
    try {
      // Append instead of rewriting the whole file: same resulting content in
      // the normal case, and existing lines can never be lost.
      writeFileSync(envPath, appended, { flag: 'a' });
    } catch (err) {
      console.error(`mnfst-render: failed to write ${envPath}: ${err.message}`);
      process.exit(1);
    }
  }

  // --- 3. Only now swap the literals for placeholders ----------------------
  for (const m of moves) {
    m.obj[m.key] = `\${${m.envVar}}`;
  }

  const manifestPath = join(rootDir, 'manifest.json');
  try {
    writeFileSync(manifestPath, JSON.stringify(manifest, null, 4) + '\n');
  } catch (err) {
    console.error(`mnfst-render: failed to rewrite ${manifestPath}: ${err.message}`);
    process.exit(1);
  }

  // --- 4. Tell the user what happened --------------------------------------
  console.warn('');
  console.warn('mnfst-render: relocated credentials from manifest.json to .env');
  for (const m of moves) {
    console.warn(` • ${m.displayPath} → \${${m.envVar}}`);
  }
  if (additions.length) {
    console.warn(` Appended ${additions.length} var(s) to .env (verify .env is in .gitignore).`);
  } else {
    console.warn(' .env already had matching vars; manifest.json placeholders now point at them.');
  }
  for (const m of conflicts) {
    // Never print the values themselves — they are credentials.
    console.warn(` ! ${m.displayPath} differs from the ${m.envVar} value already defined; the existing value was kept — verify it is the right one.`);
  }
  console.warn(' Browser-side dev needs window.env populated separately (e.g. env.local.js).');
  console.warn('');

  return manifest;
}
196
312
 
@@ -205,44 +321,44 @@ function resolveConfig() {
205
321
  const cli = parseArgs();
206
322
  const cwd = process.cwd();
207
323
  const root = resolve(cwd, cli.root ?? '.');
208
- const manifest = loadConfig(root);
209
- const pre = manifest.prerender ?? {};
324
+ const manifest = relocateSecretsToEnv(root, loadConfig(root));
325
+ const ren = manifest.render ?? {};
210
326
 
211
- const localUrl = (cli.localUrl ?? cli.baseUrl ?? process.env.PRERENDER_BASE ?? pre.localUrl ?? pre.baseUrl)?.replace(/\/$/, '');
327
+ const localUrl = (cli.localUrl ?? cli.baseUrl ?? process.env.PRERENDER_BASE ?? ren.localUrl ?? ren.baseUrl)?.replace(/\/$/, '');
212
328
  const serve = cli.localUrl ? false : (cli.serve !== undefined ? !!cli.serve : true);
213
329
  if (!serve && !localUrl) {
214
- console.error('prerender: localUrl is required when not using built-in server. Set manifest.prerender.localUrl or use --local.');
330
+ console.error('prerender: localUrl is required when not using built-in server. Set manifest.render.localUrl or use --local.');
215
331
  process.exit(1);
216
332
  }
217
- const liveUrl = (cli.liveUrl ?? process.env.PRERENDER_LIVE ?? manifest.live_url ?? manifest.liveUrl ?? pre.live_url ?? pre.liveUrl ?? localUrl ?? '')?.replace(/\/$/, '');
333
+ const liveUrl = (cli.liveUrl ?? process.env.PRERENDER_LIVE ?? manifest.live_url ?? manifest.liveUrl ?? ren.live_url ?? ren.liveUrl ?? localUrl ?? '')?.replace(/\/$/, '');
218
334
 
219
335
  return {
220
336
  localUrl: localUrl ?? '',
221
337
  liveUrl,
222
338
  serve,
223
- output: resolve(root, cli.output ?? pre.output ?? 'website'),
339
+ output: resolve(root, cli.output ?? ren.output ?? 'website'),
224
340
  root,
225
- routerBase: pre.routerBase ?? null,
341
+ routerBase: ren.routerBase ?? null,
226
342
  /** Logical path prefixes (after locale) that skip sticky locale prefix; see manifest:locale-route-exclude */
227
343
  localeRouteExclude: normalizeLocaleRouteExclude(
228
- pre.localeRouteExclude ?? pre.localeStickyExclude
344
+ ren.localeRouteExclude ?? ren.localeStickyExclude
229
345
  ),
230
- locales: pre.locales,
231
- redirects: Array.isArray(pre.redirects) ? pre.redirects : [],
232
- wait: cli.wait ?? pre.wait ?? null,
346
+ locales: ren.locales,
347
+ redirects: Array.isArray(ren.redirects) ? ren.redirects : [],
348
+ wait: cli.wait ?? ren.wait ?? null,
233
349
  waitAfterIdle: 0,
234
350
  // Default concurrency: 2. Chromium per-page memory overhead is large and
235
351
  // our hydration source-attribute map adds more per page. On big sites
236
352
  // (>100 routes) higher concurrency crashes the browser with OOM/target
237
353
  // closed errors. Users can override for small projects with --concurrency.
238
- concurrency: Math.max(1, cli.concurrency ?? pre.concurrency ?? 2),
239
- retries: Math.max(0, cli.retries ?? pre.retries ?? 2),
354
+ concurrency: Math.max(1, cli.concurrency ?? ren.concurrency ?? 2),
355
+ retries: Math.max(0, cli.retries ?? ren.retries ?? 2),
240
356
  localeSubstitution: true,
241
357
  localeSubstitutionExclude: [],
242
358
  /** Explicit locale-neutral paths to render in addition to those discovered automatically.
243
359
  * Each entry is expanded to all locale variants (e.g. "legal/privacy" → "cs/legal/privacy", ...) */
244
- paths: Array.isArray(pre.paths)
245
- ? pre.paths.map((p) => String(p).replace(/^\/+|\/+$/g, '')).filter(Boolean)
360
+ paths: Array.isArray(ren.paths)
361
+ ? ren.paths.map((p) => String(p).replace(/^\/+|\/+$/g, '')).filter(Boolean)
246
362
  : [],
247
363
  dryRun: !!cli.dryRun,
248
364
  debugPrerender: !!cli.debugPrerender,
@@ -251,6 +367,27 @@ function resolveConfig() {
251
367
  // fall back to the timeout. 10s gives slow data plugin pipelines a
252
368
  // chance while bounding worst-case per-path overhead.
253
369
  pipelineTimeout: 10000,
370
+ // SEO / AEO meta injection — see metaInjection() and the render.meta
371
+ // section of manifest.json. Layered precedence (highest first):
372
+ // 1. <template data-head> per-route (already in DOM at snapshot time)
373
+ // 2. <head> in index.html (already in DOM at snapshot time)
374
+ // 3. render.meta.* expressions (Alpine-evaluated per route)
375
+ // 4. render.meta.fallback.* (static strings if expression empty)
376
+ // 5. PWA-style manifest.json fields (name, description, author, icons)
377
+ // 6. Smart defaults derived from the rendered DOM (h1, first p, etc.)
378
+ //
379
+ // Each layer only fills slots not yet present. An empty <title></title>
380
+ // or one matching manifest.json "name" counts as missing (placeholder rule).
381
+ seo: {
382
+ siteName: manifest.name || null,
383
+ siteDescription: manifest.description || null,
384
+ siteAuthor: manifest.author || null,
385
+ icons: Array.isArray(manifest.icons) ? manifest.icons : [],
386
+ meta: ren.meta || null,
387
+ structuredData: ren.structuredData || null,
388
+ imageSnapshots: ren.meta?.imageSnapshots !== false, // default true
389
+ defaults: ren.meta?.defaults !== false, // default true
390
+ },
254
391
  };
255
392
  }
256
393
 
@@ -676,14 +813,14 @@ function stripDataTailwindAttr(html) {
676
813
  * Puppeteer applies `<html class="light">` or `<html class="dark">` based on
677
814
  * the build host's system preference at prerender time. Shipping that baked
678
815
  * class to users in the OPPOSITE preference causes a visible flash on every
679
- * page load (dark→light or light→dark) until the themes plugin re-evaluates.
816
+ * page load (dark→light or light→dark) until the color plugin re-evaluates.
680
817
  *
681
818
  * Fix: strip `light`/`dark` from the baked `<html class>` and inject a tiny
682
819
  * synchronous `<script>` at the top of `<head>` that sets the correct class
683
820
  * BEFORE the first paint — based on the user's `localStorage.theme` (their
684
821
  * saved preference) or `prefers-color-scheme` (their system preference).
685
822
  *
686
- * The themes plugin (`manifest.themes.js`) still runs later for reactivity
823
+ * The color plugin (`manifest.color.js`) still runs later for reactivity
687
824
  * (Alpine bindings, click handlers, system-preference change listener), but
688
825
  * the initial paint already has the correct class so there's no flash.
689
826
  */
@@ -789,9 +926,9 @@ function promptContinueWithRuntimeTailwind(rootDir) {
789
926
  /**
790
927
  * Build a static Tailwind stylesheet via @tailwindcss/cli (v4+), scanning project sources.
791
928
  * Only runs when the project uses data-tailwind on the manifest script tag (auto-detected).
792
- * Set manifest.prerender.tailwindInput to a custom CSS entry file if needed.
929
+ * Set manifest.render.tailwindInput to a custom CSS entry file if needed.
793
930
  */
794
- function runTailwindCliForPrerender(rootDir, outputDir, pre) {
931
+ function runTailwindCliForPrerender(rootDir, outputDir, ren) {
795
932
  if (!indexHtmlUsesTailwind(rootDir)) return false;
796
933
 
797
934
  const outCss = join(outputDir, 'prerender.tailwind.css');
@@ -807,7 +944,7 @@ function runTailwindCliForPrerender(rootDir, outputDir, pre) {
807
944
  }
808
945
  let inputPath = null;
809
946
  let createdTempInput = false;
810
- const userInput = pre?.tailwindInput;
947
+ const userInput = ren?.tailwindInput;
811
948
  if (typeof userInput === 'string' && userInput.trim()) {
812
949
  inputPath = resolve(rootDir, userInput.trim());
813
950
  }
@@ -838,10 +975,15 @@ function runTailwindCliForPrerender(rootDir, outputDir, pre) {
838
975
  }
839
976
 
840
977
  process.stdout.write('prerender: compiling Tailwind CSS (this may take a minute)...\n');
841
- const r = spawnSync('npx', args, {
978
+ // On Windows, invoke the .cmd shim directly instead of routing through
979
+ // `shell: true`. With `shell: true`, every argument (including the user-
980
+ // controlled `tailwindInput` path from manifest.json) gets parsed by cmd.exe
981
+ // — so a value like `styles.css & evilcmd` would run `evilcmd`. Calling
982
+ // npx.cmd directly keeps args as a literal argv with no shell interpretation.
983
+ const command = process.platform === 'win32' ? 'npx.cmd' : 'npx';
984
+ const r = spawnSync(command, args, {
842
985
  cwd: rootDir,
843
986
  encoding: 'utf8',
844
- shell: process.platform === 'win32',
845
987
  });
846
988
  if (createdTempInput) {
847
989
  try {
@@ -851,7 +993,7 @@ function runTailwindCliForPrerender(rootDir, outputDir, pre) {
851
993
  }
852
994
  }
853
995
  if (r.status !== 0) {
854
- console.error('prerender: Tailwind CLI failed; install with `npm i -D tailwindcss @tailwindcss/cli` or check tailwindInput in manifest.prerender.');
996
+ console.error('prerender: Tailwind CLI failed; install with `npm i -D tailwindcss @tailwindcss/cli` or check tailwindInput in manifest.render.');
855
997
  if (r.stderr) console.error(r.stderr);
856
998
  if (r.stdout) console.error(r.stdout);
857
999
  return false;
@@ -1686,13 +1828,687 @@ function resolveHeadXBindings(html, xData) {
1686
1828
  });
1687
1829
  }
1688
1830
 
1689
- // --- SEO: robots.txt and sitemap.xml (written to output, use liveUrl for crawlers) ---
1831
+ // --- SEO: per-route OG image auto-snapshot --------------------------------
1832
+ //
1833
+ // When render.meta.imageSnapshots is true (the default) and no other source
1834
+ // has provided an og:image (data-head, render.meta.image, or render.meta
1835
+ // .fallback.image), capture a 1200×630 PNG of the rendered page and use that as
1836
+ // the og:image / twitter:image. Saved to <output>/og/<sanitized-path>.png.
1837
+ //
1838
+ // 1200×630 is the OpenGraph / Twitter / LinkedIn recommended dimension.
1839
+
1840
// Short content fingerprint: the first 16 hex characters of the SHA-256
// digest of the input coerced to a string.
const sha = (s) => {
  const digestHex = createHash('sha256').update(String(s)).digest('hex');
  return digestHex.slice(0, 16);
};
1841
+
1842
/**
 * Fingerprint the project-wide files that shape every page's appearance
 * (manifest config, theme/utilities CSS, root HTML shell).
 *
 * Meant to be computed once per run and mixed into each route's
 * snapshot-cache key, so editing any of these files invalidates every cached
 * OG image — something per-route source tracking would miss for shared chrome.
 *
 * A file that cannot be read contributes the literal `missing`, so a project
 * without the file hashes differently from one that has it.
 *
 * @param {string} rootDir - Project root the asset paths are resolved against.
 * @returns {string} Short hash over the per-file hashes.
 */
function computeGlobalAssetSignature(rootDir) {
  const trackedFiles = [
    'manifest.json',
    'manifest.theme.css',
    'manifest.utilities.css',
    'index.html',
  ];

  const entries = [];
  for (const relPath of trackedFiles) {
    let digest;
    try {
      digest = sha(readFileSync(join(rootDir, relPath), 'utf8'));
    } catch {
      digest = 'missing';
    }
    entries.push(`${relPath}:${digest}`);
  }

  return sha(entries.join('|'));
}
1870
+
1871
/**
 * Snapshot the page at 1200×630 and write it to <output>/og/<slug>.png.
 *
 * The cache sidecar lives in `cacheDir` (.mnfst-cache/og under the project
 * root) — outside the output dir, which is wiped at the start of every
 * prerender. On a cache hit the cached PNG is copied into the output dir and
 * the screenshot is skipped. Hash inputs:
 *   - globalAssetSignature (theme CSS / manifest config / root HTML)
 *   - body outerHTML, normalised to strip non-visual volatile attributes
 *   - html.className (theme variant: light/dark/etc.)
 *
 * @param {object} page - Browser page; only `evaluate()` and `screenshot()` are used.
 * @param {string} outputDir - Prerender output directory.
 * @param {string} pathSeg - Route path ('' for the home page, '__404__' for the 404 page).
 * @param {string} [globalAssetSignature] - Project-wide asset hash folded into the cache key.
 * @param {string} [cacheDir] - Cache directory; caching is disabled when falsy.
 * @returns {Promise<string|null>} Site-relative URL of the PNG (`/og/<slug>.png`),
 *   or null when the snapshot failed or looked blank.
 */
async function takeOgSnapshot(page, outputDir, pathSeg, globalAssetSignature, cacheDir) {
  let fileSeg;
  if (pathSeg === '') {
    fileSeg = 'index';
  } else if (pathSeg === '__404__') {
    // The 404 page gets its own slug. Sharing `index` with the home page let
    // the 404 screenshot overwrite og/index.png (and its cache entry), so the
    // home page could end up advertising a picture of the 404 page.
    fileSeg = '404';
  } else {
    // NOTE(review): this mapping is lossy — "a/b" and "a-b" both become "a-b",
    // and any character outside [a-zA-Z0-9_-] collapses to "_". Distinct
    // routes can therefore still share a file; confirm whether that matters.
    fileSeg = pathSeg.replace(/\//g, '-').replace(/[^a-zA-Z0-9_-]/g, '_');
  }

  const ogDir = join(outputDir, 'og');
  try { mkdirSync(ogDir, { recursive: true }); } catch { /* a real failure surfaces when the PNG is written */ }
  const filePath = join(ogDir, `${fileSeg}.png`);
  // Cache locations — outside the output dir so they survive the per-run wipe.
  const cachePngPath = cacheDir ? join(cacheDir, `${fileSeg}.png`) : null;
  const cacheHashPath = cacheDir ? join(cacheDir, `${fileSeg}.hash`) : null;

  // Cache lookup: fingerprint the rendered DOM and compare with the stored
  // hash. The fingerprint drops attribute values assigned in iteration order
  // and randomly generated CSS anchor-name IDs; without that normalisation
  // the hash would never match across runs. Skipped entirely when caching is
  // disabled, since the hash has no other use.
  let contentHash = null;
  if (cacheDir) {
    try {
      const fingerprint = await page.evaluate(() => {
        const body = document.body?.outerHTML || '';
        const htmlClass = document.documentElement?.className || '';
        const normalised = body
          .replace(/\sdata-hydrate-id="[^"]*"/g, '')
          .replace(/\sdata-component="[^"]*"/g, '')
          .replace(/\sdata-pre-rendered="[^"]*"/g, '')
          .replace(/\sid="(?:tab-|code-)[^"]*"/g, '')
          .replace(/\saria-controls="(?:code-)[^"]*"/g, '')
          .replace(/\saria-labelledby="(?:tab-)[^"]*"/g, '')
          // CSS anchor-positioning IDs (e.g. `--dropdown-zc7nofh3c`) are
          // regenerated per run by the dropdown/popover system.
          .replace(/--dropdown-[a-z0-9]+/g, '--dropdown-X')
          .replace(/--popover-[a-z0-9]+/g, '--popover-X')
          .replace(/--anchor-[a-z0-9]+/g, '--anchor-X');
        return normalised + '\n@html:' + htmlClass;
      });
      contentHash = sha(`${globalAssetSignature || ''}|${fingerprint}`);
      if (existsSync(cachePngPath) && existsSync(cacheHashPath)) {
        const stored = readFileSync(cacheHashPath, 'utf8').trim();
        if (stored === contentHash) {
          // Cache hit — the served site still needs the file under /og/, so
          // copy it; only the screenshot + PNG encode is skipped.
          try {
            cpSync(cachePngPath, filePath);
            return `/og/${fileSeg}.png`;
          } catch { /* copy failure — fall through to fresh snapshot */ }
        }
      }
    } catch { /* hash failure is non-fatal — fall through to fresh snapshot */ }
  }

  try {
    // The viewport is left untouched and a 1200×630 region is clipped from the
    // top instead: resizing would force a reflow, and pages whose hero depends
    // on viewport height can then screenshot as blank if the compositor has
    // not repainted between the resize and the capture.
    await page.evaluate(() => window.scrollTo(0, 0));
    await page.screenshot({
      path: filePath,
      type: 'png',
      clip: { x: 0, y: 0, width: 1200, height: 630 },
      omitBackground: false,
      captureBeyondViewport: false,
    });

    // Sanity check: a PNG under 15KB is treated as a blank/header-only
    // capture. The file is removed and null returned so the caller can fall
    // back to other og:image sources.
    try {
      const sz = statSync(filePath).size;
      if (sz < 15 * 1024) {
        unlinkSync(filePath);
        // Drop the cache too so the next run doesn't trust it.
        if (cachePngPath) { try { unlinkSync(cachePngPath); } catch { /* missing is fine */ } }
        if (cacheHashPath) { try { unlinkSync(cacheHashPath); } catch { /* missing is fine */ } }
        return null;
      }
    } catch { /* stat failure is non-fatal */ }

    // Populate the cache. A null contentHash (fingerprinting failed or caching
    // is off) means nothing is cached: re-snapshotting next run is preferable
    // to recording a hash that cannot be trusted.
    if (cacheDir && contentHash) {
      try { mkdirSync(cacheDir, { recursive: true }); } catch { /* exists */ }
      try { cpSync(filePath, cachePngPath); } catch { /* ignore */ }
      try { writeFileSync(cacheHashPath, contentHash, 'utf8'); } catch { /* ignore */ }
    }
    return `/og/${fileSeg}.png`;
  } catch (e) {
    // Non-fatal — the caller falls back to whatever other og:image source is
    // available.
    console.error(`prerender: og snapshot failed for /${pathSeg || ''}: ${e?.message || e}`);
    return null;
  }
}
1980
+
1981
+ // --- SEO: per-route meta + structured data injection ----------------------
1982
+ //
1983
+ // Runs in the live page right before HTML serialization. Layers (highest
1984
+ // precedence first; each layer only fills slots not yet present):
1985
+ //
1986
+ // 1. <template data-head> per-route — already in the head by snapshot time
1987
+ // 2. <head> in index.html — already in the head by snapshot time
1988
+ // 3. render.meta.* expressions — Alpine-evaluated against the live page
1989
+ // 4. render.meta.fallback.* — static strings used when expressions are empty
1990
+ // 5. Smart defaults from the rendered DOM (h1, first p, first img, etc.)
1991
+ // 6. PWA-style manifest.json fields (name, description, author, icons)
1992
+ //
1993
+ // "Slot taken" detection is by selector: <title>, <meta name=>, <meta property=>.
1994
+ // An empty <title></title> or one matching manifest.json "name" counts as
1995
+ // missing (placeholder rule), so smart defaults can fill route-specific titles
1996
+ // without the author having to clear the static <title> in index.html.
1997
+ //
1998
+ // JSON-LD blocks (WebSite, Article, BreadcrumbList) follow the same pattern:
1999
+ // only inject if no <script type="application/ld+json"> already covers that
2000
+ // schema type for the route.
2001
+ async function injectMetaInDom(page, ctx) {
2002
+ await page.evaluate((ctx) => {
2003
+ const head = document.head;
2004
+ if (!head) return;
2005
+
2006
+ // --- Helpers ---------------------------------------------------------
2007
+
2008
+ const SOCIAL_PREFIXES = /^(og:|twitter:|article:|fb:)/;
2009
+
2010
+ const findMeta = (key) => {
2011
+ // Selectors are case-sensitive in querySelector; meta name/property are case-insensitive
2012
+ // in HTML but always written lowercase by us. Cover both attribute styles.
2013
+ return head.querySelector(`meta[name="${key}"], meta[property="${key}"]`);
2014
+ };
2015
+
2016
+ // Slots are "open" if missing, OR if their content equals a known site-wide
2017
+ // placeholder (manifest.json's name/description). Mirrors the title rule so
2018
+ // existing projects with hardcoded site-default meta in index.html still get
2019
+ // route-specific values from smart defaults. Per-tag placeholder map:
2020
+ const PLACEHOLDER = {
2021
+ description: ctx.seo.siteDescription,
2022
+ };
2023
+ const slotIsOpen = (key, existingEl) => {
2024
+ if (!existingEl) return true;
2025
+ const current = (existingEl.getAttribute('content') || '').trim();
2026
+ if (!current) return true;
2027
+ const placeholder = PLACEHOLDER[key];
2028
+ return placeholder && current === placeholder;
2029
+ };
2030
+ const setMeta = (key, content) => {
2031
+ if (content == null) return false;
2032
+ const str = String(content).trim();
2033
+ if (!str) return false;
2034
+ const existing = findMeta(key);
2035
+ if (!slotIsOpen(key, existing)) return false;
2036
+ if (existing) {
2037
+ existing.setAttribute('content', str);
2038
+ } else {
2039
+ const m = document.createElement('meta');
2040
+ m.setAttribute(SOCIAL_PREFIXES.test(key) ? 'property' : 'name', key);
2041
+ m.setAttribute('content', str);
2042
+ head.appendChild(m);
2043
+ }
2044
+ return true;
2045
+ };
2046
+
2047
+ const getCurrentTitle = () => {
2048
+ const el = head.querySelector('title');
2049
+ return { el, text: el ? (el.textContent || '').trim() : '' };
2050
+ };
2051
+
2052
+ const titleSlotIsOpen = () => {
2053
+ const { text } = getCurrentTitle();
2054
+ if (!text) return true;
2055
+ // Equals manifest.name → treat as placeholder (the static <title>Site</title>
2056
+ // pattern in starter templates). Allows smart-defaults to inject a
2057
+ // route-specific title without the author having to wipe the static tag.
2058
+ if (ctx.seo.siteName && text === ctx.seo.siteName) return true;
2059
+ return false;
2060
+ };
2061
+
2062
+ const setTitle = (text) => {
2063
+ if (!text) return false;
2064
+ if (!titleSlotIsOpen()) return false;
2065
+ const trimmed = String(text).trim();
2066
+ if (!trimmed) return false;
2067
+ const { el } = getCurrentTitle();
2068
+ if (el) el.textContent = trimmed;
2069
+ else {
2070
+ const t = document.createElement('title');
2071
+ t.textContent = trimmed;
2072
+ head.appendChild(t);
2073
+ }
2074
+ return true;
2075
+ };
2076
+
2077
+ const evalAlpine = (expr) => {
2078
+ if (typeof expr !== 'string' || !expr.trim()) return null;
2079
+ try {
2080
+ const A = window.Alpine;
2081
+ if (!A || typeof A.evaluate !== 'function') return null;
2082
+ const v = A.evaluate(document.body, expr);
2083
+ if (v == null) return null;
2084
+ const s = typeof v === 'string' ? v : String(v);
2085
+ return s.trim() || null;
2086
+ } catch { return null; }
2087
+ };
2088
+
2089
+ const truncate = (s, max) => {
2090
+ const t = String(s).replace(/\s+/g, ' ').trim();
2091
+ if (t.length <= max) return t;
2092
+ // Cut at the last word boundary before max-3 to leave room for ellipsis.
2093
+ const sliced = t.slice(0, max - 1);
2094
+ const lastSpace = sliced.lastIndexOf(' ');
2095
+ const base = lastSpace > max * 0.6 ? sliced.slice(0, lastSpace) : sliced;
2096
+ return base + '…';
2097
+ };
2098
+
2099
+ // --- Smart defaults (DOM derivation) ---------------------------------
2100
+
2101
+ const smartDefaults = (() => {
2102
+ if (!ctx.seo.defaults) return {};
2103
+ // Title source: first <h1> inside <main>/<article>, then any <h1>.
2104
+ const h1El = document.querySelector('main h1, article h1') || document.querySelector('h1');
2105
+ const h1 = h1El ? (h1El.textContent || '').trim() : '';
2106
+ const composedTitle = (() => {
2107
+ if (!h1) return ctx.seo.siteName || null;
2108
+ if (!ctx.seo.siteName || h1 === ctx.seo.siteName) return h1;
2109
+ return `${h1} — ${ctx.seo.siteName}`;
2110
+ })();
2111
+
2112
+ // Description: first non-trivial <p> in main/article content.
2113
+ const descCandidates = document.querySelectorAll('main p, article p, .prose p');
2114
+ let desc = '';
2115
+ for (const p of descCandidates) {
2116
+ const text = (p.textContent || '').trim();
2117
+ if (text.length >= 30) { desc = truncate(text, 160); break; }
2118
+ }
2119
+
2120
+ // Image: snapshot URL if auto-snapshot was taken; else first content
2121
+ // <img> with a non-data src; else largest manifest icon. Snapshot wins
2122
+ // over content <img> because it represents the rendered page and is
2123
+ // sized for OG/Twitter cards (1200×630), whereas a content image could
2124
+ // be a thumbnail of arbitrary aspect ratio.
2125
+ let imgSrc = ctx.snapshotUrl || '';
2126
+ if (!imgSrc) {
2127
+ const imgCandidates = document.querySelectorAll('main img[src], article img[src]');
2128
+ for (const img of imgCandidates) {
2129
+ const src = img.getAttribute('src') || '';
2130
+ if (src && !src.startsWith('data:')) { imgSrc = src; break; }
2131
+ }
2132
+ }
2133
+ if (!imgSrc && Array.isArray(ctx.seo.icons) && ctx.seo.icons.length) {
2134
+ // Largest icon by area.
2135
+ const sorted = ctx.seo.icons.slice().sort((a, b) => {
2136
+ const area = (s) => {
2137
+ const m = String(s?.sizes || '').match(/(\d+)x(\d+)/);
2138
+ return m ? parseInt(m[1], 10) * parseInt(m[2], 10) : 0;
2139
+ };
2140
+ return area(b) - area(a);
2141
+ });
2142
+ imgSrc = sorted[0]?.src || '';
2143
+ }
2144
+
2145
+ // Type heuristic: 'article' if the page renders an <article> or its path
2146
+ // looks like article content (e.g. /docs/foo, /blog/foo, /articles/foo);
2147
+ // 'website' otherwise.
2148
+ const looksLikeArticle = !!document.querySelector('article')
2149
+ || /^\/(?:docs|blog|articles|posts|guides)\//i.test(location.pathname);
2150
+ const ogType = looksLikeArticle ? 'article' : 'website';
2151
+
2152
+ return {
2153
+ title: composedTitle,
2154
+ description: desc || ctx.seo.siteDescription || null,
2155
+ image: imgSrc || null,
2156
+ ogType,
2157
+ };
2158
+ })();
2159
+
2160
+ // --- Resolve a single meta value through the precedence chain --------
2161
+
2162
+ const resolve = (key) => {
2163
+ // Layer 3: prerender.meta expression
2164
+ const exprMap = ctx.seo.meta || {};
2165
+ const expr = exprMap[key];
2166
+ if (typeof expr === 'string') {
2167
+ const v = evalAlpine(expr);
2168
+ if (v) return v;
2169
+ } else if (typeof expr === 'boolean' || typeof expr === 'number') {
2170
+ return String(expr);
2171
+ }
2172
+ // Layer 4: explicit fallback
2173
+ const fallback = exprMap.fallback?.[key];
2174
+ if (fallback) return String(fallback);
2175
+ // Layer 5: smart defaults from DOM (page-specific — beats generic PWA fields).
2176
+ // For title specifically, the placeholder rule in setTitle() also requires
2177
+ // the static <title>Site</title> to be treated as missing so this wins.
2178
+ if (smartDefaults[key]) return smartDefaults[key];
2179
+ // Layer 6: PWA-style manifest.json fields — last-resort generic fallback
2180
+ if (key === 'title' && ctx.seo.siteName) return ctx.seo.siteName;
2181
+ if (key === 'description' && ctx.seo.siteDescription) return ctx.seo.siteDescription;
2182
+ if (key === 'author' && ctx.seo.siteAuthor) return ctx.seo.siteAuthor;
2183
+ return null;
2184
+ };
2185
+
2186
+ // --- Title -----------------------------------------------------------
2187
+
2188
+ setTitle(resolve('title'));
2189
+
2190
+ // --- Description / author -------------------------------------------
2191
+
2192
+ const description = resolve('description');
2193
+ setMeta('description', description);
2194
+ setMeta('author', resolve('author'));
2195
+
2196
+ // --- Canonical URL (skip — already injected later by buildCanonicalAndHreflang) ---
2197
+
2198
+ // --- OpenGraph / Twitter --------------------------------------------
2199
+
2200
+ const liveBase = (ctx.liveUrl || '').replace(/\/$/, '');
2201
+ const pageUrl = ctx.pathSeg === '' || ctx.pathSeg === '__404__'
2202
+ ? (liveBase ? liveBase + '/' : null)
2203
+ : (liveBase ? `${liveBase}/${ctx.pathSeg}` : null);
2204
+ const finalTitle = getCurrentTitle().text || resolve('title');
2205
+ const ogType = resolve('ogType') || smartDefaults.ogType || 'website';
2206
+ const image = resolve('image');
2207
+
2208
+ setMeta('og:title', finalTitle);
2209
+ setMeta('og:description', description);
2210
+ setMeta('og:type', ogType);
2211
+ setMeta('og:url', pageUrl);
2212
+ setMeta('og:site_name', ctx.seo.siteName);
2213
+ if (image) setMeta('og:image', image);
2214
+
2215
+ setMeta('twitter:card', image ? 'summary_large_image' : 'summary');
2216
+ setMeta('twitter:title', finalTitle);
2217
+ setMeta('twitter:description', description);
2218
+ if (image) setMeta('twitter:image', image);
2219
+
2220
+ // --- JSON-LD structured data ----------------------------------------
2221
+
2222
+ const sd = ctx.seo.structuredData;
2223
+ if (sd && typeof sd === 'object') {
2224
+ const existingLdScripts = head.querySelectorAll('script[type="application/ld+json"]');
2225
+ const existingTypes = new Set();
2226
+ existingLdScripts.forEach((s) => {
2227
+ try {
2228
+ const parsed = JSON.parse(s.textContent || '{}');
2229
+ const t = Array.isArray(parsed) ? parsed.map((x) => x['@type']) : [parsed['@type']];
2230
+ t.forEach((tt) => tt && existingTypes.add(tt));
2231
+ } catch { /* skip malformed */ }
2232
+ });
2233
+
2234
+ const resolveSdField = (v) => {
2235
+ if (typeof v === 'string') {
2236
+ const evaled = evalAlpine(v);
2237
+ return evaled ?? v; // if eval fails, keep literal (lets users write plain strings)
2238
+ }
2239
+ return v;
2240
+ };
2241
+ const resolveSchema = (obj) => {
2242
+ if (obj == null || typeof obj !== 'object') return obj;
2243
+ const out = {};
2244
+ for (const k of Object.keys(obj)) {
2245
+ out[k] = resolveSdField(obj[k]);
2246
+ }
2247
+ return out;
2248
+ };
2249
+
2250
+ const blocks = [];
2251
+ for (const [type, def] of Object.entries(sd)) {
2252
+ if (existingTypes.has(type)) continue;
2253
+ if (def === false) continue;
2254
+ if (type === 'BreadcrumbList' && def === true) {
2255
+ // Auto-derive from URL path segments.
2256
+ const parts = location.pathname.split('/').filter(Boolean);
2257
+ const items = [{
2258
+ '@type': 'ListItem',
2259
+ position: 1,
2260
+ name: ctx.seo.siteName || 'Home',
2261
+ item: liveBase ? liveBase + '/' : '/',
2262
+ }];
2263
+ parts.forEach((seg, i) => {
2264
+ items.push({
2265
+ '@type': 'ListItem',
2266
+ position: i + 2,
2267
+ name: seg.replace(/-/g, ' ').replace(/\b\w/g, (c) => c.toUpperCase()),
2268
+ item: liveBase ? `${liveBase}/${parts.slice(0, i + 1).join('/')}` : '/' + parts.slice(0, i + 1).join('/'),
2269
+ });
2270
+ });
2271
+ blocks.push({ '@context': 'https://schema.org', '@type': 'BreadcrumbList', itemListElement: items });
2272
+ continue;
2273
+ }
2274
+ if (def === true) {
2275
+ // Bare-true for known schemas: minimal auto-fill
2276
+ if (type === 'WebSite') {
2277
+ blocks.push({
2278
+ '@context': 'https://schema.org',
2279
+ '@type': 'WebSite',
2280
+ name: ctx.seo.siteName || finalTitle || '',
2281
+ url: liveBase || '',
2282
+ });
2283
+ } else if (type === 'Article') {
2284
+ blocks.push({
2285
+ '@context': 'https://schema.org',
2286
+ '@type': 'Article',
2287
+ headline: finalTitle || '',
2288
+ description: description || '',
2289
+ ...(image ? { image } : {}),
2290
+ ...(pageUrl ? { url: pageUrl } : {}),
2291
+ ...(ctx.seo.siteAuthor ? { author: { '@type': 'Person', name: ctx.seo.siteAuthor } } : {}),
2292
+ });
2293
+ }
2294
+ continue;
2295
+ }
2296
+ if (typeof def === 'object') {
2297
+ const resolved = resolveSchema(def);
2298
+ blocks.push({ '@context': 'https://schema.org', '@type': type, ...resolved });
2299
+ }
2300
+ }
1690
2301
 
1691
- function writeSeoFiles(outputDir, pathList, liveUrl, locales, defaultLocale) {
2302
+ for (const block of blocks) {
2303
+ const s = document.createElement('script');
2304
+ s.setAttribute('type', 'application/ld+json');
2305
+ s.textContent = JSON.stringify(block);
2306
+ head.appendChild(s);
2307
+ }
2308
+ }
2309
+ }, ctx);
2310
+ }
2311
+
2312
+ // --- SEO: robots.txt, sitemap.xml, llms.txt, llms-full.txt ---------------
2313
+ //
2314
+ // Written to the prerender output directory. liveUrl is the canonical public
2315
+ // host (https://...), used for absolute URLs in sitemap entries and the llms.txt
2316
+ // page index. llms.txt and llms-full.txt follow the llmstxt.org convention —
2317
+ // a plain-markdown index and full-content concatenation specifically for LLM
2318
+ // crawlers (ChatGPT, Claude, Perplexity, etc.) that prefer structured plaintext
2319
+ // over scraping rendered HTML.
2320
+
2321
/**
 * Reduce an HTML fragment to plain text: drop non-content elements, strip the
 * remaining tags, decode character references, and collapse whitespace.
 *
 * Crude but sufficient for meta description / llms-full content extraction; it
 * runs on prerendered HTML where Alpine bindings have already been resolved to
 * literal values.
 *
 * Character references are decoded in a SINGLE pass so that already-escaped
 * text is not decoded twice (`&amp;lt;` must become the literal text `&lt;`,
 * not `<`). Unknown named references and out-of-range numeric references are
 * left untouched.
 *
 * @param {string | null | undefined} html - HTML source; nullish is treated as ''.
 * @returns {string} Whitespace-normalised plain text.
 */
function htmlToText(html) {
  const namedEntities = new Map([
    ['nbsp', ' '],
    ['amp', '&'],
    ['lt', '<'],
    ['gt', '>'],
    ['quot', '"'],
    ['apos', "'"],
  ]);

  const decodeEntity = (entity, decimal, hex, name) => {
    if (name !== undefined) {
      return namedEntities.has(name) ? namedEntities.get(name) : entity;
    }
    const codePoint = decimal !== undefined
      ? Number.parseInt(decimal, 10)
      : Number.parseInt(hex, 16);
    const isSurrogate = codePoint >= 0xd800 && codePoint <= 0xdfff;
    // String.fromCodePoint throws above U+10FFFF; NUL and lone surrogates are
    // not meaningful text, so keep the reference verbatim in those cases.
    if (codePoint === 0 || codePoint > 0x10ffff || isSurrogate) return entity;
    return String.fromCodePoint(codePoint);
  };

  return String(html || '')
    // Elements whose contents are never user-visible prose.
    .replace(/<script[\s\S]*?<\/script>/gi, ' ')
    .replace(/<style[\s\S]*?<\/style>/gi, ' ')
    .replace(/<svg[\s\S]*?<\/svg>/gi, ' ')
    .replace(/<template[\s\S]*?<\/template>/gi, ' ')
    .replace(/<!--[\s\S]*?-->/g, ' ')
    // Tags are stripped BEFORE decoding so decoded `<`/`>` are not mistaken for markup.
    .replace(/<[^>]+>/g, ' ')
    .replace(/&(?:#(\d+)|#x([0-9a-f]+)|([a-z]+));/gi, decodeEntity)
    .replace(/\s+/g, ' ')
    .trim();
}
2343
+
2344
/**
 * Extract the <title>, the meta description, and the route's main content text
 * from a prerendered HTML file. Targets the article body rather than the whole
 * page layout so llms-full.txt isn't dominated by repeated site chrome.
 *
 * Content region selection (first hit wins):
 *   1. The first element whose class list contains the exact token `prose`
 *      (variants such as `not-prose` or `prose-lg` alone do not qualify).
 *   2. The first `<article>` element.
 *   3. `<main>`, or `<body>` when there is no `<main>`, with `<nav>`,
 *      `<footer>` and every `data-static` container removed. `<aside>` and
 *      `<header>` are deliberately kept: article content commonly uses
 *      `<aside class="frame">` for example boxes.
 *
 * @param {string} filePath - Path of the prerendered HTML file.
 * @returns {{ title: string, description: string, bodyText: string } | null}
 *   Extracted fields, or `null` when the file does not exist.
 */
function extractRouteContent(filePath) {
  if (!existsSync(filePath)) return null;
  const html = readFileSync(filePath, 'utf8');
  const titleMatch = html.match(/<title[^>]*>([\s\S]*?)<\/title>/i);
  // Back-reference the opening quote so an apostrophe inside a double-quoted
  // value (content="It's …") does not terminate the match early.
  const descMatch = html.match(/<meta\s+name=["']description["']\s+content=(["'])([\s\S]*?)\1/i);

  // Attribute values are entity-escaped in serialized HTML; decode the common
  // references in one pass (so `&amp;quot;` is not decoded twice).
  const attrEntities = {
    amp: '&', lt: '<', gt: '>', quot: '"', apos: "'", nbsp: ' ', '#39': "'", '#x27': "'",
  };
  const decodeAttr = (value) =>
    value.replace(/&(amp|lt|gt|quot|apos|nbsp|#39|#x27);/g, (_, name) => attrEntities[name]);

  // Depth-tracked search for the closing tag of an element. Naive non-greedy
  // regex breaks on nested same-tag elements (article markdown typically
  // contains many nested <div>s for code blocks, frames, etc.).
  // `from` is the index just past the element's opening tag. Returns the index
  // where the matching close tag starts (innerEnd) and ends (outerEnd), or
  // null when no matching close tag exists (void or unterminated element).
  const findElementEnd = (source, tagName, from) => {
    const open = new RegExp(`<${tagName}(?![\\w-])[^>]*>`, 'gi');
    const close = new RegExp(`</${tagName}\\s*>`, 'gi');
    let depth = 1;
    let cursor = from;
    while (depth > 0) {
      open.lastIndex = cursor;
      close.lastIndex = cursor;
      const nextOpen = open.exec(source);
      const nextClose = close.exec(source);
      if (!nextClose) return null;
      if (nextOpen && nextOpen.index < nextClose.index) {
        depth++;
        cursor = nextOpen.index + nextOpen[0].length;
      } else {
        depth--;
        cursor = nextClose.index + nextClose[0].length;
        if (depth === 0) return { innerEnd: nextClose.index, outerEnd: cursor };
      }
    }
    return null;
  };

  // Inner HTML of the first element matched by `openerRx` (capture group 1
  // must be the tag name). An unterminated element yields the rest of the source.
  const extractByOpener = (source, openerRx) => {
    const m = openerRx.exec(source);
    if (!m) return null;
    const start = m.index + m[0].length;
    const end = findElementEnd(source, m[1], start);
    return end ? source.slice(start, end.innerEnd) : source.slice(start);
  };

  // Remove every element carrying a data-static attribute (nav lists, TOCs).
  // When the element has no closing tag (e.g. a void <img data-static>), only
  // the opening tag itself is dropped so the content after it survives.
  const staticOpener = /<([a-z][a-z0-9-]*)(?![\w-])[^>]*\bdata-static\b[^>]*>/i;
  const stripStaticContainers = (source) => {
    let out = source;
    let m;
    // Non-global regex: each exec() rescans from the start of the shrunken string.
    while ((m = staticOpener.exec(out))) {
      const innerStart = m.index + m[0].length;
      const end = findElementEnd(out, m[1], innerStart);
      const resumeAt = end ? end.outerEnd : innerStart;
      out = `${out.slice(0, m.index)} ${out.slice(resumeAt)}`;
    }
    return out;
  };

  // 1. `.prose` — cleanest source: only article body, no chrome. The class
  //    value must contain `prose` as a whole whitespace-delimited token.
  const proseRegion = extractByOpener(
    html,
    /<([a-z][a-z0-9-]*)(?![\w-])[^>]*\sclass=(["'])(?:[^"']*\s)?prose(?:\s[^"']*)?\2[^>]*>/i
  );

  let region = '';
  if (proseRegion) {
    region = proseRegion;
  } else {
    // 2. `<article>` — semantic HTML for article bodies.
    const articleMatch = html.match(/<article\b[^>]*>([\s\S]*?)<\/article>/i);
    if (articleMatch) {
      region = articleMatch[1];
    } else {
      // 3. `<main>` (or `<body>`) minus site chrome.
      const mainMatch = html.match(/<main\b[^>]*>([\s\S]*?)<\/main>/i);
      const bodyMatch = mainMatch ? null : html.match(/<body\b[^>]*>([\s\S]*?)<\/body>/i);
      let candidate = mainMatch ? mainMatch[1] : (bodyMatch ? bodyMatch[1] : '');
      candidate = candidate.replace(/<nav\b[\s\S]*?<\/nav>/gi, ' ');
      candidate = candidate.replace(/<footer\b[\s\S]*?<\/footer>/gi, ' ');
      region = stripStaticContainers(candidate);
    }
  }

  return {
    title: titleMatch ? htmlToText(titleMatch[1]) : '',
    description: descMatch ? decodeAttr(descMatch[2]) : '',
    bodyText: region ? htmlToText(region) : '',
  };
}
2466
+
2467
/**
 * Map a route segment to the HTML file the prerender writes for it.
 *
 * @param {string} outputDir - Prerender output directory.
 * @param {string} pathSeg - Route path without a leading slash; '' is the site
 *   root and '__prerender_404__' is the not-found page.
 * @returns {string} `<outputDir>/index.html` for the root, `<outputDir>/404.html`
 *   for the 404 sentinel, otherwise `<outputDir>/<pathSeg>/index.html`.
 */
function routeHtmlPath(outputDir, pathSeg) {
  switch (pathSeg) {
    case '':
      return join(outputDir, 'index.html');
    case '__prerender_404__':
      return join(outputDir, '404.html');
    default: {
      const segments = pathSeg.split('/');
      return join(outputDir, ...segments, 'index.html');
    }
  }
}
2473
+
2474
/**
 * Best-effort per-route "last modified" date (YYYY-MM-DD, taken from the UTC
 * ISO timestamp).
 *
 * Lookup order, first hit wins:
 *   1. A source file for the route, so the date reflects content changes
 *      rather than the prerender run. Tried in order:
 *      `articles/<path minus section prefix>.md`, `articles/<path>.md`,
 *      `pages/<path>.html`, `<path>.md` — all under `rootDir`. The section
 *      prefixes removed are docs/, blog/, articles/, posts/ and guides/.
 *   2. The mtime of the prerendered HTML file for the route. That file is
 *      rewritten on every prerender, so this is effectively "today".
 *   3. Today's date.
 *
 * @param {string} rootDir - Project root searched for source files.
 * @param {string} outputDir - Prerender output directory.
 * @param {string} pathSeg - Route path without a leading slash ('' for root).
 * @returns {string} Date formatted as YYYY-MM-DD.
 */
function routeLastModDate(rootDir, outputDir, pathSeg) {
  const toDay = (date) => date.toISOString().slice(0, 10);

  const withoutSection = pathSeg.replace(/^(?:docs|blog|articles|posts|guides)\//, '');
  const sourceCandidates = [
    join(rootDir, 'articles', `${withoutSection}.md`),
    join(rootDir, 'articles', `${pathSeg}.md`),
    join(rootDir, 'pages', `${pathSeg}.html`),
    join(rootDir, `${pathSeg}.md`),
  ];

  for (const candidate of sourceCandidates) {
    try {
      const info = statSync(candidate);
      // Directories that happen to share the name are not sources.
      if (info.isFile()) return toDay(info.mtime);
    } catch {
      // Candidate does not exist (or is unreadable): try the next one.
    }
  }

  try {
    const renderedFile = routeHtmlPath(outputDir, pathSeg || '');
    return toDay(statSync(renderedFile).mtime);
  } catch {
    // No rendered file either: fall through to today's date.
  }
  return toDay(new Date());
}
2506
+
2507
+ function writeSeoFiles(outputDir, pathList, liveUrl, locales, defaultLocale, ctx = {}) {
1692
2508
  const base = liveUrl.replace(/\/$/, '');
1693
- const today = new Date().toISOString().slice(0, 10);
1694
2509
  const localeList = Array.isArray(locales) ? locales : [];
1695
2510
  const multiLocale = localeList.length > 1;
2511
+ const rootDir = ctx.rootDir || '';
1696
2512
 
1697
2513
  writeFileSync(
1698
2514
  join(outputDir, 'robots.txt'),
@@ -1718,7 +2534,8 @@ Sitemap: ${base}/sitemap.xml
1718
2534
  body += `\n <xhtml:link rel="alternate" hreflang="${escapeXmlText(hreflang)}" href="${escapeXmlText(href)}" />`;
1719
2535
  }
1720
2536
  }
1721
- body += `\n <lastmod>${today}</lastmod>
2537
+ const lastmod = routeLastModDate(rootDir, outputDir, pathSeg);
2538
+ body += `\n <lastmod>${lastmod}</lastmod>
1722
2539
  <changefreq>monthly</changefreq>
1723
2540
  <priority>${path === '' ? '1.0' : '0.8'}</priority>`;
1724
2541
  return ` <url>
@@ -1732,6 +2549,155 @@ ${body}
1732
2549
  ${urlsetNs}
1733
2550
  ${urlEntries.join('\n')}
1734
2551
  </urlset>
2552
+ `,
2553
+ 'utf8'
2554
+ );
2555
+
2556
+ writeLlmsFiles(outputDir, pathList, base, ctx);
2557
+ }
2558
+
2559
/**
 * Write llms.txt (curated index) and llms-full.txt (concatenated full content)
 * per the llmstxt.org convention.
 *
 * Each route in `pathList` is resolved to its prerendered HTML file via
 * routeHtmlPath() and read with extractRouteContent(); routes whose file is
 * missing are skipped, as is the '__prerender_404__' sentinel (it has no
 * public URL worth indexing).
 *
 * Pages are grouped into sections by their FIRST URL segment, title-cased
 * ("Docs" for docs/getting-started, "Blog" for blog/hello). The root page goes
 * into an "Overview" section. Sections and pages keep first-encounter order.
 *
 * @param {string} outputDir - Prerender output directory; both files are written here.
 * @param {string[]} pathList - Route paths without a leading slash ('' for root).
 * @param {string} liveBase - Public site origin without a trailing slash.
 * @param {{ siteName?: string, siteDescription?: string }} [ctx] - Site-level
 *   fields for the file header; siteName falls back to 'Site'.
 * @returns {void}
 */
function writeLlmsFiles(outputDir, pathList, liveBase, ctx = {}) {
  const siteName = ctx.siteName || 'Site';
  const siteDescription = ctx.siteDescription || '';

  // Extract content for every route up front so both files share one read pass.
  const entries = [];
  for (const pathSeg of pathList) {
    // The 404 page would otherwise be listed under `${liveBase}/__prerender_404__`.
    if (pathSeg === '__prerender_404__') continue;
    const extracted = extractRouteContent(routeHtmlPath(outputDir, pathSeg));
    if (!extracted) continue;
    entries.push({
      pathSeg,
      url: pathSeg === '' ? `${liveBase}/` : `${liveBase}/${pathSeg}`,
      title: extracted.title || pathSeg || siteName,
      // Index entries are single markdown list items: fold any line breaks.
      description: String(extracted.description || '').replace(/\s+/g, ' ').trim(),
      bodyText: extracted.bodyText,
    });
  }

  // Group entries by section. For a/b/c the section is "a"; for the root,
  // "Overview". A Map preserves first-encounter order.
  const titleCase = (s) => s.replace(/-/g, ' ').replace(/\b\w/g, (c) => c.toUpperCase());
  const sections = new Map();
  for (const entry of entries) {
    const first = entry.pathSeg.split('/')[0] || '';
    const sectionKey = first || 'Overview';
    if (!sections.has(sectionKey)) {
      sections.set(sectionKey, { label: first ? titleCase(first) : 'Overview', entries: [] });
    }
    sections.get(sectionKey).entries.push(entry);
  }

  // --- llms.txt: short curated index ---
  let llms = `# ${siteName}\n`;
  if (siteDescription) llms += `\n> ${siteDescription}\n`;
  for (const { label, entries: items } of sections.values()) {
    llms += `\n## ${label}\n\n`;
    for (const e of items) {
      const desc = e.description ? `: ${e.description}` : '';
      llms += `- [${e.title}](${e.url})${desc}\n`;
    }
  }
  writeFileSync(join(outputDir, 'llms.txt'), llms, 'utf8');

  // --- llms-full.txt: full concatenated text content ---
  // Description is intentionally omitted per entry — bodyText typically opens
  // with the same sentence (the smart-default description comes from the first
  // paragraph), so printing both would duplicate the first line.
  let llmsFull = `# ${siteName}\n`;
  if (siteDescription) llmsFull += `\n> ${siteDescription}\n`;
  for (const { label, entries: items } of sections.values()) {
    llmsFull += `\n\n# ${label}\n`;
    for (const e of items) {
      llmsFull += `\n\n## ${e.title}\n`;
      llmsFull += `\nSource: ${e.url}\n`;
      if (e.bodyText) llmsFull += `\n${e.bodyText}\n`;
    }
  }
  writeFileSync(join(outputDir, 'llms-full.txt'), llmsFull, 'utf8');
}
2631
+
2632
+ // --- Output protection: keep editors/formatters from rewriting generated HTML ---
2633
+ //
2634
+ // Prerendered HTML embeds highlight.js spans inside <pre><code>, where
2635
+ // whitespace IS significant. Most HTML formatters (Prettier, VS Code's
2636
+ // html-language-features, biome) only respect "preserve <pre> content" when
2637
+ // <pre> sits at the top level — when it's nested inside an unrecognised custom
2638
+ // element like <x-code>, they recurse in and reformat the spans, breaking the
2639
+ // indentation in every code block. These four files tell common tools to
2640
+ // leave the output alone, so the corruption can't happen in any dev's
2641
+ // environment regardless of their global editor config.
2642
+ function writeOutputProtectionFiles(outputDir) {
2643
+ // Prettier: hierarchical, walks up the tree from the file being formatted.
2644
+ writeFileSync(
2645
+ join(outputDir, '.prettierignore'),
2646
+ `# Generated by Manifest prerender. Do not edit; re-run \`mnfst-render\`.
2647
+ *
2648
+ `,
2649
+ 'utf8'
2650
+ );
2651
+
2652
+ // Git: hide from PR diffs by default and skip text normalisation that could
2653
+ // touch <pre> whitespace.
2654
+ writeFileSync(
2655
+ join(outputDir, '.gitattributes'),
2656
+ `# Generated by Manifest prerender. Do not edit; re-run \`mnfst-render\`.
2657
+ * linguist-generated=true
2658
+ *.html -text
2659
+ `,
2660
+ 'utf8'
2661
+ );
2662
+
2663
+ // EditorConfig: hierarchical (editors walk up from the file). \`root = true\`
2664
+ // stops the walk at this folder so a parent .editorconfig can't override us.
2665
+ // We can't disable formatters via EditorConfig, but pinning indent/charset
2666
+ // matches what the renderer emits, so format-on-type doesn't churn the file.
2667
+ writeFileSync(
2668
+ join(outputDir, '.editorconfig'),
2669
+ `# Generated by Manifest prerender. Do not edit; re-run \`mnfst-render\`.
2670
+ root = true
2671
+
2672
+ [*]
2673
+ charset = utf-8
2674
+ end_of_line = lf
2675
+ insert_final_newline = false
2676
+ trim_trailing_whitespace = false
2677
+ indent_style = space
2678
+ indent_size = 2
2679
+ `,
2680
+ 'utf8'
2681
+ );
2682
+
2683
+ // VS Code: applies when this folder is opened directly as a workspace root.
2684
+ // (A nested .vscode/settings.json is NOT picked up automatically by a
2685
+ // parent workspace; for that case the dev needs to add a pattern to their
2686
+ // own settings.) \`files.readonlyInclude\` is the cleanest defence: VS Code
2687
+ // refuses to save the file, so format-on-save can't fire.
2688
+ // VS Code settings.json is JSONC — // comments are allowed.
2689
+ const vscodeDir = join(outputDir, '.vscode');
2690
+ mkdirSync(vscodeDir, { recursive: true });
2691
+ writeFileSync(
2692
+ join(vscodeDir, 'settings.json'),
2693
+ `// Generated by Manifest prerender. Do not edit; re-run mnfst-render.
2694
+ {
2695
+ "files.readonlyInclude": { "**": true },
2696
+ "editor.formatOnSave": false,
2697
+ "editor.formatOnPaste": false,
2698
+ "editor.formatOnType": false,
2699
+ "html.format.enable": false
2700
+ }
1735
2701
  `,
1736
2702
  'utf8'
1737
2703
  );
@@ -1881,7 +2847,7 @@ async function runPrerender(config) {
1881
2847
 
1882
2848
  const defaultLocale = locales[0] ?? null;
1883
2849
  const routeSegments = discoverRoutes(manifest, config.root);
1884
- // Merge any explicitly configured paths (manifest.prerender.paths) into the discovered segments.
2850
+ // Merge any explicitly configured paths (manifest.render.paths) into the discovered segments.
1885
2851
  // These are treated as locale-neutral and get full locale-expansion like all other discovered paths.
1886
2852
  if (config.paths && config.paths.length > 0) {
1887
2853
  const segSet = new Set(routeSegments);
@@ -1928,7 +2894,7 @@ async function runPrerender(config) {
1928
2894
  const outputResolved = resolve(config.output);
1929
2895
  const rootResolved = resolve(config.root);
1930
2896
  // Router base = URL pathname to the app root. When dist is deployed as site root (e.g. Appwrite), use "".
1931
- // Set manifest.prerender.routerBase only when the app is served from a subpath (e.g. /app).
2897
+ // Set manifest.render.routerBase only when the app is served from a subpath (e.g. /app).
1932
2898
  let routerBasePath = null;
1933
2899
  if (config.routerBase != null && String(config.routerBase).trim() !== '') {
1934
2900
  const trimmed = String(config.routerBase).replace(/^\/+|\/+$/g, '').trim();
@@ -1943,9 +2909,9 @@ async function runPrerender(config) {
1943
2909
  mkdirSync(outputResolved, { recursive: true });
1944
2910
  copyProjectIntoDist(rootResolved, outputResolved);
1945
2911
 
1946
- const pre = manifest.prerender ?? {};
1947
- const bundleUtilities = pre.utilitiesBundle !== false;
1948
- const tailwindBuilt = runTailwindCliForPrerender(rootResolved, outputResolved, pre);
2912
+ const ren = manifest.render ?? {};
2913
+ const bundleUtilities = ren.utilitiesBundle !== false;
2914
+ const tailwindBuilt = runTailwindCliForPrerender(rootResolved, outputResolved, ren);
1949
2915
  const utilityBlocks = [];
1950
2916
 
1951
2917
  // Launch a fresh browser instance. Chromium is known to accumulate memory
@@ -1977,12 +2943,18 @@ async function runPrerender(config) {
1977
2943
  console.error(' npm i -D puppeteer-core @sparticuz/chromium');
1978
2944
  process.exit(1);
1979
2945
  }
2946
+ // Chrome's sandbox is the primary defense against renderer-process
2947
+ // exploits — disabling it means any RCE in a rendered page runs as the
2948
+ // developer's UID with full filesystem access. We render arbitrary CDN
2949
+ // scripts and third-party iframes, so the threat is real. Opt-in only
2950
+ // for CI environments where the sandbox legitimately can't initialize:
2951
+ // set MNFST_RENDER_NO_SANDBOX=1 to add the flags back.
2952
+ const extraArgs = process.env.MNFST_RENDER_NO_SANDBOX === '1'
2953
+ ? ['--no-sandbox', '--disable-setuid-sandbox']
2954
+ : [];
1980
2955
  return await puppeteer.default.launch({
1981
2956
  headless: true,
1982
- args: [
1983
- '--no-sandbox',
1984
- '--disable-setuid-sandbox',
1985
- ],
2957
+ args: extraArgs,
1986
2958
  });
1987
2959
  }
1988
2960
  }
@@ -1993,12 +2965,12 @@ async function runPrerender(config) {
1993
2965
  // substantial, and we also now maintain a per-page source-attribute Map for
1994
2966
  // the hydration contract. On large sites (>100 routes) higher concurrency
1995
2967
  // spikes memory and crashes the browser. Users can still override via
1996
- // --concurrency or manifest.prerender.concurrency.
2968
+ // --concurrency or manifest.render.concurrency.
1997
2969
  const concurrency = config.concurrency;
1998
2970
  const maxRetries = config.retries ?? 2;
1999
2971
  // Recycle the browser every N processed pages to bound resource growth.
2000
- // Configurable via manifest.prerender.browserRecycleEvery.
2001
- const browserRecycleEvery = Math.max(0, pre.browserRecycleEvery ?? 40);
2972
+ // Configurable via manifest.render.browserRecycleEvery.
2973
+ const browserRecycleEvery = Math.max(0, ren.browserRecycleEvery ?? 40);
2002
2974
  let pagesSinceRecycle = 0;
2003
2975
  const recycleLock = { busy: false };
2004
2976
  // Workers block on this promise before touching `browser`. While a recycle
@@ -2076,6 +3048,19 @@ async function runPrerender(config) {
2076
3048
 
2077
3049
  process.stdout.write(`Prerendering ${pathTotal} path(s) (${puppeteerTotal} via Puppeteer, ${localeVariantPaths.length} via substitution)...\n`);
2078
3050
 
3051
+ // Asset-wide fingerprint used as a cache-invalidator for OG snapshots:
3052
+ // changes to theme CSS, manifest config, or the root index.html mean every
3053
+ // route's visual chrome has changed, so the snapshot cache must drop. Per-
3054
+ // route content hashes (in takeOgSnapshot) catch route-specific changes.
3055
+ // The cache lives at <root>/.mnfst-cache/og/ — survives the output-dir
3056
+ // rmSync that fires at the start of every prerender.
3057
+ const globalAssetSig = config.seo?.imageSnapshots
3058
+ ? computeGlobalAssetSignature(config.root)
3059
+ : '';
3060
+ const ogCacheDir = config.seo?.imageSnapshots
3061
+ ? join(config.root, '.mnfst-cache', 'og')
3062
+ : null;
3063
+
2079
3064
  function pushDebug(row) {
2080
3065
  if (!config.debugPrerender) return;
2081
3066
  debugRows.push(row);
@@ -2103,6 +3088,13 @@ async function runPrerender(config) {
2103
3088
  // here instead of throwing "browser not ready".
2104
3089
  await browserReadyPromise;
2105
3090
  const page = await browser.newPage();
3091
+ // Render at a typical desktop viewport so layouts dependent on viewport
3092
+ // width (responsive flex/grid, container queries, media queries) settle
3093
+ // into their desktop variant. Without this the headless default (often
3094
+ // 800×600) leaves narrower layouts baked into the prerendered HTML and
3095
+ // also produces blank OG screenshots for hero sections that rely on
3096
+ // viewport-driven flex distribution.
3097
+ await page.setViewport({ width: 1200, height: 800, deviceScaleFactor: 1 });
2106
3098
  try {
2107
3099
  // Align <html lang> with the URL being prerendered before any app script runs.
2108
3100
  // initializeDataSourcesPlugin picks locale from document.documentElement.lang first; a mismatch
@@ -2314,6 +3306,20 @@ async function runPrerender(config) {
2314
3306
  // Flush any remaining Alpine microtask effects after the render-ready signal.
2315
3307
  await flushAlpineEffects(page);
2316
3308
 
3309
+ // OG image auto-snapshot — captured here, BEFORE the heavy DOM-transform
3310
+ // passes (template removal, hydration contract, route-hidden cleanup)
3311
+ // perturb the rendered visual state. Skip if og:image is already set
3312
+ // by data-head, prerender.meta config, or an explicit fallback.
3313
+ let earlySnapshotUrl = null;
3314
+ if (config.seo.imageSnapshots) {
3315
+ const ogImageHandled = !!config.seo.meta?.image
3316
+ || !!config.seo.meta?.fallback?.image
3317
+ || await page.evaluate(() => !!document.head.querySelector('meta[property="og:image"]'));
3318
+ if (!ogImageHandled) {
3319
+ earlySnapshotUrl = await takeOgSnapshot(page, config.output, is404 ? '__404__' : pathSeg, globalAssetSig, ogCacheDir);
3320
+ }
3321
+ }
3322
+
2317
3323
  if (config.debugPrerender) {
2318
3324
  const before = await page.evaluate(() => {
2319
3325
  const templates = Array.from(document.querySelectorAll('template[x-for]'));
@@ -2527,14 +3533,14 @@ async function runPrerender(config) {
2527
3533
  // Interactive Manifest-registered directives that attach click/hover/
2528
3534
  // observer state at runtime and therefore need the live Alpine scope.
2529
3535
  const INTERACTIVE_DIRECTIVES = new Set([
2530
- 'x-theme', 'x-dropdown', 'x-tooltip', 'x-tab', 'x-tabpanel',
3536
+ 'x-color', 'x-dropdown', 'x-tooltip', 'x-tab', 'x-tabpanel',
2531
3537
  'x-toast', 'x-carousel', 'x-resize', 'x-anchors', 'x-model',
2532
3538
  'x-files', 'x-data-files',
2533
3539
  ]);
2534
3540
  // Runtime-only Alpine magics whose values change after the prerender
2535
3541
  // snapshot (e.g. via media query, route change, auth state). Bindings
2536
3542
  // referencing these must re-evaluate in the live page.
2537
- const RUNTIME_MAGIC_RX = /(?<!['"])\$(theme|locale|url|auth|search|query|toast)\b/;
3543
+ const RUNTIME_MAGIC_RX = /(?<!['"])\$(color|locale|url|auth|search|query|toast)\b/;
2538
3544
 
2539
3545
  const isDiffBindingAttr = (name) =>
2540
3546
  name === ':class' || name === 'x-bind:class' ||
@@ -2550,6 +3556,13 @@ async function runPrerender(config) {
2550
3556
  // Explicit data-hydrate — subtree-wide restoration.
2551
3557
  if (el.hasAttribute('data-hydrate')) return 'explicit';
2552
3558
 
3559
+ // data-static: the author has frozen this subtree post-bake — Alpine
3560
+ // is not re-rendering iteration here, and the baked class/style/etc.
3561
+ // represent the intended final state. Including these elements in
3562
+ // the hydration contract would null out their baked class (per the
3563
+ // diff-binding rule below), undoing the SEO-baked styling. Skip.
3564
+ if (el.hasAttribute('data-static') || el.closest('[data-static]')) return null;
3565
+
2553
3566
  const list = el.attributes;
2554
3567
  for (let i = 0; i < list.length; i++) {
2555
3568
  const name = list[i].name;
@@ -2692,11 +3705,25 @@ async function runPrerender(config) {
2692
3705
  const inferred = xFor.includes('$search') || xFor.includes('$query') ||
2693
3706
  xFor.includes('$url') || xFor.includes('$auth') ||
2694
3707
  /\bin\s+(filtered\w*|results|searchResults)\b/.test(xFor);
2695
- const forceCollapse = explicit || inferred;
3708
+ // data-static (on template or ancestor) opts the list out of dynamic
3709
+ // collapse and pins it to the static-bake path, even if the x-for
3710
+ // expression looks dynamic. Mirrors data-hydrate as the alternative:
3711
+ // data-hydrate keeps a subtree live for runtime hydration; data-static
3712
+ // freezes baked clones into the HTML for SEO with no further re-render.
3713
+ const isStatic = tpl.hasAttribute('data-static') || !!tpl.closest('[data-static]');
3714
+ const forceCollapse = !isStatic && (explicit || inferred);
2696
3715
  if (!forceCollapse) {
2697
3716
  tpl.removeAttribute('data-prerender-collapsed');
2698
3717
  tpl.removeAttribute('data-prerender-static-generated');
2699
3718
  // Static mode: if prerender produced concrete siblings, mark template for removal later.
3719
+ //
3720
+ // Default sibling-match check is strict (tag + class) to avoid
3721
+ // capturing unrelated elements that happen to share a tag. Under
3722
+ // data-static the user has explicitly opted in to baking, so we
3723
+ // relax to tag-only — Alpine's :class evaluation on clones often
3724
+ // differs from the template's static class (e.g. template has no
3725
+ // `class=` and clones have an evaluated string), and the strict
3726
+ // check would miss the clones and leave the template unmarked.
2700
3727
  const first = tpl.content?.firstElementChild;
2701
3728
  if (first) {
2702
3729
  const tag = first.tagName;
@@ -2705,8 +3732,10 @@ async function runPrerender(config) {
2705
3732
  let generatedCount = 0;
2706
3733
  while (next) {
2707
3734
  if (next.tagName !== tag) break;
2708
- const sameClass = (next.getAttribute('class') || '') === cls;
2709
- if (!sameClass) break;
3735
+ if (!isStatic) {
3736
+ const sameClass = (next.getAttribute('class') || '') === cls;
3737
+ if (!sameClass) break;
3738
+ }
2710
3739
  generatedCount++;
2711
3740
  next = next.nextElementSibling;
2712
3741
  }
@@ -2809,7 +3838,16 @@ async function runPrerender(config) {
2809
3838
  // Strip loop-scope bindings from x-for clones while <template> nodes still exist.
2810
3839
  // (If we remove static templates first, querySelectorAll('template[x-for]') misses them and clones
2811
3840
  // keep x-text/x-bind referencing card/item — Alpine then mutates or errors on the static HTML.)
3841
+ //
3842
+ // Wrapped in Alpine.mutateDom so attribute removals (e.g. removing :class)
3843
+ // don't trigger Alpine's reactive teardown — without this, Alpine sees
3844
+ // the :class attribute disappear, runs its unbind effect, and clears the
3845
+ // bound attribute (class) back to its pre-binding value (empty for clones
3846
+ // whose template had no static class). mutateDom suppresses the observer
3847
+ // for the duration of the callback.
2812
3848
  await page.evaluate(() => {
3849
+ const A = window.Alpine;
3850
+ const runBatch = typeof A?.mutateDom === 'function' ? (fn) => A.mutateDom(fn) : (fn) => fn();
2813
3851
  const loopVarRegex = /^\s*(?:\(\s*([A-Za-z_$][\w$]*)(?:\s*,\s*([A-Za-z_$][\w$]*))?\s*\)|([A-Za-z_$][\w$]*))\s+in\s+/;
2814
3852
  // Include x-init: expanded clones still had x-init="getDescription(article)" etc.; Alpine then throws (article undefined).
2815
3853
  const bindingAttrRegex = /^(?:x-bind:|:|x-text|x-html|x-show|x-if|x-model|x-effect|x-init|x-icon|x-on:|@)/;
@@ -2845,7 +3883,13 @@ async function runPrerender(config) {
2845
3883
  if (boundAttr) {
2846
3884
  const concrete = node.getAttribute(boundAttr);
2847
3885
  if (concrete != null && String(concrete).trim() !== '') {
3886
+ // Removing :foo triggers Alpine's binding teardown, which
3887
+ // restores the bound attribute to its pre-binding value
3888
+ // (empty for clones whose template had no static class).
3889
+ // Snapshot the eval'd value and re-set it after removal so
3890
+ // the baked attribute survives the unbind.
2848
3891
  node.removeAttribute(name);
3892
+ node.setAttribute(boundAttr, concrete);
2849
3893
  }
2850
3894
  continue;
2851
3895
  }
@@ -2855,24 +3899,26 @@ async function runPrerender(config) {
2855
3899
  }
2856
3900
  };
2857
3901
 
2858
- document.querySelectorAll('template[x-for]').forEach((tpl) => {
2859
- if (tpl.hasAttribute('data-hydrate') || tpl.closest('[data-hydrate]')) return;
2860
- const xFor = (tpl.getAttribute('x-for') || '').trim();
2861
- const m = xFor.match(loopVarRegex);
2862
- const itemVar = m ? (m[1] || m[3] || '') : '';
2863
- const indexVar = m ? (m[2] || '') : '';
2864
- if (!itemVar && !indexVar) return;
3902
+ runBatch(() => {
3903
+ document.querySelectorAll('template[x-for]').forEach((tpl) => {
3904
+ if (tpl.hasAttribute('data-hydrate') || tpl.closest('[data-hydrate]')) return;
3905
+ const xFor = (tpl.getAttribute('x-for') || '').trim();
3906
+ const m = xFor.match(loopVarRegex);
3907
+ const itemVar = m ? (m[1] || m[3] || '') : '';
3908
+ const indexVar = m ? (m[2] || '') : '';
3909
+ if (!itemVar && !indexVar) return;
2865
3910
 
2866
- const first = tpl.content?.firstElementChild;
2867
- if (!first) return;
2868
- const tag = first.tagName;
3911
+ const first = tpl.content?.firstElementChild;
3912
+ if (!first) return;
3913
+ const tag = first.tagName;
2869
3914
 
2870
- let next = tpl.nextElementSibling;
2871
- while (next) {
2872
- if (next.tagName !== tag) break;
2873
- stripLoopBindings(next, itemVar, indexVar);
2874
- next = next.nextElementSibling;
2875
- }
3915
+ let next = tpl.nextElementSibling;
3916
+ while (next) {
3917
+ if (next.tagName !== tag) break;
3918
+ stripLoopBindings(next, itemVar, indexVar);
3919
+ next = next.nextElementSibling;
3920
+ }
3921
+ });
2876
3922
  });
2877
3923
  });
2878
3924
 
@@ -2880,20 +3926,42 @@ async function runPrerender(config) {
2880
3926
  // Alpine registers a cleanup on <template x-for> that removes every node in _x_lookup when the
2881
3927
  // template is detached — so tpl.remove() alone deletes all sibling clones (empty grids in output).
2882
3928
  // Replace each clone with a deep cloneNode first so teardown targets detached nodes; copies stay in DOM.
3929
+ //
3930
+ // Iterate until quiet: when an outer template's siblings are deep-cloned,
3931
+ // any nested templates inside those clones become FRESH DOM nodes that
3932
+ // weren't in the original querySelectorAll snapshot. We re-query and
3933
+ // re-process (at most SAFETY_PASSES times) until no marked templates remain, so nested static lists
3934
+ // (e.g. <template x-for="group in $x.docs"> with an inner
3935
+ // <template x-for="item in group.items">) are fully baked and removed.
2883
3936
  await page.evaluate(() => {
2884
3937
  const A = window.Alpine;
2885
3938
  const runBatch = typeof A?.mutateDom === 'function' ? (fn) => A.mutateDom(fn) : (fn) => fn();
2886
- runBatch(() => {
2887
- document.querySelectorAll('template[x-for][data-prerender-static-generated="1"]').forEach((tpl) => {
3939
+ const SAFETY_PASSES = 8;
3940
+ for (let pass = 0; pass < SAFETY_PASSES; pass++) {
3941
+ const remaining = document.querySelectorAll('template[x-for][data-prerender-static-generated="1"]');
3942
+ if (remaining.length === 0) break;
3943
+ let processed = 0;
3944
+ runBatch(() => {
3945
+ remaining.forEach((tpl) => {
2888
3946
  if (tpl.hasAttribute('data-hydrate') || tpl.closest('[data-hydrate]')) return;
2889
- // $x-driven x-for: keep the template so Alpine can re-render the
2890
- // list at runtime (locale switching, filtering, etc.), but remove
2891
- // the static clones — Alpine creates fresh clones on init and does
2892
- // NOT adopt existing DOM nodes, so leaving them produces duplicates.
2893
- // Individual article/pricing pages still have full baked content
2894
- // (via x-text/x-html); the x-for list is only the index/grid view.
3947
+ // $x-driven x-for: by default, keep the template so Alpine can
3948
+ // re-render the list at runtime (locale switching, filtering, etc.)
3949
+ // and remove the static clones — Alpine creates fresh clones on
3950
+ // init and does NOT adopt existing DOM nodes, so leaving them
3951
+ // produces duplicates. Individual article/pricing pages still
3952
+ // have full baked content (via x-text/x-html); the x-for list is
3953
+ // only the index/grid view.
3954
+ //
3955
+ // Opt-in via data-static (on template or ancestor) reverses this:
3956
+ // we keep the baked clones for SEO and remove the template instead,
3957
+ // which freezes the list (Alpine has nothing left to iterate, so
3958
+ // no duplicates and no runtime re-render). Use this for static
3959
+ // navigation lists or any $x-driven list whose source data does
3960
+ // not change after first paint. Loop-scope bindings on the kept
3961
+ // clones are stripped earlier in the pipeline.
2895
3962
  const xFor = (tpl.getAttribute('x-for') || '');
2896
- if (xFor.includes('$x')) {
3963
+ const isStatic = tpl.hasAttribute('data-static') || !!tpl.closest('[data-static]');
3964
+ if (xFor.includes('$x') && !isStatic) {
2897
3965
  const first = tpl.content?.firstElementChild;
2898
3966
  if (first) {
2899
3967
  const tag = first.tagName;
@@ -2922,14 +3990,20 @@ async function runPrerender(config) {
2922
3990
  const cls = first.getAttribute('class') || '';
2923
3991
  let n = tpl.nextElementSibling;
2924
3992
  while (n && n.tagName === tag) {
2925
- if ((n.getAttribute('class') || '') !== cls) break;
3993
+ // Same rationale as the marking pass: under data-static, relax
3994
+ // class match (Alpine's :class evaluation on clones often differs
3995
+ // from the template's static class).
3996
+ if (!isStatic && (n.getAttribute('class') || '') !== cls) break;
2926
3997
  const next = n.nextElementSibling;
2927
3998
  n.replaceWith(n.cloneNode(true));
2928
3999
  n = next;
2929
4000
  }
2930
4001
  tpl.remove();
4002
+ processed++;
2931
4003
  });
2932
- });
4004
+ });
4005
+ if (processed === 0) break;
4006
+ }
2933
4007
  });
2934
4008
 
2935
4009
  // Remove orphan x-for clones that still reference loop-scope vars (e.g. image/index)
@@ -2980,6 +4054,23 @@ async function runPrerender(config) {
2980
4054
  });
2981
4055
  });
2982
4056
 
4057
+ // data-static cleanup: any <template> still inside a [data-static] subtree
4058
+ // is removed. Plugin-driven templates (x-anchors, custom directives that
4059
+ // insert their rendered output as siblings) leave the rendered DOM behind
4060
+ // and the template intact — at runtime the plugin would re-run and
4061
+ // duplicate the output. Removing the template here is the equivalent of
4062
+ // the x-for static path: bake the rendered content, drop the source.
4063
+ // x-for templates have their own staged removal earlier in the pipeline;
4064
+ // this catch-all cleans up everything else.
4065
+ await page.evaluate(() => {
4066
+ document.querySelectorAll('[data-static] template, template[data-static]').forEach((tpl) => {
4067
+ // Don't remove templates explicitly marked data-hydrate (those are an
4068
+ // opt-out from any prerender transforms within the data-static subtree).
4069
+ if (tpl.hasAttribute('data-hydrate') || tpl.closest('[data-hydrate]')) return;
4070
+ tpl.remove();
4071
+ });
4072
+ });
4073
+
2983
4074
  const visibilityNormalizedPath = logicalPathToVisibilityNormalizedPath(pathSeg, locales);
2984
4075
  await page.evaluate((np) => {
2985
4076
  try {
@@ -2999,6 +4090,19 @@ async function runPrerender(config) {
2999
4090
  toRemove.forEach((el) => { if (document.contains(el)) el.remove(); });
3000
4091
  });
3001
4092
 
4093
+ // SEO / AEO meta injection — see resolveConfig().seo for precedence layers.
4094
+ // Runs in the live page so prerender.meta expressions can use Alpine context
4095
+ // (real $x.* evaluation, not yaml-only paths). Each pass only fills
4096
+ // slots that are still missing; data-head and the index.html static head win.
4097
+ // The og:image snapshot was captured earlier (post-Alpine, pre-transforms);
4098
+ // this pass uses it as the highest smart-default for the image slot.
4099
+ await injectMetaInDom(page, {
4100
+ seo: config.seo,
4101
+ liveUrl: (config.liveUrl || '').replace(/\/$/, ''),
4102
+ pathSeg: is404 ? '__404__' : pathSeg,
4103
+ snapshotUrl: earlySnapshotUrl,
4104
+ });
4105
+
3002
4106
  let html = await page.evaluate(() => document.documentElement.outerHTML);
3003
4107
  // Inject the hydration contract blob into the raw HTML *before* caching
3004
4108
  // it for locale variant generation, so every locale variant inherits the
@@ -3305,8 +4409,14 @@ async function runPrerender(config) {
3305
4409
  pathList.filter((p) => p !== NOT_FOUND_PATH),
3306
4410
  config.liveUrl,
3307
4411
  locales,
3308
- defaultLocale
4412
+ defaultLocale,
4413
+ {
4414
+ rootDir: config.root,
4415
+ siteName: config.seo?.siteName,
4416
+ siteDescription: config.seo?.siteDescription,
4417
+ }
3309
4418
  );
4419
+ writeOutputProtectionFiles(config.output);
3310
4420
  validatePrerenderedOutput(config.output, pathList.filter((p) => p !== NOT_FOUND_PATH));
3311
4421
 
3312
4422
  if (config.redirects.length > 0) {