mnfst-render 0.5.2 → 0.5.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/manifest.render.mjs +408 -271
  2. package/package.json +1 -1
@@ -172,6 +172,7 @@ function parseArgs() {
172
172
  if (args[i] === '--wait' && args[i + 1]) { out.wait = parseInt(args[++i], 10); continue; }
173
173
  if (args[i] === '--wait-after-idle' && args[i + 1]) { out.waitAfterIdle = parseInt(args[++i], 10); continue; }
174
174
  if (args[i] === '--concurrency' && args[i + 1]) { out.concurrency = parseInt(args[++i], 10); continue; }
175
+ if (args[i] === '--retries' && args[i + 1]) { out.retries = parseInt(args[++i], 10); continue; }
175
176
  if (args[i] === '--dry-run') { out.dryRun = true; continue; }
176
177
  if (args[i] === '--debug-prerender') { out.debugPrerender = true; continue; }
177
178
  }
@@ -230,7 +231,12 @@ function resolveConfig() {
230
231
  redirects: Array.isArray(pre.redirects) ? pre.redirects : [],
231
232
  wait: cli.wait ?? pre.wait ?? null,
232
233
  waitAfterIdle: 0,
233
- concurrency: Math.max(1, cli.concurrency ?? pre.concurrency ?? Math.max(4, cpus().length - 1)),
234
+ // Default concurrency: 2. Chromium per-page memory overhead is large and
235
+ // our hydration source-attribute map adds more per page. On big sites
236
+ // (>100 routes) higher concurrency crashes the browser with OOM/target
237
+ // closed errors. Users can override for small projects with --concurrency.
238
+ concurrency: Math.max(1, cli.concurrency ?? pre.concurrency ?? 2),
239
+ retries: Math.max(0, cli.retries ?? pre.retries ?? 2),
234
240
  localeSubstitution: true,
235
241
  localeSubstitutionExclude: [],
236
242
  /** Explicit locale-neutral paths to render in addition to those discovered automatically.
@@ -599,14 +605,24 @@ function stripDevOnlyContent(html) {
599
605
  return out;
600
606
  }
601
607
 
602
- // --- Strip CDN-injected plugin scripts from snapshot so only the loader remains ---
603
- // When the static page loads, the loader runs once and adds plugins; avoids duplicate script execution.
608
+ // --- Strip plugin scripts injected by the loader during prerender so only the loader tag remains ---
609
+ // When the static page loads, the loader runs once and adds plugins; avoids
610
+ // duplicate script execution (which would cause `const` re-declaration errors).
611
+ // Matches both CDN-minified (.min.js) and self-hosted (.js) plugin URLs.
612
+ // Also strips the loader-injected Alpine script (both defer and non-defer
613
+ // forms) — at runtime the loader re-injects Alpine AFTER plugin registration,
614
+ // and if Chromium serialized an Alpine script tag during the Puppeteer render,
615
+ // leaving it in place would cause Alpine to execute synchronously during HTML
616
+ // parse, before plugins have a chance to register their directives.
604
617
  function stripInjectedPluginScripts(html) {
605
618
  const pluginPattern =
606
- /<script[^>]*\ssrc=["'][^"']*manifest\.(?:components|router|utilities|data|icons|localization|markdown|code|themes|toasts|tooltips|dropdowns|tabs|slides|resize|tailwind|appwrite\.(?:auth|data|presence))[^"']*\.min\.js["'][^>]*>\s*<\/script>/gi;
619
+ /<script[^>]*\ssrc=["'][^"']*manifest\.(?:components|router|utilities|data|icons|localization|markdown|code|themes|toasts|tooltips|dropdowns|tabs|slides|resize|colorpicker|tailwind|appwrite\.(?:auth|data|presence))[^"']*\.(?:min\.)?js["'][^>]*>\s*<\/script>/gi;
607
620
  let out = html.replace(pluginPattern, '');
621
+ const alpinePattern =
622
+ /<script[^>]*\ssrc=["'][^"']*\/alpinejs@[^"']*["'][^>]*>\s*<\/script>/gi;
623
+ out = out.replace(alpinePattern, '');
608
624
  const runtimePattern =
609
- /<script[^>]*\ssrc=["'][^"']*(?:alpinejs\/dist\/cdn\.min\.js|papaparse@[^"']*\/papaparse\.min\.js|marked\/marked\.min\.js|highlightjs\/cdn-release@[^"']*\/highlight\.min\.js)[^"']*["'][^>]*>\s*<\/script>/gi;
625
+ /<script[^>]*\ssrc=["'][^"']*(?:papaparse@[^"']*\/papaparse\.min\.js|marked\/marked\.min\.js|highlightjs\/cdn-release@[^"']*\/highlight\.min\.js)[^"']*["'][^>]*>\s*<\/script>/gi;
610
626
  out = out.replace(runtimePattern, '');
611
627
  return out;
612
628
  }
@@ -840,8 +856,15 @@ function stripDuplicatedLoopDirectives(html) {
840
856
  return html;
841
857
  }
842
858
 
859
+ // Returns true if the attribute string contains either the explicit `data-hydrate`
860
+ // attribute (source-authored hydrate island root) or a `data-hydrate-id` (element
861
+ // that the prerender has tagged as a runtime-restoration target). String-level
862
+ // strip passes use this to skip elements whose attribute state will be restored
863
+ // from the hydration contract at runtime — leaving them untouched is the safest
864
+ // default even though the contract would correct most damage anyway.
843
865
  function isHydrateMarkedAttrs(attrsStr) {
844
- return /\sdata-prerender-hydrate(?:\s*=|[\s>])/i.test(attrsStr || '');
866
+ if (!attrsStr) return false;
867
+ return /\sdata-hydrate(?:-id)?(?:\s*=|[\s>])/i.test(attrsStr);
845
868
  }
846
869
 
847
870
  // --- Strip x-text and x-html that reference $x when static/SEO (content already in snapshot).
@@ -943,22 +966,16 @@ function stripResolvedXIconDirectives(html) {
943
966
  });
944
967
  }
945
968
 
946
- function stripPrerenderHydrateMarkers(html) {
947
- return html.replace(/\sdata-prerender-hydrate(?:=(?:"[^"]*"|'[^']*'|[^\s>]+))?/gi, '');
948
- }
949
-
950
- // Remove the snapshot id attribute used by the hydrate restore phase. These ids
951
- // only exist to let the post-Alpine restore step in Puppeteer find each snapshotted
952
- // element back; they have no purpose in the final output.
953
- function stripPrerenderHydrateSnapshotIds(html) {
954
- return html.replace(/\sdata-manifest-hyd-id(?:=(?:"[^"]*"|'[^']*'|[^\s>]+))?/gi, '');
955
- }
956
-
957
969
  function markPrerenderedManifestComponents(html) {
958
970
  return html.replace(/<(x-[a-z][\w-]*)([^>]*)>/gi, (full, tag, attrs) => {
959
971
  const a = attrs || '';
960
972
  if (/\bdata-pre-rendered\s*=/i.test(a) || /\bdata-processed\s*=/i.test(a)) return full;
961
- if (/\bdata-prerender-hydrate\b/i.test(a)) return full; // Inside data-hydrate island — skip
973
+ // Inside an explicit hydrate island — the runtime will restore its
974
+ // innerHTML to the authored source, so we must NOT tell the components
975
+ // processor to skip re-fetching. Leaving the placeholder unmarked lets
976
+ // the runtime restoration reinstate the <x-*> tag and the components
977
+ // plugin processes it normally on load.
978
+ if (/\bdata-hydrate\b/i.test(a)) return full;
962
979
  const spacer = /\S/.test(a) ? ' ' : '';
963
980
  return `<${tag}${a}${spacer}data-pre-rendered="1">`;
964
981
  });
@@ -1341,8 +1358,6 @@ function generateLocaleVariantHtml({
1341
1358
  // markPrerenderedManifestComponents leaves <x-*> tags that carry a data-hydrate attribute
1342
1359
  // unmarked, so components in explicit hydrate islands are processed normally at runtime.
1343
1360
  html = markPrerenderedManifestComponents(html);
1344
- html = stripPrerenderHydrateMarkers(html);
1345
- html = stripPrerenderHydrateSnapshotIds(html);
1346
1361
 
1347
1362
  const fileSegments = pathToFileSegments(pathSeg ? '/' + pathSeg : '/');
1348
1363
  html = rewriteHtmlAssetPaths(html, fileSegments.length);
@@ -1725,34 +1740,53 @@ async function runPrerender(config) {
1725
1740
  const tailwindBuilt = runTailwindCliForPrerender(rootResolved, outputResolved, pre);
1726
1741
  const utilityBlocks = [];
1727
1742
 
1728
- let browser;
1729
- try {
1730
- const chromium = await importFromProject('@sparticuz/chromium');
1731
- const pptr = await importFromProject('puppeteer-core');
1732
- const executablePath = await chromium.default.executablePath();
1733
- browser = await pptr.default.launch({
1734
- args: chromium.default.args,
1735
- defaultViewport: chromium.default.defaultViewport ?? null,
1736
- executablePath,
1737
- headless: chromium.default.headless ?? true,
1738
- ignoreHTTPSErrors: true,
1739
- });
1740
- } catch (serverlessErr) {
1741
- let puppeteer;
1743
+ // Launch a fresh browser instance. Chromium is known to accumulate memory
1744
+ // and handle leaks on large prerender runs (we've seen crashes around page
1745
+ // ~230 on sites with hundreds of routes). The launchBrowser function is
1746
+ // used both for the initial launch AND for periodic recycling — we close
1747
+ // the old browser and start a new one every `browserRecycleEvery` pages to
1748
+ // bound memory growth.
1749
+ async function launchBrowser() {
1742
1750
  try {
1743
- puppeteer = await importFromProject('puppeteer');
1744
- } catch {
1745
- console.error('prerender: missing browser runtime.');
1746
- console.error('Install one of the following, then rerun:');
1747
- console.error(' npm i -D puppeteer');
1748
- console.error(' npm i -D puppeteer-core @sparticuz/chromium');
1749
- process.exit(1);
1751
+ const chromium = await importFromProject('@sparticuz/chromium');
1752
+ const pptr = await importFromProject('puppeteer-core');
1753
+ const executablePath = await chromium.default.executablePath();
1754
+ return await pptr.default.launch({
1755
+ args: chromium.default.args,
1756
+ defaultViewport: chromium.default.defaultViewport ?? null,
1757
+ executablePath,
1758
+ headless: chromium.default.headless ?? true,
1759
+ ignoreHTTPSErrors: true,
1760
+ });
1761
+ } catch (_serverlessErr) {
1762
+ let puppeteer;
1763
+ try {
1764
+ puppeteer = await importFromProject('puppeteer');
1765
+ } catch {
1766
+ console.error('prerender: missing browser runtime.');
1767
+ console.error('Install one of the following, then rerun:');
1768
+ console.error(' npm i -D puppeteer');
1769
+ console.error(' npm i -D puppeteer-core @sparticuz/chromium');
1770
+ process.exit(1);
1771
+ }
1772
+ return await puppeteer.default.launch({ headless: true });
1750
1773
  }
1751
- browser = await puppeteer.default.launch({ headless: true });
1752
1774
  }
1775
+ let browser = await launchBrowser();
1753
1776
 
1754
- const timeout = config.wait ?? 15000;
1777
+ const timeout = config.wait ?? 30000;
1778
+ // Lower default concurrency: Chromium's own memory overhead per page is
1779
+ // substantial, and we also now maintain a per-page source-attribute Map for
1780
+ // the hydration contract. On large sites (>100 routes) higher concurrency
1781
+ // spikes memory and crashes the browser. Users can still override via
1782
+ // --concurrency or manifest.prerender.concurrency.
1755
1783
  const concurrency = config.concurrency;
1784
+ const maxRetries = config.retries ?? 2;
1785
+ // Recycle the browser every N processed pages to bound resource growth.
1786
+ // Configurable via manifest.prerender.browserRecycleEvery.
1787
+ const browserRecycleEvery = Math.max(0, pre.browserRecycleEvery ?? 40);
1788
+ let pagesSinceRecycle = 0;
1789
+ const recycleLock = { busy: false };
1756
1790
  const pathTotal = pathList.length;
1757
1791
  const failedPaths = [];
1758
1792
  const debugRows = [];
@@ -1844,6 +1878,11 @@ async function runPrerender(config) {
1844
1878
  : defaultLocale || 'en'
1845
1879
  : defaultLocale || 'en';
1846
1880
 
1881
+ // Refuse to newPage() against a closed browser (happens briefly during
1882
+ // recycle); the worker loop will retry.
1883
+ if (!browser || !browser.connected) {
1884
+ throw new Error('browser not ready');
1885
+ }
1847
1886
  const page = await browser.newPage();
1848
1887
  try {
1849
1888
  // Align <html lang> with the URL being prerendered before any app script runs.
@@ -1865,161 +1904,113 @@ async function runPrerender(config) {
1865
1904
  }
1866
1905
  }, currentLocale);
1867
1906
 
1868
- // Snapshot pristine source attributes of hydrate-target elements BEFORE Alpine
1869
- // touches them. We do this by wrapping `Alpine.initTree` — Alpine calls this
1870
- // for the initial tree walk AND every time the components plugin lazy-loads a
1871
- // new <x-*> component. Right before Alpine processes a subtree, we walk it
1872
- // and snapshot every hydrate target inside. This is the exact moment the
1873
- // user's source HTML is sitting in the DOM with no Alpine mutations applied.
1907
+ // Deterministic source-attribute capture via MutationObserver with
1908
+ // `attributeOldValue`. This runs before ANY page script and records the
1909
+ // first (pre-mutation) value of every attribute that Alpine or a Manifest
1910
+ // plugin ever touches. It also records the *initial* attributes of every
1911
+ // new element added to the DOM via childList mutations so elements
1912
+ // parsed from innerHTML (components, markdown rendering, etc.) are also
1913
+ // captured the moment they appear.
1914
+ //
1915
+ // The observer handles all mutation surfaces at once:
1916
+ // - setAttribute / removeAttribute
1917
+ // - className setter
1918
+ // - classList.add / remove / toggle / replace
1919
+ // - style.* property assignments (which mutate the style attribute)
1920
+ // - Any other path that ultimately modifies an attribute
1874
1921
  //
1875
- // The snapshots are restored in a later page.evaluate call after Alpine
1876
- // settles. This is true hydration: Alpine never gets to bake state into
1877
- // hydrate elements, so every directive (`:class`, `:style`, `x-text`, custom
1878
- // plugin directives, etc.) works in the prerendered MPA exactly the way it
1879
- // does in the live SPA — no per-binding strip logic, no cloak band-aids, no
1880
- // edge cases to chase.
1922
+ // At serialize time we read the map, identify hydrate targets per the
1923
+ // catalog, and emit a compact JSON hydration contract. The runtime
1924
+ // (`hydratePrerenderedPage` in manifest.js) reads the contract and
1925
+ // restores source attributes before Alpine starts.
1881
1926
  await page.evaluateOnNewDocument(() => {
1882
- const allSnapshots = [];
1883
- let nextId = 0;
1884
- const skipTags = new Set(['MAIN', 'BODY', 'HTML']);
1885
-
1886
- // Own MutationObserver registered before any other script on the page.
1887
- // This guarantees we process DOM additions before Alpine's observer does —
1888
- // critically, before Alpine's observer calls initTree on newly expanded
1889
- // Manifest components (preloaded or lazy) and bakes their `:class` state.
1890
- const installHydrateObserver = () => {
1891
- if (window.__manifestHydrateObserver || !document.body) return;
1892
- const obs = new MutationObserver((mutations) => {
1893
- for (const m of mutations) {
1894
- if (m.type !== 'childList') continue;
1895
- for (const node of m.addedNodes) {
1896
- if (node.nodeType !== 1) continue;
1897
- try { snapshotSubtree(node); } catch (_) {}
1898
- }
1899
- }
1900
- });
1901
- obs.observe(document.body, { childList: true, subtree: true });
1902
- window.__manifestHydrateObserver = obs;
1903
- };
1904
- if (typeof document !== 'undefined') {
1905
- if (document.body) {
1906
- installHydrateObserver();
1907
- } else {
1908
- document.addEventListener('DOMContentLoaded', installHydrateObserver, { once: true });
1909
- // Also try once readyState flips to interactive
1910
- document.addEventListener('readystatechange', () => {
1911
- if (document.readyState !== 'loading') installHydrateObserver();
1912
- });
1927
+ // element -> { attrName: originalValue (null if attribute was absent) }
1928
+ // Keyed by element reference; detached elements are simply never looked up again (the Map itself still holds them until the page closes).
1929
+ const sourceAttrs = new Map();
1930
+ // element -> original innerHTML (only populated for elements already
1931
+ // marked data-hydrate when we first see them — used for subtree-wide
1932
+ // restoration of explicit hydrate islands).
1933
+ const sourceInnerHTML = new Map();
1934
+
1935
+ const recordInitialAttrs = (el) => {
1936
+ if (!el || el.nodeType !== 1 || sourceAttrs.has(el)) return;
1937
+ const rec = {};
1938
+ const list = el.attributes;
1939
+ for (let i = 0; i < list.length; i++) {
1940
+ rec[list[i].name] = list[i].value;
1913
1941
  }
1914
- }
1915
-
1916
- const snapshotElement = (el) => {
1917
- if (!el || el.nodeType !== 1) return;
1918
- if (el.hasAttribute('data-manifest-hyd-id')) return; // already snapshotted
1919
- const id = '__manifest-hyd-' + nextId++;
1920
- el.setAttribute('data-manifest-hyd-id', id);
1921
- const attrs = {};
1922
- for (let i = 0; i < el.attributes.length; i++) {
1923
- const a = el.attributes[i];
1924
- if (a.name === 'data-manifest-hyd-id') continue;
1925
- attrs[a.name] = a.value;
1942
+ sourceAttrs.set(el, rec);
1943
+ if (el.hasAttribute && el.hasAttribute('data-hydrate')) {
1944
+ try { sourceInnerHTML.set(el, el.innerHTML); } catch (_) {}
1926
1945
  }
1927
- allSnapshots.push({ id, tag: el.tagName, attrs });
1928
1946
  };
1929
1947
 
1930
- const snapshotElementAndDescendants = (el) => {
1931
- snapshotElement(el);
1932
- if (el && el.querySelectorAll) {
1933
- el.querySelectorAll('*').forEach(snapshotElement);
1948
+ const handleMutations = (mutations) => {
1949
+ for (const m of mutations) {
1950
+ if (m.type === 'attributes') {
1951
+ const el = m.target;
1952
+ let rec = sourceAttrs.get(el);
1953
+ if (!rec) {
1954
+ // First time we see this element AT ALL via an attribute record:
1955
+ // seed with every current attribute so we never lose attrs that
1956
+ // existed before any mutation we happened to observe.
1957
+ rec = {};
1958
+ const list = el.attributes;
1959
+ for (let i = 0; i < list.length; i++) {
1960
+ rec[list[i].name] = list[i].value;
1961
+ }
1962
+ // Overwrite the one being mutated with the true oldValue
1963
+ // (which may be null if the attribute was absent pre-mutation).
1964
+ rec[m.attributeName] = m.oldValue;
1965
+ sourceAttrs.set(el, rec);
1966
+ } else if (!(m.attributeName in rec)) {
1967
+ rec[m.attributeName] = m.oldValue;
1968
+ }
1969
+ } else if (m.type === 'childList') {
1970
+ for (const node of m.addedNodes) {
1971
+ if (node.nodeType !== 1) continue;
1972
+ recordInitialAttrs(node);
1973
+ if (node.querySelectorAll) {
1974
+ node.querySelectorAll('*').forEach(recordInitialAttrs);
1975
+ }
1976
+ }
1977
+ }
1934
1978
  }
1935
1979
  };
1936
1980
 
1937
- const snapshotSubtree = (root) => {
1938
- if (!root || root.nodeType !== 1) return;
1981
+ const observer = new MutationObserver(handleMutations);
1939
1982
 
1940
- // 1. Direct data-hydrate roots + descendants within this subtree.
1941
- const hydrateRoots = [];
1942
- if (root.matches && root.matches('[data-hydrate]')) hydrateRoots.push(root);
1943
- if (root.querySelectorAll) {
1944
- root.querySelectorAll('[data-hydrate]').forEach((el) => hydrateRoots.push(el));
1945
- }
1946
- hydrateRoots.forEach(snapshotElementAndDescendants);
1947
-
1948
- // 2. x-theme elements (color mode plugin needs runtime click handler).
1949
- if (root.matches && root.matches('[x-theme]')) snapshotElementAndDescendants(root);
1950
- if (root.querySelectorAll) {
1951
- root.querySelectorAll('[x-theme]').forEach(snapshotElementAndDescendants);
1983
+ let observing = false;
1984
+ const startObserving = () => {
1985
+ if (observing) return true;
1986
+ // We can observe `document` itself — MutationObserver accepts it as a
1987
+ // target and forwards subtree mutations, so we catch <html> creation
1988
+ // and everything under it without racing the parser.
1989
+ try {
1990
+ observer.observe(document, {
1991
+ attributes: true,
1992
+ attributeOldValue: true,
1993
+ childList: true,
1994
+ subtree: true,
1995
+ });
1996
+ observing = true;
1997
+ } catch (_) { return false; }
1998
+ // Seed whatever already exists.
1999
+ if (document.documentElement) {
2000
+ recordInitialAttrs(document.documentElement);
2001
+ document.documentElement.querySelectorAll('*').forEach(recordInitialAttrs);
1952
2002
  }
1953
-
1954
- // 3. Propagate from data-hydrate children to nearest LOCAL x-data ancestor
1955
- // so the reactive controller, sibling event handlers (@click toggles
1956
- // etc.) and all bindings inside the scope are preserved together.
1957
- // Skip page-level scopes (main, body, [x-route]).
1958
- hydrateRoots.forEach((el) => {
1959
- let ancestor = el.parentElement;
1960
- while (ancestor && ancestor !== document.body) {
1961
- if (
1962
- ancestor.hasAttribute('x-data') &&
1963
- !skipTags.has(ancestor.tagName) &&
1964
- !ancestor.hasAttribute('x-route')
1965
- ) {
1966
- snapshotElementAndDescendants(ancestor);
1967
- break;
1968
- }
1969
- ancestor = ancestor.parentElement;
1970
- }
1971
- });
1972
-
1973
- window.__manifestHydrateSnapshots = allSnapshots;
2003
+ return true;
1974
2004
  };
2005
+ startObserving();
1975
2006
 
1976
- // Wrap Alpine.start so the snapshot runs INSIDE the start call, before
1977
- // Alpine has a chance to walk and mutate the tree. alpine:init as an
1978
- // external hook proved unreliable in some configurations — it fires after
1979
- // Alpine has already processed some elements. Alpine.start is the single
1980
- // synchronous entry point for the initial walk, so wrapping it guarantees
1981
- // we capture source state before any directive has been applied.
1982
- //
1983
- // We also wrap Alpine.initTree for lazy-loaded components that appear in
1984
- // the DOM after Alpine.start() has completed (fetched by the components
1985
- // plugin in response to new <x-*> placeholders).
1986
- //
1987
- // Both wraps are installed via a defineProperty setter on window.Alpine
1988
- // so they land the instant Alpine's CDN script does `window.Alpine = ...`.
1989
- const wrap = (alpine) => {
1990
- if (!alpine || alpine.__manifestRenderWrapped) return;
1991
- alpine.__manifestRenderWrapped = true;
1992
- if (typeof alpine.start === 'function') {
1993
- const originalStart = alpine.start.bind(alpine);
1994
- alpine.start = function () {
1995
- try { snapshotSubtree(document.body); } catch (_) { /* graceful */ }
1996
- return originalStart.apply(this, arguments);
1997
- };
1998
- }
1999
- if (typeof alpine.initTree === 'function') {
2000
- const originalInit = alpine.initTree.bind(alpine);
2001
- alpine.initTree = function (root) {
2002
- try { snapshotSubtree(root || document.body); } catch (_) { /* graceful */ }
2003
- return originalInit.apply(this, arguments);
2004
- };
2005
- }
2007
+ // Flush any pending mutations before the DOM is read for serialization.
2008
+ window.__manifestFlushHydrateSources = () => {
2009
+ try { handleMutations(observer.takeRecords()); } catch (_) {}
2006
2010
  };
2007
-
2008
- let _Alpine;
2009
- try {
2010
- Object.defineProperty(window, 'Alpine', {
2011
- configurable: true,
2012
- enumerable: true,
2013
- get() { return _Alpine; },
2014
- set(v) { _Alpine = v; wrap(v); },
2015
- });
2016
- } catch (_) { /* defineProperty failed, fall back to event listeners */ }
2017
-
2018
- if (typeof document !== 'undefined') {
2019
- // Event-based fallback in case the setter trap missed Alpine assignment.
2020
- document.addEventListener('alpine:init', () => wrap(window.Alpine));
2021
- document.addEventListener('alpine:initialized', () => wrap(window.Alpine));
2022
- }
2011
+ // Expose for the contract-emission phase.
2012
+ window.__manifestSourceAttrs = sourceAttrs;
2013
+ window.__manifestSourceInnerHTML = sourceInnerHTML;
2023
2014
  });
2024
2015
 
2025
2016
  pushDebug({ path: displayPath, stage: 'start' });
@@ -2253,88 +2244,154 @@ async function runPrerender(config) {
2253
2244
  });
2254
2245
  });
2255
2246
 
2256
- // Restore hydrate-target elements to their pristine source attributes
2257
- // (snapshotted via evaluateOnNewDocument before Alpine ran). This is true
2258
- // hydration: every Alpine binding (`:class`, `:style`, `:value`, `x-text`,
2259
- // `x-init`, custom plugin directives, …) is preserved exactly as authored,
2260
- // and Alpine processes them at runtime in the prerendered MPA the same way
2261
- // it would in the live SPA. After restoring source attributes we re-add the
2262
- // `data-prerender-hydrate` marker so downstream Node.js stripping passes
2263
- // continue to skip these elements.
2247
+ // Emit the hydration contract: walk the DOM, identify every hydrate
2248
+ // target (explicit `data-hydrate`, interactive Manifest directives,
2249
+ // diff-semantic bindings, runtime-magic-driven bindings), tag each with
2250
+ // `data-hydrate-id`, and collect the diff between each target's source
2251
+ // attributes (recorded by the MutationObserver in evaluateOnNewDocument)
2252
+ // and its current post-render attributes. The contract is returned as a
2253
+ // JSON-serialisable array; the runtime reads it on page load and restores
2254
+ // source state before Alpine starts.
2264
2255
  //
2265
- // Implementation note: we use `outerHTML` to swap the element rather than
2266
- // `setAttribute` per-attribute. Alpine's special attribute names (`@click`,
2267
- // possibly others starting with `@`) are not valid DOM Names per the XML
2268
- // production, so `setAttribute('@click', …)` throws InvalidCharacterError.
2269
- // The HTML parser, on the other hand, is lenient and accepts these names.
2270
- // Building an HTML string and assigning it via outerHTML round-trips through
2271
- // the parser and produces an element with all source attributes intact.
2272
- // Stop Alpine from observing further DOM mutations and flush any pending
2273
- // effects. Then restore each hydrate target by replacing it with a fresh
2274
- // element parsed from a source-attribute HTML string. Replacing the element
2275
- // (rather than mutating attributes in place) detaches it from Alpine's
2276
- // reactive bindings entirely — the new node has no `_x_*` state, no
2277
- // effects, and no observers. Alpine's MutationObserver is stopped first
2278
- // so it can't pick up the new node and re-process it.
2256
+ // For explicit `data-hydrate` roots, the entry also carries the original
2257
+ // innerHTML so the whole subtree is restored to source, not just its
2258
+ // attributes.
2279
2259
  //
2280
- // We process snapshots deepest-first so that when an ancestor is rebuilt,
2281
- // its children have already been replaced with their pristine versions and
2282
- // are captured (via innerHTML) into the new ancestor.
2283
- const restoreReport = await page.evaluate(async () => {
2284
- try { window.Alpine && window.Alpine.flushAndStopDeferringMutations && window.Alpine.flushAndStopDeferringMutations(); } catch (_) {}
2285
- try { window.Alpine && window.Alpine.stopObservingMutations && window.Alpine.stopObservingMutations(); } catch (_) {}
2286
- await Promise.resolve();
2287
- await Promise.resolve();
2288
-
2289
- const snapshots = window.__manifestHydrateSnapshots || [];
2290
- const report = { total: snapshots.length, restored: 0, notFound: 0, errors: [] };
2291
-
2292
- // Resolve every snapshot to its element, then sort by depth (deepest first).
2293
- const items = [];
2294
- snapshots.forEach(({ id, attrs }) => {
2295
- const el = document.querySelector(`[data-manifest-hyd-id="${id}"]`);
2296
- if (!el) { report.notFound++; return; }
2297
- let depth = 0;
2298
- for (let p = el.parentNode; p; p = p.parentNode) depth++;
2299
- items.push({ id, el, attrs, depth });
2260
+ // The catalog here is the authoritative list of "what counts as
2261
+ // interactive" and MUST match the docs/articles surface.
2262
+ const hydrationContractRaw = await page.evaluate(() => {
2263
+ // Drain any mutations not yet delivered to the observer so our source
2264
+ // map has the latest values.
2265
+ try { window.__manifestFlushHydrateSources && window.__manifestFlushHydrateSources(); } catch (_) {}
2266
+
2267
+ const sourceAttrs = window.__manifestSourceAttrs || new Map();
2268
+ const sourceInnerHTML = window.__manifestSourceInnerHTML || new Map();
2269
+
2270
+ // --- CATALOG: what makes an element a hydrate target ---
2271
+ // Interactive Manifest-registered directives that attach click/hover/
2272
+ // observer state at runtime and therefore need the live Alpine scope.
2273
+ const INTERACTIVE_DIRECTIVES = new Set([
2274
+ 'x-theme', 'x-dropdown', 'x-tooltip', 'x-tab', 'x-tabpanel',
2275
+ 'x-toast', 'x-carousel', 'x-resize', 'x-anchors', 'x-model',
2276
+ 'x-files', 'x-data-files',
2277
+ ]);
2278
+ // Runtime-only Alpine magics whose values change after the prerender
2279
+ // snapshot (e.g. via media query, route change, auth state). Bindings
2280
+ // referencing these must re-evaluate in the live page.
2281
+ const RUNTIME_MAGIC_RX = /\$(theme|locale|url|auth|search|query|toast)\b/;
2282
+
2283
+ const isDiffBindingAttr = (name) =>
2284
+ name === ':class' || name === 'x-bind:class' ||
2285
+ name === ':style' || name === 'x-bind:style';
2286
+
2287
+ const isEventAttr = (name) =>
2288
+ name.charCodeAt(0) === 64 /* @ */ || name.startsWith('x-on:');
2289
+
2290
+ const isBindingAttr = (name) =>
2291
+ name.charCodeAt(0) === 58 /* : */ || name.startsWith('x-bind:') || name.startsWith('x-');
2292
+
2293
+ const classifyElement = (el) => {
2294
+ // Explicit data-hydrate — subtree-wide restoration.
2295
+ if (el.hasAttribute('data-hydrate')) return 'explicit';
2296
+
2297
+ const list = el.attributes;
2298
+ for (let i = 0; i < list.length; i++) {
2299
+ const name = list[i].name;
2300
+ const val = list[i].value;
2301
+
2302
+ if (INTERACTIVE_DIRECTIVES.has(name)) return 'interactive';
2303
+ if (isEventAttr(name)) return 'event';
2304
+ if (isDiffBindingAttr(name)) return 'diff-binding';
2305
+ if (isBindingAttr(name) && val && RUNTIME_MAGIC_RX.test(val)) return 'runtime-magic';
2306
+ }
2307
+ return null;
2308
+ };
2309
+
2310
+ // --- Walk: collect all hydrate targets ---
2311
+ const targets = new Set();
2312
+ const subtreeRoots = new Set(); // explicit roots — restore innerHTML too
2313
+ const all = document.body ? document.body.querySelectorAll('*') : [];
2314
+ all.forEach((el) => {
2315
+ const kind = classifyElement(el);
2316
+ if (!kind) return;
2317
+ if (kind === 'explicit') {
2318
+ subtreeRoots.add(el);
2319
+ targets.add(el);
2320
+ el.querySelectorAll('*').forEach((d) => targets.add(d));
2321
+ } else {
2322
+ targets.add(el);
2323
+ }
2300
2324
  });
2301
- items.sort((a, b) => b.depth - a.depth);
2302
-
2303
- const voidEls = new Set(['area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'link', 'meta', 'param', 'source', 'track', 'wbr']);
2304
- const escAttr = (s) => String(s == null ? '' : s).replace(/&/g, '&amp;').replace(/"/g, '&quot;');
2305
-
2306
- items.forEach(({ id, attrs }) => {
2307
- // Re-resolve the element by id every iteration: ancestors that were
2308
- // already rebuilt will have re-parsed their children, so previous
2309
- // references are stale.
2310
- const el = document.querySelector(`[data-manifest-hyd-id="${id}"]`);
2311
- if (!el || !el.parentNode) { report.errors.push({ id, msg: 'lost reference' }); return; }
2312
- const tag = el.tagName.toLowerCase();
2313
- const attrString = Object.entries(attrs)
2314
- .map(([name, value]) => `${name}="${escAttr(value)}"`)
2315
- .join(' ');
2316
- const innerHTML = voidEls.has(tag) ? '' : el.innerHTML;
2317
- const newHTML = voidEls.has(tag)
2318
- ? `<${tag} ${attrString} data-prerender-hydrate="1">`
2319
- : `<${tag} ${attrString} data-prerender-hydrate="1">${innerHTML}</${tag}>`;
2320
- // Parse via a temporary container so we can use replaceChild (more
2321
- // reliable than outerHTML in nested-replace scenarios).
2322
- const tmp = document.createElement(el.parentNode.tagName === 'TR' ? 'tr' : 'div');
2323
- tmp.innerHTML = newHTML;
2324
- const parsed = tmp.firstElementChild;
2325
- if (!parsed) { report.errors.push({ id, msg: 'parse failed' }); return; }
2326
- try {
2327
- el.parentNode.replaceChild(parsed, el);
2328
- report.restored++;
2329
- } catch (e) {
2330
- report.errors.push({ id, tag, msg: String(e && e.message || e) });
2325
+
2326
+ // --- Build contract entries ---
2327
+ let nextId = 0;
2328
+ const entries = [];
2329
+ targets.forEach((el) => {
2330
+ const source = sourceAttrs.get(el);
2331
+ const attrsOut = {};
2332
+ let dirty = false;
2333
+
2334
+ // Collect attributes that DIVERGED from source. For each current
2335
+ // attribute: if the source recorded a different value (or absent),
2336
+ // we need to restore the source value.
2337
+ const currentAttrs = {};
2338
+ const list = el.attributes;
2339
+ for (let i = 0; i < list.length; i++) {
2340
+ currentAttrs[list[i].name] = list[i].value;
2341
+ }
2342
+
2343
+ if (source) {
2344
+ // For every attribute in source, check if current differs.
2345
+ for (const name in source) {
2346
+ if (name === 'data-hydrate-id') continue;
2347
+ const src = source[name];
2348
+ const cur = name in currentAttrs ? currentAttrs[name] : null;
2349
+ if (src !== cur) {
2350
+ attrsOut[name] = src; // may be null (means "remove this attribute")
2351
+ dirty = true;
2352
+ }
2353
+ }
2354
+ // For current attributes that weren't in source, remove them.
2355
+ for (const name in currentAttrs) {
2356
+ if (name === 'data-hydrate-id') continue;
2357
+ if (!(name in source)) {
2358
+ attrsOut[name] = null;
2359
+ dirty = true;
2360
+ }
2361
+ }
2362
+ }
2363
+ // If no source recorded and it's not an explicit subtree root, the
2364
+ // element had no mutations observed — no restoration needed.
2365
+
2366
+ const innerHTMLSource = sourceInnerHTML.get(el);
2367
+ let innerHTMLEntry;
2368
+ if (subtreeRoots.has(el) && innerHTMLSource !== undefined) {
2369
+ if (innerHTMLSource !== el.innerHTML) {
2370
+ innerHTMLEntry = innerHTMLSource;
2371
+ dirty = true;
2372
+ }
2331
2373
  }
2374
+
2375
+ if (!dirty) return;
2376
+
2377
+ const id = 'h' + nextId++;
2378
+ el.setAttribute('data-hydrate-id', id);
2379
+ const entry = { id, attrs: attrsOut };
2380
+ if (innerHTMLEntry !== undefined) entry.html = innerHTMLEntry;
2381
+ entries.push(entry);
2332
2382
  });
2333
2383
 
2334
- return report;
2384
+ return entries;
2335
2385
  });
2386
+ // Stash the contract on the route record for HTML injection later.
2387
+ // We carry it through as a string to avoid re-stringifying multiple times.
2388
+ const hydrationContractJSON = JSON.stringify(hydrationContractRaw || []);
2336
2389
  if (config.debugPrerender) {
2337
- pushDebug({ path: displayPath, stage: 'hydrate-restore', metrics: restoreReport });
2390
+ pushDebug({
2391
+ path: displayPath,
2392
+ stage: 'hydrate-contract',
2393
+ metrics: { entries: (hydrationContractRaw || []).length },
2394
+ });
2338
2395
  }
2339
2396
 
2340
2397
  // x-for lists: keep static lists in the HTML for SEO; collapse only dynamic lists so Alpine re-renders.
@@ -2639,6 +2696,19 @@ async function runPrerender(config) {
2639
2696
  });
2640
2697
 
2641
2698
  let html = await page.evaluate(() => document.documentElement.outerHTML);
2699
// Inject the hydration contract blob into the raw HTML *before* caching
// it for locale variant generation, so every locale variant inherits the
// same contract (locale substitution only mutates visible text, not the
// JSON blob). This is the only injection point: the Node.js
// post-processing further down does not inject it again, which keeps the
// cache model simple: "raw HTML carries its own contract."
if (hydrationContractJSON && hydrationContractJSON !== '[]') {
  // Keep the JSON from terminating its own <script> element early.
  const safe = hydrationContractJSON.replace(/<\/script/gi, '<\\/script');
  const blob = `<script type="application/json" id="__manifest_hydrate__">${safe}</script>\n`;
  // Splice by index instead of html.replace('</body>', string):
  //  - a string replacement expands `$&`, `$'`, `` $` `` and `$$`, and the
  //    contract holds arbitrary attribute values / innerHTML that may
  //    contain those sequences, which would corrupt the blob;
  //  - the document's real closing tag is the LAST `</body>`; an earlier
  //    occurrence can be literal text inside an inline script or textarea.
  const bodyClose = html.lastIndexOf('</body>');
  if (bodyClose !== -1) {
    html = html.slice(0, bodyClose) + blob + html.slice(bodyClose);
  }
}
2642
2712
  // Cache raw DOM snapshot for locale variant generation (before any Node.js transforms).
2643
2713
  if (typeof onRawHtml === 'function') onRawHtml(pathSeg, html);
2644
2714
  if (config.debugPrerender) {
@@ -2676,11 +2746,7 @@ async function runPrerender(config) {
2676
2746
  html = stripRedundantImgSrcBindings(html);
2677
2747
  html = stripEmptyInlineMaskStyles(html);
2678
2748
  html = stripResolvedXIconDirectives(html);
2679
- // markPrerenderedManifestComponents must run BEFORE stripPrerenderHydrateMarkers so it can
2680
- // detect data-prerender-hydrate markers and skip components inside hydrate islands.
2681
2749
  html = markPrerenderedManifestComponents(html);
2682
- html = stripPrerenderHydrateMarkers(html);
2683
- html = stripPrerenderHydrateSnapshotIds(html);
2684
2750
  html = rewriteHtmlAssetPaths(html, fileSegments.length);
2685
2751
  const liveBase = config.liveUrl.replace(/\/$/, '');
2686
2752
  const canonicalHreflang = buildCanonicalAndHreflang(is404 ? '' : pathSeg, locales, defaultLocale, liveBase);
@@ -2699,6 +2765,8 @@ async function runPrerender(config) {
2699
2765
  '</head>',
2700
2766
  `${canonicalHreflang}${injectOgLocale ? ogLocale : ''}${routeMeta}${baseMeta}${prerenderedMeta}<meta name="manifest:router-base-depth" content="${routeDepth}">\n</head>`
2701
2767
  );
2768
+ // (Hydration contract was already injected into the raw HTML before
2769
+ // the Node.js post-processing pipeline ran, so it's already present.)
2702
2770
  mkdirSync(outDir, { recursive: true });
2703
2771
  writeFileSync(outFile, html, 'utf8');
2704
2772
  pushDebug({
@@ -2717,30 +2785,99 @@ async function runPrerender(config) {
2717
2785
  process.stderr.write(`prerender: failed ${displayPath}: ${failedPaths[failedPaths.length - 1].message}\n`);
2718
2786
  }
2719
2787
  } finally {
2720
- await page.close();
2788
+ try { await page.close(); } catch (_) { /* page may be gone if browser died */ }
2721
2789
  }
2722
2790
  }
2723
2791
 
2724
- // Phase 1: Puppeteer — render base paths, cache raw DOM for substitution
2792
+ // Phase 1: Puppeteer — render base paths, cache raw DOM for substitution.
2793
+ // Any failures (e.g. transient navigation timeouts) are retried up to
2794
+ // `maxRetries` times with a short backoff before being reported as fatal.
2795
+ //
2796
+ // Browser recycling: after every `browserRecycleEvery` finished pages (successful or out of retries),
2797
+ // all workers pause, one worker closes the browser and launches a fresh
2798
+ // one, then all resume. This bounds Chromium's memory + handle growth.
2725
2799
  try {
2726
2800
  let index = 0;
2801
+ let activeWorkers = 0;
2802
+ const recycleGate = { resume: null, waitForZero: null };
2803
+
2804
// Returns a promise that settles once no worker is inside processPath.
// If workers are still active, the resolver is parked on
// recycleGate.waitForZero; the worker whose decrement brings
// activeWorkers to zero fires it.
const waitUntilZero = () => {
  return new Promise((done) => {
    if (activeWorkers !== 0) {
      recycleGate.waitForZero = done;
      return;
    }
    done();
  });
};
2808
// Returns a promise that settles when the in-progress browser recycle
// finishes (immediately if none is running). Several workers may wait at
// once, so each new resolver is chained onto whatever callback is already
// stored in recycleGate.resume.
const waitForResume = () => {
  return new Promise((release) => {
    if (!recycleLock.busy) {
      release();
      return;
    }
    const earlier = recycleGate.resume;
    recycleGate.resume = () => {
      if (earlier) earlier();
      release();
    };
  });
};
2813
+
2814
// Closes the current browser and launches a fresh one once
// `pagesSinceRecycle` has reached `browserRecycleEvery`. A non-positive
// `browserRecycleEvery` disables recycling. Only the caller that takes
// recycleLock performs the recycle; concurrent callers return at once.
const maybeRecycleBrowser = async () => {
  const recyclingDisabled = browserRecycleEvery <= 0;
  const belowThreshold = pagesSinceRecycle < browserRecycleEvery;
  if (recyclingDisabled || belowThreshold || recycleLock.busy) return;

  recycleLock.busy = true;
  try {
    // Let every in-flight worker finish its current page first.
    await waitUntilZero();
    process.stdout.write(`prerender: recycling browser (processed ${pagesSinceRecycle} pages)\n`);
    try {
      await browser.close();
    } catch (_) {
      // Best effort: a close failure must not block the relaunch.
    }
    browser = await launchBrowser();
    pagesSinceRecycle = 0;
  } finally {
    // Always drop the lock and wake paused workers, even if relaunch threw.
    recycleLock.busy = false;
    const wake = recycleGate.resume;
    recycleGate.resume = null;
    if (wake) wake();
  }
};
2833
+
2727
2834
  async function worker() {
2728
2835
  while (true) {
2836
+ // Pause if a recycle is underway.
2837
+ if (recycleLock.busy) await waitForResume();
2838
+
2729
2839
  const i = index++;
2730
2840
  if (i >= puppeteerPaths.length) return;
2731
- await processPath(puppeteerPaths[i], i, {
2732
- onRawHtml: (seg, html) => {
2733
- // Cache raw DOM snapshot for locale variant generation (NOT_FOUND_PATH excluded)
2734
- if (seg !== NOT_FOUND_PATH) baseHtmlCache.set(seg || '', html);
2735
- },
2736
- });
2841
+ const pathSeg = puppeteerPaths[i];
2842
+ let attempt = 0;
2843
+ while (true) {
2844
+ const failureCountBefore = failedPaths.length;
2845
+ activeWorkers++;
2846
+ try {
2847
+ await processPath(pathSeg, i, {
2848
+ onRawHtml: (seg, html) => {
2849
+ if (seg !== NOT_FOUND_PATH) baseHtmlCache.set(seg || '', html);
2850
+ },
2851
+ });
2852
+ } finally {
2853
+ activeWorkers--;
2854
+ if (activeWorkers === 0 && recycleGate.waitForZero) {
2855
+ const z = recycleGate.waitForZero;
2856
+ recycleGate.waitForZero = null;
2857
+ z();
2858
+ }
2859
+ }
2860
+ if (failedPaths.length === failureCountBefore) {
2861
+ pagesSinceRecycle++;
2862
+ break; // success
2863
+ }
2864
+ if (attempt >= maxRetries) { pagesSinceRecycle++; break; }
2865
+ failedPaths.pop();
2866
+ attempt++;
2867
+ const displayPath = pathSeg === '' ? '/' : (pathSeg === NOT_FOUND_PATH ? '/__prerender_404__' : '/' + pathSeg);
2868
+ process.stderr.write(`prerender: retrying ${displayPath} (attempt ${attempt + 1}/${maxRetries + 1})\n`);
2869
+ await new Promise((r) => setTimeout(r, 500 * attempt));
2870
+ }
2871
+ // Attempt recycle after each completed path (only one worker will
2872
+ // actually perform the recycle; others will be gated by recycleLock).
2873
+ await maybeRecycleBrowser();
2737
2874
  }
2738
2875
  }
2739
2876
  await Promise.all(
2740
2877
  Array.from({ length: Math.min(concurrency, puppeteerPaths.length || 1) }, () => worker())
2741
2878
  );
2742
2879
  } finally {
2743
- await browser.close();
2880
+ try { await browser.close(); } catch (_) {}
2744
2881
  }
2745
2882
 
2746
2883
  // Phase 2: Node.js — generate locale variants via text substitution
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "mnfst-render",
3
- "version": "0.5.2",
3
+ "version": "0.5.4",
4
4
  "description": "Render Manifest sites to static HTML for SEO",
5
5
  "type": "module",
6
6
  "bin": {