mnfst-render 0.5.3 → 0.5.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. package/manifest.render.mjs +423 -272
  2. package/package.json +1 -1
@@ -231,7 +231,11 @@ function resolveConfig() {
231
231
  redirects: Array.isArray(pre.redirects) ? pre.redirects : [],
232
232
  wait: cli.wait ?? pre.wait ?? null,
233
233
  waitAfterIdle: 0,
234
- concurrency: Math.max(1, cli.concurrency ?? pre.concurrency ?? Math.max(4, cpus().length - 1)),
234
+ // Default concurrency: 2. Chromium per-page memory overhead is large and
235
+ // our hydration source-attribute map adds more per page. On big sites
236
+ // (>100 routes) higher concurrency crashes the browser with OOM/target
237
+ // closed errors. Users can override for small projects with --concurrency.
238
+ concurrency: Math.max(1, cli.concurrency ?? pre.concurrency ?? 2),
235
239
  retries: Math.max(0, cli.retries ?? pre.retries ?? 2),
236
240
  localeSubstitution: true,
237
241
  localeSubstitutionExclude: [],
@@ -601,14 +605,24 @@ function stripDevOnlyContent(html) {
601
605
  return out;
602
606
  }
603
607
 
604
- // --- Strip CDN-injected plugin scripts from snapshot so only the loader remains ---
605
- // When the static page loads, the loader runs once and adds plugins; avoids duplicate script execution.
608
+ // --- Strip plugin scripts injected by the loader during prerender so only the loader tag remains ---
609
+ // When the static page loads, the loader runs once and adds plugins; avoids
610
+ // duplicate script execution (which would cause `const` re-declaration errors).
611
+ // Matches both CDN-minified (.min.js) and self-hosted (.js) plugin URLs.
612
+ // Also strips the loader-injected Alpine script (both defer and non-defer
613
+ // forms) — at runtime the loader re-injects Alpine AFTER plugin registration,
614
+ // and if Chromium serialized an Alpine script tag during the Puppeteer render,
615
+ // leaving it in place would cause Alpine to execute synchronously during HTML
616
+ // parse, before plugins have a chance to register their directives.
606
617
  function stripInjectedPluginScripts(html) {
607
618
  const pluginPattern =
608
- /<script[^>]*\ssrc=["'][^"']*manifest\.(?:components|router|utilities|data|icons|localization|markdown|code|themes|toasts|tooltips|dropdowns|tabs|slides|resize|tailwind|appwrite\.(?:auth|data|presence))[^"']*\.min\.js["'][^>]*>\s*<\/script>/gi;
619
+ /<script[^>]*\ssrc=["'][^"']*manifest\.(?:components|router|utilities|data|icons|localization|markdown|code|themes|toasts|tooltips|dropdowns|tabs|slides|resize|colorpicker|tailwind|appwrite\.(?:auth|data|presence))[^"']*\.(?:min\.)?js["'][^>]*>\s*<\/script>/gi;
609
620
  let out = html.replace(pluginPattern, '');
621
+ const alpinePattern =
622
+ /<script[^>]*\ssrc=["'][^"']*\/alpinejs@[^"']*["'][^>]*>\s*<\/script>/gi;
623
+ out = out.replace(alpinePattern, '');
610
624
  const runtimePattern =
611
- /<script[^>]*\ssrc=["'][^"']*(?:alpinejs\/dist\/cdn\.min\.js|papaparse@[^"']*\/papaparse\.min\.js|marked\/marked\.min\.js|highlightjs\/cdn-release@[^"']*\/highlight\.min\.js)[^"']*["'][^>]*>\s*<\/script>/gi;
625
+ /<script[^>]*\ssrc=["'][^"']*(?:papaparse@[^"']*\/papaparse\.min\.js|marked\/marked\.min\.js|highlightjs\/cdn-release@[^"']*\/highlight\.min\.js)[^"']*["'][^>]*>\s*<\/script>/gi;
612
626
  out = out.replace(runtimePattern, '');
613
627
  return out;
614
628
  }
@@ -842,8 +856,15 @@ function stripDuplicatedLoopDirectives(html) {
842
856
  return html;
843
857
  }
844
858
 
859
+ // Returns true if the attribute string contains either the explicit `data-hydrate`
860
+ // attribute (source-authored hydrate island root) or a `data-hydrate-id` (element
861
+ // that the prerender has tagged as a runtime-restoration target). String-level
862
+ // strip passes use this to skip elements whose attribute state will be restored
863
+ // from the hydration contract at runtime — leaving them untouched is the safest
864
+ // default even though the contract would correct most damage anyway.
845
865
  function isHydrateMarkedAttrs(attrsStr) {
846
- return /\sdata-prerender-hydrate(?:\s*=|[\s>])/i.test(attrsStr || '');
866
+ if (!attrsStr) return false;
867
+ return /\sdata-hydrate(?:-id)?(?:\s*=|[\s>])/i.test(attrsStr);
847
868
  }
848
869
 
849
870
  // --- Strip x-text and x-html that reference $x when static/SEO (content already in snapshot).
@@ -945,22 +966,16 @@ function stripResolvedXIconDirectives(html) {
945
966
  });
946
967
  }
947
968
 
948
- function stripPrerenderHydrateMarkers(html) {
949
- return html.replace(/\sdata-prerender-hydrate(?:=(?:"[^"]*"|'[^']*'|[^\s>]+))?/gi, '');
950
- }
951
-
952
- // Remove the snapshot id attribute used by the hydrate restore phase. These ids
953
- // only exist to let the post-Alpine restore step in Puppeteer find each snapshotted
954
- // element back; they have no purpose in the final output.
955
- function stripPrerenderHydrateSnapshotIds(html) {
956
- return html.replace(/\sdata-manifest-hyd-id(?:=(?:"[^"]*"|'[^']*'|[^\s>]+))?/gi, '');
957
- }
958
-
959
969
  function markPrerenderedManifestComponents(html) {
960
970
  return html.replace(/<(x-[a-z][\w-]*)([^>]*)>/gi, (full, tag, attrs) => {
961
971
  const a = attrs || '';
962
972
  if (/\bdata-pre-rendered\s*=/i.test(a) || /\bdata-processed\s*=/i.test(a)) return full;
963
- if (/\bdata-prerender-hydrate\b/i.test(a)) return full; // Inside data-hydrate island — skip
973
+ // Inside an explicit hydrate island — the runtime will restore its
974
+ // innerHTML to the authored source, so we must NOT tell the components
975
+ // processor to skip re-fetching. Leaving the placeholder unmarked lets
976
+ // the runtime restoration reinstate the <x-*> tag and the components
977
+ // plugin processes it normally on load.
978
+ if (/\bdata-hydrate\b/i.test(a)) return full;
964
979
  const spacer = /\S/.test(a) ? ' ' : '';
965
980
  return `<${tag}${a}${spacer}data-pre-rendered="1">`;
966
981
  });
@@ -1343,8 +1358,6 @@ function generateLocaleVariantHtml({
1343
1358
  // markPrerenderedManifestComponents checks for `data-hydrate` markers itself and skips
1344
1359
  // components inside hydrate islands; no separate marker-stripping pass runs after it.
1345
1360
  html = markPrerenderedManifestComponents(html);
1346
- html = stripPrerenderHydrateMarkers(html);
1347
- html = stripPrerenderHydrateSnapshotIds(html);
1348
1361
 
1349
1362
  const fileSegments = pathToFileSegments(pathSeg ? '/' + pathSeg : '/');
1350
1363
  html = rewriteHtmlAssetPaths(html, fileSegments.length);
@@ -1727,35 +1740,58 @@ async function runPrerender(config) {
1727
1740
  const tailwindBuilt = runTailwindCliForPrerender(rootResolved, outputResolved, pre);
1728
1741
  const utilityBlocks = [];
1729
1742
 
1730
- let browser;
1731
- try {
1732
- const chromium = await importFromProject('@sparticuz/chromium');
1733
- const pptr = await importFromProject('puppeteer-core');
1734
- const executablePath = await chromium.default.executablePath();
1735
- browser = await pptr.default.launch({
1736
- args: chromium.default.args,
1737
- defaultViewport: chromium.default.defaultViewport ?? null,
1738
- executablePath,
1739
- headless: chromium.default.headless ?? true,
1740
- ignoreHTTPSErrors: true,
1741
- });
1742
- } catch (serverlessErr) {
1743
- let puppeteer;
1743
+ // Launch a fresh browser instance. Chromium is known to accumulate memory
1744
+ // and handle leaks on large prerender runs (we've seen crashes around page
1745
+ // ~230 on sites with hundreds of routes). The launchBrowser function is
1746
+ // used both for the initial launch AND for periodic recycling — we close
1747
+ // the old browser and start a new one every `browserRecycleEvery` pages to
1748
+ // bound memory growth.
1749
+ async function launchBrowser() {
1744
1750
  try {
1745
- puppeteer = await importFromProject('puppeteer');
1746
- } catch {
1747
- console.error('prerender: missing browser runtime.');
1748
- console.error('Install one of the following, then rerun:');
1749
- console.error(' npm i -D puppeteer');
1750
- console.error(' npm i -D puppeteer-core @sparticuz/chromium');
1751
- process.exit(1);
1751
+ const chromium = await importFromProject('@sparticuz/chromium');
1752
+ const pptr = await importFromProject('puppeteer-core');
1753
+ const executablePath = await chromium.default.executablePath();
1754
+ return await pptr.default.launch({
1755
+ args: chromium.default.args,
1756
+ defaultViewport: chromium.default.defaultViewport ?? null,
1757
+ executablePath,
1758
+ headless: chromium.default.headless ?? true,
1759
+ ignoreHTTPSErrors: true,
1760
+ });
1761
+ } catch (_serverlessErr) {
1762
+ let puppeteer;
1763
+ try {
1764
+ puppeteer = await importFromProject('puppeteer');
1765
+ } catch {
1766
+ console.error('prerender: missing browser runtime.');
1767
+ console.error('Install one of the following, then rerun:');
1768
+ console.error(' npm i -D puppeteer');
1769
+ console.error(' npm i -D puppeteer-core @sparticuz/chromium');
1770
+ process.exit(1);
1771
+ }
1772
+ return await puppeteer.default.launch({ headless: true });
1752
1773
  }
1753
- browser = await puppeteer.default.launch({ headless: true });
1754
1774
  }
1775
+ let browser = await launchBrowser();
1755
1776
 
1756
1777
  const timeout = config.wait ?? 30000;
1778
+ // Lower default concurrency: Chromium's own memory overhead per page is
1779
+ // substantial, and we also now maintain a per-page source-attribute Map for
1780
+ // the hydration contract. On large sites (>100 routes) higher concurrency
1781
+ // spikes memory and crashes the browser. Users can still override via
1782
+ // --concurrency or manifest.prerender.concurrency.
1757
1783
  const concurrency = config.concurrency;
1758
1784
  const maxRetries = config.retries ?? 2;
1785
+ // Recycle the browser every N processed pages to bound resource growth.
1786
+ // Configurable via manifest.prerender.browserRecycleEvery.
1787
+ const browserRecycleEvery = Math.max(0, pre.browserRecycleEvery ?? 40);
1788
+ let pagesSinceRecycle = 0;
1789
+ const recycleLock = { busy: false };
1790
+ // Workers block on this promise before touching `browser`. While a recycle
1791
+ // is in progress it's a pending promise; once the new browser is up it
1792
+ // resolves and workers can proceed. This prevents "browser not ready"
1793
+ // errors from racing retries during recycle.
1794
+ let browserReadyPromise = Promise.resolve();
1759
1795
  const pathTotal = pathList.length;
1760
1796
  const failedPaths = [];
1761
1797
  const debugRows = [];
@@ -1847,6 +1883,11 @@ async function runPrerender(config) {
1847
1883
  : defaultLocale || 'en'
1848
1884
  : defaultLocale || 'en';
1849
1885
 
1886
+ // Wait for any in-progress browser recycle to complete before touching
1887
+ // `browser`. This transparently handles the window between the old
1888
+ // browser being closed and the new one being launched — workers block
1889
+ // here instead of throwing "browser not ready".
1890
+ await browserReadyPromise;
1850
1891
  const page = await browser.newPage();
1851
1892
  try {
1852
1893
  // Align <html lang> with the URL being prerendered before any app script runs.
@@ -1868,161 +1909,113 @@ async function runPrerender(config) {
1868
1909
  }
1869
1910
  }, currentLocale);
1870
1911
 
1871
- // Snapshot pristine source attributes of hydrate-target elements BEFORE Alpine
1872
- // touches them. We do this by wrapping `Alpine.initTree` — Alpine calls this
1873
- // for the initial tree walk AND every time the components plugin lazy-loads a
1874
- // new <x-*> component. Right before Alpine processes a subtree, we walk it
1875
- // and snapshot every hydrate target inside. This is the exact moment the
1876
- // user's source HTML is sitting in the DOM with no Alpine mutations applied.
1912
+ // Deterministic source-attribute capture via MutationObserver with
1913
+ // `attributeOldValue`. This runs before ANY page script and records the
1914
+ // first (pre-mutation) value of every attribute that Alpine or a Manifest
1915
+ // plugin ever touches. It also records the *initial* attributes of every
1916
+ // new element added to the DOM via childList mutations so elements
1917
+ // parsed from innerHTML (components, markdown rendering, etc.) are also
1918
+ // captured the moment they appear.
1919
+ //
1920
+ // The observer handles all mutation surfaces at once:
1921
+ // - setAttribute / removeAttribute
1922
+ // - className setter
1923
+ // - classList.add / remove / toggle / replace
1924
+ // - style.* property assignments (which mutate the style attribute)
1925
+ // - Any other path that ultimately modifies an attribute
1877
1926
  //
1878
- // The snapshots are restored in a later page.evaluate call after Alpine
1879
- // settles. This is true hydration: Alpine never gets to bake state into
1880
- // hydrate elements, so every directive (`:class`, `:style`, `x-text`, custom
1881
- // plugin directives, etc.) works in the prerendered MPA exactly the way it
1882
- // does in the live SPA — no per-binding strip logic, no cloak band-aids, no
1883
- // edge cases to chase.
1927
+ // At serialize time we read the map, identify hydrate targets per the
1928
+ // catalog, and emit a compact JSON hydration contract. The runtime
1929
+ // (`hydratePrerenderedPage` in manifest.js) reads the contract and
1930
+ // restores source attributes before Alpine starts.
1884
1931
  await page.evaluateOnNewDocument(() => {
1885
- const allSnapshots = [];
1886
- let nextId = 0;
1887
- const skipTags = new Set(['MAIN', 'BODY', 'HTML']);
1888
-
1889
- // Own MutationObserver registered before any other script on the page.
1890
- // This guarantees we process DOM additions before Alpine's observer does —
1891
- // critically, before Alpine's observer calls initTree on newly expanded
1892
- // Manifest components (preloaded or lazy) and bakes their `:class` state.
1893
- const installHydrateObserver = () => {
1894
- if (window.__manifestHydrateObserver || !document.body) return;
1895
- const obs = new MutationObserver((mutations) => {
1896
- for (const m of mutations) {
1897
- if (m.type !== 'childList') continue;
1898
- for (const node of m.addedNodes) {
1899
- if (node.nodeType !== 1) continue;
1900
- try { snapshotSubtree(node); } catch (_) {}
1901
- }
1902
- }
1903
- });
1904
- obs.observe(document.body, { childList: true, subtree: true });
1905
- window.__manifestHydrateObserver = obs;
1906
- };
1907
- if (typeof document !== 'undefined') {
1908
- if (document.body) {
1909
- installHydrateObserver();
1910
- } else {
1911
- document.addEventListener('DOMContentLoaded', installHydrateObserver, { once: true });
1912
- // Also try once readyState flips to interactive
1913
- document.addEventListener('readystatechange', () => {
1914
- if (document.readyState !== 'loading') installHydrateObserver();
1915
- });
1932
+ // element -> { attrName: originalValue (null if attribute was absent) }
1933
+ // Keyed by element reference. Note: a Map holds its keys strongly, so detached elements stay in the map; they are simply never looked up at serialize time.
1934
+ const sourceAttrs = new Map();
1935
+ // element -> original innerHTML (only populated for elements already
1936
+ // marked data-hydrate when we first see them — used for subtree-wide
1937
+ // restoration of explicit hydrate islands).
1938
+ const sourceInnerHTML = new Map();
1939
+
1940
+ const recordInitialAttrs = (el) => {
1941
+ if (!el || el.nodeType !== 1 || sourceAttrs.has(el)) return;
1942
+ const rec = {};
1943
+ const list = el.attributes;
1944
+ for (let i = 0; i < list.length; i++) {
1945
+ rec[list[i].name] = list[i].value;
1916
1946
  }
1917
- }
1918
-
1919
- const snapshotElement = (el) => {
1920
- if (!el || el.nodeType !== 1) return;
1921
- if (el.hasAttribute('data-manifest-hyd-id')) return; // already snapshotted
1922
- const id = '__manifest-hyd-' + nextId++;
1923
- el.setAttribute('data-manifest-hyd-id', id);
1924
- const attrs = {};
1925
- for (let i = 0; i < el.attributes.length; i++) {
1926
- const a = el.attributes[i];
1927
- if (a.name === 'data-manifest-hyd-id') continue;
1928
- attrs[a.name] = a.value;
1947
+ sourceAttrs.set(el, rec);
1948
+ if (el.hasAttribute && el.hasAttribute('data-hydrate')) {
1949
+ try { sourceInnerHTML.set(el, el.innerHTML); } catch (_) {}
1929
1950
  }
1930
- allSnapshots.push({ id, tag: el.tagName, attrs });
1931
1951
  };
1932
1952
 
1933
- const snapshotElementAndDescendants = (el) => {
1934
- snapshotElement(el);
1935
- if (el && el.querySelectorAll) {
1936
- el.querySelectorAll('*').forEach(snapshotElement);
1953
+ const handleMutations = (mutations) => {
1954
+ for (const m of mutations) {
1955
+ if (m.type === 'attributes') {
1956
+ const el = m.target;
1957
+ let rec = sourceAttrs.get(el);
1958
+ if (!rec) {
1959
+ // First time we see this element AT ALL via an attribute record:
1960
+ // seed with every current attribute so we never lose attrs that
1961
+ // existed before any mutation we happened to observe.
1962
+ rec = {};
1963
+ const list = el.attributes;
1964
+ for (let i = 0; i < list.length; i++) {
1965
+ rec[list[i].name] = list[i].value;
1966
+ }
1967
+ // Overwrite the one being mutated with the true oldValue
1968
+ // (which may be null if the attribute was absent pre-mutation).
1969
+ rec[m.attributeName] = m.oldValue;
1970
+ sourceAttrs.set(el, rec);
1971
+ } else if (!(m.attributeName in rec)) {
1972
+ rec[m.attributeName] = m.oldValue;
1973
+ }
1974
+ } else if (m.type === 'childList') {
1975
+ for (const node of m.addedNodes) {
1976
+ if (node.nodeType !== 1) continue;
1977
+ recordInitialAttrs(node);
1978
+ if (node.querySelectorAll) {
1979
+ node.querySelectorAll('*').forEach(recordInitialAttrs);
1980
+ }
1981
+ }
1982
+ }
1937
1983
  }
1938
1984
  };
1939
1985
 
1940
- const snapshotSubtree = (root) => {
1941
- if (!root || root.nodeType !== 1) return;
1942
-
1943
- // 1. Direct data-hydrate roots + descendants within this subtree.
1944
- const hydrateRoots = [];
1945
- if (root.matches && root.matches('[data-hydrate]')) hydrateRoots.push(root);
1946
- if (root.querySelectorAll) {
1947
- root.querySelectorAll('[data-hydrate]').forEach((el) => hydrateRoots.push(el));
1948
- }
1949
- hydrateRoots.forEach(snapshotElementAndDescendants);
1986
+ const observer = new MutationObserver(handleMutations);
1950
1987
 
1951
- // 2. x-theme elements (color mode plugin needs runtime click handler).
1952
- if (root.matches && root.matches('[x-theme]')) snapshotElementAndDescendants(root);
1953
- if (root.querySelectorAll) {
1954
- root.querySelectorAll('[x-theme]').forEach(snapshotElementAndDescendants);
1988
+ let observing = false;
1989
+ const startObserving = () => {
1990
+ if (observing) return true;
1991
+ // We can observe `document` itself — MutationObserver accepts it as a
1992
+ // target and forwards subtree mutations, so we catch <html> creation
1993
+ // and everything under it without racing the parser.
1994
+ try {
1995
+ observer.observe(document, {
1996
+ attributes: true,
1997
+ attributeOldValue: true,
1998
+ childList: true,
1999
+ subtree: true,
2000
+ });
2001
+ observing = true;
2002
+ } catch (_) { return false; }
2003
+ // Seed whatever already exists.
2004
+ if (document.documentElement) {
2005
+ recordInitialAttrs(document.documentElement);
2006
+ document.documentElement.querySelectorAll('*').forEach(recordInitialAttrs);
1955
2007
  }
1956
-
1957
- // 3. Propagate from data-hydrate children to nearest LOCAL x-data ancestor
1958
- // so the reactive controller, sibling event handlers (@click toggles
1959
- // etc.) and all bindings inside the scope are preserved together.
1960
- // Skip page-level scopes (main, body, [x-route]).
1961
- hydrateRoots.forEach((el) => {
1962
- let ancestor = el.parentElement;
1963
- while (ancestor && ancestor !== document.body) {
1964
- if (
1965
- ancestor.hasAttribute('x-data') &&
1966
- !skipTags.has(ancestor.tagName) &&
1967
- !ancestor.hasAttribute('x-route')
1968
- ) {
1969
- snapshotElementAndDescendants(ancestor);
1970
- break;
1971
- }
1972
- ancestor = ancestor.parentElement;
1973
- }
1974
- });
1975
-
1976
- window.__manifestHydrateSnapshots = allSnapshots;
2008
+ return true;
1977
2009
  };
2010
+ startObserving();
1978
2011
 
1979
- // Wrap Alpine.start so the snapshot runs INSIDE the start call, before
1980
- // Alpine has a chance to walk and mutate the tree. alpine:init as an
1981
- // external hook proved unreliable in some configurations — it fires after
1982
- // Alpine has already processed some elements. Alpine.start is the single
1983
- // synchronous entry point for the initial walk, so wrapping it guarantees
1984
- // we capture source state before any directive has been applied.
1985
- //
1986
- // We also wrap Alpine.initTree for lazy-loaded components that appear in
1987
- // the DOM after Alpine.start() has completed (fetched by the components
1988
- // plugin in response to new <x-*> placeholders).
1989
- //
1990
- // Both wraps are installed via a defineProperty setter on window.Alpine
1991
- // so they land the instant Alpine's CDN script does `window.Alpine = ...`.
1992
- const wrap = (alpine) => {
1993
- if (!alpine || alpine.__manifestRenderWrapped) return;
1994
- alpine.__manifestRenderWrapped = true;
1995
- if (typeof alpine.start === 'function') {
1996
- const originalStart = alpine.start.bind(alpine);
1997
- alpine.start = function () {
1998
- try { snapshotSubtree(document.body); } catch (_) { /* graceful */ }
1999
- return originalStart.apply(this, arguments);
2000
- };
2001
- }
2002
- if (typeof alpine.initTree === 'function') {
2003
- const originalInit = alpine.initTree.bind(alpine);
2004
- alpine.initTree = function (root) {
2005
- try { snapshotSubtree(root || document.body); } catch (_) { /* graceful */ }
2006
- return originalInit.apply(this, arguments);
2007
- };
2008
- }
2012
+ // Flush any pending mutations before the DOM is read for serialization.
2013
+ window.__manifestFlushHydrateSources = () => {
2014
+ try { handleMutations(observer.takeRecords()); } catch (_) {}
2009
2015
  };
2010
-
2011
- let _Alpine;
2012
- try {
2013
- Object.defineProperty(window, 'Alpine', {
2014
- configurable: true,
2015
- enumerable: true,
2016
- get() { return _Alpine; },
2017
- set(v) { _Alpine = v; wrap(v); },
2018
- });
2019
- } catch (_) { /* defineProperty failed, fall back to event listeners */ }
2020
-
2021
- if (typeof document !== 'undefined') {
2022
- // Event-based fallback in case the setter trap missed Alpine assignment.
2023
- document.addEventListener('alpine:init', () => wrap(window.Alpine));
2024
- document.addEventListener('alpine:initialized', () => wrap(window.Alpine));
2025
- }
2016
+ // Expose for the contract-emission phase.
2017
+ window.__manifestSourceAttrs = sourceAttrs;
2018
+ window.__manifestSourceInnerHTML = sourceInnerHTML;
2026
2019
  });
2027
2020
 
2028
2021
  pushDebug({ path: displayPath, stage: 'start' });
@@ -2256,88 +2249,154 @@ async function runPrerender(config) {
2256
2249
  });
2257
2250
  });
2258
2251
 
2259
- // Restore hydrate-target elements to their pristine source attributes
2260
- // (snapshotted via evaluateOnNewDocument before Alpine ran). This is true
2261
- // hydration: every Alpine binding (`:class`, `:style`, `:value`, `x-text`,
2262
- // `x-init`, custom plugin directives, …) is preserved exactly as authored,
2263
- // and Alpine processes them at runtime in the prerendered MPA the same way
2264
- // it would in the live SPA. After restoring source attributes we re-add the
2265
- // `data-prerender-hydrate` marker so downstream Node.js stripping passes
2266
- // continue to skip these elements.
2252
+ // Emit the hydration contract: walk the DOM, identify every hydrate
2253
+ // target (explicit `data-hydrate`, interactive Manifest directives,
2254
+ // diff-semantic bindings, runtime-magic-driven bindings), tag each with
2255
+ // `data-hydrate-id`, and collect the diff between each target's source
2256
+ // attributes (recorded by the MutationObserver in evaluateOnNewDocument)
2257
+ // and its current post-render attributes. The contract is returned as a
2258
+ // JSON-serialisable array; the runtime reads it on page load and restores
2259
+ // source state before Alpine starts.
2267
2260
  //
2268
- // Implementation note: we use `outerHTML` to swap the element rather than
2269
- // `setAttribute` per-attribute. Alpine's special attribute names (`@click`,
2270
- // possibly others starting with `@`) are not valid DOM Names per the XML
2271
- // production, so `setAttribute('@click', …)` throws InvalidCharacterError.
2272
- // The HTML parser, on the other hand, is lenient and accepts these names.
2273
- // Building an HTML string and assigning it via outerHTML round-trips through
2274
- // the parser and produces an element with all source attributes intact.
2275
- // Stop Alpine from observing further DOM mutations and flush any pending
2276
- // effects. Then restore each hydrate target by replacing it with a fresh
2277
- // element parsed from a source-attribute HTML string. Replacing the element
2278
- // (rather than mutating attributes in place) detaches it from Alpine's
2279
- // reactive bindings entirely — the new node has no `_x_*` state, no
2280
- // effects, and no observers. Alpine's MutationObserver is stopped first
2281
- // so it can't pick up the new node and re-process it.
2261
+ // For explicit `data-hydrate` roots, the entry also carries the original
2262
+ // innerHTML so the whole subtree is restored to source, not just its
2263
+ // attributes.
2282
2264
  //
2283
- // We process snapshots deepest-first so that when an ancestor is rebuilt,
2284
- // its children have already been replaced with their pristine versions and
2285
- // are captured (via innerHTML) into the new ancestor.
2286
- const restoreReport = await page.evaluate(async () => {
2287
- try { window.Alpine && window.Alpine.flushAndStopDeferringMutations && window.Alpine.flushAndStopDeferringMutations(); } catch (_) {}
2288
- try { window.Alpine && window.Alpine.stopObservingMutations && window.Alpine.stopObservingMutations(); } catch (_) {}
2289
- await Promise.resolve();
2290
- await Promise.resolve();
2291
-
2292
- const snapshots = window.__manifestHydrateSnapshots || [];
2293
- const report = { total: snapshots.length, restored: 0, notFound: 0, errors: [] };
2294
-
2295
- // Resolve every snapshot to its element, then sort by depth (deepest first).
2296
- const items = [];
2297
- snapshots.forEach(({ id, attrs }) => {
2298
- const el = document.querySelector(`[data-manifest-hyd-id="${id}"]`);
2299
- if (!el) { report.notFound++; return; }
2300
- let depth = 0;
2301
- for (let p = el.parentNode; p; p = p.parentNode) depth++;
2302
- items.push({ id, el, attrs, depth });
2265
+ // The catalog here is the authoritative list of "what counts as
2266
+ // interactive" and MUST match the docs/articles surface.
2267
+ const hydrationContractRaw = await page.evaluate(() => {
2268
+ // Drain any mutations not yet delivered to the observer so our source
2269
+ // map has the latest values.
2270
+ try { window.__manifestFlushHydrateSources && window.__manifestFlushHydrateSources(); } catch (_) {}
2271
+
2272
+ const sourceAttrs = window.__manifestSourceAttrs || new Map();
2273
+ const sourceInnerHTML = window.__manifestSourceInnerHTML || new Map();
2274
+
2275
+ // --- CATALOG: what makes an element a hydrate target ---
2276
+ // Interactive Manifest-registered directives that attach click/hover/
2277
+ // observer state at runtime and therefore need the live Alpine scope.
2278
+ const INTERACTIVE_DIRECTIVES = new Set([
2279
+ 'x-theme', 'x-dropdown', 'x-tooltip', 'x-tab', 'x-tabpanel',
2280
+ 'x-toast', 'x-carousel', 'x-resize', 'x-anchors', 'x-model',
2281
+ 'x-files', 'x-data-files',
2282
+ ]);
2283
+ // Runtime-only Alpine magics whose values change after the prerender
2284
+ // snapshot (e.g. via media query, route change, auth state). Bindings
2285
+ // referencing these must re-evaluate in the live page.
2286
+ const RUNTIME_MAGIC_RX = /\$(theme|locale|url|auth|search|query|toast)\b/;
2287
+
2288
+ const isDiffBindingAttr = (name) =>
2289
+ name === ':class' || name === 'x-bind:class' ||
2290
+ name === ':style' || name === 'x-bind:style';
2291
+
2292
+ const isEventAttr = (name) =>
2293
+ name.charCodeAt(0) === 64 /* @ */ || name.startsWith('x-on:');
2294
+
2295
+ const isBindingAttr = (name) =>
2296
+ name.charCodeAt(0) === 58 /* : */ || name.startsWith('x-bind:') || name.startsWith('x-');
2297
+
2298
+ const classifyElement = (el) => {
2299
+ // Explicit data-hydrate — subtree-wide restoration.
2300
+ if (el.hasAttribute('data-hydrate')) return 'explicit';
2301
+
2302
+ const list = el.attributes;
2303
+ for (let i = 0; i < list.length; i++) {
2304
+ const name = list[i].name;
2305
+ const val = list[i].value;
2306
+
2307
+ if (INTERACTIVE_DIRECTIVES.has(name)) return 'interactive';
2308
+ if (isEventAttr(name)) return 'event';
2309
+ if (isDiffBindingAttr(name)) return 'diff-binding';
2310
+ if (isBindingAttr(name) && val && RUNTIME_MAGIC_RX.test(val)) return 'runtime-magic';
2311
+ }
2312
+ return null;
2313
+ };
2314
+
2315
+ // --- Walk: collect all hydrate targets ---
2316
+ const targets = new Set();
2317
+ const subtreeRoots = new Set(); // explicit roots — restore innerHTML too
2318
+ const all = document.body ? document.body.querySelectorAll('*') : [];
2319
+ all.forEach((el) => {
2320
+ const kind = classifyElement(el);
2321
+ if (!kind) return;
2322
+ if (kind === 'explicit') {
2323
+ subtreeRoots.add(el);
2324
+ targets.add(el);
2325
+ el.querySelectorAll('*').forEach((d) => targets.add(d));
2326
+ } else {
2327
+ targets.add(el);
2328
+ }
2303
2329
  });
2304
- items.sort((a, b) => b.depth - a.depth);
2305
-
2306
- const voidEls = new Set(['area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'link', 'meta', 'param', 'source', 'track', 'wbr']);
2307
- const escAttr = (s) => String(s == null ? '' : s).replace(/&/g, '&amp;').replace(/"/g, '&quot;');
2308
-
2309
- items.forEach(({ id, attrs }) => {
2310
- // Re-resolve the element by id every iteration: ancestors that were
2311
- // already rebuilt will have re-parsed their children, so previous
2312
- // references are stale.
2313
- const el = document.querySelector(`[data-manifest-hyd-id="${id}"]`);
2314
- if (!el || !el.parentNode) { report.errors.push({ id, msg: 'lost reference' }); return; }
2315
- const tag = el.tagName.toLowerCase();
2316
- const attrString = Object.entries(attrs)
2317
- .map(([name, value]) => `${name}="${escAttr(value)}"`)
2318
- .join(' ');
2319
- const innerHTML = voidEls.has(tag) ? '' : el.innerHTML;
2320
- const newHTML = voidEls.has(tag)
2321
- ? `<${tag} ${attrString} data-prerender-hydrate="1">`
2322
- : `<${tag} ${attrString} data-prerender-hydrate="1">${innerHTML}</${tag}>`;
2323
- // Parse via a temporary container so we can use replaceChild (more
2324
- // reliable than outerHTML in nested-replace scenarios).
2325
- const tmp = document.createElement(el.parentNode.tagName === 'TR' ? 'tr' : 'div');
2326
- tmp.innerHTML = newHTML;
2327
- const parsed = tmp.firstElementChild;
2328
- if (!parsed) { report.errors.push({ id, msg: 'parse failed' }); return; }
2329
- try {
2330
- el.parentNode.replaceChild(parsed, el);
2331
- report.restored++;
2332
- } catch (e) {
2333
- report.errors.push({ id, tag, msg: String(e && e.message || e) });
2330
+
2331
+ // --- Build contract entries ---
2332
+ let nextId = 0;
2333
+ const entries = [];
2334
+ targets.forEach((el) => {
2335
+ const source = sourceAttrs.get(el);
2336
+ const attrsOut = {};
2337
+ let dirty = false;
2338
+
2339
+ // Collect attributes that DIVERGED from source. For each current
2340
+ // attribute: if the source recorded a different value (or absent),
2341
+ // we need to restore the source value.
2342
+ const currentAttrs = {};
2343
+ const list = el.attributes;
2344
+ for (let i = 0; i < list.length; i++) {
2345
+ currentAttrs[list[i].name] = list[i].value;
2334
2346
  }
2347
+
2348
+ if (source) {
2349
+ // For every attribute in source, check if current differs.
2350
+ for (const name in source) {
2351
+ if (name === 'data-hydrate-id') continue;
2352
+ const src = source[name];
2353
+ const cur = name in currentAttrs ? currentAttrs[name] : null;
2354
+ if (src !== cur) {
2355
+ attrsOut[name] = src; // may be null (means "remove this attribute")
2356
+ dirty = true;
2357
+ }
2358
+ }
2359
+ // For current attributes that weren't in source, remove them.
2360
+ for (const name in currentAttrs) {
2361
+ if (name === 'data-hydrate-id') continue;
2362
+ if (!(name in source)) {
2363
+ attrsOut[name] = null;
2364
+ dirty = true;
2365
+ }
2366
+ }
2367
+ }
2368
+ // If no source recorded and it's not an explicit subtree root, the
2369
+ // element had no mutations observed — no restoration needed.
2370
+
2371
+ const innerHTMLSource = sourceInnerHTML.get(el);
2372
+ let innerHTMLEntry;
2373
+ if (subtreeRoots.has(el) && innerHTMLSource !== undefined) {
2374
+ if (innerHTMLSource !== el.innerHTML) {
2375
+ innerHTMLEntry = innerHTMLSource;
2376
+ dirty = true;
2377
+ }
2378
+ }
2379
+
2380
+ if (!dirty) return;
2381
+
2382
+ const id = 'h' + nextId++;
2383
+ el.setAttribute('data-hydrate-id', id);
2384
+ const entry = { id, attrs: attrsOut };
2385
+ if (innerHTMLEntry !== undefined) entry.html = innerHTMLEntry;
2386
+ entries.push(entry);
2335
2387
  });
2336
2388
 
2337
- return report;
2389
+ return entries;
2338
2390
  });
2391
+ // Stash the contract on the route record for HTML injection later.
2392
+ // We carry it through as a string to avoid re-stringifying multiple times.
2393
+ const hydrationContractJSON = JSON.stringify(hydrationContractRaw || []);
2339
2394
  if (config.debugPrerender) {
2340
- pushDebug({ path: displayPath, stage: 'hydrate-restore', metrics: restoreReport });
2395
+ pushDebug({
2396
+ path: displayPath,
2397
+ stage: 'hydrate-contract',
2398
+ metrics: { entries: (hydrationContractRaw || []).length },
2399
+ });
2341
2400
  }
2342
2401
 
2343
2402
  // x-for lists: keep static lists in the HTML for SEO; collapse only dynamic lists so Alpine re-renders.
@@ -2642,6 +2701,19 @@ async function runPrerender(config) {
2642
2701
  });
2643
2702
 
2644
2703
  let html = await page.evaluate(() => document.documentElement.outerHTML);
2704
+ // Inject the hydration contract blob into the raw HTML *before* caching
2705
+ // it for locale variant generation, so every locale variant inherits the
2706
+ // same contract (locale substitution only mutates visible text, not the
2707
+ // JSON blob). The blob is not injected a second time after the Node.js
2708
+ // post-processing below; injecting it once, early, keeps the cache model
2709
+ // simple: "raw HTML carries its own contract."
2710
+ if (hydrationContractJSON && hydrationContractJSON !== '[]') {
2711
+ const safe = hydrationContractJSON.replace(/<\/script/gi, '<\\/script');
2712
+ html = html.replace(
2713
+ '</body>',
2714
+ `<script type="application/json" id="__manifest_hydrate__">${safe}</script>\n</body>`
2715
+ );
2716
+ }
2645
2717
  // Cache raw DOM snapshot for locale variant generation (before any Node.js transforms).
2646
2718
  if (typeof onRawHtml === 'function') onRawHtml(pathSeg, html);
2647
2719
  if (config.debugPrerender) {
@@ -2679,11 +2751,7 @@ async function runPrerender(config) {
2679
2751
  html = stripRedundantImgSrcBindings(html);
2680
2752
  html = stripEmptyInlineMaskStyles(html);
2681
2753
  html = stripResolvedXIconDirectives(html);
2682
- // markPrerenderedManifestComponents must run BEFORE stripPrerenderHydrateMarkers so it can
2683
- // detect data-prerender-hydrate markers and skip components inside hydrate islands.
2684
2754
  html = markPrerenderedManifestComponents(html);
2685
- html = stripPrerenderHydrateMarkers(html);
2686
- html = stripPrerenderHydrateSnapshotIds(html);
2687
2755
  html = rewriteHtmlAssetPaths(html, fileSegments.length);
2688
2756
  const liveBase = config.liveUrl.replace(/\/$/, '');
2689
2757
  const canonicalHreflang = buildCanonicalAndHreflang(is404 ? '' : pathSeg, locales, defaultLocale, liveBase);
@@ -2702,6 +2770,8 @@ async function runPrerender(config) {
2702
2770
  '</head>',
2703
2771
  `${canonicalHreflang}${injectOgLocale ? ogLocale : ''}${routeMeta}${baseMeta}${prerenderedMeta}<meta name="manifest:router-base-depth" content="${routeDepth}">\n</head>`
2704
2772
  );
2773
+ // (Hydration contract was already injected into the raw HTML before
2774
+ // the Node.js post-processing pipeline ran, so it's already present.)
2705
2775
  mkdirSync(outDir, { recursive: true });
2706
2776
  writeFileSync(outFile, html, 'utf8');
2707
2777
  pushDebug({
@@ -2720,45 +2790,126 @@ async function runPrerender(config) {
2720
2790
  process.stderr.write(`prerender: failed ${displayPath}: ${failedPaths[failedPaths.length - 1].message}\n`);
2721
2791
  }
2722
2792
  } finally {
2723
- await page.close();
2793
+ try { await page.close(); } catch (_) { /* page may be gone if browser died */ }
2724
2794
  }
2725
2795
  }
2726
2796
 
2727
2797
  // Phase 1: Puppeteer — render base paths, cache raw DOM for substitution.
2728
2798
  // Any failures (e.g. transient navigation timeouts) are retried up to
2729
2799
  // `maxRetries` times with a short backoff before being reported as fatal.
2800
+ //
2801
+ // Browser recycling: after every `browserRecycleEvery` completed pages (rendered or out of retries),
2802
+ // all workers pause, one worker closes the browser and launches a fresh
2803
+ // one, then all resume. This bounds Chromium's memory + handle growth.
2730
2804
  try {
2731
2805
  let index = 0;
2806
+ let activeWorkers = 0;
2807
+ const recycleGate = { resume: null, waitForZero: null };
2808
+
2809
// Promise that settles once no worker is inside processPath
// (activeWorkers === 0). While workers are still busy, the resolver is parked
// in recycleGate.waitForZero; the worker loop invokes it after the last
// in-flight page finishes. Only one resolver is kept, so a second pending call
// would replace the first — callers are serialised by recycleLock.
const waitUntilZero = () => {
  if (activeWorkers === 0) return Promise.resolve();
  return new Promise((markIdle) => {
    recycleGate.waitForZero = markIdle;
  });
};
2813
// Promise that settles when no browser recycle is in progress. If a recycle
// currently holds recycleLock, the new waiter is chained in front of whatever
// callback is already stored in recycleGate.resume, so a single call to
// recycleGate.resume() wakes every parked worker (earliest waiter first).
const waitForResume = () => {
  if (!recycleLock.busy) return Promise.resolve();
  return new Promise((wake) => {
    const earlierWaiters = recycleGate.resume;
    recycleGate.resume = () => {
      if (earlierWaiters) earlierWaiters();
      wake();
    };
  });
};
2818
+
2819
/**
 * Replace the shared browser with a fresh instance once enough pages have
 * been processed.
 *
 * Does nothing when recycling is disabled (`browserRecycleEvery <= 0`), when
 * fewer than `browserRecycleEvery` pages were counted since the last recycle,
 * or when another worker already holds `recycleLock`.
 *
 * Otherwise it takes the lock, waits until no worker is inside processPath,
 * swaps `browserReadyPromise` for a pending promise (workers await it before
 * starting a page), closes the old browser, launches a new one and resets
 * `pagesSinceRecycle`. The gate, the lock and any workers parked through
 * `recycleGate.resume` are always released, even when the relaunch fails; in
 * that case the launch error propagates to the caller.
 *
 * @returns {Promise<void>}
 */
const maybeRecycleBrowser = async () => {
  if (browserRecycleEvery <= 0) return;
  if (pagesSinceRecycle < browserRecycleEvery) return;
  if (recycleLock.busy) return;
  recycleLock.busy = true;

  // Wait for all in-flight workers to finish their current page BEFORE the
  // browser is touched. The count is re-checked after every wake-up: a worker
  // that tested `recycleLock.busy` just before the lock was taken is still
  // parked on `await browserReadyPromise`, and it increments `activeWorkers`
  // while this function is suspended in its first await. Checking only once
  // would close the browser underneath that worker's page.
  do {
    await waitUntilZero();
  } while (activeWorkers > 0);

  // From here to the close() call nothing yields, so no worker can slip in.
  // Gate every worker that reaches `await browserReadyPromise` from now on.
  let resolveReady;
  browserReadyPromise = new Promise((r) => { resolveReady = r; });
  try {
    process.stdout.write(`prerender: recycling browser (processed ${pagesSinceRecycle} pages)\n`);
    // Best effort: a failure to close the old browser must not stop the
    // relaunch (presumably it may already have crashed).
    try { await browser.close(); } catch (_) {}
    browser = await launchBrowser();
    pagesSinceRecycle = 0;
  } finally {
    // Release the gate first so any waiting workers can proceed, then clear
    // the recycle lock and wake the workers parked in waitForResume().
    resolveReady();
    recycleLock.busy = false;
    const r = recycleGate.resume;
    recycleGate.resume = null;
    if (r) r();
  }
};
2847
+
2732
2848
  async function worker() {
2733
2849
  while (true) {
2850
+ // Pause if a recycle is underway.
2851
+ if (recycleLock.busy) await waitForResume();
2852
+ // Also wait for any pending browser readiness (e.g. another worker
2853
+ // started a recycle while we were processing).
2854
+ await browserReadyPromise;
2855
+
2734
2856
  const i = index++;
2735
2857
  if (i >= puppeteerPaths.length) return;
2736
2858
  const pathSeg = puppeteerPaths[i];
2737
2859
  let attempt = 0;
2738
2860
  while (true) {
2861
+ // Re-check recycle state at the start of every retry iteration.
2862
+ if (recycleLock.busy) await waitForResume();
2863
+ await browserReadyPromise;
2864
+
2739
2865
  const failureCountBefore = failedPaths.length;
2740
- await processPath(pathSeg, i, {
2741
- onRawHtml: (seg, html) => {
2742
- // Cache raw DOM snapshot for locale variant generation (NOT_FOUND_PATH excluded)
2743
- if (seg !== NOT_FOUND_PATH) baseHtmlCache.set(seg || '', html);
2744
- },
2745
- });
2746
- if (failedPaths.length === failureCountBefore) break; // success
2747
- if (attempt >= maxRetries) break; // out of retries — leave the failure recorded
2748
- // Pop the failure record and retry after a short backoff.
2866
+ activeWorkers++;
2867
+ try {
2868
+ await processPath(pathSeg, i, {
2869
+ onRawHtml: (seg, html) => {
2870
+ if (seg !== NOT_FOUND_PATH) baseHtmlCache.set(seg || '', html);
2871
+ },
2872
+ });
2873
+ } catch (err) {
2874
+ // Unexpected exception escaped processPath (e.g. browser died
2875
+ // mid-call). Record as a failure so the retry logic can handle
2876
+ // it gracefully instead of tearing down the whole worker.
2877
+ failedPaths.push({
2878
+ path: pathSeg === '' ? '/' : '/' + pathSeg,
2879
+ message: err && err.message ? err.message : String(err),
2880
+ });
2881
+ if (failedPaths.length <= 10) {
2882
+ process.stderr.write(`prerender: worker exception on ${pathSeg || '/'}: ${failedPaths[failedPaths.length - 1].message}\n`);
2883
+ }
2884
+ } finally {
2885
+ activeWorkers--;
2886
+ if (activeWorkers === 0 && recycleGate.waitForZero) {
2887
+ const z = recycleGate.waitForZero;
2888
+ recycleGate.waitForZero = null;
2889
+ z();
2890
+ }
2891
+ }
2892
+ if (failedPaths.length === failureCountBefore) {
2893
+ pagesSinceRecycle++;
2894
+ break; // success
2895
+ }
2896
+ if (attempt >= maxRetries) { pagesSinceRecycle++; break; }
2749
2897
  failedPaths.pop();
2750
2898
  attempt++;
2751
2899
  const displayPath = pathSeg === '' ? '/' : (pathSeg === NOT_FOUND_PATH ? '/__prerender_404__' : '/' + pathSeg);
2752
2900
  process.stderr.write(`prerender: retrying ${displayPath} (attempt ${attempt + 1}/${maxRetries + 1})\n`);
2753
2901
  await new Promise((r) => setTimeout(r, 500 * attempt));
2754
2902
  }
2903
+ // Attempt recycle after each completed path (only one worker will
2904
+ // actually perform the recycle; others will be gated by recycleLock).
2905
+ await maybeRecycleBrowser();
2755
2906
  }
2756
2907
  }
2757
2908
  await Promise.all(
2758
2909
  Array.from({ length: Math.min(concurrency, puppeteerPaths.length || 1) }, () => worker())
2759
2910
  );
2760
2911
  } finally {
2761
- await browser.close();
2912
+ try { await browser.close(); } catch (_) {}
2762
2913
  }
2763
2914
 
2764
2915
  // Phase 2: Node.js — generate locale variants via text substitution
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "mnfst-render",
3
- "version": "0.5.3",
3
+ "version": "0.5.5",
4
4
  "description": "Render Manifest sites to static HTML for SEO",
5
5
  "type": "module",
6
6
  "bin": {