@turntrout/subfont 1.6.0 → 1.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. package/README.md +43 -43
  2. package/lib/FontTracerPool.d.ts +37 -0
  3. package/lib/FontTracerPool.d.ts.map +1 -0
  4. package/lib/FontTracerPool.js +212 -173
  5. package/lib/FontTracerPool.js.map +1 -0
  6. package/lib/HeadlessBrowser.js +11 -3
  7. package/lib/cli.d.ts +3 -0
  8. package/lib/cli.d.ts.map +1 -0
  9. package/lib/cli.js +15 -12
  10. package/lib/cli.js.map +1 -0
  11. package/lib/collectTextsByPage.js +425 -352
  12. package/lib/escapeJsStringLiteral.js +13 -0
  13. package/lib/extractVisibleText.js +6 -2
  14. package/lib/fontConverter.js +25 -0
  15. package/lib/fontConverterWorker.js +16 -0
  16. package/lib/fontFaceHelpers.js +16 -4
  17. package/lib/gatherStylesheetsWithPredicates.js +4 -5
  18. package/lib/normalizeFontPropertyValue.js +1 -1
  19. package/lib/sfntCache.js +10 -7
  20. package/lib/subfont.d.ts +33 -0
  21. package/lib/subfont.d.ts.map +1 -0
  22. package/lib/subfont.js +533 -591
  23. package/lib/subfont.js.map +1 -0
  24. package/lib/subsetFontWithGlyphs.d.ts +17 -0
  25. package/lib/subsetFontWithGlyphs.d.ts.map +1 -0
  26. package/lib/subsetFontWithGlyphs.js +231 -253
  27. package/lib/subsetFontWithGlyphs.js.map +1 -0
  28. package/lib/subsetFonts.d.ts +59 -0
  29. package/lib/subsetFonts.d.ts.map +1 -0
  30. package/lib/subsetFonts.js +921 -1180
  31. package/lib/subsetFonts.js.map +1 -0
  32. package/lib/subsetGeneration.d.ts +39 -0
  33. package/lib/subsetGeneration.d.ts.map +1 -0
  34. package/lib/subsetGeneration.js +294 -324
  35. package/lib/subsetGeneration.js.map +1 -0
  36. package/lib/unquote.js +9 -4
  37. package/lib/warnAboutMissingGlyphs.js +36 -25
  38. package/lib/wasmQueue.js +6 -2
  39. package/package.json +11 -3
@@ -41,7 +41,6 @@ const fontFaceTraversalTypes = new Set(['HtmlStyle', 'SvgStyle', 'CssImport']);
41
41
  const MIN_PAGES_FOR_WORKER_POOL = 4;
42
42
 
43
43
  const {
44
- extractFeatureTagsFromDecl,
45
44
  findFontFamiliesWithFeatureSettings,
46
45
  resolveFeatureSettings,
47
46
  } = require('./fontFeatureHelpers');
@@ -167,9 +166,10 @@ function computeSnappedGlobalEntries(declarations, globalTextByProps) {
167
166
  return entries;
168
167
  }
169
168
 
170
- // Build global font usage templates and per-page indices from
171
- // snapped entries. Mutates the declCache entry for declKey in place.
172
- function getOrComputeGlobalFontUsages(
169
+ // Fill in fontUsageTemplates/pageTextIndex/preloadIndex on the cached
170
+ // declarations entry. No-op on repeat calls; results are shared across
171
+ // pages that resolve to the same @font-face set.
172
+ function populateGlobalFontUsages(
173
173
  cached,
174
174
  accumulatedFontFaceDeclarations,
175
175
  text
@@ -310,8 +310,9 @@ function getOrComputeGlobalFontUsages(
310
310
  cached.preloadIndex = textAndPropsToFontUrl;
311
311
  }
312
312
 
313
- // Extract font tracing (worker pool + sequential) to reduce
314
- // cyclomatic complexity of collectTextsByPage.
313
+ // Trace fonts across the given pages. Uses a worker pool when the workload
314
+ // justifies the thread-startup overhead; otherwise falls back to sequential
315
+ // in-process tracing (required when a HeadlessBrowser is driving things).
315
316
  async function tracePages(
316
317
  pagesNeedingFullTrace,
317
318
  {
@@ -411,16 +412,15 @@ async function tracePages(
411
412
  }
412
413
  }
413
414
 
414
- // Extract fast-path text extraction to reduce collectTextsByPage complexity.
415
- // Pages sharing the same CSS configuration reuse the representative's
416
- // props and only extract visible text content.
415
+ // For each page that shares a representative's CSS configuration, copy the
416
+ // representative's font-variant props and overlay this page's visible text.
417
+ // Returns the number of pages that had to fall back to a full trace
418
+ // (because inline style attributes made the fast path unsafe).
417
419
  function processFastPathPages(
418
420
  fastPathPages,
419
- { memoizedGetCssRulesByProperty, subTimings, trackPhase }
421
+ { memoizedGetCssRulesByProperty }
420
422
  ) {
421
- if (fastPathPages.length === 0) return;
422
-
423
- const fastPathPhase = trackPhase('Fast-path extraction');
423
+ if (fastPathPages.length === 0) return 0;
424
424
 
425
425
  const repDataCache = new Map();
426
426
  function getRepData(representativePd) {
@@ -528,234 +528,205 @@ function processFastPathPages(
528
528
  });
529
529
  }
530
530
  }
531
- subTimings['Fast-path extraction'] = fastPathPhase.end(
532
- `${fastPathPages.length} pages, ${fastPathFallbacks} fell back to full trace`
533
- );
531
+ return fastPathFallbacks;
534
532
  }
535
533
 
536
- async function collectTextsByPage(
537
- assetGraph,
538
- htmlOrSvgAssets,
539
- {
540
- text,
541
- console,
542
- dynamic = false,
543
- debug = false,
544
- concurrency,
545
- chromeArgs = [],
546
- } = {}
547
- ) {
548
- const htmlOrSvgAssetTextsWithProps = [];
549
-
550
- const memoizedGetCssRulesByProperty = memoizeSync(getCssRulesByProperty);
551
-
552
- const fontFaceDeclarationsByHtmlOrSvgAsset = new Map();
553
-
554
- // Cache stylesheet-dependent results for pages with identical CSS
555
- // configurations.
556
- const stylesheetResultCache = new Map();
557
-
558
- // Pre-build an index of stylesheet-related relations by source asset
559
- // to avoid repeated assetGraph.findRelations scans (O(allRelations) each).
560
- const stylesheetRelTypes = [
561
- 'HtmlStyle',
562
- 'SvgStyle',
563
- 'CssImport',
564
- 'HtmlConditionalComment',
565
- 'HtmlNoscript',
566
- ];
567
- const stylesheetRelsByFromAsset = new Map();
534
+ // Pre-build an index of stylesheet-related relations by source asset
535
+ // to avoid repeated assetGraph.findRelations scans (O(allRelations) each).
536
+ const STYLESHEET_REL_TYPES = [
537
+ 'HtmlStyle',
538
+ 'SvgStyle',
539
+ 'CssImport',
540
+ 'HtmlConditionalComment',
541
+ 'HtmlNoscript',
542
+ ];
543
+
544
+ function indexStylesheetRelations(assetGraph) {
545
+ const byFromAsset = new Map();
568
546
  for (const relation of assetGraph.findRelations({
569
- type: {
570
- $in: stylesheetRelTypes,
571
- },
547
+ type: { $in: STYLESHEET_REL_TYPES },
572
548
  })) {
573
- let arr = stylesheetRelsByFromAsset.get(relation.from);
549
+ let arr = byFromAsset.get(relation.from);
574
550
  if (!arr) {
575
551
  arr = [];
576
- stylesheetRelsByFromAsset.set(relation.from, arr);
552
+ byFromAsset.set(relation.from, arr);
577
553
  }
578
554
  arr.push(relation);
579
555
  }
556
+ return byFromAsset;
557
+ }
580
558
 
581
- // Build a cache key by traversing stylesheet relations, capturing
582
- // both asset identity and relation context (media, conditionalComment,
583
- // noscript) that affect gatherStylesheetsWithPredicates output.
584
- function buildStylesheetKey(htmlOrSvgAsset, skipNonFontInlineCss) {
585
- const keyParts = [];
586
- const visited = new Set();
587
- (function traverse(asset, isNoscript) {
588
- if (visited.has(asset)) return;
589
- if (!asset.isLoaded) return;
590
- visited.add(asset);
591
- for (const relation of stylesheetRelsByFromAsset.get(asset) || []) {
592
- if (relation.type === 'HtmlNoscript') {
593
- traverse(relation.to, true);
594
- } else if (relation.type === 'HtmlConditionalComment') {
595
- keyParts.push(`cc:${relation.condition}`);
596
- traverse(relation.to, isNoscript);
597
- } else {
598
- const target = relation.to;
599
- if (
600
- skipNonFontInlineCss &&
601
- target.isInline &&
602
- target.type === 'Css' &&
603
- !fontRelevantCssRegex.test(target.text || '')
604
- ) {
605
- continue;
606
- }
607
- const media = relation.media || '';
608
- keyParts.push(`${target.id}:${media}:${isNoscript ? 'ns' : ''}`);
609
- traverse(target, isNoscript);
559
+ // Build a cache key by traversing stylesheet relations, capturing
560
+ // both asset identity and relation context (media, conditionalComment,
561
+ // noscript) that affect gatherStylesheetsWithPredicates output.
562
+ function buildStylesheetKey(
563
+ htmlOrSvgAsset,
564
+ skipNonFontInlineCss,
565
+ stylesheetRelsByFromAsset
566
+ ) {
567
+ const keyParts = [];
568
+ const visited = new Set();
569
+ (function traverse(asset, isNoscript) {
570
+ if (visited.has(asset)) return;
571
+ if (!asset.isLoaded) return;
572
+ visited.add(asset);
573
+ for (const relation of stylesheetRelsByFromAsset.get(asset) || []) {
574
+ if (relation.type === 'HtmlNoscript') {
575
+ traverse(relation.to, true);
576
+ } else if (relation.type === 'HtmlConditionalComment') {
577
+ keyParts.push(`cc:${relation.condition}`);
578
+ traverse(relation.to, isNoscript);
579
+ } else {
580
+ const target = relation.to;
581
+ if (
582
+ skipNonFontInlineCss &&
583
+ target.isInline &&
584
+ target.type === 'Css' &&
585
+ !fontRelevantCssRegex.test(target.text || '')
586
+ ) {
587
+ continue;
610
588
  }
589
+ const media = relation.media || '';
590
+ keyParts.push(`${target.id}:${media}:${isNoscript ? 'ns' : ''}`);
591
+ traverse(target, isNoscript);
611
592
  }
612
- })(htmlOrSvgAsset, false);
613
- return keyParts.join('\x1d');
614
- }
615
-
616
- function getOrComputeStylesheetResults(htmlOrSvgAsset) {
617
- const key = buildStylesheetKey(htmlOrSvgAsset);
618
- if (stylesheetResultCache.has(key)) {
619
- return stylesheetResultCache.get(key);
620
593
  }
594
+ })(htmlOrSvgAsset, false);
595
+ return keyParts.join('\x1d');
596
+ }
621
597
 
622
- const stylesheetsWithPredicates = gatherStylesheetsWithPredicates(
623
- htmlOrSvgAsset.assetGraph,
624
- htmlOrSvgAsset,
625
- stylesheetRelsByFromAsset
626
- );
598
+ // Walk the stylesheet graph rooted at htmlOrSvgAsset and collect every
599
+ // @font-face declaration into a flat list, preserving the CSS relation node
600
+ // so callers can correlate declarations back to their source rules.
601
+ function collectFontFaceDeclarations(
602
+ htmlOrSvgAsset,
603
+ stylesheetRelsByFromAsset
604
+ ) {
605
+ const accumulatedFontFaceDeclarations = [];
606
+ const visitedAssets = new Set();
607
+ (function traverseForFontFace(asset) {
608
+ if (visitedAssets.has(asset)) return;
609
+ visitedAssets.add(asset);
610
+
611
+ if (asset.type === 'Css' && asset.isLoaded) {
612
+ const seenNodes = new Set();
613
+ const fontRelations = asset.outgoingRelations.filter(
614
+ (relation) => relation.type === 'CssFontFaceSrc'
615
+ );
627
616
 
628
- // Compute accumulatedFontFaceDeclarations by traversing CSS relations
629
- const accumulatedFontFaceDeclarations = [];
630
- {
631
- const visitedAssets = new Set();
632
- (function traverseForFontFace(asset) {
633
- if (visitedAssets.has(asset)) return;
634
- visitedAssets.add(asset);
635
-
636
- if (asset.type === 'Css' && asset.isLoaded) {
637
- const seenNodes = new Set();
638
- const fontRelations = asset.outgoingRelations.filter(
639
- (relation) => relation.type === 'CssFontFaceSrc'
640
- );
617
+ for (const fontRelation of fontRelations) {
618
+ const node = fontRelation.node;
619
+ if (seenNodes.has(node)) continue;
620
+ seenNodes.add(node);
641
621
 
642
- for (const fontRelation of fontRelations) {
643
- const node = fontRelation.node;
644
- if (seenNodes.has(node)) continue;
645
- seenNodes.add(node);
646
-
647
- const fontFaceDeclaration = {
648
- relations: fontRelations.filter((r) => r.node === node),
649
- ...initialValueByProp,
650
- };
651
-
652
- node.walkDecls((declaration) => {
653
- const propName = declaration.prop.toLowerCase();
654
- fontFaceDeclaration[propName] =
655
- propName === 'font-family'
656
- ? cssFontParser.parseFontFamily(declaration.value)[0]
657
- : declaration.value;
658
- });
659
- // Disregard incomplete @font-face declarations (must contain font-family and src per spec):
660
- if (fontFaceDeclaration['font-family'] && fontFaceDeclaration.src) {
661
- accumulatedFontFaceDeclarations.push(fontFaceDeclaration);
662
- }
663
- }
664
- }
622
+ const fontFaceDeclaration = {
623
+ relations: fontRelations.filter((r) => r.node === node),
624
+ ...initialValueByProp,
625
+ };
665
626
 
666
- // Traverse children using the pre-built index
667
- const rels = stylesheetRelsByFromAsset.get(asset) || [];
668
- for (const rel of rels) {
669
- if (
670
- fontFaceTraversalTypes.has(rel.type) ||
671
- (rel.to && rel.to.type === 'Html' && rel.to.isInline)
672
- ) {
673
- traverseForFontFace(rel.to);
674
- }
627
+ node.walkDecls((declaration) => {
628
+ const propName = declaration.prop.toLowerCase();
629
+ fontFaceDeclaration[propName] =
630
+ propName === 'font-family'
631
+ ? cssFontParser.parseFontFamily(declaration.value)[0]
632
+ : declaration.value;
633
+ });
634
+ // Disregard incomplete @font-face declarations (must contain font-family and src per spec):
635
+ if (fontFaceDeclaration['font-family'] && fontFaceDeclaration.src) {
636
+ accumulatedFontFaceDeclarations.push(fontFaceDeclaration);
675
637
  }
676
- })(htmlOrSvgAsset);
677
- }
678
-
679
- // Group @font-face declarations that share family/style/weight but have
680
- // different unicode-range values. Each group's members cover a disjoint
681
- // subset of the Unicode space (common for CJK / large character-set fonts).
682
- const comboGroups = new Map();
683
- for (const fontFace of accumulatedFontFaceDeclarations) {
684
- const comboKey = `${fontFace['font-family']}/${fontFace['font-style']}/${fontFace['font-weight']}`;
685
- if (!comboGroups.has(comboKey)) comboGroups.set(comboKey, []);
686
- comboGroups.get(comboKey).push(fontFace);
687
- }
688
- for (const [comboKey, group] of comboGroups) {
689
- if (group.length <= 1) continue;
690
- const withoutRange = group.filter((d) => !d['unicode-range']);
691
- if (withoutRange.length > 0) {
692
- throw new Error(
693
- `Multiple @font-face with the same font-family/font-style/font-weight combo but missing unicode-range on ${withoutRange.length} of ${group.length} declarations: ${comboKey}`
694
- );
695
638
  }
696
639
  }
697
640
 
698
- const featureTagsByFamily = new Map();
699
- const fontFamiliesWithFeatureSettings = findFontFamiliesWithFeatureSettings(
700
- stylesheetsWithPredicates,
701
- featureTagsByFamily
702
- );
641
+ const rels = stylesheetRelsByFromAsset.get(asset) || [];
642
+ for (const rel of rels) {
643
+ if (
644
+ fontFaceTraversalTypes.has(rel.type) ||
645
+ (rel.to && rel.to.type === 'Html' && rel.to.isInline)
646
+ ) {
647
+ traverseForFontFace(rel.to);
648
+ }
649
+ }
650
+ })(htmlOrSvgAsset);
651
+ return accumulatedFontFaceDeclarations;
652
+ }
703
653
 
704
- const result = {
705
- accumulatedFontFaceDeclarations,
706
- stylesheetsWithPredicates,
707
- fontFamiliesWithFeatureSettings,
708
- featureTagsByFamily,
709
- fastPathKey: buildStylesheetKey(htmlOrSvgAsset, true),
710
- };
711
- stylesheetResultCache.set(key, result);
712
- return result;
654
+ // Validate that @font-face declarations sharing family/style/weight each
655
+ // carry a unicode-range; throws if any declaration in such a group lacks one.
656
+ function validateFontFaceComboCoverage(accumulatedFontFaceDeclarations) {
657
+ const comboGroups = new Map();
658
+ for (const fontFace of accumulatedFontFaceDeclarations) {
659
+ const comboKey = `${fontFace['font-family']}/${fontFace['font-style']}/${fontFace['font-weight']}`;
660
+ if (!comboGroups.has(comboKey)) comboGroups.set(comboKey, []);
661
+ comboGroups.get(comboKey).push(fontFace);
662
+ }
663
+ for (const [comboKey, group] of comboGroups) {
664
+ if (group.length <= 1) continue;
665
+ const withoutRange = group.filter((d) => !d['unicode-range']);
666
+ if (withoutRange.length > 0) {
667
+ throw new Error(
668
+ `Multiple @font-face with the same font-family/font-style/font-weight combo but missing unicode-range on ${withoutRange.length} of ${group.length} declarations: ${comboKey}`
669
+ );
670
+ }
713
671
  }
672
+ }
714
673
 
715
- const headlessBrowser =
716
- dynamic && new HeadlessBrowser({ console, chromeArgs });
717
- const globalTextByProps = [];
718
- const subTimings = {};
674
+ function computeStylesheetResults(htmlOrSvgAsset, stylesheetRelsByFromAsset) {
675
+ const stylesheetsWithPredicates = gatherStylesheetsWithPredicates(
676
+ htmlOrSvgAsset.assetGraph,
677
+ htmlOrSvgAsset,
678
+ stylesheetRelsByFromAsset
679
+ );
719
680
 
720
- const trackPhase = makePhaseTracker(console, debug);
721
- const overallPhase = trackPhase('collectTextsByPage');
681
+ const accumulatedFontFaceDeclarations = collectFontFaceDeclarations(
682
+ htmlOrSvgAsset,
683
+ stylesheetRelsByFromAsset
684
+ );
685
+ validateFontFaceComboCoverage(accumulatedFontFaceDeclarations);
722
686
 
723
- // Pre-compute stylesheet results for all pages
724
- const stylesheetPrecompute = trackPhase('Stylesheet precompute');
725
- const pageData = [];
726
- for (const htmlOrSvgAsset of htmlOrSvgAssets) {
727
- const {
728
- accumulatedFontFaceDeclarations,
729
- stylesheetsWithPredicates,
730
- fontFamiliesWithFeatureSettings,
731
- featureTagsByFamily,
732
- fastPathKey,
733
- } = getOrComputeStylesheetResults(htmlOrSvgAsset);
734
- fontFaceDeclarationsByHtmlOrSvgAsset.set(
687
+ const featureTagsByFamily = new Map();
688
+ const fontFamiliesWithFeatureSettings = findFontFamiliesWithFeatureSettings(
689
+ stylesheetsWithPredicates,
690
+ featureTagsByFamily
691
+ );
692
+
693
+ return {
694
+ accumulatedFontFaceDeclarations,
695
+ stylesheetsWithPredicates,
696
+ fontFamiliesWithFeatureSettings,
697
+ featureTagsByFamily,
698
+ fastPathKey: buildStylesheetKey(
735
699
  htmlOrSvgAsset,
736
- accumulatedFontFaceDeclarations
737
- );
700
+ true,
701
+ stylesheetRelsByFromAsset
702
+ ),
703
+ };
704
+ }
738
705
 
739
- if (accumulatedFontFaceDeclarations.length === 0) {
740
- continue;
741
- }
706
+ // Strip `-subfont-text` nodes from CSS @font-face declarations once the
707
+ // subset planning is done, so they don't leak to the rendered output.
708
+ function stripSubfontTextNodes(fontFaceDeclarationsByHtmlOrSvgAsset) {
709
+ for (const fontFaceDeclarations of fontFaceDeclarationsByHtmlOrSvgAsset.values()) {
710
+ for (const fontFaceDeclaration of fontFaceDeclarations) {
711
+ const firstRelation = fontFaceDeclaration.relations[0];
712
+ const subfontTextNode = firstRelation.node.nodes.find(
713
+ (childNode) =>
714
+ childNode.type === 'decl' &&
715
+ childNode.prop.toLowerCase() === '-subfont-text'
716
+ );
742
717
 
743
- pageData.push({
744
- htmlOrSvgAsset,
745
- accumulatedFontFaceDeclarations,
746
- stylesheetsWithPredicates,
747
- fontFamiliesWithFeatureSettings,
748
- featureTagsByFamily,
749
- stylesheetCacheKey: fastPathKey,
750
- });
718
+ if (subfontTextNode) {
719
+ subfontTextNode.remove();
720
+ firstRelation.from.markDirty();
721
+ }
722
+ }
751
723
  }
724
+ }
752
725
 
753
- subTimings['Stylesheet precompute'] = stylesheetPrecompute.end(
754
- `${pageData.length} pages with fonts`
755
- );
756
-
757
- // Group pages by stylesheet cache key — pages sharing the same CSS
758
- // configuration produce identical font-tracer props, only text differs.
726
+ // Split trace work: with a headless browser every page needs a full trace
727
+ // (dynamic content); otherwise one representative per stylesheet group is
728
+ // traced and the rest use fast-path text extraction.
729
+ function planTracing(pageData, hasHeadlessBrowser) {
759
730
  const pagesByStylesheetKey = new Map();
760
731
  for (const pd of pageData) {
761
732
  let group = pagesByStylesheetKey.get(pd.stylesheetCacheKey);
@@ -768,7 +739,7 @@ async function collectTextsByPage(
768
739
 
769
740
  const pagesNeedingFullTrace = [];
770
741
  const fastPathPages = [];
771
- if (headlessBrowser) {
742
+ if (hasHeadlessBrowser) {
772
743
  for (const pd of pageData) {
773
744
  pagesNeedingFullTrace.push(pd);
774
745
  }
@@ -782,91 +753,37 @@ async function collectTextsByPage(
782
753
  }
783
754
  }
784
755
 
785
- // Always surface the per-page work breakdown so users can tell at a
786
- // glance how much of the run is actual tracing vs cheap CSS-group
787
- // reuse. The threshold matches createPageProgress's minTotal so it
788
- // only appears on non-trivial runs.
789
- if (console && pageData.length >= 5) {
790
- console.log(
791
- ` ${pageData.length} pages with fonts: ${pagesNeedingFullTrace.length} to trace, ${fastPathPages.length} via cached CSS group (${pagesByStylesheetKey.size} unique groups)`
792
- );
793
- }
794
-
795
- const tracingStart = Date.now();
796
- const fullTracing = trackPhase(
797
- `Full tracing (${pagesNeedingFullTrace.length} pages)`
798
- );
799
- try {
800
- await tracePages(pagesNeedingFullTrace, {
801
- headlessBrowser,
802
- concurrency,
803
- console,
804
- memoizedGetCssRulesByProperty,
805
- debug,
806
- });
807
-
808
- subTimings['Full tracing'] = fullTracing.end();
809
-
810
- processFastPathPages(fastPathPages, {
811
- memoizedGetCssRulesByProperty,
812
- subTimings,
813
- trackPhase,
814
- });
815
-
816
- const assemblePhase = trackPhase('Result assembly');
817
- for (const pd of pageData) {
818
- for (const textByPropsEntry of pd.textByProps) {
819
- textByPropsEntry.htmlOrSvgAsset = pd.htmlOrSvgAsset;
820
- }
821
- // Use a loop instead of push(...spread) to avoid stack overflow on large sites
822
- for (const entry of pd.textByProps) {
823
- globalTextByProps.push(entry);
824
- }
825
- htmlOrSvgAssetTextsWithProps.push({
826
- htmlOrSvgAsset: pd.htmlOrSvgAsset,
827
- textByProps: pd.textByProps,
828
- accumulatedFontFaceDeclarations: pd.accumulatedFontFaceDeclarations,
829
- fontFamiliesWithFeatureSettings: pd.fontFamiliesWithFeatureSettings,
830
- featureTagsByFamily: pd.featureTagsByFamily,
831
- });
832
- }
833
- subTimings['Result assembly'] = assemblePhase.end();
834
- if (debug && console) {
835
- console.log(
836
- `[subfont timing] Total tracing+extraction+assembly: ${
837
- Date.now() - tracingStart
838
- }ms`
839
- );
840
- }
841
- } finally {
842
- if (headlessBrowser) {
843
- await headlessBrowser.close();
844
- }
845
- }
846
-
847
- const postProcessPhase = trackPhase('Post-processing total');
756
+ return {
757
+ pagesNeedingFullTrace,
758
+ fastPathPages,
759
+ uniqueGroupCount: pagesByStylesheetKey.size,
760
+ };
761
+ }
848
762
 
849
- // Consolidated cache for per-declarations-key data.
763
+ // Iterate every traced page, snap its text against the @font-face set, and
764
+ // emit fully-formed per-page fontUsages (one entry per font URL + props).
765
+ // Caching is per declarations-key (declCache) and per raw pageText
766
+ // (uniqueCharsCache) so sites with many similar pages stay linear.
767
+ function buildPerPageFontUsages(
768
+ htmlOrSvgAssetTextsWithProps,
769
+ globalTextByProps,
770
+ text
771
+ ) {
850
772
  const declCache = new Map();
851
-
852
- const perPageLoopPhase = trackPhase('Per-page loop');
773
+ const uniqueCharsCache = new Map();
853
774
  let snappingTime = 0;
854
775
  let globalUsageTime = 0;
855
-
856
- // Cache uniqueChars results by raw pageText string to avoid recomputing
857
- const uniqueCharsCache = new Map();
858
776
  let cloningTime = 0;
859
777
 
860
- for (const htmlOrSvgAssetTextsWithPropsEntry of htmlOrSvgAssetTextsWithProps) {
778
+ for (const entry of htmlOrSvgAssetTextsWithProps) {
861
779
  const {
862
780
  htmlOrSvgAsset,
863
781
  textByProps,
864
782
  accumulatedFontFaceDeclarations,
865
783
  fontFamiliesWithFeatureSettings,
866
784
  featureTagsByFamily,
867
- } = htmlOrSvgAssetTextsWithPropsEntry;
785
+ } = entry;
868
786
 
869
- // Get or compute the snapped global entries for this declarations set
870
787
  const declKey = getDeclarationsKey(accumulatedFontFaceDeclarations);
871
788
  if (!declCache.has(declKey)) {
872
789
  const snapStart = Date.now();
@@ -882,98 +799,260 @@ async function collectTextsByPage(
882
799
  snappingTime += Date.now() - snapStart;
883
800
  }
884
801
 
885
- // Precompute global font usage templates and indices once per declarations key
886
802
  const declCacheEntry = declCache.get(declKey);
887
803
  const globalUsageStart = Date.now();
888
- getOrComputeGlobalFontUsages(
804
+ populateGlobalFontUsages(
889
805
  declCacheEntry,
890
806
  accumulatedFontFaceDeclarations,
891
807
  text
892
808
  );
893
809
  globalUsageTime += Date.now() - globalUsageStart;
894
810
 
895
- const fontUsageTemplates = declCacheEntry.fontUsageTemplates;
896
- const pageTextIndex = declCacheEntry.pageTextIndex;
897
- const textAndPropsToFontUrl = declCacheEntry.preloadIndex;
811
+ const {
812
+ fontUsageTemplates,
813
+ pageTextIndex,
814
+ preloadIndex: textAndPropsToFontUrl,
815
+ } = declCacheEntry;
898
816
 
899
- // Compute preload per fontUrl using inverted index
900
817
  const preloadFontUrls = new Set();
901
- for (const entry of textByProps) {
902
- const fontUrl = textAndPropsToFontUrl.get(entry);
818
+ for (const textByPropsEntry of textByProps) {
819
+ const fontUrl = textAndPropsToFontUrl.get(textByPropsEntry);
903
820
  if (fontUrl) {
904
821
  preloadFontUrls.add(fontUrl);
905
822
  }
906
823
  }
907
824
 
908
- // Build per-page fontUsages from precomputed templates
909
825
  const cloneStart = Date.now();
910
826
  const assetTexts = pageTextIndex.get(htmlOrSvgAsset);
911
- htmlOrSvgAssetTextsWithPropsEntry.fontUsages = fontUsageTemplates.map(
912
- (template) => {
913
- const pageTexts = assetTexts
914
- ? assetTexts.get(template.fontUrl)
915
- : undefined;
916
- let pageTextStr = pageTexts ? pageTexts.join('') : '';
917
- if (template.extraTextsStr) {
918
- pageTextStr += template.extraTextsStr;
919
- }
827
+ entry.fontUsages = fontUsageTemplates.map((template) => {
828
+ const pageTexts = assetTexts
829
+ ? assetTexts.get(template.fontUrl)
830
+ : undefined;
831
+ let pageTextStr = pageTexts ? pageTexts.join('') : '';
832
+ if (template.extraTextsStr) {
833
+ pageTextStr += template.extraTextsStr;
834
+ }
920
835
 
921
- let pageTextUnique = uniqueCharsCache.get(pageTextStr);
922
- if (pageTextUnique === undefined) {
923
- pageTextUnique = uniqueChars(pageTextStr);
924
- uniqueCharsCache.set(pageTextStr, pageTextUnique);
925
- }
836
+ let pageTextUnique = uniqueCharsCache.get(pageTextStr);
837
+ if (pageTextUnique === undefined) {
838
+ pageTextUnique = uniqueChars(pageTextStr);
839
+ uniqueCharsCache.set(pageTextStr, pageTextUnique);
840
+ }
926
841
 
927
- const { hasFontFeatureSettings, fontFeatureTags } =
928
- resolveFeatureSettings(
929
- template.fontFamilies,
930
- fontFamiliesWithFeatureSettings,
931
- featureTagsByFamily
932
- );
842
+ const { hasFontFeatureSettings, fontFeatureTags } =
843
+ resolveFeatureSettings(
844
+ template.fontFamilies,
845
+ fontFamiliesWithFeatureSettings,
846
+ featureTagsByFamily
847
+ );
933
848
 
934
- return {
935
- smallestOriginalSize: template.smallestOriginalSize,
936
- smallestOriginalFormat: template.smallestOriginalFormat,
937
- texts: template.texts,
938
- pageText: pageTextUnique,
939
- text: template.text,
940
- props: { ...template.props },
941
- fontUrl: template.fontUrl,
942
- fontFamilies: template.fontFamilies,
943
- fontStyles: template.fontStyles,
944
- fontStretches: template.fontStretches,
945
- fontWeights: template.fontWeights,
946
- fontVariationSettings: template.fontVariationSettings,
947
- preload: preloadFontUrls.has(template.fontUrl),
948
- hasFontFeatureSettings,
949
- fontFeatureTags,
950
- };
951
- }
952
- );
849
+ return {
850
+ smallestOriginalSize: template.smallestOriginalSize,
851
+ smallestOriginalFormat: template.smallestOriginalFormat,
852
+ texts: template.texts,
853
+ pageText: pageTextUnique,
854
+ text: template.text,
855
+ props: { ...template.props },
856
+ fontUrl: template.fontUrl,
857
+ fontFamilies: template.fontFamilies,
858
+ fontStyles: template.fontStyles,
859
+ fontStretches: template.fontStretches,
860
+ fontWeights: template.fontWeights,
861
+ fontVariationSettings: template.fontVariationSettings,
862
+ preload: preloadFontUrls.has(template.fontUrl),
863
+ hasFontFeatureSettings,
864
+ fontFeatureTags,
865
+ };
866
+ });
953
867
  cloningTime += Date.now() - cloneStart;
954
868
  }
955
869
 
870
+ return { snappingTime, globalUsageTime, cloningTime };
871
+ }
872
+
873
+ // Resolve stylesheet results for every page, calling computeStylesheetResults
874
+ // only once per distinct stylesheet key and reusing the cached result. Pages
875
+ // without any @font-face declarations are still recorded in the declarations
876
+ // map but are omitted from pageData (nothing to trace or subset for them).
877
+ function precomputeStylesheetsForPages(
878
+ htmlOrSvgAssets,
879
+ stylesheetRelsByFromAsset,
880
+ fontFaceDeclarationsByHtmlOrSvgAsset
881
+ ) {
882
+ const stylesheetResultCache = new Map();
883
+ const pageData = [];
884
+
885
+ for (const htmlOrSvgAsset of htmlOrSvgAssets) {
886
+ const key = buildStylesheetKey(
887
+ htmlOrSvgAsset,
888
+ false,
889
+ stylesheetRelsByFromAsset
890
+ );
891
+ let result = stylesheetResultCache.get(key);
892
+ if (!result) {
893
+ result = computeStylesheetResults(
894
+ htmlOrSvgAsset,
895
+ stylesheetRelsByFromAsset
896
+ );
897
+ stylesheetResultCache.set(key, result);
898
+ }
899
+
900
+ fontFaceDeclarationsByHtmlOrSvgAsset.set(
901
+ htmlOrSvgAsset,
902
+ result.accumulatedFontFaceDeclarations
903
+ );
904
+
905
+ if (result.accumulatedFontFaceDeclarations.length === 0) {
906
+ continue;
907
+ }
908
+
909
+ pageData.push({
910
+ htmlOrSvgAsset,
911
+ accumulatedFontFaceDeclarations: result.accumulatedFontFaceDeclarations,
912
+ stylesheetsWithPredicates: result.stylesheetsWithPredicates,
913
+ fontFamiliesWithFeatureSettings: result.fontFamiliesWithFeatureSettings,
914
+ featureTagsByFamily: result.featureTagsByFamily,
915
+ stylesheetCacheKey: result.fastPathKey,
916
+ });
917
+ }
918
+
919
+ return pageData;
920
+ }
921
+
922
+ // Flatten traced per-page textByProps into a single globalTextByProps array,
923
+ // tagging each entry with its owning asset so downstream code can map text
924
+ // back to the page that rendered it.
925
+ function flattenTracedPagesIntoGlobal(
926
+ pageData,
927
+ htmlOrSvgAssetTextsWithProps,
928
+ globalTextByProps
929
+ ) {
930
+ for (const pd of pageData) {
931
+ for (const textByPropsEntry of pd.textByProps) {
932
+ textByPropsEntry.htmlOrSvgAsset = pd.htmlOrSvgAsset;
933
+ }
934
+ // Use a loop instead of push(...spread) to avoid stack overflow on large sites
935
+ for (const entry of pd.textByProps) {
936
+ globalTextByProps.push(entry);
937
+ }
938
+ htmlOrSvgAssetTextsWithProps.push({
939
+ htmlOrSvgAsset: pd.htmlOrSvgAsset,
940
+ textByProps: pd.textByProps,
941
+ accumulatedFontFaceDeclarations: pd.accumulatedFontFaceDeclarations,
942
+ fontFamiliesWithFeatureSettings: pd.fontFamiliesWithFeatureSettings,
943
+ featureTagsByFamily: pd.featureTagsByFamily,
944
+ });
945
+ }
946
+ }
947
+
948
+ async function collectTextsByPage(
949
+ assetGraph,
950
+ htmlOrSvgAssets,
951
+ {
952
+ text,
953
+ console,
954
+ dynamic = false,
955
+ debug = false,
956
+ concurrency,
957
+ chromeArgs = [],
958
+ } = {}
959
+ ) {
960
+ const htmlOrSvgAssetTextsWithProps = [];
961
+ const memoizedGetCssRulesByProperty = memoizeSync(getCssRulesByProperty);
962
+ const fontFaceDeclarationsByHtmlOrSvgAsset = new Map();
963
+ const stylesheetRelsByFromAsset = indexStylesheetRelations(assetGraph);
964
+
965
+ const headlessBrowser =
966
+ dynamic && new HeadlessBrowser({ console, chromeArgs });
967
+ const globalTextByProps = [];
968
+ const subTimings = {};
969
+
970
+ const trackPhase = makePhaseTracker(console, debug);
971
+ const overallPhase = trackPhase('collectTextsByPage');
972
+
973
+ const stylesheetPrecompute = trackPhase('Stylesheet precompute');
974
+ const pageData = precomputeStylesheetsForPages(
975
+ htmlOrSvgAssets,
976
+ stylesheetRelsByFromAsset,
977
+ fontFaceDeclarationsByHtmlOrSvgAsset
978
+ );
979
+ subTimings['Stylesheet precompute'] = stylesheetPrecompute.end(
980
+ `${pageData.length} pages with fonts`
981
+ );
982
+
983
+ // Pages sharing the same CSS configuration produce identical font-tracer
984
+ // props, only text differs — so we trace one representative and fast-path
985
+ // the rest. With --dynamic every page is traced individually.
986
+ const { pagesNeedingFullTrace, fastPathPages, uniqueGroupCount } =
987
+ planTracing(pageData, Boolean(headlessBrowser));
988
+
989
+ // Always surface the per-page work breakdown so users can tell at a
990
+ // glance how much of the run is actual tracing vs cheap CSS-group
991
+ // reuse. The threshold matches createPageProgress's minTotal so it
992
+ // only appears on non-trivial runs.
993
+ if (console && pageData.length >= 5) {
994
+ console.log(
995
+ ` ${pageData.length} pages with fonts: ${pagesNeedingFullTrace.length} to trace, ${fastPathPages.length} via cached CSS group (${uniqueGroupCount} unique groups)`
996
+ );
997
+ }
998
+
999
+ const tracingStart = Date.now();
1000
+ const fullTracing = trackPhase(
1001
+ `Full tracing (${pagesNeedingFullTrace.length} pages)`
1002
+ );
1003
+ try {
1004
+ await tracePages(pagesNeedingFullTrace, {
1005
+ headlessBrowser,
1006
+ concurrency,
1007
+ console,
1008
+ memoizedGetCssRulesByProperty,
1009
+ debug,
1010
+ });
1011
+
1012
+ subTimings['Full tracing'] = fullTracing.end();
1013
+
1014
+ const fastPathPhase = trackPhase('Fast-path extraction');
1015
+ const fastPathFallbacks = processFastPathPages(fastPathPages, {
1016
+ memoizedGetCssRulesByProperty,
1017
+ });
1018
+ subTimings['Fast-path extraction'] = fastPathPhase.end(
1019
+ `${fastPathPages.length} pages, ${fastPathFallbacks} fell back to full trace`
1020
+ );
1021
+
1022
+ const assemblePhase = trackPhase('Result assembly');
1023
+ flattenTracedPagesIntoGlobal(
1024
+ pageData,
1025
+ htmlOrSvgAssetTextsWithProps,
1026
+ globalTextByProps
1027
+ );
1028
+ subTimings['Result assembly'] = assemblePhase.end();
1029
+ if (debug && console) {
1030
+ console.log(
1031
+ `[subfont timing] Total tracing+extraction+assembly: ${
1032
+ Date.now() - tracingStart
1033
+ }ms`
1034
+ );
1035
+ }
1036
+ } finally {
1037
+ if (headlessBrowser) {
1038
+ await headlessBrowser.close();
1039
+ }
1040
+ }
1041
+
1042
+ const postProcessPhase = trackPhase('Post-processing total');
1043
+ const perPageLoopPhase = trackPhase('Per-page loop');
1044
+ const { snappingTime, globalUsageTime, cloningTime } = buildPerPageFontUsages(
1045
+ htmlOrSvgAssetTextsWithProps,
1046
+ globalTextByProps,
1047
+ text
1048
+ );
956
1049
  subTimings['Per-page loop'] = perPageLoopPhase.end(
957
1050
  `snapping: ${snappingTime}ms, globalUsage: ${globalUsageTime}ms, cloning: ${cloningTime}ms`
958
1051
  );
959
1052
  subTimings['Post-processing total'] = postProcessPhase.end();
960
1053
  overallPhase.end();
961
1054
 
962
- for (const fontFaceDeclarations of fontFaceDeclarationsByHtmlOrSvgAsset.values()) {
963
- for (const fontFaceDeclaration of fontFaceDeclarations) {
964
- const firstRelation = fontFaceDeclaration.relations[0];
965
- const subfontTextNode = firstRelation.node.nodes.find(
966
- (childNode) =>
967
- childNode.type === 'decl' &&
968
- childNode.prop.toLowerCase() === '-subfont-text'
969
- );
970
-
971
- if (subfontTextNode) {
972
- subfontTextNode.remove();
973
- firstRelation.from.markDirty();
974
- }
975
- }
976
- }
1055
+ stripSubfontTextNodes(fontFaceDeclarationsByHtmlOrSvgAsset);
977
1056
  return {
978
1057
  htmlOrSvgAssetTextsWithProps,
979
1058
  fontFaceDeclarationsByHtmlOrSvgAsset,
@@ -982,9 +1061,3 @@ async function collectTextsByPage(
982
1061
  }
983
1062
 
984
1063
  module.exports = collectTextsByPage;
985
-
986
- // Exported for testing only
987
- module.exports._extractFeatureTagsFromDecl = extractFeatureTagsFromDecl;
988
- module.exports._resolveFeatureSettings = resolveFeatureSettings;
989
- module.exports._findFontFamiliesWithFeatureSettings =
990
- findFontFamiliesWithFeatureSettings;