@turntrout/subfont 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1017 @@
1
+ const memoizeSync = require('memoizesync');
2
+ const os = require('os');
3
+
4
+ const fontTracer = require('font-tracer');
5
+ const fontSnapper = require('font-snapper');
6
+
7
+ const HeadlessBrowser = require('./HeadlessBrowser');
8
+ const FontTracerPool = require('./FontTracerPool');
9
+ const gatherStylesheetsWithPredicates = require('./gatherStylesheetsWithPredicates');
10
+ const cssFontParser = require('css-font-parser');
11
+ const unquote = require('./unquote');
12
+ const normalizeFontPropertyValue = require('./normalizeFontPropertyValue');
13
+ const getCssRulesByProperty = require('./getCssRulesByProperty');
14
+ const extractVisibleText = require('./extractVisibleText');
15
+ const {
16
+ stringifyFontFamily,
17
+ getPreferredFontUrl,
18
+ uniqueChars,
19
+ uniqueCharsFromArray,
20
+ } = require('./fontFaceHelpers');
21
+
22
// Detector for font-related CSS in inline stylesheets. When the fast-path
// grouping key is built, an inline stylesheet whose text matches this
// pattern is part of the key; one that does not match (for example
// layout-only critical CSS) is left out, so that pages differing only in
// non-font inline styles can still share a single fontTracer run.
const fontRelevantCssRegex =
  /font-family|font-weight|font-style|font-stretch|font-display|@font-face|font-variation|font-feature/i;
28
+
29
// Detects inline `style` attributes that set font-related properties.
// Pages containing such attributes are kept off the fast path, because the
// stylesheet cache key does not cover inline styles and they can change
// font-tracer output.
//
// Matches font-family, font-weight, font-style, font-stretch and the `font:`
// shorthand inside a style attribute value. The three quoting forms are
// matched separately:
//   - "..."  may contain single quotes (style="quotes: '«' '»'; font-weight: bold")
//   - '...'  may contain double quotes
//   - unquoted values (style=font-weight:bold)
// A single character class excluding both quote kinds would stop scanning at
// the first nested quote and miss font properties that follow it.
// The leading (?:^|\s) ensures we don't match data-style or similar.
const inlineFontStyleRegex =
  /(?:^|\s)style\s*=\s*(?:"[^"]*\b|'[^']*\b|[^\s"'>]*\b)(?:font-family|font-weight|font-style|font-stretch|font\s*:)/i;

/**
 * Tell whether the markup contains a style attribute with font properties.
 *
 * False positives are harmless here: the caller only uses the answer to
 * decide whether a page must be fully traced instead of fast-pathed.
 *
 * @param {string} html Serialized markup of a page.
 * @returns {boolean} true when a font-related inline style is present.
 */
function hasInlineFontStyles(html) {
  return inlineFontStyleRegex.test(html);
}
40
+
41
// Relation types that are followed from an HTML/SVG document into CSS (and
// from CSS into imported CSS) while gathering @font-face declarations.
const fontFaceTraversalTypes = new Set([
  'HtmlStyle',
  'SvgStyle',
  'CssImport',
]);
43
+
44
// Lower-cased names of the CSS properties that trigger collection of
// OpenType feature glyphs.
const featureSettingsProps = new Set([
  'font-feature-settings',
  'font-variant-alternates',
  'font-variant-caps',
  'font-variant-east-asian',
  'font-variant-ligatures',
  'font-variant-numeric',
  'font-variant-position',
]);

/**
 * Check whether a rule contains at least one declaration of a property
 * listed in featureSettingsProps (property names compared case-insensitively).
 *
 * @param {{nodes: Array<{type: string, prop?: string}>}} rule Rule whose
 *   direct children are inspected; only children of type 'decl' are considered.
 * @returns {boolean}
 */
function ruleUsesFeatureSettings(rule) {
  for (const child of rule.nodes) {
    if (child.type !== 'decl') {
      continue;
    }
    if (featureSettingsProps.has(child.prop.toLowerCase())) {
      return true;
    }
  }
  return false;
}
61
+
62
/**
 * Return the value of the last `font-family` declaration among the rule's
 * direct children (property name compared case-insensitively), or null when
 * the rule has none. The last declaration is the one that wins in CSS.
 *
 * @param {{nodes: Array<{type: string, prop?: string, value?: string}>}} rule
 * @returns {string|null}
 */
function ruleFontFamily(rule) {
  const lastMatch = [...rule.nodes]
    .reverse()
    .find(
      (child) =>
        child.type === 'decl' && child.prop.toLowerCase() === 'font-family'
    );
  return lastMatch ? lastMatch.value : null;
}
71
+
72
/**
 * Work out which font families are styled with font-feature-settings or a
 * font-variant-* property.
 *
 * Every rule of every stylesheet that has a parse tree is inspected. A rule
 * that uses such a property but declares no font-family could apply to any
 * font, so in that case the answer is conservatively "all".
 *
 * @param {Iterable<{asset?: object}>} stylesheetsWithPredicates Entries whose
 *   `asset.parseTree` (when present) exposes `walkRules(callback)`.
 * @returns {null|Set<string>|true} null when nothing was detected, a Set of
 *   lower-cased family names, or true meaning every family.
 */
function findFontFamiliesWithFeatureSettings(stylesheetsWithPredicates) {
  let families = null;
  let appliesToAll = false;

  const inspectRule = (rule) => {
    if (appliesToAll || !ruleUsesFeatureSettings(rule)) {
      return;
    }
    const familyValue = ruleFontFamily(rule);
    if (!familyValue) {
      // Feature settings without a font-family: assume all fonts.
      appliesToAll = true;
      return;
    }
    families = families || new Set();
    for (const name of cssFontParser.parseFontFamily(familyValue)) {
      families.add(name.toLowerCase());
    }
  };

  for (const { asset } of stylesheetsWithPredicates) {
    if (asset && asset.parseTree) {
      asset.parseTree.walkRules(inspectRule);
      if (appliesToAll) {
        break;
      }
    }
  }

  return appliesToAll ? true : families;
}
97
+
98
// Initial values for the three font properties that every @font-face
// declaration object is seeded with; picked from the full table exported by
// ./initialValueByProp.
const allInitialValues = require('./initialValueByProp');
const initialValueByProp = Object.fromEntries(
  ['font-style', 'font-weight', 'font-stretch'].map((prop) => [
    prop,
    allInitialValues[prop],
  ])
);
104
+
105
/**
 * Build a lookup key from the four font properties.
 *
 * The parts are separated by a NUL character: CSS property values cannot
 * contain \0, so distinct inputs cannot collide, and plain string building
 * is cheaper than JSON.stringify in hot loops.
 *
 * @param {*} family
 * @param {*} weight
 * @param {*} style
 * @param {*} stretch
 * @returns {string}
 */
function fontPropsKey(family, weight, style, stretch) {
  const NUL = '\0';
  return `${family}${NUL}${weight}${NUL}${style}${NUL}${stretch}`;
}
110
+
111
// Memoizes the key per declarations array (by identity), so repeated lookups
// for the same array don't re-serialize it.
const declKeyCache = new WeakMap();

/**
 * Serialize a list of @font-face declaration objects into a string key that
 * is used to share derived data between pages with equivalent declarations.
 *
 * Besides family/style/weight/stretch, the key covers `unicode-range`, the
 * raw `src` value and the ids of the assets the declaration's relations
 * point to. Without those, two pages whose @font-face rules name the same
 * family/style/weight/stretch but load different font files (or cover
 * different unicode ranges) would produce the same key and share data that
 * was computed for the other page's fonts.
 *
 * NOTE(review): assumes `relation.to.id` identifies the target asset, the
 * same way asset ids are used as identity in buildStylesheetKey — confirm.
 * Declarations without `relations` are still accepted.
 *
 * @param {Array<object>} declarations @font-face declaration objects.
 * @returns {string} JSON-encoded key; equal for equivalent declaration lists.
 */
function getDeclarationsKey(declarations) {
  const cachedKey = declKeyCache.get(declarations);
  if (cachedKey !== undefined) {
    return cachedKey;
  }
  const key = JSON.stringify(
    declarations.map((d) => [
      d['font-family'],
      d['font-style'],
      d['font-weight'],
      d['font-stretch'],
      d['unicode-range'],
      d.src,
      Array.isArray(d.relations)
        ? d.relations.map((relation) =>
            relation && relation.to ? relation.to.id : undefined
          )
        : undefined,
    ])
  );
  declKeyCache.set(declarations, key);
  return key;
}
127
+
128
/**
 * Snap every traced text entry against the @font-face declarations to find
 * out which font URL(s) and font properties the entry maps to.
 *
 * Entries without a traced font-family are skipped. For the others, each
 * family in the font-family list that has a matching declaration
 * (case-insensitive) is snapped via fontSnapper; a family is dropped when
 * nothing snaps or no preferred font URL can be determined.
 *
 * @param {Array<object>} declarations @font-face declaration objects.
 * @param {Iterable<{text: string, props: object}>} globalTextByProps Traced
 *   text entries of all pages.
 * @returns {Array<object>} One record per (entry, snapped family), holding
 *   textAndProps, fontUrl, props, fontRelations, fontStyle, fontWeight,
 *   fontStretch and fontVariationSettings.
 */
function computeSnappedGlobalEntries(declarations, globalTextByProps) {
  // Snapping results per distinct family/weight/style/stretch combination.
  // Many entries differ only in their text, so this avoids repeating the
  // fontSnapper and font-family parsing work for them.
  const resultsByPropsKey = new Map();

  const isDeclaredFamily = (familyName) =>
    declarations.some(
      (fontFace) =>
        fontFace['font-family'].toLowerCase() === familyName.toLowerCase()
    );

  // Snap one set of traced props against the declarations.
  const snapTracedProps = (tracedProps) => {
    const results = [];
    const declaredFamilies = cssFontParser
      .parseFontFamily(tracedProps['font-family'])
      .filter(isDeclaredFamily);

    for (const familyName of declaredFamilies) {
      const snappedDeclaration = fontSnapper(declarations, {
        ...tracedProps,
        'font-family': stringifyFontFamily(familyName),
      });
      if (!snappedDeclaration) {
        continue;
      }

      const {
        relations,
        '-subfont-text': _,
        ...props
      } = snappedDeclaration;
      const fontUrl = getPreferredFontUrl(relations);
      if (!fontUrl) {
        continue;
      }

      const normalizedWeight = normalizeFontPropertyValue(
        'font-weight',
        tracedProps['font-weight']
      );
      const fontStyle = normalizeFontPropertyValue(
        'font-style',
        tracedProps['font-style']
      );
      const fontStretch = normalizeFontPropertyValue(
        'font-stretch',
        tracedProps['font-stretch']
      );

      results.push({
        fontUrl,
        props,
        fontRelations: relations,
        fontStyle,
        fontWeight: normalizedWeight === 'normal' ? 400 : normalizedWeight,
        fontStretch,
      });
    }
    return results;
  };

  const entries = [];
  for (const textAndProps of globalTextByProps) {
    const tracedProps = textAndProps.props;
    const family = tracedProps['font-family'];
    if (family === undefined) {
      continue;
    }

    const propsKey = fontPropsKey(
      family,
      tracedProps['font-weight'] || '',
      tracedProps['font-style'] || '',
      tracedProps['font-stretch'] || ''
    );
    if (!resultsByPropsKey.has(propsKey)) {
      resultsByPropsKey.set(propsKey, snapTracedProps(tracedProps));
    }

    for (const snapped of resultsByPropsKey.get(propsKey)) {
      entries.push({
        textAndProps,
        ...snapped,
        fontVariationSettings: tracedProps['font-variation-settings'],
      });
    }
  }
  return entries;
}
219
+
220
/**
 * Build the global font usage templates and the per-page lookup indices from
 * the snapped entries of one declarations set. Does nothing when the cache
 * entry already has templates.
 *
 * Mutates `cached` in place, setting:
 *   - fontUsageTemplates: one template object per font URL
 *   - pageTextIndex: Map<htmlOrSvgAsset, Map<fontUrl, string[]>>
 *   - preloadIndex: Map<textAndProps, fontUrl>
 *
 * @param {object} cached Cache entry; `cached.snappedEntries` is read.
 * @param {Iterable<object>} accumulatedFontFaceDeclarations @font-face
 *   declaration objects; their `-subfont-text` contributes extra text.
 * @param {string} [text] Extra text added to every font that has a URL.
 * @returns {void}
 */
function getOrComputeGlobalFontUsages(
  cached,
  accumulatedFontFaceDeclarations,
  text
) {
  if (cached.fontUsageTemplates) {
    return;
  }

  // Fetch map.get(key), creating it with makeEmpty() on first use.
  const bucketFor = (map, key, makeEmpty) => {
    if (!map.has(key)) {
      map.set(key, makeEmpty());
    }
    return map.get(key);
  };

  // All three indices are filled in one pass over the snapped entries.
  const pageTextIndex = new Map();
  const entriesByFontUrl = new Map();
  const textAndPropsToFontUrl = new Map();

  for (const snappedEntry of cached.snappedEntries) {
    const { fontUrl, textAndProps } = snappedEntry;
    if (!fontUrl) {
      continue;
    }

    // Texts grouped by (asset, fontUrl)
    const textsByFontUrl = bucketFor(
      pageTextIndex,
      textAndProps.htmlOrSvgAsset,
      () => new Map()
    );
    bucketFor(textsByFontUrl, fontUrl, () => []).push(textAndProps.text);

    // Entries grouped by fontUrl
    bucketFor(entriesByFontUrl, fontUrl, () => []).push(snappedEntry);

    // Inverted index textAndProps -> fontUrl (a later entry for the same
    // textAndProps overwrites an earlier one)
    textAndPropsToFontUrl.set(textAndProps, fontUrl);
  }

  // Contributions from -subfont-text and the text parameter, per fontUrl.
  // They do not depend on the page.
  const extraTextsByFontUrl = new Map();
  for (const {
    relations,
    '-subfont-text': subfontText,
    ...props
  } of accumulatedFontFaceDeclarations) {
    const fontUrl = getPreferredFontUrl(relations);
    if (!fontUrl) {
      continue;
    }

    const extras = [];
    if (subfontText !== undefined) {
      extras.push(unquote(subfontText));
    }
    if (text !== undefined) {
      extras.push(text);
    }
    if (extras.length === 0) {
      continue;
    }

    // The first declaration seen for a URL supplies props and relations
    const extraRecord = bucketFor(extraTextsByFontUrl, fontUrl, () => ({
      texts: [],
      props,
      fontRelations: relations,
    }));
    extraRecord.texts.push(...extras);
  }

  const allFontUrls = new Set([
    ...entriesByFontUrl.keys(),
    ...extraTextsByFontUrl.keys(),
  ]);

  const fontUsageTemplates = [];
  for (const fontUrl of allFontUrls) {
    const fontEntries = entriesByFontUrl.get(fontUrl) || [];
    const extra = extraTextsByFontUrl.get(fontUrl);

    // Extras come first, then the text of every snapped entry
    const allTexts = [
      ...(extra ? extra.texts : []),
      ...fontEntries.map((e) => e.textAndProps.text),
    ];

    const fontFamilies = new Set(
      fontEntries.map((e) => e.props['font-family'])
    );
    const fontStyles = new Set(fontEntries.map((e) => e.fontStyle));
    const fontWeights = new Set(fontEntries.map((e) => e.fontWeight));
    const fontStretches = new Set(fontEntries.map((e) => e.fontStretch));
    const fontVariationSettings = new Set(
      fontEntries
        .map((e) => e.fontVariationSettings)
        .filter((fvs) => fvs && fvs.toLowerCase() !== 'normal')
    );

    // The first snapped entry supplies relations and props; when there are
    // no entries the URL came from the extras, which carry both as well.
    const primary = fontEntries.length > 0 ? fontEntries[0] : extra;

    let smallestOriginalSize = 0;
    // Stays undefined when no relation target is loaded
    let smallestOriginalFormat;
    for (const { to: target } of primary.fontRelations) {
      if (!target.isLoaded) {
        continue;
      }
      const size = target.rawSrc.length;
      if (smallestOriginalSize === 0 || size < smallestOriginalSize) {
        smallestOriginalSize = size;
        smallestOriginalFormat = target.type.toLowerCase();
      }
    }

    fontUsageTemplates.push({
      smallestOriginalSize,
      smallestOriginalFormat,
      texts: allTexts,
      text: uniqueCharsFromArray(allTexts),
      // Pre-joined extras, used later when the per-page text is computed
      extraTextsStr: extra ? extra.texts.join('') : '',
      props: { ...primary.props },
      fontUrl,
      fontFamilies,
      fontStyles,
      fontStretches,
      fontWeights,
      fontVariationSettings,
    });
  }

  cached.fontUsageTemplates = fontUsageTemplates;
  cached.pageTextIndex = pageTextIndex;
  cached.preloadIndex = textAndPropsToFontUrl;
}
375
+
376
/**
 * Trace, for every HTML/SVG asset, which text is rendered with which
 * @font-face font, and derive per-page font usage records.
 *
 * Outline:
 *   1. Gather stylesheets and @font-face declarations per page (cached per
 *      stylesheet configuration).
 *   2. Group pages by a font-relevant stylesheet key. One representative per
 *      group is fully traced with font-tracer (in a worker pool when there
 *      are at least 4 pages to trace and no headless browser); the other
 *      pages reuse the representative's props and only have their visible
 *      text extracted, unless they carry inline font styles.
 *   3. Snap the traced text against the declarations and build `fontUsages`
 *      for every page.
 *   4. Remove `-subfont-text` declarations from the @font-face rules and mark
 *      the owning asset dirty.
 *
 * @param {object} assetGraph Graph; `findRelations(query)` is called on it.
 * @param {Iterable<object>} htmlOrSvgAssets Pages to process.
 * @param {object} [options]
 * @param {string} [options.text] Extra text to include for every font.
 * @param {object} [options.console] Logger (`log`, `warn`); logging is
 *   skipped when absent.
 * @param {boolean} [options.dynamic=false] Also trace each page with a
 *   headless browser; disables the fast path and the worker pool.
 * @param {boolean} [options.debug=false] Log timing information.
 * @param {number} [options.concurrency] Maximum number of workers; when not
 *   positive, min(CPU count, 8) is used.
 * @param {string[]} [options.chromeArgs=[]] Passed to the headless browser.
 * @returns {Promise<{htmlOrSvgAssetTextsWithProps: object[],
 *   fontFaceDeclarationsByHtmlOrSvgAsset: Map, subTimings: object}>}
 * @throws {Error} When several @font-face rules share family/style/weight
 *   and some of them lack unicode-range.
 */
async function collectTextsByPage(
  assetGraph,
  htmlOrSvgAssets,
  {
    text,
    console,
    dynamic = false,
    debug = false,
    concurrency,
    chromeArgs = [],
  } = {}
) {
  const htmlOrSvgAssetTextsWithProps = [];

  const memoizedGetCssRulesByProperty = memoizeSync(getCssRulesByProperty);

  const fontFaceDeclarationsByHtmlOrSvgAsset = new Map();

  // Cache stylesheet-dependent results for pages with identical CSS
  // configurations.
  const stylesheetResultCache = new Map();

  // Pre-build an index of stylesheet-related relations by source asset
  // to avoid repeated assetGraph.findRelations scans (O(allRelations) each).
  const stylesheetRelTypes = [
    'HtmlStyle',
    'SvgStyle',
    'CssImport',
    'HtmlConditionalComment',
    'HtmlNoscript',
  ];
  const stylesheetRelsByFromAsset = new Map();
  for (const relation of assetGraph.findRelations({
    type: {
      $in: stylesheetRelTypes,
    },
  })) {
    let arr = stylesheetRelsByFromAsset.get(relation.from);
    if (!arr) {
      arr = [];
      stylesheetRelsByFromAsset.set(relation.from, arr);
    }
    arr.push(relation);
  }

  // Run font-tracer for one page on the main thread.
  const traceOnMainThread = (pd) =>
    fontTracer(pd.htmlOrSvgAsset.parseTree, {
      stylesheetsWithPredicates: pd.stylesheetsWithPredicates,
      getCssRulesByProperty: memoizedGetCssRulesByProperty,
      asset: pd.htmlOrSvgAsset,
    });

  // Build a cache key by traversing stylesheet relations, capturing
  // both asset identity and relation context (media, conditionalComment,
  // noscript) that affect gatherStylesheetsWithPredicates output.
  // With skipNonFontInlineCss, inline CSS without font-related content is
  // left out of the key.
  function buildStylesheetKey(htmlOrSvgAsset, skipNonFontInlineCss) {
    const keyParts = [];
    const visited = new Set();
    (function traverse(asset, isNoscript) {
      if (visited.has(asset)) return;
      if (!asset.isLoaded) return;
      visited.add(asset);
      for (const relation of stylesheetRelsByFromAsset.get(asset) || []) {
        if (relation.type === 'HtmlNoscript') {
          traverse(relation.to, true);
        } else if (relation.type === 'HtmlConditionalComment') {
          keyParts.push(`cc:${relation.condition}`);
          traverse(relation.to, isNoscript);
        } else {
          const target = relation.to;
          if (
            skipNonFontInlineCss &&
            target.isInline &&
            target.type === 'Css' &&
            !fontRelevantCssRegex.test(target.text || '')
          ) {
            continue;
          }
          const media = relation.media || '';
          keyParts.push(`${target.id}:${media}:${isNoscript ? 'ns' : ''}`);
          traverse(target, isNoscript);
        }
      }
    })(htmlOrSvgAsset, false);
    // Key parts are structured id:media:ns strings — simple join is safe
    return keyParts.join('\x1d');
  }

  function getOrComputeStylesheetResults(htmlOrSvgAsset) {
    const key = buildStylesheetKey(htmlOrSvgAsset);
    if (stylesheetResultCache.has(key)) {
      return stylesheetResultCache.get(key);
    }

    const stylesheetsWithPredicates = gatherStylesheetsWithPredicates(
      htmlOrSvgAsset.assetGraph,
      htmlOrSvgAsset,
      stylesheetRelsByFromAsset
    );

    // Compute accumulatedFontFaceDeclarations by traversing CSS relations
    const accumulatedFontFaceDeclarations = [];
    {
      const visitedAssets = new Set();
      (function traverseForFontFace(asset) {
        if (visitedAssets.has(asset)) return;
        visitedAssets.add(asset);

        if (asset.type === 'Css' && asset.isLoaded) {
          const seenNodes = new Set();

          const fontRelations = asset.outgoingRelations.filter(
            (relation) => relation.type === 'CssFontFaceSrc'
          );

          for (const fontRelation of fontRelations) {
            const node = fontRelation.node;

            if (!seenNodes.has(node)) {
              seenNodes.add(node);

              const fontFaceDeclaration = {
                relations: fontRelations.filter((r) => r.node === node),
                ...initialValueByProp,
              };

              node.walkDecls((declaration) => {
                const propName = declaration.prop.toLowerCase();
                if (propName === 'font-family') {
                  fontFaceDeclaration[propName] = cssFontParser.parseFontFamily(
                    declaration.value
                  )[0];
                } else {
                  fontFaceDeclaration[propName] = declaration.value;
                }
              });
              // Disregard incomplete @font-face declarations (must contain font-family and src per spec):
              if (
                fontFaceDeclaration['font-family'] &&
                fontFaceDeclaration.src
              ) {
                accumulatedFontFaceDeclarations.push(fontFaceDeclaration);
              }
            }
          }
        }

        // Traverse children using the pre-built index
        const rels = stylesheetRelsByFromAsset.get(asset) || [];
        for (const rel of rels) {
          if (
            fontFaceTraversalTypes.has(rel.type) ||
            (rel.to && rel.to.type === 'Html' && rel.to.isInline)
          ) {
            traverseForFontFace(rel.to);
          }
        }
      })(htmlOrSvgAsset);
    }

    // Group @font-face declarations that share family/style/weight but have
    // different unicode-range values. Each group's members cover a disjoint
    // subset of the Unicode space (common for CJK / large character-set fonts).
    if (accumulatedFontFaceDeclarations.length > 0) {
      const comboGroups = new Map();
      for (const fontFace of accumulatedFontFaceDeclarations) {
        const comboKey = `${fontFace['font-family']}/${fontFace['font-style']}/${fontFace['font-weight']}`;
        if (!comboGroups.has(comboKey)) {
          comboGroups.set(comboKey, []);
        }
        comboGroups.get(comboKey).push(fontFace);
      }
      for (const [comboKey, group] of comboGroups) {
        if (group.length > 1) {
          const withoutRange = group.filter((d) => !d['unicode-range']);
          if (withoutRange.length > 0) {
            throw new Error(
              `Multiple @font-face with the same font-family/font-style/font-weight combo but missing unicode-range on ${withoutRange.length} of ${group.length} declarations: ${comboKey}`
            );
          }
        }
      }
    }

    const fontFamiliesWithFeatureSettings = findFontFamiliesWithFeatureSettings(
      stylesheetsWithPredicates
    );

    const result = {
      accumulatedFontFaceDeclarations,
      stylesheetsWithPredicates,
      fontFamiliesWithFeatureSettings,
      fastPathKey: buildStylesheetKey(htmlOrSvgAsset, true),
    };
    stylesheetResultCache.set(key, result);
    return result;
  }

  const headlessBrowser =
    dynamic && new HeadlessBrowser({ console, chromeArgs });
  const globalTextByProps = [];
  const subTimings = {};

  if (debug && console)
    console.log('[subfont timing] collectTextsByPage started');
  const timingStart = Date.now();

  // Pre-compute stylesheet results for all pages
  const stylesheetPrecomputeStart = Date.now();
  const pageData = [];
  for (const htmlOrSvgAsset of htmlOrSvgAssets) {
    const {
      accumulatedFontFaceDeclarations,
      stylesheetsWithPredicates,
      fontFamiliesWithFeatureSettings,
      fastPathKey,
    } = getOrComputeStylesheetResults(htmlOrSvgAsset);
    fontFaceDeclarationsByHtmlOrSvgAsset.set(
      htmlOrSvgAsset,
      accumulatedFontFaceDeclarations
    );

    // Pages without any @font-face declarations need no tracing
    if (accumulatedFontFaceDeclarations.length === 0) {
      continue;
    }

    pageData.push({
      htmlOrSvgAsset,
      accumulatedFontFaceDeclarations,
      stylesheetsWithPredicates,
      fontFamiliesWithFeatureSettings,
      stylesheetCacheKey: fastPathKey,
    });
  }

  // Record the timing unconditionally, like every other subTimings entry,
  // so the returned object has it whether or not debug logging is on.
  subTimings['Stylesheet precompute'] = Date.now() - stylesheetPrecomputeStart;
  if (debug && console)
    console.log(
      `[subfont timing] Stylesheet precompute: ${subTimings['Stylesheet precompute']}ms (${pageData.length} pages with fonts)`
    );

  // Group pages by stylesheet cache key — pages sharing the same CSS
  // configuration produce identical font-tracer props, only text differs.
  const pagesByStylesheetKey = new Map();
  for (const pd of pageData) {
    let group = pagesByStylesheetKey.get(pd.stylesheetCacheKey);
    if (!group) {
      group = [];
      pagesByStylesheetKey.set(pd.stylesheetCacheKey, group);
    }
    group.push(pd);
  }

  const pagesNeedingFullTrace = [];
  const fastPathPages = [];
  if (headlessBrowser) {
    // Dynamic tracing is per page, so every page is fully traced
    for (const pd of pageData) {
      pagesNeedingFullTrace.push(pd);
    }
  } else {
    // The first page of each group is traced; the rest reuse its props
    for (const group of pagesByStylesheetKey.values()) {
      pagesNeedingFullTrace.push(group[0]);
      for (let i = 1; i < group.length; i++) {
        group[i].representativePd = group[0];
        fastPathPages.push(group[i]);
      }
    }
  }

  if (debug && console)
    console.log(
      `[subfont timing] CSS groups: ${pagesByStylesheetKey.size} unique, ${pagesNeedingFullTrace.length} to trace, ${fastPathPages.length} fast-path`
    );

  // Use worker pool for parallel fontTracer when there are enough pages
  const useWorkerPool = !headlessBrowser && pagesNeedingFullTrace.length >= 4;

  const tracingStart = Date.now();
  try {
    if (useWorkerPool) {
      const maxWorkers =
        concurrency > 0 ? concurrency : Math.min(os.cpus().length, 8);
      const numWorkers = Math.min(maxWorkers, pagesNeedingFullTrace.length);
      const pool = new FontTracerPool(numWorkers);
      await pool.init();

      try {
        const totalPages = pagesNeedingFullTrace.length;
        const showProgress = totalPages >= 10 && console;
        let tracedCount = 0;
        const tracePromises = pagesNeedingFullTrace.map(async (pd) => {
          try {
            pd.textByProps = await pool.trace(
              pd.htmlOrSvgAsset.text || '',
              pd.stylesheetsWithPredicates
            );
          } catch (err) {
            if (console) {
              console.warn(
                `Worker fontTracer failed for ${pd.htmlOrSvgAsset.url}, falling back to main thread: ${err.message}`
              );
            }
            pd.textByProps = traceOnMainThread(pd);
          }
          tracedCount++;
          if (showProgress && tracedCount % 10 === 0) {
            console.log(
              ` Tracing fonts: ${tracedCount}/${totalPages} pages...`
            );
          }
        });
        await Promise.all(tracePromises);
      } finally {
        // Tear the pool down exactly once, on success and on failure alike
        await pool.destroy();
      }
    } else if (pagesNeedingFullTrace.length > 0) {
      const totalPages = pagesNeedingFullTrace.length;
      const showProgress = totalPages >= 10 && console;
      for (let pi = 0; pi < totalPages; pi++) {
        const pd = pagesNeedingFullTrace[pi];
        pd.textByProps = traceOnMainThread(pd);
        if (headlessBrowser) {
          pd.textByProps.push(
            ...(await headlessBrowser.tracePage(pd.htmlOrSvgAsset))
          );
        }
        if (showProgress && (pi + 1) % 10 === 0) {
          console.log(` Tracing fonts: ${pi + 1}/${totalPages} pages...`);
        }
      }
    }

    subTimings['Full tracing'] = Date.now() - tracingStart;
    if (debug && console)
      console.log(
        `[subfont timing] Full tracing (${pagesNeedingFullTrace.length} pages): ${subTimings['Full tracing']}ms`
      );

    // Fast-path: for pages sharing CSS with a traced representative,
    // reuse the representative's props and extract only the text content.
    if (fastPathPages.length > 0) {
      const fastPathStart = Date.now();

      // Per representative: its unique props, the texts traced for each
      // props key, and the family/weight/style/stretch variants it used.
      const repDataCache = new Map();
      const getRepData = (representativePd) => {
        if (repDataCache.has(representativePd)) {
          return repDataCache.get(representativePd);
        }
        const repTextByProps = representativePd.textByProps;

        const uniquePropsMap = new Map();
        const textPerPropsKey = new Map();
        const seenVariantKeys = new Set();
        for (const entry of repTextByProps) {
          const family = entry.props['font-family'] || '';
          const propsKey = fontPropsKey(
            family,
            entry.props['font-weight'] || '',
            entry.props['font-style'] || '',
            entry.props['font-stretch'] || ''
          );
          if (!uniquePropsMap.has(propsKey)) {
            uniquePropsMap.set(propsKey, entry.props);
            textPerPropsKey.set(propsKey, []);
          }
          textPerPropsKey.get(propsKey).push(entry.text);
          if (family) {
            const weight = entry.props['font-weight'] || 'normal';
            const style = entry.props['font-style'] || 'normal';
            const stretch = entry.props['font-stretch'] || 'normal';
            for (const fam of cssFontParser.parseFontFamily(family)) {
              seenVariantKeys.add(
                fontPropsKey(fam.toLowerCase(), weight, style, stretch)
              );
            }
          }
        }
        const data = { uniquePropsMap, textPerPropsKey, seenVariantKeys };
        repDataCache.set(representativePd, data);
        return data;
      };

      let fastPathFallbacks = 0;
      for (const pd of fastPathPages) {
        // Inline font styles are not covered by the grouping key, so such
        // pages get a full trace of their own.
        if (hasInlineFontStyles(pd.htmlOrSvgAsset.text || '')) {
          fastPathFallbacks++;
          pd.textByProps = traceOnMainThread(pd);
          continue;
        }

        const { uniquePropsMap, textPerPropsKey, seenVariantKeys } = getRepData(
          pd.representativePd
        );

        // Check if any @font-face variants are unseen by the representative.
        // Only copy Maps when extensions are actually needed.
        let effectivePropsMap = uniquePropsMap;
        let effectiveTextPerPropsKey = textPerPropsKey;
        for (const decl of pd.accumulatedFontFaceDeclarations) {
          const family = decl['font-family'];
          if (!family) continue;
          const weight = decl['font-weight'] || 'normal';
          const style = decl['font-style'] || 'normal';
          const stretch = decl['font-stretch'] || 'normal';
          const variantKey = fontPropsKey(
            family.toLowerCase(),
            weight,
            style,
            stretch
          );
          if (!seenVariantKeys.has(variantKey)) {
            // Lazy-copy on first unseen variant
            if (effectivePropsMap === uniquePropsMap) {
              effectivePropsMap = new Map(uniquePropsMap);
              effectiveTextPerPropsKey = new Map(textPerPropsKey);
            }
            const propsKey = fontPropsKey(
              stringifyFontFamily(family),
              weight,
              style,
              stretch
            );
            if (!effectivePropsMap.has(propsKey)) {
              effectivePropsMap.set(propsKey, {
                'font-family': stringifyFontFamily(family),
                'font-weight': weight,
                'font-style': style,
                'font-stretch': stretch,
              });
              effectiveTextPerPropsKey.set(propsKey, []);
            }
          }
        }

        const pageText = extractVisibleText(pd.htmlOrSvgAsset.text || '');

        // Every props combination gets the page's visible text plus the
        // texts the representative traced for that combination.
        pd.textByProps = [];
        for (const [propsKey, props] of effectivePropsMap) {
          const repTexts = effectiveTextPerPropsKey.get(propsKey) || [];
          pd.textByProps.push({
            text: pageText + repTexts.join(''),
            props: { ...props },
          });
        }
      }
      subTimings['Fast-path extraction'] = Date.now() - fastPathStart;
      if (debug && console)
        console.log(
          `[subfont timing] Fast-path text extraction (${fastPathPages.length} pages, ${fastPathFallbacks} fell back to full trace): ${subTimings['Fast-path extraction']}ms`
        );
    }

    const assembleStart = Date.now();
    for (const pd of pageData) {
      for (const textByPropsEntry of pd.textByProps) {
        textByPropsEntry.htmlOrSvgAsset = pd.htmlOrSvgAsset;
      }
      // Use a loop instead of push(...spread) to avoid stack overflow on large sites
      for (const entry of pd.textByProps) {
        globalTextByProps.push(entry);
      }
      htmlOrSvgAssetTextsWithProps.push({
        htmlOrSvgAsset: pd.htmlOrSvgAsset,
        textByProps: pd.textByProps,
        accumulatedFontFaceDeclarations: pd.accumulatedFontFaceDeclarations,
        fontFamiliesWithFeatureSettings: pd.fontFamiliesWithFeatureSettings,
      });
    }

    subTimings['Result assembly'] = Date.now() - assembleStart;
    if (debug && console) {
      console.log(
        `[subfont timing] Result assembly: ${subTimings['Result assembly']}ms`
      );
      console.log(
        `[subfont timing] Total tracing+extraction+assembly: ${
          Date.now() - tracingStart
        }ms`
      );
    }
  } finally {
    if (headlessBrowser) {
      await headlessBrowser.close();
    }
  }

  const postProcessStart = Date.now();

  // Consolidated cache for per-declarations-key data.
  const declCache = new Map();

  const perPageLoopStart = Date.now();
  let snappingTime = 0;
  let globalUsageTime = 0;

  // Cache uniqueChars results by raw pageText string to avoid recomputing
  const uniqueCharsCache = new Map();
  let cloningTime = 0;

  for (const htmlOrSvgAssetTextsWithPropsEntry of htmlOrSvgAssetTextsWithProps) {
    const {
      htmlOrSvgAsset,
      accumulatedFontFaceDeclarations,
      fontFamiliesWithFeatureSettings,
    } = htmlOrSvgAssetTextsWithPropsEntry;

    // Get or compute the snapped global entries for this declarations set
    const declKey = getDeclarationsKey(accumulatedFontFaceDeclarations);
    if (!declCache.has(declKey)) {
      const snapStart = Date.now();
      declCache.set(declKey, {
        snappedEntries: computeSnappedGlobalEntries(
          accumulatedFontFaceDeclarations,
          globalTextByProps
        ),
        fontUsageTemplates: null,
        pageTextIndex: null,
        preloadIndex: null,
      });
      snappingTime += Date.now() - snapStart;
    }

    // Precompute global font usage templates and indices once per declarations key
    const declCacheEntry = declCache.get(declKey);
    const globalUsageStart = Date.now();
    getOrComputeGlobalFontUsages(
      declCacheEntry,
      accumulatedFontFaceDeclarations,
      text
    );
    globalUsageTime += Date.now() - globalUsageStart;

    const fontUsageTemplates = declCacheEntry.fontUsageTemplates;
    const pageTextIndex = declCacheEntry.pageTextIndex;

    // Build per-page fontUsages from precomputed templates
    const cloneStart = Date.now();
    // Map<fontUrl, string[]> of the texts this page renders with each font
    const assetTexts = pageTextIndex.get(htmlOrSvgAsset);
    htmlOrSvgAssetTextsWithPropsEntry.fontUsages = fontUsageTemplates.map(
      (template) => {
        const pageTexts = assetTexts
          ? assetTexts.get(template.fontUrl)
          : undefined;
        let pageTextStr = pageTexts ? pageTexts.join('') : '';
        if (template.extraTextsStr) {
          pageTextStr += template.extraTextsStr;
        }

        let pageTextUnique = uniqueCharsCache.get(pageTextStr);
        if (pageTextUnique === undefined) {
          pageTextUnique = uniqueChars(pageTextStr);
          uniqueCharsCache.set(pageTextStr, pageTextUnique);
        }

        let hasFontFeatureSettings = false;
        if (fontFamiliesWithFeatureSettings === true) {
          hasFontFeatureSettings = true;
        } else if (fontFamiliesWithFeatureSettings instanceof Set) {
          for (const f of template.fontFamilies) {
            if (fontFamiliesWithFeatureSettings.has(f.toLowerCase())) {
              hasFontFeatureSettings = true;
              break;
            }
          }
        }

        return {
          smallestOriginalSize: template.smallestOriginalSize,
          smallestOriginalFormat: template.smallestOriginalFormat,
          texts: template.texts,
          pageText: pageTextUnique,
          text: template.text,
          props: { ...template.props },
          fontUrl: template.fontUrl,
          fontFamilies: template.fontFamilies,
          fontStyles: template.fontStyles,
          fontStretches: template.fontStretches,
          fontWeights: template.fontWeights,
          fontVariationSettings: template.fontVariationSettings,
          // Preload a font when any text entry of this page snapped to it.
          // The (asset, fontUrl) index records every such pairing, whereas a
          // textAndProps -> fontUrl map keeps only the last URL when one
          // entry's font-family list matches several declared fonts.
          preload: Boolean(assetTexts && assetTexts.has(template.fontUrl)),
          hasFontFeatureSettings,
        };
      }
    );
    cloningTime += Date.now() - cloneStart;
  }

  subTimings['Per-page loop'] = Date.now() - perPageLoopStart;
  subTimings['Post-processing total'] = Date.now() - postProcessStart;
  if (debug && console)
    console.log(
      `[subfont timing] Per-page loop: ${subTimings['Per-page loop']}ms (snapping: ${snappingTime}ms, globalUsage: ${globalUsageTime}ms, cloning: ${cloningTime}ms)`
    );
  if (debug && console)
    console.log(
      `[subfont timing] Post-processing total: ${subTimings['Post-processing total']}ms`
    );
  if (debug && console)
    console.log(
      `[subfont timing] collectTextsByPage total: ${Date.now() - timingStart}ms`
    );

  // Strip the -subfont-text declarations from the @font-face rules now that
  // their text has been collected.
  for (const fontFaceDeclarations of fontFaceDeclarationsByHtmlOrSvgAsset.values()) {
    for (const fontFaceDeclaration of fontFaceDeclarations) {
      const firstRelation = fontFaceDeclaration.relations[0];
      const subfontTextNode = firstRelation.node.nodes.find(
        (childNode) =>
          childNode.type === 'decl' &&
          childNode.prop.toLowerCase() === '-subfont-text'
      );

      if (subfontTextNode) {
        subfontTextNode.remove();
        firstRelation.from.markDirty();
      }
    }
  }
  return {
    htmlOrSvgAssetTextsWithProps,
    fontFaceDeclarationsByHtmlOrSvgAsset,
    subTimings,
  };
}
1016
+
1017
+ module.exports = collectTextsByPage;