@turntrout/subfont 1.6.0 → 1.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,13 @@
1
/**
 * Escape a value so it can be embedded inside any JS string literal
 * (single-quoted, double-quoted, or template literal), including a literal
 * that lives in an inline <script> element.
 *
 * JSON.stringify takes care of backslashes, double quotes and control
 * characters such as newlines. It does NOT escape single quotes, backticks,
 * the `${` interpolation opener, `<`, or U+2028/U+2029 (both are legal raw
 * characters in JSON strings), so those are handled explicitly below.
 *
 * @param {*} str - Value to escape; non-strings are coerced with String().
 * @returns {string} The escaped text, without surrounding quotes.
 */
function escapeJsStringLiteral(str) {
  // Coerce first: JSON.stringify(5) is `5` (no quotes), so slicing off the
  // "quotes" of a non-string would silently drop real characters, and
  // JSON.stringify(undefined) is undefined, which would throw on .slice.
  return JSON.stringify(String(str))
    .slice(1, -1)
    .replace(/'/g, "\\'")
    .replace(/`/g, '\\x60')
    // `${` would start an interpolation inside a template literal.
    .replace(/\$(?=\{)/g, '\\x24')
    // The < escape prevents </script> from closing an inline script tag.
    .replace(/</g, '\\x3c')
    // U+2028/U+2029 are line terminators that pre-ES2019 engines reject
    // inside string literals.
    .replace(/\u2028/g, '\\u2028')
    .replace(/\u2029/g, '\\u2029');
}
12
+
13
+ module.exports = escapeJsStringLiteral;
@@ -81,8 +81,12 @@ const namedEntities = {
81
81
  const entityRe = /&(?:#x([0-9a-fA-F]+)|#(\d+)|([a-zA-Z]+));/g;
82
82
  function decodeEntities(str) {
83
83
  return str.replace(entityRe, (match, hex, dec, name) => {
84
- if (hex) return String.fromCodePoint(parseInt(hex, 16));
85
- if (dec) return String.fromCodePoint(parseInt(dec, 10));
84
+ try {
85
+ if (hex) return String.fromCodePoint(parseInt(hex, 16));
86
+ if (dec) return String.fromCodePoint(parseInt(dec, 10));
87
+ } catch {
88
+ return match;
89
+ }
86
90
  if (name && namedEntities[name.toLowerCase()] !== undefined) {
87
91
  return namedEntities[name.toLowerCase()];
88
92
  }
@@ -0,0 +1,25 @@
1
+ const pathModule = require('path');
2
+ const { Worker } = require('worker_threads');
3
+
4
+ const workerPath = pathModule.join(__dirname, 'fontConverterWorker.js');
5
+
6
/**
 * Convert a font buffer to another format inside a dedicated worker thread.
 *
 * A fresh worker running `fontConverterWorker.js` is spawned for every call
 * and terminated as soon as it reports a result or an error.
 *
 * @param {Buffer|Uint8Array} buffer - Font data to convert.
 * @param {string} targetFormat - Forwarded to the worker as `targetFormat`.
 * @param {string} [sourceFormat] - Forwarded to the worker as `sourceFormat`.
 * @returns {Promise<Buffer>} Resolves with the converted font; rejects with
 *   an Error if the worker reports a failure, emits `error`, or exits
 *   without ever replying.
 */
function convert(buffer, targetFormat, sourceFormat) {
  return new Promise((resolve, reject) => {
    const worker = new Worker(workerPath);
    worker.on('message', (msg) => {
      worker.terminate();
      if (msg.type === 'result') {
        resolve(Buffer.from(msg.buffer));
      } else {
        reject(new Error(msg.error));
      }
    });
    worker.on('error', (err) => {
      worker.terminate();
      reject(err);
    });
    // A worker can stop without posting a message or emitting 'error'
    // (e.g. process.exit() inside the worker). Without this handler the
    // promise would stay pending forever. After a normal reply this also
    // fires (because of terminate()), but rejecting an already-settled
    // promise is a no-op.
    worker.on('exit', (code) => {
      reject(
        new Error(
          `Font converter worker exited with code ${code} before returning a result`
        )
      );
    });
    worker.postMessage({ buffer, targetFormat, sourceFormat });
  });
}
24
+
25
+ module.exports = { convert };
@@ -0,0 +1,16 @@
1
+ const { parentPort } = require('worker_threads');
2
+ const fontverter = require('fontverter');
3
+
4
// Handle one conversion request from the parent thread.
// Expects `{ buffer, targetFormat, sourceFormat }` and replies with either
// `{ type: 'result', buffer }` or `{ type: 'error', error: <string> }`.
parentPort.on('message', async (msg) => {
  try {
    const buffer = Buffer.from(msg.buffer);
    const result = await fontverter.convert(
      buffer,
      msg.targetFormat,
      msg.sourceFormat
    );
    parentPort.postMessage({ type: 'result', buffer: result });
  } catch (err) {
    // Thrown values are not guaranteed to be Error instances; fall back to
    // String(err) so the parent never receives `error: undefined`.
    const error =
      err && typeof err.message === 'string' ? err.message : String(err);
    parentPort.postMessage({ type: 'error', error });
  }
});
@@ -75,15 +75,27 @@ function getFontFaceDeclarationText(node, relations) {
75
75
 
76
76
  const fontOrder = ['woff2', 'woff', 'truetype'];
77
77
 
78
- function getFontFaceForFontUsage(fontUsage) {
79
- const subsets = fontOrder
80
- .filter((format) => fontUsage.subsets[format])
78
+ // Cache base64-encoded data URIs keyed by the `subsets` object. Subset
79
+ // buffers are shared across pages (propagated from the canonical fontUsage),
80
+ // so without this every page re-encodes the same multi-hundred-KB buffer.
81
+ const subsetDataUrlCache = new WeakMap();
82
+ function getSubsetDataUrls(subsetsObj) {
83
+ let cached = subsetDataUrlCache.get(subsetsObj);
84
+ if (cached) return cached;
85
+ cached = fontOrder
86
+ .filter((format) => subsetsObj[format])
81
87
  .map((format) => ({
82
88
  format,
83
- url: `data:${contentTypeByFontFormat[format]};base64,${fontUsage.subsets[
89
+ url: `data:${contentTypeByFontFormat[format]};base64,${subsetsObj[
84
90
  format
85
91
  ].toString('base64')}`,
86
92
  }));
93
+ subsetDataUrlCache.set(subsetsObj, cached);
94
+ return cached;
95
+ }
96
+
97
+ function getFontFaceForFontUsage(fontUsage) {
98
+ const subsets = getSubsetDataUrls(fontUsage.subsets);
87
99
 
88
100
  const resultString = ['@font-face {'];
89
101
 
@@ -3,18 +3,17 @@ module.exports = function gatherStylesheetsWithPredicates(
3
3
  htmlAsset,
4
4
  relationIndex
5
5
  ) {
6
- const assetStack = [];
6
+ const visiting = new Set();
7
7
  const incomingMedia = [];
8
8
  const conditionalCommentConditionStack = [];
9
9
  const result = [];
10
10
  (function traverse(asset, isWithinNotIeConditionalComment, isWithinNoscript) {
11
- if (assetStack.includes(asset)) {
12
- // Cycle detected
11
+ if (visiting.has(asset)) {
13
12
  return;
14
13
  } else if (!asset.isLoaded) {
15
14
  return;
16
15
  }
17
- assetStack.push(asset);
16
+ visiting.add(asset);
18
17
  // Use pre-built index if available, otherwise fall back to findRelations
19
18
  const relations = relationIndex
20
19
  ? relationIndex.get(asset) || []
@@ -60,7 +59,7 @@ module.exports = function gatherStylesheetsWithPredicates(
60
59
  }
61
60
  }
62
61
  }
63
- assetStack.pop();
62
+ visiting.delete(asset);
64
63
  if (asset.type === 'Css') {
65
64
  const predicates = {};
66
65
  for (const incomingMedium of incomingMedia) {
@@ -34,7 +34,7 @@ function isValidWeight(weight) {
34
34
  function normalizeFontPropertyValue(propName, value) {
35
35
  const propNameLowerCase = propName.toLowerCase();
36
36
  if (value === undefined) {
37
- return initialValueByProp[propName];
37
+ return initialValueByProp[propNameLowerCase];
38
38
  }
39
39
  if (propNameLowerCase === 'font-family') {
40
40
  return unquote(value);
package/lib/sfntCache.js CHANGED
@@ -1,4 +1,5 @@
1
1
  const fontverter = require('fontverter');
2
+ const { convert } = require('./fontConverter');
2
3
 
3
4
  const sfntPromiseByBuffer = new WeakMap();
4
5
 
@@ -9,13 +10,15 @@ function toSfnt(buffer) {
9
10
  let promise;
10
11
  try {
11
12
  const format = fontverter.detectFormat(buffer);
12
- promise =
13
- format === 'sfnt'
14
- ? Promise.resolve(buffer)
15
- : fontverter.convert(buffer, 'sfnt');
16
- } catch (err) {
17
- // Unrecognized format — don't cache so retries work
18
- return fontverter.convert(buffer, 'sfnt');
13
+ if (format === 'sfnt') {
14
+ promise = Promise.resolve(buffer);
15
+ } else if (format === 'woff2') {
16
+ promise = convert(buffer, 'sfnt');
17
+ } else {
18
+ promise = fontverter.convert(buffer, 'sfnt');
19
+ }
20
+ } catch {
21
+ promise = convert(buffer, 'sfnt');
19
22
  }
20
23
  // Evict on rejection so retries with the same buffer aren't stuck
21
24
  promise.catch(() => sfntPromiseByBuffer.delete(buffer));
package/lib/subfont.js CHANGED
@@ -370,22 +370,63 @@ module.exports = async function subfont(
370
370
 
371
371
  const reportingPhase = trackPhase('output reporting');
372
372
  if (debug) {
373
- const compactFontInfo = fontInfo.map(({ fontUsages, ...rest }) => ({
374
- ...rest,
375
- fontUsages: fontUsages.map(({ codepoints, texts, ...fu }) => ({
376
- ...fu,
377
- codepoints: codepoints
378
- ? {
379
- original: `[${codepoints.original.length} codepoints]`,
380
- used: `[${codepoints.used.length} codepoints]`,
381
- unused: `[${codepoints.unused.length} codepoints]`,
382
- page: `[${codepoints.page.length} codepoints]`,
383
- }
384
- : undefined,
385
- texts: texts ? `[${texts.length} entries]` : undefined,
386
- })),
387
- }));
388
- log(util.inspect(compactFontInfo, false, 99));
373
+ // One entry per unique (fontUrl, props) variant. A variable-font URL can
374
+ // back multiple variants, so fontUrl alone is too coarse. Codepoint unions
375
+ // and subset sizes are per-font, so the remaining per-page variation
376
+ // worth surfacing is just which pages reference the variant.
377
+ const SAMPLE_PAGES = 5;
378
+ const byVariant = new Map();
379
+ for (const { assetFileName, fontUsages } of fontInfo) {
380
+ for (const fu of fontUsages) {
381
+ const p = fu.props || {};
382
+ const key = [
383
+ fu.fontUrl || '[inline]',
384
+ p['font-family'],
385
+ p['font-weight'],
386
+ p['font-style'],
387
+ p['font-stretch'],
388
+ ].join('\0');
389
+ let entry = byVariant.get(key);
390
+ if (!entry) {
391
+ entry = {
392
+ fontUrl: fu.fontUrl,
393
+ props: fu.props,
394
+ preload: fu.preload,
395
+ fullyInstanced: fu.fullyInstanced,
396
+ numAxesPinned: fu.numAxesPinned,
397
+ numAxesReduced: fu.numAxesReduced,
398
+ smallestOriginalFormat: fu.smallestOriginalFormat,
399
+ smallestSubsetFormat: fu.smallestSubsetFormat,
400
+ smallestOriginalSize: fu.smallestOriginalSize,
401
+ smallestSubsetSize: fu.smallestSubsetSize,
402
+ codepoints: fu.codepoints
403
+ ? {
404
+ original: fu.codepoints.original.length,
405
+ used: fu.codepoints.used.length,
406
+ unused: fu.codepoints.unused.length,
407
+ }
408
+ : undefined,
409
+ pageCount: 0,
410
+ samplePages: [],
411
+ };
412
+ byVariant.set(key, entry);
413
+ }
414
+ entry.pageCount += 1;
415
+ if (entry.samplePages.length < SAMPLE_PAGES) {
416
+ entry.samplePages.push(assetFileName);
417
+ }
418
+ }
419
+ }
420
+ for (const entry of byVariant.values()) {
421
+ const remaining = entry.pageCount - entry.samplePages.length;
422
+ if (remaining > 0) {
423
+ entry.samplePages.push(`...and ${remaining} more`);
424
+ }
425
+ }
426
+ log(
427
+ `Font variants (aggregated across ${fontInfo.length} page${fontInfo.length === 1 ? '' : 's'}):`
428
+ );
429
+ log(util.inspect([...byVariant.values()], false, 99));
389
430
  }
390
431
 
391
432
  let totalSavings = sumSizesBefore - sumSizesAfter;
@@ -2,6 +2,7 @@ const os = require('os');
2
2
  const { readFile } = require('fs').promises;
3
3
  const fontverter = require('fontverter');
4
4
  const { toSfnt } = require('./sfntCache');
5
+ const { convert: convertInWorker } = require('./fontConverter');
5
6
 
6
7
  // hb_subset_sets_t enum values — https://github.com/harfbuzz/harfbuzz/blob/main/src/hb-subset.h
7
8
  const HB_SUBSET_SETS_GLYPH_INDEX = 0;
@@ -51,6 +52,7 @@ async function initPool() {
51
52
 
52
53
  // Waiters queue: callers waiting for an idle WASM instance.
53
54
  const _waiters = [];
55
+ const ACQUIRE_TIMEOUT_MS = 120_000;
54
56
 
55
57
  async function acquireInstance() {
56
58
  await initPool();
@@ -60,7 +62,23 @@ async function acquireInstance() {
60
62
  return idle;
61
63
  }
62
64
  // All instances busy — wait for one to be released.
63
- return new Promise((resolve) => _waiters.push(resolve));
65
+ return new Promise((resolve, reject) => {
66
+ const timer = setTimeout(() => {
67
+ const idx = _waiters.indexOf(entry);
68
+ if (idx !== -1) _waiters.splice(idx, 1);
69
+ reject(
70
+ new Error(
71
+ `Timed out waiting for a WASM subsetting instance after ${ACQUIRE_TIMEOUT_MS}ms`
72
+ )
73
+ );
74
+ }, ACQUIRE_TIMEOUT_MS);
75
+ timer.unref();
76
+ const entry = (inst) => {
77
+ clearTimeout(timer);
78
+ resolve(inst);
79
+ };
80
+ _waiters.push(entry);
81
+ });
64
82
  }
65
83
 
66
84
  function releaseInstance(inst) {
@@ -71,16 +89,11 @@ function releaseInstance(inst) {
71
89
  }
72
90
  }
73
91
 
74
- // Serialize fontverter.convert calls — the wawoff2 module (used internally by
75
- // fontverter for WOFF2 compression) has a shared WASM instance whose memory
76
- // is corrupted by concurrent calls. Only woff2 paths need this; woff and
77
- // truetype conversions use separate synchronous/JS libraries that are safe
78
- // to call in parallel.
79
- const convertLimiter = require('p-limit')(1);
80
-
81
- function usesWawoff2(buffer) {
82
- return buffer.length >= 4 && buffer.toString('ascii', 0, 4) === 'wOF2';
83
- }
92
+ // woff2 encode/decode uses wawoff2's WASM module, which has a shared
93
+ // instance that corrupts memory under concurrent use. Instead of
94
+ // serializing with p-limit(1) in the main thread, we route woff2
95
+ // operations through fontConverter — each worker thread loads its
96
+ // own wawoff2 instance, enabling safe parallel compression.
84
97
 
85
98
  // Re-create on every call — WASM memory.buffer is detached when memory grows,
86
99
  // so a cached Uint8Array would silently read/write stale data.
@@ -88,9 +101,11 @@ function getHeapu8(exports) {
88
101
  return new Uint8Array(exports.memory.buffer);
89
102
  }
90
103
 
104
+ // >>> 0 keeps the accumulator unsigned; without it, tags whose first byte
105
+ // exceeds 0x7F would overflow into negative i32 territory after << 24.
91
106
  function HB_TAG(str) {
92
107
  return str.split('').reduce(function (a, ch) {
93
- return (a << 8) + ch.charCodeAt(0);
108
+ return ((a << 8) >>> 0) + ch.charCodeAt(0);
94
109
  }, 0);
95
110
  }
96
111
 
@@ -238,11 +253,8 @@ async function subsetFontWithGlyphs(
238
253
  ) {
239
254
  // Reuse cached sfnt conversion when available (same buffer may have
240
255
  // been converted by getFontInfo or collectFeatureGlyphIds already).
241
- // Serialize through convertLimiter when the source is woff2 — toSfnt
242
- // would call wawoff2.decompress which isn't concurrency-safe.
243
- const ttf = usesWawoff2(originalFont)
244
- ? await convertLimiter(() => toSfnt(originalFont))
245
- : await toSfnt(originalFont);
256
+ // sfntCache routes woff2 decompression through the worker pool.
257
+ const ttf = await toSfnt(originalFont);
246
258
 
247
259
  const inst = await acquireInstance();
248
260
  const { exports } = inst;
@@ -287,12 +299,11 @@ async function subsetFontWithGlyphs(
287
299
  released = true;
288
300
  releaseInstance(inst);
289
301
 
290
- // Only serialize through convertLimiter when targeting woff2 —
291
- // woff and truetype conversions don't use wawoff2.
302
+ // Route woff2 compression to a worker thread (each spawns its own
303
+ // wawoff2 WASM instance). Non-woff2 formats use JS-based converters
304
+ // that are safe to call concurrently in the main thread.
292
305
  return targetFormat === 'woff2'
293
- ? convertLimiter(() =>
294
- fontverter.convert(subsetFont, targetFormat, 'truetype')
295
- )
306
+ ? convertInWorker(subsetFont, targetFormat, 'truetype')
296
307
  : fontverter.convert(subsetFont, targetFormat, 'truetype');
297
308
  } finally {
298
309
  if (!released) releaseInstance(inst);