@turntrout/subfont 1.6.0 → 1.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +43 -43
- package/lib/FontTracerPool.js +49 -1
- package/lib/HeadlessBrowser.js +11 -3
- package/lib/collectTextsByPage.js +425 -352
- package/lib/escapeJsStringLiteral.js +13 -0
- package/lib/extractVisibleText.js +6 -2
- package/lib/fontConverter.js +25 -0
- package/lib/fontConverterWorker.js +16 -0
- package/lib/fontFaceHelpers.js +16 -4
- package/lib/gatherStylesheetsWithPredicates.js +4 -5
- package/lib/normalizeFontPropertyValue.js +1 -1
- package/lib/sfntCache.js +10 -7
- package/lib/subfont.js +57 -16
- package/lib/subsetFontWithGlyphs.js +33 -22
- package/lib/subsetFonts.js +166 -123
- package/lib/unquote.js +9 -4
- package/lib/warnAboutMissingGlyphs.js +36 -25
- package/lib/wasmQueue.js +6 -2
- package/package.json +1 -2
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
// Escape a value for safe inclusion in any JS string context (single-quoted,
// double-quoted, or template literal).
//
// JSON.stringify takes care of backslashes, double quotes, control characters
// (including newlines) and lone surrogates. It does NOT escape single quotes,
// backticks, `${`, `<`, U+2028 or U+2029, so those are handled explicitly:
//   - ' and ` would terminate a single-quoted / template literal.
//   - ${ would start an interpolation inside a template literal.
//   - < is escaped so </script> cannot close an inline script tag.
//   - U+2028 / U+2029 are line terminators in pre-ES2019 string literals.
//
// @param {*} str Value to escape; non-strings are coerced with String() so
//   that slicing off the JSON quotes cannot mangle them.
// @returns {string} The escaped text, without surrounding quotes.
function escapeJsStringLiteral(str) {
  return JSON.stringify(String(str))
    .slice(1, -1)
    .replace(/'/g, "\\'")
    .replace(/`/g, '\\x60')
    .replace(/\$\{/g, '\\x24{')
    .replace(/</g, '\\x3c')
    .replace(/\u2028/g, '\\u2028')
    .replace(/\u2029/g, '\\u2029');
}
|
|
12
|
+
|
|
13
|
+
module.exports = escapeJsStringLiteral;
|
|
@@ -81,8 +81,12 @@ const namedEntities = {
|
|
|
81
81
|
const entityRe = /&(?:#x([0-9a-fA-F]+)|#(\d+)|([a-zA-Z]+));/g;
|
|
82
82
|
function decodeEntities(str) {
|
|
83
83
|
return str.replace(entityRe, (match, hex, dec, name) => {
|
|
84
|
-
|
|
85
|
-
|
|
84
|
+
try {
|
|
85
|
+
if (hex) return String.fromCodePoint(parseInt(hex, 16));
|
|
86
|
+
if (dec) return String.fromCodePoint(parseInt(dec, 10));
|
|
87
|
+
} catch {
|
|
88
|
+
return match;
|
|
89
|
+
}
|
|
86
90
|
if (name && namedEntities[name.toLowerCase()] !== undefined) {
|
|
87
91
|
return namedEntities[name.toLowerCase()];
|
|
88
92
|
}
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
const pathModule = require('path');
|
|
2
|
+
const { Worker } = require('worker_threads');
|
|
3
|
+
|
|
4
|
+
const workerPath = pathModule.join(__dirname, 'fontConverterWorker.js');
|
|
5
|
+
|
|
6
|
+
/**
 * Convert a font buffer in a dedicated worker thread.
 *
 * A fresh Worker running fontConverterWorker.js is spawned for every call and
 * terminated as soon as it reports back.
 *
 * @param {Buffer} buffer Font data to convert (posted to the worker as-is).
 * @param {string} targetFormat Target format, forwarded to the worker.
 * @param {string} [sourceFormat] Source format, forwarded to the worker.
 * @returns {Promise<Buffer>} Resolves with the converted font. Rejects when
 *   the worker reports an error, throws, cannot exchange messages, or exits
 *   before replying.
 */
function convert(buffer, targetFormat, sourceFormat) {
  return new Promise((resolve, reject) => {
    const worker = new Worker(workerPath);

    // Several worker events can fire for a single failure (e.g. 'error'
    // followed by 'exit'); only the first outcome may settle the promise.
    let settled = false;
    const settle = (fn, value) => {
      if (settled) return;
      settled = true;
      fn(value);
    };

    worker.on('message', (msg) => {
      worker.terminate();
      if (msg.type === 'result') {
        settle(resolve, Buffer.from(msg.buffer));
      } else {
        settle(reject, new Error(msg.error));
      }
    });
    worker.on('error', (err) => {
      worker.terminate();
      settle(reject, err);
    });
    // A reply that cannot be deserialized would otherwise be dropped silently.
    worker.on('messageerror', (err) => {
      worker.terminate();
      settle(reject, err);
    });
    // Without this, a worker that exits without posting anything would leave
    // the returned promise pending forever.
    worker.on('exit', (code) => {
      settle(
        reject,
        new Error(
          `Font converter worker exited with code ${code} before returning a result`
        )
      );
    });

    try {
      worker.postMessage({ buffer, targetFormat, sourceFormat });
    } catch (err) {
      // postMessage throws synchronously when the payload cannot be cloned;
      // stop the idle worker so it does not linger.
      worker.terminate();
      settle(reject, err);
    }
  });
}
|
|
24
|
+
|
|
25
|
+
module.exports = { convert };
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
const { parentPort } = require('worker_threads');
|
|
2
|
+
const fontverter = require('fontverter');
|
|
3
|
+
|
|
4
|
+
// Handle one conversion request from the parent thread.
// Expects a message of the shape { buffer, targetFormat, sourceFormat } and
// replies with either { type: 'result', buffer } or { type: 'error', error }.
parentPort.on('message', async (msg) => {
  try {
    const buffer = Buffer.from(msg.buffer);
    const result = await fontverter.convert(
      buffer,
      msg.targetFormat,
      msg.sourceFormat
    );
    parentPort.postMessage({ type: 'result', buffer: result });
  } catch (err) {
    // Thrown values are not guaranteed to be Error instances; fall back to
    // String(err) so the parent never receives an undefined/empty message.
    const error =
      err && typeof err.message === 'string' && err.message !== ''
        ? err.message
        : String(err);
    parentPort.postMessage({ type: 'error', error });
  }
});
|
package/lib/fontFaceHelpers.js
CHANGED
|
@@ -75,15 +75,27 @@ function getFontFaceDeclarationText(node, relations) {
|
|
|
75
75
|
|
|
76
76
|
const fontOrder = ['woff2', 'woff', 'truetype'];
|
|
77
77
|
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
78
|
+
// Cache base64-encoded data URIs keyed by the underlying Buffer. Subset
|
|
79
|
+
// buffers are shared across pages (propagated from the canonical fontUsage),
|
|
80
|
+
// so without this every page re-encodes the same multi-hundred-KB buffer.
|
|
81
|
+
const subsetDataUrlCache = new WeakMap();
|
|
82
|
+
function getSubsetDataUrls(subsetsObj) {
|
|
83
|
+
let cached = subsetDataUrlCache.get(subsetsObj);
|
|
84
|
+
if (cached) return cached;
|
|
85
|
+
cached = fontOrder
|
|
86
|
+
.filter((format) => subsetsObj[format])
|
|
81
87
|
.map((format) => ({
|
|
82
88
|
format,
|
|
83
|
-
url: `data:${contentTypeByFontFormat[format]};base64,${
|
|
89
|
+
url: `data:${contentTypeByFontFormat[format]};base64,${subsetsObj[
|
|
84
90
|
format
|
|
85
91
|
].toString('base64')}`,
|
|
86
92
|
}));
|
|
93
|
+
subsetDataUrlCache.set(subsetsObj, cached);
|
|
94
|
+
return cached;
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
function getFontFaceForFontUsage(fontUsage) {
|
|
98
|
+
const subsets = getSubsetDataUrls(fontUsage.subsets);
|
|
87
99
|
|
|
88
100
|
const resultString = ['@font-face {'];
|
|
89
101
|
|
|
@@ -3,18 +3,17 @@ module.exports = function gatherStylesheetsWithPredicates(
|
|
|
3
3
|
htmlAsset,
|
|
4
4
|
relationIndex
|
|
5
5
|
) {
|
|
6
|
-
const
|
|
6
|
+
const visiting = new Set();
|
|
7
7
|
const incomingMedia = [];
|
|
8
8
|
const conditionalCommentConditionStack = [];
|
|
9
9
|
const result = [];
|
|
10
10
|
(function traverse(asset, isWithinNotIeConditionalComment, isWithinNoscript) {
|
|
11
|
-
if (
|
|
12
|
-
// Cycle detected
|
|
11
|
+
if (visiting.has(asset)) {
|
|
13
12
|
return;
|
|
14
13
|
} else if (!asset.isLoaded) {
|
|
15
14
|
return;
|
|
16
15
|
}
|
|
17
|
-
|
|
16
|
+
visiting.add(asset);
|
|
18
17
|
// Use pre-built index if available, otherwise fall back to findRelations
|
|
19
18
|
const relations = relationIndex
|
|
20
19
|
? relationIndex.get(asset) || []
|
|
@@ -60,7 +59,7 @@ module.exports = function gatherStylesheetsWithPredicates(
|
|
|
60
59
|
}
|
|
61
60
|
}
|
|
62
61
|
}
|
|
63
|
-
|
|
62
|
+
visiting.delete(asset);
|
|
64
63
|
if (asset.type === 'Css') {
|
|
65
64
|
const predicates = {};
|
|
66
65
|
for (const incomingMedium of incomingMedia) {
|
|
@@ -34,7 +34,7 @@ function isValidWeight(weight) {
|
|
|
34
34
|
function normalizeFontPropertyValue(propName, value) {
|
|
35
35
|
const propNameLowerCase = propName.toLowerCase();
|
|
36
36
|
if (value === undefined) {
|
|
37
|
-
return initialValueByProp[
|
|
37
|
+
return initialValueByProp[propNameLowerCase];
|
|
38
38
|
}
|
|
39
39
|
if (propNameLowerCase === 'font-family') {
|
|
40
40
|
return unquote(value);
|
package/lib/sfntCache.js
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
const fontverter = require('fontverter');
|
|
2
|
+
const { convert } = require('./fontConverter');
|
|
2
3
|
|
|
3
4
|
const sfntPromiseByBuffer = new WeakMap();
|
|
4
5
|
|
|
@@ -9,13 +10,15 @@ function toSfnt(buffer) {
|
|
|
9
10
|
let promise;
|
|
10
11
|
try {
|
|
11
12
|
const format = fontverter.detectFormat(buffer);
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
13
|
+
if (format === 'sfnt') {
|
|
14
|
+
promise = Promise.resolve(buffer);
|
|
15
|
+
} else if (format === 'woff2') {
|
|
16
|
+
promise = convert(buffer, 'sfnt');
|
|
17
|
+
} else {
|
|
18
|
+
promise = fontverter.convert(buffer, 'sfnt');
|
|
19
|
+
}
|
|
20
|
+
} catch {
|
|
21
|
+
promise = convert(buffer, 'sfnt');
|
|
19
22
|
}
|
|
20
23
|
// Evict on rejection so retries with the same buffer aren't stuck
|
|
21
24
|
promise.catch(() => sfntPromiseByBuffer.delete(buffer));
|
package/lib/subfont.js
CHANGED
|
@@ -370,22 +370,63 @@ module.exports = async function subfont(
|
|
|
370
370
|
|
|
371
371
|
const reportingPhase = trackPhase('output reporting');
|
|
372
372
|
if (debug) {
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
373
|
+
// One entry per unique (fontUrl, props) variant. A variable-font URL can
|
|
374
|
+
// back multiple variants, so fontUrl alone is too coarse. Codepoint unions
|
|
375
|
+
// and subset sizes are per-font, so the remaining per-page variation
|
|
376
|
+
// worth surfacing is just which pages reference the variant.
|
|
377
|
+
const SAMPLE_PAGES = 5;
|
|
378
|
+
const byVariant = new Map();
|
|
379
|
+
for (const { assetFileName, fontUsages } of fontInfo) {
|
|
380
|
+
for (const fu of fontUsages) {
|
|
381
|
+
const p = fu.props || {};
|
|
382
|
+
const key = [
|
|
383
|
+
fu.fontUrl || '[inline]',
|
|
384
|
+
p['font-family'],
|
|
385
|
+
p['font-weight'],
|
|
386
|
+
p['font-style'],
|
|
387
|
+
p['font-stretch'],
|
|
388
|
+
].join('\0');
|
|
389
|
+
let entry = byVariant.get(key);
|
|
390
|
+
if (!entry) {
|
|
391
|
+
entry = {
|
|
392
|
+
fontUrl: fu.fontUrl,
|
|
393
|
+
props: fu.props,
|
|
394
|
+
preload: fu.preload,
|
|
395
|
+
fullyInstanced: fu.fullyInstanced,
|
|
396
|
+
numAxesPinned: fu.numAxesPinned,
|
|
397
|
+
numAxesReduced: fu.numAxesReduced,
|
|
398
|
+
smallestOriginalFormat: fu.smallestOriginalFormat,
|
|
399
|
+
smallestSubsetFormat: fu.smallestSubsetFormat,
|
|
400
|
+
smallestOriginalSize: fu.smallestOriginalSize,
|
|
401
|
+
smallestSubsetSize: fu.smallestSubsetSize,
|
|
402
|
+
codepoints: fu.codepoints
|
|
403
|
+
? {
|
|
404
|
+
original: fu.codepoints.original.length,
|
|
405
|
+
used: fu.codepoints.used.length,
|
|
406
|
+
unused: fu.codepoints.unused.length,
|
|
407
|
+
}
|
|
408
|
+
: undefined,
|
|
409
|
+
pageCount: 0,
|
|
410
|
+
samplePages: [],
|
|
411
|
+
};
|
|
412
|
+
byVariant.set(key, entry);
|
|
413
|
+
}
|
|
414
|
+
entry.pageCount += 1;
|
|
415
|
+
if (entry.samplePages.length < SAMPLE_PAGES) {
|
|
416
|
+
entry.samplePages.push(assetFileName);
|
|
417
|
+
}
|
|
418
|
+
}
|
|
419
|
+
}
|
|
420
|
+
for (const entry of byVariant.values()) {
|
|
421
|
+
const remaining = entry.pageCount - entry.samplePages.length;
|
|
422
|
+
if (remaining > 0) {
|
|
423
|
+
entry.samplePages.push(`...and ${remaining} more`);
|
|
424
|
+
}
|
|
425
|
+
}
|
|
426
|
+
log(
|
|
427
|
+
`Font variants (aggregated across ${fontInfo.length} page${fontInfo.length === 1 ? '' : 's'}):`
|
|
428
|
+
);
|
|
429
|
+
log(util.inspect([...byVariant.values()], false, 99));
|
|
389
430
|
}
|
|
390
431
|
|
|
391
432
|
let totalSavings = sumSizesBefore - sumSizesAfter;
|
|
@@ -2,6 +2,7 @@ const os = require('os');
|
|
|
2
2
|
const { readFile } = require('fs').promises;
|
|
3
3
|
const fontverter = require('fontverter');
|
|
4
4
|
const { toSfnt } = require('./sfntCache');
|
|
5
|
+
const { convert: convertInWorker } = require('./fontConverter');
|
|
5
6
|
|
|
6
7
|
// hb_subset_sets_t enum values — https://github.com/harfbuzz/harfbuzz/blob/main/src/hb-subset.h
|
|
7
8
|
const HB_SUBSET_SETS_GLYPH_INDEX = 0;
|
|
@@ -51,6 +52,7 @@ async function initPool() {
|
|
|
51
52
|
|
|
52
53
|
// Waiters queue: callers waiting for an idle WASM instance.
|
|
53
54
|
const _waiters = [];
|
|
55
|
+
const ACQUIRE_TIMEOUT_MS = 120_000;
|
|
54
56
|
|
|
55
57
|
async function acquireInstance() {
|
|
56
58
|
await initPool();
|
|
@@ -60,7 +62,23 @@ async function acquireInstance() {
|
|
|
60
62
|
return idle;
|
|
61
63
|
}
|
|
62
64
|
// All instances busy — wait for one to be released.
|
|
63
|
-
return new Promise((resolve) =>
|
|
65
|
+
return new Promise((resolve, reject) => {
|
|
66
|
+
const timer = setTimeout(() => {
|
|
67
|
+
const idx = _waiters.indexOf(entry);
|
|
68
|
+
if (idx !== -1) _waiters.splice(idx, 1);
|
|
69
|
+
reject(
|
|
70
|
+
new Error(
|
|
71
|
+
`Timed out waiting for a WASM subsetting instance after ${ACQUIRE_TIMEOUT_MS}ms`
|
|
72
|
+
)
|
|
73
|
+
);
|
|
74
|
+
}, ACQUIRE_TIMEOUT_MS);
|
|
75
|
+
timer.unref();
|
|
76
|
+
const entry = (inst) => {
|
|
77
|
+
clearTimeout(timer);
|
|
78
|
+
resolve(inst);
|
|
79
|
+
};
|
|
80
|
+
_waiters.push(entry);
|
|
81
|
+
});
|
|
64
82
|
}
|
|
65
83
|
|
|
66
84
|
function releaseInstance(inst) {
|
|
@@ -71,16 +89,11 @@ function releaseInstance(inst) {
|
|
|
71
89
|
}
|
|
72
90
|
}
|
|
73
91
|
|
|
74
|
-
//
|
|
75
|
-
//
|
|
76
|
-
//
|
|
77
|
-
//
|
|
78
|
-
//
|
|
79
|
-
const convertLimiter = require('p-limit')(1);
|
|
80
|
-
|
|
81
|
-
function usesWawoff2(buffer) {
|
|
82
|
-
return buffer.length >= 4 && buffer.toString('ascii', 0, 4) === 'wOF2';
|
|
83
|
-
}
|
|
92
|
+
// woff2 encode/decode uses wawoff2's WASM module, which has a shared
|
|
93
|
+
// instance that corrupts memory under concurrent use. Instead of
|
|
94
|
+
// serializing to p-limit(1) in the main thread, we route woff2
|
|
95
|
+
// operations through ./fontConverter — each worker thread loads its
|
|
96
|
+
// own wawoff2 instance, enabling safe parallel compression.
|
|
84
97
|
|
|
85
98
|
// Re-create on every call — WASM memory.buffer is detached when memory grows,
|
|
86
99
|
// so a cached Uint8Array would silently read/write stale data.
|
|
@@ -88,9 +101,11 @@ function getHeapu8(exports) {
|
|
|
88
101
|
return new Uint8Array(exports.memory.buffer);
|
|
89
102
|
}
|
|
90
103
|
|
|
104
|
+
// >>> 0 keeps the accumulator unsigned; without it, tags whose first byte
|
|
105
|
+
// exceeds 0x7F would overflow into negative i32 territory after << 24.
|
|
91
106
|
function HB_TAG(str) {
|
|
92
107
|
return str.split('').reduce(function (a, ch) {
|
|
93
|
-
return (a << 8) + ch.charCodeAt(0);
|
|
108
|
+
return ((a << 8) >>> 0) + ch.charCodeAt(0);
|
|
94
109
|
}, 0);
|
|
95
110
|
}
|
|
96
111
|
|
|
@@ -238,11 +253,8 @@ async function subsetFontWithGlyphs(
|
|
|
238
253
|
) {
|
|
239
254
|
// Reuse cached sfnt conversion when available (same buffer may have
|
|
240
255
|
// been converted by getFontInfo or collectFeatureGlyphIds already).
|
|
241
|
-
//
|
|
242
|
-
|
|
243
|
-
const ttf = usesWawoff2(originalFont)
|
|
244
|
-
? await convertLimiter(() => toSfnt(originalFont))
|
|
245
|
-
: await toSfnt(originalFont);
|
|
256
|
+
// sfntCache routes woff2 decompression through a worker thread.
|
|
257
|
+
const ttf = await toSfnt(originalFont);
|
|
246
258
|
|
|
247
259
|
const inst = await acquireInstance();
|
|
248
260
|
const { exports } = inst;
|
|
@@ -287,12 +299,11 @@ async function subsetFontWithGlyphs(
|
|
|
287
299
|
released = true;
|
|
288
300
|
releaseInstance(inst);
|
|
289
301
|
|
|
290
|
-
//
|
|
291
|
-
//
|
|
302
|
+
// Route woff2 compression to a worker thread (each spawns its own
|
|
303
|
+
// wawoff2 WASM instance). Non-woff2 formats use JS-based converters
|
|
304
|
+
// that are safe to call concurrently in the main thread.
|
|
292
305
|
return targetFormat === 'woff2'
|
|
293
|
-
?
|
|
294
|
-
fontverter.convert(subsetFont, targetFormat, 'truetype')
|
|
295
|
-
)
|
|
306
|
+
? convertInWorker(subsetFont, targetFormat, 'truetype')
|
|
296
307
|
: fontverter.convert(subsetFont, targetFormat, 'truetype');
|
|
297
308
|
} finally {
|
|
298
309
|
if (!released) releaseInstance(inst);
|