scribe.js-ocr 0.7.4 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build-deno-compile.sh +30 -0
- package/cli/cli.js +46 -18
- package/cli/detectPDFType.js +1 -2
- package/cli/extract.js +14 -7
- package/cli/main.js +39 -39
- package/cli/require.js +1 -1
- package/cli/scribe.js +12 -11
- package/fonts/Dingbats.woff +0 -0
- package/fonts/all/URWGothicBook-Bold.woff +0 -0
- package/fonts/all/URWGothicBook-BoldItalic.woff +0 -0
- package/fonts/all/URWGothicBook-Italic.woff +0 -0
- package/fonts/all/URWGothicBook-Regular.woff +0 -0
- package/fonts/latin/URWGothicBook-Bold.woff +0 -0
- package/fonts/latin/URWGothicBook-BoldItalic.woff +0 -0
- package/fonts/latin/URWGothicBook-Italic.woff +0 -0
- package/fonts/latin/URWGothicBook-Regular.woff +0 -0
- package/js/canvasAdapter.js +4 -1
- package/js/clear.js +7 -8
- package/js/containers/app.js +2 -0
- package/js/containers/dataContainer.js +1 -4
- package/js/containers/fontContainer.js +59 -44
- package/js/containers/imageContainer.js +13 -35
- package/js/coordinates.js +3 -3
- package/js/debug.js +2 -2
- package/js/export/export.js +103 -18
- package/js/export/exportDebugCsv.js +4 -3
- package/js/export/pdf/writePdf.js +389 -0
- package/js/export/{writePdfFonts.js → pdf/writePdfFonts.js} +16 -12
- package/js/export/pdf/writePdfImages.js +218 -0
- package/js/export/{writePdf.js → pdf/writePdfText.js} +28 -315
- package/js/export/writeDocx.js +12 -5
- package/js/export/writeHocr.js +11 -10
- package/js/export/writeHtml.js +208 -48
- package/js/export/writeTabular.js +31 -20
- package/js/export/writeText.js +12 -10
- package/js/fontContainerMain.js +101 -50
- package/js/fontEval.js +18 -14
- package/js/fontStatistics.js +90 -90
- package/js/generalWorkerMain.js +52 -6
- package/js/global.d.ts +178 -6
- package/js/import/convertDocTextract.js +447 -0
- package/js/import/convertPageAbbyy.js +10 -4
- package/js/import/convertPageBlocks.js +4 -4
- package/js/import/convertPageGoogleVision.js +204 -0
- package/js/import/convertPageHocr.js +3 -3
- package/js/import/convertPageShared.js +1 -0
- package/js/import/convertPageStext.js +18 -10
- package/js/import/convertPageText.js +289 -0
- package/js/import/import.js +133 -125
- package/js/import/importOCR.js +98 -46
- package/js/import/nodeAdapter.js +2 -2
- package/js/modifyOCR.js +6 -5
- package/js/nudge.js +3 -3
- package/js/objects/{fontMetricsObjects.js → charMetricsObjects.js} +12 -12
- package/js/objects/imageObjects.js +3 -2
- package/js/objects/layoutObjects.js +37 -0
- package/js/objects/ocrObjects.js +51 -3
- package/js/recognizeConvert.js +74 -23
- package/js/utils/fontUtils.js +32 -1
- package/js/utils/imageUtils.js +99 -0
- package/js/utils/miscUtils.js +158 -9
- package/js/utils/reflowPars.js +4 -0
- package/js/worker/compareOCRModule.js +20 -18
- package/js/worker/generalWorker.js +12 -6
- package/js/worker/optimizeFontModule.js +19 -19
- package/mupdf/libmupdf.js +3 -3
- package/mupdf/libmupdf.wasm +0 -0
- package/mupdf/mupdf-async.js +1 -1
- package/mupdf/mupdf-worker.js +9 -4
- package/package.json +7 -4
- package/scribe.js +5 -5
- package/tess/tesseract.esm.min.js +1 -1
- package/tess/tesseract.min.js +1 -1
- package/tess/worker.min.js +1 -1
package/js/fontContainerMain.js
CHANGED
|
@@ -15,9 +15,9 @@ import { gs } from './generalWorkerMain.js';
|
|
|
15
15
|
*/
|
|
16
16
|
export async function loadBuiltInFontsRaw(glyphSet = 'latin') {
|
|
17
17
|
// Return early if the font set is already loaded, or a superset of the requested set is loaded.
|
|
18
|
-
if (FontCont.glyphSet === glyphSet || FontCont.glyphSet === 'all' && glyphSet === 'latin') return;
|
|
18
|
+
if (FontCont.state.glyphSet === glyphSet || FontCont.state.glyphSet === 'all' && glyphSet === 'latin') return;
|
|
19
19
|
|
|
20
|
-
FontCont.glyphSet = glyphSet;
|
|
20
|
+
FontCont.state.glyphSet = glyphSet;
|
|
21
21
|
|
|
22
22
|
// Note: this function is intentionally verbose, and should not be refactored to generate the paths dynamically.
|
|
23
23
|
// Build systems will not be able to resolve the paths if they are generated dynamically.
|
|
@@ -49,6 +49,10 @@ export async function loadBuiltInFontsRaw(glyphSet = 'latin') {
|
|
|
49
49
|
let /** @type {Promise<ArrayBuffer>} */nimbusMonoItalic;
|
|
50
50
|
let /** @type {Promise<ArrayBuffer>} */nimbusMonoBold;
|
|
51
51
|
let /** @type {Promise<ArrayBuffer>} */nimbusMonoBoldItalic;
|
|
52
|
+
let /** @type {Promise<ArrayBuffer>} */gothicNormal;
|
|
53
|
+
let /** @type {Promise<ArrayBuffer>} */gothicItalic;
|
|
54
|
+
let /** @type {Promise<ArrayBuffer>} */gothicBold;
|
|
55
|
+
let /** @type {Promise<ArrayBuffer>} */gothicBoldItalic;
|
|
52
56
|
if (typeof process === 'undefined') {
|
|
53
57
|
if (glyphSet === 'latin') {
|
|
54
58
|
carlitoNormal = fetch(new URL('../fonts/latin/Carlito-Regular.woff', import.meta.url)).then((res) => res.arrayBuffer());
|
|
@@ -79,6 +83,10 @@ export async function loadBuiltInFontsRaw(glyphSet = 'latin') {
|
|
|
79
83
|
nimbusMonoItalic = fetch(new URL('../fonts/latin/NimbusMono-Italic.woff', import.meta.url)).then((res) => res.arrayBuffer());
|
|
80
84
|
nimbusMonoBold = fetch(new URL('../fonts/latin/NimbusMono-Bold.woff', import.meta.url)).then((res) => res.arrayBuffer());
|
|
81
85
|
nimbusMonoBoldItalic = fetch(new URL('../fonts/latin/NimbusMono-BoldItalic.woff', import.meta.url)).then((res) => res.arrayBuffer());
|
|
86
|
+
gothicNormal = fetch(new URL('../fonts/latin/URWGothicBook-Regular.woff', import.meta.url)).then((res) => res.arrayBuffer());
|
|
87
|
+
gothicItalic = fetch(new URL('../fonts/latin/URWGothicBook-Italic.woff', import.meta.url)).then((res) => res.arrayBuffer());
|
|
88
|
+
gothicBold = fetch(new URL('../fonts/latin/URWGothicBook-Bold.woff', import.meta.url)).then((res) => res.arrayBuffer());
|
|
89
|
+
gothicBoldItalic = fetch(new URL('../fonts/latin/URWGothicBook-BoldItalic.woff', import.meta.url)).then((res) => res.arrayBuffer());
|
|
82
90
|
} else {
|
|
83
91
|
carlitoNormal = fetch(new URL('../fonts/all/Carlito-Regular.woff', import.meta.url)).then((res) => res.arrayBuffer());
|
|
84
92
|
carlitoItalic = fetch(new URL('../fonts/all/Carlito-Italic.woff', import.meta.url)).then((res) => res.arrayBuffer());
|
|
@@ -108,9 +116,13 @@ export async function loadBuiltInFontsRaw(glyphSet = 'latin') {
|
|
|
108
116
|
nimbusMonoItalic = fetch(new URL('../fonts/all/NimbusMono-Italic.woff', import.meta.url)).then((res) => res.arrayBuffer());
|
|
109
117
|
nimbusMonoBold = fetch(new URL('../fonts/all/NimbusMono-Bold.woff', import.meta.url)).then((res) => res.arrayBuffer());
|
|
110
118
|
nimbusMonoBoldItalic = fetch(new URL('../fonts/all/NimbusMono-BoldItalic.woff', import.meta.url)).then((res) => res.arrayBuffer());
|
|
119
|
+
gothicNormal = fetch(new URL('../fonts/all/URWGothicBook-Regular.woff', import.meta.url)).then((res) => res.arrayBuffer());
|
|
120
|
+
gothicItalic = fetch(new URL('../fonts/all/URWGothicBook-Italic.woff', import.meta.url)).then((res) => res.arrayBuffer());
|
|
121
|
+
gothicBold = fetch(new URL('../fonts/all/URWGothicBook-Bold.woff', import.meta.url)).then((res) => res.arrayBuffer());
|
|
122
|
+
gothicBoldItalic = fetch(new URL('../fonts/all/URWGothicBook-BoldItalic.woff', import.meta.url)).then((res) => res.arrayBuffer());
|
|
111
123
|
}
|
|
112
124
|
} else {
|
|
113
|
-
const { readFile } = await import('fs/promises');
|
|
125
|
+
const { readFile } = await import('node:fs/promises');
|
|
114
126
|
carlitoNormal = readFile(new URL('../fonts/all/Carlito-Regular.woff', import.meta.url)).then((res) => res.buffer);
|
|
115
127
|
carlitoItalic = readFile(new URL('../fonts/all/Carlito-Italic.woff', import.meta.url)).then((res) => res.buffer);
|
|
116
128
|
carlitoBold = readFile(new URL('../fonts/all/Carlito-Bold.woff', import.meta.url)).then((res) => res.buffer);
|
|
@@ -139,6 +151,10 @@ export async function loadBuiltInFontsRaw(glyphSet = 'latin') {
|
|
|
139
151
|
nimbusMonoItalic = readFile(new URL('../fonts/all/NimbusMono-Italic.woff', import.meta.url)).then((res) => res.buffer);
|
|
140
152
|
nimbusMonoBold = readFile(new URL('../fonts/all/NimbusMono-Bold.woff', import.meta.url)).then((res) => res.buffer);
|
|
141
153
|
nimbusMonoBoldItalic = readFile(new URL('../fonts/all/NimbusMono-BoldItalic.woff', import.meta.url)).then((res) => res.buffer);
|
|
154
|
+
gothicNormal = readFile(new URL('../fonts/all/URWGothicBook-Regular.woff', import.meta.url)).then((res) => res.buffer);
|
|
155
|
+
gothicItalic = readFile(new URL('../fonts/all/URWGothicBook-Italic.woff', import.meta.url)).then((res) => res.buffer);
|
|
156
|
+
gothicBold = readFile(new URL('../fonts/all/URWGothicBook-Bold.woff', import.meta.url)).then((res) => res.buffer);
|
|
157
|
+
gothicBoldItalic = readFile(new URL('../fonts/all/URWGothicBook-BoldItalic.woff', import.meta.url)).then((res) => res.buffer);
|
|
142
158
|
}
|
|
143
159
|
|
|
144
160
|
const srcObj = {
|
|
@@ -151,6 +167,9 @@ export async function loadBuiltInFontsRaw(glyphSet = 'latin') {
|
|
|
151
167
|
Garamond: {
|
|
152
168
|
normal: await garamondNormal, italic: await garamondItalic, bold: await garamondBold, boldItalic: await garamondBoldItalic,
|
|
153
169
|
},
|
|
170
|
+
Gothic: {
|
|
171
|
+
normal: await gothicNormal, italic: await gothicItalic, bold: await gothicBold, boldItalic: await gothicBoldItalic,
|
|
172
|
+
},
|
|
154
173
|
Palatino: {
|
|
155
174
|
normal: await palatinoNormal, italic: await palatinoItalic, bold: await palatinoBold, boldItalic: await palatinoBoldItalic,
|
|
156
175
|
},
|
|
@@ -193,7 +212,7 @@ export async function loadChiSimFont() {
|
|
|
193
212
|
if (typeof process === 'undefined') {
|
|
194
213
|
chiSimSrc = fetch(new URL('../fonts/NotoSansSC-Regular.ttf', import.meta.url)).then((res) => res.arrayBuffer());
|
|
195
214
|
} else {
|
|
196
|
-
const { readFile } = await import('fs/promises');
|
|
215
|
+
const { readFile } = await import('node:fs/promises');
|
|
197
216
|
chiSimSrc = readFile(new URL('../fonts/NotoSansSC-Regular.ttf', import.meta.url)).then((res) => res.buffer);
|
|
198
217
|
}
|
|
199
218
|
|
|
@@ -204,6 +223,35 @@ export async function loadChiSimFont() {
|
|
|
204
223
|
return chiReady;
|
|
205
224
|
}
|
|
206
225
|
|
|
226
|
+
let dingbatsReadyRes;
|
|
227
|
+
let dingbatsReady;
|
|
228
|
+
|
|
229
|
+
/**
 * Loads the Dingbats font and stores it in `FontCont.supp.dingbats`.
 *
 * The load is started at most once: while a load is in flight, or after it has
 * succeeded, every call returns the same cached promise. If the load fails, the
 * cached promise is cleared so that the failure is reported to every waiting
 * caller and a later call can retry, rather than waiting forever on a promise
 * that can never settle.
 *
 * @returns {Promise<void>} Resolves once `FontCont.supp.dingbats` has been set.
 */
export async function loadDingbatsFont() {
  if (dingbatsReady) return dingbatsReady;

  // The cache is assigned synchronously (before any `await`), so concurrent callers share a single load.
  dingbatsReady = (async () => {
    let /** @type {ArrayBuffer} */ dingbatsSrc;
    if (typeof process === 'undefined') {
      // No `process` global: fetch the font relative to this module.
      dingbatsSrc = await fetch(new URL('../fonts/Dingbats.woff', import.meta.url)).then((res) => res.arrayBuffer());
    } else {
      // `process` exists: read the font from disk instead.
      const { readFile } = await import('node:fs/promises');
      // NOTE(review): `.buffer` is the ArrayBuffer backing the returned Buffer; this mirrors the other font loaders
      // in this file and presumably relies on the Buffer spanning the whole ArrayBuffer -- confirm.
      dingbatsSrc = await readFile(new URL('../fonts/Dingbats.woff', import.meta.url)).then((res) => res.buffer);
    }

    FontCont.supp.dingbats = await loadFont('Dingbats', 'normal', 'sans', dingbatsSrc, false);
  })().catch((err) => {
    // Drop the cached promise on failure so the next call starts a fresh attempt.
    dingbatsReady = undefined;
    throw err;
  });

  return dingbatsReady;
}
|
|
254
|
+
|
|
207
255
|
/**
|
|
208
256
|
* Enable or disable font optimization settings.
|
|
209
257
|
* This function is used rather than exposing the settings using the `opt` object, as these settings exist on the font container in both the main thread and the worker threads.
|
|
@@ -213,15 +261,15 @@ export async function loadChiSimFont() {
|
|
|
213
261
|
export async function enableFontOpt(enableOpt, forceOpt) {
|
|
214
262
|
let change = false;
|
|
215
263
|
if (enableOpt === true || enableOpt === false) {
|
|
216
|
-
if (FontCont.enableOpt !== enableOpt) {
|
|
264
|
+
if (FontCont.state.enableOpt !== enableOpt) {
|
|
217
265
|
change = true;
|
|
218
|
-
FontCont.enableOpt = enableOpt;
|
|
266
|
+
FontCont.state.enableOpt = enableOpt;
|
|
219
267
|
}
|
|
220
268
|
}
|
|
221
269
|
if (forceOpt === true || forceOpt === false) {
|
|
222
|
-
if (FontCont.forceOpt !== forceOpt) {
|
|
270
|
+
if (FontCont.state.forceOpt !== forceOpt) {
|
|
223
271
|
change = true;
|
|
224
|
-
FontCont.forceOpt = forceOpt;
|
|
272
|
+
FontCont.state.forceOpt = forceOpt;
|
|
225
273
|
}
|
|
226
274
|
}
|
|
227
275
|
|
|
@@ -286,11 +334,11 @@ export async function updateFontContWorkerMain(params = {}) {
|
|
|
286
334
|
const res = worker.updateFontContWorker({
|
|
287
335
|
rawMetrics: FontCont.rawMetrics,
|
|
288
336
|
optMetrics: FontCont.optMetrics,
|
|
289
|
-
sansDefaultName: FontCont.sansDefaultName,
|
|
290
|
-
serifDefaultName: FontCont.serifDefaultName,
|
|
291
|
-
defaultFontName: FontCont.defaultFontName,
|
|
292
|
-
enableOpt: FontCont.enableOpt,
|
|
293
|
-
forceOpt: FontCont.forceOpt,
|
|
337
|
+
sansDefaultName: FontCont.state.sansDefaultName,
|
|
338
|
+
serifDefaultName: FontCont.state.serifDefaultName,
|
|
339
|
+
defaultFontName: FontCont.state.defaultFontName,
|
|
340
|
+
enableOpt: FontCont.state.enableOpt,
|
|
341
|
+
forceOpt: FontCont.state.forceOpt,
|
|
294
342
|
});
|
|
295
343
|
resArr.push(res);
|
|
296
344
|
}
|
|
@@ -336,11 +384,11 @@ export async function setUploadFontsWorker(scheduler) {
|
|
|
336
384
|
const res = worker.updateFontContWorker({
|
|
337
385
|
rawMetrics: FontCont.rawMetrics,
|
|
338
386
|
optMetrics: FontCont.optMetrics,
|
|
339
|
-
sansDefaultName: FontCont.sansDefaultName,
|
|
340
|
-
serifDefaultName: FontCont.serifDefaultName,
|
|
341
|
-
defaultFontName: FontCont.defaultFontName,
|
|
342
|
-
enableOpt: FontCont.enableOpt,
|
|
343
|
-
forceOpt: FontCont.forceOpt,
|
|
387
|
+
sansDefaultName: FontCont.state.sansDefaultName,
|
|
388
|
+
serifDefaultName: FontCont.state.serifDefaultName,
|
|
389
|
+
defaultFontName: FontCont.state.defaultFontName,
|
|
390
|
+
enableOpt: FontCont.state.enableOpt,
|
|
391
|
+
forceOpt: FontCont.state.forceOpt,
|
|
344
392
|
});
|
|
345
393
|
resArr.push(res);
|
|
346
394
|
}
|
|
@@ -351,23 +399,23 @@ export async function setUploadFontsWorker(scheduler) {
|
|
|
351
399
|
* Automatically sets the default font to whatever font is most common in the provided font metrics.
|
|
352
400
|
*
|
|
353
401
|
*/
|
|
354
|
-
export function setDefaultFontAuto(
|
|
355
|
-
const multiFontMode = checkMultiFontMode(
|
|
402
|
+
export function setDefaultFontAuto(charMetricsObj) {
|
|
403
|
+
const multiFontMode = checkMultiFontMode(charMetricsObj);
|
|
356
404
|
|
|
357
405
|
// Return early if the OCR data does not contain font info.
|
|
358
406
|
if (!multiFontMode) return;
|
|
359
407
|
|
|
360
408
|
// Change default font to whatever named font appears more
|
|
361
|
-
if ((
|
|
362
|
-
FontCont.defaultFontName = 'SerifDefault';
|
|
409
|
+
if ((charMetricsObj.SerifDefault?.obs || 0) > (charMetricsObj.SansDefault?.obs || 0)) {
|
|
410
|
+
FontCont.state.defaultFontName = 'SerifDefault';
|
|
363
411
|
} else {
|
|
364
|
-
FontCont.defaultFontName = 'SansDefault';
|
|
412
|
+
FontCont.state.defaultFontName = 'SansDefault';
|
|
365
413
|
}
|
|
366
414
|
|
|
367
415
|
if (gs.schedulerInner) {
|
|
368
416
|
for (let i = 0; i < gs.schedulerInner.workers.length; i++) {
|
|
369
417
|
const worker = gs.schedulerInner.workers[i];
|
|
370
|
-
worker.updateFontContWorker({ defaultFontName: FontCont.defaultFontName });
|
|
418
|
+
worker.updateFontContWorker({ defaultFontName: FontCont.state.defaultFontName });
|
|
371
419
|
}
|
|
372
420
|
}
|
|
373
421
|
}
|
|
@@ -375,39 +423,39 @@ export function setDefaultFontAuto(fontMetricsObj) {
|
|
|
375
423
|
/**
|
|
376
424
|
*
|
|
377
425
|
* @param {FontContainerFamilyBuiltIn} fontFamily
|
|
378
|
-
* @param {Object.<string,
|
|
426
|
+
* @param {Object.<string, CharMetricsFamily>} charMetricsObj
|
|
379
427
|
*/
|
|
380
|
-
export async function optimizeFontContainerFamily(fontFamily,
|
|
428
|
+
export async function optimizeFontContainerFamily(fontFamily, charMetricsObj) {
|
|
381
429
|
// When we have metrics for individual fonts families, those are used to optimize the appropriate fonts.
|
|
382
430
|
// Otherwise, the "default" metric is applied to whatever font the user has selected as the default font.
|
|
383
|
-
const multiFontMode = checkMultiFontMode(
|
|
384
|
-
let
|
|
431
|
+
const multiFontMode = checkMultiFontMode(charMetricsObj);
|
|
432
|
+
let charMetricsType = 'Default';
|
|
385
433
|
if (multiFontMode) {
|
|
386
434
|
if (fontFamily.normal.type === 'sans') {
|
|
387
|
-
|
|
435
|
+
charMetricsType = 'SansDefault';
|
|
388
436
|
} else {
|
|
389
|
-
|
|
437
|
+
charMetricsType = 'SerifDefault';
|
|
390
438
|
}
|
|
391
439
|
}
|
|
392
440
|
|
|
393
441
|
// If there are no statistics to use for optimization, create "optimized" font by simply copying the raw font without modification.
|
|
394
442
|
// This should only occur when `multiFontMode` is true, but a document contains no sans words or no serif words.
|
|
395
|
-
if (!
|
|
443
|
+
if (!charMetricsObj[charMetricsType] || !charMetricsObj[charMetricsType][fontFamily.normal.style] || charMetricsObj[charMetricsType][fontFamily.normal.style].obs < 200) {
|
|
396
444
|
return null;
|
|
397
445
|
}
|
|
398
446
|
|
|
399
|
-
const metricsNormal =
|
|
400
|
-
const normalOptFont = gs.optimizeFont({ fontData: fontFamily.normal.src,
|
|
447
|
+
const metricsNormal = charMetricsObj[charMetricsType][fontFamily.normal.style];
|
|
448
|
+
const normalOptFont = gs.optimizeFont({ fontData: fontFamily.normal.src, charMetricsObj: metricsNormal, style: fontFamily.normal.style })
|
|
401
449
|
.then(async (x) => {
|
|
402
450
|
const font = await loadOpentype(x.fontData, x.kerningPairs);
|
|
403
451
|
return new FontContainerFont(fontFamily.normal.family, fontFamily.normal.style, x.fontData, true, font);
|
|
404
452
|
});
|
|
405
453
|
|
|
406
|
-
const metricsItalic =
|
|
454
|
+
const metricsItalic = charMetricsObj[charMetricsType][fontFamily.italic.style];
|
|
407
455
|
/** @type {?FontContainerFont|Promise<FontContainerFont>} */
|
|
408
456
|
let italicOptFont = null;
|
|
409
457
|
if (metricsItalic && metricsItalic.obs >= 200) {
|
|
410
|
-
italicOptFont = gs.optimizeFont({ fontData: fontFamily.italic.src,
|
|
458
|
+
italicOptFont = gs.optimizeFont({ fontData: fontFamily.italic.src, charMetricsObj: metricsItalic, style: fontFamily.italic.style })
|
|
411
459
|
.then(async (x) => {
|
|
412
460
|
const font = await loadOpentype(x.fontData, x.kerningPairs);
|
|
413
461
|
return new FontContainerFont(fontFamily.italic.family, fontFamily.italic.style, x.fontData, true, font);
|
|
@@ -424,18 +472,20 @@ export async function optimizeFontContainerFamily(fontFamily, fontMetricsObj) {
|
|
|
424
472
|
* Optimize all fonts.
|
|
425
473
|
* If a font cannot be optimized, then the raw font is returned.
|
|
426
474
|
* @param {Object<string, FontContainerFamilyBuiltIn>} fontPrivate
|
|
427
|
-
* @param {Object.<string,
|
|
475
|
+
* @param {Object.<string, CharMetricsFamily>} charMetricsObj
|
|
428
476
|
*/
|
|
429
|
-
export async function optimizeFontContainerAll(fontPrivate,
|
|
430
|
-
const carlitoPromise = optimizeFontContainerFamily(fontPrivate.Carlito,
|
|
431
|
-
const centuryPromise = optimizeFontContainerFamily(fontPrivate.Century,
|
|
432
|
-
const garamondPromise = optimizeFontContainerFamily(fontPrivate.Garamond,
|
|
433
|
-
const
|
|
434
|
-
const
|
|
435
|
-
const
|
|
436
|
-
const
|
|
437
|
-
|
|
438
|
-
|
|
477
|
+
export async function optimizeFontContainerAll(fontPrivate, charMetricsObj) {
|
|
478
|
+
const carlitoPromise = optimizeFontContainerFamily(fontPrivate.Carlito, charMetricsObj);
|
|
479
|
+
const centuryPromise = optimizeFontContainerFamily(fontPrivate.Century, charMetricsObj);
|
|
480
|
+
const garamondPromise = optimizeFontContainerFamily(fontPrivate.Garamond, charMetricsObj);
|
|
481
|
+
const gothicPromise = optimizeFontContainerFamily(fontPrivate.Gothic, charMetricsObj);
|
|
482
|
+
const palatinoPromise = optimizeFontContainerFamily(fontPrivate.Palatino, charMetricsObj);
|
|
483
|
+
const nimbusRomanPromise = optimizeFontContainerFamily(fontPrivate.NimbusRoman, charMetricsObj);
|
|
484
|
+
const nimbusSansPromise = optimizeFontContainerFamily(fontPrivate.NimbusSans, charMetricsObj);
|
|
485
|
+
const nimbusMonoPromise = optimizeFontContainerFamily(fontPrivate.NimbusMono, charMetricsObj);
|
|
486
|
+
|
|
487
|
+
const results = await Promise.all([carlitoPromise, centuryPromise, garamondPromise, gothicPromise,
|
|
488
|
+
palatinoPromise, nimbusRomanPromise, nimbusSansPromise, nimbusMonoPromise]);
|
|
439
489
|
|
|
440
490
|
if (results.every((x) => x === null)) return null;
|
|
441
491
|
|
|
@@ -443,9 +493,10 @@ export async function optimizeFontContainerAll(fontPrivate, fontMetricsObj) {
|
|
|
443
493
|
Carlito: results[0],
|
|
444
494
|
Century: results[1],
|
|
445
495
|
Garamond: results[2],
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
496
|
+
Gothic: results[3],
|
|
497
|
+
Palatino: results[4],
|
|
498
|
+
NimbusRoman: results[5],
|
|
499
|
+
NimbusSans: results[6],
|
|
500
|
+
NimbusMono: results[7],
|
|
450
501
|
};
|
|
451
502
|
}
|
package/js/fontEval.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { pageMetricsAll } from './containers/dataContainer.js';
|
|
2
2
|
import { FontCont } from './containers/fontContainer.js';
|
|
3
3
|
import { ImageCache } from './containers/imageContainer.js';
|
|
4
4
|
import {
|
|
@@ -28,7 +28,7 @@ export async function evalPagesFont(font, pageArr, opt, n = 500) {
|
|
|
28
28
|
font,
|
|
29
29
|
page: pageArr[i],
|
|
30
30
|
binaryImage: imageI,
|
|
31
|
-
pageMetricsObj:
|
|
31
|
+
pageMetricsObj: pageMetricsAll[i],
|
|
32
32
|
opt,
|
|
33
33
|
});
|
|
34
34
|
|
|
@@ -49,6 +49,7 @@ export async function evaluateFonts(pageArr, opt) {
|
|
|
49
49
|
const evalCentury = !!(opt ? FontCont.opt?.Century : FontCont.raw?.Century);
|
|
50
50
|
const evalPalatino = !!(opt ? FontCont.opt?.Palatino : FontCont.raw?.Palatino);
|
|
51
51
|
const evalGaramond = !!(opt ? FontCont.opt?.Garamond : FontCont.raw?.Garamond);
|
|
52
|
+
const evalGothic = !!(opt ? FontCont.opt?.Gothic : FontCont.raw?.Gothic);
|
|
52
53
|
const evalNimbusRoman = !!(opt ? FontCont.opt?.NimbusRoman : FontCont.raw?.NimbusRoman);
|
|
53
54
|
const evalNimbusMono = !!(opt ? FontCont.opt?.NimbusMono : FontCont.raw?.NimbusMono);
|
|
54
55
|
|
|
@@ -58,6 +59,7 @@ export async function evaluateFonts(pageArr, opt) {
|
|
|
58
59
|
century: evalCentury ? evalPagesFont('Century', pageArr, opt) : null,
|
|
59
60
|
palatino: evalPalatino ? evalPagesFont('Palatino', pageArr, opt) : null,
|
|
60
61
|
garamond: evalGaramond ? evalPagesFont('Garamond', pageArr, opt) : null,
|
|
62
|
+
gothic: evalGothic ? evalPagesFont('Gothic', pageArr, opt) : null,
|
|
61
63
|
nimbusRoman: evalNimbusRoman ? evalPagesFont('NimbusRoman', pageArr, opt) : null,
|
|
62
64
|
nimbusMono: evalNimbusMono ? evalPagesFont('NimbusMono', pageArr, opt) : null,
|
|
63
65
|
};
|
|
@@ -68,6 +70,7 @@ export async function evaluateFonts(pageArr, opt) {
|
|
|
68
70
|
century: await fontMetricsPromises.century,
|
|
69
71
|
palatino: await fontMetricsPromises.palatino,
|
|
70
72
|
garamond: await fontMetricsPromises.garamond,
|
|
73
|
+
gothic: await fontMetricsPromises.gothic,
|
|
71
74
|
nimbusRoman: await fontMetricsPromises.nimbusRoman,
|
|
72
75
|
nimbusMono: await fontMetricsPromises.nimbusMono,
|
|
73
76
|
};
|
|
@@ -78,6 +81,7 @@ export async function evaluateFonts(pageArr, opt) {
|
|
|
78
81
|
Century: fontMetricsTmp.century ? fontMetricsTmp.century.metricTotal / fontMetricsTmp.century.wordsTotal : null,
|
|
79
82
|
Palatino: fontMetricsTmp.palatino ? fontMetricsTmp.palatino.metricTotal / fontMetricsTmp.palatino.wordsTotal : null,
|
|
80
83
|
Garamond: fontMetricsTmp.garamond ? fontMetricsTmp.garamond.metricTotal / fontMetricsTmp.garamond.wordsTotal : null,
|
|
84
|
+
Gothic: fontMetricsTmp.gothic ? fontMetricsTmp.gothic.metricTotal / fontMetricsTmp.gothic.wordsTotal : null,
|
|
81
85
|
NimbusRoman: fontMetricsTmp.nimbusRoman ? fontMetricsTmp.nimbusRoman.metricTotal / fontMetricsTmp.nimbusRoman.wordsTotal : null,
|
|
82
86
|
NimbusMono: fontMetricsTmp.nimbusMono ? fontMetricsTmp.nimbusMono.metricTotal / fontMetricsTmp.nimbusMono.wordsTotal : null,
|
|
83
87
|
};
|
|
@@ -94,7 +98,7 @@ const calcBestFonts = (fontMetrics) => {
|
|
|
94
98
|
let minValueSans = Number.MAX_VALUE;
|
|
95
99
|
|
|
96
100
|
for (const [key, value] of Object.entries(fontMetrics)) {
|
|
97
|
-
if (!['Carlito', 'NimbusSans'].includes(key)) continue;
|
|
101
|
+
if (!['Carlito', 'Gothic', 'NimbusSans'].includes(key)) continue;
|
|
98
102
|
if (value && value < minValueSans) {
|
|
99
103
|
minValueSans = value;
|
|
100
104
|
minKeySans = key;
|
|
@@ -132,16 +136,16 @@ const calcBestFonts = (fontMetrics) => {
|
|
|
132
136
|
export async function runFontOptimization(ocrArr) {
|
|
133
137
|
await loadBuiltInFontsRaw();
|
|
134
138
|
|
|
135
|
-
const calculateOpt =
|
|
139
|
+
const calculateOpt = FontCont.state.charMetrics && Object.keys(FontCont.state.charMetrics).length > 0;
|
|
136
140
|
|
|
137
141
|
let enableOptSerif = false;
|
|
138
142
|
let enableOptSans = false;
|
|
139
143
|
|
|
140
144
|
let optimizeFontContainerAllPromise;
|
|
141
145
|
if (calculateOpt) {
|
|
142
|
-
setDefaultFontAuto(
|
|
146
|
+
setDefaultFontAuto(FontCont.state.charMetrics);
|
|
143
147
|
|
|
144
|
-
optimizeFontContainerAllPromise = optimizeFontContainerAll(FontCont.raw,
|
|
148
|
+
optimizeFontContainerAllPromise = optimizeFontContainerAll(FontCont.raw, FontCont.state.charMetrics)
|
|
145
149
|
.then((res) => {
|
|
146
150
|
FontCont.opt = res;
|
|
147
151
|
});
|
|
@@ -167,28 +171,28 @@ export async function runFontOptimization(ocrArr) {
|
|
|
167
171
|
// This ensures that switching on/off "font optimization" does not change the font, which would be confusing.
|
|
168
172
|
if (FontCont.optMetrics[bestMetricsOpt.minKeySans] < FontCont.rawMetrics[bestMetricsRaw.minKeySans]) {
|
|
169
173
|
enableOptSans = true;
|
|
170
|
-
FontCont.sansDefaultName = bestMetricsOpt.minKeySans;
|
|
174
|
+
FontCont.state.sansDefaultName = bestMetricsOpt.minKeySans;
|
|
171
175
|
} else {
|
|
172
|
-
FontCont.sansDefaultName = bestMetricsRaw.minKeySans;
|
|
176
|
+
FontCont.state.sansDefaultName = bestMetricsRaw.minKeySans;
|
|
173
177
|
}
|
|
174
178
|
|
|
175
179
|
// Repeat for serif fonts
|
|
176
180
|
if (FontCont.optMetrics[bestMetricsOpt.minKeySerif] < FontCont.rawMetrics[bestMetricsRaw.minKeySerif]) {
|
|
177
181
|
enableOptSerif = true;
|
|
178
|
-
FontCont.serifDefaultName = bestMetricsOpt.minKeySerif;
|
|
182
|
+
FontCont.state.serifDefaultName = bestMetricsOpt.minKeySerif;
|
|
179
183
|
} else {
|
|
180
|
-
FontCont.serifDefaultName = bestMetricsRaw.minKeySerif;
|
|
184
|
+
FontCont.state.serifDefaultName = bestMetricsRaw.minKeySerif;
|
|
181
185
|
}
|
|
182
186
|
} else {
|
|
183
|
-
FontCont.sansDefaultName = bestMetricsRaw.minKeySans;
|
|
184
|
-
FontCont.serifDefaultName = bestMetricsRaw.minKeySerif;
|
|
187
|
+
FontCont.state.sansDefaultName = bestMetricsRaw.minKeySans;
|
|
188
|
+
FontCont.state.serifDefaultName = bestMetricsRaw.minKeySerif;
|
|
185
189
|
}
|
|
186
190
|
|
|
187
|
-
FontCont.enableOpt = enableOptSerif || enableOptSans;
|
|
191
|
+
FontCont.state.enableOpt = enableOptSerif || enableOptSans;
|
|
188
192
|
|
|
189
193
|
// Send updated state to all workers.
|
|
190
194
|
await updateFontContWorkerMain();
|
|
191
195
|
}
|
|
192
196
|
|
|
193
|
-
return FontCont.enableOpt;
|
|
197
|
+
return FontCont.state.enableOpt;
|
|
194
198
|
}
|