scribe.js-ocr 0.2.4 → 0.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  import {
2
2
  checkMultiFontMode,
3
- fontAll,
3
+ FontCont,
4
4
  FontContainerFont,
5
5
  loadFont,
6
6
  loadFontsFromSource,
@@ -15,9 +15,9 @@ import { gs } from './generalWorkerMain.js';
15
15
  */
16
16
  export async function loadBuiltInFontsRaw(glyphSet = 'latin') {
17
17
  // Return early if the font set is already loaded, or a superset of the requested set is loaded.
18
- if (fontAll.glyphSet === glyphSet || fontAll.glyphSet === 'all' && glyphSet === 'latin') return;
18
+ if (FontCont.glyphSet === glyphSet || FontCont.glyphSet === 'all' && glyphSet === 'latin') return;
19
19
 
20
- fontAll.glyphSet = glyphSet;
20
+ FontCont.glyphSet = glyphSet;
21
21
 
22
22
  // Note: this function is intentionally verbose, and should not be refactored to generate the paths dynamically.
23
23
  // Build systems will not be able to resolve the paths if they are generated dynamically.
@@ -110,14 +110,14 @@ export async function loadBuiltInFontsRaw(glyphSet = 'latin') {
110
110
  NimbusSans: { normal: await nimbusSansNormal, italic: await nimbusSansItalic, bold: await nimbusSansBold },
111
111
  };
112
112
 
113
- fontAll.raw = await /** @type {FontContainer} */(/** @type {any} */(loadFontsFromSource(srcObj)));
114
- if (!fontAll.active || (!fontAll.active.NimbusSans.normal.opt && !fontAll.active.NimbusRomNo9L.normal.opt)) fontAll.active = fontAll.raw;
113
+ FontCont.raw = await /** @type {FontContainer} */(/** @type {any} */(loadFontsFromSource(srcObj)));
115
114
 
116
115
  if (typeof process === 'undefined') {
117
116
  // This assumes that the scheduler `init` method has at least started.
118
117
  if (gs.schedulerReady === null) console.warn('Failed to load fonts to workers as workers have not been initialized yet.');
119
118
  await gs.schedulerReady;
120
- await setBuiltInFontsWorker(gs.schedulerInner, true);
119
+ // If this is running, presumably a new glyphset is being loaded, so the fonts should be forced to be updated.
120
+ await updateFontContWorkerMain({ loadRaw: true });
121
121
  }
122
122
 
123
123
  return;
@@ -144,7 +144,7 @@ export async function loadChiSimFont() {
144
144
  chiSimSrc = readFile(new URL('../fonts/NotoSansSC-Regular.ttf', import.meta.url)).then((res) => res.buffer);
145
145
  }
146
146
 
147
- fontAll.supp.chi_sim = await loadFont('NotoSansSC', 'normal', 'sans', await chiSimSrc, false);
147
+ FontCont.supp.chi_sim = await loadFont('NotoSansSC', 'normal', 'sans', await chiSimSrc, false);
148
148
 
149
149
  chiReadyRes();
150
150
 
@@ -152,102 +152,89 @@ export async function loadChiSimFont() {
152
152
  }
153
153
 
154
154
  /**
155
- *
156
- * @param {boolean} enable
157
- * @param {boolean} [useInitial=false]
158
- * @param {boolean} [forceWorkerUpdate=false] - If true, forces the worker to update the font data even if the font data of this type is already loaded.
159
- * This should be used when switching from unvalidated to validated optimized fonts.
155
+ * Enable or disable font optimization settings.
156
+ * This function is used rather than exposing the settings using the `opt` object, as these settings exist on the font container in both the main thread and the worker threads.
157
+ * @param {boolean} enableOpt
158
+ * @param {boolean} [forceOpt]
160
159
  */
161
- export async function enableFontOpt(enable, useInitial = false, forceWorkerUpdate = false) {
162
- // Enable/disable optimized font
163
- if (enable && useInitial && fontAll.optInitial) {
164
- fontAll.active = fontAll.optInitial;
165
- } else if (enable && fontAll.opt) {
166
- fontAll.active = fontAll.opt;
167
- } else {
168
- fontAll.active = fontAll.raw;
160
+ export async function enableFontOpt(enableOpt, forceOpt) {
161
+ let change = false;
162
+ if (enableOpt === true || enableOpt === false) {
163
+ if (FontCont.enableOpt !== enableOpt) {
164
+ change = true;
165
+ FontCont.enableOpt = enableOpt;
166
+ }
167
+ }
168
+ if (forceOpt === true || forceOpt === false) {
169
+ if (FontCont.forceOpt !== forceOpt) {
170
+ change = true;
171
+ FontCont.forceOpt = forceOpt;
172
+ }
169
173
  }
170
174
 
171
- // Enable/disable optimized font in workers
172
- if (typeof process === 'undefined') {
173
- await setBuiltInFontsWorker(gs.schedulerInner, forceWorkerUpdate);
174
- } else {
175
- // const { setFontAll } = await import('./worker/compareOCRModule.js');
176
- // setFontAll(fontAll);
175
+ if (typeof process === 'undefined' && change) {
176
+ await updateFontContWorkerMain();
177
177
  }
178
178
  }
179
179
 
180
180
  /**
181
- *
182
- * @param {*} scheduler
183
- * @param {boolean} [force=false] - If true, forces the worker to update the font data even if the font data of this type is already loaded.
181
+ * @param {Object} [params]
182
+ * @param {boolean} [params.loadRaw] - By default, raw fonts are loaded if they have not been loaded before.
183
+ * Set `loadRaw` to `true` or `false` to force the raw fonts to be loaded or not loaded, respectively.
184
+ * @param {boolean} [params.loadOpt] - By default, optimized fonts are loaded if they have not been loaded before.
185
+ * Set `loadOpt` to `true` or `false` to force the optimized fonts to be loaded or not loaded, respectively.
186
+ * @param {boolean} [params.updateProps]
184
187
  */
185
- export async function setBuiltInFontsWorker(scheduler, force = false) {
186
- if (!fontAll.active) return;
187
-
188
- const opt = fontAll.active.Carlito.normal.opt || fontAll.active.NimbusRomNo9L.normal.opt;
189
-
190
- const loadedBuiltIn = (!opt && fontAll.loadedBuiltInRawWorker) || (opt && fontAll.loadedBuiltInOptWorker);
188
+ export async function updateFontContWorkerMain(params = {}) {
189
+ const loadRaw = params.loadRaw === true || (params.loadRaw !== false && FontCont.raw && !gs.loadedBuiltInRawWorker);
190
+ const loadOpt = params.loadOpt === true || (params.loadOpt !== false && FontCont.opt && !gs.loadedBuiltInOptWorker);
191
191
 
192
192
  // If the active font data is not already loaded, load it now.
193
193
  // This assumes that only one version of the raw/optimized fonts ever exist--
194
194
  // it does not check whether the current optimized font changed since it was last loaded.
195
- if (!loadedBuiltIn || force) {
195
+ for (const [type, load] of [['raw', loadRaw], ['opt', loadOpt]]) {
196
+ if (!load) continue;
197
+
196
198
  const resArr = [];
197
- for (let i = 0; i < scheduler.workers.length; i++) {
198
- const worker = scheduler.workers[i];
199
- const res = worker.loadFontsWorker({
200
- src: {
201
- Carlito: {
202
- normal: fontAll.active.Carlito.normal.src,
203
- italic: fontAll.active.Carlito.italic.src,
204
- bold: fontAll.active.Carlito.bold.src,
205
- },
206
- Century: {
207
- normal: fontAll.active.Century.normal.src,
208
- italic: fontAll.active.Century.italic.src,
209
- bold: fontAll.active.Century.bold.src,
210
- },
211
- Garamond: {
212
- normal: fontAll.active.Garamond.normal.src,
213
- italic: fontAll.active.Garamond.italic.src,
214
- bold: fontAll.active.Garamond.bold.src,
215
- },
216
- Palatino: {
217
- normal: fontAll.active.Palatino.normal.src,
218
- italic: fontAll.active.Palatino.italic.src,
219
- bold: fontAll.active.Palatino.bold.src,
220
- },
221
- NimbusRomNo9L: {
222
- normal: fontAll.active.NimbusRomNo9L.normal.src,
223
- italic: fontAll.active.NimbusRomNo9L.italic.src,
224
- bold: fontAll.active.NimbusRomNo9L.bold.src,
225
- },
226
- NimbusSans: {
227
- normal: fontAll.active.NimbusSans.normal.src,
228
- italic: fontAll.active.NimbusSans.italic.src,
229
- bold: fontAll.active.NimbusSans.bold.src,
230
- },
231
- },
232
- opt,
233
- });
234
- resArr.push(res);
199
+
200
+ const input = { opt: type === 'opt', src: {} };
201
+ for (const [key, value] of Object.entries(FontCont[type])) {
202
+ if (!value || !value.normal) continue;
203
+ input.src[key] = {
204
+ normal: value.normal.src,
205
+ };
206
+ if (value.italic) input.src[key].italic = value.italic.src;
207
+ if (value.bold) input.src[key].bold = value.bold.src;
235
208
  }
236
- await Promise.all(resArr);
237
209
 
238
- // Theoretically this should be changed to use promises to avoid the race condition when `setBuiltInFontsWorker` is called multiple times quickly and `loadFontsWorker` is still running.
239
- if (opt) {
240
- fontAll.loadedBuiltInOptWorker = true;
241
- } else {
242
- fontAll.loadedBuiltInRawWorker = true;
210
+ for (let i = 0; i < gs.schedulerInner.workers.length; i++) {
211
+ const worker = gs.schedulerInner.workers[i];
212
+ const res = worker.loadFontsWorker(input);
213
+ resArr.push(res);
214
+
215
+ // TODO: consider the race condition when `updateFontContWorkerMain` is called multiple times quickly and `loadFontsWorker` is still running.
216
+ if (type === 'opt') {
217
+ gs.loadedBuiltInOptWorker = true;
218
+ } else {
219
+ gs.loadedBuiltInRawWorker = true;
220
+ }
243
221
  }
222
+ await Promise.all(resArr);
244
223
  }
245
224
 
246
225
  // Send the current `FontCont` state (metrics, default font names, and optimization flags) to the workers.
247
226
  const resArr = [];
248
- for (let i = 0; i < scheduler.workers.length; i++) {
249
- const worker = scheduler.workers[i];
250
- const res = worker.setFontActiveWorker({ opt, sansDefaultName: fontAll.sansDefaultName, serifDefaultName: fontAll.serifDefaultName });
227
+ for (let i = 0; i < gs.schedulerInner.workers.length; i++) {
228
+ const worker = gs.schedulerInner.workers[i];
229
+ const res = worker.updateFontContWorker({
230
+ rawMetrics: FontCont.rawMetrics,
231
+ optMetrics: FontCont.optMetrics,
232
+ sansDefaultName: FontCont.sansDefaultName,
233
+ serifDefaultName: FontCont.serifDefaultName,
234
+ defaultFontName: FontCont.defaultFontName,
235
+ enableOpt: FontCont.enableOpt,
236
+ forceOpt: FontCont.forceOpt,
237
+ });
251
238
  resArr.push(res);
252
239
  }
253
240
  await Promise.all(resArr);
@@ -255,15 +242,15 @@ export async function setBuiltInFontsWorker(scheduler, force = false) {
255
242
 
256
243
  /**
257
244
  * WIP: Import fonts embedded in PDFs.
258
- * This function is not currently used.
245
+ * This function is out of date and not currently used.
259
246
  * @param {*} scheduler
260
247
  */
261
248
  export async function setUploadFontsWorker(scheduler) {
262
- if (!fontAll.active) return;
249
+ if (!FontCont.active) return;
263
250
 
264
251
  /** @type {Object<string, fontSrcBuiltIn|fontSrcUpload>} */
265
252
  const fontsUpload = {};
266
- for (const [key, value] of Object.entries(fontAll.active)) {
253
+ for (const [key, value] of Object.entries(FontCont.active)) {
267
254
  if (!['Carlito', 'Century', 'Garamond', 'Palatino', 'NimbusRomNo9L', 'NimbusSans'].includes(key)) {
268
255
  fontsUpload[key] = {
269
256
  normal: value?.normal?.src, italic: value?.italic?.src, bold: value?.bold?.src,
@@ -286,10 +273,18 @@ export async function setUploadFontsWorker(scheduler) {
286
273
 
287
274
  // Send the current `FontCont` state (metrics, default font names, and optimization flags) to the workers.
288
275
  const resArr = [];
289
- const opt = fontAll.active.Carlito.normal.opt || fontAll.active.NimbusRomNo9L.normal.opt;
276
+ const opt = FontCont.active.Carlito.normal.opt || FontCont.active.NimbusRomNo9L.normal.opt;
290
277
  for (let i = 0; i < scheduler.workers.length; i++) {
291
278
  const worker = scheduler.workers[i];
292
- const res = worker.setFontActiveWorker({ opt, sansDefaultName: fontAll.sansDefaultName, serifDefaultName: fontAll.serifDefaultName });
279
+ const res = worker.updateFontContWorker({
280
+ rawMetrics: FontCont.rawMetrics,
281
+ optMetrics: FontCont.optMetrics,
282
+ sansDefaultName: FontCont.sansDefaultName,
283
+ serifDefaultName: FontCont.serifDefaultName,
284
+ defaultFontName: FontCont.defaultFontName,
285
+ enableOpt: FontCont.enableOpt,
286
+ forceOpt: FontCont.forceOpt,
287
+ });
293
288
  resArr.push(res);
294
289
  }
295
290
  await Promise.all(resArr);
@@ -307,15 +302,15 @@ export function setDefaultFontAuto(fontMetricsObj) {
307
302
 
308
303
  // Change default font to whatever named font appears more
309
304
  if ((fontMetricsObj.SerifDefault?.obs || 0) > (fontMetricsObj.SansDefault?.obs || 0)) {
310
- fontAll.defaultFontName = 'SerifDefault';
305
+ FontCont.defaultFontName = 'SerifDefault';
311
306
  } else {
312
- fontAll.defaultFontName = 'SansDefault';
307
+ FontCont.defaultFontName = 'SansDefault';
313
308
  }
314
309
 
315
310
  if (gs.schedulerInner) {
316
311
  for (let i = 0; i < gs.schedulerInner.workers.length; i++) {
317
312
  const worker = gs.schedulerInner.workers[i];
318
- worker.setDefaultFontNameWorker({ defaultFontName: fontAll.defaultFontName });
313
+ worker.updateFontContWorker({ defaultFontName: FontCont.defaultFontName });
319
314
  }
320
315
  }
321
316
  }
@@ -342,14 +337,8 @@ export async function optimizeFontContainerFamily(fontFamily, fontMetricsObj) {
342
337
 
343
338
  // If there are no statistics to use for optimization, create "optimized" font by simply copying the raw font without modification.
344
339
  // This should only occur when `multiFontMode` is true, but a document contains no sans words or no serif words.
345
- if (!fontMetricsObj[fontMetricsType] || !fontMetricsObj[fontMetricsType][fontFamily.normal.style]) {
346
- const opentypeFontArr = await Promise.all([loadOpentype(fontFamily.normal.src, null), loadOpentype(fontFamily.italic.src, null), loadOpentype(fontFamily.bold.src, null)]);
347
- const normalOptFont = new FontContainerFont(fontFamily.normal.family, fontFamily.normal.style, fontFamily.normal.src, true, opentypeFontArr[0]);
348
- const italicOptFont = new FontContainerFont(fontFamily.italic.family, fontFamily.italic.style, fontFamily.italic.src, true, opentypeFontArr[1]);
349
- const boldOptFont = new FontContainerFont(fontFamily.bold.family, fontFamily.bold.style, fontFamily.bold.src, true, opentypeFontArr[2]);
350
- return {
351
- normal: await normalOptFont, italic: await italicOptFont, bold: await boldOptFont,
352
- };
340
+ if (!fontMetricsObj[fontMetricsType] || !fontMetricsObj[fontMetricsType][fontFamily.normal.style] || fontMetricsObj[fontMetricsType][fontFamily.normal.style].obs < 200) {
341
+ return null;
353
342
  }
354
343
 
355
344
  const metricsNormal = fontMetricsObj[fontMetricsType][fontFamily.normal.style];
@@ -360,29 +349,25 @@ export async function optimizeFontContainerFamily(fontFamily, fontMetricsObj) {
360
349
  });
361
350
 
362
351
  const metricsItalic = fontMetricsObj[fontMetricsType][fontFamily.italic.style];
363
- /** @type {FontContainerFont|Promise<FontContainerFont>} */
364
- let italicOptFont;
365
- if (metricsItalic) {
352
+ /** @type {?FontContainerFont|Promise<FontContainerFont>} */
353
+ let italicOptFont = null;
354
+ if (metricsItalic && metricsItalic.obs >= 200) {
366
355
  italicOptFont = gs.scheduler.optimizeFont({ fontData: fontFamily.italic.src, fontMetricsObj: metricsItalic, style: fontFamily.italic.style })
367
356
  .then(async (x) => {
368
357
  const font = await loadOpentype(x.fontData, x.kerningPairs);
369
358
  return new FontContainerFont(fontFamily.italic.family, fontFamily.italic.style, x.fontData, true, font);
370
359
  });
371
- } else {
372
- const font = await loadOpentype(fontFamily.italic.src, null);
373
- italicOptFont = new FontContainerFont(fontFamily.italic.family, fontFamily.italic.style, fontFamily.italic.src, true, font);
374
360
  }
375
361
 
376
362
  // Bold fonts are not optimized, as we currently have no accurate way to determine if characters are bold within OCR, so do not have bold metrics.
377
- const boldOptFont = loadOpentype(fontFamily.bold.src, null).then((opentypeFont) => new FontContainerFont(fontFamily.bold.family, fontFamily.bold.style, fontFamily.bold.src, true, opentypeFont));
378
-
379
363
  return {
380
- normal: await normalOptFont, italic: await italicOptFont, bold: await boldOptFont,
364
+ normal: await normalOptFont, italic: await italicOptFont, bold: null,
381
365
  };
382
366
  }
383
367
 
384
368
  /**
385
369
  * Optimize all fonts.
370
+ * If a font cannot be optimized, then the raw font is returned.
386
371
  * @param {Object<string, FontContainerFamilyBuiltIn>} fontPrivate
387
372
  * @param {Object.<string, FontMetricsFamily>} fontMetricsObj
388
373
  */
@@ -396,6 +381,8 @@ export async function optimizeFontContainerAll(fontPrivate, fontMetricsObj) {
396
381
 
397
382
  const results = await Promise.all([carlitoPromise, centuryPromise, garamondPromise, palatinoPromise, nimbusRomNo9LPromise, nimbusSansPromise]);
398
383
 
384
+ if (results.every((x) => x === null)) return null;
385
+
399
386
  return {
400
387
  Carlito: results[0],
401
388
  Century: results[1],
package/js/fontEval.js CHANGED
@@ -1,20 +1,21 @@
1
- import { DebugData, fontMetricsObj, pageMetricsArr } from './containers/dataContainer.js';
2
- import { fontAll } from './containers/fontContainer.js';
1
+ import { fontMetricsObj, pageMetricsArr } from './containers/dataContainer.js';
2
+ import { FontCont } from './containers/fontContainer.js';
3
3
  import { ImageCache } from './containers/imageContainer.js';
4
4
  import {
5
- enableFontOpt,
6
5
  loadBuiltInFontsRaw,
7
6
  optimizeFontContainerAll, setDefaultFontAuto,
7
+ updateFontContWorkerMain,
8
8
  } from './fontContainerMain.js';
9
9
  import { gs } from './generalWorkerMain.js';
10
10
 
11
11
  /**
12
- *
13
- * @param {FontContainerFamily} font
12
+ * Evaluate how well a font matches the provided array of pages.
13
+ * @param {string} font - Name of font family.
14
14
  * @param {Array<OcrPage>} pageArr
15
+ * @param {boolean} opt - Whether to use optimized fonts.
15
16
  * @param {number} n - Number of words to compare
16
17
  */
17
- export async function evalPageFonts(font, pageArr, n = 500) {
18
+ export async function evalPagesFont(font, pageArr, opt, n = 500) {
18
19
  if (!gs.scheduler) throw new Error('GeneralScheduler must be defined before this function can run.');
19
20
 
20
21
  let metricTotal = 0;
@@ -25,25 +26,27 @@ export async function evalPageFonts(font, pageArr, n = 500) {
25
26
 
26
27
  const imageI = await ImageCache.getBinary(i);
27
28
 
28
- // The Node.js canvas package does not currently support worke threads
29
+ // The Node.js canvas package does not currently support worker threads
29
30
  // https://github.com/Automattic/node-canvas/issues/1394
30
31
  let res;
31
32
  if (!(typeof process === 'undefined')) {
32
33
  const { evalPageFont } = await import('./worker/compareOCRModule.js');
33
34
 
34
35
  res = await evalPageFont({
35
- font: font.normal.family,
36
+ font,
36
37
  page: pageArr[i],
37
38
  binaryImage: imageI,
38
39
  pageMetricsObj: pageMetricsArr[i],
40
+ opt,
39
41
  });
40
42
  // Browser case
41
43
  } else {
42
44
  res = await gs.scheduler.evalPageFont({
43
- font: font.normal.family,
45
+ font,
44
46
  page: pageArr[i],
45
47
  binaryImage: imageI,
46
48
  pageMetricsObj: pageMetricsArr[i],
49
+ opt,
47
50
  });
48
51
  }
49
52
 
@@ -56,28 +59,31 @@ export async function evalPageFonts(font, pageArr, n = 500) {
56
59
 
57
60
  /**
58
61
  * @param {Array<OcrPage>} pageArr
62
+ * @param {boolean} opt - Whether to use optimized fonts.
59
63
  */
60
- export async function evaluateFonts(pageArr) {
61
- const fontActive = fontAll.getContainer('active');
62
-
63
- const debug = false;
64
+ export async function evaluateFonts(pageArr, opt) {
65
+ const evalCarlito = !!(opt ? FontCont.opt?.Carlito : FontCont.raw?.Carlito);
66
+ const evalNimbusSans = !!(opt ? FontCont.opt?.NimbusSans : FontCont.raw?.NimbusSans);
67
+ const evalCentury = !!(opt ? FontCont.opt?.Century : FontCont.raw?.Century);
68
+ const evalPalatino = !!(opt ? FontCont.opt?.Palatino : FontCont.raw?.Palatino);
69
+ const evalGaramond = !!(opt ? FontCont.opt?.Garamond : FontCont.raw?.Garamond);
70
+ const evalNimbusRomNo9L = !!(opt ? FontCont.opt?.NimbusRomNo9L : FontCont.raw?.NimbusRomNo9L);
64
71
 
65
72
  // The browser version runs in parallel using workers, however the Node.js version runs sequentially,
66
73
  // as the canvas package does not support workers, and trying to run in parallel causes problems.
67
74
  // The logic is the same in both versions.
68
- let sansMetrics;
69
- let serifMetrics;
75
+ let fontMetricsTmp;
70
76
  if (typeof process === 'undefined') {
71
77
  const fontMetricsPromises = {
72
- carlito: evalPageFonts(fontActive.Carlito, pageArr),
73
- nimbusSans: evalPageFonts(fontActive.NimbusSans, pageArr),
74
- century: evalPageFonts(fontActive.Century, pageArr),
75
- palatino: evalPageFonts(fontActive.Palatino, pageArr),
76
- garamond: evalPageFonts(fontActive.Garamond, pageArr),
77
- nimbusRomNo9L: evalPageFonts(fontActive.NimbusRomNo9L, pageArr),
78
+ carlito: evalCarlito ? evalPagesFont('Carlito', pageArr, opt) : null,
79
+ nimbusSans: evalNimbusSans ? evalPagesFont('NimbusSans', pageArr, opt) : null,
80
+ century: evalCentury ? evalPagesFont('Century', pageArr, opt) : null,
81
+ palatino: evalPalatino ? evalPagesFont('Palatino', pageArr, opt) : null,
82
+ garamond: evalGaramond ? evalPagesFont('Garamond', pageArr, opt) : null,
83
+ nimbusRomNo9L: evalNimbusRomNo9L ? evalPagesFont('NimbusRomNo9L', pageArr, opt) : null,
78
84
  };
79
85
 
80
- const fontMetrics = {
86
+ fontMetricsTmp = {
81
87
  carlito: await fontMetricsPromises.carlito,
82
88
  nimbusSans: await fontMetricsPromises.nimbusSans,
83
89
  century: await fontMetricsPromises.century,
@@ -85,46 +91,39 @@ export async function evaluateFonts(pageArr) {
85
91
  garamond: await fontMetricsPromises.garamond,
86
92
  nimbusRomNo9L: await fontMetricsPromises.nimbusRomNo9L,
87
93
  };
88
-
89
- sansMetrics = {
90
- Carlito: fontMetrics.carlito.metricTotal / fontMetrics.carlito.wordsTotal,
91
- NimbusSans: fontMetrics.nimbusSans.metricTotal / fontMetrics.nimbusSans.wordsTotal,
92
- };
93
-
94
- serifMetrics = {
95
- Century: fontMetrics.century.metricTotal / fontMetrics.century.wordsTotal,
96
- Palatino: fontMetrics.palatino.metricTotal / fontMetrics.palatino.wordsTotal,
97
- Garamond: fontMetrics.garamond.metricTotal / fontMetrics.garamond.wordsTotal,
98
- NimbusRomNo9L: fontMetrics.nimbusRomNo9L.metricTotal / fontMetrics.nimbusRomNo9L.wordsTotal,
99
- };
100
94
  } else {
101
- const fontMetrics = {
102
- Carlito: await evalPageFonts(fontActive.Carlito, pageArr),
103
- NimbusSans: await evalPageFonts(fontActive.NimbusSans, pageArr),
104
- Century: await evalPageFonts(fontActive.Century, pageArr),
105
- Palatino: await evalPageFonts(fontActive.Palatino, pageArr),
106
- Garamond: await evalPageFonts(fontActive.Garamond, pageArr),
107
- NimbusRomNo9L: await evalPageFonts(fontActive.NimbusRomNo9L, pageArr),
95
+ fontMetricsTmp = {
96
+ carlito: evalCarlito ? await evalPagesFont('Carlito', pageArr, opt) : null,
97
+ nimbusSans: evalNimbusSans ? await evalPagesFont('NimbusSans', pageArr, opt) : null,
98
+ century: evalCentury ? await evalPagesFont('Century', pageArr, opt) : null,
99
+ palatino: evalPalatino ? await evalPagesFont('Palatino', pageArr, opt) : null,
100
+ garamond: evalGaramond ? await evalPagesFont('Garamond', pageArr, opt) : null,
101
+ nimbusRomNo9L: evalNimbusRomNo9L ? await evalPagesFont('NimbusRomNo9L', pageArr, opt) : null,
108
102
  };
103
+ }
109
104
 
110
- sansMetrics = {
111
- Carlito: fontMetrics.Carlito.metricTotal / fontMetrics.Carlito.wordsTotal,
112
- NimbusSans: fontMetrics.NimbusSans.metricTotal / fontMetrics.NimbusSans.wordsTotal,
113
- };
105
+ const fontMetrics = {
106
+ Carlito: fontMetricsTmp.carlito ? fontMetricsTmp.carlito.metricTotal / fontMetricsTmp.carlito.wordsTotal : null,
107
+ NimbusSans: fontMetricsTmp.nimbusSans ? fontMetricsTmp.nimbusSans.metricTotal / fontMetricsTmp.nimbusSans.wordsTotal : null,
108
+ Century: fontMetricsTmp.century ? fontMetricsTmp.century.metricTotal / fontMetricsTmp.century.wordsTotal : null,
109
+ Palatino: fontMetricsTmp.palatino ? fontMetricsTmp.palatino.metricTotal / fontMetricsTmp.palatino.wordsTotal : null,
110
+ Garamond: fontMetricsTmp.garamond ? fontMetricsTmp.garamond.metricTotal / fontMetricsTmp.garamond.wordsTotal : null,
111
+ NimbusRomNo9L: fontMetricsTmp.nimbusRomNo9L ? fontMetricsTmp.nimbusRomNo9L.metricTotal / fontMetricsTmp.nimbusRomNo9L.wordsTotal : null,
112
+ };
114
113
 
115
- serifMetrics = {
116
- Century: fontMetrics.Century.metricTotal / fontMetrics.Century.wordsTotal,
117
- Palatino: fontMetrics.Palatino.metricTotal / fontMetrics.Palatino.wordsTotal,
118
- Garamond: fontMetrics.Garamond.metricTotal / fontMetrics.Garamond.wordsTotal,
119
- NimbusRomNo9L: fontMetrics.NimbusRomNo9L.metricTotal / fontMetrics.NimbusRomNo9L.wordsTotal,
120
- };
121
- }
114
+ return fontMetrics;
115
+ }
122
116
 
117
+ /**
118
+ *
119
+ * @param {Awaited<ReturnType<evaluateFonts>>} fontMetrics
120
+ */
121
+ const calcBestFonts = (fontMetrics) => {
123
122
  let minKeySans = 'NimbusSans';
124
123
  let minValueSans = Number.MAX_VALUE;
125
124
 
126
- for (const [key, value] of Object.entries(sansMetrics)) {
127
- if (debug) console.log(`${key} metric: ${String(value)}`);
125
+ for (const [key, value] of Object.entries(fontMetrics)) {
126
+ if (!['Carlito', 'NimbusSans'].includes(key)) continue;
128
127
  if (value < minValueSans) {
129
128
  minValueSans = value;
130
129
  minKeySans = key;
@@ -134,8 +133,8 @@ export async function evaluateFonts(pageArr) {
134
133
  let minKeySerif = 'NimbusRomNo9L';
135
134
  let minValueSerif = Number.MAX_VALUE;
136
135
 
137
- for (const [key, value] of Object.entries(serifMetrics)) {
138
- if (debug) console.log(`${key} metric: ${String(value)}`);
136
+ for (const [key, value] of Object.entries(fontMetrics)) {
137
+ if (!['Century', 'Palatino', 'Garamond', 'NimbusRomNo9L'].includes(key)) continue;
139
138
  if (value < minValueSerif) {
140
139
  minValueSerif = value;
141
140
  minKeySerif = key;
@@ -143,12 +142,10 @@ export async function evaluateFonts(pageArr) {
143
142
  }
144
143
 
145
144
  return {
146
- sansMetrics,
147
- serifMetrics,
148
145
  minKeySans,
149
146
  minKeySerif,
150
147
  };
151
- }
148
+ };
152
149
 
153
150
  /**
154
151
  * Runs font optimization and validation. Sets `FontCont` defaults to best fonts,
@@ -164,24 +161,19 @@ export async function evaluateFonts(pageArr) {
164
161
  export async function runFontOptimization(ocrArr) {
165
162
  await loadBuiltInFontsRaw();
166
163
 
167
- const fontRaw = fontAll.getContainer('raw');
168
-
169
164
  const calculateOpt = fontMetricsObj && Object.keys(fontMetricsObj).length > 0;
170
165
 
171
166
  let enableOptSerif = false;
172
167
  let enableOptSans = false;
173
168
 
169
+ let optimizeFontContainerAllPromise;
174
170
  if (calculateOpt) {
175
171
  setDefaultFontAuto(fontMetricsObj);
176
- fontAll.optInitial = await optimizeFontContainerAll(fontRaw, fontMetricsObj);
177
-
178
- // If no image data exists, then `opt` is set to `optInitial`.
179
- // This behavior exists so that data can be loaded from previous sessions without changing the appearance of the document.
180
- // Arguably, in cases where a user uploads raw OCR data and no images, using the raw font is more prudent than an unvalidated optimized font.
181
- // If this ever comes up in actual usage and is a problem, then the behavior can be changed for that specific case.
182
- if (!ImageCache.inputModes.image && !ImageCache.inputModes.pdf) {
183
- fontAll.opt = { ...fontAll.optInitial };
184
- }
172
+
173
+ optimizeFontContainerAllPromise = optimizeFontContainerAll(FontCont.raw, fontMetricsObj)
174
+ .then((res) => {
175
+ FontCont.opt = res;
176
+ });
185
177
  }
186
178
 
187
179
  // If image data exists, select the correct font by comparing to the image.
@@ -189,70 +181,50 @@ export async function runFontOptimization(ocrArr) {
189
181
  // Evaluate default fonts using up to 5 pages.
190
182
  const pageNum = Math.min(ImageCache.pageCount, 5);
191
183
 
192
- // Set raw font in workers
193
- await enableFontOpt(false);
194
-
195
184
  // This step needs to happen here as all fonts must be registered before initializing the canvas.
196
185
  if (!(typeof process === 'undefined')) {
186
+ await optimizeFontContainerAllPromise;
197
187
  const { initCanvasNode } = await import('./worker/compareOCRModule.js');
198
188
  await initCanvasNode();
199
189
  }
200
190
 
201
- const evalRaw = await evaluateFonts(ocrArr.slice(0, pageNum));
202
-
203
- DebugData.evalRaw = evalRaw;
191
+ FontCont.rawMetrics = await evaluateFonts(ocrArr.slice(0, pageNum), false);
192
+ const bestMetricsRaw = calcBestFonts(FontCont.rawMetrics);
204
193
 
205
- if (calculateOpt && Object.keys(fontAll.optInitial).length > 0) {
206
- // Enable optimized fonts
207
- await enableFontOpt(true, true, true);
194
+ await optimizeFontContainerAllPromise;
195
+ if (FontCont.opt && Object.keys(FontCont.opt).length > 0) {
196
+ await updateFontContWorkerMain();
208
197
 
209
- const evalOpt = await evaluateFonts(ocrArr.slice(0, pageNum));
198
+ FontCont.optMetrics = await evaluateFonts(ocrArr.slice(0, pageNum), true);
210
199
 
211
- DebugData.evalOpt = evalOpt;
200
+ const bestMetricsOpt = calcBestFonts(FontCont.optMetrics);
212
201
 
213
202
  // The default font for both the optimized and unoptimized versions are set to the same font.
214
203
  // This ensures that switching on/off "font optimization" does not change the font, which would be confusing.
215
- if (evalOpt.sansMetrics[evalOpt.minKeySans] < evalRaw.sansMetrics[evalRaw.minKeySans]) {
216
- fontAll.sansDefaultName = evalOpt.minKeySans;
204
+ if (FontCont.optMetrics[bestMetricsOpt.minKeySans] < FontCont.rawMetrics[bestMetricsRaw.minKeySans]) {
217
205
  enableOptSans = true;
206
+ FontCont.sansDefaultName = bestMetricsOpt.minKeySans;
218
207
  } else {
219
- fontAll.sansDefaultName = evalRaw.minKeySans;
208
+ FontCont.sansDefaultName = bestMetricsRaw.minKeySans;
220
209
  }
221
210
 
222
211
  // Repeat for serif fonts
223
- if (evalOpt.serifMetrics[evalOpt.minKeySerif] < evalRaw.serifMetrics[evalRaw.minKeySerif]) {
224
- fontAll.serifDefaultName = evalOpt.minKeySerif;
212
+ if (FontCont.optMetrics[bestMetricsOpt.minKeySerif] < FontCont.rawMetrics[bestMetricsRaw.minKeySerif]) {
225
213
  enableOptSerif = true;
214
+ FontCont.serifDefaultName = bestMetricsOpt.minKeySerif;
226
215
  } else {
227
- fontAll.serifDefaultName = evalRaw.minKeySerif;
228
- }
229
-
230
- // Create final optimized font object.
231
- // The final optimized font is set to either the initial optimized font or the raw font depending on what fits better.
232
- // Make shallow copy to allow for changing individual fonts without copying the entire object.
233
- fontAll.opt = { ...fontAll.optInitial };
234
-
235
- if (!enableOptSans) {
236
- fontAll.opt.Carlito = fontRaw.Carlito;
237
- fontAll.opt.NimbusSans = fontRaw.NimbusSans;
238
- }
239
-
240
- if (!enableOptSerif) {
241
- fontAll.opt.Century = fontRaw.Century;
242
- fontAll.opt.Garamond = fontRaw.Garamond;
243
- fontAll.opt.NimbusRomNo9L = fontRaw.NimbusRomNo9L;
244
- fontAll.opt.Palatino = fontRaw.Palatino;
216
+ FontCont.serifDefaultName = bestMetricsRaw.minKeySerif;
245
217
  }
246
218
  } else {
247
- fontAll.sansDefaultName = evalRaw.minKeySans;
248
- fontAll.serifDefaultName = evalRaw.minKeySerif;
219
+ FontCont.sansDefaultName = bestMetricsRaw.minKeySans;
220
+ FontCont.serifDefaultName = bestMetricsRaw.minKeySerif;
249
221
  }
250
- }
251
222
 
252
- // Set final fonts in workers
253
- await enableFontOpt(true, false, true);
223
+ FontCont.enableOpt = enableOptSerif || enableOptSans;
254
224
 
255
- const enableOpt = enableOptSerif || enableOptSans;
225
+ // Send updated state to all workers.
226
+ await updateFontContWorkerMain();
227
+ }
256
228
 
257
- return enableOpt;
229
+ return FontCont.enableOpt;
258
230
  }