scribe.js-ocr 0.7.4 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74) hide show
  1. package/build-deno-compile.sh +30 -0
  2. package/cli/cli.js +46 -18
  3. package/cli/detectPDFType.js +1 -2
  4. package/cli/extract.js +14 -7
  5. package/cli/main.js +39 -39
  6. package/cli/require.js +1 -1
  7. package/cli/scribe.js +12 -11
  8. package/fonts/Dingbats.woff +0 -0
  9. package/fonts/all/URWGothicBook-Bold.woff +0 -0
  10. package/fonts/all/URWGothicBook-BoldItalic.woff +0 -0
  11. package/fonts/all/URWGothicBook-Italic.woff +0 -0
  12. package/fonts/all/URWGothicBook-Regular.woff +0 -0
  13. package/fonts/latin/URWGothicBook-Bold.woff +0 -0
  14. package/fonts/latin/URWGothicBook-BoldItalic.woff +0 -0
  15. package/fonts/latin/URWGothicBook-Italic.woff +0 -0
  16. package/fonts/latin/URWGothicBook-Regular.woff +0 -0
  17. package/js/canvasAdapter.js +4 -1
  18. package/js/clear.js +7 -8
  19. package/js/containers/app.js +2 -0
  20. package/js/containers/dataContainer.js +1 -4
  21. package/js/containers/fontContainer.js +59 -44
  22. package/js/containers/imageContainer.js +13 -35
  23. package/js/coordinates.js +3 -3
  24. package/js/debug.js +2 -2
  25. package/js/export/export.js +103 -18
  26. package/js/export/exportDebugCsv.js +4 -3
  27. package/js/export/pdf/writePdf.js +389 -0
  28. package/js/export/{writePdfFonts.js → pdf/writePdfFonts.js} +16 -12
  29. package/js/export/pdf/writePdfImages.js +218 -0
  30. package/js/export/{writePdf.js → pdf/writePdfText.js} +28 -315
  31. package/js/export/writeDocx.js +12 -5
  32. package/js/export/writeHocr.js +11 -10
  33. package/js/export/writeHtml.js +208 -48
  34. package/js/export/writeTabular.js +31 -20
  35. package/js/export/writeText.js +12 -10
  36. package/js/fontContainerMain.js +101 -50
  37. package/js/fontEval.js +18 -14
  38. package/js/fontStatistics.js +90 -90
  39. package/js/generalWorkerMain.js +52 -6
  40. package/js/global.d.ts +178 -6
  41. package/js/import/convertDocTextract.js +447 -0
  42. package/js/import/convertPageAbbyy.js +10 -4
  43. package/js/import/convertPageBlocks.js +4 -4
  44. package/js/import/convertPageGoogleVision.js +204 -0
  45. package/js/import/convertPageHocr.js +3 -3
  46. package/js/import/convertPageShared.js +1 -0
  47. package/js/import/convertPageStext.js +18 -10
  48. package/js/import/convertPageText.js +289 -0
  49. package/js/import/import.js +133 -125
  50. package/js/import/importOCR.js +98 -46
  51. package/js/import/nodeAdapter.js +2 -2
  52. package/js/modifyOCR.js +6 -5
  53. package/js/nudge.js +3 -3
  54. package/js/objects/{fontMetricsObjects.js → charMetricsObjects.js} +12 -12
  55. package/js/objects/imageObjects.js +3 -2
  56. package/js/objects/layoutObjects.js +37 -0
  57. package/js/objects/ocrObjects.js +51 -3
  58. package/js/recognizeConvert.js +74 -23
  59. package/js/utils/fontUtils.js +32 -1
  60. package/js/utils/imageUtils.js +99 -0
  61. package/js/utils/miscUtils.js +158 -9
  62. package/js/utils/reflowPars.js +4 -0
  63. package/js/worker/compareOCRModule.js +20 -18
  64. package/js/worker/generalWorker.js +12 -6
  65. package/js/worker/optimizeFontModule.js +19 -19
  66. package/mupdf/libmupdf.js +3 -3
  67. package/mupdf/libmupdf.wasm +0 -0
  68. package/mupdf/mupdf-async.js +1 -1
  69. package/mupdf/mupdf-worker.js +9 -4
  70. package/package.json +7 -4
  71. package/scribe.js +5 -5
  72. package/tess/tesseract.esm.min.js +1 -1
  73. package/tess/tesseract.min.js +1 -1
  74. package/tess/worker.min.js +1 -1
@@ -15,9 +15,9 @@ import { gs } from './generalWorkerMain.js';
15
15
  */
16
16
  export async function loadBuiltInFontsRaw(glyphSet = 'latin') {
17
17
  // Return early if the font set is already loaded, or a superset of the requested set is loaded.
18
- if (FontCont.glyphSet === glyphSet || FontCont.glyphSet === 'all' && glyphSet === 'latin') return;
18
+ if (FontCont.state.glyphSet === glyphSet || FontCont.state.glyphSet === 'all' && glyphSet === 'latin') return;
19
19
 
20
- FontCont.glyphSet = glyphSet;
20
+ FontCont.state.glyphSet = glyphSet;
21
21
 
22
22
  // Note: this function is intentionally verbose, and should not be refactored to generate the paths dynamically.
23
23
  // Build systems will not be able to resolve the paths if they are generated dynamically.
@@ -49,6 +49,10 @@ export async function loadBuiltInFontsRaw(glyphSet = 'latin') {
49
49
  let /** @type {Promise<ArrayBuffer>} */nimbusMonoItalic;
50
50
  let /** @type {Promise<ArrayBuffer>} */nimbusMonoBold;
51
51
  let /** @type {Promise<ArrayBuffer>} */nimbusMonoBoldItalic;
52
+ let /** @type {Promise<ArrayBuffer>} */gothicNormal;
53
+ let /** @type {Promise<ArrayBuffer>} */gothicItalic;
54
+ let /** @type {Promise<ArrayBuffer>} */gothicBold;
55
+ let /** @type {Promise<ArrayBuffer>} */gothicBoldItalic;
52
56
  if (typeof process === 'undefined') {
53
57
  if (glyphSet === 'latin') {
54
58
  carlitoNormal = fetch(new URL('../fonts/latin/Carlito-Regular.woff', import.meta.url)).then((res) => res.arrayBuffer());
@@ -79,6 +83,10 @@ export async function loadBuiltInFontsRaw(glyphSet = 'latin') {
79
83
  nimbusMonoItalic = fetch(new URL('../fonts/latin/NimbusMono-Italic.woff', import.meta.url)).then((res) => res.arrayBuffer());
80
84
  nimbusMonoBold = fetch(new URL('../fonts/latin/NimbusMono-Bold.woff', import.meta.url)).then((res) => res.arrayBuffer());
81
85
  nimbusMonoBoldItalic = fetch(new URL('../fonts/latin/NimbusMono-BoldItalic.woff', import.meta.url)).then((res) => res.arrayBuffer());
86
+ gothicNormal = fetch(new URL('../fonts/latin/URWGothicBook-Regular.woff', import.meta.url)).then((res) => res.arrayBuffer());
87
+ gothicItalic = fetch(new URL('../fonts/latin/URWGothicBook-Italic.woff', import.meta.url)).then((res) => res.arrayBuffer());
88
+ gothicBold = fetch(new URL('../fonts/latin/URWGothicBook-Bold.woff', import.meta.url)).then((res) => res.arrayBuffer());
89
+ gothicBoldItalic = fetch(new URL('../fonts/latin/URWGothicBook-BoldItalic.woff', import.meta.url)).then((res) => res.arrayBuffer());
82
90
  } else {
83
91
  carlitoNormal = fetch(new URL('../fonts/all/Carlito-Regular.woff', import.meta.url)).then((res) => res.arrayBuffer());
84
92
  carlitoItalic = fetch(new URL('../fonts/all/Carlito-Italic.woff', import.meta.url)).then((res) => res.arrayBuffer());
@@ -108,9 +116,13 @@ export async function loadBuiltInFontsRaw(glyphSet = 'latin') {
108
116
  nimbusMonoItalic = fetch(new URL('../fonts/all/NimbusMono-Italic.woff', import.meta.url)).then((res) => res.arrayBuffer());
109
117
  nimbusMonoBold = fetch(new URL('../fonts/all/NimbusMono-Bold.woff', import.meta.url)).then((res) => res.arrayBuffer());
110
118
  nimbusMonoBoldItalic = fetch(new URL('../fonts/all/NimbusMono-BoldItalic.woff', import.meta.url)).then((res) => res.arrayBuffer());
119
+ gothicNormal = fetch(new URL('../fonts/all/URWGothicBook-Regular.woff', import.meta.url)).then((res) => res.arrayBuffer());
120
+ gothicItalic = fetch(new URL('../fonts/all/URWGothicBook-Italic.woff', import.meta.url)).then((res) => res.arrayBuffer());
121
+ gothicBold = fetch(new URL('../fonts/all/URWGothicBook-Bold.woff', import.meta.url)).then((res) => res.arrayBuffer());
122
+ gothicBoldItalic = fetch(new URL('../fonts/all/URWGothicBook-BoldItalic.woff', import.meta.url)).then((res) => res.arrayBuffer());
111
123
  }
112
124
  } else {
113
- const { readFile } = await import('fs/promises');
125
+ const { readFile } = await import('node:fs/promises');
114
126
  carlitoNormal = readFile(new URL('../fonts/all/Carlito-Regular.woff', import.meta.url)).then((res) => res.buffer);
115
127
  carlitoItalic = readFile(new URL('../fonts/all/Carlito-Italic.woff', import.meta.url)).then((res) => res.buffer);
116
128
  carlitoBold = readFile(new URL('../fonts/all/Carlito-Bold.woff', import.meta.url)).then((res) => res.buffer);
@@ -139,6 +151,10 @@ export async function loadBuiltInFontsRaw(glyphSet = 'latin') {
139
151
  nimbusMonoItalic = readFile(new URL('../fonts/all/NimbusMono-Italic.woff', import.meta.url)).then((res) => res.buffer);
140
152
  nimbusMonoBold = readFile(new URL('../fonts/all/NimbusMono-Bold.woff', import.meta.url)).then((res) => res.buffer);
141
153
  nimbusMonoBoldItalic = readFile(new URL('../fonts/all/NimbusMono-BoldItalic.woff', import.meta.url)).then((res) => res.buffer);
154
+ gothicNormal = readFile(new URL('../fonts/all/URWGothicBook-Regular.woff', import.meta.url)).then((res) => res.buffer);
155
+ gothicItalic = readFile(new URL('../fonts/all/URWGothicBook-Italic.woff', import.meta.url)).then((res) => res.buffer);
156
+ gothicBold = readFile(new URL('../fonts/all/URWGothicBook-Bold.woff', import.meta.url)).then((res) => res.buffer);
157
+ gothicBoldItalic = readFile(new URL('../fonts/all/URWGothicBook-BoldItalic.woff', import.meta.url)).then((res) => res.buffer);
142
158
  }
143
159
 
144
160
  const srcObj = {
@@ -151,6 +167,9 @@ export async function loadBuiltInFontsRaw(glyphSet = 'latin') {
151
167
  Garamond: {
152
168
  normal: await garamondNormal, italic: await garamondItalic, bold: await garamondBold, boldItalic: await garamondBoldItalic,
153
169
  },
170
+ Gothic: {
171
+ normal: await gothicNormal, italic: await gothicItalic, bold: await gothicBold, boldItalic: await gothicBoldItalic,
172
+ },
154
173
  Palatino: {
155
174
  normal: await palatinoNormal, italic: await palatinoItalic, bold: await palatinoBold, boldItalic: await palatinoBoldItalic,
156
175
  },
@@ -193,7 +212,7 @@ export async function loadChiSimFont() {
193
212
  if (typeof process === 'undefined') {
194
213
  chiSimSrc = fetch(new URL('../fonts/NotoSansSC-Regular.ttf', import.meta.url)).then((res) => res.arrayBuffer());
195
214
  } else {
196
- const { readFile } = await import('fs/promises');
215
+ const { readFile } = await import('node:fs/promises');
197
216
  chiSimSrc = readFile(new URL('../fonts/NotoSansSC-Regular.ttf', import.meta.url)).then((res) => res.buffer);
198
217
  }
199
218
 
@@ -204,6 +223,35 @@ export async function loadChiSimFont() {
204
223
  return chiReady;
205
224
  }
206
225
 
226
+ let dingbatsReadyRes;
227
+ let dingbatsReady;
228
+
229
+ /**
230
+ * Loads dingbats font. Returns early if already loaded.
231
+ */
232
+ export async function loadDingbatsFont() {
233
+ console.log('Loading Dingbats font');
234
+ if (dingbatsReady) return dingbatsReady;
235
+
236
+ dingbatsReady = new Promise((resolve, reject) => {
237
+ dingbatsReadyRes = resolve;
238
+ });
239
+
240
+ let /** @type {Promise<ArrayBuffer>} */ dingbatsSrc;
241
+ if (typeof process === 'undefined') {
242
+ dingbatsSrc = fetch(new URL('../fonts/Dingbats.woff', import.meta.url)).then((res) => res.arrayBuffer());
243
+ } else {
244
+ const { readFile } = await import('node:fs/promises');
245
+ dingbatsSrc = readFile(new URL('../fonts/Dingbats.woff', import.meta.url)).then((res) => res.buffer);
246
+ }
247
+
248
+ FontCont.supp.dingbats = await loadFont('Dingbats', 'normal', 'sans', await dingbatsSrc, false);
249
+
250
+ dingbatsReadyRes();
251
+
252
+ return dingbatsReady;
253
+ }
254
+
207
255
  /**
208
256
  * Enable or disable font optimization settings.
209
257
  * This function is used rather than exposing the settings using the `opt` object, as these settings exist on the font container in both the main thread and the worker threads.
@@ -213,15 +261,15 @@ export async function loadChiSimFont() {
213
261
  export async function enableFontOpt(enableOpt, forceOpt) {
214
262
  let change = false;
215
263
  if (enableOpt === true || enableOpt === false) {
216
- if (FontCont.enableOpt !== enableOpt) {
264
+ if (FontCont.state.enableOpt !== enableOpt) {
217
265
  change = true;
218
- FontCont.enableOpt = enableOpt;
266
+ FontCont.state.enableOpt = enableOpt;
219
267
  }
220
268
  }
221
269
  if (forceOpt === true || forceOpt === false) {
222
- if (FontCont.forceOpt !== forceOpt) {
270
+ if (FontCont.state.forceOpt !== forceOpt) {
223
271
  change = true;
224
- FontCont.forceOpt = forceOpt;
272
+ FontCont.state.forceOpt = forceOpt;
225
273
  }
226
274
  }
227
275
 
@@ -286,11 +334,11 @@ export async function updateFontContWorkerMain(params = {}) {
286
334
  const res = worker.updateFontContWorker({
287
335
  rawMetrics: FontCont.rawMetrics,
288
336
  optMetrics: FontCont.optMetrics,
289
- sansDefaultName: FontCont.sansDefaultName,
290
- serifDefaultName: FontCont.serifDefaultName,
291
- defaultFontName: FontCont.defaultFontName,
292
- enableOpt: FontCont.enableOpt,
293
- forceOpt: FontCont.forceOpt,
337
+ sansDefaultName: FontCont.state.sansDefaultName,
338
+ serifDefaultName: FontCont.state.serifDefaultName,
339
+ defaultFontName: FontCont.state.defaultFontName,
340
+ enableOpt: FontCont.state.enableOpt,
341
+ forceOpt: FontCont.state.forceOpt,
294
342
  });
295
343
  resArr.push(res);
296
344
  }
@@ -336,11 +384,11 @@ export async function setUploadFontsWorker(scheduler) {
336
384
  const res = worker.updateFontContWorker({
337
385
  rawMetrics: FontCont.rawMetrics,
338
386
  optMetrics: FontCont.optMetrics,
339
- sansDefaultName: FontCont.sansDefaultName,
340
- serifDefaultName: FontCont.serifDefaultName,
341
- defaultFontName: FontCont.defaultFontName,
342
- enableOpt: FontCont.enableOpt,
343
- forceOpt: FontCont.forceOpt,
387
+ sansDefaultName: FontCont.state.sansDefaultName,
388
+ serifDefaultName: FontCont.state.serifDefaultName,
389
+ defaultFontName: FontCont.state.defaultFontName,
390
+ enableOpt: FontCont.state.enableOpt,
391
+ forceOpt: FontCont.state.forceOpt,
344
392
  });
345
393
  resArr.push(res);
346
394
  }
@@ -351,23 +399,23 @@ export async function setUploadFontsWorker(scheduler) {
351
399
  * Automatically sets the default font to whatever font is most common in the provided font metrics.
352
400
  *
353
401
  */
354
- export function setDefaultFontAuto(fontMetricsObj) {
355
- const multiFontMode = checkMultiFontMode(fontMetricsObj);
402
+ export function setDefaultFontAuto(charMetricsObj) {
403
+ const multiFontMode = checkMultiFontMode(charMetricsObj);
356
404
 
357
405
  // Return early if the OCR data does not contain font info.
358
406
  if (!multiFontMode) return;
359
407
 
360
408
  // Change default font to whatever named font appears more
361
- if ((fontMetricsObj.SerifDefault?.obs || 0) > (fontMetricsObj.SansDefault?.obs || 0)) {
362
- FontCont.defaultFontName = 'SerifDefault';
409
+ if ((charMetricsObj.SerifDefault?.obs || 0) > (charMetricsObj.SansDefault?.obs || 0)) {
410
+ FontCont.state.defaultFontName = 'SerifDefault';
363
411
  } else {
364
- FontCont.defaultFontName = 'SansDefault';
412
+ FontCont.state.defaultFontName = 'SansDefault';
365
413
  }
366
414
 
367
415
  if (gs.schedulerInner) {
368
416
  for (let i = 0; i < gs.schedulerInner.workers.length; i++) {
369
417
  const worker = gs.schedulerInner.workers[i];
370
- worker.updateFontContWorker({ defaultFontName: FontCont.defaultFontName });
418
+ worker.updateFontContWorker({ defaultFontName: FontCont.state.defaultFontName });
371
419
  }
372
420
  }
373
421
  }
@@ -375,39 +423,39 @@ export function setDefaultFontAuto(fontMetricsObj) {
375
423
  /**
376
424
  *
377
425
  * @param {FontContainerFamilyBuiltIn} fontFamily
378
- * @param {Object.<string, FontMetricsFamily>} fontMetricsObj
426
+ * @param {Object.<string, CharMetricsFamily>} charMetricsObj
379
427
  */
380
- export async function optimizeFontContainerFamily(fontFamily, fontMetricsObj) {
428
+ export async function optimizeFontContainerFamily(fontFamily, charMetricsObj) {
381
429
  // When we have metrics for individual fonts families, those are used to optimize the appropriate fonts.
382
430
  // Otherwise, the "default" metric is applied to whatever font the user has selected as the default font.
383
- const multiFontMode = checkMultiFontMode(fontMetricsObj);
384
- let fontMetricsType = 'Default';
431
+ const multiFontMode = checkMultiFontMode(charMetricsObj);
432
+ let charMetricsType = 'Default';
385
433
  if (multiFontMode) {
386
434
  if (fontFamily.normal.type === 'sans') {
387
- fontMetricsType = 'SansDefault';
435
+ charMetricsType = 'SansDefault';
388
436
  } else {
389
- fontMetricsType = 'SerifDefault';
437
+ charMetricsType = 'SerifDefault';
390
438
  }
391
439
  }
392
440
 
393
441
  // If there are no statistics to use for optimization, create "optimized" font by simply copying the raw font without modification.
394
442
  // This should only occur when `multiFontMode` is true, but a document contains no sans words or no serif words.
395
- if (!fontMetricsObj[fontMetricsType] || !fontMetricsObj[fontMetricsType][fontFamily.normal.style] || fontMetricsObj[fontMetricsType][fontFamily.normal.style].obs < 200) {
443
+ if (!charMetricsObj[charMetricsType] || !charMetricsObj[charMetricsType][fontFamily.normal.style] || charMetricsObj[charMetricsType][fontFamily.normal.style].obs < 200) {
396
444
  return null;
397
445
  }
398
446
 
399
- const metricsNormal = fontMetricsObj[fontMetricsType][fontFamily.normal.style];
400
- const normalOptFont = gs.optimizeFont({ fontData: fontFamily.normal.src, fontMetricsObj: metricsNormal, style: fontFamily.normal.style })
447
+ const metricsNormal = charMetricsObj[charMetricsType][fontFamily.normal.style];
448
+ const normalOptFont = gs.optimizeFont({ fontData: fontFamily.normal.src, charMetricsObj: metricsNormal, style: fontFamily.normal.style })
401
449
  .then(async (x) => {
402
450
  const font = await loadOpentype(x.fontData, x.kerningPairs);
403
451
  return new FontContainerFont(fontFamily.normal.family, fontFamily.normal.style, x.fontData, true, font);
404
452
  });
405
453
 
406
- const metricsItalic = fontMetricsObj[fontMetricsType][fontFamily.italic.style];
454
+ const metricsItalic = charMetricsObj[charMetricsType][fontFamily.italic.style];
407
455
  /** @type {?FontContainerFont|Promise<FontContainerFont>} */
408
456
  let italicOptFont = null;
409
457
  if (metricsItalic && metricsItalic.obs >= 200) {
410
- italicOptFont = gs.optimizeFont({ fontData: fontFamily.italic.src, fontMetricsObj: metricsItalic, style: fontFamily.italic.style })
458
+ italicOptFont = gs.optimizeFont({ fontData: fontFamily.italic.src, charMetricsObj: metricsItalic, style: fontFamily.italic.style })
411
459
  .then(async (x) => {
412
460
  const font = await loadOpentype(x.fontData, x.kerningPairs);
413
461
  return new FontContainerFont(fontFamily.italic.family, fontFamily.italic.style, x.fontData, true, font);
@@ -424,18 +472,20 @@ export async function optimizeFontContainerFamily(fontFamily, fontMetricsObj) {
424
472
  * Optimize all fonts.
425
473
  * If a font cannot be optimized, then the raw font is returned.
426
474
  * @param {Object<string, FontContainerFamilyBuiltIn>} fontPrivate
427
- * @param {Object.<string, FontMetricsFamily>} fontMetricsObj
475
+ * @param {Object.<string, CharMetricsFamily>} charMetricsObj
428
476
  */
429
- export async function optimizeFontContainerAll(fontPrivate, fontMetricsObj) {
430
- const carlitoPromise = optimizeFontContainerFamily(fontPrivate.Carlito, fontMetricsObj);
431
- const centuryPromise = optimizeFontContainerFamily(fontPrivate.Century, fontMetricsObj);
432
- const garamondPromise = optimizeFontContainerFamily(fontPrivate.Garamond, fontMetricsObj);
433
- const palatinoPromise = optimizeFontContainerFamily(fontPrivate.Palatino, fontMetricsObj);
434
- const nimbusRomanPromise = optimizeFontContainerFamily(fontPrivate.NimbusRoman, fontMetricsObj);
435
- const nimbusSansPromise = optimizeFontContainerFamily(fontPrivate.NimbusSans, fontMetricsObj);
436
- const nimbusMonoPromise = optimizeFontContainerFamily(fontPrivate.NimbusMono, fontMetricsObj);
437
-
438
- const results = await Promise.all([carlitoPromise, centuryPromise, garamondPromise, palatinoPromise, nimbusRomanPromise, nimbusSansPromise, nimbusMonoPromise]);
477
+ export async function optimizeFontContainerAll(fontPrivate, charMetricsObj) {
478
+ const carlitoPromise = optimizeFontContainerFamily(fontPrivate.Carlito, charMetricsObj);
479
+ const centuryPromise = optimizeFontContainerFamily(fontPrivate.Century, charMetricsObj);
480
+ const garamondPromise = optimizeFontContainerFamily(fontPrivate.Garamond, charMetricsObj);
481
+ const gothicPromise = optimizeFontContainerFamily(fontPrivate.Gothic, charMetricsObj);
482
+ const palatinoPromise = optimizeFontContainerFamily(fontPrivate.Palatino, charMetricsObj);
483
+ const nimbusRomanPromise = optimizeFontContainerFamily(fontPrivate.NimbusRoman, charMetricsObj);
484
+ const nimbusSansPromise = optimizeFontContainerFamily(fontPrivate.NimbusSans, charMetricsObj);
485
+ const nimbusMonoPromise = optimizeFontContainerFamily(fontPrivate.NimbusMono, charMetricsObj);
486
+
487
+ const results = await Promise.all([carlitoPromise, centuryPromise, garamondPromise, gothicPromise,
488
+ palatinoPromise, nimbusRomanPromise, nimbusSansPromise, nimbusMonoPromise]);
439
489
 
440
490
  if (results.every((x) => x === null)) return null;
441
491
 
@@ -443,9 +493,10 @@ export async function optimizeFontContainerAll(fontPrivate, fontMetricsObj) {
443
493
  Carlito: results[0],
444
494
  Century: results[1],
445
495
  Garamond: results[2],
446
- Palatino: results[3],
447
- NimbusRoman: results[4],
448
- NimbusSans: results[5],
449
- NimbusMono: results[6],
496
+ Gothic: results[3],
497
+ Palatino: results[4],
498
+ NimbusRoman: results[5],
499
+ NimbusSans: results[6],
500
+ NimbusMono: results[7],
450
501
  };
451
502
  }
package/js/fontEval.js CHANGED
@@ -1,4 +1,4 @@
1
- import { fontMetricsObj, pageMetricsArr } from './containers/dataContainer.js';
1
+ import { pageMetricsAll } from './containers/dataContainer.js';
2
2
  import { FontCont } from './containers/fontContainer.js';
3
3
  import { ImageCache } from './containers/imageContainer.js';
4
4
  import {
@@ -28,7 +28,7 @@ export async function evalPagesFont(font, pageArr, opt, n = 500) {
28
28
  font,
29
29
  page: pageArr[i],
30
30
  binaryImage: imageI,
31
- pageMetricsObj: pageMetricsArr[i],
31
+ pageMetricsObj: pageMetricsAll[i],
32
32
  opt,
33
33
  });
34
34
 
@@ -49,6 +49,7 @@ export async function evaluateFonts(pageArr, opt) {
49
49
  const evalCentury = !!(opt ? FontCont.opt?.Century : FontCont.raw?.Century);
50
50
  const evalPalatino = !!(opt ? FontCont.opt?.Palatino : FontCont.raw?.Palatino);
51
51
  const evalGaramond = !!(opt ? FontCont.opt?.Garamond : FontCont.raw?.Garamond);
52
+ const evalGothic = !!(opt ? FontCont.opt?.Gothic : FontCont.raw?.Gothic);
52
53
  const evalNimbusRoman = !!(opt ? FontCont.opt?.NimbusRoman : FontCont.raw?.NimbusRoman);
53
54
  const evalNimbusMono = !!(opt ? FontCont.opt?.NimbusMono : FontCont.raw?.NimbusMono);
54
55
 
@@ -58,6 +59,7 @@ export async function evaluateFonts(pageArr, opt) {
58
59
  century: evalCentury ? evalPagesFont('Century', pageArr, opt) : null,
59
60
  palatino: evalPalatino ? evalPagesFont('Palatino', pageArr, opt) : null,
60
61
  garamond: evalGaramond ? evalPagesFont('Garamond', pageArr, opt) : null,
62
+ gothic: evalGothic ? evalPagesFont('Gothic', pageArr, opt) : null,
61
63
  nimbusRoman: evalNimbusRoman ? evalPagesFont('NimbusRoman', pageArr, opt) : null,
62
64
  nimbusMono: evalNimbusMono ? evalPagesFont('NimbusMono', pageArr, opt) : null,
63
65
  };
@@ -68,6 +70,7 @@ export async function evaluateFonts(pageArr, opt) {
68
70
  century: await fontMetricsPromises.century,
69
71
  palatino: await fontMetricsPromises.palatino,
70
72
  garamond: await fontMetricsPromises.garamond,
73
+ gothic: await fontMetricsPromises.gothic,
71
74
  nimbusRoman: await fontMetricsPromises.nimbusRoman,
72
75
  nimbusMono: await fontMetricsPromises.nimbusMono,
73
76
  };
@@ -78,6 +81,7 @@ export async function evaluateFonts(pageArr, opt) {
78
81
  Century: fontMetricsTmp.century ? fontMetricsTmp.century.metricTotal / fontMetricsTmp.century.wordsTotal : null,
79
82
  Palatino: fontMetricsTmp.palatino ? fontMetricsTmp.palatino.metricTotal / fontMetricsTmp.palatino.wordsTotal : null,
80
83
  Garamond: fontMetricsTmp.garamond ? fontMetricsTmp.garamond.metricTotal / fontMetricsTmp.garamond.wordsTotal : null,
84
+ Gothic: fontMetricsTmp.gothic ? fontMetricsTmp.gothic.metricTotal / fontMetricsTmp.gothic.wordsTotal : null,
81
85
  NimbusRoman: fontMetricsTmp.nimbusRoman ? fontMetricsTmp.nimbusRoman.metricTotal / fontMetricsTmp.nimbusRoman.wordsTotal : null,
82
86
  NimbusMono: fontMetricsTmp.nimbusMono ? fontMetricsTmp.nimbusMono.metricTotal / fontMetricsTmp.nimbusMono.wordsTotal : null,
83
87
  };
@@ -94,7 +98,7 @@ const calcBestFonts = (fontMetrics) => {
94
98
  let minValueSans = Number.MAX_VALUE;
95
99
 
96
100
  for (const [key, value] of Object.entries(fontMetrics)) {
97
- if (!['Carlito', 'NimbusSans'].includes(key)) continue;
101
+ if (!['Carlito', 'Gothic', 'NimbusSans'].includes(key)) continue;
98
102
  if (value && value < minValueSans) {
99
103
  minValueSans = value;
100
104
  minKeySans = key;
@@ -132,16 +136,16 @@ const calcBestFonts = (fontMetrics) => {
132
136
  export async function runFontOptimization(ocrArr) {
133
137
  await loadBuiltInFontsRaw();
134
138
 
135
- const calculateOpt = fontMetricsObj && Object.keys(fontMetricsObj).length > 0;
139
+ const calculateOpt = FontCont.state.charMetrics && Object.keys(FontCont.state.charMetrics).length > 0;
136
140
 
137
141
  let enableOptSerif = false;
138
142
  let enableOptSans = false;
139
143
 
140
144
  let optimizeFontContainerAllPromise;
141
145
  if (calculateOpt) {
142
- setDefaultFontAuto(fontMetricsObj);
146
+ setDefaultFontAuto(FontCont.state.charMetrics);
143
147
 
144
- optimizeFontContainerAllPromise = optimizeFontContainerAll(FontCont.raw, fontMetricsObj)
148
+ optimizeFontContainerAllPromise = optimizeFontContainerAll(FontCont.raw, FontCont.state.charMetrics)
145
149
  .then((res) => {
146
150
  FontCont.opt = res;
147
151
  });
@@ -167,28 +171,28 @@ export async function runFontOptimization(ocrArr) {
167
171
  // This ensures that switching on/off "font optimization" does not change the font, which would be confusing.
168
172
  if (FontCont.optMetrics[bestMetricsOpt.minKeySans] < FontCont.rawMetrics[bestMetricsRaw.minKeySans]) {
169
173
  enableOptSans = true;
170
- FontCont.sansDefaultName = bestMetricsOpt.minKeySans;
174
+ FontCont.state.sansDefaultName = bestMetricsOpt.minKeySans;
171
175
  } else {
172
- FontCont.sansDefaultName = bestMetricsRaw.minKeySans;
176
+ FontCont.state.sansDefaultName = bestMetricsRaw.minKeySans;
173
177
  }
174
178
 
175
179
  // Repeat for serif fonts
176
180
  if (FontCont.optMetrics[bestMetricsOpt.minKeySerif] < FontCont.rawMetrics[bestMetricsRaw.minKeySerif]) {
177
181
  enableOptSerif = true;
178
- FontCont.serifDefaultName = bestMetricsOpt.minKeySerif;
182
+ FontCont.state.serifDefaultName = bestMetricsOpt.minKeySerif;
179
183
  } else {
180
- FontCont.serifDefaultName = bestMetricsRaw.minKeySerif;
184
+ FontCont.state.serifDefaultName = bestMetricsRaw.minKeySerif;
181
185
  }
182
186
  } else {
183
- FontCont.sansDefaultName = bestMetricsRaw.minKeySans;
184
- FontCont.serifDefaultName = bestMetricsRaw.minKeySerif;
187
+ FontCont.state.sansDefaultName = bestMetricsRaw.minKeySans;
188
+ FontCont.state.serifDefaultName = bestMetricsRaw.minKeySerif;
185
189
  }
186
190
 
187
- FontCont.enableOpt = enableOptSerif || enableOptSans;
191
+ FontCont.state.enableOpt = enableOptSerif || enableOptSans;
188
192
 
189
193
  // Send updated state to all workers.
190
194
  await updateFontContWorkerMain();
191
195
  }
192
196
 
193
- return FontCont.enableOpt;
197
+ return FontCont.state.enableOpt;
194
198
  }