scribe.js-ocr 0.6.0 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/js/fontEval.js CHANGED
@@ -52,41 +52,25 @@ export async function evaluateFonts(pageArr, opt) {
52
52
  const evalNimbusRomNo9L = !!(opt ? FontCont.opt?.NimbusRomNo9L : FontCont.raw?.NimbusRomNo9L);
53
53
  const evalNimbusMono = !!(opt ? FontCont.opt?.NimbusMono : FontCont.raw?.NimbusMono);
54
54
 
55
- // The browser version runs in parallel using workers, however the Node.js version runs sequentially,
56
- // as the canvas package does not support workers, and trying to run in parallel causes problems.
57
- // The logic is the same in both versions.
58
- let fontMetricsTmp;
59
- if (typeof process === 'undefined') {
60
- const fontMetricsPromises = {
61
- carlito: evalCarlito ? evalPagesFont('Carlito', pageArr, opt) : null,
62
- nimbusSans: evalNimbusSans ? evalPagesFont('NimbusSans', pageArr, opt) : null,
63
- century: evalCentury ? evalPagesFont('Century', pageArr, opt) : null,
64
- palatino: evalPalatino ? evalPagesFont('Palatino', pageArr, opt) : null,
65
- garamond: evalGaramond ? evalPagesFont('Garamond', pageArr, opt) : null,
66
- nimbusRomNo9L: evalNimbusRomNo9L ? evalPagesFont('NimbusRomNo9L', pageArr, opt) : null,
67
- nimbusMono: evalNimbusMono ? evalPagesFont('NimbusMono', pageArr, opt) : null,
68
- };
69
-
70
- fontMetricsTmp = {
71
- carlito: await fontMetricsPromises.carlito,
72
- nimbusSans: await fontMetricsPromises.nimbusSans,
73
- century: await fontMetricsPromises.century,
74
- palatino: await fontMetricsPromises.palatino,
75
- garamond: await fontMetricsPromises.garamond,
76
- nimbusRomNo9L: await fontMetricsPromises.nimbusRomNo9L,
77
- nimbusMono: await fontMetricsPromises.nimbusMono,
78
- };
79
- } else {
80
- fontMetricsTmp = {
81
- carlito: evalCarlito ? await evalPagesFont('Carlito', pageArr, opt) : null,
82
- nimbusSans: evalNimbusSans ? await evalPagesFont('NimbusSans', pageArr, opt) : null,
83
- century: evalCentury ? await evalPagesFont('Century', pageArr, opt) : null,
84
- palatino: evalPalatino ? await evalPagesFont('Palatino', pageArr, opt) : null,
85
- garamond: evalGaramond ? await evalPagesFont('Garamond', pageArr, opt) : null,
86
- nimbusRomNo9L: evalNimbusRomNo9L ? await evalPagesFont('NimbusRomNo9L', pageArr, opt) : null,
87
- nimbusMono: evalNimbusMono ? await evalPagesFont('NimbusMono', pageArr, opt) : null,
88
- };
89
- }
55
+ const fontMetricsPromises = {
56
+ carlito: evalCarlito ? evalPagesFont('Carlito', pageArr, opt) : null,
57
+ nimbusSans: evalNimbusSans ? evalPagesFont('NimbusSans', pageArr, opt) : null,
58
+ century: evalCentury ? evalPagesFont('Century', pageArr, opt) : null,
59
+ palatino: evalPalatino ? evalPagesFont('Palatino', pageArr, opt) : null,
60
+ garamond: evalGaramond ? evalPagesFont('Garamond', pageArr, opt) : null,
61
+ nimbusRomNo9L: evalNimbusRomNo9L ? evalPagesFont('NimbusRomNo9L', pageArr, opt) : null,
62
+ nimbusMono: evalNimbusMono ? evalPagesFont('NimbusMono', pageArr, opt) : null,
63
+ };
64
+
65
+ const fontMetricsTmp = {
66
+ carlito: await fontMetricsPromises.carlito,
67
+ nimbusSans: await fontMetricsPromises.nimbusSans,
68
+ century: await fontMetricsPromises.century,
69
+ palatino: await fontMetricsPromises.palatino,
70
+ garamond: await fontMetricsPromises.garamond,
71
+ nimbusRomNo9L: await fontMetricsPromises.nimbusRomNo9L,
72
+ nimbusMono: await fontMetricsPromises.nimbusMono,
73
+ };
90
74
 
91
75
  const fontMetrics = {
92
76
  Carlito: fontMetricsTmp.carlito ? fontMetricsTmp.carlito.metricTotal / fontMetricsTmp.carlito.wordsTotal : null,
@@ -147,9 +147,6 @@ export class gs {
147
147
  /** @type {?import('../tess/tesseract.esm.min.js').default} */
148
148
  static schedulerInner = null;
149
149
 
150
- /** @type {?Function} */
151
- static #resReady = null;
152
-
153
150
  /** @type {?Promise<void>} */
154
151
  static schedulerReady = null;
155
152
 
@@ -237,10 +234,6 @@ export class gs {
237
234
  static renderPageStaticImp = async (args) => (await gs.schedulerInner.addJob('renderPageStaticImp', args));
238
235
 
239
236
  static init = async () => {
240
- gs.schedulerReady = new Promise((resolve, reject) => {
241
- gs.#resReady = resolve;
242
- });
243
-
244
237
  let workerN;
245
238
  if (opt.workerN) {
246
239
  workerN = opt.workerN;
@@ -272,8 +265,7 @@ export class gs {
272
265
 
273
266
  await Promise.all(resArr);
274
267
 
275
- // @ts-ignore
276
- gs.#resReady(true);
268
+ return;
277
269
  };
278
270
 
279
271
  /**
@@ -318,15 +310,14 @@ export class gs {
318
310
  /**
319
311
  * Gets the general scheduler if it exists, otherwise creates a new one.
320
312
  */
321
- static getGeneralScheduler = async () => {
313
+ static getGeneralScheduler = () => {
322
314
  if (gs.schedulerReady) {
323
- await gs.schedulerReady;
324
- return;
315
+ return gs.schedulerReady;
325
316
  }
326
317
 
327
- await gs.init();
318
+ gs.schedulerReady = gs.init();
328
319
 
329
- return;
320
+ return gs.schedulerReady;
330
321
  };
331
322
 
332
323
  static clear = () => {
@@ -337,7 +328,6 @@ export class gs {
337
328
  gs.clear();
338
329
  await gs.schedulerInner.terminate();
339
330
  gs.schedulerInner = null;
340
- gs.#resReady = null;
341
331
  gs.schedulerReady = null;
342
332
  gs.#resReadyTesseract = null;
343
333
  gs.schedulerReadyTesseract = null;
@@ -94,19 +94,9 @@ export const compareOCR = async (ocrA, ocrB, options) => {
94
94
  if (res.debugImg) debugImageArr[i] = res.debugImg;
95
95
  };
96
96
 
97
- // This function is run in the main thread in Node.js, with no mechanism for queuing jobs.
98
- // Therefore, this needs to be run one at a time in Node.js.
99
- if (typeof process === 'undefined') {
100
- const indices = [...Array(ocrA.length).keys()];
101
- const compPromises = indices.map(async (i) => comparePageI(i));
102
- await Promise.allSettled(compPromises);
103
- } else {
104
- // This needs to be run one at a time in Node.js, as this is run in the main thread,
105
- // and there is no mechanism for queuing jobs, so side effects will interfere with each other.
106
- for (let i = 0; i < ocrA.length; i++) {
107
- await comparePageI(i);
108
- }
109
- }
97
+ const indices = [...Array(ocrA.length).keys()];
98
+ const compPromises = indices.map(async (i) => comparePageI(i));
99
+ await Promise.allSettled(compPromises);
110
100
 
111
101
  return { ocr: ocrArr, metrics: metricsArr, debug: debugImageArr };
112
102
  };
@@ -193,8 +183,6 @@ export const recognizePageImp = async (n, legacy, lstm, areaMode, tessOptions =
193
183
  // is to get debugging images for layout analysis rather than get text.
194
184
  const runRecognition = legacy || lstm;
195
185
 
196
- await gs.getGeneralScheduler();
197
-
198
186
  const resArr = await gs.recognizeAndConvert2({
199
187
  image: nativeN.src,
200
188
  options: config,