scribe.js-ocr 0.6.0 → 0.6.1
This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- package/js/fontEval.js +19 -35
- package/js/generalWorkerMain.js +5 -15
- package/js/recognizeConvert.js +3 -15
- package/mupdf/libmupdf.js +4739 -4837
- package/mupdf/libmupdf.wasm +0 -0
- package/package.json +1 -1
package/js/fontEval.js
CHANGED
|
@@ -52,41 +52,25 @@ export async function evaluateFonts(pageArr, opt) {
|
|
|
52
52
|
const evalNimbusRomNo9L = !!(opt ? FontCont.opt?.NimbusRomNo9L : FontCont.raw?.NimbusRomNo9L);
|
|
53
53
|
const evalNimbusMono = !!(opt ? FontCont.opt?.NimbusMono : FontCont.raw?.NimbusMono);
|
|
54
54
|
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
palatino: await fontMetricsPromises.palatino,
|
|
75
|
-
garamond: await fontMetricsPromises.garamond,
|
|
76
|
-
nimbusRomNo9L: await fontMetricsPromises.nimbusRomNo9L,
|
|
77
|
-
nimbusMono: await fontMetricsPromises.nimbusMono,
|
|
78
|
-
};
|
|
79
|
-
} else {
|
|
80
|
-
fontMetricsTmp = {
|
|
81
|
-
carlito: evalCarlito ? await evalPagesFont('Carlito', pageArr, opt) : null,
|
|
82
|
-
nimbusSans: evalNimbusSans ? await evalPagesFont('NimbusSans', pageArr, opt) : null,
|
|
83
|
-
century: evalCentury ? await evalPagesFont('Century', pageArr, opt) : null,
|
|
84
|
-
palatino: evalPalatino ? await evalPagesFont('Palatino', pageArr, opt) : null,
|
|
85
|
-
garamond: evalGaramond ? await evalPagesFont('Garamond', pageArr, opt) : null,
|
|
86
|
-
nimbusRomNo9L: evalNimbusRomNo9L ? await evalPagesFont('NimbusRomNo9L', pageArr, opt) : null,
|
|
87
|
-
nimbusMono: evalNimbusMono ? await evalPagesFont('NimbusMono', pageArr, opt) : null,
|
|
88
|
-
};
|
|
89
|
-
}
|
|
55
|
+
const fontMetricsPromises = {
|
|
56
|
+
carlito: evalCarlito ? evalPagesFont('Carlito', pageArr, opt) : null,
|
|
57
|
+
nimbusSans: evalNimbusSans ? evalPagesFont('NimbusSans', pageArr, opt) : null,
|
|
58
|
+
century: evalCentury ? evalPagesFont('Century', pageArr, opt) : null,
|
|
59
|
+
palatino: evalPalatino ? evalPagesFont('Palatino', pageArr, opt) : null,
|
|
60
|
+
garamond: evalGaramond ? evalPagesFont('Garamond', pageArr, opt) : null,
|
|
61
|
+
nimbusRomNo9L: evalNimbusRomNo9L ? evalPagesFont('NimbusRomNo9L', pageArr, opt) : null,
|
|
62
|
+
nimbusMono: evalNimbusMono ? evalPagesFont('NimbusMono', pageArr, opt) : null,
|
|
63
|
+
};
|
|
64
|
+
|
|
65
|
+
const fontMetricsTmp = {
|
|
66
|
+
carlito: await fontMetricsPromises.carlito,
|
|
67
|
+
nimbusSans: await fontMetricsPromises.nimbusSans,
|
|
68
|
+
century: await fontMetricsPromises.century,
|
|
69
|
+
palatino: await fontMetricsPromises.palatino,
|
|
70
|
+
garamond: await fontMetricsPromises.garamond,
|
|
71
|
+
nimbusRomNo9L: await fontMetricsPromises.nimbusRomNo9L,
|
|
72
|
+
nimbusMono: await fontMetricsPromises.nimbusMono,
|
|
73
|
+
};
|
|
90
74
|
|
|
91
75
|
const fontMetrics = {
|
|
92
76
|
Carlito: fontMetricsTmp.carlito ? fontMetricsTmp.carlito.metricTotal / fontMetricsTmp.carlito.wordsTotal : null,
|
package/js/generalWorkerMain.js
CHANGED
|
@@ -147,9 +147,6 @@ export class gs {
|
|
|
147
147
|
/** @type {?import('../tess/tesseract.esm.min.js').default} */
|
|
148
148
|
static schedulerInner = null;
|
|
149
149
|
|
|
150
|
-
/** @type {?Function} */
|
|
151
|
-
static #resReady = null;
|
|
152
|
-
|
|
153
150
|
/** @type {?Promise<void>} */
|
|
154
151
|
static schedulerReady = null;
|
|
155
152
|
|
|
@@ -237,10 +234,6 @@ export class gs {
|
|
|
237
234
|
static renderPageStaticImp = async (args) => (await gs.schedulerInner.addJob('renderPageStaticImp', args));
|
|
238
235
|
|
|
239
236
|
static init = async () => {
|
|
240
|
-
gs.schedulerReady = new Promise((resolve, reject) => {
|
|
241
|
-
gs.#resReady = resolve;
|
|
242
|
-
});
|
|
243
|
-
|
|
244
237
|
let workerN;
|
|
245
238
|
if (opt.workerN) {
|
|
246
239
|
workerN = opt.workerN;
|
|
@@ -272,8 +265,7 @@ export class gs {
|
|
|
272
265
|
|
|
273
266
|
await Promise.all(resArr);
|
|
274
267
|
|
|
275
|
-
|
|
276
|
-
gs.#resReady(true);
|
|
268
|
+
return;
|
|
277
269
|
};
|
|
278
270
|
|
|
279
271
|
/**
|
|
@@ -318,15 +310,14 @@ export class gs {
|
|
|
318
310
|
/**
|
|
319
311
|
* Gets the general scheduler if it exists, otherwise creates a new one.
|
|
320
312
|
*/
|
|
321
|
-
static getGeneralScheduler =
|
|
313
|
+
static getGeneralScheduler = () => {
|
|
322
314
|
if (gs.schedulerReady) {
|
|
323
|
-
|
|
324
|
-
return;
|
|
315
|
+
return gs.schedulerReady;
|
|
325
316
|
}
|
|
326
317
|
|
|
327
|
-
|
|
318
|
+
gs.schedulerReady = gs.init();
|
|
328
319
|
|
|
329
|
-
return;
|
|
320
|
+
return gs.schedulerReady;
|
|
330
321
|
};
|
|
331
322
|
|
|
332
323
|
static clear = () => {
|
|
@@ -337,7 +328,6 @@ export class gs {
|
|
|
337
328
|
gs.clear();
|
|
338
329
|
await gs.schedulerInner.terminate();
|
|
339
330
|
gs.schedulerInner = null;
|
|
340
|
-
gs.#resReady = null;
|
|
341
331
|
gs.schedulerReady = null;
|
|
342
332
|
gs.#resReadyTesseract = null;
|
|
343
333
|
gs.schedulerReadyTesseract = null;
|
package/js/recognizeConvert.js
CHANGED
|
@@ -94,19 +94,9 @@ export const compareOCR = async (ocrA, ocrB, options) => {
|
|
|
94
94
|
if (res.debugImg) debugImageArr[i] = res.debugImg;
|
|
95
95
|
};
|
|
96
96
|
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
const indices = [...Array(ocrA.length).keys()];
|
|
101
|
-
const compPromises = indices.map(async (i) => comparePageI(i));
|
|
102
|
-
await Promise.allSettled(compPromises);
|
|
103
|
-
} else {
|
|
104
|
-
// This needs to be run one at a time in Node.js, as this is run in the main thread,
|
|
105
|
-
// and there is no mechanism for queuing jobs, so side effects will interfere with each other.
|
|
106
|
-
for (let i = 0; i < ocrA.length; i++) {
|
|
107
|
-
await comparePageI(i);
|
|
108
|
-
}
|
|
109
|
-
}
|
|
97
|
+
const indices = [...Array(ocrA.length).keys()];
|
|
98
|
+
const compPromises = indices.map(async (i) => comparePageI(i));
|
|
99
|
+
await Promise.allSettled(compPromises);
|
|
110
100
|
|
|
111
101
|
return { ocr: ocrArr, metrics: metricsArr, debug: debugImageArr };
|
|
112
102
|
};
|
|
@@ -193,8 +183,6 @@ export const recognizePageImp = async (n, legacy, lstm, areaMode, tessOptions =
|
|
|
193
183
|
// is to get debugging images for layout analysis rather than get text.
|
|
194
184
|
const runRecognition = legacy || lstm;
|
|
195
185
|
|
|
196
|
-
await gs.getGeneralScheduler();
|
|
197
|
-
|
|
198
186
|
const resArr = await gs.recognizeAndConvert2({
|
|
199
187
|
image: nativeN.src,
|
|
200
188
|
options: config,
|