scribe.js-ocr 0.7.0 → 0.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.eslintrc.json +6 -0
- package/fonts/all/Carlito-Bold.woff +0 -0
- package/fonts/all/Carlito-Italic.woff +0 -0
- package/fonts/all/Carlito-Regular.woff +0 -0
- package/fonts/all/{C059-Bold.woff → Century-Bold.woff} +0 -0
- package/fonts/all/{C059-Italic.woff → Century-Italic.woff} +0 -0
- package/fonts/all/{C059-Roman.woff → Century-Regular.woff} +0 -0
- package/fonts/all/{EBGaramond-Bold.woff → Garamond-Bold.woff} +0 -0
- package/fonts/all/{EBGaramond-Italic.woff → Garamond-Italic.woff} +0 -0
- package/fonts/all/{EBGaramond-Regular.woff → Garamond-Regular.woff} +0 -0
- package/fonts/all/{NimbusMonoPS-Bold.woff → NimbusMono-Bold.woff} +0 -0
- package/fonts/all/{NimbusMonoPS-Italic.woff → NimbusMono-Italic.woff} +0 -0
- package/fonts/all/{NimbusMonoPS-Regular.woff → NimbusMono-Regular.woff} +0 -0
- package/fonts/all/NimbusRoman-Bold.woff +0 -0
- package/fonts/all/NimbusRoman-Italic.woff +0 -0
- package/fonts/all/NimbusRoman-Regular.woff +0 -0
- package/fonts/all/NimbusSans-Bold.woff +0 -0
- package/fonts/all/NimbusSans-Italic.woff +0 -0
- package/fonts/all/NimbusSans-Regular.woff +0 -0
- package/fonts/all/{P052-Bold.woff → Palatino-Bold.woff} +0 -0
- package/fonts/all/{P052-Italic.woff → Palatino-Italic.woff} +0 -0
- package/fonts/all/{P052-Roman.woff → Palatino-Regular.woff} +0 -0
- package/fonts/latin/Carlito-Bold.woff +0 -0
- package/fonts/latin/Carlito-Italic.woff +0 -0
- package/fonts/latin/Carlito-Regular.woff +0 -0
- package/fonts/latin/{C059-Bold.woff → Century-Bold.woff} +0 -0
- package/fonts/latin/{C059-Italic.woff → Century-Italic.woff} +0 -0
- package/fonts/latin/{C059-Roman.woff → Century-Regular.woff} +0 -0
- package/fonts/latin/{EBGaramond-Bold.woff → Garamond-Bold.woff} +0 -0
- package/fonts/latin/{EBGaramond-Italic.woff → Garamond-Italic.woff} +0 -0
- package/fonts/latin/{EBGaramond-Regular.woff → Garamond-Regular.woff} +0 -0
- package/fonts/latin/{NimbusMonoPS-Bold.woff → NimbusMono-Bold.woff} +0 -0
- package/fonts/latin/{NimbusMonoPS-Italic.woff → NimbusMono-Italic.woff} +0 -0
- package/fonts/latin/{NimbusMonoPS-Regular.woff → NimbusMono-Regular.woff} +0 -0
- package/fonts/latin/NimbusRoman-Bold.woff +0 -0
- package/fonts/latin/NimbusRoman-Italic.woff +0 -0
- package/fonts/latin/NimbusRoman-Regular.woff +0 -0
- package/fonts/latin/NimbusSans-Bold.woff +0 -0
- package/fonts/latin/NimbusSans-Italic.woff +0 -0
- package/fonts/latin/NimbusSans-Regular.woff +0 -0
- package/fonts/latin/{P052-Bold.woff → Palatino-Bold.woff} +0 -0
- package/fonts/latin/{P052-Italic.woff → Palatino-Italic.woff} +0 -0
- package/fonts/latin/{P052-Roman.woff → Palatino-Regular.woff} +0 -0
- package/js/containers/fontContainer.js +8 -8
- package/js/fontContainerMain.js +45 -45
- package/js/fontEval.js +3 -3
- package/js/fontSupp.js +7 -7
- package/js/generalWorkerMain.js +16 -16
- package/js/import/convertPageHocr.js +6 -6
- package/js/import/nodeAdapter.js +8 -8
- package/js/objects/imageObjects.js +9 -9
- package/js/objects/ocrObjects.js +3 -3
- package/js/utils/fontUtils.js +8 -8
- package/js/utils/imageUtils.js +9 -10
- package/js/utils/reflowPars.js +5 -5
- package/js/worker/compareOCRModule.js +29 -29
- package/js/worker/generalWorker.js +8 -8
- package/package.json +4 -3
- package/fonts/all_ttf/C059-Bold.ttf +0 -0
- package/fonts/all_ttf/C059-Italic.ttf +0 -0
- package/fonts/all_ttf/C059-Roman.ttf +0 -0
- package/fonts/all_ttf/Carlito-Bold.ttf +0 -0
- package/fonts/all_ttf/Carlito-Italic.ttf +0 -0
- package/fonts/all_ttf/Carlito-Regular.ttf +0 -0
- package/fonts/all_ttf/EBGaramond-Bold.ttf +0 -0
- package/fonts/all_ttf/EBGaramond-Italic.ttf +0 -0
- package/fonts/all_ttf/EBGaramond-Regular.ttf +0 -0
- package/fonts/all_ttf/NimbusMonoPS-Bold.ttf +0 -0
- package/fonts/all_ttf/NimbusMonoPS-Italic.ttf +0 -0
- package/fonts/all_ttf/NimbusMonoPS-Regular.ttf +0 -0
- package/fonts/all_ttf/NimbusRoman-Bold.ttf +0 -0
- package/fonts/all_ttf/NimbusRoman-Italic.ttf +0 -0
- package/fonts/all_ttf/NimbusRoman-Regular.ttf +0 -0
- package/fonts/all_ttf/NimbusSans-Bold.ttf +0 -0
- package/fonts/all_ttf/NimbusSans-Italic.ttf +0 -0
- package/fonts/all_ttf/NimbusSans-Regular.ttf +0 -0
- package/fonts/all_ttf/P052-Bold.ttf +0 -0
- package/fonts/all_ttf/P052-Italic.ttf +0 -0
- package/fonts/all_ttf/P052-Roman.ttf +0 -0
package/.eslintrc.json
CHANGED
|
@@ -11,6 +11,9 @@
|
|
|
11
11
|
"ecmaVersion": "latest",
|
|
12
12
|
"sourceType": "module"
|
|
13
13
|
},
|
|
14
|
+
"plugins": [
|
|
15
|
+
"jsdoc"
|
|
16
|
+
],
|
|
14
17
|
// "globals": {
|
|
15
18
|
// "fabric": "writable"
|
|
16
19
|
// },
|
|
@@ -20,6 +23,9 @@
|
|
|
20
23
|
200,
|
|
21
24
|
{ "ignoreRegExpLiterals": true, "ignoreTemplateLiterals": true }
|
|
22
25
|
],
|
|
26
|
+
|
|
27
|
+
"jsdoc/check-alignment": 1,
|
|
28
|
+
|
|
23
29
|
// This rule results in code being deleted.
|
|
24
30
|
"no-unreachable": "off",
|
|
25
31
|
// This edit allows for .js files (but not packages) to have an extension.
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -365,14 +365,14 @@ export class FontCont {
|
|
|
365
365
|
};
|
|
366
366
|
|
|
367
367
|
/**
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
368
|
+
* Gets a font object. Unlike accessing the font containers directly,
|
|
369
|
+
* this method allows for special values 'Default', 'SansDefault', and 'SerifDefault' to be used.
|
|
370
|
+
*
|
|
371
|
+
* @param {('Default'|'SansDefault'|'SerifDefault'|string)} family - Font family name.
|
|
372
|
+
* @param {('normal'|'italic'|'bold'|string)} [style='normal']
|
|
373
|
+
* @param {string} [lang='eng']
|
|
374
|
+
* @returns {FontContainerFont}
|
|
375
|
+
*/
|
|
376
376
|
static getFont = (family, style = 'normal', lang = 'eng') => {
|
|
377
377
|
if (FontCont.doc?.[family]?.[style] && !FontCont.doc?.[family]?.[style]?.disable) {
|
|
378
378
|
return FontCont.doc[family][style];
|
package/js/fontContainerMain.js
CHANGED
|
@@ -47,70 +47,70 @@ export async function loadBuiltInFontsRaw(glyphSet = 'latin') {
|
|
|
47
47
|
carlitoNormal = fetch(new URL('../fonts/latin/Carlito-Regular.woff', import.meta.url)).then((res) => res.arrayBuffer());
|
|
48
48
|
carlitoItalic = fetch(new URL('../fonts/latin/Carlito-Italic.woff', import.meta.url)).then((res) => res.arrayBuffer());
|
|
49
49
|
carlitoBold = fetch(new URL('../fonts/latin/Carlito-Bold.woff', import.meta.url)).then((res) => res.arrayBuffer());
|
|
50
|
-
centuryNormal = fetch(new URL('../fonts/latin/
|
|
51
|
-
centuryItalic = fetch(new URL('../fonts/latin/
|
|
52
|
-
centuryBold = fetch(new URL('../fonts/latin/
|
|
53
|
-
garamondNormal = fetch(new URL('../fonts/latin/
|
|
54
|
-
garamondItalic = fetch(new URL('../fonts/latin/
|
|
55
|
-
garamondBold = fetch(new URL('../fonts/latin/
|
|
56
|
-
palatinoNormal = fetch(new URL('../fonts/latin/
|
|
57
|
-
palatinoItalic = fetch(new URL('../fonts/latin/
|
|
58
|
-
palatinoBold = fetch(new URL('../fonts/latin/
|
|
50
|
+
centuryNormal = fetch(new URL('../fonts/latin/Century-Regular.woff', import.meta.url)).then((res) => res.arrayBuffer());
|
|
51
|
+
centuryItalic = fetch(new URL('../fonts/latin/Century-Italic.woff', import.meta.url)).then((res) => res.arrayBuffer());
|
|
52
|
+
centuryBold = fetch(new URL('../fonts/latin/Century-Bold.woff', import.meta.url)).then((res) => res.arrayBuffer());
|
|
53
|
+
garamondNormal = fetch(new URL('../fonts/latin/Garamond-Regular.woff', import.meta.url)).then((res) => res.arrayBuffer());
|
|
54
|
+
garamondItalic = fetch(new URL('../fonts/latin/Garamond-Italic.woff', import.meta.url)).then((res) => res.arrayBuffer());
|
|
55
|
+
garamondBold = fetch(new URL('../fonts/latin/Garamond-Bold.woff', import.meta.url)).then((res) => res.arrayBuffer());
|
|
56
|
+
palatinoNormal = fetch(new URL('../fonts/latin/Palatino-Regular.woff', import.meta.url)).then((res) => res.arrayBuffer());
|
|
57
|
+
palatinoItalic = fetch(new URL('../fonts/latin/Palatino-Italic.woff', import.meta.url)).then((res) => res.arrayBuffer());
|
|
58
|
+
palatinoBold = fetch(new URL('../fonts/latin/Palatino-Bold.woff', import.meta.url)).then((res) => res.arrayBuffer());
|
|
59
59
|
nimbusRomNo9LNormal = fetch(new URL('../fonts/latin/NimbusRoman-Regular.woff', import.meta.url)).then((res) => res.arrayBuffer());
|
|
60
60
|
nimbusRomNo9LItalic = fetch(new URL('../fonts/latin/NimbusRoman-Italic.woff', import.meta.url)).then((res) => res.arrayBuffer());
|
|
61
61
|
nimbusRomNo9LBold = fetch(new URL('../fonts/latin/NimbusRoman-Bold.woff', import.meta.url)).then((res) => res.arrayBuffer());
|
|
62
62
|
nimbusSansNormal = fetch(new URL('../fonts/latin/NimbusSans-Regular.woff', import.meta.url)).then((res) => res.arrayBuffer());
|
|
63
63
|
nimbusSansItalic = fetch(new URL('../fonts/latin/NimbusSans-Italic.woff', import.meta.url)).then((res) => res.arrayBuffer());
|
|
64
64
|
nimbusSansBold = fetch(new URL('../fonts/latin/NimbusSans-Bold.woff', import.meta.url)).then((res) => res.arrayBuffer());
|
|
65
|
-
nimbusMonoNormal = fetch(new URL('../fonts/latin/
|
|
66
|
-
nimbusMonoItalic = fetch(new URL('../fonts/latin/
|
|
67
|
-
nimbusMonoBold = fetch(new URL('../fonts/latin/
|
|
65
|
+
nimbusMonoNormal = fetch(new URL('../fonts/latin/NimbusMono-Regular.woff', import.meta.url)).then((res) => res.arrayBuffer());
|
|
66
|
+
nimbusMonoItalic = fetch(new URL('../fonts/latin/NimbusMono-Italic.woff', import.meta.url)).then((res) => res.arrayBuffer());
|
|
67
|
+
nimbusMonoBold = fetch(new URL('../fonts/latin/NimbusMono-Bold.woff', import.meta.url)).then((res) => res.arrayBuffer());
|
|
68
68
|
} else {
|
|
69
69
|
carlitoNormal = fetch(new URL('../fonts/all/Carlito-Regular.woff', import.meta.url)).then((res) => res.arrayBuffer());
|
|
70
70
|
carlitoItalic = fetch(new URL('../fonts/all/Carlito-Italic.woff', import.meta.url)).then((res) => res.arrayBuffer());
|
|
71
71
|
carlitoBold = fetch(new URL('../fonts/all/Carlito-Bold.woff', import.meta.url)).then((res) => res.arrayBuffer());
|
|
72
|
-
centuryNormal = fetch(new URL('../fonts/all/
|
|
73
|
-
centuryItalic = fetch(new URL('../fonts/all/
|
|
74
|
-
centuryBold = fetch(new URL('../fonts/all/
|
|
75
|
-
garamondNormal = fetch(new URL('../fonts/all/
|
|
76
|
-
garamondItalic = fetch(new URL('../fonts/all/
|
|
77
|
-
garamondBold = fetch(new URL('../fonts/all/
|
|
78
|
-
palatinoNormal = fetch(new URL('../fonts/all/
|
|
79
|
-
palatinoItalic = fetch(new URL('../fonts/all/
|
|
80
|
-
palatinoBold = fetch(new URL('../fonts/all/
|
|
72
|
+
centuryNormal = fetch(new URL('../fonts/all/Century-Regular.woff', import.meta.url)).then((res) => res.arrayBuffer());
|
|
73
|
+
centuryItalic = fetch(new URL('../fonts/all/Century-Italic.woff', import.meta.url)).then((res) => res.arrayBuffer());
|
|
74
|
+
centuryBold = fetch(new URL('../fonts/all/Century-Bold.woff', import.meta.url)).then((res) => res.arrayBuffer());
|
|
75
|
+
garamondNormal = fetch(new URL('../fonts/all/Garamond-Regular.woff', import.meta.url)).then((res) => res.arrayBuffer());
|
|
76
|
+
garamondItalic = fetch(new URL('../fonts/all/Garamond-Italic.woff', import.meta.url)).then((res) => res.arrayBuffer());
|
|
77
|
+
garamondBold = fetch(new URL('../fonts/all/Garamond-Bold.woff', import.meta.url)).then((res) => res.arrayBuffer());
|
|
78
|
+
palatinoNormal = fetch(new URL('../fonts/all/Palatino-Regular.woff', import.meta.url)).then((res) => res.arrayBuffer());
|
|
79
|
+
palatinoItalic = fetch(new URL('../fonts/all/Palatino-Italic.woff', import.meta.url)).then((res) => res.arrayBuffer());
|
|
80
|
+
palatinoBold = fetch(new URL('../fonts/all/Palatino-Bold.woff', import.meta.url)).then((res) => res.arrayBuffer());
|
|
81
81
|
nimbusRomNo9LNormal = fetch(new URL('../fonts/all/NimbusRoman-Regular.woff', import.meta.url)).then((res) => res.arrayBuffer());
|
|
82
82
|
nimbusRomNo9LItalic = fetch(new URL('../fonts/all/NimbusRoman-Italic.woff', import.meta.url)).then((res) => res.arrayBuffer());
|
|
83
83
|
nimbusRomNo9LBold = fetch(new URL('../fonts/all/NimbusRoman-Bold.woff', import.meta.url)).then((res) => res.arrayBuffer());
|
|
84
84
|
nimbusSansNormal = fetch(new URL('../fonts/all/NimbusSans-Regular.woff', import.meta.url)).then((res) => res.arrayBuffer());
|
|
85
85
|
nimbusSansItalic = fetch(new URL('../fonts/all/NimbusSans-Italic.woff', import.meta.url)).then((res) => res.arrayBuffer());
|
|
86
86
|
nimbusSansBold = fetch(new URL('../fonts/all/NimbusSans-Bold.woff', import.meta.url)).then((res) => res.arrayBuffer());
|
|
87
|
-
nimbusMonoNormal = fetch(new URL('../fonts/all/
|
|
88
|
-
nimbusMonoItalic = fetch(new URL('../fonts/all/
|
|
89
|
-
nimbusMonoBold = fetch(new URL('../fonts/all/
|
|
87
|
+
nimbusMonoNormal = fetch(new URL('../fonts/all/NimbusMono-Regular.woff', import.meta.url)).then((res) => res.arrayBuffer());
|
|
88
|
+
nimbusMonoItalic = fetch(new URL('../fonts/all/NimbusMono-Italic.woff', import.meta.url)).then((res) => res.arrayBuffer());
|
|
89
|
+
nimbusMonoBold = fetch(new URL('../fonts/all/NimbusMono-Bold.woff', import.meta.url)).then((res) => res.arrayBuffer());
|
|
90
90
|
}
|
|
91
91
|
} else {
|
|
92
92
|
const { readFile } = await import('fs/promises');
|
|
93
|
-
carlitoNormal = readFile(new URL('../fonts/
|
|
94
|
-
carlitoItalic = readFile(new URL('../fonts/
|
|
95
|
-
carlitoBold = readFile(new URL('../fonts/
|
|
96
|
-
centuryNormal = readFile(new URL('../fonts/
|
|
97
|
-
centuryItalic = readFile(new URL('../fonts/
|
|
98
|
-
centuryBold = readFile(new URL('../fonts/
|
|
99
|
-
garamondNormal = readFile(new URL('../fonts/
|
|
100
|
-
garamondItalic = readFile(new URL('../fonts/
|
|
101
|
-
garamondBold = readFile(new URL('../fonts/
|
|
102
|
-
palatinoNormal = readFile(new URL('../fonts/
|
|
103
|
-
palatinoItalic = readFile(new URL('../fonts/
|
|
104
|
-
palatinoBold = readFile(new URL('../fonts/
|
|
105
|
-
nimbusRomNo9LNormal = readFile(new URL('../fonts/
|
|
106
|
-
nimbusRomNo9LItalic = readFile(new URL('../fonts/
|
|
107
|
-
nimbusRomNo9LBold = readFile(new URL('../fonts/
|
|
108
|
-
nimbusSansNormal = readFile(new URL('../fonts/
|
|
109
|
-
nimbusSansItalic = readFile(new URL('../fonts/
|
|
110
|
-
nimbusSansBold = readFile(new URL('../fonts/
|
|
111
|
-
nimbusMonoNormal = readFile(new URL('../fonts/
|
|
112
|
-
nimbusMonoItalic = readFile(new URL('../fonts/
|
|
113
|
-
nimbusMonoBold = readFile(new URL('../fonts/
|
|
93
|
+
carlitoNormal = readFile(new URL('../fonts/all/Carlito-Regular.woff', import.meta.url)).then((res) => res.buffer);
|
|
94
|
+
carlitoItalic = readFile(new URL('../fonts/all/Carlito-Italic.woff', import.meta.url)).then((res) => res.buffer);
|
|
95
|
+
carlitoBold = readFile(new URL('../fonts/all/Carlito-Bold.woff', import.meta.url)).then((res) => res.buffer);
|
|
96
|
+
centuryNormal = readFile(new URL('../fonts/all/Century-Regular.woff', import.meta.url)).then((res) => res.buffer);
|
|
97
|
+
centuryItalic = readFile(new URL('../fonts/all/Century-Italic.woff', import.meta.url)).then((res) => res.buffer);
|
|
98
|
+
centuryBold = readFile(new URL('../fonts/all/Century-Bold.woff', import.meta.url)).then((res) => res.buffer);
|
|
99
|
+
garamondNormal = readFile(new URL('../fonts/all/Garamond-Regular.woff', import.meta.url)).then((res) => res.buffer);
|
|
100
|
+
garamondItalic = readFile(new URL('../fonts/all/Garamond-Italic.woff', import.meta.url)).then((res) => res.buffer);
|
|
101
|
+
garamondBold = readFile(new URL('../fonts/all/Garamond-Bold.woff', import.meta.url)).then((res) => res.buffer);
|
|
102
|
+
palatinoNormal = readFile(new URL('../fonts/all/Palatino-Regular.woff', import.meta.url)).then((res) => res.buffer);
|
|
103
|
+
palatinoItalic = readFile(new URL('../fonts/all/Palatino-Italic.woff', import.meta.url)).then((res) => res.buffer);
|
|
104
|
+
palatinoBold = readFile(new URL('../fonts/all/Palatino-Bold.woff', import.meta.url)).then((res) => res.buffer);
|
|
105
|
+
nimbusRomNo9LNormal = readFile(new URL('../fonts/all/NimbusRoman-Regular.woff', import.meta.url)).then((res) => res.buffer);
|
|
106
|
+
nimbusRomNo9LItalic = readFile(new URL('../fonts/all/NimbusRoman-Italic.woff', import.meta.url)).then((res) => res.buffer);
|
|
107
|
+
nimbusRomNo9LBold = readFile(new URL('../fonts/all/NimbusRoman-Bold.woff', import.meta.url)).then((res) => res.buffer);
|
|
108
|
+
nimbusSansNormal = readFile(new URL('../fonts/all/NimbusSans-Regular.woff', import.meta.url)).then((res) => res.buffer);
|
|
109
|
+
nimbusSansItalic = readFile(new URL('../fonts/all/NimbusSans-Italic.woff', import.meta.url)).then((res) => res.buffer);
|
|
110
|
+
nimbusSansBold = readFile(new URL('../fonts/all/NimbusSans-Bold.woff', import.meta.url)).then((res) => res.buffer);
|
|
111
|
+
nimbusMonoNormal = readFile(new URL('../fonts/all/NimbusMono-Regular.woff', import.meta.url)).then((res) => res.buffer);
|
|
112
|
+
nimbusMonoItalic = readFile(new URL('../fonts/all/NimbusMono-Italic.woff', import.meta.url)).then((res) => res.buffer);
|
|
113
|
+
nimbusMonoBold = readFile(new URL('../fonts/all/NimbusMono-Bold.woff', import.meta.url)).then((res) => res.buffer);
|
|
114
114
|
}
|
|
115
115
|
|
|
116
116
|
const srcObj = {
|
package/js/fontEval.js
CHANGED
|
@@ -40,9 +40,9 @@ export async function evalPagesFont(font, pageArr, opt, n = 500) {
|
|
|
40
40
|
}
|
|
41
41
|
|
|
42
42
|
/**
|
|
43
|
-
* @param {Array<OcrPage>} pageArr
|
|
44
|
-
* @param {boolean} opt - Whether to use optimized fonts.
|
|
45
|
-
*/
|
|
43
|
+
* @param {Array<OcrPage>} pageArr
|
|
44
|
+
* @param {boolean} opt - Whether to use optimized fonts.
|
|
45
|
+
*/
|
|
46
46
|
export async function evaluateFonts(pageArr, opt) {
|
|
47
47
|
const evalCarlito = !!(opt ? FontCont.opt?.Carlito : FontCont.raw?.Carlito);
|
|
48
48
|
const evalNimbusSans = !!(opt ? FontCont.opt?.NimbusSans : FontCont.raw?.NimbusSans);
|
package/js/fontSupp.js
CHANGED
|
@@ -62,13 +62,13 @@ const calcSuppFontInfoForWords = async (words) => {
|
|
|
62
62
|
};
|
|
63
63
|
|
|
64
64
|
/**
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
65
|
+
*
|
|
66
|
+
* @param {Array<OcrPage>} ocrArr
|
|
67
|
+
* @returns
|
|
68
|
+
* This function runs recognition on certain fonts when we need more information about them.
|
|
69
|
+
* Fonts are included when either (1) we need to know if they are sans or serif or (2) if the text is extracted from a PDF,
|
|
70
|
+
* and we need to determine how large to render the text.
|
|
71
|
+
*/
|
|
72
72
|
export const calcSuppFontInfo = async (ocrArr) => {
|
|
73
73
|
if (!ocrArr) return;
|
|
74
74
|
await gs.initTesseract({ anyOk: true, langs: ['eng'] });
|
package/js/generalWorkerMain.js
CHANGED
|
@@ -57,10 +57,10 @@ export async function initGeneralWorker() {
|
|
|
57
57
|
}
|
|
58
58
|
|
|
59
59
|
/**
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
60
|
+
* Wraps a function to be called via worker messages.
|
|
61
|
+
* @param {string} func The function name to call.
|
|
62
|
+
* @returns {Function} A function that returns a promise resolving to the worker's response.
|
|
63
|
+
*/
|
|
64
64
|
function wrap(func) {
|
|
65
65
|
return function (...args) {
|
|
66
66
|
return new Promise((innerResolve, innerReject) => {
|
|
@@ -72,10 +72,10 @@ export async function initGeneralWorker() {
|
|
|
72
72
|
}
|
|
73
73
|
|
|
74
74
|
/**
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
75
|
+
* Similar to wrap, but handles two promises.
|
|
76
|
+
* @param {string} func The function name to call.
|
|
77
|
+
* @returns {Array} Returns two promises in an array.
|
|
78
|
+
*/
|
|
79
79
|
function wrap2(func) {
|
|
80
80
|
return function (...args) {
|
|
81
81
|
const id = promiseId++;
|
|
@@ -187,14 +187,14 @@ export class gs {
|
|
|
187
187
|
static optimizeFont = async (args) => (await gs.schedulerInner.addJob('optimizeFont', args));
|
|
188
188
|
|
|
189
189
|
/**
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
190
|
+
* @template {Partial<Tesseract.OutputFormats>} TO
|
|
191
|
+
* @param {Object} args
|
|
192
|
+
* @param {Parameters<Tesseract.Worker['recognize']>[0]} args.image
|
|
193
|
+
* @param {Parameters<Tesseract.Worker['recognize']>[1]} args.options
|
|
194
|
+
* @param {TO} args.output
|
|
195
|
+
* @returns {Promise<Tesseract.Page<TO>>}
|
|
196
|
+
* Exported for type inference purposes, should not be imported anywhere.
|
|
197
|
+
*/
|
|
198
198
|
static recognize = async (args) => (await gs.schedulerInner.addJob('recognize', args));
|
|
199
199
|
|
|
200
200
|
/**
|
|
@@ -87,8 +87,8 @@ export async function convertPageHocr({
|
|
|
87
87
|
}
|
|
88
88
|
|
|
89
89
|
/**
|
|
90
|
-
|
|
91
|
-
|
|
90
|
+
* @param {string} match
|
|
91
|
+
*/
|
|
92
92
|
function convertLine(match) {
|
|
93
93
|
const titleStrLine = match.match(/title=['"]([^'"]+)/)?.[1];
|
|
94
94
|
if (!titleStrLine) return '';
|
|
@@ -258,8 +258,8 @@ export async function convertPageHocr({
|
|
|
258
258
|
}
|
|
259
259
|
|
|
260
260
|
/**
|
|
261
|
-
|
|
262
|
-
|
|
261
|
+
* @param {string} match
|
|
262
|
+
*/
|
|
263
263
|
function convertWord(match) {
|
|
264
264
|
const wordID = match.match(/id=['"]([^'"]*)['"]/i)?.[1] || `word_${n + 1}_${pageObj.lines.length + 1}_${lineObj.words.length + 1}`;
|
|
265
265
|
|
|
@@ -357,8 +357,8 @@ export async function convertPageHocr({
|
|
|
357
357
|
}
|
|
358
358
|
|
|
359
359
|
/**
|
|
360
|
-
|
|
361
|
-
|
|
360
|
+
* @param {string} match
|
|
361
|
+
*/
|
|
362
362
|
const convertPar = (match) => {
|
|
363
363
|
const parLang = match.match(/^.+?lang=['"]([^'"]*)['"]/i)?.[1];
|
|
364
364
|
if (parLang) currentLang = parLang;
|
package/js/import/nodeAdapter.js
CHANGED
|
@@ -9,11 +9,11 @@ import path from 'path';
|
|
|
9
9
|
*/
|
|
10
10
|
export class FileNode {
|
|
11
11
|
/**
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
12
|
+
* Creates an instance of the FileNode class.
|
|
13
|
+
* @param {string} filePath - The path to the file.
|
|
14
|
+
* @param {string} name - The name of the file.
|
|
15
|
+
* @param {Buffer} fileData - The file's data.
|
|
16
|
+
*/
|
|
17
17
|
constructor(filePath, name, fileData) {
|
|
18
18
|
this.filePath = filePath;
|
|
19
19
|
this.name = name;
|
|
@@ -21,9 +21,9 @@ export class FileNode {
|
|
|
21
21
|
}
|
|
22
22
|
|
|
23
23
|
/**
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
24
|
+
* Returns an ArrayBuffer with the file's contents.
|
|
25
|
+
* @returns {Promise<ArrayBuffer>} A promise that resolves with the file's contents as an ArrayBuffer.
|
|
26
|
+
*/
|
|
27
27
|
async arrayBuffer() {
|
|
28
28
|
return this.fileData.buffer.slice(this.fileData.byteOffset, this.fileData.byteOffset + this.fileData.byteLength);
|
|
29
29
|
}
|
|
@@ -2,15 +2,15 @@ import { getJpegDimensions, getPngDimensions } from '../utils/imageUtils.js';
|
|
|
2
2
|
|
|
3
3
|
export class ImageWrapper {
|
|
4
4
|
/**
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
5
|
+
* @param {number} n - Page number
|
|
6
|
+
* @param {string} imageStr - Base-64 encoded image string. Should start with "data:image/png" or "data:image/jpeg".
|
|
7
|
+
* @param {string} colorMode - Color mode ("color", "gray", or "binary").
|
|
8
|
+
* @param {boolean} rotated - Whether image has been rotated.
|
|
9
|
+
* @param {boolean} upscaled - Whether image has been upscaled.
|
|
10
|
+
*
|
|
11
|
+
* All properties of this object must be serializable, as ImageWrapper objects are sent between threads.
|
|
12
|
+
* This means that no promises can be used.
|
|
13
|
+
*/
|
|
14
14
|
constructor(n, imageStr, colorMode, rotated = false, upscaled = false) {
|
|
15
15
|
this.n = n;
|
|
16
16
|
this.src = imageStr;
|
package/js/objects/ocrObjects.js
CHANGED
|
@@ -134,7 +134,7 @@ export function OcrWord(line, text, bbox, id) {
|
|
|
134
134
|
/**
|
|
135
135
|
* @type {boolean} - If `true`, left/right coordinates represent the left/rightmost pixel.
|
|
136
136
|
* If `false`, left/right coordinates represent the start/end of the font bounding box.
|
|
137
|
-
|
|
137
|
+
*/
|
|
138
138
|
this.visualCoords = true;
|
|
139
139
|
}
|
|
140
140
|
|
|
@@ -331,7 +331,7 @@ export const getLineText = (line) => {
|
|
|
331
331
|
|
|
332
332
|
/**
|
|
333
333
|
* @param {OcrPar} par
|
|
334
|
-
*/
|
|
334
|
+
*/
|
|
335
335
|
export const getParText = (par) => {
|
|
336
336
|
let text = '';
|
|
337
337
|
for (let i = 0; i < par.lines.length; i++) {
|
|
@@ -584,7 +584,7 @@ function rotateLine(line, angle, dims = null, useCharLevel = false) {
|
|
|
584
584
|
/**
|
|
585
585
|
* Clones page object.
|
|
586
586
|
* @param {OcrPage} page
|
|
587
|
-
|
|
587
|
+
*/
|
|
588
588
|
function clonePage(page) {
|
|
589
589
|
const pageNew = new OcrPage(page.n, { ...page.dims });
|
|
590
590
|
for (const line of page.lines) {
|
package/js/utils/fontUtils.js
CHANGED
|
@@ -99,7 +99,7 @@ function calcWordFontSizePrecise(wordArr, fontOpentype, nonLatin = false) {
|
|
|
99
99
|
* Adds ligatures to text of `OcrWord` object. Returns an array of letters.
|
|
100
100
|
* @param {OcrWord} word
|
|
101
101
|
* @returns {Array<string>}
|
|
102
|
-
*/
|
|
102
|
+
*/
|
|
103
103
|
export function addLigatures(word) {
|
|
104
104
|
if (word.smallCaps || !opt.ligatures) return word.text.split('');
|
|
105
105
|
const fontI = FontCont.getWordFont(word);
|
|
@@ -181,9 +181,9 @@ function calcWordCharMetrics(wordText, fontOpentype) {
|
|
|
181
181
|
if (opt.kerning) {
|
|
182
182
|
const glyphJ = fontOpentype.charToGlyph(charJ);
|
|
183
183
|
const kerning = fontOpentype.getKerningValue(glyphI, glyphJ);
|
|
184
|
-
kerningArr.push(kerning);
|
|
184
|
+
kerningArr.push(kerning);
|
|
185
185
|
} else {
|
|
186
|
-
kerningArr.push(0);
|
|
186
|
+
kerningArr.push(0);
|
|
187
187
|
}
|
|
188
188
|
}
|
|
189
189
|
}
|
|
@@ -322,11 +322,11 @@ export const calcWordFontSize = (word) => {
|
|
|
322
322
|
// Therefore, the appropriate font size must be calculated using (1) the character stats from the input image and
|
|
323
323
|
// (2) stats regarding the font being used.
|
|
324
324
|
/**
|
|
325
|
-
* Get or calculate font size for line.
|
|
326
|
-
* This value will either be (1) a manually set value or (2) a value calculated using line metrics.
|
|
327
|
-
* @param {OcrLine} line
|
|
328
|
-
* @returns {number}
|
|
329
|
-
*/
|
|
325
|
+
* Get or calculate font size for line.
|
|
326
|
+
* This value will either be (1) a manually set value or (2) a value calculated using line metrics.
|
|
327
|
+
* @param {OcrLine} line
|
|
328
|
+
* @returns {number}
|
|
329
|
+
*/
|
|
330
330
|
export const calcLineFontSize = (line) => {
|
|
331
331
|
if (line._size) return line._size;
|
|
332
332
|
|
package/js/utils/imageUtils.js
CHANGED
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
/* eslint-disable no-bitwise */
|
|
2
2
|
|
|
3
|
-
|
|
4
3
|
/**
|
|
5
4
|
* Loads an image from a given URL and sets it to a specified HTML element.
|
|
6
5
|
*
|
|
@@ -53,15 +52,15 @@ export function base64ToBytes(base64) {
|
|
|
53
52
|
}
|
|
54
53
|
|
|
55
54
|
/**
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
55
|
+
* Extracts the width and height from the IHDR chunk of a PNG image encoded in base64.
|
|
56
|
+
*
|
|
57
|
+
* This function decodes the base64 to bytes and parses the IHDR chunk to extract the image dimensions.
|
|
58
|
+
* It assumes the base64 string is a valid PNG image and directly starts parsing the binary data.
|
|
59
|
+
* Note: This is a basic implementation without extensive error handling or validation.
|
|
60
|
+
*
|
|
61
|
+
* @param {string} base64 - The base64 encoded string of the PNG image.
|
|
62
|
+
* @returns {dims} An object containing the width and height of the image.
|
|
63
|
+
*/
|
|
65
64
|
export function getPngDimensions(base64) {
|
|
66
65
|
// The number 96 is chosen to line up cleanly with byte boundaries (97 would result in an error)
|
|
67
66
|
// but is otherwise arbitrary, while being large enough to contain the IHDR chunk.
|
package/js/utils/reflowPars.js
CHANGED
|
@@ -51,11 +51,11 @@ export function assignParagraphs(page, angle) {
|
|
|
51
51
|
}
|
|
52
52
|
|
|
53
53
|
/**
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
54
|
+
* Calculates expected line start and end positions based on surrounding lines.
|
|
55
|
+
* If this line varies from those values, it may be the first or last line of a paragraph.
|
|
56
|
+
* @param {number} lineIndex - Index of the line to calculate the expected values for.
|
|
57
|
+
* @returns
|
|
58
|
+
*/
|
|
59
59
|
const calcExpected = (lineIndex) => {
|
|
60
60
|
// Ideally, we compare the current line to the next 5 lines.
|
|
61
61
|
// When there are fewer than 5 lines after the current line, we add previous lines to the window.
|
|
@@ -74,24 +74,24 @@ export async function drawWordActual(words, imageBinaryBit, angle) {
|
|
|
74
74
|
}
|
|
75
75
|
|
|
76
76
|
/**
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
77
|
+
* Function that draws a word on a canvas.
|
|
78
|
+
* This code was factored out to allow for drawing multiple times while only calculating metrics once.
|
|
79
|
+
* Therefore, only the drawing code should be in this function; the metrics should be calculated elsewhere
|
|
80
|
+
* and passed to this function, rather than calculating from an `OcrWord` object.
|
|
81
|
+
*
|
|
82
|
+
* @param {Object} params
|
|
83
|
+
* @param {CanvasRenderingContext2D|OffscreenCanvasRenderingContext2D} params.ctx
|
|
84
|
+
* @param {Array<string>} params.charArr
|
|
85
|
+
* @param {number} params.left
|
|
86
|
+
* @param {number} params.bottom
|
|
87
|
+
* @param {Array<number>} params.advanceArr - Array of pixels to advance for each character.
|
|
88
|
+
* Unlike the "advance" property of a glyph, this is the actual distance to advance on the canvas,
|
|
89
|
+
* and should include kerning and character spacing.
|
|
90
|
+
* @param {FontContainerFont} params.font
|
|
91
|
+
* @param {number} params.size
|
|
92
|
+
* @param {boolean} params.smallCaps
|
|
93
|
+
* @param {string} [params.fillStyle='black']
|
|
94
|
+
*/
|
|
95
95
|
const printWordOnCanvas = async ({
|
|
96
96
|
ctx, charArr, left, bottom, advanceArr, font, size, smallCaps, fillStyle = 'black',
|
|
97
97
|
}) => {
|
|
@@ -118,15 +118,15 @@ const printWordOnCanvas = async ({
|
|
|
118
118
|
};
|
|
119
119
|
|
|
120
120
|
/**
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
121
|
+
* Print word on canvas.
|
|
122
|
+
*
|
|
123
|
+
* @param {CanvasRenderingContext2D|OffscreenCanvasRenderingContext2D} ctx
|
|
124
|
+
* @param {OcrWord} word
|
|
125
|
+
* @param {number} offsetX
|
|
126
|
+
* @param {number} cropY
|
|
127
|
+
* @param {?CanvasRenderingContext2D|OffscreenCanvasRenderingContext2D} ctxView
|
|
128
|
+
* @param {boolean} [imageRotated=false] -
|
|
129
|
+
*/
|
|
130
130
|
export const drawWordRender = async (ctx, word, offsetX = 0, cropY = 0, ctxView = null, imageRotated = false) => {
|
|
131
131
|
if (!FontCont.raw) throw new Error('Fonts must be defined before running this function.');
|
|
132
132
|
if (!ctx) throw new Error('Canvases must be defined before running this function.');
|
|
@@ -1304,8 +1304,8 @@ export async function evalPageFont({
|
|
|
1304
1304
|
}
|
|
1305
1305
|
|
|
1306
1306
|
/**
|
|
1307
|
-
|
|
1308
|
-
|
|
1307
|
+
* @param {OcrLine} ocrLineJ
|
|
1308
|
+
*/
|
|
1309
1309
|
const transformLineFont = (ocrLineJ) => {
|
|
1310
1310
|
if (!FontCont.raw) throw new Error('Fonts must be defined before running this function.');
|
|
1311
1311
|
|
|
@@ -329,14 +329,14 @@ export const recognizeAndConvert2 = async ({
|
|
|
329
329
|
};
|
|
330
330
|
|
|
331
331
|
/**
|
|
332
|
-
* @template {Partial<Tesseract.OutputFormats>} TO
|
|
333
|
-
* @param {Object} args
|
|
334
|
-
* @param {Parameters<Tesseract.Worker['recognize']>[0]} args.image
|
|
335
|
-
* @param {Parameters<Tesseract.Worker['recognize']>[1]} args.options
|
|
336
|
-
* @param {TO} args.output
|
|
337
|
-
* @returns {Promise<Tesseract.Page<TO>>}
|
|
338
|
-
* Exported for type inference purposes, should not be imported anywhere.
|
|
339
|
-
*/
|
|
332
|
+
* @template {Partial<Tesseract.OutputFormats>} TO
|
|
333
|
+
* @param {Object} args
|
|
334
|
+
* @param {Parameters<Tesseract.Worker['recognize']>[0]} args.image
|
|
335
|
+
* @param {Parameters<Tesseract.Worker['recognize']>[1]} args.options
|
|
336
|
+
* @param {TO} args.output
|
|
337
|
+
* @returns {Promise<Tesseract.Page<TO>>}
|
|
338
|
+
* Exported for type inference purposes, should not be imported anywhere.
|
|
339
|
+
*/
|
|
340
340
|
export const recognize = async ({ image, options, output }) => {
|
|
341
341
|
if (!worker) throw new Error('Worker not initialized');
|
|
342
342
|
const res1 = await worker.recognize(image, options, output);
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "scribe.js-ocr",
|
|
3
|
-
"version": "0.7.
|
|
3
|
+
"version": "0.7.1",
|
|
4
4
|
"description": "High-quality OCR and text extraction for images and PDFs.",
|
|
5
5
|
"main": "scribe.js",
|
|
6
6
|
"directories": {
|
|
@@ -16,6 +16,7 @@
|
|
|
16
16
|
"eslint": "^8.56.0",
|
|
17
17
|
"eslint-config-airbnb-base": "^15.0.0",
|
|
18
18
|
"eslint-plugin-import": "^2.29.1",
|
|
19
|
+
"eslint-plugin-jsdoc": "^50.6.2",
|
|
19
20
|
"express": "^4.18.2",
|
|
20
21
|
"karma": "^6.4.4",
|
|
21
22
|
"karma-chrome-launcher": "^3.2.0",
|
|
@@ -48,8 +49,8 @@
|
|
|
48
49
|
},
|
|
49
50
|
"homepage": "https://github.com/scribeocr/scribe.js#readme",
|
|
50
51
|
"dependencies": {
|
|
52
|
+
"@scribe.js/tesseract.js": "^6.0.2",
|
|
51
53
|
"canvaskit-wasm": "^0.39.1",
|
|
52
|
-
"commander": "^11.1.0"
|
|
53
|
-
"@scribe.js/tesseract.js": "^6.0.2"
|
|
54
|
+
"commander": "^11.1.0"
|
|
54
55
|
}
|
|
55
56
|
}
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|