scribe.js-ocr 0.2.4 → 0.2.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +20 -2
- package/cli/main.js +12 -46
- package/js/clear.js +3 -3
- package/js/containers/app.js +11 -2
- package/js/containers/dataContainer.js +0 -6
- package/js/containers/fontContainer.js +139 -97
- package/js/containers/imageContainer.js +20 -84
- package/js/debug.js +34 -0
- package/js/export/exportPDF.js +52 -57
- package/js/export/exportRenderHOCR.js +5 -5
- package/js/fontContainerMain.js +95 -108
- package/js/fontEval.js +83 -111
- package/js/generalWorkerMain.js +28 -3
- package/js/global.d.ts +3 -0
- package/js/import/convertPageBlocks.js +9 -0
- package/js/import/convertPageShared.js +13 -7
- package/js/import/import.js +15 -13
- package/js/objects/imageObjects.js +97 -0
- package/js/objects/ocrObjects.js +53 -1
- package/js/recognizeConvert.js +8 -4
- package/js/utils/fontUtils.js +5 -5
- package/js/utils/miscUtils.js +7 -2
- package/js/worker/compareOCRModule.js +279 -81
- package/js/worker/generalWorker.js +98 -28
- package/js/worker/renderWordCanvas.js +14 -29
- package/package.json +1 -1
- package/scribe.js +77 -5
|
@@ -2,11 +2,12 @@
|
|
|
2
2
|
/* eslint-disable no-await-in-loop */
|
|
3
3
|
|
|
4
4
|
import ocr from '../objects/ocrObjects.js';
|
|
5
|
-
import { calcLineFontSize, calcWordFontSize } from '../utils/fontUtils.js';
|
|
5
|
+
import { calcLineFontSize, calcWordFontSize, calcWordMetrics } from '../utils/fontUtils.js';
|
|
6
6
|
import { getImageBitmap } from '../utils/imageUtils.js';
|
|
7
7
|
import { drawWordActual, drawWordRender } from './renderWordCanvas.js';
|
|
8
8
|
|
|
9
|
-
import {
|
|
9
|
+
import { FontCont } from '../containers/fontContainer.js';
|
|
10
|
+
import { imageUtils } from '../objects/imageObjects.js';
|
|
10
11
|
import { getRandomAlphanum } from '../utils/miscUtils.js';
|
|
11
12
|
// import { CompDebug } from '../objects/imageObjects.js';
|
|
12
13
|
|
|
@@ -95,7 +96,7 @@ export const initCanvasNode = async () => {
|
|
|
95
96
|
// The Node.js canvas package does not currently support worker threads
|
|
96
97
|
// https://github.com/Automattic/node-canvas/issues/1394
|
|
97
98
|
if (!isMainThread) throw new Error('node-canvas is not currently supported on worker threads.');
|
|
98
|
-
if (!
|
|
99
|
+
if (!FontCont.raw) throw new Error('Fonts must be defined before running this function.');
|
|
99
100
|
|
|
100
101
|
const { writeFile } = await import('fs');
|
|
101
102
|
const { promisify } = await import('util');
|
|
@@ -128,7 +129,7 @@ export const initCanvasNode = async () => {
|
|
|
128
129
|
|
|
129
130
|
// All fonts must be registered before the canvas is created, so all raw and optimized fonts are loaded.
|
|
130
131
|
// Even when using optimized fonts, at least one raw font is needed to compare against optimized version.
|
|
131
|
-
for (const [key1, value1] of Object.entries(
|
|
132
|
+
for (const [key1, value1] of Object.entries(FontCont.raw)) {
|
|
132
133
|
if (['Default', 'SansDefault', 'SerifDefault'].includes(key1)) continue;
|
|
133
134
|
for (const [key2, value2] of Object.entries(value1)) {
|
|
134
135
|
await registerFontObj(value2);
|
|
@@ -136,10 +137,11 @@ export const initCanvasNode = async () => {
|
|
|
136
137
|
}
|
|
137
138
|
|
|
138
139
|
// This function may be used before font optimization is complete, so `FontCont.opt` may not exist yet.
|
|
139
|
-
if (
|
|
140
|
-
for (const [key1, value1] of Object.entries(
|
|
141
|
-
if (['Default', 'SansDefault', 'SerifDefault'].includes(key1)) continue;
|
|
140
|
+
if (FontCont.opt) {
|
|
141
|
+
for (const [key1, value1] of Object.entries(FontCont.opt)) {
|
|
142
|
+
if (['Default', 'SansDefault', 'SerifDefault'].includes(key1) || !value1) continue;
|
|
142
143
|
for (const [key2, value2] of Object.entries(value1)) {
|
|
144
|
+
if (!value2) continue;
|
|
143
145
|
await registerFontObj(value2);
|
|
144
146
|
}
|
|
145
147
|
}
|
|
@@ -202,14 +204,13 @@ export async function evalWords({
|
|
|
202
204
|
|
|
203
205
|
const binaryImageBit = await getImageBitmap(binaryImage);
|
|
204
206
|
|
|
205
|
-
if (!
|
|
207
|
+
if (!FontCont.raw) throw new Error('Fonts must be defined before running this function.');
|
|
206
208
|
if (!calcCtx) throw new Error('Canvases must be defined before running this function.');
|
|
207
209
|
|
|
208
210
|
const view = options?.view === undefined ? false : options?.view;
|
|
209
211
|
const useABaseline = options?.useABaseline === undefined ? true : options?.useABaseline;
|
|
210
212
|
|
|
211
213
|
const cosAngle = Math.cos(angle * -1 * (Math.PI / 180)) || 1;
|
|
212
|
-
const sinAngle = Math.sin(angle * -1 * (Math.PI / 180)) || 0;
|
|
213
214
|
|
|
214
215
|
// All words are assumed to be on the same line
|
|
215
216
|
const linebox = wordsA[0].line.bbox;
|
|
@@ -237,15 +238,12 @@ export async function evalWords({
|
|
|
237
238
|
|
|
238
239
|
// Draw the words in wordsA
|
|
239
240
|
let x0 = wordsA[0].bbox.left;
|
|
240
|
-
|
|
241
|
+
const y0 = linebox.bottom + baselineA[1] + baselineA[0] * (wordsA[0].bbox.left - linebox.left);
|
|
241
242
|
for (let i = 0; i < wordsA.length; i++) {
|
|
242
243
|
const word = wordsA[i];
|
|
243
244
|
const wordIBox = word.bbox;
|
|
244
|
-
const baselineY = linebox.bottom + baselineA[1] + baselineA[0] * (wordIBox.left - linebox.left);
|
|
245
|
-
const x = wordIBox.left;
|
|
246
|
-
const y = word.sup || word.dropcap ? wordIBox.bottom : baselineY;
|
|
247
245
|
|
|
248
|
-
const offsetX = (
|
|
246
|
+
const offsetX = (wordIBox.left - x0) / cosAngle;
|
|
249
247
|
|
|
250
248
|
await drawWordRender(calcCtx, word, offsetX, cropY, ctxView, Boolean(angle));
|
|
251
249
|
}
|
|
@@ -296,15 +294,10 @@ export async function evalWords({
|
|
|
296
294
|
// Set style to whatever it is for wordsA. This is based on the assumption that "A" is Tesseract Legacy and "B" is Tesseract LSTM (which does not have useful style info).
|
|
297
295
|
word.style = wordsA[0].style;
|
|
298
296
|
|
|
299
|
-
const baselineY = linebox.bottom + baselineB[1] + baselineB[0] * (word.bbox.left - linebox.left);
|
|
300
297
|
if (i === 0) {
|
|
301
298
|
x0 = word.bbox.left;
|
|
302
|
-
y0 = baselineY;
|
|
303
299
|
}
|
|
304
|
-
const
|
|
305
|
-
const y = word.sup || word.dropcap ? word.bbox.bottom : baselineY;
|
|
306
|
-
|
|
307
|
-
const offsetX = (x - x0) * cosAngle - sinAngle * (y - y0);
|
|
300
|
+
const offsetX = (word.bbox.left - x0) / cosAngle;
|
|
308
301
|
|
|
309
302
|
await drawWordRender(calcCtx, word, offsetX, cropY, ctxView, Boolean(angle));
|
|
310
303
|
}
|
|
@@ -435,7 +428,7 @@ async function penalizeWord(wordObjs) {
|
|
|
435
428
|
const wordTextArr = wordStr.split('');
|
|
436
429
|
const wordFontSize = calcLineFontSize(word.line);
|
|
437
430
|
|
|
438
|
-
const fontI =
|
|
431
|
+
const fontI = FontCont.getWordFont(word);
|
|
439
432
|
const fontOpentypeI = fontI.opentype;
|
|
440
433
|
|
|
441
434
|
// These calculations differ from the standard word width calculations,
|
|
@@ -719,78 +712,111 @@ export async function compareOCRPageImp({
|
|
|
719
712
|
continue;
|
|
720
713
|
}
|
|
721
714
|
|
|
722
|
-
let hocrAError =
|
|
723
|
-
let hocrBError =
|
|
715
|
+
let hocrAError = 1;
|
|
716
|
+
let hocrBError = 1;
|
|
717
|
+
let hocrAAltError = 1;
|
|
724
718
|
|
|
725
719
|
if (!evalConflicts) {
|
|
726
|
-
|
|
720
|
+
hocrBError = 0;
|
|
727
721
|
} else if (oneToOne) {
|
|
728
|
-
//
|
|
722
|
+
// Some common patterns detected by Tesseract Legacy are so implausible that they are automatically rejected.
|
|
723
|
+
if (legacyLSTMComb && rejectWordLegacy(wordA.text, wordB.text)) {
|
|
724
|
+
hocrBError = 0;
|
|
725
|
+
// If the top choice out of the Tesseract Legacy classifier (but not entire model) is the same as the Tesseract LSTM choice, use the LSTM choice.
|
|
726
|
+
// This condition is common when the Legacy model improperly applies a dictionary "correction" to a word that was already correct.
|
|
727
|
+
} else if (legacyLSTMComb && wordA.textAlt && wordA.textAlt === wordB.text) {
|
|
728
|
+
hocrBError = 0;
|
|
729
|
+
// Otherwise, the words are compared visually.
|
|
730
|
+
} else {
|
|
731
|
+
// TODO: Figure out how to compare between small caps/non small-caps words (this is the only relevant style as it is the only style LSTM detects)
|
|
732
|
+
// Clone hocrAWord and set text content equal to hocrBWord
|
|
733
|
+
const wordAClone = ocr.cloneWord(wordA);
|
|
734
|
+
wordAClone.text = wordB.text;
|
|
735
|
+
|
|
736
|
+
if (wordB.smallCaps && !wordA.smallCaps) {
|
|
737
|
+
wordAClone.smallCaps = true;
|
|
738
|
+
wordAClone.size = calcWordFontSize(wordB);
|
|
739
|
+
}
|
|
729
740
|
|
|
730
|
-
|
|
731
|
-
|
|
732
|
-
|
|
741
|
+
const evalRes = await evalWords({
|
|
742
|
+
wordsA: [wordA], wordsB: [wordAClone], binaryImage: binaryImageBit, angle: imgAngle, imgDims, options: { view: Boolean(debugLabel) },
|
|
743
|
+
});
|
|
733
744
|
|
|
734
|
-
|
|
735
|
-
|
|
736
|
-
wordAClone.size = calcWordFontSize(wordB);
|
|
737
|
-
}
|
|
745
|
+
hocrAError = evalRes.metricA + (await penalizeWord([wordA]));
|
|
746
|
+
hocrBError = evalRes.metricB + (await penalizeWord([wordB]));
|
|
738
747
|
|
|
739
|
-
|
|
740
|
-
|
|
741
|
-
});
|
|
748
|
+
// Reject Tesseract Legacy word if appropriate
|
|
749
|
+
if (legacyLSTMComb && rejectWordLegacy(wordA.text, wordB.text)) hocrBError = 0;
|
|
742
750
|
|
|
743
|
-
|
|
744
|
-
|
|
751
|
+
// The alternative word from Tesseract legacy is tested if both other options are rejected.
|
|
752
|
+
// This can be useful for relatively high-quality scans of non-dictionary words, which both the LSTM model and the Legacy model (after dictionary correction) may fail on,
|
|
753
|
+
// with the raw results from the Legacy classifier being the most accurate.
|
|
754
|
+
if (legacyLSTMComb && hocrAError > 0.5 && hocrBError > 0.5 && wordA.textAlt && wordA.textAlt !== wordB.text) {
|
|
755
|
+
wordAClone.text = wordA.textAlt;
|
|
745
756
|
|
|
746
|
-
|
|
747
|
-
|
|
757
|
+
// This would run faster if it was built into the original evalWords function, but this case should be rare enough that it doesn't matter.
|
|
758
|
+
const evalResAlt = await evalWords({
|
|
759
|
+
wordsA: [wordAClone], binaryImage: binaryImageBit, angle: imgAngle, imgDims, options: { view: Boolean(debugLabel) },
|
|
760
|
+
});
|
|
748
761
|
|
|
749
|
-
|
|
750
|
-
const debugObj = evalRes.debug;
|
|
751
|
-
debugObj.errorAdjA = hocrAError;
|
|
752
|
-
debugObj.errorAdjB = hocrBError;
|
|
762
|
+
hocrAAltError = evalResAlt.metricA + (await penalizeWord([wordAClone]));
|
|
753
763
|
|
|
754
|
-
|
|
764
|
+
// To use the alt word, the error must be less than 0.5, and the alt word must be at least 0.1 better than both other options.
|
|
765
|
+
if (hocrAAltError >= 0.5 || (hocrAError - hocrAAltError) < 0.1 || (hocrBError - hocrAAltError) < 0.1) hocrAAltError = 1;
|
|
766
|
+
}
|
|
767
|
+
|
|
768
|
+
if (evalRes.debug) {
|
|
769
|
+
const debugObj = evalRes.debug;
|
|
770
|
+
debugObj.errorAdjA = hocrAError;
|
|
771
|
+
debugObj.errorAdjB = hocrBError;
|
|
772
|
+
|
|
773
|
+
debugImg.push(debugObj);
|
|
774
|
+
}
|
|
755
775
|
}
|
|
756
776
|
} else if (twoToOne) {
|
|
757
|
-
const evalRes = await evalWords({
|
|
758
|
-
wordsA: wordsAArr, wordsB: wordsBArr, binaryImage: binaryImageBit, angle: imgAngle, imgDims, options: { view: Boolean(debugLabel) },
|
|
759
|
-
});
|
|
760
|
-
|
|
761
777
|
const wordsAText = wordsAArr.map((x) => x.text).join('');
|
|
762
778
|
const wordsBText = wordsBArr.map((x) => x.text).join('');
|
|
763
779
|
|
|
764
|
-
|
|
765
|
-
|
|
766
|
-
|
|
767
|
-
|
|
768
|
-
|
|
769
|
-
|
|
770
|
-
|
|
771
|
-
|
|
772
|
-
|
|
773
|
-
|
|
774
|
-
|
|
775
|
-
|
|
776
|
-
|
|
777
|
-
|
|
780
|
+
if (legacyLSTMComb && rejectWordLegacy(wordsAText, wordsBText)) {
|
|
781
|
+
hocrBError = 0;
|
|
782
|
+
} else {
|
|
783
|
+
const evalRes = await evalWords({
|
|
784
|
+
wordsA: wordsAArr, wordsB: wordsBArr, binaryImage: binaryImageBit, angle: imgAngle, imgDims, options: { view: Boolean(debugLabel) },
|
|
785
|
+
});
|
|
786
|
+
|
|
787
|
+
// The option with more words has a small penalty added, as otherwise words incorrectly split will often score slightly better (due to more precise positioning)
|
|
788
|
+
hocrAError = evalRes.metricA + (wordsAArr.length - 1) * 0.025 + (await penalizeWord(wordsAArr));
|
|
789
|
+
hocrBError = evalRes.metricB + (wordsBArr.length - 1) * 0.025 + (await penalizeWord(wordsBArr));
|
|
790
|
+
|
|
791
|
+
// An additional penalty is added to the option with more words when (1) the text is the same in both options and (2) at least one word has no letters.
|
|
792
|
+
// This has 2 primary motivations:
|
|
793
|
+
// 1. Tesseract Legacy often splits numbers into separate words.
|
|
794
|
+
// For example, the "-" in a negative number may be a different word, or the digits before and after the decimal point may be split into separate words.
|
|
795
|
+
// TODO: It may be worth investigating if this issue can be improved in the engine.
|
|
796
|
+
// 2. Punctuation characters should not be their own word (e.g. quotes should come before/after alphanumeric characters)
|
|
797
|
+
if (wordsAText === wordsBText) {
|
|
798
|
+
if (wordsAArr.map((x) => /[a-z]/i.test(x.text)).filter((x) => !x).length > 0 || wordsBArr.map((x) => /[a-z]/i.test(x.text)).filter((x) => !x).length > 0) {
|
|
799
|
+
hocrAError += (wordsAArr.length - 1) * 0.05;
|
|
800
|
+
hocrBError += (wordsBArr.length - 1) * 0.05;
|
|
801
|
+
}
|
|
778
802
|
}
|
|
779
|
-
}
|
|
780
803
|
|
|
781
|
-
|
|
782
|
-
|
|
804
|
+
// Reject Tesseract Legacy word if appropriate
|
|
805
|
+
if (legacyLSTMComb && rejectWordLegacy(wordsAText, wordsBText)) hocrBError = 0;
|
|
783
806
|
|
|
784
|
-
|
|
785
|
-
|
|
786
|
-
|
|
787
|
-
|
|
807
|
+
if (evalRes.debug) {
|
|
808
|
+
const debugObj = evalRes.debug;
|
|
809
|
+
debugObj.errorAdjA = hocrAError;
|
|
810
|
+
debugObj.errorAdjB = hocrBError;
|
|
788
811
|
|
|
789
|
-
|
|
812
|
+
debugImg.push(debugObj);
|
|
813
|
+
}
|
|
790
814
|
}
|
|
791
815
|
}
|
|
792
816
|
|
|
793
|
-
|
|
817
|
+
// The LSTM model is known to be more accurate on average.
|
|
818
|
+
// Therefore, if both metrics are terrible (indicating the word isn't lined up at all), the LSTM word is used.
|
|
819
|
+
if ((hocrBError < hocrAError && hocrBError < hocrAAltError) || (legacyLSTMComb && hocrAError > 0.5 && hocrAAltError > 0.5)) {
|
|
794
820
|
const skip = ['eg', 'ie'].includes(wordA.text.replace(/\W/g, ''));
|
|
795
821
|
|
|
796
822
|
if (!skip) {
|
|
@@ -843,6 +869,10 @@ export async function compareOCRPageImp({
|
|
|
843
869
|
break;
|
|
844
870
|
}
|
|
845
871
|
}
|
|
872
|
+
} else if (wordA.textAlt && hocrAAltError < 0.5 && hocrAAltError < hocrAError) {
|
|
873
|
+
lineWordsEditedNew += 1;
|
|
874
|
+
if (wordA.text.length !== wordA.textAlt.length) wordA.chars = null;
|
|
875
|
+
wordA.text = wordA.textAlt;
|
|
846
876
|
}
|
|
847
877
|
}
|
|
848
878
|
}
|
|
@@ -891,11 +921,37 @@ export async function compareOCRPageImp({
|
|
|
891
921
|
// Note: These metrics leave open the door for some fringe edge cases.
|
|
892
922
|
// For example,
|
|
893
923
|
|
|
924
|
+
const hocrBAll = {};
|
|
925
|
+
ocr.getPageWords(pageB).forEach((x) => {
|
|
926
|
+
hocrBAll[x.id] = 1;
|
|
927
|
+
});
|
|
928
|
+
|
|
929
|
+
const hocrAAll = {};
|
|
930
|
+
ocr.getPageWords(pageAInt).forEach((x) => {
|
|
931
|
+
hocrAAll[x.id] = 1;
|
|
932
|
+
});
|
|
933
|
+
|
|
934
|
+
// Delete any punctuation-only words from the stats if they are being ignored.
|
|
935
|
+
if (ignorePunct) {
|
|
936
|
+
const punctOnlyIDsA = ocr.getPageWords(pageA).filter((x) => !x.text.replace(/[\W_]/g, '')).map((x) => x.id);
|
|
937
|
+
punctOnlyIDsA.forEach((x) => {
|
|
938
|
+
delete hocrAAll[x];
|
|
939
|
+
delete hocrAOverlap[x];
|
|
940
|
+
delete hocrACorrect[x];
|
|
941
|
+
});
|
|
942
|
+
const punctOnlyIDsB = ocr.getPageWords(pageB).filter((x) => !x.text.replace(/[\W_]/g, '')).map((x) => x.id);
|
|
943
|
+
punctOnlyIDsB.forEach((x) => {
|
|
944
|
+
delete hocrBAll[x];
|
|
945
|
+
delete hocrBOverlap[x];
|
|
946
|
+
delete hocrBCorrect[x];
|
|
947
|
+
});
|
|
948
|
+
}
|
|
949
|
+
|
|
894
950
|
// Number of words in ground truth
|
|
895
|
-
const totalCountB =
|
|
951
|
+
const totalCountB = Object.keys(hocrBAll).length;
|
|
896
952
|
|
|
897
953
|
// Number of words in candidate OCR
|
|
898
|
-
const totalCountA =
|
|
954
|
+
const totalCountA = Object.keys(hocrAAll).length;
|
|
899
955
|
|
|
900
956
|
// Number of words in ground truth with any overlap with candidate OCR
|
|
901
957
|
const overlapCountB = Object.keys(hocrBOverlap).length;
|
|
@@ -1053,7 +1109,7 @@ export async function evalPageBase({
|
|
|
1053
1109
|
|
|
1054
1110
|
const binaryImageBit = binaryImage.imageBitmap || await getImageBitmap(binaryImage.src);
|
|
1055
1111
|
|
|
1056
|
-
if (!
|
|
1112
|
+
if (!FontCont.raw) throw new Error('Fonts must be defined before running this function.');
|
|
1057
1113
|
if (!calcCtx) throw new Error('Canvases must be defined before running this function.');
|
|
1058
1114
|
|
|
1059
1115
|
let metricTotal = 0;
|
|
@@ -1092,16 +1148,31 @@ export async function evalPageBase({
|
|
|
1092
1148
|
* @param {import('../containers/imageContainer.js').ImageWrapper} params.binaryImage
|
|
1093
1149
|
* @param {PageMetrics} params.pageMetricsObj
|
|
1094
1150
|
* @param {string} params.font
|
|
1151
|
+
* @param {boolean} [params.opt=false] - Whether to use the optimized font set
|
|
1095
1152
|
* @returns
|
|
1096
1153
|
*/
|
|
1097
1154
|
export async function evalPageFont({
|
|
1098
|
-
page, binaryImage, pageMetricsObj, font,
|
|
1155
|
+
page, binaryImage, pageMetricsObj, font, opt = false,
|
|
1099
1156
|
}) {
|
|
1100
|
-
|
|
1157
|
+
const enableOptSave = FontCont.enableOpt;
|
|
1158
|
+
const forceOptSave = FontCont.forceOpt;
|
|
1159
|
+
|
|
1160
|
+
// Allowing the font to be set here allows for better performance during font optimization compared to using the `enableFontOpt` function.
|
|
1161
|
+
// This is because the `enableFontOpt` function requires a response from the main thread and *every* worker before completing, which leads to non-trivial waiting time.
|
|
1162
|
+
if (opt === true) {
|
|
1163
|
+
if (!FontCont.opt) throw new Error('Optimized fonts requested but not defined.');
|
|
1164
|
+
FontCont.forceOpt = true;
|
|
1165
|
+
} else if (opt === false) {
|
|
1166
|
+
if (!FontCont.raw) throw new Error('Raw fonts requested but not defined.');
|
|
1167
|
+
FontCont.enableOpt = false;
|
|
1168
|
+
FontCont.forceOpt = false;
|
|
1169
|
+
}
|
|
1170
|
+
|
|
1171
|
+
/**
|
|
1101
1172
|
* @param {OcrLine} ocrLineJ
|
|
1102
1173
|
*/
|
|
1103
1174
|
const transformLineFont = (ocrLineJ) => {
|
|
1104
|
-
if (!
|
|
1175
|
+
if (!FontCont.raw) throw new Error('Fonts must be defined before running this function.');
|
|
1105
1176
|
|
|
1106
1177
|
if (!ocrLineJ.words[0]) {
|
|
1107
1178
|
console.log('Line has 0 words, this should not happen.');
|
|
@@ -1109,9 +1180,9 @@ export async function evalPageFont({
|
|
|
1109
1180
|
}
|
|
1110
1181
|
|
|
1111
1182
|
// If the font is not set for a specific word, whether it is assumed sans/serif will be determined by the default font.
|
|
1112
|
-
const lineFontType = ocrLineJ.words[0].font ?
|
|
1183
|
+
const lineFontType = ocrLineJ.words[0].font ? FontCont.getWordFont(ocrLineJ.words[0]).type : FontCont.getFont('Default').type;
|
|
1113
1184
|
|
|
1114
|
-
if (
|
|
1185
|
+
if (FontCont.raw[font].normal.type !== lineFontType) return null;
|
|
1115
1186
|
|
|
1116
1187
|
const ocrLineJClone = ocr.cloneLine(ocrLineJ);
|
|
1117
1188
|
|
|
@@ -1122,9 +1193,14 @@ export async function evalPageFont({
|
|
|
1122
1193
|
return ocrLineJClone;
|
|
1123
1194
|
};
|
|
1124
1195
|
|
|
1125
|
-
|
|
1196
|
+
const res = await evalPageBase({
|
|
1126
1197
|
page, binaryImage, pageMetricsObj, func: transformLineFont,
|
|
1127
1198
|
});
|
|
1199
|
+
|
|
1200
|
+
FontCont.enableOpt = enableOptSave;
|
|
1201
|
+
FontCont.forceOpt = forceOptSave;
|
|
1202
|
+
|
|
1203
|
+
return res;
|
|
1128
1204
|
}
|
|
1129
1205
|
|
|
1130
1206
|
/**
|
|
@@ -1157,7 +1233,7 @@ export async function nudgePageBase({
|
|
|
1157
1233
|
|
|
1158
1234
|
const binaryImageBit = await getImageBitmap(binaryImage);
|
|
1159
1235
|
|
|
1160
|
-
if (!
|
|
1236
|
+
if (!FontCont.raw) throw new Error('Fonts must be defined before running this function.');
|
|
1161
1237
|
if (!calcCtx) throw new Error('Canvases must be defined before running this function.');
|
|
1162
1238
|
|
|
1163
1239
|
let improveCt = 0;
|
|
@@ -1249,3 +1325,125 @@ export async function nudgePageBaseline({
|
|
|
1249
1325
|
page, binaryImage, imageRotated, imageUpscaled, pageMetricsObj, func, view,
|
|
1250
1326
|
});
|
|
1251
1327
|
}
|
|
1328
|
+
|
|
1329
|
+
/**
 * Render a page to a canvas.
 * This function is a WIP and not all options are implemented.
 *
 * The page is drawn on the shared `viewCtx0` context: the canvas is resized to the image
 * (or, when no image is given, to `page.dims`), the image is painted if present, and every
 * word is then drawn character-by-character using the metrics from `calcWordMetrics`.
 *
 * @param {Object} args
 * @param {OcrPage} args.page - Page to render.
 * @param {import('../containers/imageContainer.js').ImageWrapper} [args.image] - Background image.
 *    When omitted, only the text is drawn on a canvas sized from `page.dims`.
 * @param {dims} [args.pageDims] - Dimensions of page. Not currently read by this function.
 * @param {?number} [args.angle=0] - Angle of page, in degrees. `null` is treated as 0.
 * @param {("proof" | "invis" | "ebook" | "eval")} [args.displayMode='proof'] - Display mode.
 * @param {number} [args.confThreshMed=75] - Threshold above which words are medium-confidence (0-100).
 * @param {number} [args.confThreshHigh=85] - Threshold above which words are high-confidence (0-100).
 * @returns {Promise<Blob|Buffer>} Result of `canvas.convertToBlob()` when `process` is undefined,
 *    otherwise the result of `canvas.toBuffer('image/png')`.
 *
 * TODO: This function does not belong here, however it is in this file because this is where the canvases live.
 * Think about how to refactor--the canvases within workers probably belong in their own container.
 *
 */
export const renderPageStaticImp = async ({
  page, image, angle = 0, displayMode = 'proof', confThreshMed = 75, confThreshHigh = 85,
}) => {
  viewCtx0.save();

  // `finally` guarantees the matching `restore()` runs even if rendering throws,
  // so the shared context is not left with an unbalanced state stack.
  try {
    if (image) {
      const dims = imageUtils.getDims(image);
      viewCtx0.canvas.height = dims.height;
      viewCtx0.canvas.width = dims.width;

      const imageBit = await getImageBitmap(image.src);

      viewCtx0.drawImage(imageBit, 0, 0);
    } else {
      viewCtx0.canvas.height = page.dims.height;
      viewCtx0.canvas.width = page.dims.width;
    }

    // The default parameter only covers `undefined`; `null` is also an accepted value.
    const angleDeg = angle ?? 0;

    viewCtx0.textBaseline = 'alphabetic';

    const sinAngle = Math.sin(angleDeg * (Math.PI / 180));
    const cosAngle = Math.cos(angleDeg * (Math.PI / 180));

    // `image` is optional, so it must be accessed with optional chaining everywhere.
    // When the image is flagged as rotated the text is drawn unrotated and a per-line
    // start adjustment is applied instead; otherwise the text itself is rotated.
    const imageRotated = Boolean(image?.rotated);
    const rotateText = !imageRotated;

    for (const lineObj of page.lines) {
      const angleAdjLine = imageRotated ? ocr.calcLineStartAngleAdj(lineObj) : { x: 0, y: 0 };

      const baselineY = lineObj.bbox.bottom + lineObj.baseline[1] + angleAdjLine.y;
      const lineLeftAdj = lineObj.bbox.left + angleAdjLine.x;

      // The origin is moved to the start of the line's baseline, so all word
      // coordinates below are relative to that point.
      if (rotateText) {
        viewCtx0.setTransform(cosAngle, sinAngle, -sinAngle, cosAngle, lineLeftAdj, baselineY);
      } else {
        viewCtx0.setTransform(1, 0, 0, 1, lineLeftAdj, baselineY);
      }

      for (const wordObj of lineObj.words) {
        if (!wordObj.text) continue;

        const { fill } = ocr.getWordFillOpacity(wordObj, displayMode, confThreshMed, confThreshHigh);

        viewCtx0.fillStyle = fill;

        const angleAdjWord = wordObj.sup ? ocr.calcWordAngleAdj(wordObj) : { x: 0, y: 0 };

        // Vertical shift above the line baseline; only superscripts and drop caps are shifted.
        // TODO: Test whether the math here is correct for drop caps.
        let ts = 0;
        if (wordObj.sup) {
          ts = (lineObj.bbox.bottom + lineObj.baseline[1] + angleAdjLine.y) - (wordObj.bbox.bottom + angleAdjLine.y + angleAdjWord.y);
        } else if (wordObj.dropcap) {
          ts = (lineObj.bbox.bottom + lineObj.baseline[1]) - wordObj.bbox.bottom + angleAdjLine.y + angleAdjWord.y;
        }

        // Horizontal distance from the line start to the word start.
        const width = (wordObj.bbox.left - wordObj.line.bbox.left) / cosAngle;

        const visualLeft = width + angleAdjWord.x;

        const wordMetrics = calcWordMetrics(wordObj);
        const {
          advanceArr, kerningArr, charSpacing, leftSideBearing,
        } = wordMetrics;
        const wordFontSize = wordMetrics.fontSize;

        // Total horizontal step after each character: advance + kerning + extra character spacing.
        const advanceArrTotal = [];
        for (let i = 0; i < advanceArr.length; i++) {
          advanceArrTotal.push((advanceArr[i] || 0) + (kerningArr[i] || 0) + (charSpacing || 0));
        }

        const font = FontCont.getWordFont(wordObj);
        const fontFull = `${font.fontFaceStyle} ${font.fontFaceWeight} ${wordFontSize}px ${font.fontFaceName}`;
        viewCtx0.font = fontFull;

        let leftI = wordObj.visualCoords ? visualLeft - leftSideBearing : visualLeft;
        for (let i = 0; i < wordMetrics.charArr.length; i++) {
          let charI = wordMetrics.charArr[i];

          // Small caps: characters that are already uppercase are drawn at full size;
          // all others are uppercased and drawn at the font's reduced small-caps size.
          if (wordObj.smallCaps) {
            if (charI === charI.toUpperCase()) {
              viewCtx0.font = fontFull;
            } else {
              charI = charI.toUpperCase();
              viewCtx0.font = `${font.fontFaceStyle} ${font.fontFaceWeight} ${wordFontSize * font.smallCapsMult}px ${font.fontFaceName}`;
            }
          }

          viewCtx0.fillText(charI, leftI, -ts);
          leftI += advanceArrTotal[i];
        }
      }
    }

    const img = typeof process === 'undefined' ? await viewCtx0.canvas.convertToBlob() : await viewCtx0.canvas.toBuffer('image/png');

    return img;
  } finally {
    viewCtx0.restore();
  }
};
|