scribe.js-ocr 0.2.4 → 0.2.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,11 +2,12 @@
2
2
  /* eslint-disable no-await-in-loop */
3
3
 
4
4
  import ocr from '../objects/ocrObjects.js';
5
- import { calcLineFontSize, calcWordFontSize } from '../utils/fontUtils.js';
5
+ import { calcLineFontSize, calcWordFontSize, calcWordMetrics } from '../utils/fontUtils.js';
6
6
  import { getImageBitmap } from '../utils/imageUtils.js';
7
7
  import { drawWordActual, drawWordRender } from './renderWordCanvas.js';
8
8
 
9
- import { fontAll } from '../containers/fontContainer.js';
9
+ import { FontCont } from '../containers/fontContainer.js';
10
+ import { imageUtils } from '../objects/imageObjects.js';
10
11
  import { getRandomAlphanum } from '../utils/miscUtils.js';
11
12
  // import { CompDebug } from '../objects/imageObjects.js';
12
13
 
@@ -95,7 +96,7 @@ export const initCanvasNode = async () => {
95
96
  // The Node.js canvas package does not currently support worker threads
96
97
  // https://github.com/Automattic/node-canvas/issues/1394
97
98
  if (!isMainThread) throw new Error('node-canvas is not currently supported on worker threads.');
98
- if (!fontAll.raw) throw new Error('Fonts must be defined before running this function.');
99
+ if (!FontCont.raw) throw new Error('Fonts must be defined before running this function.');
99
100
 
100
101
  const { writeFile } = await import('fs');
101
102
  const { promisify } = await import('util');
@@ -128,7 +129,7 @@ export const initCanvasNode = async () => {
128
129
 
129
130
  // All fonts must be registered before the canvas is created, so all raw and optimized fonts are loaded.
130
131
  // Even when using optimized fonts, at least one raw font is needed to compare against optimized version.
131
- for (const [key1, value1] of Object.entries(fontAll.raw)) {
132
+ for (const [key1, value1] of Object.entries(FontCont.raw)) {
132
133
  if (['Default', 'SansDefault', 'SerifDefault'].includes(key1)) continue;
133
134
  for (const [key2, value2] of Object.entries(value1)) {
134
135
  await registerFontObj(value2);
@@ -136,10 +137,11 @@ export const initCanvasNode = async () => {
136
137
  }
137
138
 
138
139
  // This function may be used before font optimization is complete, so `FontCont.opt` may not exist yet and is checked first.
139
- if (fontAll.optInitial) {
140
- for (const [key1, value1] of Object.entries(fontAll.optInitial)) {
141
- if (['Default', 'SansDefault', 'SerifDefault'].includes(key1)) continue;
140
+ if (FontCont.opt) {
141
+ for (const [key1, value1] of Object.entries(FontCont.opt)) {
142
+ if (['Default', 'SansDefault', 'SerifDefault'].includes(key1) || !value1) continue;
142
143
  for (const [key2, value2] of Object.entries(value1)) {
144
+ if (!value2) continue;
143
145
  await registerFontObj(value2);
144
146
  }
145
147
  }
@@ -202,14 +204,13 @@ export async function evalWords({
202
204
 
203
205
  const binaryImageBit = await getImageBitmap(binaryImage);
204
206
 
205
- if (!fontAll.active) throw new Error('Fonts must be defined before running this function.');
207
+ if (!FontCont.raw) throw new Error('Fonts must be defined before running this function.');
206
208
  if (!calcCtx) throw new Error('Canvases must be defined before running this function.');
207
209
 
208
210
  const view = options?.view === undefined ? false : options?.view;
209
211
  const useABaseline = options?.useABaseline === undefined ? true : options?.useABaseline;
210
212
 
211
213
  const cosAngle = Math.cos(angle * -1 * (Math.PI / 180)) || 1;
212
- const sinAngle = Math.sin(angle * -1 * (Math.PI / 180)) || 0;
213
214
 
214
215
  // All words are assumed to be on the same line
215
216
  const linebox = wordsA[0].line.bbox;
@@ -237,15 +238,12 @@ export async function evalWords({
237
238
 
238
239
  // Draw the words in wordsA
239
240
  let x0 = wordsA[0].bbox.left;
240
- let y0 = linebox.bottom + baselineA[1] + baselineA[0] * (wordsA[0].bbox.left - linebox.left);
241
+ const y0 = linebox.bottom + baselineA[1] + baselineA[0] * (wordsA[0].bbox.left - linebox.left);
241
242
  for (let i = 0; i < wordsA.length; i++) {
242
243
  const word = wordsA[i];
243
244
  const wordIBox = word.bbox;
244
- const baselineY = linebox.bottom + baselineA[1] + baselineA[0] * (wordIBox.left - linebox.left);
245
- const x = wordIBox.left;
246
- const y = word.sup || word.dropcap ? wordIBox.bottom : baselineY;
247
245
 
248
- const offsetX = (x - x0) * cosAngle - sinAngle * (y - y0);
246
+ const offsetX = (wordIBox.left - x0) / cosAngle;
249
247
 
250
248
  await drawWordRender(calcCtx, word, offsetX, cropY, ctxView, Boolean(angle));
251
249
  }
@@ -296,15 +294,10 @@ export async function evalWords({
296
294
  // Set style to whatever it is for wordsA. This is based on the assumption that "A" is Tesseract Legacy and "B" is Tesseract LSTM (which does not have useful style info).
297
295
  word.style = wordsA[0].style;
298
296
 
299
- const baselineY = linebox.bottom + baselineB[1] + baselineB[0] * (word.bbox.left - linebox.left);
300
297
  if (i === 0) {
301
298
  x0 = word.bbox.left;
302
- y0 = baselineY;
303
299
  }
304
- const x = word.bbox.left;
305
- const y = word.sup || word.dropcap ? word.bbox.bottom : baselineY;
306
-
307
- const offsetX = (x - x0) * cosAngle - sinAngle * (y - y0);
300
+ const offsetX = (word.bbox.left - x0) / cosAngle;
308
301
 
309
302
  await drawWordRender(calcCtx, word, offsetX, cropY, ctxView, Boolean(angle));
310
303
  }
@@ -435,7 +428,7 @@ async function penalizeWord(wordObjs) {
435
428
  const wordTextArr = wordStr.split('');
436
429
  const wordFontSize = calcLineFontSize(word.line);
437
430
 
438
- const fontI = fontAll.getWordFont(word);
431
+ const fontI = FontCont.getWordFont(word);
439
432
  const fontOpentypeI = fontI.opentype;
440
433
 
441
434
  // These calculations differ from the standard word width calculations,
@@ -719,78 +712,111 @@ export async function compareOCRPageImp({
719
712
  continue;
720
713
  }
721
714
 
722
- let hocrAError = 0;
723
- let hocrBError = 0;
715
+ let hocrAError = 1;
716
+ let hocrBError = 1;
717
+ let hocrAAltError = 1;
724
718
 
725
719
  if (!evalConflicts) {
726
- hocrAError = 1;
720
+ hocrBError = 0;
727
721
  } else if (oneToOne) {
728
- // TODO: Figure out how to compare between small caps/non small-caps words (this is the only relevant style as it is the only style LSTM detects)
722
+ // Some common patterns detected by Tesseract Legacy are so implausible that they are automatically rejected.
723
+ if (legacyLSTMComb && rejectWordLegacy(wordA.text, wordB.text)) {
724
+ hocrBError = 0;
725
+ // If the top choice out of the Tesseract Legacy classifier (but not entire model) is the same as the Tesseract LSTM choice, use the LSTM choice.
726
+ // This condition is common when the Legacy model improperly applies a dictionary "correction" to a word that was already correct.
727
+ } else if (legacyLSTMComb && wordA.textAlt && wordA.textAlt === wordB.text) {
728
+ hocrBError = 0;
729
+ // Otherwise, the words are compared visually.
730
+ } else {
731
+ // TODO: Figure out how to compare between small caps/non small-caps words (this is the only relevant style as it is the only style LSTM detects)
732
+ // Clone hocrAWord and set text content equal to hocrBWord
733
+ const wordAClone = ocr.cloneWord(wordA);
734
+ wordAClone.text = wordB.text;
735
+
736
+ if (wordB.smallCaps && !wordA.smallCaps) {
737
+ wordAClone.smallCaps = true;
738
+ wordAClone.size = calcWordFontSize(wordB);
739
+ }
729
740
 
730
- // Clone hocrAWord and set text content equal to hocrBWord
731
- const wordAClone = ocr.cloneWord(wordA);
732
- wordAClone.text = wordB.text;
741
+ const evalRes = await evalWords({
742
+ wordsA: [wordA], wordsB: [wordAClone], binaryImage: binaryImageBit, angle: imgAngle, imgDims, options: { view: Boolean(debugLabel) },
743
+ });
733
744
 
734
- if (wordB.smallCaps && !wordA.smallCaps) {
735
- wordAClone.smallCaps = true;
736
- wordAClone.size = calcWordFontSize(wordB);
737
- }
745
+ hocrAError = evalRes.metricA + (await penalizeWord([wordA]));
746
+ hocrBError = evalRes.metricB + (await penalizeWord([wordB]));
738
747
 
739
- const evalRes = await evalWords({
740
- wordsA: [wordA], wordsB: [wordAClone], binaryImage: binaryImageBit, angle: imgAngle, imgDims, options: { view: Boolean(debugLabel) },
741
- });
748
+ // Reject Tesseract Legacy word if appropriate
749
+ if (legacyLSTMComb && rejectWordLegacy(wordA.text, wordB.text)) hocrBError = 0;
742
750
 
743
- hocrAError = evalRes.metricA + (await penalizeWord([wordA]));
744
- hocrBError = evalRes.metricB + (await penalizeWord([wordB]));
751
+ // The alternative word from Tesseract legacy is tested if both other options are rejected.
752
+ // This can be useful for relatively high-quality scans of non-dictionary words, which both the LSTM model and the Legacy model (after dictionary correction) may fail on,
753
+ // with the raw results from the Legacy classifier being the most accurate.
754
+ if (legacyLSTMComb && hocrAError > 0.5 && hocrBError > 0.5 && wordA.textAlt && wordA.textAlt !== wordB.text) {
755
+ wordAClone.text = wordA.textAlt;
745
756
 
746
- // Reject Tesseract Legacy word if appropriate
747
- if (legacyLSTMComb && rejectWordLegacy(wordA.text, wordB.text)) hocrAError = 1;
757
+ // This would run faster if it was built into the original evalWords function, but this case should be rare enough that it doesn't matter.
758
+ const evalResAlt = await evalWords({
759
+ wordsA: [wordAClone], binaryImage: binaryImageBit, angle: imgAngle, imgDims, options: { view: Boolean(debugLabel) },
760
+ });
748
761
 
749
- if (evalRes.debug) {
750
- const debugObj = evalRes.debug;
751
- debugObj.errorAdjA = hocrAError;
752
- debugObj.errorAdjB = hocrBError;
762
+ hocrAAltError = evalResAlt.metricA + (await penalizeWord([wordAClone]));
753
763
 
754
- debugImg.push(debugObj);
764
+ // To use the alt word, the error must be less than 0.5, and the alt word must be at least 0.1 better than both other options.
765
+ if (hocrAAltError >= 0.5 || (hocrAError - hocrAAltError) < 0.1 || (hocrBError - hocrAAltError) < 0.1) hocrAAltError = 1;
766
+ }
767
+
768
+ if (evalRes.debug) {
769
+ const debugObj = evalRes.debug;
770
+ debugObj.errorAdjA = hocrAError;
771
+ debugObj.errorAdjB = hocrBError;
772
+
773
+ debugImg.push(debugObj);
774
+ }
755
775
  }
756
776
  } else if (twoToOne) {
757
- const evalRes = await evalWords({
758
- wordsA: wordsAArr, wordsB: wordsBArr, binaryImage: binaryImageBit, angle: imgAngle, imgDims, options: { view: Boolean(debugLabel) },
759
- });
760
-
761
777
  const wordsAText = wordsAArr.map((x) => x.text).join('');
762
778
  const wordsBText = wordsBArr.map((x) => x.text).join('');
763
779
 
764
- // The option with more words has a small penalty added, as otherwise words incorrectly split will often score slightly better (due to more precise positioning)
765
- hocrAError = evalRes.metricA + (wordsAArr.length - 1) * 0.025 + (await penalizeWord(wordsAArr));
766
- hocrBError = evalRes.metricB + (wordsBArr.length - 1) * 0.025 + (await penalizeWord(wordsBArr));
767
-
768
- // An additional penalty is added to the option with more words when (1) the text is the same in both options and (2) at least one word has no letters.
769
- // This has 2 primary motivations:
770
- // 1. Tesseract Legacy often splits numbers into separate words.
771
- // For example, the "-" in a negative number may be a different word, or the digits before and after the decimal point may be split into separate words.
772
- // TODO: It may be worth investigating if this issue can be improved in the engine.
773
- // 1. Punctuation characters should not be their own word (e.g. quotes should come before/after alphanumeric characters)
774
- if (wordsAText === wordsBText) {
775
- if (wordsAArr.map((x) => /[a-z]/i.test(x.text)).filter((x) => !x).length > 0 || wordsBArr.map((x) => /[a-z]/i.test(x.text)).filter((x) => !x).length > 0) {
776
- hocrAError += (wordsAArr.length - 1) * 0.05;
777
- hocrBError += (wordsBArr.length - 1) * 0.05;
780
+ if (legacyLSTMComb && rejectWordLegacy(wordsAText, wordsBText)) {
781
+ hocrBError = 0;
782
+ } else {
783
+ const evalRes = await evalWords({
784
+ wordsA: wordsAArr, wordsB: wordsBArr, binaryImage: binaryImageBit, angle: imgAngle, imgDims, options: { view: Boolean(debugLabel) },
785
+ });
786
+
787
+ // The option with more words has a small penalty added, as otherwise words incorrectly split will often score slightly better (due to more precise positioning)
788
+ hocrAError = evalRes.metricA + (wordsAArr.length - 1) * 0.025 + (await penalizeWord(wordsAArr));
789
+ hocrBError = evalRes.metricB + (wordsBArr.length - 1) * 0.025 + (await penalizeWord(wordsBArr));
790
+
791
+ // An additional penalty is added to the option with more words when (1) the text is the same in both options and (2) at least one word has no letters.
792
+ // This has 2 primary motivations:
793
+ // 1. Tesseract Legacy often splits numbers into separate words.
794
+ // For example, the "-" in a negative number may be a different word, or the digits before and after the decimal point may be split into separate words.
795
+ // TODO: It may be worth investigating if this issue can be improved in the engine.
796
+ // 2. Punctuation characters should not be their own word (e.g. quotes should come before/after alphanumeric characters)
797
+ if (wordsAText === wordsBText) {
798
+ if (wordsAArr.map((x) => /[a-z]/i.test(x.text)).filter((x) => !x).length > 0 || wordsBArr.map((x) => /[a-z]/i.test(x.text)).filter((x) => !x).length > 0) {
799
+ hocrAError += (wordsAArr.length - 1) * 0.05;
800
+ hocrBError += (wordsBArr.length - 1) * 0.05;
801
+ }
778
802
  }
779
- }
780
803
 
781
- // Reject Tesseract Legacy word if appropriate
782
- if (legacyLSTMComb && rejectWordLegacy(wordsAText, wordsBText)) hocrAError = 1;
804
+ // Reject Tesseract Legacy word if appropriate
805
+ if (legacyLSTMComb && rejectWordLegacy(wordsAText, wordsBText)) hocrBError = 0;
783
806
 
784
- if (evalRes.debug) {
785
- const debugObj = evalRes.debug;
786
- debugObj.errorAdjA = hocrAError;
787
- debugObj.errorAdjB = hocrBError;
807
+ if (evalRes.debug) {
808
+ const debugObj = evalRes.debug;
809
+ debugObj.errorAdjA = hocrAError;
810
+ debugObj.errorAdjB = hocrBError;
788
811
 
789
- debugImg.push(debugObj);
812
+ debugImg.push(debugObj);
813
+ }
790
814
  }
791
815
  }
792
816
 
793
- if (hocrBError < hocrAError) {
817
+ // The LSTM model is known to be more accurate on average.
818
+ // Therefore, if both metrics are terrible (indicating the word isn't lined up at all), the LSTM word is used.
819
+ if ((hocrBError < hocrAError && hocrBError < hocrAAltError) || (legacyLSTMComb && hocrAError > 0.5 && hocrAAltError > 0.5)) {
794
820
  const skip = ['eg', 'ie'].includes(wordA.text.replace(/\W/g, ''));
795
821
 
796
822
  if (!skip) {
@@ -843,6 +869,10 @@ export async function compareOCRPageImp({
843
869
  break;
844
870
  }
845
871
  }
872
+ } else if (wordA.textAlt && hocrAAltError < 0.5 && hocrAAltError < hocrAError) {
873
+ lineWordsEditedNew += 1;
874
+ if (wordA.text.length !== wordA.textAlt.length) wordA.chars = null;
875
+ wordA.text = wordA.textAlt;
846
876
  }
847
877
  }
848
878
  }
@@ -891,11 +921,37 @@ export async function compareOCRPageImp({
891
921
  // Note: These metrics leave open the door for some fringe edge cases.
892
922
  // For example,
893
923
 
924
+ const hocrBAll = {};
925
+ ocr.getPageWords(pageB).forEach((x) => {
926
+ hocrBAll[x.id] = 1;
927
+ });
928
+
929
+ const hocrAAll = {};
930
+ ocr.getPageWords(pageAInt).forEach((x) => {
931
+ hocrAAll[x.id] = 1;
932
+ });
933
+
934
+ // Delete any punctuation-only words from the stats if they are being ignored.
935
+ if (ignorePunct) {
936
+ const punctOnlyIDsA = ocr.getPageWords(pageA).filter((x) => !x.text.replace(/[\W_]/g, '')).map((x) => x.id);
937
+ punctOnlyIDsA.forEach((x) => {
938
+ delete hocrAAll[x];
939
+ delete hocrAOverlap[x];
940
+ delete hocrACorrect[x];
941
+ });
942
+ const punctOnlyIDsB = ocr.getPageWords(pageB).filter((x) => !x.text.replace(/[\W_]/g, '')).map((x) => x.id);
943
+ punctOnlyIDsB.forEach((x) => {
944
+ delete hocrBAll[x];
945
+ delete hocrBOverlap[x];
946
+ delete hocrBCorrect[x];
947
+ });
948
+ }
949
+
894
950
  // Number of words in ground truth
895
- const totalCountB = ocr.getPageWords(pageB).length;
951
+ const totalCountB = Object.keys(hocrBAll).length;
896
952
 
897
953
  // Number of words in candidate OCR
898
- const totalCountA = ocr.getPageWords(pageAInt).length;
954
+ const totalCountA = Object.keys(hocrAAll).length;
899
955
 
900
956
  // Number of words in ground truth with any overlap with candidate OCR
901
957
  const overlapCountB = Object.keys(hocrBOverlap).length;
@@ -1053,7 +1109,7 @@ export async function evalPageBase({
1053
1109
 
1054
1110
  const binaryImageBit = binaryImage.imageBitmap || await getImageBitmap(binaryImage.src);
1055
1111
 
1056
- if (!fontAll.active) throw new Error('Fonts must be defined before running this function.');
1112
+ if (!FontCont.raw) throw new Error('Fonts must be defined before running this function.');
1057
1113
  if (!calcCtx) throw new Error('Canvases must be defined before running this function.');
1058
1114
 
1059
1115
  let metricTotal = 0;
@@ -1092,16 +1148,31 @@ export async function evalPageBase({
1092
1148
  * @param {import('../containers/imageContainer.js').ImageWrapper} params.binaryImage
1093
1149
  * @param {PageMetrics} params.pageMetricsObj
1094
1150
  * @param {string} params.font
1151
+ * @param {boolean} [params.opt=false] - Whether to use the optimized font set
1095
1152
  * @returns
1096
1153
  */
1097
1154
  export async function evalPageFont({
1098
- page, binaryImage, pageMetricsObj, font,
1155
+ page, binaryImage, pageMetricsObj, font, opt = false,
1099
1156
  }) {
1100
- /**
1157
+ const enableOptSave = FontCont.enableOpt;
1158
+ const forceOptSave = FontCont.forceOpt;
1159
+
1160
+ // Allowing the font to be set here allows for better performance during font optimization compared to using the `enableFontOpt` function.
1161
+ // This is because the `enableFontOpt` function requires a response from the main thread and *every* worker before completing, which leads to non-trivial waiting time.
1162
+ if (opt === true) {
1163
+ if (!FontCont.opt) throw new Error('Optimized fonts requested but not defined.');
1164
+ FontCont.forceOpt = true;
1165
+ } else if (opt === false) {
1166
+ if (!FontCont.raw) throw new Error('Raw fonts requested but not defined.');
1167
+ FontCont.enableOpt = false;
1168
+ FontCont.forceOpt = false;
1169
+ }
1170
+
1171
+ /**
1101
1172
  * @param {OcrLine} ocrLineJ
1102
1173
  */
1103
1174
  const transformLineFont = (ocrLineJ) => {
1104
- if (!fontAll.active) throw new Error('Fonts must be defined before running this function.');
1175
+ if (!FontCont.raw) throw new Error('Fonts must be defined before running this function.');
1105
1176
 
1106
1177
  if (!ocrLineJ.words[0]) {
1107
1178
  console.log('Line has 0 words, this should not happen.');
@@ -1109,9 +1180,9 @@ export async function evalPageFont({
1109
1180
  }
1110
1181
 
1111
1182
  // If the font is not set for a specific word, whether it is assumed sans/serif will be determined by the default font.
1112
- const lineFontType = ocrLineJ.words[0].font ? fontAll.getWordFont(ocrLineJ.words[0]).type : fontAll.getFont('Default').type;
1183
+ const lineFontType = ocrLineJ.words[0].font ? FontCont.getWordFont(ocrLineJ.words[0]).type : FontCont.getFont('Default').type;
1113
1184
 
1114
- if (fontAll.active[font].normal.type !== lineFontType) return null;
1185
+ if (FontCont.raw[font].normal.type !== lineFontType) return null;
1115
1186
 
1116
1187
  const ocrLineJClone = ocr.cloneLine(ocrLineJ);
1117
1188
 
@@ -1122,9 +1193,14 @@ export async function evalPageFont({
1122
1193
  return ocrLineJClone;
1123
1194
  };
1124
1195
 
1125
- return await evalPageBase({
1196
+ const res = await evalPageBase({
1126
1197
  page, binaryImage, pageMetricsObj, func: transformLineFont,
1127
1198
  });
1199
+
1200
+ FontCont.enableOpt = enableOptSave;
1201
+ FontCont.forceOpt = forceOptSave;
1202
+
1203
+ return res;
1128
1204
  }
1129
1205
 
1130
1206
  /**
@@ -1157,7 +1233,7 @@ export async function nudgePageBase({
1157
1233
 
1158
1234
  const binaryImageBit = await getImageBitmap(binaryImage);
1159
1235
 
1160
- if (!fontAll.active) throw new Error('Fonts must be defined before running this function.');
1236
+ if (!FontCont.raw) throw new Error('Fonts must be defined before running this function.');
1161
1237
  if (!calcCtx) throw new Error('Canvases must be defined before running this function.');
1162
1238
 
1163
1239
  let improveCt = 0;
@@ -1249,3 +1325,125 @@ export async function nudgePageBaseline({
1249
1325
  page, binaryImage, imageRotated, imageUpscaled, pageMetricsObj, func, view,
1250
1326
  });
1251
1327
  }
1328
+
1329
/**
 * Render a page to a canvas.
 * This function is a WIP and not all options are implemented.
 *
 * The shared `viewCtx0` canvas is resized to the image (or, when no image is given, to `page.dims`),
 * the image is drawn if present, and every word with text is drawn character-by-character on top.
 *
 * @param {Object} args
 * @param {OcrPage} args.page - Page to render.
 * @param {import('../containers/imageContainer.js').ImageWrapper} [args.image] - Image drawn beneath the text.
 *    When omitted, the canvas is sized from `page.dims` and only the text is drawn.
 *    When `image.rotated` is truthy, the text is drawn unrotated and line starts are shifted using
 *    `ocr.calcLineStartAngleAdj`; otherwise the text is rotated by `angle`.
 * @param {dims} [args.pageDims] - Dimensions of page. Accepted for compatibility; not currently read by this function.
 * @param {?number} [args.angle=0] - Angle of page, in degrees. `null` is treated as 0.
 * @param {("proof" | "invis" | "ebook" | "eval")} [args.displayMode='proof'] - Display mode.
 * @param {number} [args.confThreshMed=75] - Threshold above which words are medium-confidence (0-100).
 * @param {number} [args.confThreshHigh=85] - Threshold above which words are high-confidence (0-100).
 * @returns {Promise<Blob|Buffer>} Result of `canvas.convertToBlob()` when `process` is undefined (browser),
 *    otherwise the result of `canvas.toBuffer('image/png')` (Node.js).
 *
 * TODO: This function does not belong here, however it is in this file because this is where the canvases live.
 * Think about how to refactor--the canvases within workers probably belong in their own container.
 *
 */
export const renderPageStaticImp = async ({
  page, image, angle = 0, displayMode = 'proof', confThreshMed = 75, confThreshHigh = 85,
}) => {
  viewCtx0.save();

  try {
    if (image) {
      const dims = imageUtils.getDims(image);
      viewCtx0.canvas.height = dims.height;
      viewCtx0.canvas.width = dims.width;

      const imageBit = await getImageBitmap(image.src);

      viewCtx0.drawImage(imageBit, 0, 0);
    } else {
      viewCtx0.canvas.height = page.dims.height;
      viewCtx0.canvas.width = page.dims.width;
    }

    // The parameter default only covers `undefined`; callers may pass `null` explicitly.
    const angleDeg = angle ?? 0;

    viewCtx0.textBaseline = 'alphabetic';

    const sinAngle = Math.sin(angleDeg * (Math.PI / 180));
    const cosAngle = Math.cos(angleDeg * (Math.PI / 180));

    // `image` is optional, so it must be guarded here; reading `image.rotated` directly
    // throws when the page is rendered without a background image.
    const imageRotated = Boolean(image?.rotated);

    // When the image has not been rotated, the text is rotated to match the page angle instead.
    const rotateText = !imageRotated;

    for (const lineObj of page.lines) {
      const angleAdjLine = imageRotated ? ocr.calcLineStartAngleAdj(lineObj) : { x: 0, y: 0 };

      const baselineY = lineObj.bbox.bottom + lineObj.baseline[1] + angleAdjLine.y;
      const lineLeftAdj = lineObj.bbox.left + angleAdjLine.x;

      // The origin is moved to the start of the line's baseline, so all words are drawn relative to it.
      if (rotateText) {
        viewCtx0.setTransform(cosAngle, sinAngle, -sinAngle, cosAngle, lineLeftAdj, baselineY);
      } else {
        viewCtx0.setTransform(1, 0, 0, 1, lineLeftAdj, baselineY);
      }

      for (const wordObj of lineObj.words) {
        if (!wordObj.text) continue;

        const { fill } = ocr.getWordFillOpacity(wordObj, displayMode, confThreshMed, confThreshHigh);

        viewCtx0.fillStyle = fill;

        const angleAdjWord = wordObj.sup ? ocr.calcWordAngleAdj(wordObj) : { x: 0, y: 0 };

        // Vertical shift applied to superscripts and drop caps, which do not sit on the line baseline.
        // TODO: Test whether the math here is correct for drop caps.
        let ts = 0;
        if (wordObj.sup) {
          ts = baselineY - (wordObj.bbox.bottom + angleAdjLine.y + angleAdjWord.y);
        } else if (wordObj.dropcap) {
          ts = (lineObj.bbox.bottom + lineObj.baseline[1]) - wordObj.bbox.bottom + angleAdjLine.y + angleAdjWord.y;
        }

        const width = (wordObj.bbox.left - wordObj.line.bbox.left) / cosAngle;

        const visualLeft = width + angleAdjWord.x;

        const {
          advanceArr, kerningArr, charSpacing, fontSize: wordFontSize, leftSideBearing, charArr,
        } = calcWordMetrics(wordObj);

        // Total horizontal advance after each character (glyph advance + kerning + character spacing).
        const advanceArrTotal = [];
        for (let i = 0; i < advanceArr.length; i++) {
          advanceArrTotal.push((advanceArr[i] || 0) + (kerningArr[i] || 0) + (charSpacing || 0));
        }

        const font = FontCont.getWordFont(wordObj);
        const fontSpec = (sizePx) => `${font.fontFaceStyle} ${font.fontFaceWeight} ${sizePx}px ${font.fontFaceName}`;

        viewCtx0.font = fontSpec(wordFontSize);
        let leftI = wordObj.visualCoords ? visualLeft - leftSideBearing : visualLeft;
        for (let i = 0; i < charArr.length; i++) {
          let charI = charArr[i];

          // Small caps: lower-case characters are drawn as upper-case glyphs at a reduced size.
          if (wordObj.smallCaps) {
            if (charI === charI.toUpperCase()) {
              viewCtx0.font = fontSpec(wordFontSize);
            } else {
              charI = charI.toUpperCase();
              viewCtx0.font = fontSpec(wordFontSize * font.smallCapsMult);
            }
          }

          viewCtx0.fillText(charI, leftI, -ts);
          leftI += advanceArrTotal[i];
        }
      }
    }

    return typeof process === 'undefined' ? await viewCtx0.canvas.convertToBlob() : await viewCtx0.canvas.toBuffer('image/png');
  } finally {
    // Always balance the `save()` above, including when image loading or rendering throws.
    viewCtx0.restore();
  }
};