scribe.js-ocr 0.7.4 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,15 +5,10 @@ import {
5
5
  determineSansSerif,
6
6
  getStyleLookup,
7
7
  quantile,
8
- replaceObjectProperties,
9
8
  round6,
10
9
  } from './utils/miscUtils.js';
11
10
 
12
- import { FontMetricsFamily, FontMetricsFont, FontMetricsRawFamily } from './objects/fontMetricsObjects.js';
13
-
14
- import { fontMetricsObj } from './containers/dataContainer.js';
15
-
16
- // import { glyphAlts } from "../fonts/glyphs.js";
11
+ import { CharMetricsFamily, CharMetricsFont, CharMetricsRawFamily } from './objects/charMetricsObjects.js';
17
12
 
18
13
  /**
19
14
  * Combine page-level character statistics to calculate overall font metrics.
@@ -21,102 +16,102 @@ import { fontMetricsObj } from './containers/dataContainer.js';
21
16
  *
22
17
  * @param {Array<OcrPage>} pageArr
23
18
  */
24
- export function calcFontMetricsFromPages(pageArr) {
25
- const pageFontMetricsArr = pageArr.map((x) => calcFontMetricsPage(x));
19
+ export function calcCharMetricsFromPages(pageArr) {
20
+ const pageCharMetricsArr = pageArr.map((x) => calcCharMetricsPage(x));
26
21
 
27
- const fontMetricsRawObj = pageFontMetricsArr.reduce((x, y) => unionFontMetricsRawObj(x, y));
22
+ const charMetricsRawObj = pageCharMetricsArr.reduce((x, y) => unionCharMetricsRawObj(x, y));
28
23
 
29
- /** @type {Object.<string, FontMetricsFamily>} */
30
- const fontMetricsOut = {};
24
+ /** @type {Object.<string, CharMetricsFamily>} */
25
+ const charMetricsOut = {};
31
26
 
32
- for (const [family, obj] of Object.entries(fontMetricsRawObj)) {
33
- fontMetricsOut[family] = new FontMetricsFamily();
27
+ for (const [family, obj] of Object.entries(charMetricsRawObj)) {
28
+ charMetricsOut[family] = new CharMetricsFamily();
34
29
  for (const [style, obj2] of Object.entries(obj)) {
35
- fontMetricsOut[family][style] = calculateFontMetrics(obj2);
36
- fontMetricsOut[family].obs += fontMetricsOut[family][style].obs;
30
+ charMetricsOut[family][style] = calculateCharMetrics(obj2);
31
+ charMetricsOut[family].obs += charMetricsOut[family][style].obs;
37
32
  }
38
33
  }
39
34
 
40
- if (Object.keys(fontMetricsOut).length > 0) replaceObjectProperties(fontMetricsObj, fontMetricsOut);
35
+ return charMetricsOut;
41
36
  }
42
37
 
43
- // The following functions are used for combining an array of page-level fontMetrics objects produced by convertPage.js into a single document-level object.
38
+ // The following functions are used for combining an array of page-level charMetrics objects produced by convertPage.js into a single document-level object.
44
39
 
45
40
  /**
46
- * Adds observations from `fontMetricsB` into `fontMetricsA`. Modifies `fontMetricsA` in place.
41
+ * Adds observations from `charMetricsRawFontB` into `charMetricsRawFontA`. Modifies `charMetricsRawFontA` in place.
47
42
  *
48
- * @param {?FontMetricsRawFont} fontMetricsRawFontA
49
- * @param {?FontMetricsRawFont} fontMetricsRawFontB
50
- * @param {?number} xHeight - If specified, values from `fontMetricsRawFontB` will be normalized by dividing by `xHeight`.
51
- * @returns {?FontMetricsRawFont} - Returns fontMetricsFontA after modifying in place
43
+ * @param {?CharMetricsRawFont} charMetricsRawFontA
44
+ * @param {?CharMetricsRawFont} charMetricsRawFontB
45
+ * @param {?number} xHeight - If specified, values from `charMetricsRawFontB` will be normalized by dividing by `xHeight`.
46
+ * @returns {?CharMetricsRawFont} - Returns charMetricsRawFontA after modifying in place
52
47
  */
53
- export function unionFontMetricsFont(fontMetricsRawFontA, fontMetricsRawFontB, xHeight = null) {
48
+ function unionCharMetricsFont(charMetricsRawFontA, charMetricsRawFontB, xHeight = null) {
54
49
  // If one of the inputs is undefined, return early with the only valid object
55
- if (!fontMetricsRawFontA) {
56
- if (!fontMetricsRawFontB) return null;
57
- fontMetricsRawFontA = structuredClone(fontMetricsRawFontB);
58
- return fontMetricsRawFontA;
50
+ if (!charMetricsRawFontA) {
51
+ if (!charMetricsRawFontB) return null;
52
+ charMetricsRawFontA = structuredClone(charMetricsRawFontB);
53
+ return charMetricsRawFontA;
59
54
  }
60
- if (!fontMetricsRawFontB) {
61
- return fontMetricsRawFontA;
55
+ if (!charMetricsRawFontB) {
56
+ return charMetricsRawFontA;
62
57
  }
63
58
 
64
- if (fontMetricsRawFontB?.obs) fontMetricsRawFontA.obs += fontMetricsRawFontB.obs;
59
+ if (charMetricsRawFontB?.obs) charMetricsRawFontA.obs += charMetricsRawFontB.obs;
65
60
 
66
- for (const [prop, obj] of Object.entries(fontMetricsRawFontB)) {
61
+ for (const [prop, obj] of Object.entries(charMetricsRawFontB)) {
67
62
  for (const [key, value] of Object.entries(obj)) {
68
- if (!fontMetricsRawFontA[prop][key]) {
69
- fontMetricsRawFontA[prop][key] = [];
63
+ if (!charMetricsRawFontA[prop][key]) {
64
+ charMetricsRawFontA[prop][key] = [];
70
65
  }
71
66
  if (xHeight) {
72
67
  const valueNorm = value.map((x) => x / xHeight).filter((x) => x);
73
- Array.prototype.push.apply(fontMetricsRawFontA[prop][key], valueNorm);
68
+ Array.prototype.push.apply(charMetricsRawFontA[prop][key], valueNorm);
74
69
  } else {
75
- Array.prototype.push.apply(fontMetricsRawFontA[prop][key], value);
70
+ Array.prototype.push.apply(charMetricsRawFontA[prop][key], value);
76
71
  }
77
72
  }
78
73
  }
79
- return (fontMetricsRawFontA);
74
+ return (charMetricsRawFontA);
80
75
  }
81
76
 
82
77
  /**
83
- * Adds observations from `fontMetricsB` into `fontMetricsA`. Modifies `fontMetricsA` in place.
78
+ * Adds observations from `charMetricsRawObjB` into `charMetricsRawObjA`. Modifies `charMetricsRawObjA` in place.
84
79
  *
85
- * @param {Object.<string, FontMetricsRawFamily>} fontMetricsRawObjA
86
- * @param {Object.<string, FontMetricsRawFamily>} fontMetricsRawObjB
87
- * @returns {Object.<string, FontMetricsRawFamily>} - Returns fontMetricsObjA after modifying in place
80
+ * @param {Object.<string, CharMetricsRawFamily>} charMetricsRawObjA
81
+ * @param {Object.<string, CharMetricsRawFamily>} charMetricsRawObjB
82
+ * @returns {Object.<string, CharMetricsRawFamily>} - Returns charMetricsRawObjA after modifying in place
88
83
  */
89
- function unionFontMetricsRawObj(fontMetricsRawObjA, fontMetricsRawObjB) {
90
- for (const [family, obj] of Object.entries(fontMetricsRawObjB)) {
84
+ function unionCharMetricsRawObj(charMetricsRawObjA, charMetricsRawObjB) {
85
+ for (const [family, obj] of Object.entries(charMetricsRawObjB)) {
91
86
  for (const [style, obj2] of Object.entries(obj)) {
92
87
  if (Object.keys(obj2.width).length === 0) continue;
93
- if (!fontMetricsRawObjA[family]) {
94
- fontMetricsRawObjA[family] = new FontMetricsRawFamily();
88
+ if (!charMetricsRawObjA[family]) {
89
+ charMetricsRawObjA[family] = new CharMetricsRawFamily();
95
90
  }
96
91
  }
97
92
  }
98
93
 
99
- for (const [family, obj] of Object.entries(fontMetricsRawObjA)) {
94
+ for (const [family, obj] of Object.entries(charMetricsRawObjA)) {
100
95
  for (const [style, obj2] of Object.entries(obj)) {
101
- unionFontMetricsFont(fontMetricsRawObjA?.[family]?.[style], fontMetricsRawObjB?.[family]?.[style]);
96
+ unionCharMetricsFont(charMetricsRawObjA?.[family]?.[style], charMetricsRawObjB?.[family]?.[style]);
102
97
  }
103
98
  }
104
99
 
105
- return (fontMetricsRawObjA);
100
+ return (charMetricsRawObjA);
106
101
  }
107
102
 
108
103
  /**
109
104
  * Calculates final font statistics from individual observations.
110
105
  *
111
- * @param {FontMetricsRawFont} fontMetricsRawFontObj
112
- * @returns {FontMetricsFont} -
106
+ * @param {CharMetricsRawFont} charMetricsRawFontObj
107
+ * @returns {CharMetricsFont} -
113
108
  */
114
- function calculateFontMetrics(fontMetricsRawFontObj) {
115
- const fontMetricOut = new FontMetricsFont();
109
+ function calculateCharMetrics(charMetricsRawFontObj) {
110
+ const fontMetricOut = new CharMetricsFont();
116
111
 
117
112
  // Take the median of each array
118
113
  for (const prop of ['width', 'height', 'kerning', 'kerning2']) {
119
- for (const [key, value] of Object.entries(fontMetricsRawFontObj[prop])) {
114
+ for (const [key, value] of Object.entries(charMetricsRawFontObj[prop])) {
120
115
  if (value.length > 0) {
121
116
  fontMetricOut[prop][key] = round6(quantile(value, 0.5));
122
117
  }
@@ -125,7 +120,7 @@ function calculateFontMetrics(fontMetricsRawFontObj) {
125
120
 
126
121
  // Calculate median height of capital letters only
127
122
  const heightCapsArr = [];
128
- for (const [key, value] of Object.entries(fontMetricsRawFontObj.height)) {
123
+ for (const [key, value] of Object.entries(charMetricsRawFontObj.height)) {
129
124
  if (/[A-Z]/.test(String.fromCharCode(parseInt(key)))) {
130
125
  Array.prototype.push.apply(heightCapsArr, value);
131
126
  }
@@ -134,12 +129,12 @@ function calculateFontMetrics(fontMetricsRawFontObj) {
134
129
  fontMetricOut.heightCaps = round6(quantile(heightCapsArr, 0.5));
135
130
  fontMetricOut.obsCaps = heightCapsArr.length;
136
131
 
137
- fontMetricOut.obs = fontMetricsRawFontObj.obs;
132
+ fontMetricOut.obs = charMetricsRawFontObj.obs;
138
133
 
139
134
  // Standardize all metrics to be normalized by x-height
140
135
  // The raw metrics may be normalized by ascHeight (for numbers) or x-height (for all other characters).
141
136
  for (const prop of ['width', 'height', 'kerning', 'kerning2']) {
142
- for (const [key, value] of Object.entries(fontMetricsRawFontObj[prop])) {
137
+ for (const [key, value] of Object.entries(charMetricsRawFontObj[prop])) {
143
138
  const nameFirst = key.match(/\w+/)[0];
144
139
  const charFirst = String.fromCharCode(parseInt(nameFirst));
145
140
  if (/\d/.test(charFirst)) {
@@ -151,7 +146,7 @@ function calculateFontMetrics(fontMetricsRawFontObj) {
151
146
  // The `kerning2` observations contain the measurement between the end of char 1 and the end of char 2.
152
147
  // Therefore, the width of char 2 must be subtracted to get a measurement comparable with `kerning`.
153
148
  for (const prop of ['kerning2']) {
154
- for (const [key, value] of Object.entries(fontMetricsRawFontObj[prop])) {
149
+ for (const [key, value] of Object.entries(charMetricsRawFontObj[prop])) {
155
150
  if (value.length > 0) {
156
151
  const nameSecond = key.match(/\w+$/)[0];
157
152
 
@@ -205,42 +200,43 @@ const roundedVWRegex = new RegExp(roundedVWArr.reduce((x, y) => `${x}|${y}`), 'i
205
200
  const serifStemSerifPQArr = ['Bookman', 'Century_Schoolbook', 'Courier', 'Georgia', 'Times'];
206
201
  const serifStemSerifPQRegex = new RegExp(serifStemSerifPQArr.reduce((x, y) => `${x}|${y}`), 'i');
207
202
 
203
+ // This function is currently unused. Keeping as we may restore this feature in the future.
208
204
  // While the majority of glyphs can be approximated by applying geometric transformations to a single sans and serif font,
209
205
  // there are some exceptions (e.g. the lowercase "g" has 2 distinct variations).
210
206
  // This function identifies variations that require switching out a glyph from the default font entirely.
211
- export function identifyFontVariants(fontScores, fontMetrics) {
212
- if (fontMetrics?.SansDefault?.normal) {
207
+ function identifyFontVariants(fontScores, charMetrics) {
208
+ if (charMetrics?.SansDefault?.normal) {
213
209
  const sansG = calcTopFont(fontScores?.SansDefault?.normal?.g);
214
- fontMetrics.SansDefault.normal.variants.sans_g = singleGRegex.test(sansG);
210
+ charMetrics.SansDefault.normal.variants.sans_g = singleGRegex.test(sansG);
215
211
  const sans1 = calcTopFont(fontScores?.SansDefault?.normal?.['1']);
216
- fontMetrics.SansDefault.normal.variants.sans_1 = base1Regex.test(sans1);
212
+ charMetrics.SansDefault.normal.variants.sans_1 = base1Regex.test(sans1);
217
213
  }
218
214
 
219
- if (fontMetrics?.SerifDefault?.italic) {
215
+ if (charMetrics?.SerifDefault?.italic) {
220
216
  const minY = calcTopFont(fontScores?.SerifDefault?.italic?.y);
221
- fontMetrics.SerifDefault.italic.variants.serif_italic_y = minYRegex.test(minY);
217
+ charMetrics.SerifDefault.italic.variants.serif_italic_y = minYRegex.test(minY);
222
218
  const closedK = calcTopFont(fontScores?.SerifDefault?.italic?.y);
223
- fontMetrics.SerifDefault.italic.variants.serif_open_k = !closedKRegex.test(closedK);
219
+ charMetrics.SerifDefault.italic.variants.serif_open_k = !closedKRegex.test(closedK);
224
220
 
225
221
  const roundedV = calcTopFont(fontScores?.SerifDefault?.italic?.v);
226
222
  const roundedW = calcTopFont(fontScores?.SerifDefault?.italic?.w);
227
- fontMetrics.SerifDefault.italic.variants.serif_pointy_vw = !(roundedVWRegex.test(roundedV) || roundedVWRegex.test(roundedW));
223
+ charMetrics.SerifDefault.italic.variants.serif_pointy_vw = !(roundedVWRegex.test(roundedV) || roundedVWRegex.test(roundedW));
228
224
 
229
225
  const serifItalicP = calcTopFont(fontScores?.SerifDefault?.italic?.p);
230
226
  const serifItalicQ = calcTopFont(fontScores?.SerifDefault?.italic?.q);
231
- fontMetrics.SerifDefault.italic.variants.serif_stem_sans_pq = !(serifStemSerifPQRegex.test(serifItalicP) || serifStemSerifPQRegex.test(serifItalicQ));
227
+ charMetrics.SerifDefault.italic.variants.serif_stem_sans_pq = !(serifStemSerifPQRegex.test(serifItalicP) || serifStemSerifPQRegex.test(serifItalicQ));
232
228
  }
233
229
 
234
- return fontMetrics;
230
+ return charMetrics;
235
231
  }
236
232
 
237
233
  /**
238
234
  *
239
235
  * @param {OcrPage} pageObj
240
236
  */
241
- function calcFontMetricsPage(pageObj) {
242
- /** @type {Object.<string, FontMetricsRawFamily>} */
243
- const fontMetricsRawPage = {};
237
+ function calcCharMetricsPage(pageObj) {
238
+ /** @type {Object.<string, CharMetricsRawFamily>} */
239
+ const charMetricsRawPage = {};
244
240
 
245
241
  for (const lineObj of pageObj.lines) {
246
242
  for (const wordObj of lineObj.words) {
@@ -251,8 +247,8 @@ function calcFontMetricsPage(pageObj) {
251
247
 
252
248
  // Do not include superscripts, dropcaps, and low-confidence words in statistics for font optimization.
253
249
  if (wordObj.conf < 80 || wordObj.lang === 'chi_sim' || wordObj.style.sup || wordObj.style.smallCaps) continue;
254
- /** @type {Object.<string, FontMetricsRawFamily>} */
255
- const fontMetricsRawLine = {};
250
+ /** @type {Object.<string, CharMetricsRawFamily>} */
251
+ const charMetricsRawLine = {};
256
252
 
257
253
  if (wordObj.chars) {
258
254
  for (let k = 0; k < wordObj.chars.length; k++) {
@@ -272,22 +268,22 @@ function calcFontMetricsPage(pageObj) {
272
268
  // May cause future issues as this code assumes one character per <ocrx_cinfo> tag.
273
269
  const charUnicode = String(charObj.text.charCodeAt(0));
274
270
 
275
- if (!fontMetricsRawLine[wordFontFamily]) {
276
- fontMetricsRawLine[wordFontFamily] = new FontMetricsRawFamily();
271
+ if (!charMetricsRawLine[wordFontFamily]) {
272
+ charMetricsRawLine[wordFontFamily] = new CharMetricsRawFamily();
277
273
  }
278
274
 
279
275
  const styleLookup = getStyleLookup(wordObj.style);
280
276
 
281
277
  if (!['normal', 'italic', 'bold'].includes(styleLookup)) continue;
282
278
 
283
- if (!fontMetricsRawLine[wordFontFamily][styleLookup].width[charUnicode]) {
284
- fontMetricsRawLine[wordFontFamily][styleLookup].width[charUnicode] = [];
285
- fontMetricsRawLine[wordFontFamily][styleLookup].height[charUnicode] = [];
279
+ if (!charMetricsRawLine[wordFontFamily][styleLookup].width[charUnicode]) {
280
+ charMetricsRawLine[wordFontFamily][styleLookup].width[charUnicode] = [];
281
+ charMetricsRawLine[wordFontFamily][styleLookup].height[charUnicode] = [];
286
282
  }
287
283
 
288
- fontMetricsRawLine[wordFontFamily][styleLookup].width[charUnicode].push(charWidth / charNorm);
289
- fontMetricsRawLine[wordFontFamily][styleLookup].height[charUnicode].push(charHeight / charNorm);
290
- fontMetricsRawLine[wordFontFamily][styleLookup].obs += 1;
284
+ charMetricsRawLine[wordFontFamily][styleLookup].width[charUnicode].push(charWidth / charNorm);
285
+ charMetricsRawLine[wordFontFamily][styleLookup].height[charUnicode].push(charHeight / charNorm);
286
+ charMetricsRawLine[wordFontFamily][styleLookup].obs += 1;
291
287
 
292
288
  if (k + 1 < wordObj.chars.length) {
293
289
  const charObjNext = wordObj.chars[k + 1];
@@ -300,33 +296,33 @@ function calcFontMetricsPage(pageObj) {
300
296
  if (trailingSpace + charWidthNext > 0) {
301
297
  const bigramUnicode = `${charUnicode},${wordObj.chars[k + 1].text.charCodeAt(0)}`;
302
298
 
303
- if (!fontMetricsRawLine[wordFontFamily][styleLookup].kerning[bigramUnicode]) {
304
- fontMetricsRawLine[wordFontFamily][styleLookup].kerning[bigramUnicode] = [];
305
- fontMetricsRawLine[wordFontFamily][styleLookup].kerning2[bigramUnicode] = [];
299
+ if (!charMetricsRawLine[wordFontFamily][styleLookup].kerning[bigramUnicode]) {
300
+ charMetricsRawLine[wordFontFamily][styleLookup].kerning[bigramUnicode] = [];
301
+ charMetricsRawLine[wordFontFamily][styleLookup].kerning2[bigramUnicode] = [];
306
302
  }
307
- fontMetricsRawLine[wordFontFamily][styleLookup].kerning[bigramUnicode].push(trailingSpace / charNorm);
308
- fontMetricsRawLine[wordFontFamily][styleLookup].kerning2[bigramUnicode].push((trailingSpace + charWidthNext) / charNorm);
303
+ charMetricsRawLine[wordFontFamily][styleLookup].kerning[bigramUnicode].push(trailingSpace / charNorm);
304
+ charMetricsRawLine[wordFontFamily][styleLookup].kerning2[bigramUnicode].push((trailingSpace + charWidthNext) / charNorm);
309
305
  }
310
306
  }
311
307
  }
312
308
  }
313
309
 
314
- for (const [family, obj] of Object.entries(fontMetricsRawLine)) {
310
+ for (const [family, obj] of Object.entries(charMetricsRawLine)) {
315
311
  for (const [style, obj2] of Object.entries(obj)) {
316
312
  if (Object.keys(obj2.width).length === 0) continue;
317
- if (!fontMetricsRawPage[family]) {
318
- fontMetricsRawPage[family] = new FontMetricsRawFamily();
313
+ if (!charMetricsRawPage[family]) {
314
+ charMetricsRawPage[family] = new CharMetricsRawFamily();
319
315
  }
320
316
  }
321
317
  }
322
318
 
323
- for (const [family, obj] of Object.entries(fontMetricsRawPage)) {
319
+ for (const [family, obj] of Object.entries(charMetricsRawPage)) {
324
320
  for (const [style, obj2] of Object.entries(obj)) {
325
- unionFontMetricsFont(fontMetricsRawPage?.[family]?.[style], fontMetricsRawLine?.[family]?.[style]);
321
+ unionCharMetricsFont(charMetricsRawPage?.[family]?.[style], charMetricsRawLine?.[family]?.[style]);
326
322
  }
327
323
  }
328
324
  }
329
325
  }
330
326
 
331
- return fontMetricsRawPage;
327
+ return charMetricsRawPage;
332
328
  }
@@ -326,6 +326,10 @@ export class gs {
326
326
 
327
327
  static terminate = async () => {
328
328
  gs.clear();
329
+ // This function can be run while the scheduler is still initializing.
330
+ // This happens when we pre-load the scheduler, but then terminate before it finishes loading,
331
+ // and it is never actually used.
332
+ await gs.schedulerReady;
329
333
  await gs.schedulerInner.terminate();
330
334
  gs.schedulerInner = null;
331
335
  gs.schedulerReady = null;
package/js/global.d.ts CHANGED
@@ -10,6 +10,24 @@ declare global {
10
10
  sup: boolean;
11
11
  dropcap: boolean;
12
12
  };
13
+
14
+ type FontState = {
15
+ enableOpt: boolean;
16
+ forceOpt: boolean;
17
+ enableCleanToNimbusMono: boolean;
18
+ defaultFontName: string;
19
+ serifDefaultName: string;
20
+ sansDefaultName: string;
21
+ glyphSet: null | 'latin' | 'all';
22
+ charMetrics: { [key: string]: CharMetricsFamily };
23
+ }
24
+
25
+ type ScribeSaveData = {
26
+ ocr: OcrPage[];
27
+ fontState: FontState;
28
+ layoutRegions: LayoutPage[];
29
+ layoutDataTables: LayoutDataTablePage[];
30
+ }
13
31
 
14
32
  type StyleLookup = ('normal'|'bold'|'italic'|'boldItalic');
15
33
 
@@ -20,10 +38,10 @@ declare global {
20
38
  type OcrChar = import("./objects/ocrObjects.js").OcrChar;
21
39
 
22
40
  // Font objects
23
- type FontMetricsFont = import("./objects/fontMetricsObjects.js").FontMetricsFont;
24
- type FontMetricsRawFamily = import("./objects/fontMetricsObjects.js").FontMetricsRawFamily;
25
- type FontMetricsFamily = import("./objects/fontMetricsObjects.js").FontMetricsFamily;
26
- type FontMetricsRawFont = import("./objects/fontMetricsObjects.js").FontMetricsRawFont;
41
+ type CharMetricsFont = import("./objects/charMetricsObjects.js").CharMetricsFont;
42
+ type CharMetricsRawFamily = import("./objects/charMetricsObjects.js").CharMetricsRawFamily;
43
+ type CharMetricsFamily = import("./objects/charMetricsObjects.js").CharMetricsFamily;
44
+ type CharMetricsRawFont = import("./objects/charMetricsObjects.js").CharMetricsRawFont;
27
45
  type FontContainerFont = import("./containers/fontContainer.js").FontContainerFont;
28
46
 
29
47
  type FontContainerFamilyBuiltIn = {
@@ -58,7 +58,7 @@ export async function convertPageAbbyy({ ocrStr, n }) {
58
58
  const warn = { char: 'char_error' };
59
59
 
60
60
  return {
61
- pageObj, fontMetricsObj: {}, dataTables: new LayoutDataTablePage(n), warn,
61
+ pageObj, charMetricsObj: {}, dataTables: new LayoutDataTablePage(n), warn,
62
62
  };
63
63
  }
64
64
 
@@ -82,6 +82,13 @@ export async function convertPageAbbyy({ ocrStr, n }) {
82
82
 
83
83
  const textOrientationFinal = (pageOrientation + textOrientation) % 4;
84
84
 
85
+ /**
86
+ * Convert Abbyy XML paragraph to internal format.
87
+ * Note that Abbyy XML paragraphs are not preserved because paragraphs are re-assigned by the `assignParagraphs` function.
88
+ * Even if this function call was skipped in the code, when saving/restoring the state using .scribe files, paragraph data is not saved.
89
+ * Further development would be needed to preserve paragraph data.
90
+ * @param {string} xmlPar
91
+ */
85
92
  function convertParAbbyy(xmlPar) {
86
93
  /** @type {Array<OcrLine>} */
87
94
  const parLineArr = [];
@@ -5,7 +5,6 @@ import {
5
5
  calcBoxOverlap,
6
6
  calcLang,
7
7
  mean50,
8
- quantile,
9
8
  round6,
10
9
  unescapeXml,
11
10
  } from '../utils/miscUtils.js';
@@ -295,6 +294,7 @@ export async function convertPageStext({ ocrStr, n }) {
295
294
 
296
295
  boldArr.push(boldWord);
297
296
  italicArr.push(italicWord);
297
+ underlineArr.push(underlineWordArr.reduce((a, b) => Number(a) + Number(b), 0) / underlineWordArr.length > 0.5);
298
298
 
299
299
  fontFamilyArr.push(fontFamily);
300
300