scribe.js-ocr 0.7.1 → 0.7.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -29,6 +29,8 @@ export class opt {
29
29
 
30
30
  static reflow = true;
31
31
 
32
+ static removeMargins = false;
33
+
32
34
  static pageBreaks = true;
33
35
 
34
36
  /** @type {("invis"|"ebook"|"eval"|"proof")} */
@@ -264,7 +264,7 @@ export class FontCont {
264
264
 
265
265
  static defaultFontName = 'SerifDefault';
266
266
 
267
- static serifDefaultName = 'NimbusRomNo9L';
267
+ static serifDefaultName = 'NimbusRoman';
268
268
 
269
269
  static sansDefaultName = 'NimbusSans';
270
270
 
@@ -388,8 +388,12 @@ export class FontCont {
388
388
  // Option 1: If we have access to the font, use it.
389
389
  // Option 2: If we do not have access to the font, but it closely resembles a built-in font, use the built-in font.
390
390
  if (!FontCont.raw?.[family]?.[style]) {
391
- if (/Times/i.test(family)) {
392
- family = 'NimbusRomNo9L';
391
+ if (/NimbusRom/i.test(family)) {
392
+ family = 'NimbusRoman';
393
+ } else if (/Times/i.test(family)) {
394
+ family = 'NimbusRoman';
395
+ } else if (/NimbusSan/i.test(family)) {
396
+ family = 'NimbusSans';
393
397
  } else if (/Helvetica/i.test(family)) {
394
398
  family = 'NimbusSans';
395
399
  } else if (/Arial/i.test(family)) {
@@ -406,6 +410,8 @@ export class FontCont {
406
410
  family = 'Carlito';
407
411
  } else if (/Courier/i.test(family) && FontCont.enableCleanToNimbusMono) {
408
412
  family = 'NimbusMono';
413
+ } else if (/NimbusMono/i.test(family) && FontCont.enableCleanToNimbusMono) {
414
+ family = 'NimbusMono';
409
415
  }
410
416
  }
411
417
 
@@ -451,7 +457,7 @@ export class FontCont {
451
457
  FontCont.enableCleanToNimbusMono = false;
452
458
 
453
459
  FontCont.defaultFontName = 'SerifDefault';
454
- FontCont.serifDefaultName = 'NimbusRomNo9L';
460
+ FontCont.serifDefaultName = 'NimbusRoman';
455
461
  FontCont.sansDefaultName = 'NimbusSans';
456
462
  };
457
463
 
@@ -8,11 +8,12 @@ import { saveAs } from '../utils/miscUtils.js';
8
8
  import { writePdf } from './writePdf.js';
9
9
  import { writeHocr } from './writeHocr.js';
10
10
  import { writeText } from './writeText.js';
11
+ import { writeHtml } from './writeHtml.js';
11
12
 
12
13
  /**
13
14
  * Export active OCR data to specified format.
14
15
  * @public
15
- * @param {'pdf'|'hocr'|'docx'|'xlsx'|'txt'|'text'} [format='txt']
16
+ * @param {'pdf'|'hocr'|'docx'|'html'|'xlsx'|'txt'|'text'} [format='txt']
16
17
  * @param {number} [minPage=0] - First page to export.
17
18
  * @param {number} [maxPage=-1] - Last page to export (inclusive). -1 exports through the last page.
18
19
  * @returns {Promise<string|ArrayBuffer>}
@@ -183,6 +184,8 @@ export async function exportData(format = 'txt', minPage = 0, maxPage = -1) {
183
184
  }
184
185
  } else if (format === 'hocr') {
185
186
  content = writeHocr(ocrAll.active, minPage, maxPage);
187
+ } else if (format === 'html') {
188
+ content = writeHtml(ocrAll.active, minPage, maxPage, opt.reflow, opt.removeMargins);
186
189
  } else if (format === 'txt') {
187
190
  content = writeText(ocrDownload, minPage, maxPage, opt.reflow, false);
188
191
  // Defining `DISABLE_DOCX_XLSX` disables docx/xlsx exports when using build tools.
@@ -0,0 +1,223 @@
1
+ import { FontCont } from '../containers/fontContainer.js';
2
+ import { opt } from '../containers/app.js';
3
+ import { calcWordMetrics } from '../utils/fontUtils.js';
4
+ import { assignParagraphs } from '../utils/reflowPars.js';
5
+ import { pageMetricsArr } from '../containers/dataContainer.js';
6
+ import ocr from '../objects/ocrObjects.js';
7
+
8
/**
 * Build the inner HTML for a word rendered in small caps.
 *
 * The text is split into alternating runs of lowercase letters and everything else.
 * Each run is wrapped in its own `<span class="input-sub">`; runs of lowercase letters
 * additionally get an explicit (smaller) `font-size`. The caller is expected to apply
 * `text-transform:uppercase` to the enclosing element, so the lowercase runs print as
 * reduced-size capitals.
 *
 * Lowercase detection uses the Unicode `Ll` category rather than `[a-z]`, so accented and
 * non-Latin lowercase letters are also reduced in size. The text of each run is HTML-escaped
 * because it is emitted as markup.
 *
 * @param {string} text - Raw (unescaped) word text.
 * @param {number} fontSizeHTMLSmallCaps - Font size, in px, applied to runs of lowercase letters.
 * @returns {string} HTML markup, or an empty string when `text` is empty.
 */
const makeSmallCapsDivs = (text, fontSizeHTMLSmallCaps) => {
  // The `u` flag is required for `\p{..}` and also keeps surrogate pairs intact.
  const runs = text.match(/(\p{Ll}+)|(\P{Ll}+)/gu);
  if (!runs) return '';

  return runs.map((run) => {
    const isLower = /\p{Ll}/u.test(run);
    const styleStr = isLower ? `style="font-size:${fontSizeHTMLSmallCaps}px"` : '';
    const escaped = run
      .replace(/&/g, '&amp;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;');
    return `<span class="input-sub" ${styleStr}>${escaped}</span>`;
  }).join('');
};
23
+
24
/**
 * Escape the characters that are significant in HTML text and in double-quoted attribute values.
 *
 * @param {string} str
 * @returns {string}
 */
const escapeHtml = (str) => String(str)
  .replace(/&/g, '&amp;')
  .replace(/</g, '&lt;')
  .replace(/>/g, '&gt;')
  .replace(/"/g, '&quot;');

/**
 * Convert an array of ocrPage objects to an HTML document string.
 *
 * Every word is emitted as an absolutely positioned `<span class="scribe-word">` inside a
 * per-page `<div class="scribe-page">`. Pages are stacked vertically. An `@font-face` rule is
 * emitted for every font that was used by at least one word.
 *
 * @param {Array<OcrPage>} ocrCurrent - Pages to export.
 * @param {number} minpage - The first page to include in the document.
 * @param {number} maxpage - The last page to include in the document (inclusive). -1 means the last page.
 * @param {boolean} reflowText - Remove line breaks within what appears to be the same paragraph.
 * @param {boolean} removeMargins - Remove the margins from the text.
 * @param {?Array<string>} wordIds - An array of word IDs to include in the document.
 *    If omitted, all words are included.
 * @returns {string} The complete HTML document.
 * @throws {Error} When `process` is defined (treated as running in Node.js).
 */
export function writeHtml(ocrCurrent, minpage = 0, maxpage = -1, reflowText = false, removeMargins = false, wordIds = null) {
  if (!(typeof process === 'undefined')) {
    throw new Error('HTML exports are not supported in Node.js');
  }

  // The canvas is only used to measure text; nothing is drawn on it.
  const canvas = new OffscreenCanvas(1, 1);
  const ctx = /** @type {OffscreenCanvasRenderingContext2D} */ (canvas.getContext('2d'));

  const fontsUsed = new Set();

  // Set lookup avoids an O(n) `Array.includes` scan for every word.
  const wordIdSet = wordIds ? new Set(wordIds) : null;

  // Horizontal padding (px) applied to every word via the `.scribe-word` rule.
  const pad = 5;

  // Scale and rotation are currently fixed; the code below is written so they could vary.
  const scale = 1;
  const angle = 0;

  // Offsets that compensate for the padding, so the glyphs (not the padding box) sit at the word origin.
  const topPadOffset = pad * Math.sin(angle * (Math.PI / 180));
  const leftPadOffset = pad * Math.cos(angle * (Math.PI / 180));

  let bodyStr = '<body>';

  if (maxpage === -1) maxpage = ocrCurrent.length - 1;

  // `newLine` deliberately persists across lines and pages: it stays set until a word is actually written.
  let newLine = false;

  // Vertical offset (px) of the current page within the document.
  let top = 0;

  for (let g = minpage; g <= maxpage; g++) {
    if (!ocrCurrent[g] || ocrCurrent[g].lines.length === 0) continue;

    const pageObj = ocrCurrent[g];

    let minLeft = 0;
    let minTop = 0;
    let maxBottom = 0;
    if (removeMargins) {
      // Use Infinity (not 0) as the "unset" sentinel so that a legitimate coordinate of 0 is not overwritten.
      let leftMost = Infinity;
      let topMost = Infinity;
      for (const wordObj of ocr.getPageWords(pageObj)) {
        if (wordIdSet && !wordIdSet.has(wordObj.id)) continue;
        if (wordObj.bbox.left < leftMost) leftMost = wordObj.bbox.left;
        if (wordObj.bbox.top < topMost) topMost = wordObj.bbox.top;
        if (wordObj.bbox.bottom > maxBottom) maxBottom = wordObj.bbox.bottom;
      }
      // When no word was included, fall back to no offset.
      if (Number.isFinite(leftMost)) minLeft = leftMost;
      if (Number.isFinite(topMost)) minTop = topMost;
    }

    bodyStr += `<div class="scribe-page" id="page${g}" style="position:absolute;top:${top}px;">`;
    if (removeMargins) {
      top += Math.min((maxBottom - minTop) + 200, pageMetricsArr[g].dims.height + 10);
    } else {
      top += pageMetricsArr[g].dims.height + 10;
    }

    if (reflowText) {
      const pageAngle = pageMetricsArr[g].angle || 0;
      assignParagraphs(pageObj, pageAngle);
    }

    let parCurrent = pageObj.lines[0].par;

    for (let h = 0; h < pageObj.lines.length; h++) {
      const lineObj = pageObj.lines[h];

      if (reflowText) {
        // With reflow, break only at the start of a page (other than page 0) or when the paragraph changes.
        if ((g > 0 && h === 0) || lineObj.par !== parCurrent) newLine = true;
        parCurrent = lineObj.par;
      } else {
        newLine = true;
      }

      for (let i = 0; i < lineObj.words.length; i++) {
        const wordObj = lineObj.words[i];
        if (!wordObj) continue;

        if (wordIdSet && !wordIdSet.has(wordObj.id)) continue;

        if (newLine) {
          bodyStr += '\n';
        } else if (h > 0 || g > 0 || i > 0) {
          bodyStr += ' ';
        }

        newLine = false;

        const fontI = FontCont.getWordFont(wordObj);
        fontsUsed.add(fontI);

        const {
          charSpacing, leftSideBearing, fontSize, charArr,
        } = calcWordMetrics(wordObj);

        const wordStr = charArr.join('');

        const charSpacingHTML = charSpacing * scale;

        let x1 = wordObj.bbox.left - minLeft;
        const y1 = wordObj.line.bbox.bottom + wordObj.line.baseline[1] - minTop;

        if (wordObj.visualCoords) x1 -= leftSideBearing * scale;

        const fontSizeHTML = fontSize * scale;

        ctx.font = `${fontI.fontFaceStyle} ${fontI.fontFaceWeight} ${fontSizeHTML}px ${fontI.fontFaceName}`;

        const metrics = ctx.measureText(wordStr);

        const fontSizeHTMLSmallCaps = fontSize * scale * fontI.smallCapsMult;

        // Align with baseline
        const topHTML = Math.round((y1 - metrics.fontBoundingBoxAscent + fontSizeHTML * 0.6) * 1000) / 1000;

        let wordStyle = '';

        wordStyle += `left:${x1 - leftPadOffset}px;`;
        wordStyle += `top:${topHTML - topPadOffset}px;`;
        wordStyle += `font-size:${fontSizeHTML}px;`;
        wordStyle += `font-family:${fontI.fontFaceName};`;

        // Not reached while `angle` is fixed at 0; kept so rotation works if `angle` becomes variable.
        if (Math.abs(angle ?? 0) > 0.05) {
          wordStyle += `transform-origin:left ${y1 - topHTML}px;`;
          wordStyle += `transform:rotate(${angle}deg);`;
        }

        // We cannot make the text uppercase in the input field, as this would result in the text being saved as uppercase.
        // Additionally, while there is a small-caps CSS property, it does not allow for customizing the size of the small caps.
        // Therefore, we handle small caps by making all text print as uppercase using the `text-transform` CSS property,
        // and then wrapping each letter in a span with a smaller font size.
        let innerHTML;
        if (wordObj.smallCaps) {
          wordStyle += 'text-transform:uppercase;';
          // The raw text is passed through unchanged; `makeSmallCapsDivs` builds the markup itself.
          innerHTML = makeSmallCapsDivs(wordStr, fontSizeHTMLSmallCaps);
        } else {
          // OCR text may contain `<`, `>` or `&`, which must not be interpreted as markup.
          innerHTML = escapeHtml(wordStr);
        }

        wordStyle += `letter-spacing:${charSpacingHTML}px;`;

        wordStyle += `font-weight:${fontI.fontFaceWeight};`;
        wordStyle += `font-style:${fontI.fontFaceStyle};`;

        // Line height must match the height of the font bounding box for the font metrics to be accurate.
        wordStyle += `line-height:${metrics.fontBoundingBoxAscent + metrics.fontBoundingBoxDescent}px;`;

        bodyStr += `<span class="scribe-word" id="${escapeHtml(wordObj.id)}" style="${wordStyle}">${innerHTML}</span>`;
      }
    }

    bodyStr += '</div>';

    opt.progressHandler({ n: g, type: 'export', info: { } });
  }

  let styleStr = '<style>.scribe-word {';

  styleStr += 'position:absolute;';
  styleStr += `padding-left:${pad}px;`;
  styleStr += `padding-right:${pad}px;`;
  styleStr += 'z-index:1;';
  styleStr += 'white-space:nowrap;';
  if (opt.kerning) {
    styleStr += 'font-kerning:normal;';
  } else {
    styleStr += 'font-kerning:none;';
  }

  styleStr += '}';

  // NOTE(review): the CDN URL is pinned to package version 0.7.1 — confirm whether it should track the current release.
  const cdnPath = 'https://cdn.jsdelivr.net/npm/scribe.js-ocr@0.7.1/fonts/all/';

  for (const fontI of fontsUsed) {
    // Font files are named `<family>-<Style>.woff`, where the "normal" style is spelled "Regular".
    let styleTitleCase = fontI.style.charAt(0).toUpperCase() + fontI.style.slice(1).toLowerCase();
    if (styleTitleCase === 'Normal') styleTitleCase = 'Regular';
    const fontName = `${fontI.family}-${styleTitleCase}.woff`;
    const fontPath = cdnPath + fontName;

    styleStr += `@font-face {
    font-family: '${fontI.fontFaceName}';
    font-style: ${fontI.fontFaceStyle};
    font-weight: ${fontI.fontFaceWeight};
    src: url('${fontPath}');
  }\n`;
  }

  styleStr += '</style>';

  bodyStr += '</body>';

  const htmlStr = `<html><head>${styleStr}</head>${bodyStr}</html>`;

  return htmlStr;
}
@@ -33,9 +33,9 @@ export async function loadBuiltInFontsRaw(glyphSet = 'latin') {
33
33
  let /** @type {Promise<ArrayBuffer>} */palatinoNormal;
34
34
  let /** @type {Promise<ArrayBuffer>} */palatinoItalic;
35
35
  let /** @type {Promise<ArrayBuffer>} */palatinoBold;
36
- let /** @type {Promise<ArrayBuffer>} */nimbusRomNo9LNormal;
37
- let /** @type {Promise<ArrayBuffer>} */nimbusRomNo9LItalic;
38
- let /** @type {Promise<ArrayBuffer>} */nimbusRomNo9LBold;
36
+ let /** @type {Promise<ArrayBuffer>} */nimbusRomanNormal;
37
+ let /** @type {Promise<ArrayBuffer>} */nimbusRomanItalic;
38
+ let /** @type {Promise<ArrayBuffer>} */nimbusRomanBold;
39
39
  let /** @type {Promise<ArrayBuffer>} */nimbusSansNormal;
40
40
  let /** @type {Promise<ArrayBuffer>} */nimbusSansItalic;
41
41
  let /** @type {Promise<ArrayBuffer>} */nimbusSansBold;
@@ -56,9 +56,9 @@ export async function loadBuiltInFontsRaw(glyphSet = 'latin') {
56
56
  palatinoNormal = fetch(new URL('../fonts/latin/Palatino-Regular.woff', import.meta.url)).then((res) => res.arrayBuffer());
57
57
  palatinoItalic = fetch(new URL('../fonts/latin/Palatino-Italic.woff', import.meta.url)).then((res) => res.arrayBuffer());
58
58
  palatinoBold = fetch(new URL('../fonts/latin/Palatino-Bold.woff', import.meta.url)).then((res) => res.arrayBuffer());
59
- nimbusRomNo9LNormal = fetch(new URL('../fonts/latin/NimbusRoman-Regular.woff', import.meta.url)).then((res) => res.arrayBuffer());
60
- nimbusRomNo9LItalic = fetch(new URL('../fonts/latin/NimbusRoman-Italic.woff', import.meta.url)).then((res) => res.arrayBuffer());
61
- nimbusRomNo9LBold = fetch(new URL('../fonts/latin/NimbusRoman-Bold.woff', import.meta.url)).then((res) => res.arrayBuffer());
59
+ nimbusRomanNormal = fetch(new URL('../fonts/latin/NimbusRoman-Regular.woff', import.meta.url)).then((res) => res.arrayBuffer());
60
+ nimbusRomanItalic = fetch(new URL('../fonts/latin/NimbusRoman-Italic.woff', import.meta.url)).then((res) => res.arrayBuffer());
61
+ nimbusRomanBold = fetch(new URL('../fonts/latin/NimbusRoman-Bold.woff', import.meta.url)).then((res) => res.arrayBuffer());
62
62
  nimbusSansNormal = fetch(new URL('../fonts/latin/NimbusSans-Regular.woff', import.meta.url)).then((res) => res.arrayBuffer());
63
63
  nimbusSansItalic = fetch(new URL('../fonts/latin/NimbusSans-Italic.woff', import.meta.url)).then((res) => res.arrayBuffer());
64
64
  nimbusSansBold = fetch(new URL('../fonts/latin/NimbusSans-Bold.woff', import.meta.url)).then((res) => res.arrayBuffer());
@@ -78,9 +78,9 @@ export async function loadBuiltInFontsRaw(glyphSet = 'latin') {
78
78
  palatinoNormal = fetch(new URL('../fonts/all/Palatino-Regular.woff', import.meta.url)).then((res) => res.arrayBuffer());
79
79
  palatinoItalic = fetch(new URL('../fonts/all/Palatino-Italic.woff', import.meta.url)).then((res) => res.arrayBuffer());
80
80
  palatinoBold = fetch(new URL('../fonts/all/Palatino-Bold.woff', import.meta.url)).then((res) => res.arrayBuffer());
81
- nimbusRomNo9LNormal = fetch(new URL('../fonts/all/NimbusRoman-Regular.woff', import.meta.url)).then((res) => res.arrayBuffer());
82
- nimbusRomNo9LItalic = fetch(new URL('../fonts/all/NimbusRoman-Italic.woff', import.meta.url)).then((res) => res.arrayBuffer());
83
- nimbusRomNo9LBold = fetch(new URL('../fonts/all/NimbusRoman-Bold.woff', import.meta.url)).then((res) => res.arrayBuffer());
81
+ nimbusRomanNormal = fetch(new URL('../fonts/all/NimbusRoman-Regular.woff', import.meta.url)).then((res) => res.arrayBuffer());
82
+ nimbusRomanItalic = fetch(new URL('../fonts/all/NimbusRoman-Italic.woff', import.meta.url)).then((res) => res.arrayBuffer());
83
+ nimbusRomanBold = fetch(new URL('../fonts/all/NimbusRoman-Bold.woff', import.meta.url)).then((res) => res.arrayBuffer());
84
84
  nimbusSansNormal = fetch(new URL('../fonts/all/NimbusSans-Regular.woff', import.meta.url)).then((res) => res.arrayBuffer());
85
85
  nimbusSansItalic = fetch(new URL('../fonts/all/NimbusSans-Italic.woff', import.meta.url)).then((res) => res.arrayBuffer());
86
86
  nimbusSansBold = fetch(new URL('../fonts/all/NimbusSans-Bold.woff', import.meta.url)).then((res) => res.arrayBuffer());
@@ -102,9 +102,9 @@ export async function loadBuiltInFontsRaw(glyphSet = 'latin') {
102
102
  palatinoNormal = readFile(new URL('../fonts/all/Palatino-Regular.woff', import.meta.url)).then((res) => res.buffer);
103
103
  palatinoItalic = readFile(new URL('../fonts/all/Palatino-Italic.woff', import.meta.url)).then((res) => res.buffer);
104
104
  palatinoBold = readFile(new URL('../fonts/all/Palatino-Bold.woff', import.meta.url)).then((res) => res.buffer);
105
- nimbusRomNo9LNormal = readFile(new URL('../fonts/all/NimbusRoman-Regular.woff', import.meta.url)).then((res) => res.buffer);
106
- nimbusRomNo9LItalic = readFile(new URL('../fonts/all/NimbusRoman-Italic.woff', import.meta.url)).then((res) => res.buffer);
107
- nimbusRomNo9LBold = readFile(new URL('../fonts/all/NimbusRoman-Bold.woff', import.meta.url)).then((res) => res.buffer);
105
+ nimbusRomanNormal = readFile(new URL('../fonts/all/NimbusRoman-Regular.woff', import.meta.url)).then((res) => res.buffer);
106
+ nimbusRomanItalic = readFile(new URL('../fonts/all/NimbusRoman-Italic.woff', import.meta.url)).then((res) => res.buffer);
107
+ nimbusRomanBold = readFile(new URL('../fonts/all/NimbusRoman-Bold.woff', import.meta.url)).then((res) => res.buffer);
108
108
  nimbusSansNormal = readFile(new URL('../fonts/all/NimbusSans-Regular.woff', import.meta.url)).then((res) => res.buffer);
109
109
  nimbusSansItalic = readFile(new URL('../fonts/all/NimbusSans-Italic.woff', import.meta.url)).then((res) => res.buffer);
110
110
  nimbusSansBold = readFile(new URL('../fonts/all/NimbusSans-Bold.woff', import.meta.url)).then((res) => res.buffer);
@@ -118,7 +118,7 @@ export async function loadBuiltInFontsRaw(glyphSet = 'latin') {
118
118
  Century: { normal: await centuryNormal, italic: await centuryItalic, bold: await centuryBold },
119
119
  Garamond: { normal: await garamondNormal, italic: await garamondItalic, bold: await garamondBold },
120
120
  Palatino: { normal: await palatinoNormal, italic: await palatinoItalic, bold: await palatinoBold },
121
- NimbusRomNo9L: { normal: await nimbusRomNo9LNormal, italic: await nimbusRomNo9LItalic, bold: await nimbusRomNo9LBold },
121
+ NimbusRoman: { normal: await nimbusRomanNormal, italic: await nimbusRomanItalic, bold: await nimbusRomanBold },
122
122
  NimbusSans: { normal: await nimbusSansNormal, italic: await nimbusSansItalic, bold: await nimbusSansBold },
123
123
  NimbusMono: { normal: await nimbusMonoNormal, italic: await nimbusMonoItalic, bold: await nimbusMonoBold },
124
124
  };
@@ -265,7 +265,7 @@ export async function setUploadFontsWorker(scheduler) {
265
265
  /** @type {Object<string, fontSrcBuiltIn|fontSrcUpload>} */
266
266
  const fontsUpload = {};
267
267
  for (const [key, value] of Object.entries(FontCont.active)) {
268
- if (!['Carlito', 'Century', 'Garamond', 'Palatino', 'NimbusRomNo9L', 'NimbusSans', 'NimbusMono'].includes(key)) {
268
+ if (!['Carlito', 'Century', 'Garamond', 'Palatino', 'NimbusRoman', 'NimbusSans', 'NimbusMono'].includes(key)) {
269
269
  fontsUpload[key] = {
270
270
  normal: value?.normal?.src, italic: value?.italic?.src, bold: value?.bold?.src,
271
271
  };
@@ -287,7 +287,7 @@ export async function setUploadFontsWorker(scheduler) {
287
287
 
288
288
  // Set the active font in the workers to match the active font in `fontAll`
289
289
  const resArr = [];
290
- const opt = FontCont.active.Carlito.normal.opt || FontCont.active.NimbusRomNo9L.normal.opt;
290
+ const opt = FontCont.active.Carlito.normal.opt || FontCont.active.NimbusRoman.normal.opt;
291
291
  for (let i = 0; i < scheduler.workers.length; i++) {
292
292
  const worker = scheduler.workers[i];
293
293
  const res = worker.updateFontContWorker({
@@ -388,11 +388,11 @@ export async function optimizeFontContainerAll(fontPrivate, fontMetricsObj) {
388
388
  const centuryPromise = optimizeFontContainerFamily(fontPrivate.Century, fontMetricsObj);
389
389
  const garamondPromise = optimizeFontContainerFamily(fontPrivate.Garamond, fontMetricsObj);
390
390
  const palatinoPromise = optimizeFontContainerFamily(fontPrivate.Palatino, fontMetricsObj);
391
- const nimbusRomNo9LPromise = optimizeFontContainerFamily(fontPrivate.NimbusRomNo9L, fontMetricsObj);
391
+ const nimbusRomanPromise = optimizeFontContainerFamily(fontPrivate.NimbusRoman, fontMetricsObj);
392
392
  const nimbusSansPromise = optimizeFontContainerFamily(fontPrivate.NimbusSans, fontMetricsObj);
393
393
  const nimbusMonoPromise = optimizeFontContainerFamily(fontPrivate.NimbusMono, fontMetricsObj);
394
394
 
395
- const results = await Promise.all([carlitoPromise, centuryPromise, garamondPromise, palatinoPromise, nimbusRomNo9LPromise, nimbusSansPromise, nimbusMonoPromise]);
395
+ const results = await Promise.all([carlitoPromise, centuryPromise, garamondPromise, palatinoPromise, nimbusRomanPromise, nimbusSansPromise, nimbusMonoPromise]);
396
396
 
397
397
  if (results.every((x) => x === null)) return null;
398
398
 
@@ -401,7 +401,7 @@ export async function optimizeFontContainerAll(fontPrivate, fontMetricsObj) {
401
401
  Century: results[1],
402
402
  Garamond: results[2],
403
403
  Palatino: results[3],
404
- NimbusRomNo9L: results[4],
404
+ NimbusRoman: results[4],
405
405
  NimbusSans: results[5],
406
406
  NimbusMono: results[6],
407
407
  };
package/js/fontEval.js CHANGED
@@ -49,7 +49,7 @@ export async function evaluateFonts(pageArr, opt) {
49
49
  const evalCentury = !!(opt ? FontCont.opt?.Century : FontCont.raw?.Century);
50
50
  const evalPalatino = !!(opt ? FontCont.opt?.Palatino : FontCont.raw?.Palatino);
51
51
  const evalGaramond = !!(opt ? FontCont.opt?.Garamond : FontCont.raw?.Garamond);
52
- const evalNimbusRomNo9L = !!(opt ? FontCont.opt?.NimbusRomNo9L : FontCont.raw?.NimbusRomNo9L);
52
+ const evalNimbusRoman = !!(opt ? FontCont.opt?.NimbusRoman : FontCont.raw?.NimbusRoman);
53
53
  const evalNimbusMono = !!(opt ? FontCont.opt?.NimbusMono : FontCont.raw?.NimbusMono);
54
54
 
55
55
  const fontMetricsPromises = {
@@ -58,7 +58,7 @@ export async function evaluateFonts(pageArr, opt) {
58
58
  century: evalCentury ? evalPagesFont('Century', pageArr, opt) : null,
59
59
  palatino: evalPalatino ? evalPagesFont('Palatino', pageArr, opt) : null,
60
60
  garamond: evalGaramond ? evalPagesFont('Garamond', pageArr, opt) : null,
61
- nimbusRomNo9L: evalNimbusRomNo9L ? evalPagesFont('NimbusRomNo9L', pageArr, opt) : null,
61
+ nimbusRoman: evalNimbusRoman ? evalPagesFont('NimbusRoman', pageArr, opt) : null,
62
62
  nimbusMono: evalNimbusMono ? evalPagesFont('NimbusMono', pageArr, opt) : null,
63
63
  };
64
64
 
@@ -68,7 +68,7 @@ export async function evaluateFonts(pageArr, opt) {
68
68
  century: await fontMetricsPromises.century,
69
69
  palatino: await fontMetricsPromises.palatino,
70
70
  garamond: await fontMetricsPromises.garamond,
71
- nimbusRomNo9L: await fontMetricsPromises.nimbusRomNo9L,
71
+ nimbusRoman: await fontMetricsPromises.nimbusRoman,
72
72
  nimbusMono: await fontMetricsPromises.nimbusMono,
73
73
  };
74
74
 
@@ -78,7 +78,7 @@ export async function evaluateFonts(pageArr, opt) {
78
78
  Century: fontMetricsTmp.century ? fontMetricsTmp.century.metricTotal / fontMetricsTmp.century.wordsTotal : null,
79
79
  Palatino: fontMetricsTmp.palatino ? fontMetricsTmp.palatino.metricTotal / fontMetricsTmp.palatino.wordsTotal : null,
80
80
  Garamond: fontMetricsTmp.garamond ? fontMetricsTmp.garamond.metricTotal / fontMetricsTmp.garamond.wordsTotal : null,
81
- NimbusRomNo9L: fontMetricsTmp.nimbusRomNo9L ? fontMetricsTmp.nimbusRomNo9L.metricTotal / fontMetricsTmp.nimbusRomNo9L.wordsTotal : null,
81
+ NimbusRoman: fontMetricsTmp.nimbusRoman ? fontMetricsTmp.nimbusRoman.metricTotal / fontMetricsTmp.nimbusRoman.wordsTotal : null,
82
82
  NimbusMono: fontMetricsTmp.nimbusMono ? fontMetricsTmp.nimbusMono.metricTotal / fontMetricsTmp.nimbusMono.wordsTotal : null,
83
83
  };
84
84
 
@@ -101,11 +101,11 @@ const calcBestFonts = (fontMetrics) => {
101
101
  }
102
102
  }
103
103
 
104
- let minKeySerif = 'NimbusRomNo9L';
104
+ let minKeySerif = 'NimbusRoman';
105
105
  let minValueSerif = Number.MAX_VALUE;
106
106
 
107
107
  for (const [key, value] of Object.entries(fontMetrics)) {
108
- if (!['Century', 'Palatino', 'Garamond', 'NimbusRomNo9L', 'NimbusMono'].includes(key)) continue;
108
+ if (!['Century', 'Palatino', 'Garamond', 'NimbusRoman', 'NimbusMono'].includes(key)) continue;
109
109
  if (value && value < minValueSerif) {
110
110
  minValueSerif = value;
111
111
  minKeySerif = key;
package/js/global.d.ts CHANGED
@@ -32,7 +32,7 @@ declare global {
32
32
  Century: FontContainerFamilyBuiltIn;
33
33
  Garamond: FontContainerFamilyBuiltIn;
34
34
  Palatino: FontContainerFamilyBuiltIn;
35
- NimbusRomNo9L: FontContainerFamilyBuiltIn;
35
+ NimbusRoman: FontContainerFamilyBuiltIn;
36
36
  NimbusSans: FontContainerFamilyBuiltIn;
37
37
  NimbusMono: FontContainerFamilyBuiltIn;
38
38
  [key: string]: FontContainerFamily;
@@ -133,7 +133,9 @@ export async function importOCRFiles(ocrFilesAll) {
133
133
  const sansFontStr = getMeta('sans-font');
134
134
  if (sansFontStr) sansFont = sansFontStr;
135
135
 
136
- const serifFontStr = getMeta('serif-font');
136
+ let serifFontStr = getMeta('serif-font');
137
+ // Older versions of Scribe used 'NimbusRomNo9L' instead of 'NimbusRoman'.
138
+ if (serifFontStr && serifFontStr === 'NimbusRomNo9L') serifFontStr = 'NimbusRoman';
137
139
  if (serifFontStr) serifFont = serifFontStr;
138
140
  }
139
141
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "scribe.js-ocr",
3
- "version": "0.7.1",
3
+ "version": "0.7.2",
4
4
  "description": "High-quality OCR and text extraction for images and PDFs.",
5
5
  "main": "scribe.js",
6
6
  "directories": {