scribe.js-ocr 0.7.0 → 0.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79)
  1. package/.eslintrc.json +6 -0
  2. package/fonts/all/Carlito-Bold.woff +0 -0
  3. package/fonts/all/Carlito-Italic.woff +0 -0
  4. package/fonts/all/Carlito-Regular.woff +0 -0
  5. package/fonts/all/{C059-Bold.woff → Century-Bold.woff} +0 -0
  6. package/fonts/all/{C059-Italic.woff → Century-Italic.woff} +0 -0
  7. package/fonts/all/{C059-Roman.woff → Century-Regular.woff} +0 -0
  8. package/fonts/all/{EBGaramond-Bold.woff → Garamond-Bold.woff} +0 -0
  9. package/fonts/all/{EBGaramond-Italic.woff → Garamond-Italic.woff} +0 -0
  10. package/fonts/all/{EBGaramond-Regular.woff → Garamond-Regular.woff} +0 -0
  11. package/fonts/all/{NimbusMonoPS-Bold.woff → NimbusMono-Bold.woff} +0 -0
  12. package/fonts/all/{NimbusMonoPS-Italic.woff → NimbusMono-Italic.woff} +0 -0
  13. package/fonts/all/{NimbusMonoPS-Regular.woff → NimbusMono-Regular.woff} +0 -0
  14. package/fonts/all/NimbusRoman-Bold.woff +0 -0
  15. package/fonts/all/NimbusRoman-Italic.woff +0 -0
  16. package/fonts/all/NimbusRoman-Regular.woff +0 -0
  17. package/fonts/all/NimbusSans-Bold.woff +0 -0
  18. package/fonts/all/NimbusSans-Italic.woff +0 -0
  19. package/fonts/all/NimbusSans-Regular.woff +0 -0
  20. package/fonts/all/{P052-Bold.woff → Palatino-Bold.woff} +0 -0
  21. package/fonts/all/{P052-Italic.woff → Palatino-Italic.woff} +0 -0
  22. package/fonts/all/{P052-Roman.woff → Palatino-Regular.woff} +0 -0
  23. package/fonts/latin/Carlito-Bold.woff +0 -0
  24. package/fonts/latin/Carlito-Italic.woff +0 -0
  25. package/fonts/latin/Carlito-Regular.woff +0 -0
  26. package/fonts/latin/{C059-Bold.woff → Century-Bold.woff} +0 -0
  27. package/fonts/latin/{C059-Italic.woff → Century-Italic.woff} +0 -0
  28. package/fonts/latin/{C059-Roman.woff → Century-Regular.woff} +0 -0
  29. package/fonts/latin/{EBGaramond-Bold.woff → Garamond-Bold.woff} +0 -0
  30. package/fonts/latin/{EBGaramond-Italic.woff → Garamond-Italic.woff} +0 -0
  31. package/fonts/latin/{EBGaramond-Regular.woff → Garamond-Regular.woff} +0 -0
  32. package/fonts/latin/{NimbusMonoPS-Bold.woff → NimbusMono-Bold.woff} +0 -0
  33. package/fonts/latin/{NimbusMonoPS-Italic.woff → NimbusMono-Italic.woff} +0 -0
  34. package/fonts/latin/{NimbusMonoPS-Regular.woff → NimbusMono-Regular.woff} +0 -0
  35. package/fonts/latin/NimbusRoman-Bold.woff +0 -0
  36. package/fonts/latin/NimbusRoman-Italic.woff +0 -0
  37. package/fonts/latin/NimbusRoman-Regular.woff +0 -0
  38. package/fonts/latin/NimbusSans-Bold.woff +0 -0
  39. package/fonts/latin/NimbusSans-Italic.woff +0 -0
  40. package/fonts/latin/NimbusSans-Regular.woff +0 -0
  41. package/fonts/latin/{P052-Bold.woff → Palatino-Bold.woff} +0 -0
  42. package/fonts/latin/{P052-Italic.woff → Palatino-Italic.woff} +0 -0
  43. package/fonts/latin/{P052-Roman.woff → Palatino-Regular.woff} +0 -0
  44. package/js/containers/fontContainer.js +8 -8
  45. package/js/fontContainerMain.js +45 -45
  46. package/js/fontEval.js +3 -3
  47. package/js/fontSupp.js +7 -7
  48. package/js/generalWorkerMain.js +16 -16
  49. package/js/import/convertPageHocr.js +6 -6
  50. package/js/import/nodeAdapter.js +8 -8
  51. package/js/objects/imageObjects.js +9 -9
  52. package/js/objects/ocrObjects.js +3 -3
  53. package/js/utils/fontUtils.js +8 -8
  54. package/js/utils/imageUtils.js +9 -10
  55. package/js/utils/reflowPars.js +5 -5
  56. package/js/worker/compareOCRModule.js +29 -29
  57. package/js/worker/generalWorker.js +8 -8
  58. package/package.json +4 -3
  59. package/fonts/all_ttf/C059-Bold.ttf +0 -0
  60. package/fonts/all_ttf/C059-Italic.ttf +0 -0
  61. package/fonts/all_ttf/C059-Roman.ttf +0 -0
  62. package/fonts/all_ttf/Carlito-Bold.ttf +0 -0
  63. package/fonts/all_ttf/Carlito-Italic.ttf +0 -0
  64. package/fonts/all_ttf/Carlito-Regular.ttf +0 -0
  65. package/fonts/all_ttf/EBGaramond-Bold.ttf +0 -0
  66. package/fonts/all_ttf/EBGaramond-Italic.ttf +0 -0
  67. package/fonts/all_ttf/EBGaramond-Regular.ttf +0 -0
  68. package/fonts/all_ttf/NimbusMonoPS-Bold.ttf +0 -0
  69. package/fonts/all_ttf/NimbusMonoPS-Italic.ttf +0 -0
  70. package/fonts/all_ttf/NimbusMonoPS-Regular.ttf +0 -0
  71. package/fonts/all_ttf/NimbusRoman-Bold.ttf +0 -0
  72. package/fonts/all_ttf/NimbusRoman-Italic.ttf +0 -0
  73. package/fonts/all_ttf/NimbusRoman-Regular.ttf +0 -0
  74. package/fonts/all_ttf/NimbusSans-Bold.ttf +0 -0
  75. package/fonts/all_ttf/NimbusSans-Italic.ttf +0 -0
  76. package/fonts/all_ttf/NimbusSans-Regular.ttf +0 -0
  77. package/fonts/all_ttf/P052-Bold.ttf +0 -0
  78. package/fonts/all_ttf/P052-Italic.ttf +0 -0
  79. package/fonts/all_ttf/P052-Roman.ttf +0 -0
package/.eslintrc.json CHANGED
@@ -11,6 +11,9 @@
11
11
  "ecmaVersion": "latest",
12
12
  "sourceType": "module"
13
13
  },
14
+ "plugins": [
15
+ "jsdoc"
16
+ ],
14
17
  // "globals": {
15
18
  // "fabric": "writable"
16
19
  // },
@@ -20,6 +23,9 @@
20
23
  200,
21
24
  { "ignoreRegExpLiterals": true, "ignoreTemplateLiterals": true }
22
25
  ],
26
+
27
+ "jsdoc/check-alignment": 1,
28
+
23
29
  // This rule results in code being deleted.
24
30
  "no-unreachable": "off",
25
31
  // This edit allows for .js files (but not packages) to have an extension.
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
@@ -365,14 +365,14 @@ export class FontCont {
365
365
  };
366
366
 
367
367
  /**
368
- * Gets a font object. Unlike accessing the font containers directly,
369
- * this method allows for special values 'Default', 'SansDefault', and 'SerifDefault' to be used.
370
- *
371
- * @param {('Default'|'SansDefault'|'SerifDefault'|string)} family - Font family name.
372
- * @param {('normal'|'italic'|'bold'|string)} [style='normal']
373
- * @param {string} [lang='eng']
374
- * @returns {FontContainerFont}
375
- */
368
+ * Gets a font object. Unlike accessing the font containers directly,
369
+ * this method allows for special values 'Default', 'SansDefault', and 'SerifDefault' to be used.
370
+ *
371
+ * @param {('Default'|'SansDefault'|'SerifDefault'|string)} family - Font family name.
372
+ * @param {('normal'|'italic'|'bold'|string)} [style='normal']
373
+ * @param {string} [lang='eng']
374
+ * @returns {FontContainerFont}
375
+ */
376
376
  static getFont = (family, style = 'normal', lang = 'eng') => {
377
377
  if (FontCont.doc?.[family]?.[style] && !FontCont.doc?.[family]?.[style]?.disable) {
378
378
  return FontCont.doc[family][style];
@@ -47,70 +47,70 @@ export async function loadBuiltInFontsRaw(glyphSet = 'latin') {
47
47
  carlitoNormal = fetch(new URL('../fonts/latin/Carlito-Regular.woff', import.meta.url)).then((res) => res.arrayBuffer());
48
48
  carlitoItalic = fetch(new URL('../fonts/latin/Carlito-Italic.woff', import.meta.url)).then((res) => res.arrayBuffer());
49
49
  carlitoBold = fetch(new URL('../fonts/latin/Carlito-Bold.woff', import.meta.url)).then((res) => res.arrayBuffer());
50
- centuryNormal = fetch(new URL('../fonts/latin/C059-Roman.woff', import.meta.url)).then((res) => res.arrayBuffer());
51
- centuryItalic = fetch(new URL('../fonts/latin/C059-Italic.woff', import.meta.url)).then((res) => res.arrayBuffer());
52
- centuryBold = fetch(new URL('../fonts/latin/C059-Bold.woff', import.meta.url)).then((res) => res.arrayBuffer());
53
- garamondNormal = fetch(new URL('../fonts/latin/EBGaramond-Regular.woff', import.meta.url)).then((res) => res.arrayBuffer());
54
- garamondItalic = fetch(new URL('../fonts/latin/EBGaramond-Italic.woff', import.meta.url)).then((res) => res.arrayBuffer());
55
- garamondBold = fetch(new URL('../fonts/latin/EBGaramond-Bold.woff', import.meta.url)).then((res) => res.arrayBuffer());
56
- palatinoNormal = fetch(new URL('../fonts/latin/P052-Roman.woff', import.meta.url)).then((res) => res.arrayBuffer());
57
- palatinoItalic = fetch(new URL('../fonts/latin/P052-Italic.woff', import.meta.url)).then((res) => res.arrayBuffer());
58
- palatinoBold = fetch(new URL('../fonts/latin/P052-Bold.woff', import.meta.url)).then((res) => res.arrayBuffer());
50
+ centuryNormal = fetch(new URL('../fonts/latin/Century-Regular.woff', import.meta.url)).then((res) => res.arrayBuffer());
51
+ centuryItalic = fetch(new URL('../fonts/latin/Century-Italic.woff', import.meta.url)).then((res) => res.arrayBuffer());
52
+ centuryBold = fetch(new URL('../fonts/latin/Century-Bold.woff', import.meta.url)).then((res) => res.arrayBuffer());
53
+ garamondNormal = fetch(new URL('../fonts/latin/Garamond-Regular.woff', import.meta.url)).then((res) => res.arrayBuffer());
54
+ garamondItalic = fetch(new URL('../fonts/latin/Garamond-Italic.woff', import.meta.url)).then((res) => res.arrayBuffer());
55
+ garamondBold = fetch(new URL('../fonts/latin/Garamond-Bold.woff', import.meta.url)).then((res) => res.arrayBuffer());
56
+ palatinoNormal = fetch(new URL('../fonts/latin/Palatino-Regular.woff', import.meta.url)).then((res) => res.arrayBuffer());
57
+ palatinoItalic = fetch(new URL('../fonts/latin/Palatino-Italic.woff', import.meta.url)).then((res) => res.arrayBuffer());
58
+ palatinoBold = fetch(new URL('../fonts/latin/Palatino-Bold.woff', import.meta.url)).then((res) => res.arrayBuffer());
59
59
  nimbusRomNo9LNormal = fetch(new URL('../fonts/latin/NimbusRoman-Regular.woff', import.meta.url)).then((res) => res.arrayBuffer());
60
60
  nimbusRomNo9LItalic = fetch(new URL('../fonts/latin/NimbusRoman-Italic.woff', import.meta.url)).then((res) => res.arrayBuffer());
61
61
  nimbusRomNo9LBold = fetch(new URL('../fonts/latin/NimbusRoman-Bold.woff', import.meta.url)).then((res) => res.arrayBuffer());
62
62
  nimbusSansNormal = fetch(new URL('../fonts/latin/NimbusSans-Regular.woff', import.meta.url)).then((res) => res.arrayBuffer());
63
63
  nimbusSansItalic = fetch(new URL('../fonts/latin/NimbusSans-Italic.woff', import.meta.url)).then((res) => res.arrayBuffer());
64
64
  nimbusSansBold = fetch(new URL('../fonts/latin/NimbusSans-Bold.woff', import.meta.url)).then((res) => res.arrayBuffer());
65
- nimbusMonoNormal = fetch(new URL('../fonts/latin/NimbusMonoPS-Regular.woff', import.meta.url)).then((res) => res.arrayBuffer());
66
- nimbusMonoItalic = fetch(new URL('../fonts/latin/NimbusMonoPS-Italic.woff', import.meta.url)).then((res) => res.arrayBuffer());
67
- nimbusMonoBold = fetch(new URL('../fonts/latin/NimbusMonoPS-Bold.woff', import.meta.url)).then((res) => res.arrayBuffer());
65
+ nimbusMonoNormal = fetch(new URL('../fonts/latin/NimbusMono-Regular.woff', import.meta.url)).then((res) => res.arrayBuffer());
66
+ nimbusMonoItalic = fetch(new URL('../fonts/latin/NimbusMono-Italic.woff', import.meta.url)).then((res) => res.arrayBuffer());
67
+ nimbusMonoBold = fetch(new URL('../fonts/latin/NimbusMono-Bold.woff', import.meta.url)).then((res) => res.arrayBuffer());
68
68
  } else {
69
69
  carlitoNormal = fetch(new URL('../fonts/all/Carlito-Regular.woff', import.meta.url)).then((res) => res.arrayBuffer());
70
70
  carlitoItalic = fetch(new URL('../fonts/all/Carlito-Italic.woff', import.meta.url)).then((res) => res.arrayBuffer());
71
71
  carlitoBold = fetch(new URL('../fonts/all/Carlito-Bold.woff', import.meta.url)).then((res) => res.arrayBuffer());
72
- centuryNormal = fetch(new URL('../fonts/all/C059-Roman.woff', import.meta.url)).then((res) => res.arrayBuffer());
73
- centuryItalic = fetch(new URL('../fonts/all/C059-Italic.woff', import.meta.url)).then((res) => res.arrayBuffer());
74
- centuryBold = fetch(new URL('../fonts/all/C059-Bold.woff', import.meta.url)).then((res) => res.arrayBuffer());
75
- garamondNormal = fetch(new URL('../fonts/all/EBGaramond-Regular.woff', import.meta.url)).then((res) => res.arrayBuffer());
76
- garamondItalic = fetch(new URL('../fonts/all/EBGaramond-Italic.woff', import.meta.url)).then((res) => res.arrayBuffer());
77
- garamondBold = fetch(new URL('../fonts/all/EBGaramond-Bold.woff', import.meta.url)).then((res) => res.arrayBuffer());
78
- palatinoNormal = fetch(new URL('../fonts/all/P052-Roman.woff', import.meta.url)).then((res) => res.arrayBuffer());
79
- palatinoItalic = fetch(new URL('../fonts/all/P052-Italic.woff', import.meta.url)).then((res) => res.arrayBuffer());
80
- palatinoBold = fetch(new URL('../fonts/all/P052-Bold.woff', import.meta.url)).then((res) => res.arrayBuffer());
72
+ centuryNormal = fetch(new URL('../fonts/all/Century-Regular.woff', import.meta.url)).then((res) => res.arrayBuffer());
73
+ centuryItalic = fetch(new URL('../fonts/all/Century-Italic.woff', import.meta.url)).then((res) => res.arrayBuffer());
74
+ centuryBold = fetch(new URL('../fonts/all/Century-Bold.woff', import.meta.url)).then((res) => res.arrayBuffer());
75
+ garamondNormal = fetch(new URL('../fonts/all/Garamond-Regular.woff', import.meta.url)).then((res) => res.arrayBuffer());
76
+ garamondItalic = fetch(new URL('../fonts/all/Garamond-Italic.woff', import.meta.url)).then((res) => res.arrayBuffer());
77
+ garamondBold = fetch(new URL('../fonts/all/Garamond-Bold.woff', import.meta.url)).then((res) => res.arrayBuffer());
78
+ palatinoNormal = fetch(new URL('../fonts/all/Palatino-Regular.woff', import.meta.url)).then((res) => res.arrayBuffer());
79
+ palatinoItalic = fetch(new URL('../fonts/all/Palatino-Italic.woff', import.meta.url)).then((res) => res.arrayBuffer());
80
+ palatinoBold = fetch(new URL('../fonts/all/Palatino-Bold.woff', import.meta.url)).then((res) => res.arrayBuffer());
81
81
  nimbusRomNo9LNormal = fetch(new URL('../fonts/all/NimbusRoman-Regular.woff', import.meta.url)).then((res) => res.arrayBuffer());
82
82
  nimbusRomNo9LItalic = fetch(new URL('../fonts/all/NimbusRoman-Italic.woff', import.meta.url)).then((res) => res.arrayBuffer());
83
83
  nimbusRomNo9LBold = fetch(new URL('../fonts/all/NimbusRoman-Bold.woff', import.meta.url)).then((res) => res.arrayBuffer());
84
84
  nimbusSansNormal = fetch(new URL('../fonts/all/NimbusSans-Regular.woff', import.meta.url)).then((res) => res.arrayBuffer());
85
85
  nimbusSansItalic = fetch(new URL('../fonts/all/NimbusSans-Italic.woff', import.meta.url)).then((res) => res.arrayBuffer());
86
86
  nimbusSansBold = fetch(new URL('../fonts/all/NimbusSans-Bold.woff', import.meta.url)).then((res) => res.arrayBuffer());
87
- nimbusMonoNormal = fetch(new URL('../fonts/all/NimbusMonoPS-Regular.woff', import.meta.url)).then((res) => res.arrayBuffer());
88
- nimbusMonoItalic = fetch(new URL('../fonts/all/NimbusMonoPS-Italic.woff', import.meta.url)).then((res) => res.arrayBuffer());
89
- nimbusMonoBold = fetch(new URL('../fonts/all/NimbusMonoPS-Bold.woff', import.meta.url)).then((res) => res.arrayBuffer());
87
+ nimbusMonoNormal = fetch(new URL('../fonts/all/NimbusMono-Regular.woff', import.meta.url)).then((res) => res.arrayBuffer());
88
+ nimbusMonoItalic = fetch(new URL('../fonts/all/NimbusMono-Italic.woff', import.meta.url)).then((res) => res.arrayBuffer());
89
+ nimbusMonoBold = fetch(new URL('../fonts/all/NimbusMono-Bold.woff', import.meta.url)).then((res) => res.arrayBuffer());
90
90
  }
91
91
  } else {
92
92
  const { readFile } = await import('fs/promises');
93
- carlitoNormal = readFile(new URL('../fonts/all_ttf/Carlito-Regular.ttf', import.meta.url)).then((res) => res.buffer);
94
- carlitoItalic = readFile(new URL('../fonts/all_ttf/Carlito-Italic.ttf', import.meta.url)).then((res) => res.buffer);
95
- carlitoBold = readFile(new URL('../fonts/all_ttf/Carlito-Bold.ttf', import.meta.url)).then((res) => res.buffer);
96
- centuryNormal = readFile(new URL('../fonts/all_ttf/C059-Roman.ttf', import.meta.url)).then((res) => res.buffer);
97
- centuryItalic = readFile(new URL('../fonts/all_ttf/C059-Italic.ttf', import.meta.url)).then((res) => res.buffer);
98
- centuryBold = readFile(new URL('../fonts/all_ttf/C059-Bold.ttf', import.meta.url)).then((res) => res.buffer);
99
- garamondNormal = readFile(new URL('../fonts/all_ttf/EBGaramond-Regular.ttf', import.meta.url)).then((res) => res.buffer);
100
- garamondItalic = readFile(new URL('../fonts/all_ttf/EBGaramond-Italic.ttf', import.meta.url)).then((res) => res.buffer);
101
- garamondBold = readFile(new URL('../fonts/all_ttf/EBGaramond-Bold.ttf', import.meta.url)).then((res) => res.buffer);
102
- palatinoNormal = readFile(new URL('../fonts/all_ttf/P052-Roman.ttf', import.meta.url)).then((res) => res.buffer);
103
- palatinoItalic = readFile(new URL('../fonts/all_ttf/P052-Italic.ttf', import.meta.url)).then((res) => res.buffer);
104
- palatinoBold = readFile(new URL('../fonts/all_ttf/P052-Bold.ttf', import.meta.url)).then((res) => res.buffer);
105
- nimbusRomNo9LNormal = readFile(new URL('../fonts/all_ttf/NimbusRoman-Regular.ttf', import.meta.url)).then((res) => res.buffer);
106
- nimbusRomNo9LItalic = readFile(new URL('../fonts/all_ttf/NimbusRoman-Italic.ttf', import.meta.url)).then((res) => res.buffer);
107
- nimbusRomNo9LBold = readFile(new URL('../fonts/all_ttf/NimbusRoman-Bold.ttf', import.meta.url)).then((res) => res.buffer);
108
- nimbusSansNormal = readFile(new URL('../fonts/all_ttf/NimbusSans-Regular.ttf', import.meta.url)).then((res) => res.buffer);
109
- nimbusSansItalic = readFile(new URL('../fonts/all_ttf/NimbusSans-Italic.ttf', import.meta.url)).then((res) => res.buffer);
110
- nimbusSansBold = readFile(new URL('../fonts/all_ttf/NimbusSans-Bold.ttf', import.meta.url)).then((res) => res.buffer);
111
- nimbusMonoNormal = readFile(new URL('../fonts/all_ttf/NimbusMonoPS-Regular.ttf', import.meta.url)).then((res) => res.buffer);
112
- nimbusMonoItalic = readFile(new URL('../fonts/all_ttf/NimbusMonoPS-Italic.ttf', import.meta.url)).then((res) => res.buffer);
113
- nimbusMonoBold = readFile(new URL('../fonts/all_ttf/NimbusMonoPS-Bold.ttf', import.meta.url)).then((res) => res.buffer);
93
+ carlitoNormal = readFile(new URL('../fonts/all/Carlito-Regular.woff', import.meta.url)).then((res) => res.buffer);
94
+ carlitoItalic = readFile(new URL('../fonts/all/Carlito-Italic.woff', import.meta.url)).then((res) => res.buffer);
95
+ carlitoBold = readFile(new URL('../fonts/all/Carlito-Bold.woff', import.meta.url)).then((res) => res.buffer);
96
+ centuryNormal = readFile(new URL('../fonts/all/Century-Regular.woff', import.meta.url)).then((res) => res.buffer);
97
+ centuryItalic = readFile(new URL('../fonts/all/Century-Italic.woff', import.meta.url)).then((res) => res.buffer);
98
+ centuryBold = readFile(new URL('../fonts/all/Century-Bold.woff', import.meta.url)).then((res) => res.buffer);
99
+ garamondNormal = readFile(new URL('../fonts/all/Garamond-Regular.woff', import.meta.url)).then((res) => res.buffer);
100
+ garamondItalic = readFile(new URL('../fonts/all/Garamond-Italic.woff', import.meta.url)).then((res) => res.buffer);
101
+ garamondBold = readFile(new URL('../fonts/all/Garamond-Bold.woff', import.meta.url)).then((res) => res.buffer);
102
+ palatinoNormal = readFile(new URL('../fonts/all/Palatino-Regular.woff', import.meta.url)).then((res) => res.buffer);
103
+ palatinoItalic = readFile(new URL('../fonts/all/Palatino-Italic.woff', import.meta.url)).then((res) => res.buffer);
104
+ palatinoBold = readFile(new URL('../fonts/all/Palatino-Bold.woff', import.meta.url)).then((res) => res.buffer);
105
+ nimbusRomNo9LNormal = readFile(new URL('../fonts/all/NimbusRoman-Regular.woff', import.meta.url)).then((res) => res.buffer);
106
+ nimbusRomNo9LItalic = readFile(new URL('../fonts/all/NimbusRoman-Italic.woff', import.meta.url)).then((res) => res.buffer);
107
+ nimbusRomNo9LBold = readFile(new URL('../fonts/all/NimbusRoman-Bold.woff', import.meta.url)).then((res) => res.buffer);
108
+ nimbusSansNormal = readFile(new URL('../fonts/all/NimbusSans-Regular.woff', import.meta.url)).then((res) => res.buffer);
109
+ nimbusSansItalic = readFile(new URL('../fonts/all/NimbusSans-Italic.woff', import.meta.url)).then((res) => res.buffer);
110
+ nimbusSansBold = readFile(new URL('../fonts/all/NimbusSans-Bold.woff', import.meta.url)).then((res) => res.buffer);
111
+ nimbusMonoNormal = readFile(new URL('../fonts/all/NimbusMono-Regular.woff', import.meta.url)).then((res) => res.buffer);
112
+ nimbusMonoItalic = readFile(new URL('../fonts/all/NimbusMono-Italic.woff', import.meta.url)).then((res) => res.buffer);
113
+ nimbusMonoBold = readFile(new URL('../fonts/all/NimbusMono-Bold.woff', import.meta.url)).then((res) => res.buffer);
114
114
  }
115
115
 
116
116
  const srcObj = {
package/js/fontEval.js CHANGED
@@ -40,9 +40,9 @@ export async function evalPagesFont(font, pageArr, opt, n = 500) {
40
40
  }
41
41
 
42
42
  /**
43
- * @param {Array<OcrPage>} pageArr
44
- * @param {boolean} opt - Whether to use optimized fonts.
45
- */
43
+ * @param {Array<OcrPage>} pageArr
44
+ * @param {boolean} opt - Whether to use optimized fonts.
45
+ */
46
46
  export async function evaluateFonts(pageArr, opt) {
47
47
  const evalCarlito = !!(opt ? FontCont.opt?.Carlito : FontCont.raw?.Carlito);
48
48
  const evalNimbusSans = !!(opt ? FontCont.opt?.NimbusSans : FontCont.raw?.NimbusSans);
package/js/fontSupp.js CHANGED
@@ -62,13 +62,13 @@ const calcSuppFontInfoForWords = async (words) => {
62
62
  };
63
63
 
64
64
  /**
65
- *
66
- * @param {Array<OcrPage>} ocrArr
67
- * @returns
68
- * This function runs recognition on certain fonts when we need more information about them.
69
- * Fonts are included when either (1) we need to know if they are sans or serif or (2) if the text is extracted from a PDF,
70
- * and we need to determine how large to render the text.
71
- */
65
+ *
66
+ * @param {Array<OcrPage>} ocrArr
67
+ * @returns
68
+ * This function runs recognition on certain fonts when we need more information about them.
69
+ * Fonts are included when either (1) we need to know if they are sans or serif or (2) if the text is extracted from a PDF,
70
+ * and we need to determine how large to render the text.
71
+ */
72
72
  export const calcSuppFontInfo = async (ocrArr) => {
73
73
  if (!ocrArr) return;
74
74
  await gs.initTesseract({ anyOk: true, langs: ['eng'] });
@@ -57,10 +57,10 @@ export async function initGeneralWorker() {
57
57
  }
58
58
 
59
59
  /**
60
- * Wraps a function to be called via worker messages.
61
- * @param {string} func The function name to call.
62
- * @returns {Function} A function that returns a promise resolving to the worker's response.
63
- */
60
+ * Wraps a function to be called via worker messages.
61
+ * @param {string} func The function name to call.
62
+ * @returns {Function} A function that returns a promise resolving to the worker's response.
63
+ */
64
64
  function wrap(func) {
65
65
  return function (...args) {
66
66
  return new Promise((innerResolve, innerReject) => {
@@ -72,10 +72,10 @@ export async function initGeneralWorker() {
72
72
  }
73
73
 
74
74
  /**
75
- * Similar to wrap, but handles two promises.
76
- * @param {string} func The function name to call.
77
- * @returns {Array} Returns two promises in an array.
78
- */
75
+ * Similar to wrap, but handles two promises.
76
+ * @param {string} func The function name to call.
77
+ * @returns {Array} Returns two promises in an array.
78
+ */
79
79
  function wrap2(func) {
80
80
  return function (...args) {
81
81
  const id = promiseId++;
@@ -187,14 +187,14 @@ export class gs {
187
187
  static optimizeFont = async (args) => (await gs.schedulerInner.addJob('optimizeFont', args));
188
188
 
189
189
  /**
190
- * @template {Partial<Tesseract.OutputFormats>} TO
191
- * @param {Object} args
192
- * @param {Parameters<Tesseract.Worker['recognize']>[0]} args.image
193
- * @param {Parameters<Tesseract.Worker['recognize']>[1]} args.options
194
- * @param {TO} args.output
195
- * @returns {Promise<Tesseract.Page<TO>>}
196
- * Exported for type inference purposes, should not be imported anywhere.
197
- */
190
+ * @template {Partial<Tesseract.OutputFormats>} TO
191
+ * @param {Object} args
192
+ * @param {Parameters<Tesseract.Worker['recognize']>[0]} args.image
193
+ * @param {Parameters<Tesseract.Worker['recognize']>[1]} args.options
194
+ * @param {TO} args.output
195
+ * @returns {Promise<Tesseract.Page<TO>>}
196
+ * Exported for type inference purposes, should not be imported anywhere.
197
+ */
198
198
  static recognize = async (args) => (await gs.schedulerInner.addJob('recognize', args));
199
199
 
200
200
  /**
@@ -87,8 +87,8 @@ export async function convertPageHocr({
87
87
  }
88
88
 
89
89
  /**
90
- * @param {string} match
91
- */
90
+ * @param {string} match
91
+ */
92
92
  function convertLine(match) {
93
93
  const titleStrLine = match.match(/title=['"]([^'"]+)/)?.[1];
94
94
  if (!titleStrLine) return '';
@@ -258,8 +258,8 @@ export async function convertPageHocr({
258
258
  }
259
259
 
260
260
  /**
261
- * @param {string} match
262
- */
261
+ * @param {string} match
262
+ */
263
263
  function convertWord(match) {
264
264
  const wordID = match.match(/id=['"]([^'"]*)['"]/i)?.[1] || `word_${n + 1}_${pageObj.lines.length + 1}_${lineObj.words.length + 1}`;
265
265
 
@@ -357,8 +357,8 @@ export async function convertPageHocr({
357
357
  }
358
358
 
359
359
  /**
360
- * @param {string} match
361
- */
360
+ * @param {string} match
361
+ */
362
362
  const convertPar = (match) => {
363
363
  const parLang = match.match(/^.+?lang=['"]([^'"]*)['"]/i)?.[1];
364
364
  if (parLang) currentLang = parLang;
@@ -9,11 +9,11 @@ import path from 'path';
9
9
  */
10
10
  export class FileNode {
11
11
  /**
12
- * Creates an instance of the File class.
13
- * @param {string} filePath - The path to the file.
14
- * @param {string} name - The name of the file.
15
- * @param {Buffer} fileData - The file's data.
16
- */
12
+ * Creates an instance of the File class.
13
+ * @param {string} filePath - The path to the file.
14
+ * @param {string} name - The name of the file.
15
+ * @param {Buffer} fileData - The file's data.
16
+ */
17
17
  constructor(filePath, name, fileData) {
18
18
  this.filePath = filePath;
19
19
  this.name = name;
@@ -21,9 +21,9 @@ export class FileNode {
21
21
  }
22
22
 
23
23
  /**
24
- * Returns an ArrayBuffer with the file's contents.
25
- * @returns {Promise<ArrayBuffer>} A promise that resolves with the file's contents as an ArrayBuffer.
26
- */
24
+ * Returns an ArrayBuffer with the file's contents.
25
+ * @returns {Promise<ArrayBuffer>} A promise that resolves with the file's contents as an ArrayBuffer.
26
+ */
27
27
  async arrayBuffer() {
28
28
  return this.fileData.buffer.slice(this.fileData.byteOffset, this.fileData.byteOffset + this.fileData.byteLength);
29
29
  }
@@ -2,15 +2,15 @@ import { getJpegDimensions, getPngDimensions } from '../utils/imageUtils.js';
2
2
 
3
3
  export class ImageWrapper {
4
4
  /**
5
- * @param {number} n - Page number
6
- * @param {string} imageStr - Base-64 encoded image string. Should start with "data:image/png" or "data:image/jpeg".
7
- * @param {string} colorMode - Color mode ("color", "gray", or "binary").
8
- * @param {boolean} rotated - Whether image has been rotated.
9
- * @param {boolean} upscaled - Whether image has been upscaled.
10
- *
11
- * All properties of this object must be serializable, as ImageWrapper objects are sent between threads.
12
- * This means that no promises can be used.
13
- */
5
+ * @param {number} n - Page number
6
+ * @param {string} imageStr - Base-64 encoded image string. Should start with "data:image/png" or "data:image/jpeg".
7
+ * @param {string} colorMode - Color mode ("color", "gray", or "binary").
8
+ * @param {boolean} rotated - Whether image has been rotated.
9
+ * @param {boolean} upscaled - Whether image has been upscaled.
10
+ *
11
+ * All properties of this object must be serializable, as ImageWrapper objects are sent between threads.
12
+ * This means that no promises can be used.
13
+ */
14
14
  constructor(n, imageStr, colorMode, rotated = false, upscaled = false) {
15
15
  this.n = n;
16
16
  this.src = imageStr;
@@ -134,7 +134,7 @@ export function OcrWord(line, text, bbox, id) {
134
134
  /**
135
135
  * @type {boolean} - If `true`, left/right coordinates represent the left/rightmost pixel.
136
136
  * If `false`, left/right coordinates represent the start/end of the font bounding box.
137
- */
137
+ */
138
138
  this.visualCoords = true;
139
139
  }
140
140
 
@@ -331,7 +331,7 @@ export const getLineText = (line) => {
331
331
 
332
332
  /**
333
333
  * @param {OcrPar} par
334
- */
334
+ */
335
335
  export const getParText = (par) => {
336
336
  let text = '';
337
337
  for (let i = 0; i < par.lines.length; i++) {
@@ -584,7 +584,7 @@ function rotateLine(line, angle, dims = null, useCharLevel = false) {
584
584
  /**
585
585
  * Clones page object.
586
586
  * @param {OcrPage} page
587
- */
587
+ */
588
588
  function clonePage(page) {
589
589
  const pageNew = new OcrPage(page.n, { ...page.dims });
590
590
  for (const line of page.lines) {
@@ -99,7 +99,7 @@ function calcWordFontSizePrecise(wordArr, fontOpentype, nonLatin = false) {
99
99
  * Adds ligatures to text of `OcrWord` object. Returns an array of letters.
100
100
  * @param {OcrWord} word
101
101
  * @returns {Array<string>}
102
- */
102
+ */
103
103
  export function addLigatures(word) {
104
104
  if (word.smallCaps || !opt.ligatures) return word.text.split('');
105
105
  const fontI = FontCont.getWordFont(word);
@@ -181,9 +181,9 @@ function calcWordCharMetrics(wordText, fontOpentype) {
181
181
  if (opt.kerning) {
182
182
  const glyphJ = fontOpentype.charToGlyph(charJ);
183
183
  const kerning = fontOpentype.getKerningValue(glyphI, glyphJ);
184
- kerningArr.push(kerning);
184
+ kerningArr.push(kerning);
185
185
  } else {
186
- kerningArr.push(0);
186
+ kerningArr.push(0);
187
187
  }
188
188
  }
189
189
  }
@@ -322,11 +322,11 @@ export const calcWordFontSize = (word) => {
322
322
  // Therefore, the appropriate font size must be calculated using (1) the character stats from the input image and
323
323
  // (2) stats regarding the font being used.
324
324
  /**
325
- * Get or calculate font size for line.
326
- * This value will either be (1) a manually set value or (2) a value calculated using line metrics.
327
- * @param {OcrLine} line
328
- * @returns {number}
329
- */
325
+ * Get or calculate font size for line.
326
+ * This value will either be (1) a manually set value or (2) a value calculated using line metrics.
327
+ * @param {OcrLine} line
328
+ * @returns {number}
329
+ */
330
330
  export const calcLineFontSize = (line) => {
331
331
  if (line._size) return line._size;
332
332
 
@@ -1,6 +1,5 @@
1
1
  /* eslint-disable no-bitwise */
2
2
 
3
-
4
3
  /**
5
4
  * Loads an image from a given URL and sets it to a specified HTML element.
6
5
  *
@@ -53,15 +52,15 @@ export function base64ToBytes(base64) {
53
52
  }
54
53
 
55
54
  /**
56
- * Extracts the width and height from the IHDR chunk of a PNG image encoded in base64.
57
- *
58
- * This function decodes the base64 to bytes and parses the IHDR chunk to extract the image dimensions.
59
- * It assumes the base64 string is a valid PNG image and directly starts parsing the binary data.
60
- * Note: This is a basic implementation without extensive error handling or validation.
61
- *
62
- * @param {string} base64 - The base64 encoded string of the PNG image.
63
- * @returns {dims} An object containing the width and height of the image.
64
- */
55
+ * Extracts the width and height from the IHDR chunk of a PNG image encoded in base64.
56
+ *
57
+ * This function decodes the base64 to bytes and parses the IHDR chunk to extract the image dimensions.
58
+ * It assumes the base64 string is a valid PNG image and directly starts parsing the binary data.
59
+ * Note: This is a basic implementation without extensive error handling or validation.
60
+ *
61
+ * @param {string} base64 - The base64 encoded string of the PNG image.
62
+ * @returns {dims} An object containing the width and height of the image.
63
+ */
65
64
  export function getPngDimensions(base64) {
66
65
  // The number 96 is chosen to line up leanly with byte boundaries (97 would result in an error)
67
66
  // but is otherwise arbitrary, while being large enough to contain the IHDR chunk.
@@ -51,11 +51,11 @@ export function assignParagraphs(page, angle) {
51
51
  }
52
52
 
53
53
  /**
54
- * Calculates expected line start and end positions based on surrounding lines.
55
- * If this line varies from those values, it may be the first or last line of a paragraph.
56
- * @param {number} lineIndex - Index of the line to calculate the expected values for.
57
- * @returns
58
- */
54
+ * Calculates expected line start and end positions based on surrounding lines.
55
+ * If this line varies from those values, it may be the first or last line of a paragraph.
56
+ * @param {number} lineIndex - Index of the line to calculate the expected values for.
57
+ * @returns
58
+ */
59
59
  const calcExpected = (lineIndex) => {
60
60
  // Ideally, we compare the current line to the next 5 lines.
61
61
  // When there are fewer than 5 lines after the current line, we add previous lines to the window.
@@ -74,24 +74,24 @@ export async function drawWordActual(words, imageBinaryBit, angle) {
74
74
  }
75
75
 
76
76
  /**
77
- * Function that draws a word on a canvas.
78
- * This code was factored out to allow for drawing multiple times while only calculating metrics once.
79
- * Therefore, only the drawing code should be in this function; the metrics should be calculated elsewhere
80
- * and passed to this function, rather than calcualting from an `OcrWord` object.
81
- *
82
- * @param {Object} params
83
- * @param {CanvasRenderingContext2D|OffscreenCanvasRenderingContext2D} params.ctx
84
- * @param {Array<string>} params.charArr
85
- * @param {number} params.left
86
- * @param {number} params.bottom
87
- * @param {Array<number>} params.advanceArr - Array of pixels to advance for each character.
88
- * Unlike the "advance" property of a glyph, this is the actual distance to advance on the canvas,
89
- * and should include kerning and character spacing.
90
- * @param {FontContainerFont} params.font
91
- * @param {number} params.size
92
- * @param {boolean} params.smallCaps
93
- * @param {string} [params.fillStyle='black']
94
- */
77
+ * Function that draws a word on a canvas.
78
+ * This code was factored out to allow for drawing multiple times while only calculating metrics once.
79
+ * Therefore, only the drawing code should be in this function; the metrics should be calculated elsewhere
80
+ * and passed to this function, rather than calculating from an `OcrWord` object.
81
+ *
82
+ * @param {Object} params
83
+ * @param {CanvasRenderingContext2D|OffscreenCanvasRenderingContext2D} params.ctx
84
+ * @param {Array<string>} params.charArr
85
+ * @param {number} params.left
86
+ * @param {number} params.bottom
87
+ * @param {Array<number>} params.advanceArr - Array of pixels to advance for each character.
88
+ * Unlike the "advance" property of a glyph, this is the actual distance to advance on the canvas,
89
+ * and should include kerning and character spacing.
90
+ * @param {FontContainerFont} params.font
91
+ * @param {number} params.size
92
+ * @param {boolean} params.smallCaps
93
+ * @param {string} [params.fillStyle='black']
94
+ */
95
95
  const printWordOnCanvas = async ({
96
96
  ctx, charArr, left, bottom, advanceArr, font, size, smallCaps, fillStyle = 'black',
97
97
  }) => {
@@ -118,15 +118,15 @@ const printWordOnCanvas = async ({
118
118
  };
119
119
 
120
120
  /**
121
- * Print word on canvas.
122
- *
123
- * @param {CanvasRenderingContext2D|OffscreenCanvasRenderingContext2D} ctx
124
- * @param {OcrWord} word
125
- * @param {number} offsetX
126
- * @param {number} cropY
127
- * @param {?CanvasRenderingContext2D|OffscreenCanvasRenderingContext2D} ctxView
128
- * @param {boolean} [imageRotated=false] -
129
- */
121
+ * Print word on canvas.
122
+ *
123
+ * @param {CanvasRenderingContext2D|OffscreenCanvasRenderingContext2D} ctx
124
+ * @param {OcrWord} word
125
+ * @param {number} offsetX
126
+ * @param {number} cropY
127
+ * @param {?CanvasRenderingContext2D|OffscreenCanvasRenderingContext2D} ctxView
128
+ * @param {boolean} [imageRotated=false] -
129
+ */
130
130
  export const drawWordRender = async (ctx, word, offsetX = 0, cropY = 0, ctxView = null, imageRotated = false) => {
131
131
  if (!FontCont.raw) throw new Error('Fonts must be defined before running this function.');
132
132
  if (!ctx) throw new Error('Canvases must be defined before running this function.');
@@ -1304,8 +1304,8 @@ export async function evalPageFont({
1304
1304
  }
1305
1305
 
1306
1306
  /**
1307
- * @param {OcrLine} ocrLineJ
1308
- */
1307
+ * @param {OcrLine} ocrLineJ
1308
+ */
1309
1309
  const transformLineFont = (ocrLineJ) => {
1310
1310
  if (!FontCont.raw) throw new Error('Fonts must be defined before running this function.');
1311
1311
 
@@ -329,14 +329,14 @@ export const recognizeAndConvert2 = async ({
329
329
  };
330
330
 
331
331
  /**
332
- * @template {Partial<Tesseract.OutputFormats>} TO
333
- * @param {Object} args
334
- * @param {Parameters<Tesseract.Worker['recognize']>[0]} args.image
335
- * @param {Parameters<Tesseract.Worker['recognize']>[1]} args.options
336
- * @param {TO} args.output
337
- * @returns {Promise<Tesseract.Page<TO>>}
338
- * Exported for type inference purposes, should not be imported anywhere.
339
- */
332
+ * @template {Partial<Tesseract.OutputFormats>} TO
333
+ * @param {Object} args
334
+ * @param {Parameters<Tesseract.Worker['recognize']>[0]} args.image
335
+ * @param {Parameters<Tesseract.Worker['recognize']>[1]} args.options
336
+ * @param {TO} args.output
337
+ * @returns {Promise<Tesseract.Page<TO>>}
338
+ * Exported for type inference purposes, should not be imported anywhere.
339
+ */
340
340
  export const recognize = async ({ image, options, output }) => {
341
341
  if (!worker) throw new Error('Worker not initialized');
342
342
  const res1 = await worker.recognize(image, options, output);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "scribe.js-ocr",
3
- "version": "0.7.0",
3
+ "version": "0.7.1",
4
4
  "description": "High-quality OCR and text extraction for images and PDFs.",
5
5
  "main": "scribe.js",
6
6
  "directories": {
@@ -16,6 +16,7 @@
16
16
  "eslint": "^8.56.0",
17
17
  "eslint-config-airbnb-base": "^15.0.0",
18
18
  "eslint-plugin-import": "^2.29.1",
19
+ "eslint-plugin-jsdoc": "^50.6.2",
19
20
  "express": "^4.18.2",
20
21
  "karma": "^6.4.4",
21
22
  "karma-chrome-launcher": "^3.2.0",
@@ -48,8 +49,8 @@
48
49
  },
49
50
  "homepage": "https://github.com/scribeocr/scribe.js#readme",
50
51
  "dependencies": {
52
+ "@scribe.js/tesseract.js": "^6.0.2",
51
53
  "canvaskit-wasm": "^0.39.1",
52
- "commander": "^11.1.0",
53
- "@scribe.js/tesseract.js": "^6.0.2"
54
+ "commander": "^11.1.0"
54
55
  }
55
56
  }
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file