scribe.js-ocr 0.2.2 → 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/js/import/import.js +4 -1
- package/package.json +1 -1
package/js/import/import.js
CHANGED
|
@@ -191,6 +191,8 @@ export function sortInputFiles(files) {
|
|
|
191
191
|
* @param {Object} [options]
|
|
192
192
|
* @param {boolean} [options.extractPDFTextNative=false] - Extract text from text-native PDF documents.
|
|
193
193
|
* @param {boolean} [options.extractPDFTextOCR=false] - Extract text from image-native PDF documents with existing OCR text layers.
|
|
194
|
+
* @param {boolean} [options.extractPDFTextImage=false] - Extract text from image-native PDF documents with no existing OCR layer.
|
|
195
|
+
* This option exists because documents may still contain some text even if they are determined to be image-native (for example, scanned documents with a text-native header).
|
|
194
196
|
* @returns
|
|
195
197
|
*/
|
|
196
198
|
export async function importFiles(files, options = {}) {
|
|
@@ -199,6 +201,7 @@ export async function importFiles(files, options = {}) {
|
|
|
199
201
|
|
|
200
202
|
const extractPDFTextNative = options?.extractPDFTextNative ?? false;
|
|
201
203
|
const extractPDFTextOCR = options?.extractPDFTextOCR ?? false;
|
|
204
|
+
const extractPDFTextImage = options?.extractPDFTextImage ?? false;
|
|
202
205
|
|
|
203
206
|
/** @type {Array<File|FileNode|ArrayBuffer>} */
|
|
204
207
|
let pdfFiles = [];
|
|
@@ -440,7 +443,7 @@ export async function importFiles(files, options = {}) {
|
|
|
440
443
|
}
|
|
441
444
|
});
|
|
442
445
|
} else if (extractPDFTextNative || extractPDFTextOCR) {
|
|
443
|
-
await extractInternalPDFText({ setActive: true, extractPDFTextNative, extractPDFTextOCR });
|
|
446
|
+
await extractInternalPDFText({ setActive: true, extractPDFTextNative, extractPDFTextOCR, extractPDFTextImage });
|
|
444
447
|
}
|
|
445
448
|
}
|
|
446
449
|
|