npm - scribe.js-ocr - Versions diffs - 0.2.2 → 0.2.3 - Mend

scribe.js-ocr 0.2.2 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (2)

package/js/import/import.js +4 -1
package/package.json +1 -1

package/js/import/import.js CHANGED

@@ -191,6 +191,8 @@ export function sortInputFiles(files) {
  * @param {Object} [options]
  * @param {boolean} [options.extractPDFTextNative=false] - Extract text from text-native PDF documents.
  * @param {boolean} [options.extractPDFTextOCR=false] - Extract text from image-native PDF documents with existing OCR text layers.
+ * @param {boolean} [options.extractPDFTextImage=false] - Extract text from image-native PDF documents with no existing OCR layer.
+ *   This option exists because documents may still contain some text even if they are determined to be image-native (for example, scanned documents with a text-native header).
  * @returns
  */
 export async function importFiles(files, options = {}) {
@@ -199,6 +201,7 @@ export async function importFiles(files, options = {}) {
   const extractPDFTextNative = options?.extractPDFTextNative ?? false;
   const extractPDFTextOCR = options?.extractPDFTextOCR ?? false;
+  const extractPDFTextImage = options?.extractPDFTextImage ?? false;
   /** @type {Array<File|FileNode|ArrayBuffer>} */
   let pdfFiles = [];
@@ -440,7 +443,7 @@ export async function importFiles(files, options = {}) {
       }
     });
   } else if (extractPDFTextNative || extractPDFTextOCR) {
-    await extractInternalPDFText({ setActive: true, extractPDFTextNative, extractPDFTextOCR });
+    await extractInternalPDFText({ setActive: true, extractPDFTextNative, extractPDFTextOCR, extractPDFTextImage });
   }
 }

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "scribe.js-ocr",
-  "version": "0.2.2",
+  "version": "0.2.3",
   "description": "High-quality OCR and text extraction for images and PDFs.",
   "main": "scribe.js",
   "directories": {