npm - monocr - Version diff - 0.1.4 → 0.1.5 - Mend

monocr 0.1.4 → 0.1.5

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in the respective public registry.

Files changed (4) hide show

package/README.md CHANGED Viewed

@@ -1,6 +1,6 @@
-# monocr
+# MonOCR (JavaScript SDK)
-Mon language (mnw) OCR for Node.js.
+The professional JavaScript SDK for Mon language OCR, powered by ONNX Runtime. Designed for high-performance server-side and desktop Node.js applications.
 ## Installation
@@ -8,70 +8,70 @@ Mon language (mnw) OCR for Node.js.
 npm install monocr
 ```
-## Quick Start
+## Features
-```javascript
-const { read_image } = require("monocr");
+- **Unified API**: Synchronized with Python and Go equivalents.
+- **Auto-Model Management**: Leverages [MonDevHub/monocr](https://huggingface.co/janakhpon/monocr) for automated model delivery.
+- **PDF Support**: Intelligent document segmentation and multi-page processing.
+- **Zero Dependencies**: Core OCR logic is lean and optimized.
-// Automatically downloads model on first run
-const text = await read_image("image.jpg");
-console.log(text);
-```
+## API Reference
+### `read_image(imagePath, [options])`
+Recognizes text from a single image.
-## API
+- `imagePath`: String path to the image file.
+- `options`: Optional overrides for model/charset paths.
+- **Returns**: `Promise<string>`
-### `read_image(imagePath, [modelPath], [charsetPath])`
+### `read_images(imagePaths, [options])`
-Recognizes text from an image file.
+Recognizes text from multiple images.
-- `imagePath` (string): Path to image file.
-- `modelPath` (string, optional): Path to ONNX model. Defaults to `~/.monocr/models/monocr.onnx`.
-- `charsetPath` (string, optional): Path to charset file. Defaults to bundled charset.
+- **Returns**: `Promise<string[]>`
-Returns: `Promise<string>`
+### `read_pdf(pdfPath, [options])`
-### `read_pdf(pdfPath, [modelPath], [charsetPath])`
+Converts and recognizes text from all pages of a PDF.
-Recognizes text from a PDF file.
+- **Returns**: `Promise<string[]>` (Array of strings per page)
-- `pdfPath` (string): Path to PDF file.
-- `modelPath` (string, optional): As above.
-- `charsetPath` (string, optional): As above.
+### `read_image_with_accuracy(imagePath, groundTruth, [options])`
-Returns: `Promise<string[]>` (Array of text per page)
+Performs OCR and calculates Levenshtein accuracy.
-### `read_image_with_accuracy(imagePath, groundTruth, [modelPath], [charsetPath])`
+- **Returns**: `Promise<{text: string, accuracy: number}>`
-Recognizes text and calculates accuracy against ground truth.
+## Usage Example
-- `imagePath` (string): Path to image file.
-- `groundTruth` (string): Expected text.
+```javascript
+const { read_image } = require("monocr");
-Returns: `Promise<{text: string, accuracy: number}>`
+async function main() {
+  const text = await read_image("scanned_text.png");
+  console.log(text);
+}
-## CLI Usage
+main();
+```
-The package includes a `monocr` command-line tool.
+## CLI Interface
 ```bash
-# Download model to cache (optional, happens automatically on first use)
-monocr download
+# Global installation for CLI usage
+npm install -g monocr
-# Recognize single image
+# Process an image
 monocr image input.jpg
-# Recognize PDF
+# Process a PDF
 monocr pdf document.pdf
-# Batch process directory
-monocr batch ./images -o results.json
 ```
-## Model Files
-The ONNX model (`monocr.onnx`) is downloaded automatically to `~/.monocr/models/` on first use. The charset file is bundled with the package.
+## Maintenance
-To use a custom model, provide the `modelPath` argument to the API functions or CLI.
+Maintained by [MonDevHub](https://github.com/MonDevHub).
 ## License

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "monocr",
-  "version": "0.1.4",
+  "version": "0.1.5",
   "description": "Cross-platform Mon (mnw) language OCR using ONNX Runtime. Supports Node.js.",
   "main": "src/index.js",
   "bin": {
@@ -8,7 +8,7 @@
   },
   "repository": {
     "type": "git",
-    "url": "git+https://github.com/janakh/monocr-onnx.git",
+    "url": "git+https://github.com/MonDevHub/monocr-onnx.git",
     "directory": "js"
   },
   "keywords": [
@@ -21,9 +21,9 @@
   "author": "Janakh",
   "license": "MIT",
   "bugs": {
-    "url": "https://github.com/janakh/monocr-onnx/issues"
+    "url": "https://github.com/MonDevHub/monocr-onnx/issues"
   },
-  "homepage": "https://github.com/janakh/monocr-onnx/tree/main/js#readme",
+  "homepage": "https://github.com/MonDevHub/monocr-onnx/tree/main/js#readme",
   "files": [
     "src/",
     "bin/",
@@ -32,11 +32,11 @@
   ],
   "dependencies": {
     "commander": "^11.1.0",
-    "onnxruntime-node": "^1.15.0",
-    "pdf2pic": "^3.2.0",
+    "onnxruntime-node": "^1.24.1",
+    "pdf-img-convert": "^2.0.0",
     "sharp": "^0.32.0"
   },
   "publishConfig": {
     "access": "public"
   }
-}
+}

package/src/index.js CHANGED Viewed

@@ -5,15 +5,17 @@ module.exports = {
     MonOCR,
     calculateAccuracy,
     read_image,
+    read_images,
     read_pdf,
+    read_pdfs,
     read_image_with_accuracy
 };
 /**
  * Read text from an image file
  * @param {string} imagePath - Path to image file
- * @param {string} modelPath - Path to ONNX model (optional, auto-downloads if not provided)
- * @param {string} charsetPath - Path to charset file (optional, auto-downloads if not provided)
+ * @param {string} modelPath - Path to ONNX model (optional)
+ * @param {string} charsetPath - Path to charset file (optional)
  * @returns {Promise<string>} Recognized text
  */
 async function read_image(imagePath, modelPath = null, charsetPath = null) {
@@ -23,6 +25,25 @@ async function read_image(imagePath, modelPath = null, charsetPath = null) {
     return results.map(r => r.text).join('\n');
 }
+/**
+ * Read text from multiple image files
+ * @param {string[]} imagePaths - Array of paths to image files
+ * @param {string} modelPath - Path to ONNX model (optional)
+ * @param {string} charsetPath - Path to charset file (optional)
+ * @returns {Promise<string[]>} Array of recognized text
+ */
+async function read_images(imagePaths, modelPath = null, charsetPath = null) {
+    const ocr = new MonOCR(modelPath, charsetPath);
+    await ocr.init();
+    const results = [];
+    for (const path of imagePaths) {
+        const pageResults = await ocr.predictPage(path);
+        results.push(pageResults.map(r => r.text).join('\n'));
+    }
+    return results;
+}
 /**
  * Read text from a PDF file
  * @param {string} pdfPath - Path to PDF file
@@ -31,45 +52,60 @@ async function read_image(imagePath, modelPath = null, charsetPath = null) {
  * @returns {Promise<string[]>} Array of text per page
  */
 async function read_pdf(pdfPath, modelPath = null, charsetPath = null) {
-    const { fromPath } = require('pdf2pic');
+    const pdfImgConvert = require('pdf-img-convert');
     const path = require('path');
     const fs = require('fs');
-    const os = require('os');
+    // Initialize OCR
     const ocr = new MonOCR(modelPath, charsetPath);
     await ocr.init();
-    const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'monocr-'));
-    const converter = fromPath(pdfPath, {
-        density: 300,
-        format: 'png',
-        width: 2480,
-        height: 3508,
-        saveFilename: 'page',
-        savePath: tempDir
-    });
-    const pages = [];
-    let pageNum = 1;
-    while (true) {
-        try {
-            const result = await converter(pageNum, { responseType: 'image' });
-            const imagePath = result.path;
-            const results = await ocr.predictPage(imagePath);
+    try {
+        // Convert PDF to image buffers (returns Uint8Array[])
+        // pdf-img-convert handles parsing internally using pdf.js
+        const imageBuffers = await pdfImgConvert.convert(pdfPath, {
+            width: 2480,  // High resolution for OCR
+            height: 3508,
+            page_numbers: [] // All pages
+        });
+        if (!imageBuffers || imageBuffers.length === 0) {
+            throw new Error("Failed to convert PDF: No images generated");
+        }
+        const pages = [];
+        for (let i = 0; i < imageBuffers.length; i++) {
+            // pdf-img-convert returns Uint8Array (buffer-like)
+            // MonOCR's predictPage expects a file path or sharp-compatible input
+            // sharp can take a Buffer.
+            const buffer = Buffer.from(imageBuffers[i]);
+            const results = await ocr.predictPage(buffer);
             const pageText = results.map(r => r.text).join('\n');
             pages.push(pageText);
-            pageNum++;
-        } catch (err) {
-            break;
         }
+        return pages;
+    } catch (err) {
+        throw new Error(`Failed to process PDF: ${err.message}`);
     }
-    // Cleanup temp directory
-    fs.rmSync(tempDir, { recursive: true, force: true });
-    return pages;
+}
+/**
+ * Read text from multiple PDF files
+ * @param {string[]} pdfPaths - Array of paths to PDF files
+ * @param {string} modelPath - Path to ONNX model (optional)
+ * @param {string} charsetPath - Path to charset file (optional)
+ * @returns {Promise<string[][]>} Array of arrays of text per page
+ */
+async function read_pdfs(pdfPaths, modelPath = null, charsetPath = null) {
+    const results = [];
+    for (const path of pdfPaths) {
+        const pages = await read_pdf(path, modelPath, charsetPath);
+        results.push(pages);
+    }
+    return results;
 }
 /**

package/src/utils.js CHANGED Viewed

@@ -6,18 +6,38 @@
  */
 function calculateAccuracy(predicted, groundTruth) {
     if (!groundTruth) return 0;
+    if (!predicted) return 0;
-    const len = Math.max(predicted.length, groundTruth.length);
-    if (len === 0) return 100;
+    const s1 = predicted;
+    const s2 = groundTruth;
-    let errors = 0;
-    for (let i = 0; i < len; i++) {
-        if (predicted[i] !== groundTruth[i]) {
-            errors++;
+    const track = Array(s2.length + 1).fill(null).map(() =>
+        Array(s1.length + 1).fill(null));
+    for (let i = 0; i <= s1.length; i += 1) {
+        track[0][i] = i;
+    }
+    for (let j = 0; j <= s2.length; j += 1) {
+        track[j][0] = j;
+    }
+    for (let j = 1; j <= s2.length; j += 1) {
+        for (let i = 1; i <= s1.length; i += 1) {
+            const indicator = s1[i - 1] === s2[j - 1] ? 0 : 1;
+            track[j][i] = Math.min(
+                track[j][i - 1] + 1, // deletion
+                track[j - 1][i] + 1, // insertion
+                track[j - 1][i - 1] + indicator, // substitution
+            );
         }
     }
-    return ((1 - errors / len) * 100).toFixed(2);
+    const distance = track[s2.length][s1.length];
+    const maxLen = Math.max(s1.length, s2.length);
+    if (maxLen === 0) return 100;
+    return ((1 - distance / maxLen) * 100).toFixed(2);
 }
 module.exports = {