npm - monocr - Versions diffs - 0.1.3 → 0.1.5 - Mend

monocr 0.1.3 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5)

package/README.md CHANGED

@@ -1,6 +1,6 @@
-# monocr (JavaScript/Node.js)
+# MonOCR (JavaScript SDK)
-Mon language OCR using ONNX Runtime for Node.js applications.
+The professional JavaScript SDK for Mon language OCR, powered by ONNX Runtime. Designed for high-performance server-side and desktop Node.js applications.
 ## Installation
@@ -8,112 +8,70 @@ Mon language OCR using ONNX Runtime for Node.js applications.
 npm install monocr
 ```
-## Quick Start
+## Features
-```javascript
-const { read_image } = require("monocr");
-// Auto-downloads model on first run
-const text = await read_image("path/to/image.jpg");
-console.log(text);
-```
-## API
-### read_image(imagePath, [modelPath], [charsetPath])
+- **Unified API**: Synchronized with Python and Go equivalents.
+- **Auto-Model Management**: Leverages [MonDevHub/monocr](https://huggingface.co/janakhpon/monocr) for automated model delivery.
+- **PDF Support**: Intelligent document segmentation and multi-page processing.
+- **Zero Dependencies**: Core OCR logic is lean and optimized.
-Recognize text from an image file.
+## API Reference
-**Parameters:**
+### `read_image(imagePath, [options])`
-- `imagePath` - Path to image file (jpg, png)
-- `modelPath` - (Optional) Path to ONNX model. Defaults to auto-downloaded model.
-- `charsetPath` - (Optional) Path to charset file. Defaults to auto-downloaded file.
+Recognizes text from a single image.
-**Returns:** `Promise<string>` - Recognized text
+- `imagePath`: String path to the image file.
+- `options`: Optional overrides for model/charset paths.
+- **Returns**: `Promise<string>`
-### read_pdf(pdfPath, modelPath, charsetPath)
+### `read_images(imagePaths, [options])`
-Recognize text from a PDF file.
+Recognizes text from multiple images.
-**Parameters:**
+- **Returns**: `Promise<string[]>`
-- `pdfPath` - Path to PDF file
-- `modelPath` - Path to ONNX model (optional)
-- `charsetPath` - Path to charset file (optional)
+### `read_pdf(pdfPath, [options])`
-**Returns:** `Promise<string[]>` - Array of text per page
+Converts and recognizes text from all pages of a PDF.
-### read_image_with_accuracy(imagePath, groundTruth, modelPath, charsetPath)
+- **Returns**: `Promise<string[]>` (Array of strings per page)
-Recognize text with accuracy measurement.
+### `read_image_with_accuracy(imagePath, groundTruth, [options])`
-**Parameters:**
+Performs OCR and calculates Levenshtein accuracy.
-- `imagePath` - Path to image file
-- `groundTruth` - Expected text for accuracy calculation
-- `modelPath` - Path to ONNX model (optional)
-- `charsetPath` - Path to charset file (optional)
+- **Returns**: `Promise<{text: string, accuracy: number}>`
-**Returns:** `Promise<{text: string, accuracy: number}>` - Text and accuracy percentage
-### MonOCR Class
-For advanced usage, use the `MonOCR` class directly:
+## Usage Example
 ```javascript
-const { MonOCR } = require("monocr");
-const ocr = new MonOCR("model.onnx", "charset.txt");
-await ocr.init();
+const { read_image } = require("monocr");
-// Single line
-const text = await ocr.predictLine(imageSource);
+async function main() {
+  const text = await read_image("scanned_text.png");
+  console.log(text);
+}
-// Full page (with line segmentation)
-const results = await ocr.predictPage(imagePath);
+main();
 ```
-## CLI
-The package includes a command-line tool:
+## CLI Interface
 ```bash
-# Single image
-monocr image path/to/image.jpg
+# Global installation for CLI usage
+npm install -g monocr
-# PDF файл
-monocr pdf path/to/document.pdf
+# Process an image
+monocr image input.jpg
-# Batch processing
-monocr batch path/to/images/ -o results.json
+# Process a PDF
+monocr pdf document.pdf
 ```
-## Examples
-See the `examples/` directory for detailed usage examples:
-- `simple.js` - Basic image OCR
-- `with-accuracy.js` - OCR with accuracy measurement
-- `batch.js` - Batch processing
-Run examples:
-```bash
-node examples/simple.js
-```
-## Model Files
-Models are **automatically downloaded** on first use to `~/.monocr/models/`.
-You can also trigger a manual download:
-```bash
-monocr download
-```
+## Maintenance
-Can also specify custom model paths if you prefer offline usage without the default cache.
+Maintained by [MonDevHub](https://github.com/MonDevHub).
 ## License

package/bin/monocr.js CHANGED

@@ -14,7 +14,7 @@ program
     .command('image <path>')
     .description('Recognize text from an image file')
     .option('-m, --model <path>', 'Path to ONNX model (optional, auto-downloads)')
-    .option('-c, --charset <path>', 'Path to charset file (optional, auto-downloads)')
+    .option('-c, --charset <path>', 'Path to charset file (optional)')
     .action(async (imagePath, options) => {
         try {
             const text = await read_image(imagePath, options.model, options.charset);
@@ -29,7 +29,7 @@ program
     .command('pdf <path>')
     .description('Recognize text from a PDF file')
     .option('-m, --model <path>', 'Path to ONNX model (optional, auto-downloads)')
-    .option('-c, --charset <path>', 'Path to charset file (optional, auto-downloads)')
+    .option('-c, --charset <path>', 'Path to charset file (optional)')
     .action(async (pdfPath, options) => {
         try {
             const pages = await read_pdf(pdfPath, options.model, options.charset);
@@ -48,7 +48,7 @@ program
     .command('batch <directory>')
     .description('Process all images in a directory')
     .option('-m, --model <path>', 'Path to ONNX model (optional, auto-downloads)')
-    .option('-c, --charset <path>', 'Path to charset file (optional, auto-downloads)')
+    .option('-c, --charset <path>', 'Path to charset file (optional)')
     .option('-o, --output <path>', 'Output file for results (optional)')
     .action(async (directory, options) => {
         try {

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "monocr",
-  "version": "0.1.3",
+  "version": "0.1.5",
   "description": "Cross-platform Mon (mnw) language OCR using ONNX Runtime. Supports Node.js.",
   "main": "src/index.js",
   "bin": {
@@ -8,7 +8,7 @@
   },
   "repository": {
     "type": "git",
-    "url": "git+https://github.com/janakh/monocr-onnx.git",
+    "url": "git+https://github.com/MonDevHub/monocr-onnx.git",
     "directory": "js"
   },
   "keywords": [
@@ -21,9 +21,9 @@
   "author": "Janakh",
   "license": "MIT",
   "bugs": {
-    "url": "https://github.com/janakh/monocr-onnx/issues"
+    "url": "https://github.com/MonDevHub/monocr-onnx/issues"
   },
-  "homepage": "https://github.com/janakh/monocr-onnx/tree/main/js#readme",
+  "homepage": "https://github.com/MonDevHub/monocr-onnx/tree/main/js#readme",
   "files": [
     "src/",
     "bin/",
@@ -32,11 +32,11 @@
   ],
   "dependencies": {
     "commander": "^11.1.0",
-    "onnxruntime-node": "^1.15.0",
-    "pdf2pic": "^3.2.0",
+    "onnxruntime-node": "^1.24.1",
+    "pdf-img-convert": "^2.0.0",
     "sharp": "^0.32.0"
   },
   "publishConfig": {
     "access": "public"
   }
-}
+}

package/src/index.js CHANGED

@@ -5,15 +5,17 @@ module.exports = {
     MonOCR,
     calculateAccuracy,
     read_image,
+    read_images,
     read_pdf,
+    read_pdfs,
     read_image_with_accuracy
 };
 /**
  * Read text from an image file
  * @param {string} imagePath - Path to image file
- * @param {string} modelPath - Path to ONNX model (optional, auto-downloads if not provided)
- * @param {string} charsetPath - Path to charset file (optional, auto-downloads if not provided)
+ * @param {string} modelPath - Path to ONNX model (optional)
+ * @param {string} charsetPath - Path to charset file (optional)
  * @returns {Promise<string>} Recognized text
  */
 async function read_image(imagePath, modelPath = null, charsetPath = null) {
@@ -23,6 +25,25 @@ async function read_image(imagePath, modelPath = null, charsetPath = null) {
     return results.map(r => r.text).join('\n');
 }
+/**
+ * Read text from multiple image files
+ * @param {string[]} imagePaths - Array of paths to image files
+ * @param {string} modelPath - Path to ONNX model (optional)
+ * @param {string} charsetPath - Path to charset file (optional)
+ * @returns {Promise<string[]>} Array of recognized text
+ */
+async function read_images(imagePaths, modelPath = null, charsetPath = null) {
+    const ocr = new MonOCR(modelPath, charsetPath);
+    await ocr.init();
+    const results = [];
+    for (const path of imagePaths) {
+        const pageResults = await ocr.predictPage(path);
+        results.push(pageResults.map(r => r.text).join('\n'));
+    }
+    return results;
+}
 /**
  * Read text from a PDF file
  * @param {string} pdfPath - Path to PDF file
@@ -31,45 +52,60 @@ async function read_image(imagePath, modelPath = null, charsetPath = null) {
  * @returns {Promise<string[]>} Array of text per page
  */
 async function read_pdf(pdfPath, modelPath = null, charsetPath = null) {
-    const { fromPath } = require('pdf2pic');
+    const pdfImgConvert = require('pdf-img-convert');
     const path = require('path');
     const fs = require('fs');
-    const os = require('os');
+    // Initialize OCR
     const ocr = new MonOCR(modelPath, charsetPath);
     await ocr.init();
-    const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'monocr-'));
-    const converter = fromPath(pdfPath, {
-        density: 300,
-        format: 'png',
-        width: 2480,
-        height: 3508,
-        saveFilename: 'page',
-        savePath: tempDir
-    });
-    const pages = [];
-    let pageNum = 1;
-    while (true) {
-        try {
-            const result = await converter(pageNum, { responseType: 'image' });
-            const imagePath = result.path;
-            const results = await ocr.predictPage(imagePath);
+    try {
+        // Convert PDF to image buffers (returns Uint8Array[])
+        // pdf-img-convert handles parsing internally using pdf.js
+        const imageBuffers = await pdfImgConvert.convert(pdfPath, {
+            width: 2480,  // High resolution for OCR
+            height: 3508,
+            page_numbers: [] // All pages
+        });
+        if (!imageBuffers || imageBuffers.length === 0) {
+            throw new Error("Failed to convert PDF: No images generated");
+        }
+        const pages = [];
+        for (let i = 0; i < imageBuffers.length; i++) {
+            // pdf-img-convert returns Uint8Array (buffer-like)
+            // MonOCR's predictPage expects a file path or sharp-compatible input
+            // sharp can take a Buffer.
+            const buffer = Buffer.from(imageBuffers[i]);
+            const results = await ocr.predictPage(buffer);
             const pageText = results.map(r => r.text).join('\n');
             pages.push(pageText);
-            pageNum++;
-        } catch (err) {
-            break;
         }
+        return pages;
+    } catch (err) {
+        throw new Error(`Failed to process PDF: ${err.message}`);
     }
-    // Cleanup temp directory
-    fs.rmSync(tempDir, { recursive: true, force: true });
-    return pages;
+}
+/**
+ * Read text from multiple PDF files
+ * @param {string[]} pdfPaths - Array of paths to PDF files
+ * @param {string} modelPath - Path to ONNX model (optional)
+ * @param {string} charsetPath - Path to charset file (optional)
+ * @returns {Promise<string[][]>} Array of arrays of text per page
+ */
+async function read_pdfs(pdfPaths, modelPath = null, charsetPath = null) {
+    const results = [];
+    for (const path of pdfPaths) {
+        const pages = await read_pdf(path, modelPath, charsetPath);
+        results.push(pages);
+    }
+    return results;
 }
 /**

package/src/utils.js CHANGED

@@ -6,18 +6,38 @@
  */
 function calculateAccuracy(predicted, groundTruth) {
     if (!groundTruth) return 0;
+    if (!predicted) return 0;
-    const len = Math.max(predicted.length, groundTruth.length);
-    if (len === 0) return 100;
+    const s1 = predicted;
+    const s2 = groundTruth;
-    let errors = 0;
-    for (let i = 0; i < len; i++) {
-        if (predicted[i] !== groundTruth[i]) {
-            errors++;
+    const track = Array(s2.length + 1).fill(null).map(() =>
+        Array(s1.length + 1).fill(null));
+    for (let i = 0; i <= s1.length; i += 1) {
+        track[0][i] = i;
+    }
+    for (let j = 0; j <= s2.length; j += 1) {
+        track[j][0] = j;
+    }
+    for (let j = 1; j <= s2.length; j += 1) {
+        for (let i = 1; i <= s1.length; i += 1) {
+            const indicator = s1[i - 1] === s2[j - 1] ? 0 : 1;
+            track[j][i] = Math.min(
+                track[j][i - 1] + 1, // deletion
+                track[j - 1][i] + 1, // insertion
+                track[j - 1][i - 1] + indicator, // substitution
+            );
         }
     }
-    return ((1 - errors / len) * 100).toFixed(2);
+    const distance = track[s2.length][s1.length];
+    const maxLen = Math.max(s1.length, s2.length);
+    if (maxLen === 0) return 100;
+    return ((1 - distance / maxLen) * 100).toFixed(2);
 }
 module.exports = {