npm - monocr - Versions diffs - 0.1.2 → 0.1.4 - Mend

monocr 0.1.2 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6)

package/README.md CHANGED

@@ -1,6 +1,6 @@
-# monocr (JavaScript/Node.js)
+# monocr
-Mon language OCR using ONNX Runtime for Node.js applications.
+Mon language (mnw) OCR for Node.js.
 ## Installation
@@ -13,107 +13,65 @@ npm install monocr
 ```javascript
 const { read_image } = require("monocr");
-// Auto-downloads model on first run
-const text = await read_image("path/to/image.jpg");
+// Automatically downloads model on first run
+const text = await read_image("image.jpg");
 console.log(text);
 ```
 ## API
-### read_image(imagePath, [modelPath], [charsetPath])
+### `read_image(imagePath, [modelPath], [charsetPath])`
-Recognize text from an image file.
+Recognizes text from an image file.
-**Parameters:**
+- `imagePath` (string): Path to image file.
+- `modelPath` (string, optional): Path to ONNX model. Defaults to `~/.monocr/models/monocr.onnx`.
+- `charsetPath` (string, optional): Path to charset file. Defaults to bundled charset.
-- `imagePath` - Path to image file (jpg, png)
-- `modelPath` - (Optional) Path to ONNX model. Defaults to auto-downloaded model.
-- `charsetPath` - (Optional) Path to charset file. Defaults to auto-downloaded file.
+Returns: `Promise<string>`
-**Returns:** `Promise<string>` - Recognized text
+### `read_pdf(pdfPath, [modelPath], [charsetPath])`
-### read_pdf(pdfPath, modelPath, charsetPath)
+Recognizes text from a PDF file.
-Recognize text from a PDF file.
+- `pdfPath` (string): Path to PDF file.
+- `modelPath` (string, optional): As above.
+- `charsetPath` (string, optional): As above.
-**Parameters:**
+Returns: `Promise<string[]>` (Array of text per page)
-- `pdfPath` - Path to PDF file
-- `modelPath` - Path to ONNX model (optional)
-- `charsetPath` - Path to charset file (optional)
+### `read_image_with_accuracy(imagePath, groundTruth, [modelPath], [charsetPath])`
-**Returns:** `Promise<string[]>` - Array of text per page
+Recognizes text and calculates accuracy against ground truth.
-### read_image_with_accuracy(imagePath, groundTruth, modelPath, charsetPath)
+- `imagePath` (string): Path to image file.
+- `groundTruth` (string): Expected text.
-Recognize text with accuracy measurement.
+Returns: `Promise<{text: string, accuracy: number}>`
-**Parameters:**
+## CLI Usage
-- `imagePath` - Path to image file
-- `groundTruth` - Expected text for accuracy calculation
-- `modelPath` - Path to ONNX model (optional)
-- `charsetPath` - Path to charset file (optional)
-**Returns:** `Promise<{text: string, accuracy: number}>` - Text and accuracy percentage
-### MonOCR Class
-For advanced usage, use the `MonOCR` class directly:
-```javascript
-const { MonOCR } = require("monocr");
-const ocr = new MonOCR("model.onnx", "charset.txt");
-await ocr.init();
-// Single line
-const text = await ocr.predictLine(imageSource);
-// Full page (with line segmentation)
-const results = await ocr.predictPage(imagePath);
-```
-## CLI
-The package includes a command-line tool:
+The package includes a `monocr` command-line tool.
 ```bash
-# Single image
-monocr image path/to/image.jpg
-# PDF файл
-monocr pdf path/to/document.pdf
-# Batch processing
-monocr batch path/to/images/ -o results.json
-```
-## Examples
-See the `examples/` directory for detailed usage examples:
+# Download model to cache (optional, happens automatically on first use)
+monocr download
-- `simple.js` - Basic image OCR
-- `with-accuracy.js` - OCR with accuracy measurement
-- `batch.js` - Batch processing
+# Recognize single image
+monocr image input.jpg
-Run examples:
+# Recognize PDF
+monocr pdf document.pdf
-```bash
-node examples/simple.js
+# Batch process directory
+monocr batch ./images -o results.json
 ```
 ## Model Files
-Models are **automatically downloaded** on first use to `~/.monocr/models/`.
-You can also trigger a manual download:
-```bash
-monocr download
-```
+The ONNX model (`monocr.onnx`) is downloaded automatically to `~/.monocr/models/` on first use. The charset file is bundled with the package.
-Can also specify custom model paths if you prefer offline usage without the default cache.
+To use a custom model, provide the `modelPath` argument to the API functions or CLI.
 ## License

package/bin/monocr.js CHANGED

@@ -14,7 +14,7 @@ program
     .command('image <path>')
     .description('Recognize text from an image file')
     .option('-m, --model <path>', 'Path to ONNX model (optional, auto-downloads)')
-    .option('-c, --charset <path>', 'Path to charset file (optional, auto-downloads)')
+    .option('-c, --charset <path>', 'Path to charset file (optional)')
     .action(async (imagePath, options) => {
         try {
             const text = await read_image(imagePath, options.model, options.charset);
@@ -29,7 +29,7 @@ program
     .command('pdf <path>')
     .description('Recognize text from a PDF file')
     .option('-m, --model <path>', 'Path to ONNX model (optional, auto-downloads)')
-    .option('-c, --charset <path>', 'Path to charset file (optional, auto-downloads)')
+    .option('-c, --charset <path>', 'Path to charset file (optional)')
     .action(async (pdfPath, options) => {
         try {
             const pages = await read_pdf(pdfPath, options.model, options.charset);
@@ -48,7 +48,7 @@ program
     .command('batch <directory>')
     .description('Process all images in a directory')
     .option('-m, --model <path>', 'Path to ONNX model (optional, auto-downloads)')
-    .option('-c, --charset <path>', 'Path to charset file (optional, auto-downloads)')
+    .option('-c, --charset <path>', 'Path to charset file (optional)')
     .option('-o, --output <path>', 'Output file for results (optional)')
     .action(async (directory, options) => {
         try {
@@ -89,7 +89,7 @@ program
         try {
             const { MonOCR } = require('../src/index');
             const ocr = new MonOCR();
-            await ocr.modelManager.downloadModels();
+            await ocr.modelManager.downloadModel();
         } catch (err) {
             console.error('Error:', err.message);
             process.exit(1);

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "monocr",
-  "version": "0.1.2",
+  "version": "0.1.4",
   "description": "Cross-platform Mon (mnw) language OCR using ONNX Runtime. Supports Node.js.",
   "main": "src/index.js",
   "bin": {

package/src/charset.txt ADDED

@@ -0,0 +1 @@

+ !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~ကခဂဃငစဆဇဈဉညဋဌဍဎဏတထဒဓနပဖဗဘမယရလဝသဟဠအဢဣဤဥဦဧဨဩဪါာိီုူေဲဳဴဵံ့း္်ျြွှဿ၀၁၂၃၄၅၆၇၈၉၊။၌၍၎၏ၐၑၓၚၛၜၝၞၟၠၡၢၣၤၥၨၪၰၱၲၳၴၵၷၸၹၺၻၼၾၿႀႄႅႆႇႈႉႊႏ႐႒႓႔႕႘႙ႜႝ႟

package/src/model-manager.js CHANGED

@@ -9,12 +9,10 @@ class ModelManager {
         // Default cache directory in user's home
         this.cacheDir = path.join(os.homedir(), '.monocr', 'models');
-        // HuggingFace model URLs
+        // HuggingFace model URL
         this.baseUrl = 'https://huggingface.co/janakhpon/monocr/resolve/main';
-        this.modelFiles = {
-            model: 'onnx/monocr.onnx',
-            charset: 'charset.txt'
-        };
+        this.modelFileName = 'monocr.onnx';
+        this.hfModelPath = 'onnx/monocr.onnx';
     }
     /**
@@ -27,19 +25,17 @@ class ModelManager {
     }
     /**
-     * Get local path for a model file
+     * Get local path for the model
      */
-    getLocalPath(fileKey) {
-        return path.join(this.cacheDir, path.basename(this.modelFiles[fileKey]));
+    getModelPath() {
+        return path.join(this.cacheDir, this.modelFileName);
     }
     /**
-     * Check if model files exist locally
+     * Check if model exists locally
      */
-    hasModels() {
-        const modelPath = this.getLocalPath('model');
-        const charsetPath = this.getLocalPath('charset');
-        return fs.existsSync(modelPath) && fs.existsSync(charsetPath);
+    hasModel() {
+        return fs.existsSync(this.getModelPath());
     }
     /**
@@ -49,43 +45,44 @@ class ModelManager {
         return new Promise((resolve, reject) => {
             const file = fs.createWriteStream(destPath);
-            https.get(url, { headers: { 'User-Agent': 'monocr-npm' } }, (response) => {
-                if (response.statusCode === 302 || response.statusCode === 301) {
-                    // Follow redirect
-                    https.get(response.headers.location, (redirectResponse) => {
-                        const totalSize = parseInt(redirectResponse.headers['content-length'], 10);
+            const request = (requestUrl) => {
+                https.get(requestUrl, { headers: { 'User-Agent': 'monocr-npm' } }, (response) => {
+                    if ([301, 302, 307, 308].includes(response.statusCode)) {
+                        let redirectUrl = response.headers.location;
+                        if (!redirectUrl.startsWith('http')) {
+                            const originalUrl = new URL(requestUrl);
+                            redirectUrl = `${originalUrl.protocol}//${originalUrl.host}${redirectUrl}`;
+                        }
+                        request(redirectUrl);
+                    } else if (response.statusCode === 200) {
+                        const totalSize = parseInt(response.headers['content-length'], 10);
                         let downloadedSize = 0;
-                        redirectResponse.on('data', (chunk) => {
+                        response.on('data', (chunk) => {
                             downloadedSize += chunk.length;
-                            const progress = ((downloadedSize / totalSize) * 100).toFixed(1);
-                            process.stdout.write(`\r  Progress: ${progress}% (${(downloadedSize / 1024 / 1024).toFixed(2)} MB)`);
+                            if (totalSize) {
+                                const progress = ((downloadedSize / totalSize) * 100).toFixed(1);
+                                process.stdout.write(`\r  Downloading model: ${progress}% (${(downloadedSize / 1024 / 1024).toFixed(2)} MB)`);
+                            }
                         });
-                        redirectResponse.pipe(file);
+                        response.pipe(file);
                         file.on('finish', () => {
                             file.close();
                             process.stdout.write('\n');
                             resolve();
                         });
-                    }).on('error', (err) => {
-                        fs.unlink(destPath, () => {});
-                        reject(err);
-                    });
-                } else if (response.statusCode === 200) {
-                    response.pipe(file);
-                    file.on('finish', () => {
-                        file.close();
-                        resolve();
-                    });
-                } else {
-                    reject(new Error(`Failed to download: ${response.statusCode}`));
-                }
-            }).on('error', (err) => {
-                fs.unlink(destPath, () => {});
-                reject(err);
-            });
+                    } else {
+                        reject(new Error(`Failed to download: ${response.statusCode}`));
+                    }
+                }).on('error', (err) => {
+                    fs.unlink(destPath, () => {});
+                    reject(err);
+                });
+            };
+            request(url);
             file.on('error', (err) => {
                 fs.unlink(destPath, () => {});
@@ -95,44 +92,32 @@ class ModelManager {
     }
     /**
-     * Download all model files
+     * Download model file
      */
-    async downloadModels() {
+    async downloadModel() {
         this.ensureCacheDir();
-        console.log('Downloading monocr models from HuggingFace...');
-        console.log(`Cache directory: ${this.cacheDir}\n`);
-        // Download model
-        const modelUrl = `${this.baseUrl}/${this.modelFiles.model}`;
-        const modelPath = this.getLocalPath('model');
-        console.log('Downloading monocr.onnx...');
-        await this.downloadFile(modelUrl, modelPath);
-        console.log('✓ Model downloaded\n');
+        console.log('Downloading monocr model from HuggingFace...');
+        console.log(`Cache directory: ${this.cacheDir}`);
-        // Download charset
-        const charsetUrl = `${this.baseUrl}/${this.modelFiles.charset}`;
-        const charsetPath = this.getLocalPath('charset');
-        console.log('Downloading charset.txt...');
-        await this.downloadFile(charsetUrl, charsetPath);
-        console.log('✓ Charset downloaded\n');
+        const modelUrl = `${this.baseUrl}/${this.hfModelPath}`;
+        const destPath = this.getModelPath();
-        console.log('All models downloaded successfully!');
+        await this.downloadFile(modelUrl, destPath);
+        console.log('✓ Model downloaded successfully!');
     }
     /**
-     * Get model paths, downloading if needed
+     * Get model path, downloading if needed
      */
-    async getModelPaths() {
-        if (!this.hasModels()) {
-            await this.downloadModels();
+    async ensureModel() {
+        if (!this.hasModel()) {
+            await this.downloadModel();
         }
-        return {
-            modelPath: this.getLocalPath('model'),
-            charsetPath: this.getLocalPath('charset')
-        };
+        return this.getModelPath();
     }
 }
 module.exports = ModelManager;
+module.exports = ModelManager;

package/src/monocr.js CHANGED

@@ -1,6 +1,7 @@
 const ort = require('onnxruntime-node');
 const sharp = require('sharp');
 const fs = require('fs');
+const path = require('path');
 const LineSegmenter = require('./segmenter');
 const ModelManager = require('./model-manager');
@@ -21,17 +22,18 @@ class MonOCR {
     async init() {
         if (this.session) return;
-        // If paths not provided, use auto-download
-        if (!this.modelPath || !this.charsetPath) {
-            const paths = await this.modelManager.getModelPaths();
-            this.modelPath = this.modelPath || paths.modelPath;
-            this.charsetPath = this.charsetPath || paths.charsetPath;
+        // Ensure model exists
+        if (!this.modelPath) {
+            this.modelPath = await this.modelManager.ensureModel();
         }
-        this.session = await ort.InferenceSession.create(this.modelPath);
-        if (this.charsetPath) {
-            this.charset = fs.readFileSync(this.charsetPath, 'utf-8').trim();
+        // Use bundled charset if not provided
+        if (!this.charsetPath) {
+            this.charsetPath = path.join(__dirname, 'charset.txt');
         }
+        this.session = await ort.InferenceSession.create(this.modelPath);
+        this.charset = fs.readFileSync(this.charsetPath, 'utf-8').trim();
     }
     /**