monocr 0.1.3 → 0.1.5

This diff shows the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,6 +1,6 @@
1
- # monocr (JavaScript/Node.js)
1
+ # MonOCR (JavaScript SDK)
2
2
 
3
- Mon language OCR using ONNX Runtime for Node.js applications.
3
+ The professional JavaScript SDK for Mon language OCR, powered by ONNX Runtime. Designed for high-performance server-side and desktop Node.js applications.
4
4
 
5
5
  ## Installation
6
6
 
@@ -8,112 +8,70 @@ Mon language OCR using ONNX Runtime for Node.js applications.
8
8
  npm install monocr
9
9
  ```
10
10
 
11
- ## Quick Start
11
+ ## Features
12
12
 
13
- ```javascript
14
- const { read_image } = require("monocr");
15
-
16
- // Auto-downloads model on first run
17
- const text = await read_image("path/to/image.jpg");
18
- console.log(text);
19
- ```
20
-
21
- ## API
22
-
23
- ### read_image(imagePath, [modelPath], [charsetPath])
13
+ - **Unified API**: Synchronized with Python and Go equivalents.
14
+ - **Auto-Model Management**: Leverages [MonDevHub/monocr](https://huggingface.co/janakhpon/monocr) for automated model delivery.
15
+ - **PDF Support**: Intelligent document segmentation and multi-page processing.
16
+ - **Zero Dependencies**: Core OCR logic is lean and optimized.
24
17
 
25
- Recognize text from an image file.
18
+ ## API Reference
26
19
 
27
- **Parameters:**
20
+ ### `read_image(imagePath, [modelPath], [charsetPath])`
28
21
 
29
- - `imagePath` - Path to image file (jpg, png)
30
- - `modelPath` - (Optional) Path to ONNX model. Defaults to auto-downloaded model.
31
- - `charsetPath` - (Optional) Path to charset file. Defaults to auto-downloaded file.
22
+ Recognizes text from a single image.
32
23
 
33
- **Returns:** `Promise<string>` - Recognized text
24
+ - `imagePath`: String path to the image file.
25
+ - `modelPath`, `charsetPath`: Optional positional overrides for the ONNX model and charset file paths.
26
+ - **Returns**: `Promise<string>`
34
27
 
35
- ### read_pdf(pdfPath, modelPath, charsetPath)
28
+ ### `read_images(imagePaths, [modelPath], [charsetPath])`
36
29
 
37
- Recognize text from a PDF file.
30
+ Recognizes text from multiple images.
38
31
 
39
- **Parameters:**
32
+ - **Returns**: `Promise<string[]>`
40
33
 
41
- - `pdfPath` - Path to PDF file
42
- - `modelPath` - Path to ONNX model (optional)
43
- - `charsetPath` - Path to charset file (optional)
34
+ ### `read_pdf(pdfPath, [modelPath], [charsetPath])`
44
35
 
45
- **Returns:** `Promise<string[]>` - Array of text per page
36
+ Converts and recognizes text from all pages of a PDF.
46
37
 
47
- ### read_image_with_accuracy(imagePath, groundTruth, modelPath, charsetPath)
38
+ - **Returns**: `Promise<string[]>` (Array of strings per page)
48
39
 
49
- Recognize text with accuracy measurement.
40
+ ### `read_image_with_accuracy(imagePath, groundTruth, [modelPath], [charsetPath])`
50
41
 
51
- **Parameters:**
42
+ Performs OCR and calculates Levenshtein accuracy.
52
43
 
53
- - `imagePath` - Path to image file
54
- - `groundTruth` - Expected text for accuracy calculation
55
- - `modelPath` - Path to ONNX model (optional)
56
- - `charsetPath` - Path to charset file (optional)
44
+ - **Returns**: `Promise<{text: string, accuracy: number}>`
57
45
 
58
- **Returns:** `Promise<{text: string, accuracy: number}>` - Text and accuracy percentage
59
-
60
- ### MonOCR Class
61
-
62
- For advanced usage, use the `MonOCR` class directly:
46
+ ## Usage Example
63
47
 
64
48
  ```javascript
65
- const { MonOCR } = require("monocr");
66
-
67
- const ocr = new MonOCR("model.onnx", "charset.txt");
68
- await ocr.init();
49
+ const { read_image } = require("monocr");
69
50
 
70
- // Single line
71
- const text = await ocr.predictLine(imageSource);
51
+ async function main() {
52
+ const text = await read_image("scanned_text.png");
53
+ console.log(text);
54
+ }
72
55
 
73
- // Full page (with line segmentation)
74
- const results = await ocr.predictPage(imagePath);
56
+ main();
75
57
  ```
76
58
 
77
- ## CLI
78
-
79
- The package includes a command-line tool:
59
+ ## CLI Interface
80
60
 
81
61
  ```bash
82
- # Single image
83
- monocr image path/to/image.jpg
62
+ # Global installation for CLI usage
63
+ npm install -g monocr
84
64
 
85
- # PDF file
86
- monocr pdf path/to/document.pdf
65
+ # Process an image
66
+ monocr image input.jpg
87
67
 
88
- # Batch processing
89
- monocr batch path/to/images/ -o results.json
68
+ # Process a PDF
69
+ monocr pdf document.pdf
90
70
  ```
91
71
 
92
- ## Examples
93
-
94
- See the `examples/` directory for detailed usage examples:
95
-
96
- - `simple.js` - Basic image OCR
97
- - `with-accuracy.js` - OCR with accuracy measurement
98
- - `batch.js` - Batch processing
99
-
100
- Run examples:
101
-
102
- ```bash
103
- node examples/simple.js
104
- ```
105
-
106
- ## Model Files
107
-
108
- Models are **automatically downloaded** on first use to `~/.monocr/models/`.
109
-
110
- You can also trigger a manual download:
111
-
112
- ```bash
113
- monocr download
114
- ```
72
+ ## Maintenance
115
73
 
116
- Can also specify custom model paths if you prefer offline usage without the default cache.
74
+ Maintained by [MonDevHub](https://github.com/MonDevHub).
117
75
 
118
76
  ## License
119
77
 
package/bin/monocr.js CHANGED
@@ -14,7 +14,7 @@ program
14
14
  .command('image <path>')
15
15
  .description('Recognize text from an image file')
16
16
  .option('-m, --model <path>', 'Path to ONNX model (optional, auto-downloads)')
17
- .option('-c, --charset <path>', 'Path to charset file (optional, auto-downloads)')
17
+ .option('-c, --charset <path>', 'Path to charset file (optional)')
18
18
  .action(async (imagePath, options) => {
19
19
  try {
20
20
  const text = await read_image(imagePath, options.model, options.charset);
@@ -29,7 +29,7 @@ program
29
29
  .command('pdf <path>')
30
30
  .description('Recognize text from a PDF file')
31
31
  .option('-m, --model <path>', 'Path to ONNX model (optional, auto-downloads)')
32
- .option('-c, --charset <path>', 'Path to charset file (optional, auto-downloads)')
32
+ .option('-c, --charset <path>', 'Path to charset file (optional)')
33
33
  .action(async (pdfPath, options) => {
34
34
  try {
35
35
  const pages = await read_pdf(pdfPath, options.model, options.charset);
@@ -48,7 +48,7 @@ program
48
48
  .command('batch <directory>')
49
49
  .description('Process all images in a directory')
50
50
  .option('-m, --model <path>', 'Path to ONNX model (optional, auto-downloads)')
51
- .option('-c, --charset <path>', 'Path to charset file (optional, auto-downloads)')
51
+ .option('-c, --charset <path>', 'Path to charset file (optional)')
52
52
  .option('-o, --output <path>', 'Output file for results (optional)')
53
53
  .action(async (directory, options) => {
54
54
  try {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "monocr",
3
- "version": "0.1.3",
3
+ "version": "0.1.5",
4
4
  "description": "Cross-platform Mon (mnw) language OCR using ONNX Runtime. Supports Node.js.",
5
5
  "main": "src/index.js",
6
6
  "bin": {
@@ -8,7 +8,7 @@
8
8
  },
9
9
  "repository": {
10
10
  "type": "git",
11
- "url": "git+https://github.com/janakh/monocr-onnx.git",
11
+ "url": "git+https://github.com/MonDevHub/monocr-onnx.git",
12
12
  "directory": "js"
13
13
  },
14
14
  "keywords": [
@@ -21,9 +21,9 @@
21
21
  "author": "Janakh",
22
22
  "license": "MIT",
23
23
  "bugs": {
24
- "url": "https://github.com/janakh/monocr-onnx/issues"
24
+ "url": "https://github.com/MonDevHub/monocr-onnx/issues"
25
25
  },
26
- "homepage": "https://github.com/janakh/monocr-onnx/tree/main/js#readme",
26
+ "homepage": "https://github.com/MonDevHub/monocr-onnx/tree/main/js#readme",
27
27
  "files": [
28
28
  "src/",
29
29
  "bin/",
@@ -32,11 +32,11 @@
32
32
  ],
33
33
  "dependencies": {
34
34
  "commander": "^11.1.0",
35
- "onnxruntime-node": "^1.15.0",
36
- "pdf2pic": "^3.2.0",
35
+ "onnxruntime-node": "^1.24.1",
36
+ "pdf-img-convert": "^2.0.0",
37
37
  "sharp": "^0.32.0"
38
38
  },
39
39
  "publishConfig": {
40
40
  "access": "public"
41
41
  }
42
- }
42
+ }
package/src/index.js CHANGED
@@ -5,15 +5,17 @@ module.exports = {
5
5
  MonOCR,
6
6
  calculateAccuracy,
7
7
  read_image,
8
+ read_images,
8
9
  read_pdf,
10
+ read_pdfs,
9
11
  read_image_with_accuracy
10
12
  };
11
13
 
12
14
  /**
13
15
  * Read text from an image file
14
16
  * @param {string} imagePath - Path to image file
15
- * @param {string} modelPath - Path to ONNX model (optional, auto-downloads if not provided)
16
- * @param {string} charsetPath - Path to charset file (optional, auto-downloads if not provided)
17
+ * @param {string} modelPath - Path to ONNX model (optional)
18
+ * @param {string} charsetPath - Path to charset file (optional)
17
19
  * @returns {Promise<string>} Recognized text
18
20
  */
19
21
  async function read_image(imagePath, modelPath = null, charsetPath = null) {
@@ -23,6 +25,25 @@ async function read_image(imagePath, modelPath = null, charsetPath = null) {
23
25
  return results.map(r => r.text).join('\n');
24
26
  }
25
27
 
28
+ /**
29
+ * Read text from multiple image files
30
+ * @param {string[]} imagePaths - Array of paths to image files
31
+ * @param {string} modelPath - Path to ONNX model (optional)
32
+ * @param {string} charsetPath - Path to charset file (optional)
33
+ * @returns {Promise<string[]>} Array of recognized text
34
+ */
35
+ async function read_images(imagePaths, modelPath = null, charsetPath = null) {
36
+ const ocr = new MonOCR(modelPath, charsetPath);
37
+ await ocr.init();
38
+
39
+ const results = [];
40
+ for (const path of imagePaths) {
41
+ const pageResults = await ocr.predictPage(path);
42
+ results.push(pageResults.map(r => r.text).join('\n'));
43
+ }
44
+ return results;
45
+ }
46
+
26
47
  /**
27
48
  * Read text from a PDF file
28
49
  * @param {string} pdfPath - Path to PDF file
@@ -31,45 +52,60 @@ async function read_image(imagePath, modelPath = null, charsetPath = null) {
31
52
  * @returns {Promise<string[]>} Array of text per page
32
53
  */
33
54
  async function read_pdf(pdfPath, modelPath = null, charsetPath = null) {
34
- const { fromPath } = require('pdf2pic');
55
+ const pdfImgConvert = require('pdf-img-convert');
35
56
  const path = require('path');
36
57
  const fs = require('fs');
37
- const os = require('os');
38
58
 
59
+ // Initialize OCR
39
60
  const ocr = new MonOCR(modelPath, charsetPath);
40
61
  await ocr.init();
41
62
 
42
- const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'monocr-'));
43
-
44
- const converter = fromPath(pdfPath, {
45
- density: 300,
46
- format: 'png',
47
- width: 2480,
48
- height: 3508,
49
- saveFilename: 'page',
50
- savePath: tempDir
51
- });
52
-
53
- const pages = [];
54
- let pageNum = 1;
55
-
56
- while (true) {
57
- try {
58
- const result = await converter(pageNum, { responseType: 'image' });
59
- const imagePath = result.path;
60
- const results = await ocr.predictPage(imagePath);
63
+ try {
64
+ // Convert PDF to image buffers (returns Uint8Array[])
65
+ // pdf-img-convert handles parsing internally using pdf.js
66
+ const imageBuffers = await pdfImgConvert.convert(pdfPath, {
67
+ width: 2480, // High resolution for OCR
68
+ height: 3508,
69
+ page_numbers: [] // All pages
70
+ });
71
+
72
+ if (!imageBuffers || imageBuffers.length === 0) {
73
+ throw new Error("Failed to convert PDF: No images generated");
74
+ }
75
+
76
+ const pages = [];
77
+
78
+ for (let i = 0; i < imageBuffers.length; i++) {
79
+ // pdf-img-convert returns Uint8Array (buffer-like)
80
+ // MonOCR's predictPage expects a file path or sharp-compatible input
81
+ // sharp can take a Buffer.
82
+ const buffer = Buffer.from(imageBuffers[i]);
83
+
84
+ const results = await ocr.predictPage(buffer);
61
85
  const pageText = results.map(r => r.text).join('\n');
62
86
  pages.push(pageText);
63
- pageNum++;
64
- } catch (err) {
65
- break;
66
87
  }
88
+
89
+ return pages;
90
+ } catch (err) {
91
+ throw new Error(`Failed to process PDF: ${err.message}`);
67
92
  }
68
-
69
- // Cleanup temp directory
70
- fs.rmSync(tempDir, { recursive: true, force: true });
71
-
72
- return pages;
93
+ }
94
+
95
+ /**
96
+ * Read text from multiple PDF files
97
+ * @param {string[]} pdfPaths - Array of paths to PDF files
98
+ * @param {string} modelPath - Path to ONNX model (optional)
99
+ * @param {string} charsetPath - Path to charset file (optional)
100
+ * @returns {Promise<string[][]>} Array of arrays of text per page
101
+ */
102
+ async function read_pdfs(pdfPaths, modelPath = null, charsetPath = null) {
103
+ const results = [];
104
+ for (const path of pdfPaths) {
105
+ const pages = await read_pdf(path, modelPath, charsetPath);
106
+ results.push(pages);
107
+ }
108
+ return results;
73
109
  }
74
110
 
75
111
  /**
package/src/utils.js CHANGED
@@ -6,18 +6,38 @@
6
6
  */
7
7
  function calculateAccuracy(predicted, groundTruth) {
8
8
  if (!groundTruth) return 0;
9
+ if (!predicted) return 0;
9
10
 
10
- const len = Math.max(predicted.length, groundTruth.length);
11
- if (len === 0) return 100;
11
+ const s1 = predicted;
12
+ const s2 = groundTruth;
12
13
 
13
- let errors = 0;
14
- for (let i = 0; i < len; i++) {
15
- if (predicted[i] !== groundTruth[i]) {
16
- errors++;
14
+ const track = Array(s2.length + 1).fill(null).map(() =>
15
+ Array(s1.length + 1).fill(null));
16
+
17
+ for (let i = 0; i <= s1.length; i += 1) {
18
+ track[0][i] = i;
19
+ }
20
+ for (let j = 0; j <= s2.length; j += 1) {
21
+ track[j][0] = j;
22
+ }
23
+
24
+ for (let j = 1; j <= s2.length; j += 1) {
25
+ for (let i = 1; i <= s1.length; i += 1) {
26
+ const indicator = s1[i - 1] === s2[j - 1] ? 0 : 1;
27
+ track[j][i] = Math.min(
28
+ track[j][i - 1] + 1, // deletion
29
+ track[j - 1][i] + 1, // insertion
30
+ track[j - 1][i - 1] + indicator, // substitution
31
+ );
17
32
  }
18
33
  }
19
34
 
20
- return ((1 - errors / len) * 100).toFixed(2);
35
+ const distance = track[s2.length][s1.length];
36
+ const maxLen = Math.max(s1.length, s2.length);
37
+
38
+ if (maxLen === 0) return 100;
39
+
40
+ return ((1 - distance / maxLen) * 100).toFixed(2);
21
41
  }
22
42
 
23
43
  module.exports = {