monocr 0.1.4 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,6 +1,6 @@
1
- # monocr
1
+ # MonOCR (JavaScript SDK)
2
2
 
3
- Mon language (mnw) OCR for Node.js.
3
+ The professional JavaScript SDK for Mon language OCR, powered by ONNX Runtime. Designed for high-performance server-side and desktop Node.js applications.
4
4
 
5
5
  ## Installation
6
6
 
@@ -8,70 +8,70 @@ Mon language (mnw) OCR for Node.js.
8
8
  npm install monocr
9
9
  ```
10
10
 
11
- ## Quick Start
11
+ ## Features
12
12
 
13
- ```javascript
14
- const { read_image } = require("monocr");
13
+ - **Unified API**: Synchronized with Python and Go equivalents.
14
+ - **Auto-Model Management**: Leverages [MonDevHub/monocr](https://huggingface.co/janakhpon/monocr) for automated model delivery.
15
+ - **PDF Support**: Intelligent document segmentation and multi-page processing.
16
+ - **Lean Core**: Core OCR logic is lean and optimized; runtime dependencies are limited to `onnxruntime-node`, `sharp`, `pdf-img-convert`, and `commander`.
15
17
 
16
- // Automatically downloads model on first run
17
- const text = await read_image("image.jpg");
18
- console.log(text);
19
- ```
18
+ ## API Reference
19
+
20
+ ### `read_image(imagePath, [modelPath], [charsetPath])`
21
+
22
+ Recognizes text from a single image.
20
23
 
21
- ## API
24
+ - `imagePath`: String path to the image file.
25
+ - `modelPath`, `charsetPath`: Optional string paths that override the default ONNX model and charset files.
26
+ - **Returns**: `Promise<string>`
22
27
 
23
- ### `read_image(imagePath, [modelPath], [charsetPath])`
28
+ ### `read_images(imagePaths, [modelPath], [charsetPath])`
24
29
 
25
- Recognizes text from an image file.
30
+ Recognizes text from multiple images.
26
31
 
27
- - `imagePath` (string): Path to image file.
28
- - `modelPath` (string, optional): Path to ONNX model. Defaults to `~/.monocr/models/monocr.onnx`.
29
- - `charsetPath` (string, optional): Path to charset file. Defaults to bundled charset.
32
+ - **Returns**: `Promise<string[]>`
30
33
 
31
- Returns: `Promise<string>`
34
+ ### `read_pdf(pdfPath, [modelPath], [charsetPath])`
32
35
 
33
- ### `read_pdf(pdfPath, [modelPath], [charsetPath])`
36
+ Converts and recognizes text from all pages of a PDF.
34
37
 
35
- Recognizes text from a PDF file.
38
+ - **Returns**: `Promise<string[]>` (one string of recognized text per page)
36
39
 
37
- - `pdfPath` (string): Path to PDF file.
38
- - `modelPath` (string, optional): As above.
39
- - `charsetPath` (string, optional): As above.
40
+ ### `read_image_with_accuracy(imagePath, groundTruth, [modelPath], [charsetPath])`
40
41
 
41
- Returns: `Promise<string[]>` (Array of text per page)
42
+ Performs OCR and measures the accuracy of the result against `groundTruth` using Levenshtein edit distance.
42
43
 
43
- ### `read_image_with_accuracy(imagePath, groundTruth, [modelPath], [charsetPath])`
44
+ - **Returns**: `Promise<{text: string, accuracy: number}>`
44
45
 
45
- Recognizes text and calculates accuracy against ground truth.
46
+ ## Usage Example
46
47
 
47
- - `imagePath` (string): Path to image file.
48
- - `groundTruth` (string): Expected text.
48
+ ```javascript
49
+ const { read_image } = require("monocr");
49
50
 
50
- Returns: `Promise<{text: string, accuracy: number}>`
51
+ async function main() {
52
+ const text = await read_image("scanned_text.png");
53
+ console.log(text);
54
+ }
51
55
 
52
- ## CLI Usage
56
+ main();
57
+ ```
53
58
 
54
- The package includes a `monocr` command-line tool.
59
+ ## CLI Interface
55
60
 
56
61
  ```bash
57
- # Download model to cache (optional, happens automatically on first use)
58
- monocr download
62
+ # Global installation for CLI usage
63
+ npm install -g monocr
59
64
 
60
- # Recognize single image
65
+ # Process an image
61
66
  monocr image input.jpg
62
67
 
63
- # Recognize PDF
68
+ # Process a PDF
64
69
  monocr pdf document.pdf
65
-
66
- # Batch process directory
67
- monocr batch ./images -o results.json
68
70
  ```
69
71
 
70
- ## Model Files
71
-
72
- The ONNX model (`monocr.onnx`) is downloaded automatically to `~/.monocr/models/` on first use. The charset file is bundled with the package.
72
+ ## Maintenance
73
73
 
74
- To use a custom model, provide the `modelPath` argument to the API functions or CLI.
74
+ Maintained by [MonDevHub](https://github.com/MonDevHub).
75
75
 
76
76
  ## License
77
77
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "monocr",
3
- "version": "0.1.4",
3
+ "version": "0.1.5",
4
4
  "description": "Cross-platform Mon (mnw) language OCR using ONNX Runtime. Supports Node.js.",
5
5
  "main": "src/index.js",
6
6
  "bin": {
@@ -8,7 +8,7 @@
8
8
  },
9
9
  "repository": {
10
10
  "type": "git",
11
- "url": "git+https://github.com/janakh/monocr-onnx.git",
11
+ "url": "git+https://github.com/MonDevHub/monocr-onnx.git",
12
12
  "directory": "js"
13
13
  },
14
14
  "keywords": [
@@ -21,9 +21,9 @@
21
21
  "author": "Janakh",
22
22
  "license": "MIT",
23
23
  "bugs": {
24
- "url": "https://github.com/janakh/monocr-onnx/issues"
24
+ "url": "https://github.com/MonDevHub/monocr-onnx/issues"
25
25
  },
26
- "homepage": "https://github.com/janakh/monocr-onnx/tree/main/js#readme",
26
+ "homepage": "https://github.com/MonDevHub/monocr-onnx/tree/main/js#readme",
27
27
  "files": [
28
28
  "src/",
29
29
  "bin/",
@@ -32,11 +32,11 @@
32
32
  ],
33
33
  "dependencies": {
34
34
  "commander": "^11.1.0",
35
- "onnxruntime-node": "^1.15.0",
36
- "pdf2pic": "^3.2.0",
35
+ "onnxruntime-node": "^1.24.1",
36
+ "pdf-img-convert": "^2.0.0",
37
37
  "sharp": "^0.32.0"
38
38
  },
39
39
  "publishConfig": {
40
40
  "access": "public"
41
41
  }
42
- }
42
+ }
package/src/index.js CHANGED
@@ -5,15 +5,17 @@ module.exports = {
5
5
  MonOCR,
6
6
  calculateAccuracy,
7
7
  read_image,
8
+ read_images,
8
9
  read_pdf,
10
+ read_pdfs,
9
11
  read_image_with_accuracy
10
12
  };
11
13
 
12
14
  /**
13
15
  * Read text from an image file
14
16
  * @param {string} imagePath - Path to image file
15
- * @param {string} modelPath - Path to ONNX model (optional, auto-downloads if not provided)
16
- * @param {string} charsetPath - Path to charset file (optional, auto-downloads if not provided)
17
+ * @param {string} modelPath - Path to ONNX model (optional)
18
+ * @param {string} charsetPath - Path to charset file (optional)
17
19
  * @returns {Promise<string>} Recognized text
18
20
  */
19
21
  async function read_image(imagePath, modelPath = null, charsetPath = null) {
@@ -23,6 +25,25 @@ async function read_image(imagePath, modelPath = null, charsetPath = null) {
23
25
  return results.map(r => r.text).join('\n');
24
26
  }
25
27
 
28
+ /**
29
+ * Read text from multiple image files
30
+ * @param {string[]} imagePaths - Array of paths to image files
31
+ * @param {string} modelPath - Path to ONNX model (optional)
32
+ * @param {string} charsetPath - Path to charset file (optional)
33
+ * @returns {Promise<string[]>} Array of recognized text
34
+ */
35
+ async function read_images(imagePaths, modelPath = null, charsetPath = null) {
36
+ const ocr = new MonOCR(modelPath, charsetPath);
37
+ await ocr.init();
38
+
39
+ const results = [];
40
+ for (const path of imagePaths) {
41
+ const pageResults = await ocr.predictPage(path);
42
+ results.push(pageResults.map(r => r.text).join('\n'));
43
+ }
44
+ return results;
45
+ }
46
+
26
47
  /**
27
48
  * Read text from a PDF file
28
49
  * @param {string} pdfPath - Path to PDF file
@@ -31,45 +52,60 @@ async function read_image(imagePath, modelPath = null, charsetPath = null) {
31
52
  * @returns {Promise<string[]>} Array of text per page
32
53
  */
33
54
  async function read_pdf(pdfPath, modelPath = null, charsetPath = null) {
34
- const { fromPath } = require('pdf2pic');
55
+ const pdfImgConvert = require('pdf-img-convert');
35
56
  const path = require('path');
36
57
  const fs = require('fs');
37
- const os = require('os');
38
58
 
59
+ // Initialize OCR
39
60
  const ocr = new MonOCR(modelPath, charsetPath);
40
61
  await ocr.init();
41
62
 
42
- const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'monocr-'));
43
-
44
- const converter = fromPath(pdfPath, {
45
- density: 300,
46
- format: 'png',
47
- width: 2480,
48
- height: 3508,
49
- saveFilename: 'page',
50
- savePath: tempDir
51
- });
52
-
53
- const pages = [];
54
- let pageNum = 1;
55
-
56
- while (true) {
57
- try {
58
- const result = await converter(pageNum, { responseType: 'image' });
59
- const imagePath = result.path;
60
- const results = await ocr.predictPage(imagePath);
63
+ try {
64
+ // Convert PDF to image buffers (returns Uint8Array[])
65
+ // pdf-img-convert handles parsing internally using pdf.js
66
+ const imageBuffers = await pdfImgConvert.convert(pdfPath, {
67
+ width: 2480, // High resolution for OCR
68
+ height: 3508,
69
+ page_numbers: [] // All pages
70
+ });
71
+
72
+ if (!imageBuffers || imageBuffers.length === 0) {
73
+ throw new Error("Failed to convert PDF: No images generated");
74
+ }
75
+
76
+ const pages = [];
77
+
78
+ for (let i = 0; i < imageBuffers.length; i++) {
79
+ // pdf-img-convert returns Uint8Array (buffer-like)
80
+ // MonOCR's predictPage expects a file path or sharp-compatible input
81
+ // sharp can take a Buffer.
82
+ const buffer = Buffer.from(imageBuffers[i]);
83
+
84
+ const results = await ocr.predictPage(buffer);
61
85
  const pageText = results.map(r => r.text).join('\n');
62
86
  pages.push(pageText);
63
- pageNum++;
64
- } catch (err) {
65
- break;
66
87
  }
88
+
89
+ return pages;
90
+ } catch (err) {
91
+ throw new Error(`Failed to process PDF: ${err.message}`);
67
92
  }
68
-
69
- // Cleanup temp directory
70
- fs.rmSync(tempDir, { recursive: true, force: true });
71
-
72
- return pages;
93
+ }
94
+
95
+ /**
96
+ * Read text from multiple PDF files
97
+ * @param {string[]} pdfPaths - Array of paths to PDF files
98
+ * @param {string} modelPath - Path to ONNX model (optional)
99
+ * @param {string} charsetPath - Path to charset file (optional)
100
+ * @returns {Promise<string[][]>} Array of arrays of text per page
101
+ */
102
+ async function read_pdfs(pdfPaths, modelPath = null, charsetPath = null) {
103
+ const results = [];
104
+ for (const path of pdfPaths) {
105
+ const pages = await read_pdf(path, modelPath, charsetPath);
106
+ results.push(pages);
107
+ }
108
+ return results;
73
109
  }
74
110
 
75
111
  /**
package/src/utils.js CHANGED
@@ -6,18 +6,38 @@
6
6
  */
7
7
  function calculateAccuracy(predicted, groundTruth) {
8
8
  if (!groundTruth) return 0;
9
+ if (!predicted) return 0;
9
10
 
10
- const len = Math.max(predicted.length, groundTruth.length);
11
- if (len === 0) return 100;
11
+ const s1 = predicted;
12
+ const s2 = groundTruth;
12
13
 
13
- let errors = 0;
14
- for (let i = 0; i < len; i++) {
15
- if (predicted[i] !== groundTruth[i]) {
16
- errors++;
14
+ const track = Array(s2.length + 1).fill(null).map(() =>
15
+ Array(s1.length + 1).fill(null));
16
+
17
+ for (let i = 0; i <= s1.length; i += 1) {
18
+ track[0][i] = i;
19
+ }
20
+ for (let j = 0; j <= s2.length; j += 1) {
21
+ track[j][0] = j;
22
+ }
23
+
24
+ for (let j = 1; j <= s2.length; j += 1) {
25
+ for (let i = 1; i <= s1.length; i += 1) {
26
+ const indicator = s1[i - 1] === s2[j - 1] ? 0 : 1;
27
+ track[j][i] = Math.min(
28
+ track[j][i - 1] + 1, // deletion
29
+ track[j - 1][i] + 1, // insertion
30
+ track[j - 1][i - 1] + indicator, // substitution
31
+ );
17
32
  }
18
33
  }
19
34
 
20
- return ((1 - errors / len) * 100).toFixed(2);
35
+ const distance = track[s2.length][s1.length];
36
+ const maxLen = Math.max(s1.length, s2.length);
37
+
38
+ if (maxLen === 0) return 100;
39
+
40
+ return ((1 - distance / maxLen) * 100).toFixed(2);
21
41
  }
22
42
 
23
43
  module.exports = {