monocr 0.1.4 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +40 -40
- package/package.json +7 -7
- package/src/index.js +67 -31
- package/src/utils.js +27 -7
package/README.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
#
|
|
1
|
+
# MonOCR (JavaScript SDK)
|
|
2
2
|
|
|
3
|
-
Mon language (mnw) OCR for Node.js.
|
|
3
|
+
The professional JavaScript SDK for Mon language OCR, powered by ONNX Runtime. Designed for high-performance server-side and desktop Node.js applications.
|
|
4
4
|
|
|
5
5
|
## Installation
|
|
6
6
|
|
|
@@ -8,70 +8,70 @@ Mon language (mnw) OCR for Node.js.
|
|
|
8
8
|
npm install monocr
|
|
9
9
|
```
|
|
10
10
|
|
|
11
|
-
##
|
|
11
|
+
## Features
|
|
12
12
|
|
|
13
|
-
|
|
14
|
-
|
|
13
|
+
- **Unified API**: Synchronized with Python and Go equivalents.
|
|
14
|
+
- **Auto-Model Management**: Leverages [MonDevHub/monocr](https://huggingface.co/janakhpon/monocr) for automated model delivery.
|
|
15
|
+
- **PDF Support**: Intelligent document segmentation and multi-page processing.
|
|
16
|
+
- **Zero Dependencies**: Core OCR logic is lean and optimized.
|
|
15
17
|
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
18
|
+
## API Reference
|
|
19
|
+
|
|
20
|
+
### `read_image(imagePath, [options])`
|
|
21
|
+
|
|
22
|
+
Recognizes text from a single image.
|
|
20
23
|
|
|
21
|
-
|
|
24
|
+
- `imagePath`: String path to the image file.
|
|
25
|
+
- `options`: Optional overrides for model/charset paths.
|
|
26
|
+
- **Returns**: `Promise<string>`
|
|
22
27
|
|
|
23
|
-
### `
|
|
28
|
+
### `read_images(imagePaths, [options])`
|
|
24
29
|
|
|
25
|
-
Recognizes text from
|
|
30
|
+
Recognizes text from multiple images.
|
|
26
31
|
|
|
27
|
-
- `
|
|
28
|
-
- `modelPath` (string, optional): Path to ONNX model. Defaults to `~/.monocr/models/monocr.onnx`.
|
|
29
|
-
- `charsetPath` (string, optional): Path to charset file. Defaults to bundled charset.
|
|
32
|
+
- **Returns**: `Promise<string[]>`
|
|
30
33
|
|
|
31
|
-
|
|
34
|
+
### `read_pdf(pdfPath, [options])`
|
|
32
35
|
|
|
33
|
-
|
|
36
|
+
Converts and recognizes text from all pages of a PDF.
|
|
34
37
|
|
|
35
|
-
|
|
38
|
+
- **Returns**: `Promise<string[]>` (Array of strings per page)
|
|
36
39
|
|
|
37
|
-
|
|
38
|
-
- `modelPath` (string, optional): As above.
|
|
39
|
-
- `charsetPath` (string, optional): As above.
|
|
40
|
+
### `read_image_with_accuracy(imagePath, groundTruth, [options])`
|
|
40
41
|
|
|
41
|
-
|
|
42
|
+
Performs OCR and calculates Levenshtein accuracy.
|
|
42
43
|
|
|
43
|
-
|
|
44
|
+
- **Returns**: `Promise<{text: string, accuracy: number}>`
|
|
44
45
|
|
|
45
|
-
|
|
46
|
+
## Usage Example
|
|
46
47
|
|
|
47
|
-
|
|
48
|
-
|
|
48
|
+
```javascript
|
|
49
|
+
const { read_image } = require("monocr");
|
|
49
50
|
|
|
50
|
-
|
|
51
|
+
async function main() {
|
|
52
|
+
const text = await read_image("scanned_text.png");
|
|
53
|
+
console.log(text);
|
|
54
|
+
}
|
|
51
55
|
|
|
52
|
-
|
|
56
|
+
main();
|
|
57
|
+
```
|
|
53
58
|
|
|
54
|
-
|
|
59
|
+
## CLI Interface
|
|
55
60
|
|
|
56
61
|
```bash
|
|
57
|
-
#
|
|
58
|
-
monocr
|
|
62
|
+
# Global installation for CLI usage
|
|
63
|
+
npm install -g monocr
|
|
59
64
|
|
|
60
|
-
#
|
|
65
|
+
# Process an image
|
|
61
66
|
monocr image input.jpg
|
|
62
67
|
|
|
63
|
-
#
|
|
68
|
+
# Process a PDF
|
|
64
69
|
monocr pdf document.pdf
|
|
65
|
-
|
|
66
|
-
# Batch process directory
|
|
67
|
-
monocr batch ./images -o results.json
|
|
68
70
|
```
|
|
69
71
|
|
|
70
|
-
##
|
|
71
|
-
|
|
72
|
-
The ONNX model (`monocr.onnx`) is downloaded automatically to `~/.monocr/models/` on first use. The charset file is bundled with the package.
|
|
72
|
+
## Maintenance
|
|
73
73
|
|
|
74
|
-
|
|
74
|
+
Maintained by [MonDevHub](https://github.com/MonDevHub).
|
|
75
75
|
|
|
76
76
|
## License
|
|
77
77
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "monocr",
|
|
3
|
-
"version": "0.1.4",
|
|
3
|
+
"version": "0.1.5",
|
|
4
4
|
"description": "Cross-platform Mon (mnw) language OCR using ONNX Runtime. Supports Node.js.",
|
|
5
5
|
"main": "src/index.js",
|
|
6
6
|
"bin": {
|
|
@@ -8,7 +8,7 @@
|
|
|
8
8
|
},
|
|
9
9
|
"repository": {
|
|
10
10
|
"type": "git",
|
|
11
|
-
"url": "git+https://github.com/
|
|
11
|
+
"url": "git+https://github.com/MonDevHub/monocr-onnx.git",
|
|
12
12
|
"directory": "js"
|
|
13
13
|
},
|
|
14
14
|
"keywords": [
|
|
@@ -21,9 +21,9 @@
|
|
|
21
21
|
"author": "Janakh",
|
|
22
22
|
"license": "MIT",
|
|
23
23
|
"bugs": {
|
|
24
|
-
"url": "https://github.com/
|
|
24
|
+
"url": "https://github.com/MonDevHub/monocr-onnx/issues"
|
|
25
25
|
},
|
|
26
|
-
"homepage": "https://github.com/
|
|
26
|
+
"homepage": "https://github.com/MonDevHub/monocr-onnx/tree/main/js#readme",
|
|
27
27
|
"files": [
|
|
28
28
|
"src/",
|
|
29
29
|
"bin/",
|
|
@@ -32,11 +32,11 @@
|
|
|
32
32
|
],
|
|
33
33
|
"dependencies": {
|
|
34
34
|
"commander": "^11.1.0",
|
|
35
|
-
"onnxruntime-node": "^1.
|
|
36
|
-
"
|
|
35
|
+
"onnxruntime-node": "^1.24.1",
|
|
36
|
+
"pdf-img-convert": "^2.0.0",
|
|
37
37
|
"sharp": "^0.32.0"
|
|
38
38
|
},
|
|
39
39
|
"publishConfig": {
|
|
40
40
|
"access": "public"
|
|
41
41
|
}
|
|
42
|
-
}
|
|
42
|
+
}
|
package/src/index.js
CHANGED
|
@@ -5,15 +5,17 @@ module.exports = {
|
|
|
5
5
|
MonOCR,
|
|
6
6
|
calculateAccuracy,
|
|
7
7
|
read_image,
|
|
8
|
+
read_images,
|
|
8
9
|
read_pdf,
|
|
10
|
+
read_pdfs,
|
|
9
11
|
read_image_with_accuracy
|
|
10
12
|
};
|
|
11
13
|
|
|
12
14
|
/**
|
|
13
15
|
* Read text from an image file
|
|
14
16
|
* @param {string} imagePath - Path to image file
|
|
15
|
-
* @param {string} modelPath - Path to ONNX model (optional
|
|
16
|
-
* @param {string} charsetPath - Path to charset file (optional
|
|
17
|
+
* @param {string} modelPath - Path to ONNX model (optional)
|
|
18
|
+
* @param {string} charsetPath - Path to charset file (optional)
|
|
17
19
|
* @returns {Promise<string>} Recognized text
|
|
18
20
|
*/
|
|
19
21
|
async function read_image(imagePath, modelPath = null, charsetPath = null) {
|
|
@@ -23,6 +25,25 @@ async function read_image(imagePath, modelPath = null, charsetPath = null) {
|
|
|
23
25
|
return results.map(r => r.text).join('\n');
|
|
24
26
|
}
|
|
25
27
|
|
|
28
|
+
/**
|
|
29
|
+
* Read text from multiple image files
|
|
30
|
+
* @param {string[]} imagePaths - Array of paths to image files
|
|
31
|
+
* @param {string} modelPath - Path to ONNX model (optional)
|
|
32
|
+
* @param {string} charsetPath - Path to charset file (optional)
|
|
33
|
+
* @returns {Promise<string[]>} Array of recognized text
|
|
34
|
+
*/
|
|
35
|
+
async function read_images(imagePaths, modelPath = null, charsetPath = null) {
|
|
36
|
+
const ocr = new MonOCR(modelPath, charsetPath);
|
|
37
|
+
await ocr.init();
|
|
38
|
+
|
|
39
|
+
const results = [];
|
|
40
|
+
for (const path of imagePaths) {
|
|
41
|
+
const pageResults = await ocr.predictPage(path);
|
|
42
|
+
results.push(pageResults.map(r => r.text).join('\n'));
|
|
43
|
+
}
|
|
44
|
+
return results;
|
|
45
|
+
}
|
|
46
|
+
|
|
26
47
|
/**
|
|
27
48
|
* Read text from a PDF file
|
|
28
49
|
* @param {string} pdfPath - Path to PDF file
|
|
@@ -31,45 +52,60 @@ async function read_image(imagePath, modelPath = null, charsetPath = null) {
|
|
|
31
52
|
* @returns {Promise<string[]>} Array of text per page
|
|
32
53
|
*/
|
|
33
54
|
async function read_pdf(pdfPath, modelPath = null, charsetPath = null) {
|
|
34
|
-
const
|
|
55
|
+
const pdfImgConvert = require('pdf-img-convert');
|
|
35
56
|
const path = require('path');
|
|
36
57
|
const fs = require('fs');
|
|
37
|
-
const os = require('os');
|
|
38
58
|
|
|
59
|
+
// Initialize OCR
|
|
39
60
|
const ocr = new MonOCR(modelPath, charsetPath);
|
|
40
61
|
await ocr.init();
|
|
41
62
|
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
63
|
+
try {
|
|
64
|
+
// Convert PDF to image buffers (returns Uint8Array[])
|
|
65
|
+
// pdf-img-convert handles parsing internally using pdf.js
|
|
66
|
+
const imageBuffers = await pdfImgConvert.convert(pdfPath, {
|
|
67
|
+
width: 2480, // High resolution for OCR
|
|
68
|
+
height: 3508,
|
|
69
|
+
page_numbers: [] // All pages
|
|
70
|
+
});
|
|
71
|
+
|
|
72
|
+
if (!imageBuffers || imageBuffers.length === 0) {
|
|
73
|
+
throw new Error("Failed to convert PDF: No images generated");
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
const pages = [];
|
|
77
|
+
|
|
78
|
+
for (let i = 0; i < imageBuffers.length; i++) {
|
|
79
|
+
// pdf-img-convert returns Uint8Array (buffer-like)
|
|
80
|
+
// MonOCR's predictPage expects a file path or sharp-compatible input
|
|
81
|
+
// sharp can take a Buffer.
|
|
82
|
+
const buffer = Buffer.from(imageBuffers[i]);
|
|
83
|
+
|
|
84
|
+
const results = await ocr.predictPage(buffer);
|
|
61
85
|
const pageText = results.map(r => r.text).join('\n');
|
|
62
86
|
pages.push(pageText);
|
|
63
|
-
pageNum++;
|
|
64
|
-
} catch (err) {
|
|
65
|
-
break;
|
|
66
87
|
}
|
|
88
|
+
|
|
89
|
+
return pages;
|
|
90
|
+
} catch (err) {
|
|
91
|
+
throw new Error(`Failed to process PDF: ${err.message}`);
|
|
67
92
|
}
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
93
|
+
}
|
|
94
|
+
|
|
95
|
+
/**
|
|
96
|
+
* Read text from multiple PDF files
|
|
97
|
+
* @param {string[]} pdfPaths - Array of paths to PDF files
|
|
98
|
+
* @param {string} modelPath - Path to ONNX model (optional)
|
|
99
|
+
* @param {string} charsetPath - Path to charset file (optional)
|
|
100
|
+
* @returns {Promise<string[][]>} Array of arrays of text per page
|
|
101
|
+
*/
|
|
102
|
+
async function read_pdfs(pdfPaths, modelPath = null, charsetPath = null) {
|
|
103
|
+
const results = [];
|
|
104
|
+
for (const path of pdfPaths) {
|
|
105
|
+
const pages = await read_pdf(path, modelPath, charsetPath);
|
|
106
|
+
results.push(pages);
|
|
107
|
+
}
|
|
108
|
+
return results;
|
|
73
109
|
}
|
|
74
110
|
|
|
75
111
|
/**
|
package/src/utils.js
CHANGED
|
@@ -6,18 +6,38 @@
|
|
|
6
6
|
*/
|
|
7
7
|
function calculateAccuracy(predicted, groundTruth) {
|
|
8
8
|
if (!groundTruth) return 0;
|
|
9
|
+
if (!predicted) return 0;
|
|
9
10
|
|
|
10
|
-
const
|
|
11
|
-
|
|
11
|
+
const s1 = predicted;
|
|
12
|
+
const s2 = groundTruth;
|
|
12
13
|
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
14
|
+
const track = Array(s2.length + 1).fill(null).map(() =>
|
|
15
|
+
Array(s1.length + 1).fill(null));
|
|
16
|
+
|
|
17
|
+
for (let i = 0; i <= s1.length; i += 1) {
|
|
18
|
+
track[0][i] = i;
|
|
19
|
+
}
|
|
20
|
+
for (let j = 0; j <= s2.length; j += 1) {
|
|
21
|
+
track[j][0] = j;
|
|
22
|
+
}
|
|
23
|
+
|
|
24
|
+
for (let j = 1; j <= s2.length; j += 1) {
|
|
25
|
+
for (let i = 1; i <= s1.length; i += 1) {
|
|
26
|
+
const indicator = s1[i - 1] === s2[j - 1] ? 0 : 1;
|
|
27
|
+
track[j][i] = Math.min(
|
|
28
|
+
track[j][i - 1] + 1, // deletion
|
|
29
|
+
track[j - 1][i] + 1, // insertion
|
|
30
|
+
track[j - 1][i - 1] + indicator, // substitution
|
|
31
|
+
);
|
|
17
32
|
}
|
|
18
33
|
}
|
|
19
34
|
|
|
20
|
-
|
|
35
|
+
const distance = track[s2.length][s1.length];
|
|
36
|
+
const maxLen = Math.max(s1.length, s2.length);
|
|
37
|
+
|
|
38
|
+
if (maxLen === 0) return 100;
|
|
39
|
+
|
|
40
|
+
return ((1 - distance / maxLen) * 100).toFixed(2);
|
|
21
41
|
}
|
|
22
42
|
|
|
23
43
|
module.exports = {
|