monocr 0.1.3 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +38 -80
- package/bin/monocr.js +3 -3
- package/package.json +7 -7
- package/src/index.js +67 -31
- package/src/utils.js +27 -7
package/README.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
#
|
|
1
|
+
# MonOCR (JavaScript SDK)
|
|
2
2
|
|
|
3
|
-
Mon language OCR
|
|
3
|
+
The professional JavaScript SDK for Mon language OCR, powered by ONNX Runtime. Designed for high-performance server-side and desktop Node.js applications.
|
|
4
4
|
|
|
5
5
|
## Installation
|
|
6
6
|
|
|
@@ -8,112 +8,70 @@ Mon language OCR using ONNX Runtime for Node.js applications.
|
|
|
8
8
|
npm install monocr
|
|
9
9
|
```
|
|
10
10
|
|
|
11
|
-
##
|
|
11
|
+
## Features
|
|
12
12
|
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
const text = await read_image("path/to/image.jpg");
|
|
18
|
-
console.log(text);
|
|
19
|
-
```
|
|
20
|
-
|
|
21
|
-
## API
|
|
22
|
-
|
|
23
|
-
### read_image(imagePath, [modelPath], [charsetPath])
|
|
13
|
+
- **Unified API**: Synchronized with Python and Go equivalents.
|
|
14
|
+
- **Auto-Model Management**: Leverages [MonDevHub/monocr](https://huggingface.co/janakhpon/monocr) for automated model delivery.
|
|
15
|
+
- **PDF Support**: Intelligent document segmentation and multi-page processing.
|
|
16
|
+
- **Zero Dependencies**: Core OCR logic is lean and optimized.
|
|
24
17
|
|
|
25
|
-
|
|
18
|
+
## API Reference
|
|
26
19
|
|
|
27
|
-
|
|
20
|
+
### `read_image(imagePath, [options])`
|
|
28
21
|
|
|
29
|
-
|
|
30
|
-
- `modelPath` - (Optional) Path to ONNX model. Defaults to auto-downloaded model.
|
|
31
|
-
- `charsetPath` - (Optional) Path to charset file. Defaults to auto-downloaded file.
|
|
22
|
+
Recognizes text from a single image.
|
|
32
23
|
|
|
33
|
-
|
|
24
|
+
- `imagePath`: String path to the image file.
|
|
25
|
+
- `options`: Optional overrides for model/charset paths.
|
|
26
|
+
- **Returns**: `Promise<string>`
|
|
34
27
|
|
|
35
|
-
###
|
|
28
|
+
### `read_images(imagePaths, [options])`
|
|
36
29
|
|
|
37
|
-
|
|
30
|
+
Recognizes text from multiple images.
|
|
38
31
|
|
|
39
|
-
**
|
|
32
|
+
- **Returns**: `Promise<string[]>`
|
|
40
33
|
|
|
41
|
-
|
|
42
|
-
- `modelPath` - Path to ONNX model (optional)
|
|
43
|
-
- `charsetPath` - Path to charset file (optional)
|
|
34
|
+
### `read_pdf(pdfPath, [options])`
|
|
44
35
|
|
|
45
|
-
|
|
36
|
+
Converts and recognizes text from all pages of a PDF.
|
|
46
37
|
|
|
47
|
-
|
|
38
|
+
- **Returns**: `Promise<string[]>` (Array of strings per page)
|
|
48
39
|
|
|
49
|
-
|
|
40
|
+
### `read_image_with_accuracy(imagePath, groundTruth, [options])`
|
|
50
41
|
|
|
51
|
-
|
|
42
|
+
Performs OCR and calculates Levenshtein accuracy.
|
|
52
43
|
|
|
53
|
-
- `
|
|
54
|
-
- `groundTruth` - Expected text for accuracy calculation
|
|
55
|
-
- `modelPath` - Path to ONNX model (optional)
|
|
56
|
-
- `charsetPath` - Path to charset file (optional)
|
|
44
|
+
- **Returns**: `Promise<{text: string, accuracy: number}>`
|
|
57
45
|
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
### MonOCR Class
|
|
61
|
-
|
|
62
|
-
For advanced usage, use the `MonOCR` class directly:
|
|
46
|
+
## Usage Example
|
|
63
47
|
|
|
64
48
|
```javascript
|
|
65
|
-
const {
|
|
66
|
-
|
|
67
|
-
const ocr = new MonOCR("model.onnx", "charset.txt");
|
|
68
|
-
await ocr.init();
|
|
49
|
+
const { read_image } = require("monocr");
|
|
69
50
|
|
|
70
|
-
|
|
71
|
-
const text = await
|
|
51
|
+
async function main() {
|
|
52
|
+
const text = await read_image("scanned_text.png");
|
|
53
|
+
console.log(text);
|
|
54
|
+
}
|
|
72
55
|
|
|
73
|
-
|
|
74
|
-
const results = await ocr.predictPage(imagePath);
|
|
56
|
+
main();
|
|
75
57
|
```
|
|
76
58
|
|
|
77
|
-
## CLI
|
|
78
|
-
|
|
79
|
-
The package includes a command-line tool:
|
|
59
|
+
## CLI Interface
|
|
80
60
|
|
|
81
61
|
```bash
|
|
82
|
-
#
|
|
83
|
-
|
|
62
|
+
# Global installation for CLI usage
|
|
63
|
+
npm install -g monocr
|
|
84
64
|
|
|
85
|
-
#
|
|
86
|
-
monocr
|
|
65
|
+
# Process an image
|
|
66
|
+
monocr image input.jpg
|
|
87
67
|
|
|
88
|
-
#
|
|
89
|
-
monocr
|
|
68
|
+
# Process a PDF
|
|
69
|
+
monocr pdf document.pdf
|
|
90
70
|
```
|
|
91
71
|
|
|
92
|
-
##
|
|
93
|
-
|
|
94
|
-
See the `examples/` directory for detailed usage examples:
|
|
95
|
-
|
|
96
|
-
- `simple.js` - Basic image OCR
|
|
97
|
-
- `with-accuracy.js` - OCR with accuracy measurement
|
|
98
|
-
- `batch.js` - Batch processing
|
|
99
|
-
|
|
100
|
-
Run examples:
|
|
101
|
-
|
|
102
|
-
```bash
|
|
103
|
-
node examples/simple.js
|
|
104
|
-
```
|
|
105
|
-
|
|
106
|
-
## Model Files
|
|
107
|
-
|
|
108
|
-
Models are **automatically downloaded** on first use to `~/.monocr/models/`.
|
|
109
|
-
|
|
110
|
-
You can also trigger a manual download:
|
|
111
|
-
|
|
112
|
-
```bash
|
|
113
|
-
monocr download
|
|
114
|
-
```
|
|
72
|
+
## Maintenance
|
|
115
73
|
|
|
116
|
-
|
|
74
|
+
Maintained by [MonDevHub](https://github.com/MonDevHub).
|
|
117
75
|
|
|
118
76
|
## License
|
|
119
77
|
|
package/bin/monocr.js
CHANGED
|
@@ -14,7 +14,7 @@ program
|
|
|
14
14
|
.command('image <path>')
|
|
15
15
|
.description('Recognize text from an image file')
|
|
16
16
|
.option('-m, --model <path>', 'Path to ONNX model (optional, auto-downloads)')
|
|
17
|
-
.option('-c, --charset <path>', 'Path to charset file (optional
|
|
17
|
+
.option('-c, --charset <path>', 'Path to charset file (optional)')
|
|
18
18
|
.action(async (imagePath, options) => {
|
|
19
19
|
try {
|
|
20
20
|
const text = await read_image(imagePath, options.model, options.charset);
|
|
@@ -29,7 +29,7 @@ program
|
|
|
29
29
|
.command('pdf <path>')
|
|
30
30
|
.description('Recognize text from a PDF file')
|
|
31
31
|
.option('-m, --model <path>', 'Path to ONNX model (optional, auto-downloads)')
|
|
32
|
-
.option('-c, --charset <path>', 'Path to charset file (optional
|
|
32
|
+
.option('-c, --charset <path>', 'Path to charset file (optional)')
|
|
33
33
|
.action(async (pdfPath, options) => {
|
|
34
34
|
try {
|
|
35
35
|
const pages = await read_pdf(pdfPath, options.model, options.charset);
|
|
@@ -48,7 +48,7 @@ program
|
|
|
48
48
|
.command('batch <directory>')
|
|
49
49
|
.description('Process all images in a directory')
|
|
50
50
|
.option('-m, --model <path>', 'Path to ONNX model (optional, auto-downloads)')
|
|
51
|
-
.option('-c, --charset <path>', 'Path to charset file (optional
|
|
51
|
+
.option('-c, --charset <path>', 'Path to charset file (optional)')
|
|
52
52
|
.option('-o, --output <path>', 'Output file for results (optional)')
|
|
53
53
|
.action(async (directory, options) => {
|
|
54
54
|
try {
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "monocr",
|
|
3
|
-
"version": "0.1.
|
|
3
|
+
"version": "0.1.5",
|
|
4
4
|
"description": "Cross-platform Mon (mnw) language OCR using ONNX Runtime. Supports Node.js.",
|
|
5
5
|
"main": "src/index.js",
|
|
6
6
|
"bin": {
|
|
@@ -8,7 +8,7 @@
|
|
|
8
8
|
},
|
|
9
9
|
"repository": {
|
|
10
10
|
"type": "git",
|
|
11
|
-
"url": "git+https://github.com/
|
|
11
|
+
"url": "git+https://github.com/MonDevHub/monocr-onnx.git",
|
|
12
12
|
"directory": "js"
|
|
13
13
|
},
|
|
14
14
|
"keywords": [
|
|
@@ -21,9 +21,9 @@
|
|
|
21
21
|
"author": "Janakh",
|
|
22
22
|
"license": "MIT",
|
|
23
23
|
"bugs": {
|
|
24
|
-
"url": "https://github.com/
|
|
24
|
+
"url": "https://github.com/MonDevHub/monocr-onnx/issues"
|
|
25
25
|
},
|
|
26
|
-
"homepage": "https://github.com/
|
|
26
|
+
"homepage": "https://github.com/MonDevHub/monocr-onnx/tree/main/js#readme",
|
|
27
27
|
"files": [
|
|
28
28
|
"src/",
|
|
29
29
|
"bin/",
|
|
@@ -32,11 +32,11 @@
|
|
|
32
32
|
],
|
|
33
33
|
"dependencies": {
|
|
34
34
|
"commander": "^11.1.0",
|
|
35
|
-
"onnxruntime-node": "^1.
|
|
36
|
-
"
|
|
35
|
+
"onnxruntime-node": "^1.24.1",
|
|
36
|
+
"pdf-img-convert": "^2.0.0",
|
|
37
37
|
"sharp": "^0.32.0"
|
|
38
38
|
},
|
|
39
39
|
"publishConfig": {
|
|
40
40
|
"access": "public"
|
|
41
41
|
}
|
|
42
|
-
}
|
|
42
|
+
}
|
package/src/index.js
CHANGED
|
@@ -5,15 +5,17 @@ module.exports = {
|
|
|
5
5
|
MonOCR,
|
|
6
6
|
calculateAccuracy,
|
|
7
7
|
read_image,
|
|
8
|
+
read_images,
|
|
8
9
|
read_pdf,
|
|
10
|
+
read_pdfs,
|
|
9
11
|
read_image_with_accuracy
|
|
10
12
|
};
|
|
11
13
|
|
|
12
14
|
/**
|
|
13
15
|
* Read text from an image file
|
|
14
16
|
* @param {string} imagePath - Path to image file
|
|
15
|
-
* @param {string} modelPath - Path to ONNX model (optional
|
|
16
|
-
* @param {string} charsetPath - Path to charset file (optional
|
|
17
|
+
* @param {string} modelPath - Path to ONNX model (optional)
|
|
18
|
+
* @param {string} charsetPath - Path to charset file (optional)
|
|
17
19
|
* @returns {Promise<string>} Recognized text
|
|
18
20
|
*/
|
|
19
21
|
async function read_image(imagePath, modelPath = null, charsetPath = null) {
|
|
@@ -23,6 +25,25 @@ async function read_image(imagePath, modelPath = null, charsetPath = null) {
|
|
|
23
25
|
return results.map(r => r.text).join('\n');
|
|
24
26
|
}
|
|
25
27
|
|
|
28
|
+
/**
|
|
29
|
+
* Read text from multiple image files
|
|
30
|
+
* @param {string[]} imagePaths - Array of paths to image files
|
|
31
|
+
* @param {string} modelPath - Path to ONNX model (optional)
|
|
32
|
+
* @param {string} charsetPath - Path to charset file (optional)
|
|
33
|
+
* @returns {Promise<string[]>} Array of recognized text
|
|
34
|
+
*/
|
|
35
|
+
async function read_images(imagePaths, modelPath = null, charsetPath = null) {
|
|
36
|
+
const ocr = new MonOCR(modelPath, charsetPath);
|
|
37
|
+
await ocr.init();
|
|
38
|
+
|
|
39
|
+
const results = [];
|
|
40
|
+
for (const path of imagePaths) {
|
|
41
|
+
const pageResults = await ocr.predictPage(path);
|
|
42
|
+
results.push(pageResults.map(r => r.text).join('\n'));
|
|
43
|
+
}
|
|
44
|
+
return results;
|
|
45
|
+
}
|
|
46
|
+
|
|
26
47
|
/**
|
|
27
48
|
* Read text from a PDF file
|
|
28
49
|
* @param {string} pdfPath - Path to PDF file
|
|
@@ -31,45 +52,60 @@ async function read_image(imagePath, modelPath = null, charsetPath = null) {
|
|
|
31
52
|
* @returns {Promise<string[]>} Array of text per page
|
|
32
53
|
*/
|
|
33
54
|
async function read_pdf(pdfPath, modelPath = null, charsetPath = null) {
|
|
34
|
-
const
|
|
55
|
+
const pdfImgConvert = require('pdf-img-convert');
|
|
35
56
|
const path = require('path');
|
|
36
57
|
const fs = require('fs');
|
|
37
|
-
const os = require('os');
|
|
38
58
|
|
|
59
|
+
// Initialize OCR
|
|
39
60
|
const ocr = new MonOCR(modelPath, charsetPath);
|
|
40
61
|
await ocr.init();
|
|
41
62
|
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
63
|
+
try {
|
|
64
|
+
// Convert PDF to image buffers (returns Uint8Array[])
|
|
65
|
+
// pdf-img-convert handles parsing internally using pdf.js
|
|
66
|
+
const imageBuffers = await pdfImgConvert.convert(pdfPath, {
|
|
67
|
+
width: 2480, // High resolution for OCR
|
|
68
|
+
height: 3508,
|
|
69
|
+
page_numbers: [] // All pages
|
|
70
|
+
});
|
|
71
|
+
|
|
72
|
+
if (!imageBuffers || imageBuffers.length === 0) {
|
|
73
|
+
throw new Error("Failed to convert PDF: No images generated");
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
const pages = [];
|
|
77
|
+
|
|
78
|
+
for (let i = 0; i < imageBuffers.length; i++) {
|
|
79
|
+
// pdf-img-convert returns Uint8Array (buffer-like)
|
|
80
|
+
// MonOCR's predictPage expects a file path or sharp-compatible input
|
|
81
|
+
// sharp can take a Buffer.
|
|
82
|
+
const buffer = Buffer.from(imageBuffers[i]);
|
|
83
|
+
|
|
84
|
+
const results = await ocr.predictPage(buffer);
|
|
61
85
|
const pageText = results.map(r => r.text).join('\n');
|
|
62
86
|
pages.push(pageText);
|
|
63
|
-
pageNum++;
|
|
64
|
-
} catch (err) {
|
|
65
|
-
break;
|
|
66
87
|
}
|
|
88
|
+
|
|
89
|
+
return pages;
|
|
90
|
+
} catch (err) {
|
|
91
|
+
throw new Error(`Failed to process PDF: ${err.message}`);
|
|
67
92
|
}
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
93
|
+
}
|
|
94
|
+
|
|
95
|
+
/**
|
|
96
|
+
* Read text from multiple PDF files
|
|
97
|
+
* @param {string[]} pdfPaths - Array of paths to PDF files
|
|
98
|
+
* @param {string} modelPath - Path to ONNX model (optional)
|
|
99
|
+
* @param {string} charsetPath - Path to charset file (optional)
|
|
100
|
+
* @returns {Promise<string[][]>} Array of arrays of text per page
|
|
101
|
+
*/
|
|
102
|
+
async function read_pdfs(pdfPaths, modelPath = null, charsetPath = null) {
|
|
103
|
+
const results = [];
|
|
104
|
+
for (const path of pdfPaths) {
|
|
105
|
+
const pages = await read_pdf(path, modelPath, charsetPath);
|
|
106
|
+
results.push(pages);
|
|
107
|
+
}
|
|
108
|
+
return results;
|
|
73
109
|
}
|
|
74
110
|
|
|
75
111
|
/**
|
package/src/utils.js
CHANGED
|
@@ -6,18 +6,38 @@
|
|
|
6
6
|
*/
|
|
7
7
|
function calculateAccuracy(predicted, groundTruth) {
|
|
8
8
|
if (!groundTruth) return 0;
|
|
9
|
+
if (!predicted) return 0;
|
|
9
10
|
|
|
10
|
-
const
|
|
11
|
-
|
|
11
|
+
const s1 = predicted;
|
|
12
|
+
const s2 = groundTruth;
|
|
12
13
|
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
14
|
+
const track = Array(s2.length + 1).fill(null).map(() =>
|
|
15
|
+
Array(s1.length + 1).fill(null));
|
|
16
|
+
|
|
17
|
+
for (let i = 0; i <= s1.length; i += 1) {
|
|
18
|
+
track[0][i] = i;
|
|
19
|
+
}
|
|
20
|
+
for (let j = 0; j <= s2.length; j += 1) {
|
|
21
|
+
track[j][0] = j;
|
|
22
|
+
}
|
|
23
|
+
|
|
24
|
+
for (let j = 1; j <= s2.length; j += 1) {
|
|
25
|
+
for (let i = 1; i <= s1.length; i += 1) {
|
|
26
|
+
const indicator = s1[i - 1] === s2[j - 1] ? 0 : 1;
|
|
27
|
+
track[j][i] = Math.min(
|
|
28
|
+
track[j][i - 1] + 1, // deletion
|
|
29
|
+
track[j - 1][i] + 1, // insertion
|
|
30
|
+
track[j - 1][i - 1] + indicator, // substitution
|
|
31
|
+
);
|
|
17
32
|
}
|
|
18
33
|
}
|
|
19
34
|
|
|
20
|
-
|
|
35
|
+
const distance = track[s2.length][s1.length];
|
|
36
|
+
const maxLen = Math.max(s1.length, s2.length);
|
|
37
|
+
|
|
38
|
+
if (maxLen === 0) return 100;
|
|
39
|
+
|
|
40
|
+
return ((1 - distance / maxLen) * 100).toFixed(2);
|
|
21
41
|
}
|
|
22
42
|
|
|
23
43
|
module.exports = {
|