monocr 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +12 -7
- package/bin/monocr.js +20 -6
- package/package.json +1 -1
- package/src/index.js +9 -9
- package/src/model-manager.js +138 -0
- package/src/monocr.js +11 -1
package/README.md
CHANGED
|
@@ -13,21 +13,22 @@ npm install monocr
|
|
|
13
13
|
```javascript
|
|
14
14
|
const { read_image } = require("monocr");
|
|
15
15
|
|
|
16
|
-
|
|
16
|
+
// Auto-downloads model on first run
|
|
17
|
+
const text = await read_image("path/to/image.jpg");
|
|
17
18
|
console.log(text);
|
|
18
19
|
```
|
|
19
20
|
|
|
20
21
|
## API
|
|
21
22
|
|
|
22
|
-
### read_image(imagePath, modelPath, charsetPath)
|
|
23
|
+
### read_image(imagePath, [modelPath], [charsetPath])
|
|
23
24
|
|
|
24
25
|
Recognize text from an image file.
|
|
25
26
|
|
|
26
27
|
**Parameters:**
|
|
27
28
|
|
|
28
29
|
- `imagePath` - Path to image file (jpg, png)
|
|
29
|
-
- `modelPath` - Path to ONNX model
|
|
30
|
-
- `charsetPath` - Path to charset file
|
|
30
|
+
- `modelPath` - (Optional) Path to ONNX model. Defaults to auto-downloaded model.
|
|
31
|
+
- `charsetPath` - (Optional) Path to charset file. Defaults to auto-downloaded file.
|
|
31
32
|
|
|
32
33
|
**Returns:** `Promise<string>` - Recognized text
|
|
33
34
|
|
|
@@ -104,11 +105,15 @@ node examples/simple.js
|
|
|
104
105
|
|
|
105
106
|
## Model Files
|
|
106
107
|
|
|
107
|
-
|
|
108
|
+
Models are **automatically downloaded** on first use to `~/.monocr/models/`.
|
|
108
109
|
|
|
109
|
-
|
|
110
|
+
You can also trigger a manual download:
|
|
110
111
|
|
|
111
|
-
|
|
112
|
+
```bash
|
|
113
|
+
monocr download
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
You can also specify custom model paths if you prefer offline usage without the default cache.
|
|
112
117
|
|
|
113
118
|
## License
|
|
114
119
|
|
package/bin/monocr.js
CHANGED
|
@@ -13,8 +13,8 @@ program
|
|
|
13
13
|
program
|
|
14
14
|
.command('image <path>')
|
|
15
15
|
.description('Recognize text from an image file')
|
|
16
|
-
.option('-m, --model <path>', 'Path to ONNX model
|
|
17
|
-
.option('-c, --charset <path>', 'Path to charset file
|
|
16
|
+
.option('-m, --model <path>', 'Path to ONNX model (optional, auto-downloads)')
|
|
17
|
+
.option('-c, --charset <path>', 'Path to charset file (optional, auto-downloads)')
|
|
18
18
|
.action(async (imagePath, options) => {
|
|
19
19
|
try {
|
|
20
20
|
const text = await read_image(imagePath, options.model, options.charset);
|
|
@@ -28,8 +28,8 @@ program
|
|
|
28
28
|
program
|
|
29
29
|
.command('pdf <path>')
|
|
30
30
|
.description('Recognize text from a PDF file')
|
|
31
|
-
.option('-m, --model <path>', 'Path to ONNX model
|
|
32
|
-
.option('-c, --charset <path>', 'Path to charset file
|
|
31
|
+
.option('-m, --model <path>', 'Path to ONNX model (optional, auto-downloads)')
|
|
32
|
+
.option('-c, --charset <path>', 'Path to charset file (optional, auto-downloads)')
|
|
33
33
|
.action(async (pdfPath, options) => {
|
|
34
34
|
try {
|
|
35
35
|
const pages = await read_pdf(pdfPath, options.model, options.charset);
|
|
@@ -47,8 +47,8 @@ program
|
|
|
47
47
|
program
|
|
48
48
|
.command('batch <directory>')
|
|
49
49
|
.description('Process all images in a directory')
|
|
50
|
-
.option('-m, --model <path>', 'Path to ONNX model
|
|
51
|
-
.option('-c, --charset <path>', 'Path to charset file
|
|
50
|
+
.option('-m, --model <path>', 'Path to ONNX model (optional, auto-downloads)')
|
|
51
|
+
.option('-c, --charset <path>', 'Path to charset file (optional, auto-downloads)')
|
|
52
52
|
.option('-o, --output <path>', 'Output file for results (optional)')
|
|
53
53
|
.action(async (directory, options) => {
|
|
54
54
|
try {
|
|
@@ -82,4 +82,18 @@ program
|
|
|
82
82
|
}
|
|
83
83
|
});
|
|
84
84
|
|
|
85
|
+
// `download` subcommand: pre-fetches the model files into the local cache so
// that later OCR commands do not have to download them on first use.
program
  .command('download')
  .description('Download model files to local cache')
  .action(async () => {
    try {
      // Loaded lazily, inside the action, exactly as the other code path does.
      const { MonOCR } = require('../src/index');
      const engine = new MonOCR();
      await engine.modelManager.downloadModels();
    } catch (err) {
      // Report the failure and exit with a non-zero status.
      console.error('Error:', err.message);
      process.exit(1);
    }
  });
|
|
98
|
+
|
|
85
99
|
program.parse();
|
package/package.json
CHANGED
package/src/index.js
CHANGED
|
@@ -12,11 +12,11 @@ module.exports = {
|
|
|
12
12
|
/**
|
|
13
13
|
* Read text from an image file
|
|
14
14
|
* @param {string} imagePath - Path to image file
|
|
15
|
-
* @param {string} modelPath - Path to ONNX model
|
|
16
|
-
* @param {string} charsetPath - Path to charset file
|
|
15
|
+
* @param {string} modelPath - Path to ONNX model (optional, auto-downloads if not provided)
|
|
16
|
+
* @param {string} charsetPath - Path to charset file (optional, auto-downloads if not provided)
|
|
17
17
|
* @returns {Promise<string>} Recognized text
|
|
18
18
|
*/
|
|
19
|
-
async function read_image(imagePath, modelPath =
|
|
19
|
+
async function read_image(imagePath, modelPath = null, charsetPath = null) {
|
|
20
20
|
const ocr = new MonOCR(modelPath, charsetPath);
|
|
21
21
|
await ocr.init();
|
|
22
22
|
const results = await ocr.predictPage(imagePath);
|
|
@@ -26,11 +26,11 @@ async function read_image(imagePath, modelPath = '../model/monocr.onnx', charset
|
|
|
26
26
|
/**
|
|
27
27
|
* Read text from a PDF file
|
|
28
28
|
* @param {string} pdfPath - Path to PDF file
|
|
29
|
-
* @param {string} modelPath - Path to ONNX model
|
|
30
|
-
* @param {string} charsetPath - Path to charset file
|
|
29
|
+
* @param {string} modelPath - Path to ONNX model (optional, auto-downloads if not provided)
|
|
30
|
+
* @param {string} charsetPath - Path to charset file (optional, auto-downloads if not provided)
|
|
31
31
|
* @returns {Promise<string[]>} Array of text per page
|
|
32
32
|
*/
|
|
33
|
-
async function read_pdf(pdfPath, modelPath =
|
|
33
|
+
async function read_pdf(pdfPath, modelPath = null, charsetPath = null) {
|
|
34
34
|
const { fromPath } = require('pdf2pic');
|
|
35
35
|
const path = require('path');
|
|
36
36
|
const fs = require('fs');
|
|
@@ -76,11 +76,11 @@ async function read_pdf(pdfPath, modelPath = '../model/monocr.onnx', charsetPath
|
|
|
76
76
|
* Read text from an image with accuracy measurement
|
|
77
77
|
* @param {string} imagePath - Path to image file
|
|
78
78
|
* @param {string} groundTruth - Expected text for accuracy calculation
|
|
79
|
-
* @param {string} modelPath - Path to ONNX model
|
|
80
|
-
* @param {string} charsetPath - Path to charset file
|
|
79
|
+
* @param {string} modelPath - Path to ONNX model (optional, auto-downloads if not provided)
|
|
80
|
+
* @param {string} charsetPath - Path to charset file (optional, auto-downloads if not provided)
|
|
81
81
|
* @returns {Promise<{text: string, accuracy: number}>}
|
|
82
82
|
*/
|
|
83
|
-
async function read_image_with_accuracy(imagePath, groundTruth, modelPath =
|
|
83
|
+
async function read_image_with_accuracy(imagePath, groundTruth, modelPath = null, charsetPath = null) {
|
|
84
84
|
const text = await read_image(imagePath, modelPath, charsetPath);
|
|
85
85
|
const accuracy = calculateAccuracy(text, groundTruth);
|
|
86
86
|
return { text, accuracy: parseFloat(accuracy) };
|
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
const fs = require('fs');
|
|
2
|
+
const path = require('path');
|
|
3
|
+
const os = require('os');
|
|
4
|
+
const https = require('https');
|
|
5
|
+
const { pipeline } = require('stream/promises');
|
|
6
|
+
|
|
7
|
+
/**
 * Locates, downloads and caches the monocr model files (ONNX model and charset).
 *
 * Files are fetched from `baseUrl` and stored flat inside `cacheDir`, using the
 * basename of each entry in `modelFiles` as the local file name.
 */
class ModelManager {
  constructor() {
    // Default cache directory in user's home
    this.cacheDir = path.join(os.homedir(), '.monocr', 'models');

    // HuggingFace model URLs
    this.baseUrl = 'https://huggingface.co/janakhpon/monocr/resolve/main';
    this.modelFiles = {
      model: 'onnx/monocr.onnx',
      charset: 'charset.txt',
    };
  }

  /**
   * Ensure cache directory exists.
   *
   * `recursive: true` makes the call a no-op when the directory is already
   * there, so no separate existence check is needed.
   */
  ensureCacheDir() {
    fs.mkdirSync(this.cacheDir, { recursive: true });
  }

  /**
   * Get local path for a model file.
   *
   * @param {string} fileKey - Key of `modelFiles` (`'model'` or `'charset'`).
   * @returns {string} Path of the file inside the cache directory.
   * @throws {TypeError} If `fileKey` is not a known entry of `modelFiles`.
   */
  getLocalPath(fileKey) {
    const remoteName = this.modelFiles[fileKey];
    if (typeof remoteName !== 'string') {
      throw new TypeError(`Unknown model file key: ${String(fileKey)}`);
    }
    return path.join(this.cacheDir, path.basename(remoteName));
  }

  /**
   * Check if model files exist locally.
   *
   * @returns {boolean} `true` only when both the model and the charset are cached.
   */
  hasModels() {
    return ['model', 'charset'].every((key) => fs.existsSync(this.getLocalPath(key)));
  }

  /**
   * Issue a GET request and resolve with the final `200` response, following
   * HTTP redirects along the way.
   *
   * @param {string} url - Absolute https URL to request.
   * @param {number} redirectsLeft - How many more redirects may be followed.
   * @returns {Promise<import('http').IncomingMessage>} The unconsumed response stream.
   */
  #openDownload(url, redirectsLeft) {
    return new Promise((resolve, reject) => {
      const request = https.get(url, { headers: { 'User-Agent': 'monocr-npm' } }, (response) => {
        const { statusCode, headers } = response;
        const isRedirect = [301, 302, 303, 307, 308].includes(statusCode) && Boolean(headers.location);

        if (isRedirect) {
          // Drain the redirect body so the socket is released.
          response.resume();
          if (redirectsLeft <= 0) {
            reject(new Error('Failed to download: too many redirects'));
            return;
          }
          let nextUrl;
          try {
            // `Location` may be relative; resolve it against the current URL.
            nextUrl = new URL(headers.location, url).href;
          } catch (err) {
            reject(err);
            return;
          }
          resolve(this.#openDownload(nextUrl, redirectsLeft - 1));
          return;
        }

        if (statusCode !== 200) {
          response.resume();
          reject(new Error(`Failed to download: ${statusCode}`));
          return;
        }

        resolve(response);
      });

      request.on('error', reject);
    });
  }

  /**
   * Download a file from HuggingFace.
   *
   * The body is streamed into `<destPath>.part` and renamed to `destPath` only
   * after the transfer completed, so an interrupted download never leaves a
   * truncated file at `destPath` (which `hasModels()` would accept as cached).
   *
   * @param {string} url - Absolute https URL of the file.
   * @param {string} destPath - Where to store the downloaded file.
   * @param {number} [maxRedirects=5] - Maximum number of redirects to follow.
   * @returns {Promise<void>} Resolves once the file is in place at `destPath`.
   * @throws {Error} On network failure, non-200 final status, too many
   *   redirects, or a file-system error while writing.
   */
  async downloadFile(url, destPath, maxRedirects = 5) {
    const response = await this.#openDownload(url, maxRedirects);

    // `content-length` may be absent; then only the byte count is shown.
    const totalSize = Number.parseInt(response.headers['content-length'], 10);
    const hasKnownSize = Number.isFinite(totalSize) && totalSize > 0;
    const partPath = `${destPath}.part`;
    let downloadedSize = 0;
    let progressShown = false;

    response.on('data', (chunk) => {
      downloadedSize += chunk.length;
      const megabytes = (downloadedSize / 1024 / 1024).toFixed(2);
      const label = hasKnownSize
        ? `${((downloadedSize / totalSize) * 100).toFixed(1)}% (${megabytes} MB)`
        : `${megabytes} MB`;
      process.stdout.write(`\r Progress: ${label}`);
      progressShown = true;
    });

    try {
      // pipeline() propagates errors from either stream and destroys both on failure.
      await pipeline(response, fs.createWriteStream(partPath));
      await fs.promises.rename(partPath, destPath);
    } catch (err) {
      // Best-effort cleanup; the download error is the one worth reporting.
      await fs.promises.rm(partPath, { force: true }).catch(() => {});
      throw err;
    } finally {
      if (progressShown) {
        // Terminate the `\r`-rewritten progress line.
        process.stdout.write('\n');
      }
    }
  }

  /**
   * Download all model files into the cache directory, logging progress to stdout.
   *
   * @returns {Promise<void>}
   * @throws {Error} If any of the downloads fails.
   */
  async downloadModels() {
    this.ensureCacheDir();

    console.log('Downloading monocr models from HuggingFace...');
    console.log(`Cache directory: ${this.cacheDir}\n`);

    // Download model
    const modelUrl = `${this.baseUrl}/${this.modelFiles.model}`;
    const modelPath = this.getLocalPath('model');
    console.log('Downloading monocr.onnx...');
    await this.downloadFile(modelUrl, modelPath);
    console.log('✓ Model downloaded\n');

    // Download charset
    const charsetUrl = `${this.baseUrl}/${this.modelFiles.charset}`;
    const charsetPath = this.getLocalPath('charset');
    console.log('Downloading charset.txt...');
    await this.downloadFile(charsetUrl, charsetPath);
    console.log('✓ Charset downloaded\n');

    console.log('All models downloaded successfully!');
  }

  /**
   * Get model paths, downloading if needed.
   *
   * @returns {Promise<{modelPath: string, charsetPath: string}>} Local paths of
   *   the cached model and charset files.
   */
  async getModelPaths() {
    if (!this.hasModels()) {
      await this.downloadModels();
    }

    return {
      modelPath: this.getLocalPath('model'),
      charsetPath: this.getLocalPath('charset'),
    };
  }
}
|
|
137
|
+
|
|
138
|
+
module.exports = ModelManager;
|
package/src/monocr.js
CHANGED
|
@@ -2,14 +2,16 @@ const ort = require('onnxruntime-node');
|
|
|
2
2
|
const sharp = require('sharp');
|
|
3
3
|
const fs = require('fs');
|
|
4
4
|
const LineSegmenter = require('./segmenter');
|
|
5
|
+
const ModelManager = require('./model-manager');
|
|
5
6
|
|
|
6
7
|
class MonOCR {
|
|
7
|
-
constructor(modelPath, charsetPath) {
|
|
8
|
+
constructor(modelPath = null, charsetPath = null) {
|
|
8
9
|
this.modelPath = modelPath;
|
|
9
10
|
this.charsetPath = charsetPath;
|
|
10
11
|
this.session = null;
|
|
11
12
|
this.charset = "";
|
|
12
13
|
this.segmenter = new LineSegmenter();
|
|
14
|
+
this.modelManager = new ModelManager();
|
|
13
15
|
|
|
14
16
|
// Metadata
|
|
15
17
|
this.targetHeight = 64;
|
|
@@ -18,6 +20,14 @@ class MonOCR {
|
|
|
18
20
|
|
|
19
21
|
async init() {
|
|
20
22
|
if (this.session) return;
|
|
23
|
+
|
|
24
|
+
// If paths not provided, use auto-download
|
|
25
|
+
if (!this.modelPath || !this.charsetPath) {
|
|
26
|
+
const paths = await this.modelManager.getModelPaths();
|
|
27
|
+
this.modelPath = this.modelPath || paths.modelPath;
|
|
28
|
+
this.charsetPath = this.charsetPath || paths.charsetPath;
|
|
29
|
+
}
|
|
30
|
+
|
|
21
31
|
this.session = await ort.InferenceSession.create(this.modelPath);
|
|
22
32
|
if (this.charsetPath) {
|
|
23
33
|
this.charset = fs.readFileSync(this.charsetPath, 'utf-8').trim();
|