monocr 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/monocr.js +1 -1
- package/package.json +1 -1
- package/src/charset.txt +1 -0
- package/src/model-manager.js +52 -67
- package/src/monocr.js +10 -8
package/bin/monocr.js
CHANGED
package/package.json
CHANGED
package/src/charset.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~ကခဂဃငစဆဇဈဉညဋဌဍဎဏတထဒဓနပဖဗဘမယရလဝသဟဠအဢဣဤဥဦဧဨဩဪါာိီုူေဲဳဴဵံ့း္်ျြွှဿ၀၁၂၃၄၅၆၇၈၉၊။၌၍၎၏ၐၑၓၚၛၜၝၞၟၠၡၢၣၤၥၨၪၰၱၲၳၴၵၷၸၹၺၻၼၾၿႀႄႅႆႇႈႉႊႏ႐႒႓႔႕႘႙ႜႝ႟
|
package/src/model-manager.js
CHANGED
|
@@ -9,12 +9,10 @@ class ModelManager {
 // Default cache directory in user's home
 this.cacheDir = path.join(os.homedir(), '.monocr', 'models');

-// HuggingFace model
+// HuggingFace model URL
 this.baseUrl = 'https://huggingface.co/janakhpon/monocr/resolve/main';
-this.
-
-charset: 'charset.txt'
-};
+this.modelFileName = 'monocr.onnx';
+this.hfModelPath = 'onnx/monocr.onnx';
 }

 /**
|
|
@@ -27,19 +25,17 @@ class ModelManager {
 }

 /**
-* Get local path for
+* Get local path for the model
 */
-
-return path.join(this.cacheDir,
+getModelPath() {
+return path.join(this.cacheDir, this.modelFileName);
 }

 /**
-* Check if model
+* Check if model exists locally
 */
-
-
-const charsetPath = this.getLocalPath('charset');
-return fs.existsSync(modelPath) && fs.existsSync(charsetPath);
+hasModel() {
+return fs.existsSync(this.getModelPath());
 }

 /**
|
|
@@ -49,43 +45,44 @@ class ModelManager {
 return new Promise((resolve, reject) => {
 const file = fs.createWriteStream(destPath);

-
-
-
-
-
+const request = (requestUrl) => {
+https.get(requestUrl, { headers: { 'User-Agent': 'monocr-npm' } }, (response) => {
+if ([301, 302, 307, 308].includes(response.statusCode)) {
+let redirectUrl = response.headers.location;
+if (!redirectUrl.startsWith('http')) {
+const originalUrl = new URL(requestUrl);
+redirectUrl = `${originalUrl.protocol}//${originalUrl.host}${redirectUrl}`;
+}
+request(redirectUrl);
+} else if (response.statusCode === 200) {
+const totalSize = parseInt(response.headers['content-length'], 10);
 let downloadedSize = 0;

-
+response.on('data', (chunk) => {
 downloadedSize += chunk.length;
-
-
+if (totalSize) {
+const progress = ((downloadedSize / totalSize) * 100).toFixed(1);
+process.stdout.write(`\r Downloading model: ${progress}% (${(downloadedSize / 1024 / 1024).toFixed(2)} MB)`);
+}
 });
-
-
+
+response.pipe(file);

 file.on('finish', () => {
 file.close();
 process.stdout.write('\n');
 resolve();
 });
-}
-
-
-
-
-
-
-
-
-
-} else {
-reject(new Error(`Failed to download: ${response.statusCode}`));
-}
-}).on('error', (err) => {
-fs.unlink(destPath, () => {});
-reject(err);
-});
+} else {
+reject(new Error(`Failed to download: ${response.statusCode}`));
+}
+}).on('error', (err) => {
+fs.unlink(destPath, () => {});
+reject(err);
+});
+};
+
+request(url);

 file.on('error', (err) => {
 fs.unlink(destPath, () => {});
|
|
@@ -95,44 +92,32 @@ class ModelManager {
 }

 /**
-* Download
+* Download model file
 */
-async
+async downloadModel() {
 this.ensureCacheDir();

-console.log('Downloading monocr
-console.log(`Cache directory: ${this.cacheDir}
-
-// Download model
-const modelUrl = `${this.baseUrl}/${this.modelFiles.model}`;
-const modelPath = this.getLocalPath('model');
-console.log('Downloading monocr.onnx...');
-await this.downloadFile(modelUrl, modelPath);
-console.log('✓ Model downloaded\n');
+console.log('Downloading monocr model from HuggingFace...');
+console.log(`Cache directory: ${this.cacheDir}`);

-
-const
-const charsetPath = this.getLocalPath('charset');
-console.log('Downloading charset.txt...');
-await this.downloadFile(charsetUrl, charsetPath);
-console.log('✓ Charset downloaded\n');
+const modelUrl = `${this.baseUrl}/${this.hfModelPath}`;
+const destPath = this.getModelPath();

-
+await this.downloadFile(modelUrl, destPath);
+console.log('✓ Model downloaded successfully!');
 }

 /**
-* Get model
+* Get model path, downloading if needed
 */
-async
-if (!this.
-await this.
+async ensureModel() {
+if (!this.hasModel()) {
+await this.downloadModel();
 }
-
-return {
-modelPath: this.getLocalPath('model'),
-charsetPath: this.getLocalPath('charset')
-};
+return this.getModelPath();
 }
 }

 module.exports = ModelManager;
+
+module.exports = ModelManager;
|
package/src/monocr.js
CHANGED
|
@@ -1,6 +1,7 @@
 const ort = require('onnxruntime-node');
 const sharp = require('sharp');
 const fs = require('fs');
+const path = require('path');
 const LineSegmenter = require('./segmenter');
 const ModelManager = require('./model-manager');

|
|
@@ -21,17 +22,18 @@ class MonOCR {
 async init() {
 if (this.session) return;

-//
-if (!this.modelPath
-
-this.modelPath = this.modelPath || paths.modelPath;
-this.charsetPath = this.charsetPath || paths.charsetPath;
+// Ensure model exists
+if (!this.modelPath) {
+this.modelPath = await this.modelManager.ensureModel();
 }

-
-if (this.charsetPath) {
-this.
+// Use bundled charset if not provided
+if (!this.charsetPath) {
+this.charsetPath = path.join(__dirname, 'charset.txt');
 }
+
+this.session = await ort.InferenceSession.create(this.modelPath);
+this.charset = fs.readFileSync(this.charsetPath, 'utf-8').trim();
 }

 /**
|