mathcraft-ocr 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mathcraft_ocr-0.1.0/LICENSE +21 -0
- mathcraft_ocr-0.1.0/MANIFEST.in +14 -0
- mathcraft_ocr-0.1.0/PKG-INFO +184 -0
- mathcraft_ocr-0.1.0/README_MATHCRAFT_OCR.md +142 -0
- mathcraft_ocr-0.1.0/mathcraft_ocr/__init__.py +39 -0
- mathcraft_ocr-0.1.0/mathcraft_ocr/__main__.py +6 -0
- mathcraft_ocr-0.1.0/mathcraft_ocr/adapters/__init__.py +13 -0
- mathcraft_ocr-0.1.0/mathcraft_ocr/adapters/common.py +46 -0
- mathcraft_ocr-0.1.0/mathcraft_ocr/adapters/formula_detector.py +131 -0
- mathcraft_ocr-0.1.0/mathcraft_ocr/adapters/formula_recognizer.py +151 -0
- mathcraft_ocr-0.1.0/mathcraft_ocr/adapters/text_detector.py +57 -0
- mathcraft_ocr-0.1.0/mathcraft_ocr/adapters/text_recognizer.py +121 -0
- mathcraft_ocr-0.1.0/mathcraft_ocr/api.py +14 -0
- mathcraft_ocr-0.1.0/mathcraft_ocr/cache.py +135 -0
- mathcraft_ocr-0.1.0/mathcraft_ocr/cli.py +110 -0
- mathcraft_ocr-0.1.0/mathcraft_ocr/debug_blocks.py +202 -0
- mathcraft_ocr-0.1.0/mathcraft_ocr/doctor.py +50 -0
- mathcraft_ocr-0.1.0/mathcraft_ocr/downloader.py +97 -0
- mathcraft_ocr-0.1.0/mathcraft_ocr/errors.py +21 -0
- mathcraft_ocr-0.1.0/mathcraft_ocr/hardware.py +203 -0
- mathcraft_ocr-0.1.0/mathcraft_ocr/image.py +33 -0
- mathcraft_ocr-0.1.0/mathcraft_ocr/layout.py +892 -0
- mathcraft_ocr-0.1.0/mathcraft_ocr/manifest.py +89 -0
- mathcraft_ocr-0.1.0/mathcraft_ocr/manifests/models.v1.json +89 -0
- mathcraft_ocr-0.1.0/mathcraft_ocr/providers.py +80 -0
- mathcraft_ocr-0.1.0/mathcraft_ocr/results.py +53 -0
- mathcraft_ocr-0.1.0/mathcraft_ocr/runtime.py +535 -0
- mathcraft_ocr-0.1.0/mathcraft_ocr/serialization.py +120 -0
- mathcraft_ocr-0.1.0/mathcraft_ocr/worker.py +131 -0
- mathcraft_ocr-0.1.0/mathcraft_ocr.egg-info/PKG-INFO +184 -0
- mathcraft_ocr-0.1.0/mathcraft_ocr.egg-info/SOURCES.txt +35 -0
- mathcraft_ocr-0.1.0/mathcraft_ocr.egg-info/dependency_links.txt +1 -0
- mathcraft_ocr-0.1.0/mathcraft_ocr.egg-info/entry_points.txt +3 -0
- mathcraft_ocr-0.1.0/mathcraft_ocr.egg-info/requires.txt +19 -0
- mathcraft_ocr-0.1.0/mathcraft_ocr.egg-info/top_level.txt +1 -0
- mathcraft_ocr-0.1.0/pyproject.toml +86 -0
- mathcraft_ocr-0.1.0/setup.cfg +4 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 MathCraft
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
include LICENSE
|
|
2
|
+
include README_MATHCRAFT_OCR.md
|
|
3
|
+
include pyproject.toml
|
|
4
|
+
graft mathcraft_ocr
|
|
5
|
+
prune build
|
|
6
|
+
prune dist
|
|
7
|
+
prune docs
|
|
8
|
+
prune release_assets
|
|
9
|
+
prune scripts
|
|
10
|
+
prune src
|
|
11
|
+
prune test
|
|
12
|
+
prune test_pdf
|
|
13
|
+
global-exclude __pycache__
|
|
14
|
+
global-exclude *.py[cod]
|
|
@@ -0,0 +1,184 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: mathcraft-ocr
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: ONNX-only OCR runtime for mathematical documents
|
|
5
|
+
Author: SakuraMathcraft
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/SakuraMathcraft/LaTeXSnipper
|
|
8
|
+
Project-URL: Models, https://github.com/SakuraMathcraft/MathCraft-Models
|
|
9
|
+
Project-URL: Issues, https://github.com/SakuraMathcraft/LaTeXSnipper/issues
|
|
10
|
+
Keywords: ocr,latex,math,onnx,document-ocr,formula-recognition
|
|
11
|
+
Classifier: Development Status :: 3 - Alpha
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: Intended Audience :: Science/Research
|
|
14
|
+
Classifier: Operating System :: Microsoft :: Windows
|
|
15
|
+
Classifier: Operating System :: OS Independent
|
|
16
|
+
Classifier: Programming Language :: Python :: 3
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
20
|
+
Classifier: Topic :: Scientific/Engineering :: Image Recognition
|
|
21
|
+
Classifier: Topic :: Text Processing
|
|
22
|
+
Requires-Python: >=3.10
|
|
23
|
+
Description-Content-Type: text/markdown
|
|
24
|
+
License-File: LICENSE
|
|
25
|
+
Requires-Dist: numpy<3,>=1.26
|
|
26
|
+
Requires-Dist: pillow<12,>=10
|
|
27
|
+
Requires-Dist: opencv-python<5,>=4.10
|
|
28
|
+
Requires-Dist: rapidocr==3.5.0
|
|
29
|
+
Requires-Dist: transformers==4.55.4
|
|
30
|
+
Requires-Dist: tokenizers==0.21.4
|
|
31
|
+
Requires-Dist: requests<3,>=2.31
|
|
32
|
+
Requires-Dist: packaging>=23
|
|
33
|
+
Provides-Extra: cpu
|
|
34
|
+
Requires-Dist: onnxruntime~=1.19.2; extra == "cpu"
|
|
35
|
+
Provides-Extra: gpu
|
|
36
|
+
Requires-Dist: onnxruntime-gpu~=1.19.2; extra == "gpu"
|
|
37
|
+
Provides-Extra: dev
|
|
38
|
+
Requires-Dist: ruff>=0.8; extra == "dev"
|
|
39
|
+
Requires-Dist: build>=1.2; extra == "dev"
|
|
40
|
+
Requires-Dist: twine>=5.0; extra == "dev"
|
|
41
|
+
Dynamic: license-file
|
|
42
|
+
|
|
43
|
+
# MathCraft OCR
|
|
44
|
+
|
|
45
|
+
MathCraft OCR is an ONNX-only OCR runtime for mathematical documents. It provides formula recognition, text recognition, mixed text/formula page OCR, explicit model-cache management, and structured block output for downstream Markdown or TeX document engines.
|
|
46
|
+
|
|
47
|
+
The package is developed for LaTeXSnipper but is usable as a standalone Python library.
|
|
48
|
+
|
|
49
|
+
## Features
|
|
50
|
+
|
|
51
|
+
- ONNX Runtime inference only; no active PyTorch OCR runtime.
|
|
52
|
+
- Formula OCR: image to LaTeX.
|
|
53
|
+
- Text OCR: multilingual PP-OCRv5 mobile detector/recognizer.
|
|
54
|
+
- Mixed OCR: formula detection, text masking, batched recognition, and layout merge.
|
|
55
|
+
- Manifest-driven model cache with SHA-256 file checks.
|
|
56
|
+
- Automatic repair for missing or incomplete model directories.
|
|
57
|
+
- CPU/GPU provider selection through ONNX Runtime.
|
|
58
|
+
- JSONL worker mode for GUI or service integration.
|
|
59
|
+
|
|
60
|
+
## Installation
|
|
61
|
+
|
|
62
|
+
CPU backend:
|
|
63
|
+
|
|
64
|
+
```bash
|
|
65
|
+
pip install "mathcraft-ocr[cpu]"
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
GPU backend:
|
|
69
|
+
|
|
70
|
+
```bash
|
|
71
|
+
pip install "mathcraft-ocr[gpu]"
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
Install only one backend extra in a clean environment. `onnxruntime` and `onnxruntime-gpu` should not be mixed in the same environment.
|
|
75
|
+
|
|
76
|
+
## Quick Start
|
|
77
|
+
|
|
78
|
+
```python
|
|
79
|
+
from mathcraft_ocr import MathCraftRuntime
|
|
80
|
+
|
|
81
|
+
runtime = MathCraftRuntime(provider_preference="auto")
|
|
82
|
+
result = runtime.recognize_mixed("page.png")
|
|
83
|
+
|
|
84
|
+
print(result.text)
|
|
85
|
+
for block in result.blocks:
|
|
86
|
+
print(block.role, block.kind, block.text[:80])
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
Formula-only recognition:
|
|
90
|
+
|
|
91
|
+
```python
|
|
92
|
+
from mathcraft_ocr import MathCraftRuntime
|
|
93
|
+
|
|
94
|
+
runtime = MathCraftRuntime(provider_preference="cpu")
|
|
95
|
+
formula = runtime.recognize_formula("formula.png")
|
|
96
|
+
print(formula.text)
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
## CLI
|
|
100
|
+
|
|
101
|
+
Check model cache:
|
|
102
|
+
|
|
103
|
+
```bash
|
|
104
|
+
mathcraft models check
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
Inspect runtime:
|
|
108
|
+
|
|
109
|
+
```bash
|
|
110
|
+
mathcraft doctor --provider auto
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
Warm up models:
|
|
114
|
+
|
|
115
|
+
```bash
|
|
116
|
+
mathcraft warmup --profile mixed --provider auto
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
Recognize an image:
|
|
120
|
+
|
|
121
|
+
```bash
|
|
122
|
+
mathcraft ocr page.png --profile mixed --provider auto --output result.md
|
|
123
|
+
mathcraft ocr formula.png --profile formula --json
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
Run JSONL worker mode:
|
|
127
|
+
|
|
128
|
+
```bash
|
|
129
|
+
mathcraft worker --provider auto
|
|
130
|
+
```
|
|
131
|
+
|
|
132
|
+
## Model Cache
|
|
133
|
+
|
|
134
|
+
MathCraft reads models from:
|
|
135
|
+
|
|
136
|
+
```text
|
|
137
|
+
%APPDATA%\MathCraft\models
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
or from a custom root:
|
|
141
|
+
|
|
142
|
+
```bat
|
|
143
|
+
set MATHCRAFT_HOME=D:\MathCraft\models
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
Model artifacts are downloaded from the MathCraft-Models release assets declared in `mathcraft_ocr/manifests/models.v1.json`.
|
|
147
|
+
|
|
148
|
+
## Runtime Profiles
|
|
149
|
+
|
|
150
|
+
| Profile | Models | Output |
|
|
151
|
+
| --- | --- | --- |
|
|
152
|
+
| `formula` | formula detector + formula recognizer | LaTeX string |
|
|
153
|
+
| `text` | text detector + text recognizer | OCR text and text blocks |
|
|
154
|
+
| `mixed` | formula detector + formula recognizer + text detector + text recognizer | Markdown-ready structured blocks |
|
|
155
|
+
|
|
156
|
+
## Provider Selection
|
|
157
|
+
|
|
158
|
+
`provider_preference` accepts:
|
|
159
|
+
|
|
160
|
+
- `auto`: prefer CUDA when available and valid, otherwise CPU.
|
|
161
|
+
- `cpu`: force CPU.
|
|
162
|
+
- `gpu`: request CUDA-capable ONNX Runtime.
|
|
163
|
+
|
|
164
|
+
The actual provider is available on results through the `provider` field.
|
|
165
|
+
|
|
166
|
+
## Development
|
|
167
|
+
|
|
168
|
+
Run tests from the repository root:
|
|
169
|
+
|
|
170
|
+
```bash
|
|
171
|
+
python test/test_mathcraft_ocr.py
|
|
172
|
+
python test/test_mathcraft_document_engine.py
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
Build package artifacts:
|
|
176
|
+
|
|
177
|
+
```bash
|
|
178
|
+
python -m pip wheel . --no-deps -w release_assets/mathcraft-ocr-package/dist
|
|
179
|
+
python -m build --outdir release_assets/mathcraft-ocr-package/dist
|
|
180
|
+
```
|
|
181
|
+
|
|
182
|
+
## License
|
|
183
|
+
|
|
184
|
+
MIT. See `LICENSE`.
|
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
# MathCraft OCR
|
|
2
|
+
|
|
3
|
+
MathCraft OCR is an ONNX-only OCR runtime for mathematical documents. It provides formula recognition, text recognition, mixed text/formula page OCR, explicit model-cache management, and structured block output for downstream Markdown or TeX document engines.
|
|
4
|
+
|
|
5
|
+
The package is developed for LaTeXSnipper but is usable as a standalone Python library.
|
|
6
|
+
|
|
7
|
+
## Features
|
|
8
|
+
|
|
9
|
+
- ONNX Runtime inference only; no active PyTorch OCR runtime.
|
|
10
|
+
- Formula OCR: image to LaTeX.
|
|
11
|
+
- Text OCR: multilingual PP-OCRv5 mobile detector/recognizer.
|
|
12
|
+
- Mixed OCR: formula detection, text masking, batched recognition, and layout merge.
|
|
13
|
+
- Manifest-driven model cache with SHA-256 file checks.
|
|
14
|
+
- Automatic repair for missing or incomplete model directories.
|
|
15
|
+
- CPU/GPU provider selection through ONNX Runtime.
|
|
16
|
+
- JSONL worker mode for GUI or service integration.
|
|
17
|
+
|
|
18
|
+
## Installation
|
|
19
|
+
|
|
20
|
+
CPU backend:
|
|
21
|
+
|
|
22
|
+
```bash
|
|
23
|
+
pip install "mathcraft-ocr[cpu]"
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
GPU backend:
|
|
27
|
+
|
|
28
|
+
```bash
|
|
29
|
+
pip install "mathcraft-ocr[gpu]"
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
Install only one backend extra in a clean environment. `onnxruntime` and `onnxruntime-gpu` should not be mixed in the same environment.
|
|
33
|
+
|
|
34
|
+
## Quick Start
|
|
35
|
+
|
|
36
|
+
```python
|
|
37
|
+
from mathcraft_ocr import MathCraftRuntime
|
|
38
|
+
|
|
39
|
+
runtime = MathCraftRuntime(provider_preference="auto")
|
|
40
|
+
result = runtime.recognize_mixed("page.png")
|
|
41
|
+
|
|
42
|
+
print(result.text)
|
|
43
|
+
for block in result.blocks:
|
|
44
|
+
print(block.role, block.kind, block.text[:80])
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
Formula-only recognition:
|
|
48
|
+
|
|
49
|
+
```python
|
|
50
|
+
from mathcraft_ocr import MathCraftRuntime
|
|
51
|
+
|
|
52
|
+
runtime = MathCraftRuntime(provider_preference="cpu")
|
|
53
|
+
formula = runtime.recognize_formula("formula.png")
|
|
54
|
+
print(formula.text)
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
## CLI
|
|
58
|
+
|
|
59
|
+
Check model cache:
|
|
60
|
+
|
|
61
|
+
```bash
|
|
62
|
+
mathcraft models check
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
Inspect runtime:
|
|
66
|
+
|
|
67
|
+
```bash
|
|
68
|
+
mathcraft doctor --provider auto
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
Warm up models:
|
|
72
|
+
|
|
73
|
+
```bash
|
|
74
|
+
mathcraft warmup --profile mixed --provider auto
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
Recognize an image:
|
|
78
|
+
|
|
79
|
+
```bash
|
|
80
|
+
mathcraft ocr page.png --profile mixed --provider auto --output result.md
|
|
81
|
+
mathcraft ocr formula.png --profile formula --json
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
Run JSONL worker mode:
|
|
85
|
+
|
|
86
|
+
```bash
|
|
87
|
+
mathcraft worker --provider auto
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
## Model Cache
|
|
91
|
+
|
|
92
|
+
MathCraft reads models from:
|
|
93
|
+
|
|
94
|
+
```text
|
|
95
|
+
%APPDATA%\MathCraft\models
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
or from a custom root:
|
|
99
|
+
|
|
100
|
+
```bat
|
|
101
|
+
set MATHCRAFT_HOME=D:\MathCraft\models
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
Model artifacts are downloaded from the MathCraft-Models release assets declared in `mathcraft_ocr/manifests/models.v1.json`.
|
|
105
|
+
|
|
106
|
+
## Runtime Profiles
|
|
107
|
+
|
|
108
|
+
| Profile | Models | Output |
|
|
109
|
+
| --- | --- | --- |
|
|
110
|
+
| `formula` | formula detector + formula recognizer | LaTeX string |
|
|
111
|
+
| `text` | text detector + text recognizer | OCR text and text blocks |
|
|
112
|
+
| `mixed` | formula detector + formula recognizer + text detector + text recognizer | Markdown-ready structured blocks |
|
|
113
|
+
|
|
114
|
+
## Provider Selection
|
|
115
|
+
|
|
116
|
+
`provider_preference` accepts:
|
|
117
|
+
|
|
118
|
+
- `auto`: prefer CUDA when available and valid, otherwise CPU.
|
|
119
|
+
- `cpu`: force CPU.
|
|
120
|
+
- `gpu`: request CUDA-capable ONNX Runtime.
|
|
121
|
+
|
|
122
|
+
The actual provider is available on results through the `provider` field.
|
|
123
|
+
|
|
124
|
+
## Development
|
|
125
|
+
|
|
126
|
+
Run tests from the repository root:
|
|
127
|
+
|
|
128
|
+
```bash
|
|
129
|
+
python test/test_mathcraft_ocr.py
|
|
130
|
+
python test/test_mathcraft_document_engine.py
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
Build package artifacts:
|
|
134
|
+
|
|
135
|
+
```bash
|
|
136
|
+
python -m pip wheel . --no-deps -w release_assets/mathcraft-ocr-package/dist
|
|
137
|
+
python -m build --outdir release_assets/mathcraft-ocr-package/dist
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
## License
|
|
141
|
+
|
|
142
|
+
MIT. See `LICENSE`.
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
# coding: utf-8
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
__version__ = "0.1.0"
|
|
6
|
+
|
|
7
|
+
__all__ = [
|
|
8
|
+
"DoctorReport",
|
|
9
|
+
"FormulaRecognitionResult",
|
|
10
|
+
"MathCraftBlock",
|
|
11
|
+
"MathCraftError",
|
|
12
|
+
"MathCraftRuntime",
|
|
13
|
+
"MixedRecognitionResult",
|
|
14
|
+
"OCRRegion",
|
|
15
|
+
"__version__",
|
|
16
|
+
"run_doctor",
|
|
17
|
+
]
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def __getattr__(name: str) -> object:
    """Resolve the package's public names lazily on first attribute access.

    This is the module-level ``__getattr__`` hook (PEP 562). The submodules
    that define the public objects are imported inside this function, so
    they are only loaded when one of their names is actually requested.

    Args:
        name: Attribute name being looked up on the package.

    Returns:
        The object of that name taken from ``api``, ``doctor`` or ``errors``.

    Raises:
        AttributeError: If ``name`` is not one of the lazily exported names.
    """
    if name in {
        "FormulaRecognitionResult",
        "MathCraftBlock",
        "MathCraftRuntime",
        "MixedRecognitionResult",
        "OCRRegion",
    }:
        from . import api

        return getattr(api, name)
    if name in {"DoctorReport", "run_doctor"}:
        from . import doctor

        return getattr(doctor, name)
    if name == "MathCraftError":
        from .errors import MathCraftError

        return MathCraftError
    # Use the conventional wording so a failed lookup reads like any other
    # missing module attribute instead of showing only the bare name.
    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
# coding: utf-8
|
|
2
|
+
|
|
3
|
+
from .formula_detector import warmup_formula_detector
|
|
4
|
+
from .formula_recognizer import warmup_formula_recognizer
|
|
5
|
+
from .text_detector import warmup_text_detector
|
|
6
|
+
from .text_recognizer import warmup_pp_text_recognizer
|
|
7
|
+
|
|
8
|
+
__all__ = [
|
|
9
|
+
"warmup_formula_detector",
|
|
10
|
+
"warmup_formula_recognizer",
|
|
11
|
+
"warmup_text_detector",
|
|
12
|
+
"warmup_pp_text_recognizer",
|
|
13
|
+
]
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
# coding: utf-8
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import importlib
|
|
6
|
+
from functools import lru_cache
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
from ..providers import GPU_PROVIDER_NAMES, ProviderInfo
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def _ort():
|
|
13
|
+
return importlib.import_module("onnxruntime")
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def session_providers(provider_info: ProviderInfo) -> list[str]:
    """Build the ordered execution-provider list to request for a session.

    Args:
        provider_info: Object exposing ``available_providers`` and
            ``active_provider``.

    Returns:
        ``[active, "CPUExecutionProvider"]`` when the active provider is one
        of ``GPU_PROVIDER_NAMES`` and the CPU provider is available;
        ``["CPUExecutionProvider"]`` when only the CPU condition holds;
        otherwise a list copy of the available providers.
    """
    available = list(provider_info.available_providers)
    active = provider_info.active_provider
    cpu_available = "CPUExecutionProvider" in available
    # Only consult GPU_PROVIDER_NAMES when an active provider is actually set.
    gpu_active = bool(active) and active in GPU_PROVIDER_NAMES
    if gpu_active and cpu_available:
        return [active, "CPUExecutionProvider"]
    return ["CPUExecutionProvider"] if cpu_available else available
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def create_session(model_path: str | Path, provider_info: ProviderInfo):
    """Return an inference session for ``model_path`` and verify its provider.

    The path is resolved to an absolute string and, together with the
    provider tuple from ``session_providers``, passed to the cached session
    factory, so repeated calls with the same arguments reuse one session.

    Args:
        model_path: Location of the ONNX model file.
        provider_info: Object exposing ``available_providers`` and
            ``active_provider``.

    Returns:
        The session object produced by ``_create_session_cached``.

    Raises:
        RuntimeError: If the active provider is one of ``GPU_PROVIDER_NAMES``
            but is missing from the providers the session reports.
    """
    resolved_path = str(Path(model_path).resolve())
    requested = tuple(session_providers(provider_info))
    session = _create_session_cached(resolved_path, requested)

    actual = list(session.get_providers() or [])
    active = provider_info.active_provider
    gpu_requested = bool(active) and active in GPU_PROVIDER_NAMES
    # Fail if a GPU provider was requested but the session does not report it.
    if gpu_requested and active not in actual:
        raise RuntimeError(
            f"requested ONNX GPU provider {active}, but session providers are {actual}"
        )
    return session
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
@lru_cache(maxsize=16)
def _create_session_cached(model_path: str, providers: tuple[str, ...]):
    """Create an ``InferenceSession``, memoized on ``(model_path, providers)``.

    Both arguments are hashable (a string and a tuple) so they can serve as
    the ``lru_cache`` key; the cache holds at most 16 sessions.
    """
    return _ort().InferenceSession(model_path, providers=list(providers))
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def clear_session_cache() -> None:
    """Drop every session memoized by ``_create_session_cached``."""
    _create_session_cached.cache_clear()
|
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
# coding: utf-8
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
import cv2
|
|
9
|
+
import numpy as np
|
|
10
|
+
|
|
11
|
+
from .common import create_session
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@dataclass(frozen=True)
class FormulaBox:
    """Immutable record for one detected formula region."""

    # Four (x, y) corner points; detect_formula_boxes fills them in the order
    # (x1, y1), (x2, y1), (x2, y2), (x1, y2).
    box: tuple[tuple[float, float], tuple[float, float], tuple[float, float], tuple[float, float]]
    # Score of the winning class for this detection.
    score: float
    # Class label; detect_formula_boxes uses "embedding" / "isolated", or the
    # stringified class id when the id has no known label.
    label: str
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def warmup_formula_detector(model_dir: str | Path, provider_info) -> None:
    """Create a session for the formula-detector model in ``model_dir``.

    Of the files matching ``*mfd*.onnx`` directly under ``model_dir``, the
    one that sorts first is passed to ``create_session``.

    Args:
        model_dir: Directory expected to contain the detector ONNX file.
        provider_info: Provider description forwarded to ``create_session``.

    Raises:
        FileNotFoundError: If no ``*mfd*.onnx`` file exists in ``model_dir``.
    """
    root = Path(model_dir)
    first_model = min(root.glob("*mfd*.onnx"), default=None)
    if first_model is None:
        raise FileNotFoundError(f"no mfd onnx file found under {root}")
    create_session(first_model, provider_info)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def _letterbox(image: np.ndarray, target_size: int = 768) -> tuple[np.ndarray, float, tuple[float, float]]:
|
|
35
|
+
height, width = image.shape[:2]
|
|
36
|
+
scale = min(target_size / width, target_size / height)
|
|
37
|
+
new_w = int(round(width * scale))
|
|
38
|
+
new_h = int(round(height * scale))
|
|
39
|
+
resized = cv2.resize(image, (new_w, new_h), interpolation=cv2.INTER_LINEAR)
|
|
40
|
+
canvas = np.full((target_size, target_size, 3), 114, dtype=np.uint8)
|
|
41
|
+
pad_x = (target_size - new_w) / 2
|
|
42
|
+
pad_y = (target_size - new_h) / 2
|
|
43
|
+
left = int(round(pad_x - 0.1))
|
|
44
|
+
top = int(round(pad_y - 0.1))
|
|
45
|
+
canvas[top : top + new_h, left : left + new_w] = resized
|
|
46
|
+
return canvas, scale, (float(left), float(top))
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def _nms_xyxy(boxes: np.ndarray, scores: np.ndarray, iou_threshold: float) -> list[int]:
|
|
50
|
+
if len(boxes) == 0:
|
|
51
|
+
return []
|
|
52
|
+
x1 = boxes[:, 0]
|
|
53
|
+
y1 = boxes[:, 1]
|
|
54
|
+
x2 = boxes[:, 2]
|
|
55
|
+
y2 = boxes[:, 3]
|
|
56
|
+
areas = np.maximum(0.0, x2 - x1) * np.maximum(0.0, y2 - y1)
|
|
57
|
+
order = scores.argsort()[::-1]
|
|
58
|
+
keep: list[int] = []
|
|
59
|
+
while order.size > 0:
|
|
60
|
+
current = int(order[0])
|
|
61
|
+
keep.append(current)
|
|
62
|
+
if order.size == 1:
|
|
63
|
+
break
|
|
64
|
+
rest = order[1:]
|
|
65
|
+
xx1 = np.maximum(x1[current], x1[rest])
|
|
66
|
+
yy1 = np.maximum(y1[current], y1[rest])
|
|
67
|
+
xx2 = np.minimum(x2[current], x2[rest])
|
|
68
|
+
yy2 = np.minimum(y2[current], y2[rest])
|
|
69
|
+
inter_w = np.maximum(0.0, xx2 - xx1)
|
|
70
|
+
inter_h = np.maximum(0.0, yy2 - yy1)
|
|
71
|
+
intersection = inter_w * inter_h
|
|
72
|
+
union = areas[current] + areas[rest] - intersection
|
|
73
|
+
iou = np.divide(intersection, union, out=np.zeros_like(intersection), where=union > 0)
|
|
74
|
+
order = rest[iou <= iou_threshold]
|
|
75
|
+
return keep
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def detect_formula_boxes(
    image_rgb: np.ndarray,
    model_dir: str | Path,
    provider_info,
    *,
    confidence_threshold: float = 0.25,
    iou_threshold: float = 0.45,
    input_size: int = 768,
) -> tuple[FormulaBox, ...]:
    """Run the formula-detector ONNX model on an image and return its boxes.

    The image is letterboxed to ``input_size``, fed to the first
    ``*mfd*.onnx`` model found in ``model_dir``, and the predictions are
    filtered by score, mapped back to original-image coordinates, clipped to
    the image bounds and reduced with non-maximum suppression.

    Args:
        image_rgb: Input image array; its first two axes are height and width.
        model_dir: Directory containing the detector ONNX file.
        provider_info: Provider description forwarded to ``create_session``.
        confidence_threshold: Minimum best-class score for a prediction.
        iou_threshold: IoU threshold passed to ``_nms_xyxy``.
        input_size: Side length of the square model input.

    Returns:
        A tuple of ``FormulaBox`` in the order produced by ``_nms_xyxy``;
        empty when the model yields no usable prediction.

    Raises:
        FileNotFoundError: If no ``*mfd*.onnx`` file exists in ``model_dir``.
    """
    root = Path(model_dir)
    candidates = sorted(root.glob("*mfd*.onnx"))
    if not candidates:
        raise FileNotFoundError(f"no mfd onnx file found under {root}")
    session = create_session(candidates[0], provider_info)

    canvas, scale, (pad_x, pad_y) = _letterbox(image_rgb, input_size)
    # HWC uint8 canvas -> 1xCxHxW float32 tensor scaled by 1/255.
    tensor = canvas.astype(np.float32).transpose(2, 0, 1)[np.newaxis, ...] / 255.0
    raw_output = session.run(None, {session.get_inputs()[0].name: tensor})[0]

    # After the transpose each row is one prediction: 4 box values followed
    # by the per-class scores.
    preds = np.asarray(raw_output[0]).T
    if preds.size == 0 or preds.shape[1] < 6:
        return ()

    class_scores = preds[:, 4:]
    class_ids = np.argmax(class_scores, axis=1)
    scores = class_scores[np.arange(len(class_scores)), class_ids]
    confident = scores >= confidence_threshold
    if not np.any(confident):
        return ()
    centers_sizes = preds[:, :4][confident]
    class_ids = class_ids[confident]
    scores = scores[confident]

    # Centre/size -> corner form, then undo the letterbox padding and scale.
    cx, cy, w, h = (centers_sizes[:, column] for column in range(4))
    boxes = np.stack([cx - w / 2, cy - h / 2, cx + w / 2, cy + h / 2], axis=1)
    boxes[:, [0, 2]] = (boxes[:, [0, 2]] - pad_x) / scale
    boxes[:, [1, 3]] = (boxes[:, [1, 3]] - pad_y) / scale
    height, width = image_rgb.shape[:2]
    boxes[:, [0, 2]] = np.clip(boxes[:, [0, 2]], 0, width)
    boxes[:, [1, 3]] = np.clip(boxes[:, [1, 3]], 0, height)

    labels = ("embedding", "isolated")
    results: list[FormulaBox] = []
    for index in _nms_xyxy(boxes, scores, iou_threshold):
        x1, y1, x2, y2 = boxes[index].tolist()
        class_id = int(class_ids[index])
        # Class ids without a known label are reported as their number.
        label = labels[class_id] if class_id < len(labels) else str(class_id)
        results.append(
            FormulaBox(
                box=((x1, y1), (x2, y1), (x2, y2), (x1, y2)),
                score=float(scores[index]),
                label=label,
            )
        )
    return tuple(results)
|