mathcraft-ocr 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. mathcraft_ocr-0.1.0/LICENSE +21 -0
  2. mathcraft_ocr-0.1.0/MANIFEST.in +14 -0
  3. mathcraft_ocr-0.1.0/PKG-INFO +184 -0
  4. mathcraft_ocr-0.1.0/README_MATHCRAFT_OCR.md +142 -0
  5. mathcraft_ocr-0.1.0/mathcraft_ocr/__init__.py +39 -0
  6. mathcraft_ocr-0.1.0/mathcraft_ocr/__main__.py +6 -0
  7. mathcraft_ocr-0.1.0/mathcraft_ocr/adapters/__init__.py +13 -0
  8. mathcraft_ocr-0.1.0/mathcraft_ocr/adapters/common.py +46 -0
  9. mathcraft_ocr-0.1.0/mathcraft_ocr/adapters/formula_detector.py +131 -0
  10. mathcraft_ocr-0.1.0/mathcraft_ocr/adapters/formula_recognizer.py +151 -0
  11. mathcraft_ocr-0.1.0/mathcraft_ocr/adapters/text_detector.py +57 -0
  12. mathcraft_ocr-0.1.0/mathcraft_ocr/adapters/text_recognizer.py +121 -0
  13. mathcraft_ocr-0.1.0/mathcraft_ocr/api.py +14 -0
  14. mathcraft_ocr-0.1.0/mathcraft_ocr/cache.py +135 -0
  15. mathcraft_ocr-0.1.0/mathcraft_ocr/cli.py +110 -0
  16. mathcraft_ocr-0.1.0/mathcraft_ocr/debug_blocks.py +202 -0
  17. mathcraft_ocr-0.1.0/mathcraft_ocr/doctor.py +50 -0
  18. mathcraft_ocr-0.1.0/mathcraft_ocr/downloader.py +97 -0
  19. mathcraft_ocr-0.1.0/mathcraft_ocr/errors.py +21 -0
  20. mathcraft_ocr-0.1.0/mathcraft_ocr/hardware.py +203 -0
  21. mathcraft_ocr-0.1.0/mathcraft_ocr/image.py +33 -0
  22. mathcraft_ocr-0.1.0/mathcraft_ocr/layout.py +892 -0
  23. mathcraft_ocr-0.1.0/mathcraft_ocr/manifest.py +89 -0
  24. mathcraft_ocr-0.1.0/mathcraft_ocr/manifests/models.v1.json +89 -0
  25. mathcraft_ocr-0.1.0/mathcraft_ocr/providers.py +80 -0
  26. mathcraft_ocr-0.1.0/mathcraft_ocr/results.py +53 -0
  27. mathcraft_ocr-0.1.0/mathcraft_ocr/runtime.py +535 -0
  28. mathcraft_ocr-0.1.0/mathcraft_ocr/serialization.py +120 -0
  29. mathcraft_ocr-0.1.0/mathcraft_ocr/worker.py +131 -0
  30. mathcraft_ocr-0.1.0/mathcraft_ocr.egg-info/PKG-INFO +184 -0
  31. mathcraft_ocr-0.1.0/mathcraft_ocr.egg-info/SOURCES.txt +35 -0
  32. mathcraft_ocr-0.1.0/mathcraft_ocr.egg-info/dependency_links.txt +1 -0
  33. mathcraft_ocr-0.1.0/mathcraft_ocr.egg-info/entry_points.txt +3 -0
  34. mathcraft_ocr-0.1.0/mathcraft_ocr.egg-info/requires.txt +19 -0
  35. mathcraft_ocr-0.1.0/mathcraft_ocr.egg-info/top_level.txt +1 -0
  36. mathcraft_ocr-0.1.0/pyproject.toml +86 -0
  37. mathcraft_ocr-0.1.0/setup.cfg +4 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 MathCraft
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,14 @@
1
+ include LICENSE
2
+ include README_MATHCRAFT_OCR.md
3
+ include pyproject.toml
4
+ graft mathcraft_ocr
5
+ prune build
6
+ prune dist
7
+ prune docs
8
+ prune release_assets
9
+ prune scripts
10
+ prune src
11
+ prune test
12
+ prune test_pdf
13
+ global-exclude __pycache__
14
+ global-exclude *.py[cod]
@@ -0,0 +1,184 @@
1
+ Metadata-Version: 2.4
2
+ Name: mathcraft-ocr
3
+ Version: 0.1.0
4
+ Summary: ONNX-only OCR runtime for mathematical documents
5
+ Author: SakuraMathcraft
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/SakuraMathcraft/LaTeXSnipper
8
+ Project-URL: Models, https://github.com/SakuraMathcraft/MathCraft-Models
9
+ Project-URL: Issues, https://github.com/SakuraMathcraft/LaTeXSnipper/issues
10
+ Keywords: ocr,latex,math,onnx,document-ocr,formula-recognition
11
+ Classifier: Development Status :: 3 - Alpha
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: Intended Audience :: Science/Research
14
+ Classifier: Operating System :: Microsoft :: Windows
15
+ Classifier: Operating System :: OS Independent
16
+ Classifier: Programming Language :: Python :: 3
17
+ Classifier: Programming Language :: Python :: 3.10
18
+ Classifier: Programming Language :: Python :: 3.11
19
+ Classifier: Programming Language :: Python :: 3.12
20
+ Classifier: Topic :: Scientific/Engineering :: Image Recognition
21
+ Classifier: Topic :: Text Processing
22
+ Requires-Python: >=3.10
23
+ Description-Content-Type: text/markdown
24
+ License-File: LICENSE
25
+ Requires-Dist: numpy<3,>=1.26
26
+ Requires-Dist: pillow<12,>=10
27
+ Requires-Dist: opencv-python<5,>=4.10
28
+ Requires-Dist: rapidocr==3.5.0
29
+ Requires-Dist: transformers==4.55.4
30
+ Requires-Dist: tokenizers==0.21.4
31
+ Requires-Dist: requests<3,>=2.31
32
+ Requires-Dist: packaging>=23
33
+ Provides-Extra: cpu
34
+ Requires-Dist: onnxruntime~=1.19.2; extra == "cpu"
35
+ Provides-Extra: gpu
36
+ Requires-Dist: onnxruntime-gpu~=1.19.2; extra == "gpu"
37
+ Provides-Extra: dev
38
+ Requires-Dist: ruff>=0.8; extra == "dev"
39
+ Requires-Dist: build>=1.2; extra == "dev"
40
+ Requires-Dist: twine>=5.0; extra == "dev"
41
+ Dynamic: license-file
42
+
43
+ # MathCraft OCR
44
+
45
+ MathCraft OCR is an ONNX-only OCR runtime for mathematical documents. It provides formula recognition, text recognition, mixed text/formula page OCR, explicit model-cache management, and structured block output for downstream Markdown or TeX document engines.
46
+
47
+ The package is developed for LaTeXSnipper but is usable as a standalone Python library.
48
+
49
+ ## Features
50
+
51
+ - ONNX Runtime inference only; no active PyTorch OCR runtime.
52
+ - Formula OCR: image to LaTeX.
53
+ - Text OCR: multilingual PP-OCRv5 mobile detector/recognizer.
54
+ - Mixed OCR: formula detection, text masking, batched recognition, and layout merge.
55
+ - Manifest-driven model cache with SHA-256 file checks.
56
+ - Automatic repair for missing or incomplete model directories.
57
+ - CPU/GPU provider selection through ONNX Runtime.
58
+ - JSONL worker mode for GUI or service integration.
59
+
60
+ ## Installation
61
+
62
+ CPU backend:
63
+
64
+ ```bash
65
+ pip install "mathcraft-ocr[cpu]"
66
+ ```
67
+
68
+ GPU backend:
69
+
70
+ ```bash
71
+ pip install "mathcraft-ocr[gpu]"
72
+ ```
73
+
74
+ Install only one backend extra in a clean environment. `onnxruntime` and `onnxruntime-gpu` should not be mixed in the same environment.
75
+
76
+ ## Quick Start
77
+
78
+ ```python
79
+ from mathcraft_ocr import MathCraftRuntime
80
+
81
+ runtime = MathCraftRuntime(provider_preference="auto")
82
+ result = runtime.recognize_mixed("page.png")
83
+
84
+ print(result.text)
85
+ for block in result.blocks:
86
+ print(block.role, block.kind, block.text[:80])
87
+ ```
88
+
89
+ Formula-only recognition:
90
+
91
+ ```python
92
+ from mathcraft_ocr import MathCraftRuntime
93
+
94
+ runtime = MathCraftRuntime(provider_preference="cpu")
95
+ formula = runtime.recognize_formula("formula.png")
96
+ print(formula.text)
97
+ ```
98
+
99
+ ## CLI
100
+
101
+ Check model cache:
102
+
103
+ ```bash
104
+ mathcraft models check
105
+ ```
106
+
107
+ Inspect runtime:
108
+
109
+ ```bash
110
+ mathcraft doctor --provider auto
111
+ ```
112
+
113
+ Warm up models:
114
+
115
+ ```bash
116
+ mathcraft warmup --profile mixed --provider auto
117
+ ```
118
+
119
+ Recognize an image:
120
+
121
+ ```bash
122
+ mathcraft ocr page.png --profile mixed --provider auto --output result.md
123
+ mathcraft ocr formula.png --profile formula --json
124
+ ```
125
+
126
+ Run JSONL worker mode:
127
+
128
+ ```bash
129
+ mathcraft worker --provider auto
130
+ ```
131
+
132
+ ## Model Cache
133
+
134
+ MathCraft reads models from:
135
+
136
+ ```text
137
+ %APPDATA%\MathCraft\models
138
+ ```
139
+
140
+ or from a custom root:
141
+
142
+ ```bat
143
+ set MATHCRAFT_HOME=D:\MathCraft\models
144
+ ```
145
+
146
+ Model artifacts are downloaded from the MathCraft-Models release assets declared in `mathcraft_ocr/manifests/models.v1.json`.
147
+
148
+ ## Runtime Profiles
149
+
150
+ | Profile | Models | Output |
151
+ | --- | --- | --- |
152
+ | `formula` | formula detector + formula recognizer | LaTeX string |
153
+ | `text` | text detector + text recognizer | OCR text and text blocks |
154
+ | `mixed` | formula detector + formula recognizer + text detector + text recognizer | Markdown-ready structured blocks |
155
+
156
+ ## Provider Selection
157
+
158
+ `provider_preference` accepts:
159
+
160
+ - `auto`: prefer CUDA when available and valid, otherwise CPU.
161
+ - `cpu`: force CPU.
162
+ - `gpu`: request CUDA-capable ONNX Runtime.
163
+
164
+ The actual provider is available on results through the `provider` field.
165
+
166
+ ## Development
167
+
168
+ Run tests from the repository root:
169
+
170
+ ```bash
171
+ python test/test_mathcraft_ocr.py
172
+ python test/test_mathcraft_document_engine.py
173
+ ```
174
+
175
+ Build package artifacts:
176
+
177
+ ```bash
178
+ python -m pip wheel . --no-deps -w release_assets/mathcraft-ocr-package/dist
179
+ python -m build --outdir release_assets/mathcraft-ocr-package/dist
180
+ ```
181
+
182
+ ## License
183
+
184
+ MIT. See `LICENSE`.
@@ -0,0 +1,142 @@
1
+ # MathCraft OCR
2
+
3
+ MathCraft OCR is an ONNX-only OCR runtime for mathematical documents. It provides formula recognition, text recognition, mixed text/formula page OCR, explicit model-cache management, and structured block output for downstream Markdown or TeX document engines.
4
+
5
+ The package is developed for LaTeXSnipper but is usable as a standalone Python library.
6
+
7
+ ## Features
8
+
9
+ - ONNX Runtime inference only; no active PyTorch OCR runtime.
10
+ - Formula OCR: image to LaTeX.
11
+ - Text OCR: multilingual PP-OCRv5 mobile detector/recognizer.
12
+ - Mixed OCR: formula detection, text masking, batched recognition, and layout merge.
13
+ - Manifest-driven model cache with SHA-256 file checks.
14
+ - Automatic repair for missing or incomplete model directories.
15
+ - CPU/GPU provider selection through ONNX Runtime.
16
+ - JSONL worker mode for GUI or service integration.
17
+
18
+ ## Installation
19
+
20
+ CPU backend:
21
+
22
+ ```bash
23
+ pip install "mathcraft-ocr[cpu]"
24
+ ```
25
+
26
+ GPU backend:
27
+
28
+ ```bash
29
+ pip install "mathcraft-ocr[gpu]"
30
+ ```
31
+
32
+ Install only one backend extra in a clean environment. `onnxruntime` and `onnxruntime-gpu` should not be mixed in the same environment.
33
+
34
+ ## Quick Start
35
+
36
+ ```python
37
+ from mathcraft_ocr import MathCraftRuntime
38
+
39
+ runtime = MathCraftRuntime(provider_preference="auto")
40
+ result = runtime.recognize_mixed("page.png")
41
+
42
+ print(result.text)
43
+ for block in result.blocks:
44
+ print(block.role, block.kind, block.text[:80])
45
+ ```
46
+
47
+ Formula-only recognition:
48
+
49
+ ```python
50
+ from mathcraft_ocr import MathCraftRuntime
51
+
52
+ runtime = MathCraftRuntime(provider_preference="cpu")
53
+ formula = runtime.recognize_formula("formula.png")
54
+ print(formula.text)
55
+ ```
56
+
57
+ ## CLI
58
+
59
+ Check model cache:
60
+
61
+ ```bash
62
+ mathcraft models check
63
+ ```
64
+
65
+ Inspect runtime:
66
+
67
+ ```bash
68
+ mathcraft doctor --provider auto
69
+ ```
70
+
71
+ Warm up models:
72
+
73
+ ```bash
74
+ mathcraft warmup --profile mixed --provider auto
75
+ ```
76
+
77
+ Recognize an image:
78
+
79
+ ```bash
80
+ mathcraft ocr page.png --profile mixed --provider auto --output result.md
81
+ mathcraft ocr formula.png --profile formula --json
82
+ ```
83
+
84
+ Run JSONL worker mode:
85
+
86
+ ```bash
87
+ mathcraft worker --provider auto
88
+ ```
89
+
90
+ ## Model Cache
91
+
92
+ MathCraft reads models from:
93
+
94
+ ```text
95
+ %APPDATA%\MathCraft\models
96
+ ```
97
+
98
+ or from a custom root:
99
+
100
+ ```bat
101
+ set MATHCRAFT_HOME=D:\MathCraft\models
102
+ ```
103
+
104
+ Model artifacts are downloaded from the MathCraft-Models release assets declared in `mathcraft_ocr/manifests/models.v1.json`.
105
+
106
+ ## Runtime Profiles
107
+
108
+ | Profile | Models | Output |
109
+ | --- | --- | --- |
110
+ | `formula` | formula detector + formula recognizer | LaTeX string |
111
+ | `text` | text detector + text recognizer | OCR text and text blocks |
112
+ | `mixed` | formula detector + formula recognizer + text detector + text recognizer | Markdown-ready structured blocks |
113
+
114
+ ## Provider Selection
115
+
116
+ `provider_preference` accepts:
117
+
118
+ - `auto`: prefer CUDA when available and valid, otherwise CPU.
119
+ - `cpu`: force CPU.
120
+ - `gpu`: request CUDA-capable ONNX Runtime.
121
+
122
+ The actual provider is available on results through the `provider` field.
123
+
124
+ ## Development
125
+
126
+ Run tests from the repository root:
127
+
128
+ ```bash
129
+ python test/test_mathcraft_ocr.py
130
+ python test/test_mathcraft_document_engine.py
131
+ ```
132
+
133
+ Build package artifacts:
134
+
135
+ ```bash
136
+ python -m pip wheel . --no-deps -w release_assets/mathcraft-ocr-package/dist
137
+ python -m build --outdir release_assets/mathcraft-ocr-package/dist
138
+ ```
139
+
140
+ ## License
141
+
142
+ MIT. See `LICENSE`.
@@ -0,0 +1,39 @@
1
+ # coding: utf-8
2
+
3
+ from __future__ import annotations
4
+
5
# Version string of the package.
__version__ = "0.1.0"

# Names advertised as the package's public API. Apart from ``__version__``,
# which is defined eagerly above, these are resolved on first access by the
# module-level ``__getattr__`` defined later in this file.
__all__ = [
    "DoctorReport",
    "FormulaRecognitionResult",
    "MathCraftBlock",
    "MathCraftError",
    "MathCraftRuntime",
    "MixedRecognitionResult",
    "OCRRegion",
    "__version__",
    "run_doctor",
]
18
+
19
+
20
def __getattr__(name: str) -> object:
    """Lazily resolve the package's public names on first attribute access.

    The submodule that provides a name is imported only when that name is
    requested, so importing the package itself does not import ``api``,
    ``doctor`` or ``errors``.

    Args:
        name: Attribute being looked up on the package module.

    Returns:
        The object with that name from ``api``, ``doctor`` or ``errors``.

    Raises:
        AttributeError: If ``name`` is not one of the lazily exported names.
    """
    if name in {
        "FormulaRecognitionResult",
        "MathCraftBlock",
        "MathCraftRuntime",
        "MixedRecognitionResult",
        "OCRRegion",
    }:
        from . import api

        return getattr(api, name)
    if name in {"DoctorReport", "run_doctor"}:
        from . import doctor

        return getattr(doctor, name)
    if name == "MathCraftError":
        from .errors import MathCraftError

        return MathCraftError
    # Use the same wording as the interpreter's own message so tracebacks
    # say which module was asked for which attribute, not just the bare name.
    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
@@ -0,0 +1,6 @@
1
+ # coding: utf-8
2
+
3
+ from .cli import main
4
+
5
+
6
# Run the CLI and hand its return value to the interpreter as the exit status.
exit_status = main()
raise SystemExit(exit_status)
@@ -0,0 +1,13 @@
1
+ # coding: utf-8
2
+
3
+ from .formula_detector import warmup_formula_detector
4
+ from .formula_recognizer import warmup_formula_recognizer
5
+ from .text_detector import warmup_text_detector
6
+ from .text_recognizer import warmup_pp_text_recognizer
7
+
8
# Warm-up entry points re-exported from the adapter submodules imported above.
__all__ = [
    "warmup_formula_detector",
    "warmup_formula_recognizer",
    "warmup_text_detector",
    "warmup_pp_text_recognizer",
]
@@ -0,0 +1,46 @@
1
+ # coding: utf-8
2
+
3
+ from __future__ import annotations
4
+
5
+ import importlib
6
+ from functools import lru_cache
7
+ from pathlib import Path
8
+
9
+ from ..providers import GPU_PROVIDER_NAMES, ProviderInfo
10
+
11
+
12
def _ort():
    """Import the ``onnxruntime`` module on demand and return it."""
    module_name = "onnxruntime"
    return importlib.import_module(module_name)
14
+
15
+
16
def session_providers(provider_info: ProviderInfo) -> list[str]:
    """Choose the execution-provider list to request for an ONNX session.

    Returns ``[active, "CPUExecutionProvider"]`` when the active provider is
    one of ``GPU_PROVIDER_NAMES`` and the CPU provider is available,
    ``["CPUExecutionProvider"]`` when only the CPU condition holds, and the
    full list of available providers when the CPU provider is not available.
    """
    cpu_provider = "CPUExecutionProvider"
    offered = list(provider_info.available_providers)
    requested = provider_info.active_provider

    if cpu_provider not in offered:
        return offered
    if requested and requested in GPU_PROVIDER_NAMES:
        return [requested, cpu_provider]
    return [cpu_provider]
24
+
25
+
26
def create_session(model_path: str | Path, provider_info: ProviderInfo):
    """Return an inference session for ``model_path``, reusing cached ones.

    The path is resolved to an absolute string and combined with the provider
    tuple from ``session_providers`` to form the cache key.

    Raises:
        RuntimeError: If the active provider is one of ``GPU_PROVIDER_NAMES``
            but the session does not report it among its providers.
    """
    resolved_path = str(Path(model_path).resolve())
    requested = tuple(session_providers(provider_info))
    session = _create_session_cached(resolved_path, requested)

    reported = list(session.get_providers() or [])
    wanted = provider_info.active_provider
    gpu_was_requested = bool(wanted) and wanted in GPU_PROVIDER_NAMES
    if gpu_was_requested and wanted not in reported:
        raise RuntimeError(
            f"requested ONNX GPU provider {wanted}, but session providers are {reported}"
        )
    return session
37
+
38
+
39
@lru_cache(maxsize=16)
def _create_session_cached(model_path: str, providers: tuple[str, ...]):
    """Build an ``InferenceSession``, memoised on ``(model_path, providers)``.

    ``providers`` is a tuple so it can be part of the cache key; it is turned
    back into a list for the ``InferenceSession`` call.
    """
    runtime = _ort()
    provider_list = list(providers)
    return runtime.InferenceSession(model_path, providers=provider_list)
43
+
44
+
45
def clear_session_cache() -> None:
    """Empty the cache of sessions held by ``_create_session_cached``."""
    cached_factory = _create_session_cached
    cached_factory.cache_clear()
@@ -0,0 +1,131 @@
1
+ # coding: utf-8
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass
6
+ from pathlib import Path
7
+
8
+ import cv2
9
+ import numpy as np
10
+
11
+ from .common import create_session
12
+
13
+
14
@dataclass(frozen=True)
class FormulaBox:
    """Immutable record for one detected formula region."""

    # Four (x, y) corner points of the region.
    box: tuple[
        tuple[float, float], tuple[float, float], tuple[float, float], tuple[float, float]
    ]
    # Detection confidence score.
    score: float
    # Class label assigned to the region.
    label: str
24
+
25
+
26
def warmup_formula_detector(model_dir: str | Path, provider_info) -> None:
    """Create (and thereby cache) the session for the formula detector model.

    The model is the first file, in sorted order, matching ``*mfd*.onnx``
    directly under ``model_dir``.

    Raises:
        FileNotFoundError: If no matching file exists under ``model_dir``.
    """
    model_root = Path(model_dir)
    onnx_files = sorted(model_root.glob("*mfd*.onnx"))
    if onnx_files:
        create_session(onnx_files[0], provider_info)
        return
    raise FileNotFoundError(f"no mfd onnx file found under {model_root}")
32
+
33
+
34
def _letterbox(image: np.ndarray, target_size: int = 768) -> tuple[np.ndarray, float, tuple[float, float]]:
    """Fit ``image`` onto a square canvas while preserving its aspect ratio.

    The image is scaled so its longer side equals ``target_size`` and is then
    pasted, centred, onto a ``target_size`` x ``target_size`` 3-channel
    ``uint8`` canvas filled with the value 114.

    Args:
        image: Image array whose first two axes are height and width. It is
            pasted into a 3-channel canvas, so it is presumably expected to
            be H x W x 3 (not checked here).
        target_size: Side length of the square output canvas.

    Returns:
        A tuple ``(canvas, scale, (left, top))`` where ``scale`` is the resize
        factor applied and ``left``/``top`` are the integer pixel offsets (as
        floats) at which the resized image was pasted.

    Raises:
        ValueError: If ``image`` has zero height or zero width.
    """
    height, width = image.shape[:2]
    if height <= 0 or width <= 0:
        # Without this check the scale computation divides by zero.
        raise ValueError(f"cannot letterbox an empty image of shape {image.shape}")
    scale = min(target_size / width, target_size / height)
    # For extreme aspect ratios the short side can round down to 0, which is
    # not a valid resize target; keep at least one pixel in each dimension.
    new_w = max(1, int(round(width * scale)))
    new_h = max(1, int(round(height * scale)))
    resized = cv2.resize(image, (new_w, new_h), interpolation=cv2.INTER_LINEAR)
    canvas = np.full((target_size, target_size, 3), 114, dtype=np.uint8)
    pad_x = (target_size - new_w) / 2
    pad_y = (target_size - new_h) / 2
    # Subtracting 0.1 before rounding makes half-pixel paddings round down.
    left = int(round(pad_x - 0.1))
    top = int(round(pad_y - 0.1))
    canvas[top : top + new_h, left : left + new_w] = resized
    return canvas, scale, (float(left), float(top))
47
+
48
+
49
def _nms_xyxy(boxes: np.ndarray, scores: np.ndarray, iou_threshold: float) -> list[int]:
    """Greedy non-maximum suppression over ``(x1, y1, x2, y2)`` boxes.

    Boxes are visited from highest to lowest score. Each visited box is kept,
    and every remaining box whose IoU with it exceeds ``iou_threshold`` is
    dropped.

    Args:
        boxes: Array with one box per row, columns ``x1, y1, x2, y2``.
        scores: One score per row of ``boxes``.
        iou_threshold: Remaining boxes with IoU above this value are dropped.

    Returns:
        Row indices of the kept boxes, in descending score order.
    """
    if len(boxes) == 0:
        return []

    left, top, right, bottom = (boxes[:, column] for column in range(4))
    areas = np.maximum(0.0, right - left) * np.maximum(0.0, bottom - top)

    remaining = scores.argsort()[::-1]
    selected: list[int] = []
    while remaining.size > 0:
        best = int(remaining[0])
        others = remaining[1:]
        selected.append(best)
        if others.size == 0:
            break

        overlap_w = np.maximum(
            0.0, np.minimum(right[best], right[others]) - np.maximum(left[best], left[others])
        )
        overlap_h = np.maximum(
            0.0, np.minimum(bottom[best], bottom[others]) - np.maximum(top[best], top[others])
        )
        overlap = overlap_w * overlap_h
        combined = areas[best] + areas[others] - overlap
        # Where the union is not positive the IoU is left at 0.
        ratio = np.divide(overlap, combined, out=np.zeros_like(overlap), where=combined > 0)
        remaining = others[ratio <= iou_threshold]
    return selected
76
+
77
+
78
def detect_formula_boxes(
    image_rgb: np.ndarray,
    model_dir: str | Path,
    provider_info,
    *,
    confidence_threshold: float = 0.25,
    iou_threshold: float = 0.45,
    input_size: int = 768,
) -> tuple[FormulaBox, ...]:
    """Detect formula regions in an image with the ONNX formula detector.

    The image is letterboxed to ``input_size``, scaled to ``[0, 1]`` floats
    and run through the first ``*mfd*.onnx`` model found under ``model_dir``.
    Predictions are filtered by score, mapped back to the original image's
    coordinates, clipped to its bounds and reduced with non-maximum
    suppression.

    Args:
        image_rgb: Input image; presumably H x W x 3 in RGB order, as the
            name suggests (not verified here).
        model_dir: Directory containing the detector's ``*mfd*.onnx`` file.
        provider_info: Provider description forwarded to ``create_session``.
        confidence_threshold: Minimum best-class score for a prediction.
        iou_threshold: IoU above which a lower-scored box is suppressed.
        input_size: Side length of the square model input.

    Returns:
        A tuple of ``FormulaBox`` in descending score order; empty when the
        model yields no usable predictions.

    Raises:
        FileNotFoundError: If no ``*mfd*.onnx`` file exists under ``model_dir``.
    """
    model_root = Path(model_dir)
    onnx_files = sorted(model_root.glob("*mfd*.onnx"))
    if not onnx_files:
        raise FileNotFoundError(f"no mfd onnx file found under {model_root}")
    session = create_session(onnx_files[0], provider_info)

    padded, scale, (pad_x, pad_y) = _letterbox(image_rgb, input_size)
    # HWC uint8 -> 1 x C x H x W float32 in [0, 1].
    tensor = padded.astype(np.float32).transpose(2, 0, 1)[np.newaxis, ...] / 255.0
    input_name = session.get_inputs()[0].name
    raw_output = session.run(None, {input_name: tensor})[0]

    # After the transpose each row is one prediction: four box values
    # followed by the per-class scores.
    preds = np.asarray(raw_output[0]).T
    if preds.size == 0 or preds.shape[1] < 6:
        return ()

    class_scores = preds[:, 4:]
    class_ids = np.argmax(class_scores, axis=1)
    scores = class_scores[np.arange(len(class_scores)), class_ids]
    confident = scores >= confidence_threshold
    if not np.any(confident):
        return ()
    xywh = preds[:, :4][confident]
    class_ids = class_ids[confident]
    scores = scores[confident]

    # Centre/size -> corner coordinates, still in letterboxed space.
    centre_x, centre_y = xywh[:, 0], xywh[:, 1]
    half_w, half_h = xywh[:, 2] / 2, xywh[:, 3] / 2
    boxes = np.stack(
        [centre_x - half_w, centre_y - half_h, centre_x + half_w, centre_y + half_h],
        axis=1,
    )

    # Undo the letterbox padding and scaling, then clip to the source image.
    x_columns, y_columns = [0, 2], [1, 3]
    boxes[:, x_columns] = (boxes[:, x_columns] - pad_x) / scale
    boxes[:, y_columns] = (boxes[:, y_columns] - pad_y) / scale
    height, width = image_rgb.shape[:2]
    boxes[:, x_columns] = np.clip(boxes[:, x_columns], 0, width)
    boxes[:, y_columns] = np.clip(boxes[:, y_columns], 0, height)

    labels = ("embedding", "isolated")

    def _to_formula_box(index: int) -> FormulaBox:
        x1, y1, x2, y2 = boxes[index].tolist()
        class_id = int(class_ids[index])
        # Class ids beyond the known labels fall back to their number.
        label = labels[class_id] if class_id < len(labels) else str(class_id)
        return FormulaBox(
            box=((x1, y1), (x2, y1), (x2, y2), (x1, y2)),
            score=float(scores[index]),
            label=label,
        )

    kept = _nms_xyxy(boxes, scores, iou_threshold)
    return tuple(_to_formula_box(index) for index in kept)