onnxtr 0.5.1__py3-none-any.whl → 0.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- onnxtr/contrib/__init__.py +1 -0
- onnxtr/contrib/artefacts.py +6 -8
- onnxtr/contrib/base.py +7 -16
- onnxtr/file_utils.py +1 -3
- onnxtr/io/elements.py +45 -59
- onnxtr/io/html.py +0 -2
- onnxtr/io/image.py +1 -4
- onnxtr/io/pdf.py +3 -5
- onnxtr/io/reader.py +4 -10
- onnxtr/models/_utils.py +10 -17
- onnxtr/models/builder.py +17 -30
- onnxtr/models/classification/models/mobilenet.py +7 -12
- onnxtr/models/classification/predictor/base.py +6 -7
- onnxtr/models/classification/zoo.py +25 -11
- onnxtr/models/detection/_utils/base.py +3 -7
- onnxtr/models/detection/core.py +2 -8
- onnxtr/models/detection/models/differentiable_binarization.py +10 -17
- onnxtr/models/detection/models/fast.py +10 -17
- onnxtr/models/detection/models/linknet.py +10 -17
- onnxtr/models/detection/postprocessor/base.py +3 -9
- onnxtr/models/detection/predictor/base.py +4 -5
- onnxtr/models/detection/zoo.py +20 -6
- onnxtr/models/engine.py +9 -9
- onnxtr/models/factory/hub.py +3 -7
- onnxtr/models/predictor/base.py +29 -30
- onnxtr/models/predictor/predictor.py +4 -5
- onnxtr/models/preprocessor/base.py +8 -12
- onnxtr/models/recognition/core.py +0 -1
- onnxtr/models/recognition/models/crnn.py +11 -23
- onnxtr/models/recognition/models/master.py +9 -15
- onnxtr/models/recognition/models/parseq.py +8 -12
- onnxtr/models/recognition/models/sar.py +8 -12
- onnxtr/models/recognition/models/vitstr.py +9 -15
- onnxtr/models/recognition/predictor/_utils.py +6 -9
- onnxtr/models/recognition/predictor/base.py +3 -3
- onnxtr/models/recognition/utils.py +2 -7
- onnxtr/models/recognition/zoo.py +19 -7
- onnxtr/models/zoo.py +7 -9
- onnxtr/transforms/base.py +17 -6
- onnxtr/utils/common_types.py +7 -8
- onnxtr/utils/data.py +7 -11
- onnxtr/utils/fonts.py +1 -6
- onnxtr/utils/geometry.py +18 -49
- onnxtr/utils/multithreading.py +3 -5
- onnxtr/utils/reconstitution.py +6 -8
- onnxtr/utils/repr.py +1 -2
- onnxtr/utils/visualization.py +12 -21
- onnxtr/utils/vocabs.py +1 -2
- onnxtr/version.py +1 -1
- {onnxtr-0.5.1.dist-info → onnxtr-0.6.0.dist-info}/METADATA +70 -41
- onnxtr-0.6.0.dist-info/RECORD +75 -0
- {onnxtr-0.5.1.dist-info → onnxtr-0.6.0.dist-info}/WHEEL +1 -1
- onnxtr-0.5.1.dist-info/RECORD +0 -75
- {onnxtr-0.5.1.dist-info → onnxtr-0.6.0.dist-info}/LICENSE +0 -0
- {onnxtr-0.5.1.dist-info → onnxtr-0.6.0.dist-info}/top_level.txt +0 -0
- {onnxtr-0.5.1.dist-info → onnxtr-0.6.0.dist-info}/zip-safe +0 -0
onnxtr/contrib/__init__.py
CHANGED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from .artefacts import ArtefactDetector
|
onnxtr/contrib/artefacts.py
CHANGED
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
# This program is licensed under the Apache License 2.0.
|
|
4
4
|
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
|
|
5
5
|
|
|
6
|
-
from typing import Any
|
|
6
|
+
from typing import Any
|
|
7
7
|
|
|
8
8
|
import cv2
|
|
9
9
|
import numpy as np
|
|
@@ -14,7 +14,7 @@ from .base import _BasePredictor
|
|
|
14
14
|
|
|
15
15
|
__all__ = ["ArtefactDetector"]
|
|
16
16
|
|
|
17
|
-
default_cfgs:
|
|
17
|
+
default_cfgs: dict[str, dict[str, Any]] = {
|
|
18
18
|
"yolov8_artefact": {
|
|
19
19
|
"input_shape": (3, 1024, 1024),
|
|
20
20
|
"labels": ["bar_code", "qr_code", "logo", "photo"],
|
|
@@ -34,7 +34,6 @@ class ArtefactDetector(_BasePredictor):
|
|
|
34
34
|
>>> results = detector(doc)
|
|
35
35
|
|
|
36
36
|
Args:
|
|
37
|
-
----
|
|
38
37
|
arch: the architecture to use
|
|
39
38
|
batch_size: the batch size to use
|
|
40
39
|
model_path: the path to the model to use
|
|
@@ -50,9 +49,9 @@ class ArtefactDetector(_BasePredictor):
|
|
|
50
49
|
self,
|
|
51
50
|
arch: str = "yolov8_artefact",
|
|
52
51
|
batch_size: int = 2,
|
|
53
|
-
model_path:
|
|
54
|
-
labels:
|
|
55
|
-
input_shape:
|
|
52
|
+
model_path: str | None = None,
|
|
53
|
+
labels: list[str] | None = None,
|
|
54
|
+
input_shape: tuple[int, int, int] | None = None,
|
|
56
55
|
conf_threshold: float = 0.5,
|
|
57
56
|
iou_threshold: float = 0.5,
|
|
58
57
|
**kwargs: Any,
|
|
@@ -66,7 +65,7 @@ class ArtefactDetector(_BasePredictor):
|
|
|
66
65
|
def preprocess(self, img: np.ndarray) -> np.ndarray:
|
|
67
66
|
return np.transpose(cv2.resize(img, (self.input_shape[2], self.input_shape[1])), (2, 0, 1)) / np.array(255.0)
|
|
68
67
|
|
|
69
|
-
def postprocess(self, output:
|
|
68
|
+
def postprocess(self, output: list[np.ndarray], input_images: list[list[np.ndarray]]) -> list[list[dict[str, Any]]]:
|
|
70
69
|
results = []
|
|
71
70
|
|
|
72
71
|
for batch in zip(output, input_images):
|
|
@@ -109,7 +108,6 @@ class ArtefactDetector(_BasePredictor):
|
|
|
109
108
|
Display the results
|
|
110
109
|
|
|
111
110
|
Args:
|
|
112
|
-
----
|
|
113
111
|
**kwargs: additional keyword arguments to be passed to `plt.show`
|
|
114
112
|
"""
|
|
115
113
|
requires_package("matplotlib", "`.show()` requires matplotlib installed")
|
onnxtr/contrib/base.py
CHANGED
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
# This program is licensed under the Apache License 2.0.
|
|
4
4
|
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
|
|
5
5
|
|
|
6
|
-
from typing import Any
|
|
6
|
+
from typing import Any
|
|
7
7
|
|
|
8
8
|
import numpy as np
|
|
9
9
|
import onnxruntime as ort
|
|
@@ -16,32 +16,29 @@ class _BasePredictor:
|
|
|
16
16
|
Base class for all predictors
|
|
17
17
|
|
|
18
18
|
Args:
|
|
19
|
-
----
|
|
20
19
|
batch_size: the batch size to use
|
|
21
20
|
url: the url to use to download a model if needed
|
|
22
21
|
model_path: the path to the model to use
|
|
23
22
|
**kwargs: additional arguments to be passed to `download_from_url`
|
|
24
23
|
"""
|
|
25
24
|
|
|
26
|
-
def __init__(self, batch_size: int, url:
|
|
25
|
+
def __init__(self, batch_size: int, url: str | None = None, model_path: str | None = None, **kwargs) -> None:
|
|
27
26
|
self.batch_size = batch_size
|
|
28
27
|
self.session = self._init_model(url, model_path, **kwargs)
|
|
29
28
|
|
|
30
|
-
self._inputs:
|
|
31
|
-
self._results:
|
|
29
|
+
self._inputs: list[np.ndarray] = []
|
|
30
|
+
self._results: list[Any] = []
|
|
32
31
|
|
|
33
|
-
def _init_model(self, url:
|
|
32
|
+
def _init_model(self, url: str | None = None, model_path: str | None = None, **kwargs: Any) -> Any:
|
|
34
33
|
"""
|
|
35
34
|
Download the model from the given url if needed
|
|
36
35
|
|
|
37
36
|
Args:
|
|
38
|
-
----
|
|
39
37
|
url: the url to use
|
|
40
38
|
model_path: the path to the model to use
|
|
41
39
|
**kwargs: additional arguments to be passed to `download_from_url`
|
|
42
40
|
|
|
43
41
|
Returns:
|
|
44
|
-
-------
|
|
45
42
|
Any: the ONNX loaded model
|
|
46
43
|
"""
|
|
47
44
|
if not url and not model_path:
|
|
@@ -54,40 +51,34 @@ class _BasePredictor:
|
|
|
54
51
|
Preprocess the input image
|
|
55
52
|
|
|
56
53
|
Args:
|
|
57
|
-
----
|
|
58
54
|
img: the input image to preprocess
|
|
59
55
|
|
|
60
56
|
Returns:
|
|
61
|
-
-------
|
|
62
57
|
np.ndarray: the preprocessed image
|
|
63
58
|
"""
|
|
64
59
|
raise NotImplementedError
|
|
65
60
|
|
|
66
|
-
def postprocess(self, output:
|
|
61
|
+
def postprocess(self, output: list[np.ndarray], input_images: list[list[np.ndarray]]) -> Any:
|
|
67
62
|
"""
|
|
68
63
|
Postprocess the model output
|
|
69
64
|
|
|
70
65
|
Args:
|
|
71
|
-
----
|
|
72
66
|
output: the model output to postprocess
|
|
73
67
|
input_images: the input images used to generate the output
|
|
74
68
|
|
|
75
69
|
Returns:
|
|
76
|
-
-------
|
|
77
70
|
Any: the postprocessed output
|
|
78
71
|
"""
|
|
79
72
|
raise NotImplementedError
|
|
80
73
|
|
|
81
|
-
def __call__(self, inputs:
|
|
74
|
+
def __call__(self, inputs: list[np.ndarray]) -> Any:
|
|
82
75
|
"""
|
|
83
76
|
Call the model on the given inputs
|
|
84
77
|
|
|
85
78
|
Args:
|
|
86
|
-
----
|
|
87
79
|
inputs: the inputs to use
|
|
88
80
|
|
|
89
81
|
Returns:
|
|
90
|
-
-------
|
|
91
82
|
Any: the postprocessed output
|
|
92
83
|
"""
|
|
93
84
|
self._inputs = inputs
|
onnxtr/file_utils.py
CHANGED
|
@@ -6,7 +6,6 @@
|
|
|
6
6
|
import importlib.metadata
|
|
7
7
|
import importlib.util
|
|
8
8
|
import logging
|
|
9
|
-
from typing import Optional
|
|
10
9
|
|
|
11
10
|
__all__ = ["requires_package"]
|
|
12
11
|
|
|
@@ -14,12 +13,11 @@ ENV_VARS_TRUE_VALUES = {"1", "ON", "YES", "TRUE"}
|
|
|
14
13
|
ENV_VARS_TRUE_AND_AUTO_VALUES = ENV_VARS_TRUE_VALUES.union({"AUTO"})
|
|
15
14
|
|
|
16
15
|
|
|
17
|
-
def requires_package(name: str, extra_message:
|
|
16
|
+
def requires_package(name: str, extra_message: str | None = None) -> None: # pragma: no cover
|
|
18
17
|
"""
|
|
19
18
|
package requirement helper
|
|
20
19
|
|
|
21
20
|
Args:
|
|
22
|
-
----
|
|
23
21
|
name: name of the package
|
|
24
22
|
extra_message: additional message to display if the package is not found
|
|
25
23
|
"""
|
onnxtr/io/elements.py
CHANGED
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
# This program is licensed under the Apache License 2.0.
|
|
4
4
|
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
|
|
5
5
|
|
|
6
|
-
from typing import Any
|
|
6
|
+
from typing import Any
|
|
7
7
|
|
|
8
8
|
from defusedxml import defuse_stdlib
|
|
9
9
|
|
|
@@ -32,8 +32,8 @@ __all__ = ["Element", "Word", "Artefact", "Line", "Block", "Page", "Document"]
|
|
|
32
32
|
class Element(NestedObject):
|
|
33
33
|
"""Implements an abstract document element with exporting and text rendering capabilities"""
|
|
34
34
|
|
|
35
|
-
_children_names:
|
|
36
|
-
_exported_keys:
|
|
35
|
+
_children_names: list[str] = []
|
|
36
|
+
_exported_keys: list[str] = []
|
|
37
37
|
|
|
38
38
|
def __init__(self, **kwargs: Any) -> None:
|
|
39
39
|
for k, v in kwargs.items():
|
|
@@ -42,7 +42,7 @@ class Element(NestedObject):
|
|
|
42
42
|
else:
|
|
43
43
|
raise KeyError(f"{self.__class__.__name__} object does not have any attribute named '{k}'")
|
|
44
44
|
|
|
45
|
-
def export(self) ->
|
|
45
|
+
def export(self) -> dict[str, Any]:
|
|
46
46
|
"""Exports the object into a nested dict format"""
|
|
47
47
|
export_dict = {k: getattr(self, k) for k in self._exported_keys}
|
|
48
48
|
for children_name in self._children_names:
|
|
@@ -51,7 +51,7 @@ class Element(NestedObject):
|
|
|
51
51
|
return export_dict
|
|
52
52
|
|
|
53
53
|
@classmethod
|
|
54
|
-
def from_dict(cls, save_dict:
|
|
54
|
+
def from_dict(cls, save_dict: dict[str, Any], **kwargs):
|
|
55
55
|
raise NotImplementedError
|
|
56
56
|
|
|
57
57
|
def render(self) -> str:
|
|
@@ -62,7 +62,6 @@ class Word(Element):
|
|
|
62
62
|
"""Implements a word element
|
|
63
63
|
|
|
64
64
|
Args:
|
|
65
|
-
----
|
|
66
65
|
value: the text string of the word
|
|
67
66
|
confidence: the confidence associated with the text prediction
|
|
68
67
|
geometry: bounding box of the word in format ((xmin, ymin), (xmax, ymax)) where coordinates are relative to
|
|
@@ -71,16 +70,16 @@ class Word(Element):
|
|
|
71
70
|
crop_orientation: the general orientation of the crop in degrees and its confidence
|
|
72
71
|
"""
|
|
73
72
|
|
|
74
|
-
_exported_keys:
|
|
75
|
-
_children_names:
|
|
73
|
+
_exported_keys: list[str] = ["value", "confidence", "geometry", "objectness_score", "crop_orientation"]
|
|
74
|
+
_children_names: list[str] = []
|
|
76
75
|
|
|
77
76
|
def __init__(
|
|
78
77
|
self,
|
|
79
78
|
value: str,
|
|
80
79
|
confidence: float,
|
|
81
|
-
geometry:
|
|
80
|
+
geometry: BoundingBox | np.ndarray,
|
|
82
81
|
objectness_score: float,
|
|
83
|
-
crop_orientation:
|
|
82
|
+
crop_orientation: dict[str, Any],
|
|
84
83
|
) -> None:
|
|
85
84
|
super().__init__()
|
|
86
85
|
self.value = value
|
|
@@ -97,7 +96,7 @@ class Word(Element):
|
|
|
97
96
|
return f"value='{self.value}', confidence={self.confidence:.2}"
|
|
98
97
|
|
|
99
98
|
@classmethod
|
|
100
|
-
def from_dict(cls, save_dict:
|
|
99
|
+
def from_dict(cls, save_dict: dict[str, Any], **kwargs):
|
|
101
100
|
kwargs = {k: save_dict[k] for k in cls._exported_keys}
|
|
102
101
|
return cls(**kwargs)
|
|
103
102
|
|
|
@@ -106,15 +105,14 @@ class Artefact(Element):
|
|
|
106
105
|
"""Implements a non-textual element
|
|
107
106
|
|
|
108
107
|
Args:
|
|
109
|
-
----
|
|
110
108
|
artefact_type: the type of artefact
|
|
111
109
|
confidence: the confidence of the type prediction
|
|
112
110
|
geometry: bounding box of the word in format ((xmin, ymin), (xmax, ymax)) where coordinates are relative to
|
|
113
111
|
the page's size.
|
|
114
112
|
"""
|
|
115
113
|
|
|
116
|
-
_exported_keys:
|
|
117
|
-
_children_names:
|
|
114
|
+
_exported_keys: list[str] = ["geometry", "type", "confidence"]
|
|
115
|
+
_children_names: list[str] = []
|
|
118
116
|
|
|
119
117
|
def __init__(self, artefact_type: str, confidence: float, geometry: BoundingBox) -> None:
|
|
120
118
|
super().__init__()
|
|
@@ -130,7 +128,7 @@ class Artefact(Element):
|
|
|
130
128
|
return f"type='{self.type}', confidence={self.confidence:.2}"
|
|
131
129
|
|
|
132
130
|
@classmethod
|
|
133
|
-
def from_dict(cls, save_dict:
|
|
131
|
+
def from_dict(cls, save_dict: dict[str, Any], **kwargs):
|
|
134
132
|
kwargs = {k: save_dict[k] for k in cls._exported_keys}
|
|
135
133
|
return cls(**kwargs)
|
|
136
134
|
|
|
@@ -139,22 +137,21 @@ class Line(Element):
|
|
|
139
137
|
"""Implements a line element as a collection of words
|
|
140
138
|
|
|
141
139
|
Args:
|
|
142
|
-
----
|
|
143
140
|
words: list of word elements
|
|
144
141
|
geometry: bounding box of the word in format ((xmin, ymin), (xmax, ymax)) where coordinates are relative to
|
|
145
142
|
the page's size. If not specified, it will be resolved by default to the smallest bounding box enclosing
|
|
146
143
|
all words in it.
|
|
147
144
|
"""
|
|
148
145
|
|
|
149
|
-
_exported_keys:
|
|
150
|
-
_children_names:
|
|
151
|
-
words:
|
|
146
|
+
_exported_keys: list[str] = ["geometry", "objectness_score"]
|
|
147
|
+
_children_names: list[str] = ["words"]
|
|
148
|
+
words: list[Word] = []
|
|
152
149
|
|
|
153
150
|
def __init__(
|
|
154
151
|
self,
|
|
155
|
-
words:
|
|
156
|
-
geometry:
|
|
157
|
-
objectness_score:
|
|
152
|
+
words: list[Word],
|
|
153
|
+
geometry: BoundingBox | np.ndarray | None = None,
|
|
154
|
+
objectness_score: float | None = None,
|
|
158
155
|
) -> None:
|
|
159
156
|
# Compute the objectness score of the line
|
|
160
157
|
if objectness_score is None:
|
|
@@ -174,7 +171,7 @@ class Line(Element):
|
|
|
174
171
|
return " ".join(w.render() for w in self.words)
|
|
175
172
|
|
|
176
173
|
@classmethod
|
|
177
|
-
def from_dict(cls, save_dict:
|
|
174
|
+
def from_dict(cls, save_dict: dict[str, Any], **kwargs):
|
|
178
175
|
kwargs = {k: save_dict[k] for k in cls._exported_keys}
|
|
179
176
|
kwargs.update({
|
|
180
177
|
"words": [Word.from_dict(_dict) for _dict in save_dict["words"]],
|
|
@@ -186,7 +183,6 @@ class Block(Element):
|
|
|
186
183
|
"""Implements a block element as a collection of lines and artefacts
|
|
187
184
|
|
|
188
185
|
Args:
|
|
189
|
-
----
|
|
190
186
|
lines: list of line elements
|
|
191
187
|
artefacts: list of artefacts
|
|
192
188
|
geometry: bounding box of the word in format ((xmin, ymin), (xmax, ymax)) where coordinates are relative to
|
|
@@ -194,17 +190,17 @@ class Block(Element):
|
|
|
194
190
|
all lines and artefacts in it.
|
|
195
191
|
"""
|
|
196
192
|
|
|
197
|
-
_exported_keys:
|
|
198
|
-
_children_names:
|
|
199
|
-
lines:
|
|
200
|
-
artefacts:
|
|
193
|
+
_exported_keys: list[str] = ["geometry", "objectness_score"]
|
|
194
|
+
_children_names: list[str] = ["lines", "artefacts"]
|
|
195
|
+
lines: list[Line] = []
|
|
196
|
+
artefacts: list[Artefact] = []
|
|
201
197
|
|
|
202
198
|
def __init__(
|
|
203
199
|
self,
|
|
204
|
-
lines:
|
|
205
|
-
artefacts:
|
|
206
|
-
geometry:
|
|
207
|
-
objectness_score:
|
|
200
|
+
lines: list[Line] = [],
|
|
201
|
+
artefacts: list[Artefact] = [],
|
|
202
|
+
geometry: BoundingBox | np.ndarray | None = None,
|
|
203
|
+
objectness_score: float | None = None,
|
|
208
204
|
) -> None:
|
|
209
205
|
# Compute the objectness score of the line
|
|
210
206
|
if objectness_score is None:
|
|
@@ -227,7 +223,7 @@ class Block(Element):
|
|
|
227
223
|
return line_break.join(line.render() for line in self.lines)
|
|
228
224
|
|
|
229
225
|
@classmethod
|
|
230
|
-
def from_dict(cls, save_dict:
|
|
226
|
+
def from_dict(cls, save_dict: dict[str, Any], **kwargs):
|
|
231
227
|
kwargs = {k: save_dict[k] for k in cls._exported_keys}
|
|
232
228
|
kwargs.update({
|
|
233
229
|
"lines": [Line.from_dict(_dict) for _dict in save_dict["lines"]],
|
|
@@ -240,7 +236,6 @@ class Page(Element):
|
|
|
240
236
|
"""Implements a page element as a collection of blocks
|
|
241
237
|
|
|
242
238
|
Args:
|
|
243
|
-
----
|
|
244
239
|
page: image encoded as a numpy array in uint8
|
|
245
240
|
blocks: list of block elements
|
|
246
241
|
page_idx: the index of the page in the input raw document
|
|
@@ -249,18 +244,18 @@ class Page(Element):
|
|
|
249
244
|
language: a dictionary with the language value and confidence of the prediction
|
|
250
245
|
"""
|
|
251
246
|
|
|
252
|
-
_exported_keys:
|
|
253
|
-
_children_names:
|
|
254
|
-
blocks:
|
|
247
|
+
_exported_keys: list[str] = ["page_idx", "dimensions", "orientation", "language"]
|
|
248
|
+
_children_names: list[str] = ["blocks"]
|
|
249
|
+
blocks: list[Block] = []
|
|
255
250
|
|
|
256
251
|
def __init__(
|
|
257
252
|
self,
|
|
258
253
|
page: np.ndarray,
|
|
259
|
-
blocks:
|
|
254
|
+
blocks: list[Block],
|
|
260
255
|
page_idx: int,
|
|
261
|
-
dimensions:
|
|
262
|
-
orientation:
|
|
263
|
-
language:
|
|
256
|
+
dimensions: tuple[int, int],
|
|
257
|
+
orientation: dict[str, Any] | None = None,
|
|
258
|
+
language: dict[str, Any] | None = None,
|
|
264
259
|
) -> None:
|
|
265
260
|
super().__init__(blocks=blocks)
|
|
266
261
|
self.page = page
|
|
@@ -295,25 +290,21 @@ class Page(Element):
|
|
|
295
290
|
"""Synthesize the page from the predictions
|
|
296
291
|
|
|
297
292
|
Args:
|
|
298
|
-
----
|
|
299
293
|
**kwargs: keyword arguments passed to the `synthesize_page` method
|
|
300
294
|
|
|
301
295
|
Returns
|
|
302
|
-
-------
|
|
303
296
|
synthesized page
|
|
304
297
|
"""
|
|
305
298
|
return synthesize_page(self.export(), **kwargs)
|
|
306
299
|
|
|
307
|
-
def export_as_xml(self, file_title: str = "OnnxTR - XML export (hOCR)") ->
|
|
300
|
+
def export_as_xml(self, file_title: str = "OnnxTR - XML export (hOCR)") -> tuple[bytes, ET.ElementTree]:
|
|
308
301
|
"""Export the page as XML (hOCR-format)
|
|
309
302
|
convention: https://github.com/kba/hocr-spec/blob/master/1.2/spec.md
|
|
310
303
|
|
|
311
304
|
Args:
|
|
312
|
-
----
|
|
313
305
|
file_title: the title of the XML file
|
|
314
306
|
|
|
315
307
|
Returns:
|
|
316
|
-
-------
|
|
317
308
|
a tuple of the XML byte string, and its ElementTree
|
|
318
309
|
"""
|
|
319
310
|
p_idx = self.page_idx
|
|
@@ -411,7 +402,7 @@ class Page(Element):
|
|
|
411
402
|
return (ET.tostring(page_hocr, encoding="utf-8", method="xml"), ET.ElementTree(page_hocr))
|
|
412
403
|
|
|
413
404
|
@classmethod
|
|
414
|
-
def from_dict(cls, save_dict:
|
|
405
|
+
def from_dict(cls, save_dict: dict[str, Any], **kwargs):
|
|
415
406
|
kwargs = {k: save_dict[k] for k in cls._exported_keys}
|
|
416
407
|
kwargs.update({"blocks": [Block.from_dict(block_dict) for block_dict in save_dict["blocks"]]})
|
|
417
408
|
return cls(**kwargs)
|
|
@@ -421,16 +412,15 @@ class Document(Element):
|
|
|
421
412
|
"""Implements a document element as a collection of pages
|
|
422
413
|
|
|
423
414
|
Args:
|
|
424
|
-
----
|
|
425
415
|
pages: list of page elements
|
|
426
416
|
"""
|
|
427
417
|
|
|
428
|
-
_children_names:
|
|
429
|
-
pages:
|
|
418
|
+
_children_names: list[str] = ["pages"]
|
|
419
|
+
pages: list[Page] = []
|
|
430
420
|
|
|
431
421
|
def __init__(
|
|
432
422
|
self,
|
|
433
|
-
pages:
|
|
423
|
+
pages: list[Page],
|
|
434
424
|
) -> None:
|
|
435
425
|
super().__init__(pages=pages)
|
|
436
426
|
|
|
@@ -443,34 +433,30 @@ class Document(Element):
|
|
|
443
433
|
for result in self.pages:
|
|
444
434
|
result.show(**kwargs)
|
|
445
435
|
|
|
446
|
-
def synthesize(self, **kwargs) ->
|
|
436
|
+
def synthesize(self, **kwargs) -> list[np.ndarray]:
|
|
447
437
|
"""Synthesize all pages from their predictions
|
|
448
438
|
|
|
449
439
|
Args:
|
|
450
|
-
----
|
|
451
440
|
**kwargs: keyword arguments passed to the `Page.synthesize` method
|
|
452
441
|
|
|
453
|
-
Returns
|
|
454
|
-
-------
|
|
442
|
+
Returns:
|
|
455
443
|
list of synthesized pages
|
|
456
444
|
"""
|
|
457
445
|
return [page.synthesize(**kwargs) for page in self.pages]
|
|
458
446
|
|
|
459
|
-
def export_as_xml(self, **kwargs) ->
|
|
447
|
+
def export_as_xml(self, **kwargs) -> list[tuple[bytes, ET.ElementTree]]:
|
|
460
448
|
"""Export the document as XML (hOCR-format)
|
|
461
449
|
|
|
462
450
|
Args:
|
|
463
|
-
----
|
|
464
451
|
**kwargs: additional keyword arguments passed to the Page.export_as_xml method
|
|
465
452
|
|
|
466
453
|
Returns:
|
|
467
|
-
-------
|
|
468
454
|
list of tuple of (bytes, ElementTree)
|
|
469
455
|
"""
|
|
470
456
|
return [page.export_as_xml(**kwargs) for page in self.pages]
|
|
471
457
|
|
|
472
458
|
@classmethod
|
|
473
|
-
def from_dict(cls, save_dict:
|
|
459
|
+
def from_dict(cls, save_dict: dict[str, Any], **kwargs):
|
|
474
460
|
kwargs = {k: save_dict[k] for k in cls._exported_keys}
|
|
475
461
|
kwargs.update({"pages": [Page.from_dict(page_dict) for page_dict in save_dict["pages"]]})
|
|
476
462
|
return cls(**kwargs)
|
onnxtr/io/html.py
CHANGED
|
@@ -15,12 +15,10 @@ def read_html(url: str, **kwargs: Any) -> bytes:
|
|
|
15
15
|
>>> doc = read_html("https://www.yoursite.com")
|
|
16
16
|
|
|
17
17
|
Args:
|
|
18
|
-
----
|
|
19
18
|
url: URL of the target web page
|
|
20
19
|
**kwargs: keyword arguments from `weasyprint.HTML`
|
|
21
20
|
|
|
22
21
|
Returns:
|
|
23
|
-
-------
|
|
24
22
|
decoded PDF file as a bytes stream
|
|
25
23
|
"""
|
|
26
24
|
from weasyprint import HTML
|
onnxtr/io/image.py
CHANGED
|
@@ -4,7 +4,6 @@
|
|
|
4
4
|
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
|
|
5
5
|
|
|
6
6
|
from pathlib import Path
|
|
7
|
-
from typing import Optional, Tuple
|
|
8
7
|
|
|
9
8
|
import cv2
|
|
10
9
|
import numpy as np
|
|
@@ -16,7 +15,7 @@ __all__ = ["read_img_as_numpy"]
|
|
|
16
15
|
|
|
17
16
|
def read_img_as_numpy(
|
|
18
17
|
file: AbstractFile,
|
|
19
|
-
output_size:
|
|
18
|
+
output_size: tuple[int, int] | None = None,
|
|
20
19
|
rgb_output: bool = True,
|
|
21
20
|
) -> np.ndarray:
|
|
22
21
|
"""Read an image file into numpy format
|
|
@@ -25,13 +24,11 @@ def read_img_as_numpy(
|
|
|
25
24
|
>>> page = read_img_as_numpy("path/to/your/doc.jpg")
|
|
26
25
|
|
|
27
26
|
Args:
|
|
28
|
-
----
|
|
29
27
|
file: the path to the image file
|
|
30
28
|
output_size: the expected output size of each page in format H x W
|
|
31
29
|
rgb_output: whether the output ndarray channel order should be RGB instead of BGR.
|
|
32
30
|
|
|
33
31
|
Returns:
|
|
34
|
-
-------
|
|
35
32
|
the page decoded as numpy ndarray of shape H x W x 3
|
|
36
33
|
"""
|
|
37
34
|
if isinstance(file, (str, Path)):
|
onnxtr/io/pdf.py
CHANGED
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
# This program is licensed under the Apache License 2.0.
|
|
4
4
|
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
|
|
5
5
|
|
|
6
|
-
from typing import Any
|
|
6
|
+
from typing import Any
|
|
7
7
|
|
|
8
8
|
import numpy as np
|
|
9
9
|
import pypdfium2 as pdfium
|
|
@@ -17,16 +17,15 @@ def read_pdf(
|
|
|
17
17
|
file: AbstractFile,
|
|
18
18
|
scale: int = 2,
|
|
19
19
|
rgb_mode: bool = True,
|
|
20
|
-
password:
|
|
20
|
+
password: str | None = None,
|
|
21
21
|
**kwargs: Any,
|
|
22
|
-
) ->
|
|
22
|
+
) -> list[np.ndarray]:
|
|
23
23
|
"""Read a PDF file and convert it into an image in numpy format
|
|
24
24
|
|
|
25
25
|
>>> from onnxtr.io import read_pdf
|
|
26
26
|
>>> doc = read_pdf("path/to/your/doc.pdf")
|
|
27
27
|
|
|
28
28
|
Args:
|
|
29
|
-
----
|
|
30
29
|
file: the path to the PDF file
|
|
31
30
|
scale: rendering scale (1 corresponds to 72dpi)
|
|
32
31
|
rgb_mode: if True, the output will be RGB, otherwise BGR
|
|
@@ -34,7 +33,6 @@ def read_pdf(
|
|
|
34
33
|
**kwargs: additional parameters to :meth:`pypdfium2.PdfPage.render`
|
|
35
34
|
|
|
36
35
|
Returns:
|
|
37
|
-
-------
|
|
38
36
|
the list of pages decoded as numpy ndarray of shape H x W x C
|
|
39
37
|
"""
|
|
40
38
|
# Rasterise pages to numpy ndarrays with pypdfium2
|
onnxtr/io/reader.py
CHANGED
|
@@ -3,8 +3,8 @@
|
|
|
3
3
|
# This program is licensed under the Apache License 2.0.
|
|
4
4
|
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
|
|
5
5
|
|
|
6
|
+
from collections.abc import Sequence
|
|
6
7
|
from pathlib import Path
|
|
7
|
-
from typing import List, Sequence, Union
|
|
8
8
|
|
|
9
9
|
import numpy as np
|
|
10
10
|
|
|
@@ -22,37 +22,33 @@ class DocumentFile:
|
|
|
22
22
|
"""Read a document from multiple extensions"""
|
|
23
23
|
|
|
24
24
|
@classmethod
|
|
25
|
-
def from_pdf(cls, file: AbstractFile, **kwargs) ->
|
|
25
|
+
def from_pdf(cls, file: AbstractFile, **kwargs) -> list[np.ndarray]:
|
|
26
26
|
"""Read a PDF file
|
|
27
27
|
|
|
28
28
|
>>> from onnxtr.io import DocumentFile
|
|
29
29
|
>>> doc = DocumentFile.from_pdf("path/to/your/doc.pdf")
|
|
30
30
|
|
|
31
31
|
Args:
|
|
32
|
-
----
|
|
33
32
|
file: the path to the PDF file or a binary stream
|
|
34
33
|
**kwargs: additional parameters to :meth:`pypdfium2.PdfPage.render`
|
|
35
34
|
|
|
36
35
|
Returns:
|
|
37
|
-
-------
|
|
38
36
|
the list of pages decoded as numpy ndarray of shape H x W x 3
|
|
39
37
|
"""
|
|
40
38
|
return read_pdf(file, **kwargs)
|
|
41
39
|
|
|
42
40
|
@classmethod
|
|
43
|
-
def from_url(cls, url: str, **kwargs) ->
|
|
41
|
+
def from_url(cls, url: str, **kwargs) -> list[np.ndarray]:
|
|
44
42
|
"""Interpret a web page as a PDF document
|
|
45
43
|
|
|
46
44
|
>>> from onnxtr.io import DocumentFile
|
|
47
45
|
>>> doc = DocumentFile.from_url("https://www.yoursite.com")
|
|
48
46
|
|
|
49
47
|
Args:
|
|
50
|
-
----
|
|
51
48
|
url: the URL of the target web page
|
|
52
49
|
**kwargs: additional parameters to :meth:`pypdfium2.PdfPage.render`
|
|
53
50
|
|
|
54
51
|
Returns:
|
|
55
|
-
-------
|
|
56
52
|
the list of pages decoded as numpy ndarray of shape H x W x 3
|
|
57
53
|
"""
|
|
58
54
|
requires_package(
|
|
@@ -64,19 +60,17 @@ class DocumentFile:
|
|
|
64
60
|
return cls.from_pdf(pdf_stream, **kwargs)
|
|
65
61
|
|
|
66
62
|
@classmethod
|
|
67
|
-
def from_images(cls, files:
|
|
63
|
+
def from_images(cls, files: Sequence[AbstractFile] | AbstractFile, **kwargs) -> list[np.ndarray]:
|
|
68
64
|
"""Read an image file (or a collection of image files) and convert it into an image in numpy format
|
|
69
65
|
|
|
70
66
|
>>> from onnxtr.io import DocumentFile
|
|
71
67
|
>>> pages = DocumentFile.from_images(["path/to/your/page1.png", "path/to/your/page2.png"])
|
|
72
68
|
|
|
73
69
|
Args:
|
|
74
|
-
----
|
|
75
70
|
files: the path to the image file or a binary stream, or a collection of those
|
|
76
71
|
**kwargs: additional parameters to :meth:`onnxtr.io.image.read_img_as_numpy`
|
|
77
72
|
|
|
78
73
|
Returns:
|
|
79
|
-
-------
|
|
80
74
|
the list of pages decoded as numpy ndarray of shape H x W x 3
|
|
81
75
|
"""
|
|
82
76
|
if isinstance(files, (str, Path, bytes)):
|