onnxtr 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- onnxtr/__init__.py +2 -0
- onnxtr/contrib/__init__.py +0 -0
- onnxtr/contrib/artefacts.py +131 -0
- onnxtr/contrib/base.py +105 -0
- onnxtr/file_utils.py +33 -0
- onnxtr/io/__init__.py +5 -0
- onnxtr/io/elements.py +455 -0
- onnxtr/io/html.py +28 -0
- onnxtr/io/image.py +56 -0
- onnxtr/io/pdf.py +42 -0
- onnxtr/io/reader.py +85 -0
- onnxtr/models/__init__.py +4 -0
- onnxtr/models/_utils.py +141 -0
- onnxtr/models/builder.py +355 -0
- onnxtr/models/classification/__init__.py +2 -0
- onnxtr/models/classification/models/__init__.py +1 -0
- onnxtr/models/classification/models/mobilenet.py +120 -0
- onnxtr/models/classification/predictor/__init__.py +1 -0
- onnxtr/models/classification/predictor/base.py +57 -0
- onnxtr/models/classification/zoo.py +76 -0
- onnxtr/models/detection/__init__.py +2 -0
- onnxtr/models/detection/core.py +101 -0
- onnxtr/models/detection/models/__init__.py +3 -0
- onnxtr/models/detection/models/differentiable_binarization.py +159 -0
- onnxtr/models/detection/models/fast.py +160 -0
- onnxtr/models/detection/models/linknet.py +160 -0
- onnxtr/models/detection/postprocessor/__init__.py +0 -0
- onnxtr/models/detection/postprocessor/base.py +144 -0
- onnxtr/models/detection/predictor/__init__.py +1 -0
- onnxtr/models/detection/predictor/base.py +54 -0
- onnxtr/models/detection/zoo.py +73 -0
- onnxtr/models/engine.py +50 -0
- onnxtr/models/predictor/__init__.py +1 -0
- onnxtr/models/predictor/base.py +175 -0
- onnxtr/models/predictor/predictor.py +145 -0
- onnxtr/models/preprocessor/__init__.py +1 -0
- onnxtr/models/preprocessor/base.py +118 -0
- onnxtr/models/recognition/__init__.py +2 -0
- onnxtr/models/recognition/core.py +28 -0
- onnxtr/models/recognition/models/__init__.py +5 -0
- onnxtr/models/recognition/models/crnn.py +226 -0
- onnxtr/models/recognition/models/master.py +145 -0
- onnxtr/models/recognition/models/parseq.py +134 -0
- onnxtr/models/recognition/models/sar.py +134 -0
- onnxtr/models/recognition/models/vitstr.py +166 -0
- onnxtr/models/recognition/predictor/__init__.py +1 -0
- onnxtr/models/recognition/predictor/_utils.py +86 -0
- onnxtr/models/recognition/predictor/base.py +79 -0
- onnxtr/models/recognition/utils.py +89 -0
- onnxtr/models/recognition/zoo.py +69 -0
- onnxtr/models/zoo.py +114 -0
- onnxtr/transforms/__init__.py +1 -0
- onnxtr/transforms/base.py +112 -0
- onnxtr/utils/__init__.py +4 -0
- onnxtr/utils/common_types.py +18 -0
- onnxtr/utils/data.py +126 -0
- onnxtr/utils/fonts.py +41 -0
- onnxtr/utils/geometry.py +498 -0
- onnxtr/utils/multithreading.py +50 -0
- onnxtr/utils/reconstitution.py +70 -0
- onnxtr/utils/repr.py +64 -0
- onnxtr/utils/visualization.py +291 -0
- onnxtr/utils/vocabs.py +71 -0
- onnxtr/version.py +1 -0
- onnxtr-0.1.0.dist-info/LICENSE +201 -0
- onnxtr-0.1.0.dist-info/METADATA +481 -0
- onnxtr-0.1.0.dist-info/RECORD +70 -0
- onnxtr-0.1.0.dist-info/WHEEL +5 -0
- onnxtr-0.1.0.dist-info/top_level.txt +2 -0
- onnxtr-0.1.0.dist-info/zip-safe +1 -0
onnxtr/utils/geometry.py
ADDED
|
@@ -0,0 +1,498 @@
|
|
|
1
|
+
# Copyright (C) 2021-2024, Mindee | Felix Dittrich.
|
|
2
|
+
|
|
3
|
+
# This program is licensed under the Apache License 2.0.
|
|
4
|
+
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
|
|
5
|
+
|
|
6
|
+
from copy import deepcopy
|
|
7
|
+
from math import ceil
|
|
8
|
+
from typing import List, Optional, Tuple, Union
|
|
9
|
+
|
|
10
|
+
import cv2
|
|
11
|
+
import numpy as np
|
|
12
|
+
|
|
13
|
+
from .common_types import BoundingBox, Polygon4P
|
|
14
|
+
|
|
15
|
+
__all__ = [
|
|
16
|
+
"bbox_to_polygon",
|
|
17
|
+
"polygon_to_bbox",
|
|
18
|
+
"resolve_enclosing_bbox",
|
|
19
|
+
"resolve_enclosing_rbbox",
|
|
20
|
+
"rotate_boxes",
|
|
21
|
+
"compute_expanded_shape",
|
|
22
|
+
"rotate_image",
|
|
23
|
+
"estimate_page_angle",
|
|
24
|
+
"convert_to_relative_coords",
|
|
25
|
+
"rotate_abs_geoms",
|
|
26
|
+
"extract_crops",
|
|
27
|
+
"extract_rcrops",
|
|
28
|
+
"shape_translate",
|
|
29
|
+
]
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def bbox_to_polygon(bbox: BoundingBox) -> Polygon4P:
    """Convert a bounding box to a polygon

    Args:
    ----
        bbox: a bounding box

    Returns:
    -------
        a polygon
    """
    (xmin, ymin), (xmax, ymax) = bbox
    # Corners in order: top-left, top-right, bottom-left, bottom-right
    return (xmin, ymin), (xmax, ymin), (xmin, ymax), (xmax, ymax)
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def polygon_to_bbox(polygon: Polygon4P) -> BoundingBox:
    """Convert a polygon to a bounding box

    Args:
    ----
        polygon: a polygon

    Returns:
    -------
        a bounding box
    """
    xs = [point[0] for point in polygon]
    ys = [point[1] for point in polygon]
    # Axis-aligned enclosing box of the polygon vertices
    return (min(xs), min(ys)), (max(xs), max(ys))
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def shape_translate(data: np.ndarray, format: str) -> np.ndarray:
    """Translate the shape of the input data to the desired format

    Args:
    ----
        data: input data in shape (B, C, H, W) or (B, H, W, C) or (C, H, W) or (H, W, C)
        format: target format ('BCHW', 'BHWC', 'CHW', or 'HWC')

    Returns:
    -------
        the reshaped data
    """
    # Dimensionality mismatch: nothing sensible to do, hand the data back untouched
    if len(data.shape) != len(format):
        return data

    # Where the channel axis sits in each supported layout, and where it would
    # sit if the data currently uses the opposite layout
    target_axis = {"BCHW": 1, "BHWC": -1, "CHW": 0, "HWC": -1}.get(format)
    if target_axis is None:
        # Unknown format: leave the data as-is
        return data

    # A size of 1 or 3 on the target axis is taken as "already channels there"
    if data.shape[target_axis] in (1, 3):
        return data

    # Channels sit at the opposite end: move them into position
    source_axis = {"BCHW": -1, "BHWC": 1, "CHW": -1, "HWC": 0}[format]
    return np.moveaxis(data, source_axis, target_axis)
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def resolve_enclosing_bbox(bboxes: Union[List[BoundingBox], np.ndarray]) -> Union[BoundingBox, np.ndarray]:
    """Compute enclosing bbox either from:

    Args:
    ----
        bboxes: boxes in one of the following formats:

            - an array of boxes: (*, 5), where boxes have this shape:
            (xmin, ymin, xmax, ymax, score)

            - a list of BoundingBox

    Returns:
    -------
        a (1, 5) array (enclosing boxarray), or a BoundingBox
    """
    if isinstance(bboxes, np.ndarray):
        # Column-wise reduction: extreme corners plus the mean confidence
        return np.array([
            bboxes[:, 0].min(),
            bboxes[:, 1].min(),
            bboxes[:, 2].max(),
            bboxes[:, 3].max(),
            bboxes[:, 4].mean(),
        ])
    # List of ((xmin, ymin), (xmax, ymax)) pairs: flatten every corner point
    points = [point for box in bboxes for point in box]
    xs = [pt[0] for pt in points]
    ys = [pt[1] for pt in points]
    return (min(xs), min(ys)), (max(xs), max(ys))
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def resolve_enclosing_rbbox(rbboxes: List[np.ndarray], intermed_size: int = 1024) -> np.ndarray:
    """Compute enclosing rotated bbox either from:

    Args:
    ----
        rbboxes: boxes in one of the following formats:

            - an array of boxes: (*, 5), where boxes have this shape:
            (xmin, ymin, xmax, ymax, score)

            - a list of BoundingBox
        intermed_size: size of the intermediate image

    Returns:
    -------
        a (1, 5) array (enclosing boxarray), or a BoundingBox
    """
    # Gather every vertex into a single point cloud
    point_cloud: np.ndarray = np.concatenate(rbboxes, axis=0)
    # minAreaRect works on absolute integer coordinates, so scale up first
    point_cloud *= intermed_size
    min_rect = cv2.minAreaRect(point_cloud.astype(np.int32))
    # 4 corner points of the minimal rotated rectangle, scaled back to relative coords
    return cv2.boxPoints(min_rect) / intermed_size  # type: ignore[operator]
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
def rotate_abs_points(points: np.ndarray, angle: float = 0.0) -> np.ndarray:
    """Rotate points counter-clockwise.

    Args:
    ----
        points: array of size (N, 2)
        angle: angle between -90 and +90 degrees

    Returns:
    -------
        Rotated points
    """
    angle_rad = angle * np.pi / 180.0  # degrees -> radians
    cos_a, sin_a = np.cos(angle_rad), np.sin(angle_rad)
    # Transposed CCW rotation matrix, applied on the right: p @ R.T
    transposed_rot = np.array([[cos_a, sin_a], [-sin_a, cos_a]], dtype=points.dtype)
    return np.matmul(points, transposed_rot)
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
def compute_expanded_shape(img_shape: Tuple[int, int], angle: float) -> Tuple[int, int]:
    """Compute the shape of an expanded rotated image

    Args:
    ----
        img_shape: the height and width of the image
        angle: angle between -90 and +90 degrees

    Returns:
    -------
        the height and width of the rotated image
    """
    height, width = img_shape
    # Two adjacent corners relative to the image center are enough:
    # the remaining two are their point-symmetric counterparts
    corners: np.ndarray = np.array([
        [width / 2, height / 2],
        [-width / 2, height / 2],
    ])

    rotated_corners = rotate_abs_points(corners, angle)

    # Full extent is twice the largest |coordinate| along each (x, y) axis
    new_w, new_h = 2 * np.abs(rotated_corners).max(axis=0)
    return new_h, new_w
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
def rotate_abs_geoms(
    geoms: np.ndarray,
    angle: float,
    img_shape: Tuple[int, int],
    expand: bool = True,
) -> np.ndarray:
    """Rotate a batch of bounding boxes or polygons by an angle around the
    image center.

    Args:
    ----
        geoms: (N, 4) or (N, 4, 2) array of ABSOLUTE coordinate boxes
        angle: anti-clockwise rotation angle in degrees
        img_shape: the height and width of the image
        expand: whether the image should be padded to avoid information loss

    Returns:
    -------
        A batch of rotated polygons (N, 4, 2)
    """
    height, width = img_shape
    # Promote straight boxes (N, 4) to 4-point polygons (N, 4, 2)
    if geoms.ndim == 2:
        quads = np.stack([geoms[:, [0, 1]], geoms[:, [2, 1]], geoms[:, [2, 3]], geoms[:, [0, 3]]], axis=1)
    else:
        quads = geoms
    quads = quads.astype(np.float32)

    # Express coordinates relative to the image center, with y pointing up
    quads[..., 0] -= width / 2
    quads[..., 1] = height / 2 - quads[..., 1]

    # Rotate every vertex around the center
    rotated = rotate_abs_points(quads.reshape(-1, 2), angle).reshape(-1, 4, 2)

    # Back to a top-left referential, possibly on the expanded canvas,
    # clipping coordinates that fall outside it
    tgt_h, tgt_w = compute_expanded_shape(img_shape, angle) if expand else img_shape
    rotated[..., 0] = (rotated[..., 0] + tgt_w / 2).clip(0, tgt_w)
    rotated[..., 1] = (tgt_h / 2 - rotated[..., 1]).clip(0, tgt_h)

    return rotated
|
|
233
|
+
|
|
234
|
+
|
|
235
|
+
def remap_boxes(loc_preds: np.ndarray, orig_shape: Tuple[int, int], dest_shape: Tuple[int, int]) -> np.ndarray:
    """Remaps a batch of rotated locpred (N, 4, 2) expressed for an origin_shape to a destination_shape.
    This does not impact the absolute shape of the boxes, but allow to calculate the new relative RotatedBbox
    coordinates after a resizing of the image.

    Args:
    ----
        loc_preds: (N, 4, 2) array of RELATIVE loc_preds
        orig_shape: shape of the origin image
        dest_shape: shape of the destination image

    Returns:
    -------
        A batch of rotated loc_preds (N, 4, 2) expressed in the destination referencial
    """
    if len(dest_shape) != 2:
        raise ValueError(f"Mask length should be 2, was found at: {len(dest_shape)}")
    if len(orig_shape) != 2:
        raise ValueError(f"Image_shape length should be 2, was found at: {len(orig_shape)}")

    orig_height, orig_width = orig_shape
    dest_height, dest_width = dest_shape

    # Assumes the original content is centered in the destination canvas:
    # go absolute, shift by half the size delta, then re-normalize
    remapped = loc_preds.copy()
    remapped[..., 0] = (loc_preds[..., 0] * orig_width + (dest_width - orig_width) / 2) / dest_width
    remapped[..., 1] = (loc_preds[..., 1] * orig_height + (dest_height - orig_height) / 2) / dest_height
    return remapped
|
|
261
|
+
|
|
262
|
+
|
|
263
|
+
def rotate_boxes(
    loc_preds: np.ndarray,
    angle: float,
    orig_shape: Tuple[int, int],
    min_angle: float = 1.0,
    target_shape: Optional[Tuple[int, int]] = None,
) -> np.ndarray:
    """Rotate a batch of straight bounding boxes (xmin, ymin, xmax, ymax, c) or rotated bounding boxes
    (4, 2) of an angle, if angle > min_angle, around the center of the page.
    If target_shape is specified, the boxes are remapped to the target shape after the rotation. This
    is done to remove the padding that is created by rotate_page(expand=True)

    Args:
    ----
        loc_preds: (N, 5) or (N, 4, 2) array of RELATIVE boxes
        angle: angle between -90 and +90 degrees
        orig_shape: shape of the origin image
        min_angle: minimum angle to rotate boxes
        target_shape: shape of the destination image

    Returns:
    -------
        A batch of rotated boxes (N, 4, 2): or a batch of straight bounding boxes
    """
    # Promote straight boxes (N, 5) to 4-point polygons (N, 4, 2)
    rel_boxes = loc_preds.copy()
    if rel_boxes.ndim == 2:
        rel_boxes = np.stack(
            [
                rel_boxes[:, [0, 1]],
                rel_boxes[:, [2, 1]],
                rel_boxes[:, [2, 3]],
                rel_boxes[:, [0, 3]],
            ],
            axis=1,
        )

    # Near-axis-aligned angles: skip the rotation entirely
    if abs(angle) < min_angle or abs(angle) > 90 - min_angle:
        return rel_boxes

    # 2D rotation matrix for the requested angle
    angle_rad = angle * np.pi / 180.0  # degrees -> radians
    rot_mat = np.array(
        [[np.cos(angle_rad), -np.sin(angle_rad)], [np.sin(angle_rad), np.cos(angle_rad)]], dtype=rel_boxes.dtype
    )

    # Rotate absolute coordinates around the image center, then re-normalize
    abs_points: np.ndarray = np.stack((rel_boxes[:, :, 0] * orig_shape[1], rel_boxes[:, :, 1] * orig_shape[0]), axis=-1)
    center = (orig_shape[1] / 2, orig_shape[0] / 2)
    rotated_points = center + np.matmul(abs_points - center, rot_mat)
    rotated_boxes: np.ndarray = np.stack(
        (rotated_points[:, :, 0] / orig_shape[1], rotated_points[:, :, 1] / orig_shape[0]), axis=-1
    )

    # Optionally remap into the destination referential (e.g. to undo expand-padding)
    if target_shape is not None:
        rotated_boxes = remap_boxes(rotated_boxes, orig_shape=orig_shape, dest_shape=target_shape)

    return rotated_boxes
|
|
320
|
+
|
|
321
|
+
|
|
322
|
+
def rotate_image(
    image: np.ndarray,
    angle: float,
    expand: bool = False,
    preserve_origin_shape: bool = False,
) -> np.ndarray:
    """Rotate an image counterclockwise by a given angle.

    Args:
    ----
        image: numpy tensor to rotate, channels-last (H, W, C)
        angle: rotation angle in degrees, between -90 and +90
        expand: whether the image should be padded before the rotation
        preserve_origin_shape: if expand is set to True, resizes the final output to the original image size

    Returns:
    -------
        Rotated array, padded by 0 by default.
    """
    # Pad the image symmetrically up to the expanded shape so the rotated
    # content does not get cropped at the canvas borders
    exp_img: np.ndarray
    if expand:
        exp_shape = compute_expanded_shape(image.shape[:2], angle)  # type: ignore[arg-type]
        h_pad, w_pad = (
            int(max(0, ceil(exp_shape[0] - image.shape[0]))),
            int(max(0, ceil(exp_shape[1] - image.shape[1]))),
        )
        exp_img = np.pad(image, ((h_pad // 2, h_pad - h_pad // 2), (w_pad // 2, w_pad - w_pad // 2), (0, 0)))
    else:
        exp_img = image

    # Rotate around the (possibly padded) image center, keeping the canvas size
    height, width = exp_img.shape[:2]
    rot_mat = cv2.getRotationMatrix2D((width / 2, height / 2), angle, 1.0)
    rot_img = cv2.warpAffine(exp_img, rot_mat, (width, height))
    if expand:
        # Pad to restore the original aspect ratio
        if (image.shape[0] / image.shape[1]) != (rot_img.shape[0] / rot_img.shape[1]):
            # Pad width when the rotated image is proportionally taller than the original
            if (rot_img.shape[0] / rot_img.shape[1]) > (image.shape[0] / image.shape[1]):
                h_pad, w_pad = 0, int(rot_img.shape[0] * image.shape[1] / image.shape[0] - rot_img.shape[1])
            # Pad height otherwise
            else:
                h_pad, w_pad = int(rot_img.shape[1] * image.shape[0] / image.shape[1] - rot_img.shape[0]), 0
            rot_img = np.pad(rot_img, ((h_pad // 2, h_pad - h_pad // 2), (w_pad // 2, w_pad - w_pad // 2), (0, 0)))
        if preserve_origin_shape:
            # Rescale back to the original (W, H) size
            rot_img = cv2.resize(rot_img, image.shape[:-1][::-1], interpolation=cv2.INTER_LINEAR)

    return rot_img
|
|
371
|
+
|
|
372
|
+
|
|
373
|
+
def estimate_page_angle(polys: np.ndarray) -> float:
    """Takes a batch of rotated previously ORIENTED polys (N, 4, 2) (rectified by the classifier) and return the
    estimated angle ccw in degrees
    """
    # Sum of the two left corners / two right corners with respect to the
    # reading direction of each oriented polygon
    sum_left_x = polys[:, 0, 0] + polys[:, 3, 0]
    sum_left_y = polys[:, 0, 1] + polys[:, 3, 1]
    sum_right_x = polys[:, 1, 0] + polys[:, 2, 0]
    sum_right_y = polys[:, 1, 1] + polys[:, 2, 1]
    # Degenerate polygons (zero horizontal extent) would divide by zero:
    # promote the numpy warning to an exception and fall back to 0
    with np.errstate(divide="raise", invalid="raise"):
        try:
            slopes = (sum_left_y - sum_right_y) / (sum_right_x - sum_left_x)  # Y axis from top to bottom!
            return float(np.median(np.arctan(slopes) * 180 / np.pi))
        except FloatingPointError:
            return 0.0
|
|
389
|
+
|
|
390
|
+
|
|
391
|
+
def convert_to_relative_coords(geoms: np.ndarray, img_shape: Tuple[int, int]) -> np.ndarray:
    """Convert a geometry to relative coordinates

    Args:
    ----
        geoms: a set of polygons of shape (N, 4, 2) or of straight boxes of shape (N, 4)
        img_shape: the height and width of the image

    Returns:
    -------
        the updated geometry
    """
    height, width = img_shape
    # Polygon case: (N, 4, 2) with an explicit (x, y) last axis
    if geoms.ndim == 3 and geoms.shape[1:] == (4, 2):
        rel_polys: np.ndarray = np.empty(geoms.shape, dtype=np.float32)
        rel_polys[..., 0] = geoms[..., 0] / width
        rel_polys[..., 1] = geoms[..., 1] / height
        return rel_polys.clip(0, 1)
    # Straight-box case: (N, 4) as (xmin, ymin, xmax, ymax)
    if geoms.ndim == 2 and geoms.shape[1] == 4:
        rel_boxes: np.ndarray = np.empty(geoms.shape, dtype=np.float32)
        rel_boxes[:, ::2] = geoms[:, ::2] / width
        rel_boxes[:, 1::2] = geoms[:, 1::2] / height
        return rel_boxes.clip(0, 1)

    raise ValueError(f"invalid format for arg `geoms`: {geoms.shape}")
|
|
416
|
+
|
|
417
|
+
|
|
418
|
+
def extract_crops(img: np.ndarray, boxes: np.ndarray, channels_last: bool = True) -> List[np.ndarray]:
    """Created cropped images from list of bounding boxes

    Args:
    ----
        img: input image
        boxes: bounding boxes of shape (N, 4) where N is the number of boxes, and the relative
            coordinates (xmin, ymin, xmax, ymax)
        channels_last: whether the channel dimension is the last one (H, W, C) instead of the first one (C, H, W)

    Returns:
    -------
        list of cropped images

    Raises:
    ------
        AssertionError: if `boxes` does not have 4 coordinates per row
    """
    if boxes.shape[0] == 0:
        return []
    if boxes.shape[1] != 4:
        raise AssertionError("boxes are expected to be relative and in order (xmin, ymin, xmax, ymax)")

    # Project relative coordinates to absolute pixel positions
    _boxes = boxes.copy()
    h, w = img.shape[:2] if channels_last else img.shape[-2:]
    if not np.issubdtype(_boxes.dtype, np.integer):
        _boxes[:, [0, 2]] *= w
        _boxes[:, [1, 3]] *= h
        _boxes = _boxes.round().astype(int)
        # Make (xmax, ymax) exclusive so the last pixel row/column is included.
        # BUGFIX: the previous `_boxes[2:] += 1` sliced ROWS (every coordinate of
        # boxes 2..N) instead of the (xmax, ymax) COLUMNS of every box.
        _boxes[:, 2:] += 1
    if channels_last:
        return deepcopy([img[box[1] : box[3], box[0] : box[2]] for box in _boxes])

    return deepcopy([img[:, box[1] : box[3], box[0] : box[2]] for box in _boxes])
|
|
450
|
+
|
|
451
|
+
|
|
452
|
+
def extract_rcrops(
    img: np.ndarray, polys: np.ndarray, dtype=np.float32, channels_last: bool = True
) -> List[np.ndarray]:
    """Created cropped images from list of rotated bounding boxes

    Args:
    ----
        img: input image
        polys: bounding boxes of shape (N, 4, 2)
        dtype: target data type of bounding boxes
        channels_last: whether the channel dimension is the last one (H, W, C) instead of the first one (C, H, W)

    Returns:
    -------
        list of cropped images
    """
    if polys.shape[0] == 0:
        return []
    if polys.shape[1:] != (4, 2):
        raise AssertionError("polys are expected to be quadrilateral, of shape (N, 4, 2)")

    # Project relative coordinates to absolute pixel positions
    quads = polys.copy()
    img_h, img_w = img.shape[:2] if channels_last else img.shape[-2:]
    if not np.issubdtype(quads.dtype, np.integer):
        quads[:, :, 0] *= img_w
        quads[:, :, 1] *= img_h

    # Three source corners are enough to define an affine mapping
    src_pts = quads[:, :3].astype(np.float32)
    # Output sizes that preserve the first two edge lengths of each quad
    d1 = np.linalg.norm(src_pts[:, 0] - src_pts[:, 1], axis=-1)
    d2 = np.linalg.norm(src_pts[:, 1] - src_pts[:, 2], axis=-1)
    # Destination corners (0, 0), (d1-1, 0), (d1-1, d2-1) -> shape (N, 3, 2)
    dst_pts = np.zeros((quads.shape[0], 3, 2), dtype=dtype)
    dst_pts[:, 1, 0] = d1 - 1
    dst_pts[:, 2, 0] = d1 - 1
    dst_pts[:, 2, 1] = d2 - 1

    # Warp each quadrilateral onto an axis-aligned crop
    source = img if channels_last else img.transpose(1, 2, 0)
    return [
        cv2.warpAffine(
            source,
            cv2.getAffineTransform(src_pts[idx], dst_pts[idx]),
            (int(d1[idx]), int(d2[idx])),
        )
        for idx in range(quads.shape[0])
    ]
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
# Copyright (C) 2021-2024, Mindee | Felix Dittrich.
|
|
2
|
+
|
|
3
|
+
# This program is licensed under the Apache License 2.0.
|
|
4
|
+
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
import multiprocessing as mp
|
|
8
|
+
import os
|
|
9
|
+
from multiprocessing.pool import ThreadPool
|
|
10
|
+
from typing import Any, Callable, Iterable, Iterator, Optional
|
|
11
|
+
|
|
12
|
+
from onnxtr.file_utils import ENV_VARS_TRUE_VALUES
|
|
13
|
+
|
|
14
|
+
__all__ = ["multithread_exec"]
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def multithread_exec(func: Callable[[Any], Any], seq: Iterable[Any], threads: Optional[int] = None) -> Iterator[Any]:
    """Execute a given function in parallel for each element of a given sequence

    >>> from onnxtr.utils.multithreading import multithread_exec
    >>> entries = [1, 4, 8]
    >>> results = multithread_exec(lambda x: x ** 2, entries)

    Args:
    ----
        func: function to be executed on each element of the iterable
        seq: iterable
        threads: number of workers to be used for multiprocessing

    Returns:
    -------
        iterator of the function's results using the iterable as inputs

    Notes:
    -----
        This function uses ThreadPool from multiprocessing package, which uses `/dev/shm` directory for shared memory.
        If you do not have write permissions for this directory (if you run `onnxtr` on AWS Lambda for instance),
        you might want to disable multiprocessing. To achieve that, set 'ONNXTR_MULTIPROCESSING_DISABLE' to 'TRUE'.
    """
    worker_count = threads if isinstance(threads, int) else min(16, mp.cpu_count())
    multiprocessing_disabled = os.environ.get("ONNXTR_MULTIPROCESSING_DISABLE", "").upper() in ENV_VARS_TRUE_VALUES
    # Sequential fallback: lazy map, no pool spawned
    if worker_count < 2 or multiprocessing_disabled:
        return map(func, seq)
    with ThreadPool(worker_count) as tp:
        # tp.map materializes a list; re-wrap it so callers always get an iterator
        return map(lambda item: item, tp.map(func, seq))  # noqa: C417
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
# Copyright (C) 2021-2024, Mindee | Felix Dittrich.
|
|
2
|
+
|
|
3
|
+
# This program is licensed under the Apache License 2.0.
|
|
4
|
+
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
|
|
5
|
+
from typing import Any, Dict, Optional
|
|
6
|
+
|
|
7
|
+
import numpy as np
|
|
8
|
+
from anyascii import anyascii
|
|
9
|
+
from PIL import Image, ImageDraw
|
|
10
|
+
|
|
11
|
+
from .fonts import get_font
|
|
12
|
+
|
|
13
|
+
__all__ = ["synthesize_page"]
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def synthesize_page(
    page: Dict[str, Any],
    draw_proba: bool = False,
    font_family: Optional[str] = None,
) -> np.ndarray:
    """Draw the content of an exported page (OCR response) on a blank page.

    Args:
    ----
        page: exported Page object to represent
        draw_proba: if True, draw words in colors to represent confidence. Blue: p=1, red: p=0
        font_family: family of the font (a default font is used when None)

    Returns:
    -------
        the synthesized page as a (H, W, 3) int32 array
    """
    # Blank white canvas matching the page dimensions
    h, w = page["dimensions"]
    response = 255 * np.ones((h, w, 3), dtype=np.int32)

    # Draw each word at its location
    for block in page["blocks"]:
        for line in block["lines"]:
            for word in line["words"]:
                # Word geometry is relative ((xmin, ymin), (xmax, ymax)); project to pixels
                (xmin, ymin), (xmax, ymax) = word["geometry"]
                xmin, xmax = int(round(w * xmin)), int(round(w * xmax))
                ymin, ymax = int(round(h * ymin)), int(round(h * ymax))

                # White drawing context sized to the box height, 0.75 factor to convert pts --> pix
                font = get_font(font_family, int(0.75 * (ymax - ymin)))
                img = Image.new("RGB", (xmax - xmin, ymax - ymin), color=(255, 255, 255))
                d = ImageDraw.Draw(img)
                # Draw the word value in black
                try:
                    d.text((0, 0), word["value"], font=font, fill=(0, 0, 0))
                except UnicodeEncodeError:
                    # When a character cannot be encoded, fall back to its anyascii transliteration
                    d.text((0, 0), anyascii(word["value"]), font=font, fill=(0, 0, 0))

                # Colorize the drawn glyphs according to confidence (blue: p=1, red: p=0)
                if draw_proba:
                    p = int(255 * word["confidence"])
                    # 1 where text was drawn (black pixels), 0 elsewhere
                    mask = np.where(np.array(img) == 0, 1, 0)
                    proba: np.ndarray = np.array([255 - p, 0, p])
                    color = mask * proba[np.newaxis, np.newaxis, :]
                    white_mask = 255 * (1 - mask)
                    img = color + white_mask

                # Paste the rendered word onto the response page
                response[ymin:ymax, xmin:xmax, :] = np.array(img)

    return response
|
onnxtr/utils/repr.py
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
# Copyright (C) 2021-2024, Mindee | Felix Dittrich.
|
|
2
|
+
|
|
3
|
+
# This program is licensed under the Apache License 2.0.
|
|
4
|
+
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
|
|
5
|
+
|
|
6
|
+
# Adapted from https://github.com/pytorch/torch/blob/master/torch/nn/modules/module.py
|
|
7
|
+
|
|
8
|
+
from typing import List
|
|
9
|
+
|
|
10
|
+
__all__ = ["NestedObject"]
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def _addindent(s_, num_spaces):
|
|
14
|
+
s = s_.split("\n")
|
|
15
|
+
# don't do anything for single-line stuff
|
|
16
|
+
if len(s) == 1:
|
|
17
|
+
return s_
|
|
18
|
+
first = s.pop(0)
|
|
19
|
+
s = [(num_spaces * " ") + line for line in s]
|
|
20
|
+
s = "\n".join(s)
|
|
21
|
+
s = first + "\n" + s
|
|
22
|
+
return s
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class NestedObject:
    """Base class for all nested objects in onnxtr"""

    # Names of the attributes rendered as children in __repr__
    _children_names: List[str]

    def extra_repr(self) -> str:
        # Subclasses may return a one-line summary of their configuration
        return ""

    def __repr__(self):
        # The extra repr is rendered like sub-objects: one item per line
        extra = self.extra_repr()
        extra_lines = extra.split("\n") if extra else []

        child_lines = []
        for name in getattr(self, "_children_names", []):
            child = getattr(self, name)
            if isinstance(child, list) and child:
                inner = ",\n".join(repr(subchild) for subchild in child)
                if len(child) > 1:
                    inner = _addindent(f"\n{inner},", 2) + "\n"
                rendered = f"[{inner}]"
            else:
                rendered = repr(child)
            child_lines.append("(" + name + "): " + _addindent(rendered, 2))

        lines = extra_lines + child_lines

        result = self.__class__.__name__ + "("
        if lines:
            if len(extra_lines) == 1 and not child_lines:
                # Single-line info fits on one line
                result += extra_lines[0]
            else:
                result += "\n  " + "\n  ".join(lines) + "\n"
        return result + ")"
|