python-doctr 0.7.0__py3-none-any.whl → 0.8.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- doctr/datasets/__init__.py +2 -0
- doctr/datasets/cord.py +6 -4
- doctr/datasets/datasets/base.py +3 -2
- doctr/datasets/datasets/pytorch.py +4 -2
- doctr/datasets/datasets/tensorflow.py +4 -2
- doctr/datasets/detection.py +6 -3
- doctr/datasets/doc_artefacts.py +2 -1
- doctr/datasets/funsd.py +7 -8
- doctr/datasets/generator/base.py +3 -2
- doctr/datasets/generator/pytorch.py +3 -1
- doctr/datasets/generator/tensorflow.py +3 -1
- doctr/datasets/ic03.py +3 -2
- doctr/datasets/ic13.py +2 -1
- doctr/datasets/iiit5k.py +6 -4
- doctr/datasets/iiithws.py +2 -1
- doctr/datasets/imgur5k.py +3 -2
- doctr/datasets/loader.py +4 -2
- doctr/datasets/mjsynth.py +2 -1
- doctr/datasets/ocr.py +2 -1
- doctr/datasets/orientation.py +40 -0
- doctr/datasets/recognition.py +3 -2
- doctr/datasets/sroie.py +2 -1
- doctr/datasets/svhn.py +2 -1
- doctr/datasets/svt.py +3 -2
- doctr/datasets/synthtext.py +2 -1
- doctr/datasets/utils.py +27 -11
- doctr/datasets/vocabs.py +26 -1
- doctr/datasets/wildreceipt.py +111 -0
- doctr/file_utils.py +3 -1
- doctr/io/elements.py +52 -35
- doctr/io/html.py +5 -3
- doctr/io/image/base.py +5 -4
- doctr/io/image/pytorch.py +12 -7
- doctr/io/image/tensorflow.py +11 -6
- doctr/io/pdf.py +5 -4
- doctr/io/reader.py +13 -5
- doctr/models/_utils.py +30 -53
- doctr/models/artefacts/barcode.py +4 -3
- doctr/models/artefacts/face.py +4 -2
- doctr/models/builder.py +58 -43
- doctr/models/classification/__init__.py +1 -0
- doctr/models/classification/magc_resnet/pytorch.py +5 -2
- doctr/models/classification/magc_resnet/tensorflow.py +5 -2
- doctr/models/classification/mobilenet/pytorch.py +16 -4
- doctr/models/classification/mobilenet/tensorflow.py +29 -20
- doctr/models/classification/predictor/pytorch.py +3 -2
- doctr/models/classification/predictor/tensorflow.py +2 -1
- doctr/models/classification/resnet/pytorch.py +23 -13
- doctr/models/classification/resnet/tensorflow.py +33 -26
- doctr/models/classification/textnet/__init__.py +6 -0
- doctr/models/classification/textnet/pytorch.py +275 -0
- doctr/models/classification/textnet/tensorflow.py +267 -0
- doctr/models/classification/vgg/pytorch.py +4 -2
- doctr/models/classification/vgg/tensorflow.py +5 -2
- doctr/models/classification/vit/pytorch.py +9 -3
- doctr/models/classification/vit/tensorflow.py +9 -3
- doctr/models/classification/zoo.py +7 -2
- doctr/models/core.py +1 -1
- doctr/models/detection/__init__.py +1 -0
- doctr/models/detection/_utils/pytorch.py +7 -1
- doctr/models/detection/_utils/tensorflow.py +7 -3
- doctr/models/detection/core.py +9 -3
- doctr/models/detection/differentiable_binarization/base.py +37 -25
- doctr/models/detection/differentiable_binarization/pytorch.py +80 -104
- doctr/models/detection/differentiable_binarization/tensorflow.py +74 -55
- doctr/models/detection/fast/__init__.py +6 -0
- doctr/models/detection/fast/base.py +256 -0
- doctr/models/detection/fast/pytorch.py +442 -0
- doctr/models/detection/fast/tensorflow.py +428 -0
- doctr/models/detection/linknet/base.py +12 -5
- doctr/models/detection/linknet/pytorch.py +28 -15
- doctr/models/detection/linknet/tensorflow.py +68 -88
- doctr/models/detection/predictor/pytorch.py +16 -6
- doctr/models/detection/predictor/tensorflow.py +13 -5
- doctr/models/detection/zoo.py +19 -16
- doctr/models/factory/hub.py +20 -10
- doctr/models/kie_predictor/base.py +2 -1
- doctr/models/kie_predictor/pytorch.py +28 -36
- doctr/models/kie_predictor/tensorflow.py +27 -27
- doctr/models/modules/__init__.py +1 -0
- doctr/models/modules/layers/__init__.py +6 -0
- doctr/models/modules/layers/pytorch.py +166 -0
- doctr/models/modules/layers/tensorflow.py +175 -0
- doctr/models/modules/transformer/pytorch.py +24 -22
- doctr/models/modules/transformer/tensorflow.py +6 -4
- doctr/models/modules/vision_transformer/pytorch.py +2 -4
- doctr/models/modules/vision_transformer/tensorflow.py +2 -4
- doctr/models/obj_detection/faster_rcnn/pytorch.py +4 -2
- doctr/models/predictor/base.py +14 -3
- doctr/models/predictor/pytorch.py +26 -29
- doctr/models/predictor/tensorflow.py +25 -22
- doctr/models/preprocessor/pytorch.py +14 -9
- doctr/models/preprocessor/tensorflow.py +10 -5
- doctr/models/recognition/core.py +4 -1
- doctr/models/recognition/crnn/pytorch.py +23 -16
- doctr/models/recognition/crnn/tensorflow.py +25 -17
- doctr/models/recognition/master/base.py +4 -1
- doctr/models/recognition/master/pytorch.py +20 -9
- doctr/models/recognition/master/tensorflow.py +20 -8
- doctr/models/recognition/parseq/base.py +4 -1
- doctr/models/recognition/parseq/pytorch.py +28 -22
- doctr/models/recognition/parseq/tensorflow.py +22 -11
- doctr/models/recognition/predictor/_utils.py +3 -2
- doctr/models/recognition/predictor/pytorch.py +3 -2
- doctr/models/recognition/predictor/tensorflow.py +2 -1
- doctr/models/recognition/sar/pytorch.py +14 -7
- doctr/models/recognition/sar/tensorflow.py +23 -14
- doctr/models/recognition/utils.py +5 -1
- doctr/models/recognition/vitstr/base.py +4 -1
- doctr/models/recognition/vitstr/pytorch.py +22 -13
- doctr/models/recognition/vitstr/tensorflow.py +21 -10
- doctr/models/recognition/zoo.py +4 -2
- doctr/models/utils/pytorch.py +24 -6
- doctr/models/utils/tensorflow.py +22 -3
- doctr/models/zoo.py +21 -3
- doctr/transforms/functional/base.py +8 -3
- doctr/transforms/functional/pytorch.py +23 -6
- doctr/transforms/functional/tensorflow.py +25 -5
- doctr/transforms/modules/base.py +12 -5
- doctr/transforms/modules/pytorch.py +10 -12
- doctr/transforms/modules/tensorflow.py +17 -9
- doctr/utils/common_types.py +1 -1
- doctr/utils/data.py +4 -2
- doctr/utils/fonts.py +3 -2
- doctr/utils/geometry.py +95 -26
- doctr/utils/metrics.py +36 -22
- doctr/utils/multithreading.py +5 -3
- doctr/utils/repr.py +3 -1
- doctr/utils/visualization.py +31 -8
- doctr/version.py +1 -1
- {python_doctr-0.7.0.dist-info → python_doctr-0.8.1.dist-info}/METADATA +67 -31
- python_doctr-0.8.1.dist-info/RECORD +173 -0
- {python_doctr-0.7.0.dist-info → python_doctr-0.8.1.dist-info}/WHEEL +1 -1
- python_doctr-0.7.0.dist-info/RECORD +0 -161
- {python_doctr-0.7.0.dist-info → python_doctr-0.8.1.dist-info}/LICENSE +0 -0
- {python_doctr-0.7.0.dist-info → python_doctr-0.8.1.dist-info}/top_level.txt +0 -0
- {python_doctr-0.7.0.dist-info → python_doctr-0.8.1.dist-info}/zip-safe +0 -0
doctr/utils/geometry.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# Copyright (C) 2021-
|
|
1
|
+
# Copyright (C) 2021-2024, Mindee.
|
|
2
2
|
|
|
3
3
|
# This program is licensed under the Apache License 2.0.
|
|
4
4
|
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
|
|
@@ -29,10 +29,30 @@ __all__ = [
|
|
|
29
29
|
|
|
30
30
|
|
|
31
31
|
def bbox_to_polygon(bbox: BoundingBox) -> Polygon4P:
|
|
32
|
+
"""Convert a bounding box to a polygon
|
|
33
|
+
|
|
34
|
+
Args:
|
|
35
|
+
----
|
|
36
|
+
bbox: a bounding box
|
|
37
|
+
|
|
38
|
+
Returns:
|
|
39
|
+
-------
|
|
40
|
+
a polygon
|
|
41
|
+
"""
|
|
32
42
|
return bbox[0], (bbox[1][0], bbox[0][1]), (bbox[0][0], bbox[1][1]), bbox[1]
|
|
33
43
|
|
|
34
44
|
|
|
35
45
|
def polygon_to_bbox(polygon: Polygon4P) -> BoundingBox:
|
|
46
|
+
"""Convert a polygon to a bounding box
|
|
47
|
+
|
|
48
|
+
Args:
|
|
49
|
+
----
|
|
50
|
+
polygon: a polygon
|
|
51
|
+
|
|
52
|
+
Returns:
|
|
53
|
+
-------
|
|
54
|
+
a bounding box
|
|
55
|
+
"""
|
|
36
56
|
x, y = zip(*polygon)
|
|
37
57
|
return (min(x), min(y)), (max(x), max(y))
|
|
38
58
|
|
|
@@ -40,12 +60,18 @@ def polygon_to_bbox(polygon: Polygon4P) -> BoundingBox:
|
|
|
40
60
|
def resolve_enclosing_bbox(bboxes: Union[List[BoundingBox], np.ndarray]) -> Union[BoundingBox, np.ndarray]:
|
|
41
61
|
"""Compute enclosing bbox either from:
|
|
42
62
|
|
|
43
|
-
|
|
44
|
-
|
|
63
|
+
Args:
|
|
64
|
+
----
|
|
65
|
+
bboxes: boxes in one of the following formats:
|
|
66
|
+
|
|
67
|
+
- an array of boxes: (*, 5), where boxes have this shape:
|
|
68
|
+
(xmin, ymin, xmax, ymax, score)
|
|
45
69
|
|
|
46
|
-
|
|
70
|
+
- a list of BoundingBox
|
|
47
71
|
|
|
48
|
-
|
|
72
|
+
Returns:
|
|
73
|
+
-------
|
|
74
|
+
a (1, 5) array (enclosing boxarray), or a BoundingBox
|
|
49
75
|
"""
|
|
50
76
|
if isinstance(bboxes, np.ndarray):
|
|
51
77
|
xmin, ymin, xmax, ymax, score = np.split(bboxes, 5, axis=1)
|
|
@@ -56,18 +82,41 @@ def resolve_enclosing_bbox(bboxes: Union[List[BoundingBox], np.ndarray]) -> Unio
|
|
|
56
82
|
|
|
57
83
|
|
|
58
84
|
def resolve_enclosing_rbbox(rbboxes: List[np.ndarray], intermed_size: int = 1024) -> np.ndarray:
|
|
85
|
+
"""Compute enclosing rotated bbox either from:
|
|
86
|
+
|
|
87
|
+
Args:
|
|
88
|
+
----
|
|
89
|
+
rbboxes: boxes in one of the following formats:
|
|
90
|
+
|
|
91
|
+
- an array of boxes: (*, 5), where boxes have this shape:
|
|
92
|
+
(xmin, ymin, xmax, ymax, score)
|
|
93
|
+
|
|
94
|
+
- a list of BoundingBox
|
|
95
|
+
intermed_size: size of the intermediate image
|
|
96
|
+
|
|
97
|
+
Returns:
|
|
98
|
+
-------
|
|
99
|
+
a (1, 5) array (enclosing boxarray), or a BoundingBox
|
|
100
|
+
"""
|
|
59
101
|
cloud: np.ndarray = np.concatenate(rbboxes, axis=0)
|
|
60
102
|
# Convert to absolute for minAreaRect
|
|
61
103
|
cloud *= intermed_size
|
|
62
104
|
rect = cv2.minAreaRect(cloud.astype(np.int32))
|
|
63
|
-
return cv2.boxPoints(rect) / intermed_size
|
|
105
|
+
return cv2.boxPoints(rect) / intermed_size # type: ignore[operator]
|
|
64
106
|
|
|
65
107
|
|
|
66
108
|
def rotate_abs_points(points: np.ndarray, angle: float = 0.0) -> np.ndarray:
|
|
67
109
|
"""Rotate points counter-clockwise.
|
|
68
|
-
Points: array of size (N, 2)
|
|
69
|
-
"""
|
|
70
110
|
|
|
111
|
+
Args:
|
|
112
|
+
----
|
|
113
|
+
points: array of size (N, 2)
|
|
114
|
+
angle: angle between -90 and +90 degrees
|
|
115
|
+
|
|
116
|
+
Returns:
|
|
117
|
+
-------
|
|
118
|
+
Rotated points
|
|
119
|
+
"""
|
|
71
120
|
angle_rad = angle * np.pi / 180.0 # compute radian angle for np functions
|
|
72
121
|
rotation_mat = np.array(
|
|
73
122
|
[[np.cos(angle_rad), -np.sin(angle_rad)], [np.sin(angle_rad), np.cos(angle_rad)]], dtype=points.dtype
|
|
@@ -79,19 +128,18 @@ def compute_expanded_shape(img_shape: Tuple[int, int], angle: float) -> Tuple[in
|
|
|
79
128
|
"""Compute the shape of an expanded rotated image
|
|
80
129
|
|
|
81
130
|
Args:
|
|
131
|
+
----
|
|
82
132
|
img_shape: the height and width of the image
|
|
83
133
|
angle: angle between -90 and +90 degrees
|
|
84
134
|
|
|
85
135
|
Returns:
|
|
136
|
+
-------
|
|
86
137
|
the height and width of the rotated image
|
|
87
138
|
"""
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
[
|
|
91
|
-
|
|
92
|
-
[-img_shape[1] / 2, img_shape[0] / 2],
|
|
93
|
-
]
|
|
94
|
-
)
|
|
139
|
+
points: np.ndarray = np.array([
|
|
140
|
+
[img_shape[1] / 2, img_shape[0] / 2],
|
|
141
|
+
[-img_shape[1] / 2, img_shape[0] / 2],
|
|
142
|
+
])
|
|
95
143
|
|
|
96
144
|
rotated_points = rotate_abs_points(points, angle)
|
|
97
145
|
|
|
@@ -109,15 +157,16 @@ def rotate_abs_geoms(
|
|
|
109
157
|
image center.
|
|
110
158
|
|
|
111
159
|
Args:
|
|
112
|
-
|
|
160
|
+
----
|
|
161
|
+
geoms: (N, 4) or (N, 4, 2) array of ABSOLUTE coordinate boxes
|
|
113
162
|
angle: anti-clockwise rotation angle in degrees
|
|
114
163
|
img_shape: the height and width of the image
|
|
115
164
|
expand: whether the image should be padded to avoid information loss
|
|
116
165
|
|
|
117
166
|
Returns:
|
|
167
|
+
-------
|
|
118
168
|
A batch of rotated polygons (N, 4, 2)
|
|
119
169
|
"""
|
|
120
|
-
|
|
121
170
|
# Switch to polygons
|
|
122
171
|
polys = (
|
|
123
172
|
np.stack([geoms[:, [0, 1]], geoms[:, [2, 1]], geoms[:, [2, 3]], geoms[:, [0, 3]]], axis=1)
|
|
@@ -147,14 +196,15 @@ def remap_boxes(loc_preds: np.ndarray, orig_shape: Tuple[int, int], dest_shape:
|
|
|
147
196
|
coordinates after a resizing of the image.
|
|
148
197
|
|
|
149
198
|
Args:
|
|
199
|
+
----
|
|
150
200
|
loc_preds: (N, 4, 2) array of RELATIVE loc_preds
|
|
151
201
|
orig_shape: shape of the origin image
|
|
152
202
|
dest_shape: shape of the destination image
|
|
153
203
|
|
|
154
204
|
Returns:
|
|
205
|
+
-------
|
|
155
206
|
A batch of rotated loc_preds (N, 4, 2) expressed in the destination referencial
|
|
156
207
|
"""
|
|
157
|
-
|
|
158
208
|
if len(dest_shape) != 2:
|
|
159
209
|
raise ValueError(f"Mask length should be 2, was found at: {len(dest_shape)}")
|
|
160
210
|
if len(orig_shape) != 2:
|
|
@@ -181,15 +231,17 @@ def rotate_boxes(
|
|
|
181
231
|
is done to remove the padding that is created by rotate_page(expand=True)
|
|
182
232
|
|
|
183
233
|
Args:
|
|
234
|
+
----
|
|
184
235
|
loc_preds: (N, 5) or (N, 4, 2) array of RELATIVE boxes
|
|
185
236
|
angle: angle between -90 and +90 degrees
|
|
186
237
|
orig_shape: shape of the origin image
|
|
187
238
|
min_angle: minimum angle to rotate boxes
|
|
239
|
+
target_shape: shape of the destination image
|
|
188
240
|
|
|
189
241
|
Returns:
|
|
242
|
+
-------
|
|
190
243
|
A batch of rotated boxes (N, 4, 2): or a batch of straight bounding boxes
|
|
191
244
|
"""
|
|
192
|
-
|
|
193
245
|
# Change format of the boxes to rotated boxes
|
|
194
246
|
_boxes = loc_preds.copy()
|
|
195
247
|
if _boxes.ndim == 2:
|
|
@@ -234,21 +286,23 @@ def rotate_image(
|
|
|
234
286
|
"""Rotate an image counterclockwise by an given angle.
|
|
235
287
|
|
|
236
288
|
Args:
|
|
289
|
+
----
|
|
237
290
|
image: numpy tensor to rotate
|
|
238
291
|
angle: rotation angle in degrees, between -90 and +90
|
|
239
292
|
expand: whether the image should be padded before the rotation
|
|
240
293
|
preserve_origin_shape: if expand is set to True, resizes the final output to the original image size
|
|
241
294
|
|
|
242
295
|
Returns:
|
|
296
|
+
-------
|
|
243
297
|
Rotated array, padded by 0 by default.
|
|
244
298
|
"""
|
|
245
|
-
|
|
246
299
|
# Compute the expanded padding
|
|
247
300
|
exp_img: np.ndarray
|
|
248
301
|
if expand:
|
|
249
302
|
exp_shape = compute_expanded_shape(image.shape[:2], angle) # type: ignore[arg-type]
|
|
250
|
-
h_pad, w_pad =
|
|
251
|
-
max(0, ceil(exp_shape[
|
|
303
|
+
h_pad, w_pad = (
|
|
304
|
+
int(max(0, ceil(exp_shape[0] - image.shape[0]))),
|
|
305
|
+
int(max(0, ceil(exp_shape[1] - image.shape[1]))),
|
|
252
306
|
)
|
|
253
307
|
exp_img = np.pad(image, ((h_pad // 2, h_pad - h_pad // 2), (w_pad // 2, w_pad - w_pad // 2), (0, 0)))
|
|
254
308
|
else:
|
|
@@ -283,20 +337,27 @@ def estimate_page_angle(polys: np.ndarray) -> float:
|
|
|
283
337
|
yleft = polys[:, 0, 1] + polys[:, 3, 1]
|
|
284
338
|
xright = polys[:, 1, 0] + polys[:, 2, 0]
|
|
285
339
|
yright = polys[:, 1, 1] + polys[:, 2, 1]
|
|
286
|
-
|
|
340
|
+
with np.errstate(divide="raise", invalid="raise"):
|
|
341
|
+
try:
|
|
342
|
+
return float(
|
|
343
|
+
np.median(np.arctan((yleft - yright) / (xright - xleft)) * 180 / np.pi) # Y axis from top to bottom!
|
|
344
|
+
)
|
|
345
|
+
except FloatingPointError:
|
|
346
|
+
return 0.0
|
|
287
347
|
|
|
288
348
|
|
|
289
349
|
def convert_to_relative_coords(geoms: np.ndarray, img_shape: Tuple[int, int]) -> np.ndarray:
|
|
290
350
|
"""Convert a geometry to relative coordinates
|
|
291
351
|
|
|
292
352
|
Args:
|
|
353
|
+
----
|
|
293
354
|
geoms: a set of polygons of shape (N, 4, 2) or of straight boxes of shape (N, 4)
|
|
294
355
|
img_shape: the height and width of the image
|
|
295
356
|
|
|
296
357
|
Returns:
|
|
358
|
+
-------
|
|
297
359
|
the updated geometry
|
|
298
360
|
"""
|
|
299
|
-
|
|
300
361
|
# Polygon
|
|
301
362
|
if geoms.ndim == 3 and geoms.shape[1:] == (4, 2):
|
|
302
363
|
polygons: np.ndarray = np.empty(geoms.shape, dtype=np.float32)
|
|
@@ -314,12 +375,16 @@ def convert_to_relative_coords(geoms: np.ndarray, img_shape: Tuple[int, int]) ->
|
|
|
314
375
|
|
|
315
376
|
def extract_crops(img: np.ndarray, boxes: np.ndarray, channels_last: bool = True) -> List[np.ndarray]:
|
|
316
377
|
"""Created cropped images from list of bounding boxes
|
|
378
|
+
|
|
317
379
|
Args:
|
|
380
|
+
----
|
|
318
381
|
img: input image
|
|
319
382
|
boxes: bounding boxes of shape (N, 4) where N is the number of boxes, and the relative
|
|
320
383
|
coordinates (xmin, ymin, xmax, ymax)
|
|
321
384
|
channels_last: whether the channel dimensions is the last one instead of the last one
|
|
385
|
+
|
|
322
386
|
Returns:
|
|
387
|
+
-------
|
|
323
388
|
list of cropped images
|
|
324
389
|
"""
|
|
325
390
|
if boxes.shape[0] == 0:
|
|
@@ -330,7 +395,7 @@ def extract_crops(img: np.ndarray, boxes: np.ndarray, channels_last: bool = True
|
|
|
330
395
|
# Project relative coordinates
|
|
331
396
|
_boxes = boxes.copy()
|
|
332
397
|
h, w = img.shape[:2] if channels_last else img.shape[-2:]
|
|
333
|
-
if _boxes.dtype
|
|
398
|
+
if not np.issubdtype(_boxes.dtype, np.integer):
|
|
334
399
|
_boxes[:, [0, 2]] *= w
|
|
335
400
|
_boxes[:, [1, 3]] *= h
|
|
336
401
|
_boxes = _boxes.round().astype(int)
|
|
@@ -346,12 +411,16 @@ def extract_rcrops(
|
|
|
346
411
|
img: np.ndarray, polys: np.ndarray, dtype=np.float32, channels_last: bool = True
|
|
347
412
|
) -> List[np.ndarray]:
|
|
348
413
|
"""Created cropped images from list of rotated bounding boxes
|
|
414
|
+
|
|
349
415
|
Args:
|
|
416
|
+
----
|
|
350
417
|
img: input image
|
|
351
418
|
polys: bounding boxes of shape (N, 4, 2)
|
|
352
419
|
dtype: target data type of bounding boxes
|
|
353
420
|
channels_last: whether the channel dimensions is the last one instead of the last one
|
|
421
|
+
|
|
354
422
|
Returns:
|
|
423
|
+
-------
|
|
355
424
|
list of cropped images
|
|
356
425
|
"""
|
|
357
426
|
if polys.shape[0] == 0:
|
|
@@ -362,7 +431,7 @@ def extract_rcrops(
|
|
|
362
431
|
# Project relative coordinates
|
|
363
432
|
_boxes = polys.copy()
|
|
364
433
|
height, width = img.shape[:2] if channels_last else img.shape[-2:]
|
|
365
|
-
if _boxes.dtype
|
|
434
|
+
if not np.issubdtype(_boxes.dtype, np.integer):
|
|
366
435
|
_boxes[:, :, 0] *= width
|
|
367
436
|
_boxes[:, :, 1] *= height
|
|
368
437
|
|
doctr/utils/metrics.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# Copyright (C) 2021-
|
|
1
|
+
# Copyright (C) 2021-2024, Mindee.
|
|
2
2
|
|
|
3
3
|
# This program is licensed under the Apache License 2.0.
|
|
4
4
|
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
|
|
@@ -27,10 +27,12 @@ def string_match(word1: str, word2: str) -> Tuple[bool, bool, bool, bool]:
|
|
|
27
27
|
"""Performs string comparison with multiple levels of tolerance
|
|
28
28
|
|
|
29
29
|
Args:
|
|
30
|
+
----
|
|
30
31
|
word1: a string
|
|
31
32
|
word2: another string
|
|
32
33
|
|
|
33
34
|
Returns:
|
|
35
|
+
-------
|
|
34
36
|
a tuple with booleans specifying respectively whether the raw strings, their lower-case counterparts, their
|
|
35
37
|
unidecode counterparts and their lower-case unidecode counterparts match
|
|
36
38
|
"""
|
|
@@ -84,10 +86,10 @@ class TextMatch:
|
|
|
84
86
|
"""Update the state of the metric with new predictions
|
|
85
87
|
|
|
86
88
|
Args:
|
|
89
|
+
----
|
|
87
90
|
gt: list of groung-truth character sequences
|
|
88
91
|
pred: list of predicted character sequences
|
|
89
92
|
"""
|
|
90
|
-
|
|
91
93
|
if len(gt) != len(pred):
|
|
92
94
|
raise AssertionError("prediction size does not match with ground-truth labels size")
|
|
93
95
|
|
|
@@ -103,7 +105,8 @@ class TextMatch:
|
|
|
103
105
|
def summary(self) -> Dict[str, float]:
|
|
104
106
|
"""Computes the aggregated metrics
|
|
105
107
|
|
|
106
|
-
Returns
|
|
108
|
+
Returns
|
|
109
|
+
-------
|
|
107
110
|
a dictionary with the exact match score for the raw data, its lower-case counterpart, its unidecode
|
|
108
111
|
counterpart and its lower-case unidecode counterpart
|
|
109
112
|
"""
|
|
@@ -129,13 +132,14 @@ def box_iou(boxes_1: np.ndarray, boxes_2: np.ndarray) -> np.ndarray:
|
|
|
129
132
|
"""Computes the IoU between two sets of bounding boxes
|
|
130
133
|
|
|
131
134
|
Args:
|
|
135
|
+
----
|
|
132
136
|
boxes_1: bounding boxes of shape (N, 4) in format (xmin, ymin, xmax, ymax)
|
|
133
137
|
boxes_2: bounding boxes of shape (M, 4) in format (xmin, ymin, xmax, ymax)
|
|
134
138
|
|
|
135
139
|
Returns:
|
|
140
|
+
-------
|
|
136
141
|
the IoU matrix of shape (N, M)
|
|
137
142
|
"""
|
|
138
|
-
|
|
139
143
|
iou_mat: np.ndarray = np.zeros((boxes_1.shape[0], boxes_2.shape[0]), dtype=np.float32)
|
|
140
144
|
|
|
141
145
|
if boxes_1.shape[0] > 0 and boxes_2.shape[0] > 0:
|
|
@@ -159,13 +163,14 @@ def box_ioa(boxes_1: np.ndarray, boxes_2: np.ndarray) -> np.ndarray:
|
|
|
159
163
|
ioa(i, j) = inter(i, j) / area(i)
|
|
160
164
|
|
|
161
165
|
Args:
|
|
166
|
+
----
|
|
162
167
|
boxes_1: bounding boxes of shape (N, 4) in format (xmin, ymin, xmax, ymax)
|
|
163
168
|
boxes_2: bounding boxes of shape (M, 4) in format (xmin, ymin, xmax, ymax)
|
|
164
169
|
|
|
165
170
|
Returns:
|
|
171
|
+
-------
|
|
166
172
|
the IoA matrix of shape (N, M)
|
|
167
173
|
"""
|
|
168
|
-
|
|
169
174
|
ioa_mat: np.ndarray = np.zeros((boxes_1.shape[0], boxes_2.shape[0]), dtype=np.float32)
|
|
170
175
|
|
|
171
176
|
if boxes_1.shape[0] > 0 and boxes_2.shape[0] > 0:
|
|
@@ -188,13 +193,14 @@ def mask_iou(masks_1: np.ndarray, masks_2: np.ndarray) -> np.ndarray:
|
|
|
188
193
|
"""Computes the IoU between two sets of boolean masks
|
|
189
194
|
|
|
190
195
|
Args:
|
|
196
|
+
----
|
|
191
197
|
masks_1: boolean masks of shape (N, H, W)
|
|
192
198
|
masks_2: boolean masks of shape (M, H, W)
|
|
193
199
|
|
|
194
200
|
Returns:
|
|
201
|
+
-------
|
|
195
202
|
the IoU matrix of shape (N, M)
|
|
196
203
|
"""
|
|
197
|
-
|
|
198
204
|
if masks_1.shape[1:] != masks_2.shape[1:]:
|
|
199
205
|
raise AssertionError("both boolean masks should have the same spatial shape")
|
|
200
206
|
|
|
@@ -215,15 +221,16 @@ def polygon_iou(
|
|
|
215
221
|
"""Computes the IoU between two sets of rotated bounding boxes
|
|
216
222
|
|
|
217
223
|
Args:
|
|
224
|
+
----
|
|
218
225
|
polys_1: rotated bounding boxes of shape (N, 4, 2)
|
|
219
226
|
polys_2: rotated bounding boxes of shape (M, 4, 2)
|
|
220
227
|
mask_shape: spatial shape of the intermediate masks
|
|
221
228
|
use_broadcasting: if set to True, leverage broadcasting speedup by consuming more memory
|
|
222
229
|
|
|
223
230
|
Returns:
|
|
231
|
+
-------
|
|
224
232
|
the IoU matrix of shape (N, M)
|
|
225
233
|
"""
|
|
226
|
-
|
|
227
234
|
if polys_1.ndim != 3 or polys_2.ndim != 3:
|
|
228
235
|
raise AssertionError("expects boxes to be in format (N, 4, 2)")
|
|
229
236
|
|
|
@@ -249,16 +256,17 @@ def _rbox_to_mask(box: np.ndarray, shape: Tuple[int, int]) -> np.ndarray:
|
|
|
249
256
|
"""Converts a rotated bounding box to a boolean mask
|
|
250
257
|
|
|
251
258
|
Args:
|
|
259
|
+
----
|
|
252
260
|
box: rotated bounding box of shape (4, 2)
|
|
253
261
|
shape: spatial shapes of the output masks
|
|
254
262
|
|
|
255
263
|
Returns:
|
|
264
|
+
-------
|
|
256
265
|
the boolean mask of the specified shape
|
|
257
266
|
"""
|
|
258
|
-
|
|
259
267
|
mask: np.ndarray = np.zeros(shape, dtype=np.uint8)
|
|
260
268
|
# Get absolute coords
|
|
261
|
-
if box.dtype
|
|
269
|
+
if not np.issubdtype(box.dtype, np.integer):
|
|
262
270
|
abs_box = box.copy()
|
|
263
271
|
abs_box[:, 0] = abs_box[:, 0] * shape[1]
|
|
264
272
|
abs_box[:, 1] = abs_box[:, 1] * shape[0]
|
|
@@ -266,7 +274,7 @@ def _rbox_to_mask(box: np.ndarray, shape: Tuple[int, int]) -> np.ndarray:
|
|
|
266
274
|
else:
|
|
267
275
|
abs_box = box
|
|
268
276
|
abs_box[2:] = abs_box[2:] + 1
|
|
269
|
-
cv2.fillPoly(mask, [abs_box - 1], 1)
|
|
277
|
+
cv2.fillPoly(mask, [abs_box - 1], 1.0) # type: ignore[call-overload]
|
|
270
278
|
|
|
271
279
|
return mask.astype(bool)
|
|
272
280
|
|
|
@@ -275,18 +283,19 @@ def rbox_to_mask(boxes: np.ndarray, shape: Tuple[int, int]) -> np.ndarray:
|
|
|
275
283
|
"""Converts rotated bounding boxes to boolean masks
|
|
276
284
|
|
|
277
285
|
Args:
|
|
286
|
+
----
|
|
278
287
|
boxes: rotated bounding boxes of shape (N, 4, 2)
|
|
279
288
|
shape: spatial shapes of the output masks
|
|
280
289
|
|
|
281
290
|
Returns:
|
|
291
|
+
-------
|
|
282
292
|
the boolean masks of shape (N, H, W)
|
|
283
293
|
"""
|
|
284
|
-
|
|
285
294
|
masks: np.ndarray = np.zeros((boxes.shape[0], *shape), dtype=np.uint8)
|
|
286
295
|
|
|
287
296
|
if boxes.shape[0] > 0:
|
|
288
297
|
# Get absolute coordinates
|
|
289
|
-
if boxes.dtype
|
|
298
|
+
if not np.issubdtype(boxes.dtype, np.integer):
|
|
290
299
|
abs_boxes = boxes.copy()
|
|
291
300
|
abs_boxes[:, :, 0] = abs_boxes[:, :, 0] * shape[1]
|
|
292
301
|
abs_boxes[:, :, 1] = abs_boxes[:, :, 1] * shape[0]
|
|
@@ -297,7 +306,7 @@ def rbox_to_mask(boxes: np.ndarray, shape: Tuple[int, int]) -> np.ndarray:
|
|
|
297
306
|
|
|
298
307
|
# TODO: optimize slicing to improve vectorization
|
|
299
308
|
for idx, _box in enumerate(abs_boxes):
|
|
300
|
-
cv2.fillPoly(masks[idx], [_box - 1], 1)
|
|
309
|
+
cv2.fillPoly(masks[idx], [_box - 1], 1.0) # type: ignore[call-overload]
|
|
301
310
|
return masks.astype(bool)
|
|
302
311
|
|
|
303
312
|
|
|
@@ -305,10 +314,12 @@ def nms(boxes: np.ndarray, thresh: float = 0.5) -> List[int]:
|
|
|
305
314
|
"""Perform non-max suppression, borrowed from <https://github.com/rbgirshick/fast-rcnn>`_.
|
|
306
315
|
|
|
307
316
|
Args:
|
|
317
|
+
----
|
|
308
318
|
boxes: np array of straight boxes: (*, 5), (xmin, ymin, xmax, ymax, score)
|
|
309
319
|
thresh: iou threshold to perform box suppression.
|
|
310
320
|
|
|
311
321
|
Returns:
|
|
322
|
+
-------
|
|
312
323
|
A list of box indexes to keep
|
|
313
324
|
"""
|
|
314
325
|
x1 = boxes[:, 0]
|
|
@@ -372,6 +383,7 @@ class LocalizationConfusion:
|
|
|
372
383
|
>>> metric.summary()
|
|
373
384
|
|
|
374
385
|
Args:
|
|
386
|
+
----
|
|
375
387
|
iou_thresh: minimum IoU to consider a pair of prediction and ground truth as a match
|
|
376
388
|
use_polygons: if set to True, predictions and targets will be expected to have rotated format
|
|
377
389
|
mask_shape: if use_polygons is True, describes the spatial shape of the image used
|
|
@@ -395,10 +407,10 @@ class LocalizationConfusion:
|
|
|
395
407
|
"""Updates the metric
|
|
396
408
|
|
|
397
409
|
Args:
|
|
410
|
+
----
|
|
398
411
|
gts: a set of relative bounding boxes either of shape (N, 4) or (N, 5) if they are rotated ones
|
|
399
412
|
preds: a set of relative bounding boxes either of shape (M, 4) or (M, 5) if they are rotated ones
|
|
400
413
|
"""
|
|
401
|
-
|
|
402
414
|
if preds.shape[0] > 0:
|
|
403
415
|
# Compute IoU
|
|
404
416
|
if self.use_polygons:
|
|
@@ -418,10 +430,10 @@ class LocalizationConfusion:
|
|
|
418
430
|
def summary(self) -> Tuple[Optional[float], Optional[float], Optional[float]]:
|
|
419
431
|
"""Computes the aggregated metrics
|
|
420
432
|
|
|
421
|
-
Returns
|
|
433
|
+
Returns
|
|
434
|
+
-------
|
|
422
435
|
a tuple with the recall, precision and meanIoU scores
|
|
423
436
|
"""
|
|
424
|
-
|
|
425
437
|
# Recall
|
|
426
438
|
recall = self.matches / self.num_gts if self.num_gts > 0 else None
|
|
427
439
|
|
|
@@ -477,6 +489,7 @@ class OCRMetric:
|
|
|
477
489
|
>>> metric.summary()
|
|
478
490
|
|
|
479
491
|
Args:
|
|
492
|
+
----
|
|
480
493
|
iou_thresh: minimum IoU to consider a pair of prediction and ground truth as a match
|
|
481
494
|
use_polygons: if set to True, predictions and targets will be expected to have rotated format
|
|
482
495
|
mask_shape: if use_polygons is True, describes the spatial shape of the image used
|
|
@@ -506,12 +519,12 @@ class OCRMetric:
|
|
|
506
519
|
"""Updates the metric
|
|
507
520
|
|
|
508
521
|
Args:
|
|
522
|
+
----
|
|
509
523
|
gt_boxes: a set of relative bounding boxes either of shape (N, 4) or (N, 5) if they are rotated ones
|
|
510
524
|
pred_boxes: a set of relative bounding boxes either of shape (M, 4) or (M, 5) if they are rotated ones
|
|
511
525
|
gt_labels: a list of N string labels
|
|
512
526
|
pred_labels: a list of M string labels
|
|
513
527
|
"""
|
|
514
|
-
|
|
515
528
|
if gt_boxes.shape[0] != len(gt_labels) or pred_boxes.shape[0] != len(pred_labels):
|
|
516
529
|
raise AssertionError(
|
|
517
530
|
"there should be the same number of boxes and string both for the ground truth " "and the predictions"
|
|
@@ -543,10 +556,10 @@ class OCRMetric:
|
|
|
543
556
|
def summary(self) -> Tuple[Dict[str, Optional[float]], Dict[str, Optional[float]], Optional[float]]:
|
|
544
557
|
"""Computes the aggregated metrics
|
|
545
558
|
|
|
546
|
-
Returns
|
|
559
|
+
Returns
|
|
560
|
+
-------
|
|
547
561
|
a tuple with the recall & precision for each string comparison and the mean IoU
|
|
548
562
|
"""
|
|
549
|
-
|
|
550
563
|
# Recall
|
|
551
564
|
recall = dict(
|
|
552
565
|
raw=self.raw_matches / self.num_gts if self.num_gts > 0 else None,
|
|
@@ -615,6 +628,7 @@ class DetectionMetric:
|
|
|
615
628
|
>>> metric.summary()
|
|
616
629
|
|
|
617
630
|
Args:
|
|
631
|
+
----
|
|
618
632
|
iou_thresh: minimum IoU to consider a pair of prediction and ground truth as a match
|
|
619
633
|
use_polygons: if set to True, predictions and targets will be expected to have rotated format
|
|
620
634
|
mask_shape: if use_polygons is True, describes the spatial shape of the image used
|
|
@@ -644,12 +658,12 @@ class DetectionMetric:
|
|
|
644
658
|
"""Updates the metric
|
|
645
659
|
|
|
646
660
|
Args:
|
|
661
|
+
----
|
|
647
662
|
gt_boxes: a set of relative bounding boxes either of shape (N, 4) or (N, 5) if they are rotated ones
|
|
648
663
|
pred_boxes: a set of relative bounding boxes either of shape (M, 4) or (M, 5) if they are rotated ones
|
|
649
664
|
gt_labels: an array of class indices of shape (N,)
|
|
650
665
|
pred_labels: an array of class indices of shape (M,)
|
|
651
666
|
"""
|
|
652
|
-
|
|
653
667
|
if gt_boxes.shape[0] != gt_labels.shape[0] or pred_boxes.shape[0] != pred_labels.shape[0]:
|
|
654
668
|
raise AssertionError(
|
|
655
669
|
"there should be the same number of boxes and string both for the ground truth " "and the predictions"
|
|
@@ -676,10 +690,10 @@ class DetectionMetric:
|
|
|
676
690
|
def summary(self) -> Tuple[Optional[float], Optional[float], Optional[float]]:
|
|
677
691
|
"""Computes the aggregated metrics
|
|
678
692
|
|
|
679
|
-
Returns
|
|
693
|
+
Returns
|
|
694
|
+
-------
|
|
680
695
|
a tuple with the recall & precision for each class prediction and the mean IoU
|
|
681
696
|
"""
|
|
682
|
-
|
|
683
697
|
# Recall
|
|
684
698
|
recall = self.num_matches / self.num_gts if self.num_gts > 0 else None
|
|
685
699
|
|
doctr/utils/multithreading.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# Copyright (C) 2021-
|
|
1
|
+
# Copyright (C) 2021-2024, Mindee.
|
|
2
2
|
|
|
3
3
|
# This program is licensed under the Apache License 2.0.
|
|
4
4
|
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
|
|
@@ -22,19 +22,21 @@ def multithread_exec(func: Callable[[Any], Any], seq: Iterable[Any], threads: Op
|
|
|
22
22
|
>>> results = multithread_exec(lambda x: x ** 2, entries)
|
|
23
23
|
|
|
24
24
|
Args:
|
|
25
|
+
----
|
|
25
26
|
func: function to be executed on each element of the iterable
|
|
26
27
|
seq: iterable
|
|
27
28
|
threads: number of workers to be used for multiprocessing
|
|
28
29
|
|
|
29
30
|
Returns:
|
|
31
|
+
-------
|
|
30
32
|
iterator of the function's results using the iterable as inputs
|
|
31
33
|
|
|
32
34
|
Notes:
|
|
35
|
+
-----
|
|
33
36
|
This function uses ThreadPool from multiprocessing package, which uses `/dev/shm` directory for shared memory.
|
|
34
37
|
If you do not have write permissions for this directory (if you run `doctr` on AWS Lambda for instance),
|
|
35
38
|
you might want to disable multiprocessing. To achieve that, set 'DOCTR_MULTIPROCESSING_DISABLE' to 'TRUE'.
|
|
36
39
|
"""
|
|
37
|
-
|
|
38
40
|
threads = threads if isinstance(threads, int) else min(16, mp.cpu_count())
|
|
39
41
|
# Single-thread
|
|
40
42
|
if threads < 2 or os.environ.get("DOCTR_MULTIPROCESSING_DISABLE", "").upper() in ENV_VARS_TRUE_VALUES:
|
|
@@ -44,5 +46,5 @@ def multithread_exec(func: Callable[[Any], Any], seq: Iterable[Any], threads: Op
|
|
|
44
46
|
with ThreadPool(threads) as tp:
|
|
45
47
|
# ThreadPool's map function returns a list, but seq could be of a different type
|
|
46
48
|
# That's why wrapping result in map to return iterator
|
|
47
|
-
results = map(lambda x: x, tp.map(func, seq))
|
|
49
|
+
results = map(lambda x: x, tp.map(func, seq)) # noqa: C417
|
|
48
50
|
return results
|
doctr/utils/repr.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# Copyright (C) 2021-
|
|
1
|
+
# Copyright (C) 2021-2024, Mindee.
|
|
2
2
|
|
|
3
3
|
# This program is licensed under the Apache License 2.0.
|
|
4
4
|
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
|
|
@@ -23,6 +23,8 @@ def _addindent(s_, num_spaces):
|
|
|
23
23
|
|
|
24
24
|
|
|
25
25
|
class NestedObject:
|
|
26
|
+
"""Base class for all nested objects in doctr"""
|
|
27
|
+
|
|
26
28
|
_children_names: List[str]
|
|
27
29
|
|
|
28
30
|
def extra_repr(self) -> str:
|