python-doctr 0.8.0__py3-none-any.whl → 0.9.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- doctr/__init__.py +1 -1
- doctr/contrib/__init__.py +0 -0
- doctr/contrib/artefacts.py +131 -0
- doctr/contrib/base.py +105 -0
- doctr/datasets/datasets/pytorch.py +2 -2
- doctr/datasets/generator/base.py +6 -5
- doctr/datasets/imgur5k.py +1 -1
- doctr/datasets/loader.py +1 -6
- doctr/datasets/utils.py +2 -1
- doctr/datasets/vocabs.py +9 -2
- doctr/file_utils.py +26 -12
- doctr/io/elements.py +40 -6
- doctr/io/html.py +2 -2
- doctr/io/image/pytorch.py +6 -8
- doctr/io/image/tensorflow.py +1 -1
- doctr/io/pdf.py +5 -2
- doctr/io/reader.py +6 -0
- doctr/models/__init__.py +0 -1
- doctr/models/_utils.py +57 -20
- doctr/models/builder.py +71 -13
- doctr/models/classification/mobilenet/pytorch.py +45 -9
- doctr/models/classification/mobilenet/tensorflow.py +38 -7
- doctr/models/classification/predictor/pytorch.py +18 -11
- doctr/models/classification/predictor/tensorflow.py +16 -10
- doctr/models/classification/textnet/pytorch.py +3 -3
- doctr/models/classification/textnet/tensorflow.py +3 -3
- doctr/models/classification/zoo.py +39 -15
- doctr/models/detection/__init__.py +1 -0
- doctr/models/detection/_utils/__init__.py +1 -0
- doctr/models/detection/_utils/base.py +66 -0
- doctr/models/detection/differentiable_binarization/base.py +4 -3
- doctr/models/detection/differentiable_binarization/pytorch.py +2 -2
- doctr/models/detection/differentiable_binarization/tensorflow.py +14 -18
- doctr/models/detection/fast/__init__.py +6 -0
- doctr/models/detection/fast/base.py +257 -0
- doctr/models/detection/fast/pytorch.py +442 -0
- doctr/models/detection/fast/tensorflow.py +428 -0
- doctr/models/detection/linknet/base.py +4 -3
- doctr/models/detection/predictor/pytorch.py +15 -1
- doctr/models/detection/predictor/tensorflow.py +15 -1
- doctr/models/detection/zoo.py +21 -4
- doctr/models/factory/hub.py +3 -12
- doctr/models/kie_predictor/base.py +9 -3
- doctr/models/kie_predictor/pytorch.py +41 -20
- doctr/models/kie_predictor/tensorflow.py +36 -16
- doctr/models/modules/layers/pytorch.py +89 -10
- doctr/models/modules/layers/tensorflow.py +88 -10
- doctr/models/modules/transformer/pytorch.py +2 -2
- doctr/models/predictor/base.py +77 -50
- doctr/models/predictor/pytorch.py +31 -20
- doctr/models/predictor/tensorflow.py +27 -17
- doctr/models/preprocessor/pytorch.py +4 -4
- doctr/models/preprocessor/tensorflow.py +3 -2
- doctr/models/recognition/master/pytorch.py +2 -2
- doctr/models/recognition/parseq/pytorch.py +4 -3
- doctr/models/recognition/parseq/tensorflow.py +4 -3
- doctr/models/recognition/sar/pytorch.py +7 -6
- doctr/models/recognition/sar/tensorflow.py +3 -9
- doctr/models/recognition/vitstr/pytorch.py +1 -1
- doctr/models/recognition/zoo.py +1 -1
- doctr/models/zoo.py +2 -2
- doctr/py.typed +0 -0
- doctr/transforms/functional/base.py +1 -1
- doctr/transforms/functional/pytorch.py +4 -4
- doctr/transforms/modules/base.py +37 -15
- doctr/transforms/modules/pytorch.py +66 -8
- doctr/transforms/modules/tensorflow.py +63 -7
- doctr/utils/fonts.py +7 -5
- doctr/utils/geometry.py +35 -12
- doctr/utils/metrics.py +33 -174
- doctr/utils/reconstitution.py +126 -0
- doctr/utils/visualization.py +5 -118
- doctr/version.py +1 -1
- {python_doctr-0.8.0.dist-info → python_doctr-0.9.0.dist-info}/METADATA +96 -91
- {python_doctr-0.8.0.dist-info → python_doctr-0.9.0.dist-info}/RECORD +79 -75
- {python_doctr-0.8.0.dist-info → python_doctr-0.9.0.dist-info}/WHEEL +1 -1
- doctr/models/artefacts/__init__.py +0 -2
- doctr/models/artefacts/barcode.py +0 -74
- doctr/models/artefacts/face.py +0 -63
- doctr/models/obj_detection/__init__.py +0 -1
- doctr/models/obj_detection/faster_rcnn/__init__.py +0 -4
- doctr/models/obj_detection/faster_rcnn/pytorch.py +0 -81
- {python_doctr-0.8.0.dist-info → python_doctr-0.9.0.dist-info}/LICENSE +0 -0
- {python_doctr-0.8.0.dist-info → python_doctr-0.9.0.dist-info}/top_level.txt +0 -0
- {python_doctr-0.8.0.dist-info → python_doctr-0.9.0.dist-info}/zip-safe +0 -0
doctr/transforms/functional/pytorch.py
CHANGED

```diff
@@ -35,9 +35,9 @@ def invert_colors(img: torch.Tensor, min_val: float = 0.6) -> torch.Tensor:
     rgb_shift = min_val + (1 - min_val) * torch.rand(shift_shape)
     # Inverse the color
     if out.dtype == torch.uint8:
-        out = (out.to(dtype=rgb_shift.dtype) * rgb_shift).to(dtype=torch.uint8)
+        out = (out.to(dtype=rgb_shift.dtype) * rgb_shift).to(dtype=torch.uint8)
     else:
-        out = out * rgb_shift.to(dtype=out.dtype)
+        out = out * rgb_shift.to(dtype=out.dtype)
     # Inverse the color
     out = 255 - out if out.dtype == torch.uint8 else 1 - out
     return out
@@ -81,7 +81,7 @@ def rotate_sample(
     rotated_geoms: np.ndarray = rotate_abs_geoms(
         _geoms,
         angle,
-        img.shape[1:],
+        img.shape[1:],  # type: ignore[arg-type]
         expand,
     ).astype(np.float32)
@@ -132,7 +132,7 @@ def random_shadow(img: torch.Tensor, opacity_range: Tuple[float, float], **kwargs) -> torch.Tensor:
     -------
         shaded image
     """
-    shadow_mask = create_shadow_mask(img.shape[1:], **kwargs)
+    shadow_mask = create_shadow_mask(img.shape[1:], **kwargs)  # type: ignore[arg-type]
 
     opacity = np.random.uniform(*opacity_range)
     shadow_tensor = 1 - torch.from_numpy(shadow_mask[None, ...])
```
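A minimal usage sketch (not part of the diff) exercising both dtype branches of `invert_colors` and the `random_shadow` call above; the module path and signatures come from the hunks, the shapes are illustrative:

```python
import torch

from doctr.transforms.functional.pytorch import invert_colors, random_shadow

# uint8 input takes the integer branch: scale, cast back to uint8, then invert
img_u8 = torch.randint(0, 256, (3, 64, 64), dtype=torch.uint8)
inverted = invert_colors(img_u8, min_val=0.6)

# float input takes the second branch; random_shadow expects a CHW tensor
img_f = torch.rand(3, 64, 64)
shaded = random_shadow(img_f, opacity_range=(0.2, 0.8))
```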
doctr/transforms/modules/base.py
CHANGED

```diff
@@ -5,7 +5,7 @@
 
 import math
 import random
-from typing import Any, Callable,
+from typing import Any, Callable, List, Optional, Tuple, Union
 
 import numpy as np
 
@@ -168,11 +168,11 @@ class OneOf(NestedObject):
     def __init__(self, transforms: List[Callable[[Any], Any]]) -> None:
         self.transforms = transforms
 
-    def __call__(self, img: Any) -> Any:
+    def __call__(self, img: Any, target: Optional[np.ndarray] = None) -> Union[Any, Tuple[Any, np.ndarray]]:
         # Pick transformation
         transfo = self.transforms[int(random.random() * len(self.transforms))]
         # Apply
-        return transfo(img)
+        return transfo(img) if target is None else transfo(img, target)  # type: ignore[call-arg]
 
 
 class RandomApply(NestedObject):
@@ -261,17 +261,39 @@ class RandomCrop(NestedObject):
     def extra_repr(self) -> str:
         return f"scale={self.scale}, ratio={self.ratio}"
 
-    def __call__(self, img: Any, target:
+    def __call__(self, img: Any, target: np.ndarray) -> Tuple[Any, np.ndarray]:
         scale = random.uniform(self.scale[0], self.scale[1])
         ratio = random.uniform(self.ratio[0], self.ratio[1])
-        (old lines 267-277 of the previous implementation, elided)
+
+        height, width = img.shape[:2]
+
+        # Calculate crop size
+        crop_area = scale * width * height
+        aspect_ratio = ratio * (width / height)
+        crop_width = int(round(math.sqrt(crop_area * aspect_ratio)))
+        crop_height = int(round(math.sqrt(crop_area / aspect_ratio)))
+
+        # Ensure crop size does not exceed image dimensions
+        crop_width = min(crop_width, width)
+        crop_height = min(crop_height, height)
+
+        # Randomly select crop position
+        x = random.randint(0, width - crop_width)
+        y = random.randint(0, height - crop_height)
+
+        # relative crop box
+        crop_box = (x / width, y / height, (x + crop_width) / width, (y + crop_height) / height)
+        if target.shape[1:] == (4, 2):
+            min_xy = np.min(target, axis=1)
+            max_xy = np.max(target, axis=1)
+            _target = np.concatenate((min_xy, max_xy), axis=1)
+        else:
+            _target = target
+
+        # Crop image and targets
+        croped_img, crop_boxes = F.crop_detection(img, _target, crop_box)
+        # hard fallback if no box is kept
+        if crop_boxes.shape[0] == 0:
+            return img, target
+        # clip boxes
+        return croped_img, np.clip(crop_boxes, 0, 1)
```
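A short sketch (not part of the diff) of the reworked `RandomCrop`; the `scale`/`ratio` constructor arguments are inferred from `extra_repr`, the values are illustrative, and it shows how (N, 4, 2) polygon targets are reduced to xyxy boxes before cropping:

```python
import numpy as np

from doctr.transforms.modules.base import RandomCrop

transfo = RandomCrop(scale=(0.08, 1.0), ratio=(0.75, 1.33))  # arguments inferred from extra_repr
img = np.random.randint(0, 255, (480, 640, 3), dtype=np.uint8)  # HWC, matching img.shape[:2]
polys = np.random.rand(8, 4, 2).astype(np.float32)  # relative 4-point polygons, shape (N, 4, 2)
out_img, out_boxes = transfo(img, polys)  # falls back to the inputs if no box survives the crop
```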
doctr/transforms/modules/pytorch.py
CHANGED

```diff
@@ -4,7 +4,7 @@
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
 
 import math
-from typing import
+from typing import Optional, Tuple, Union
 
 import numpy as np
 import torch
@@ -15,7 +15,7 @@ from torchvision.transforms import transforms as T
 
 from ..functional.pytorch import random_shadow
 
-__all__ = ["Resize", "GaussianNoise", "ChannelShuffle", "RandomHorizontalFlip", "RandomShadow"]
+__all__ = ["Resize", "GaussianNoise", "ChannelShuffle", "RandomHorizontalFlip", "RandomShadow", "RandomResize"]
 
 
 class Resize(T.Resize):
@@ -135,9 +135,9 @@ class GaussianNoise(torch.nn.Module):
         # Reshape the distribution
         noise = self.mean + 2 * self.std * torch.rand(x.shape, device=x.device) - self.std
         if x.dtype == torch.uint8:
-            return (x + 255 * noise).round().clamp(0, 255).to(dtype=torch.uint8)
+            return (x + 255 * noise).round().clamp(0, 255).to(dtype=torch.uint8)
         else:
-            return (x + noise.to(dtype=x.dtype)).clamp(0, 1)
+            return (x + noise.to(dtype=x.dtype)).clamp(0, 1)
 
     def extra_repr(self) -> str:
         return f"mean={self.mean}, std={self.std}"
@@ -159,13 +159,16 @@ class RandomHorizontalFlip(T.RandomHorizontalFlip):
     """Randomly flip the input image horizontally"""
 
     def forward(
-        self, img: Union[torch.Tensor, Image], target:
-    ) -> Tuple[Union[torch.Tensor, Image],
+        self, img: Union[torch.Tensor, Image], target: np.ndarray
+    ) -> Tuple[Union[torch.Tensor, Image], np.ndarray]:
         if torch.rand(1) < self.p:
             _img = F.hflip(img)
             _target = target.copy()
             # Changing the relative bbox coordinates
-            (old line 168 elided)
+            if target.shape[1:] == (4,):
+                _target[:, ::2] = 1 - target[:, [2, 0]]
+            else:
+                _target[..., 0] = 1 - target[..., 0]
             return _img, _target
         return img, target
 
@@ -199,7 +202,7 @@ class RandomShadow(torch.nn.Module):
                     self.opacity_range,
                 )
             )
-            .round()
+            .round()
             .clip(0, 255)
             .to(dtype=torch.uint8)
         )
@@ -210,3 +213,58 @@ class RandomShadow(torch.nn.Module):
 
     def extra_repr(self) -> str:
         return f"opacity_range={self.opacity_range}"
+
+
+class RandomResize(torch.nn.Module):
+    """Randomly resize the input image and align corresponding targets
+
+    >>> import torch
+    >>> from doctr.transforms import RandomResize
+    >>> transfo = RandomResize((0.3, 0.9), preserve_aspect_ratio=True, symmetric_pad=True, p=0.5)
+    >>> out = transfo(torch.rand((3, 64, 64)))
+
+    Args:
+    ----
+        scale_range: range of the resizing factor for width and height (independently)
+        preserve_aspect_ratio: whether to preserve the aspect ratio of the image,
+            given a float value, the aspect ratio will be preserved with this probability
+        symmetric_pad: whether to symmetrically pad the image,
+            given a float value, the symmetric padding will be applied with this probability
+        p: probability to apply the transformation
+    """
+
+    def __init__(
+        self,
+        scale_range: Tuple[float, float] = (0.3, 0.9),
+        preserve_aspect_ratio: Union[bool, float] = False,
+        symmetric_pad: Union[bool, float] = False,
+        p: float = 0.5,
+    ) -> None:
+        super().__init__()
+        self.scale_range = scale_range
+        self.preserve_aspect_ratio = preserve_aspect_ratio
+        self.symmetric_pad = symmetric_pad
+        self.p = p
+        self._resize = Resize
+
+    def forward(self, img: torch.Tensor, target: np.ndarray) -> Tuple[torch.Tensor, np.ndarray]:
+        if torch.rand(1) < self.p:
+            scale_h = np.random.uniform(*self.scale_range)
+            scale_w = np.random.uniform(*self.scale_range)
+            new_size = (int(img.shape[-2] * scale_h), int(img.shape[-1] * scale_w))
+
+            _img, _target = self._resize(
+                new_size,
+                preserve_aspect_ratio=self.preserve_aspect_ratio
+                if isinstance(self.preserve_aspect_ratio, bool)
+                else bool(torch.rand(1) <= self.symmetric_pad),
+                symmetric_pad=self.symmetric_pad
+                if isinstance(self.symmetric_pad, bool)
+                else bool(torch.rand(1) <= self.symmetric_pad),
+            )(img, target)
+
+            return _img, _target
+        return img, target
+
+    def extra_repr(self) -> str:
+        return f"scale_range={self.scale_range}, preserve_aspect_ratio={self.preserve_aspect_ratio}, symmetric_pad={self.symmetric_pad}, p={self.p}"  # noqa: E501
```
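A usage sketch (not part of the diff) for the new PyTorch `RandomResize`, passing the target array that `forward` requires; float probabilities make the aspect-ratio and padding choices themselves stochastic, and the values here are illustrative:

```python
import numpy as np
import torch

from doctr.transforms import RandomResize

transfo = RandomResize(scale_range=(0.3, 0.9), preserve_aspect_ratio=0.5, symmetric_pad=0.5, p=1.0)
img = torch.rand(3, 64, 64)  # CHW
boxes = np.array([[0.1, 0.1, 0.4, 0.5]], dtype=np.float32)  # relative xyxy targets
out_img, out_boxes = transfo(img, boxes)  # p=1.0 forces the resize branch
```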
doctr/transforms/modules/tensorflow.py
CHANGED

```diff
@@ -4,7 +4,7 @@
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
 
 import random
-from typing import Any, Callable,
+from typing import Any, Callable, Iterable, List, Optional, Tuple, Union
 
 import numpy as np
 import tensorflow as tf
@@ -30,6 +30,7 @@ __all__ = [
     "GaussianNoise",
     "RandomHorizontalFlip",
     "RandomShadow",
+    "RandomResize",
 ]
 
 
@@ -457,10 +458,7 @@ class RandomHorizontalFlip(NestedObject):
     >>> from doctr.transforms import RandomHorizontalFlip
     >>> transfo = RandomHorizontalFlip(p=0.5)
     >>> image = tf.random.uniform(shape=[64, 64, 3], minval=0, maxval=1)
-    >>> target = {
-    >>>     "boxes": np.array([[0.1, 0.1, 0.4, 0.5]], dtype=np.float32),
-    >>>     "labels": np.ones(1, dtype=np.int64)
-    >>> }
+    >>> target = np.array([[0.1, 0.1, 0.4, 0.5]], dtype=np.float32)
     >>> out = transfo(image, target)
 
     Args:
@@ -472,12 +470,15 @@ class RandomHorizontalFlip(NestedObject):
         super().__init__()
         self.p = p
 
-    def __call__(self, img: Union[tf.Tensor, np.ndarray], target:
+    def __call__(self, img: Union[tf.Tensor, np.ndarray], target: np.ndarray) -> Tuple[tf.Tensor, np.ndarray]:
         if np.random.rand(1) <= self.p:
             _img = tf.image.flip_left_right(img)
             _target = target.copy()
             # Changing the relative bbox coordinates
-            (old line 480 elided)
+            if target.shape[1:] == (4,):
+                _target[:, ::2] = 1 - target[:, [2, 0]]
+            else:
+                _target[..., 0] = 1 - target[..., 0]
             return _img, _target
         return img, target
 
@@ -515,3 +516,58 @@ class RandomShadow(NestedObject):
 
     def extra_repr(self) -> str:
         return f"opacity_range={self.opacity_range}"
+
+
+class RandomResize(NestedObject):
+    """Randomly resize the input image and align corresponding targets
+
+    >>> import tensorflow as tf
+    >>> from doctr.transforms import RandomResize
+    >>> transfo = RandomResize((0.3, 0.9), preserve_aspect_ratio=True, symmetric_pad=True, p=0.5)
+    >>> out = transfo(tf.random.uniform(shape=[64, 64, 3], minval=0, maxval=1))
+
+    Args:
+    ----
+        scale_range: range of the resizing factor for width and height (independently)
+        preserve_aspect_ratio: whether to preserve the aspect ratio of the image,
+            given a float value, the aspect ratio will be preserved with this probability
+        symmetric_pad: whether to symmetrically pad the image,
+            given a float value, the symmetric padding will be applied with this probability
+        p: probability to apply the transformation
+    """
+
+    def __init__(
+        self,
+        scale_range: Tuple[float, float] = (0.3, 0.9),
+        preserve_aspect_ratio: Union[bool, float] = False,
+        symmetric_pad: Union[bool, float] = False,
+        p: float = 0.5,
+    ):
+        super().__init__()
+        self.scale_range = scale_range
+        self.preserve_aspect_ratio = preserve_aspect_ratio
+        self.symmetric_pad = symmetric_pad
+        self.p = p
+        self._resize = Resize
+
+    def __call__(self, img: tf.Tensor, target: np.ndarray) -> Tuple[tf.Tensor, np.ndarray]:
+        if np.random.rand(1) <= self.p:
+            scale_h = random.uniform(*self.scale_range)
+            scale_w = random.uniform(*self.scale_range)
+            new_size = (int(img.shape[-3] * scale_h), int(img.shape[-2] * scale_w))
+
+            _img, _target = self._resize(
+                new_size,
+                preserve_aspect_ratio=self.preserve_aspect_ratio
+                if isinstance(self.preserve_aspect_ratio, bool)
+                else bool(np.random.rand(1) <= self.symmetric_pad),
+                symmetric_pad=self.symmetric_pad
+                if isinstance(self.symmetric_pad, bool)
+                else bool(np.random.rand(1) <= self.symmetric_pad),
+            )(img, target)
+
+            return _img, _target
+        return img, target
+
+    def extra_repr(self) -> str:
+        return f"scale_range={self.scale_range}, preserve_aspect_ratio={self.preserve_aspect_ratio}, symmetric_pad={self.symmetric_pad}, p={self.p}"  # noqa: E501
```
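Both backends share the same flip arithmetic for relative xyxy boxes; here is a standalone numpy check (illustrative values) of the indexing trick used in the hunks above:

```python
import numpy as np

target = np.array([[0.1, 0.1, 0.4, 0.5]], dtype=np.float32)  # (xmin, ymin, xmax, ymax)
flipped = target.copy()
# the new xmin/xmax are the mirrored old xmax/xmin; y coordinates are untouched
flipped[:, ::2] = 1 - target[:, [2, 0]]
assert np.allclose(flipped, [[0.6, 0.1, 0.9, 0.5]])
```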
doctr/utils/fonts.py
CHANGED

```diff
@@ -5,14 +5,16 @@
 
 import logging
 import platform
-from typing import Optional
+from typing import Optional, Union
 
 from PIL import ImageFont
 
 __all__ = ["get_font"]
 
 
-def get_font(font_family: Optional[str] = None, font_size: int = 13) -> ImageFont.ImageFont:
+def get_font(
+    font_family: Optional[str] = None, font_size: int = 13
+) -> Union[ImageFont.FreeTypeFont, ImageFont.ImageFont]:
     """Resolves a compatible ImageFont for the system
 
     Args:
@@ -28,14 +30,14 @@ def get_font(font_family: Optional[str] = None, font_size: int = 13) -> ImageFont.ImageFont:
     if font_family is None:
         try:
             font = ImageFont.truetype("FreeMono.ttf" if platform.system() == "Linux" else "Arial.ttf", font_size)
-        except OSError:
-            font = ImageFont.load_default()
+        except OSError:  # pragma: no cover
+            font = ImageFont.load_default()  # type: ignore[assignment]
             logging.warning(
                 "unable to load recommended font family. Loading default PIL font,"
                 "font size issues may be expected."
                 "To prevent this, it is recommended to specify the value of 'font_family'."
             )
-    else:
+    else:  # pragma: no cover
         font = ImageFont.truetype(font_family, font_size)
 
     return font
```
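A quick sketch (not part of the diff) of the widened return type: `get_font` now advertises that its fallback path can hand back PIL's default bitmap font rather than a `FreeTypeFont`:

```python
from doctr.utils.fonts import get_font

# FreeTypeFont when FreeMono.ttf/Arial.ttf resolves, otherwise PIL's default bitmap font
font = get_font(font_size=16)
print(type(font).__name__)
```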
doctr/utils/geometry.py
CHANGED

```diff
@@ -25,6 +25,7 @@ __all__ = [
     "rotate_abs_geoms",
     "extract_crops",
     "extract_rcrops",
+    "detach_scores",
 ]
 
 
@@ -57,6 +58,28 @@ def polygon_to_bbox(polygon: Polygon4P) -> BoundingBox:
     return (min(x), min(y)), (max(x), max(y))
 
 
+def detach_scores(boxes: List[np.ndarray]) -> Tuple[List[np.ndarray], List[np.ndarray]]:
+    """Detach the objectness scores from box predictions
+
+    Args:
+    ----
+        boxes: list of arrays with boxes of shape (N, 5) or (N, 5, 2)
+
+    Returns:
+    -------
+        a tuple of two lists: the first one contains the boxes without the objectness scores,
+        the second one contains the objectness scores
+    """
+
+    def _detach(boxes: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
+        if boxes.ndim == 2:
+            return boxes[:, :-1], boxes[:, -1]
+        return boxes[:, :-1], boxes[:, -1, -1]
+
+    loc_preds, obj_scores = zip(*(_detach(box) for box in boxes))
+    return list(loc_preds), list(obj_scores)
+
+
 def resolve_enclosing_bbox(bboxes: Union[List[BoundingBox], np.ndarray]) -> Union[BoundingBox, np.ndarray]:
     """Compute enclosing bbox either from:
 
```
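A numpy sketch (not part of the diff) of the new `detach_scores` helper on both supported shapes; the values are illustrative:

```python
import numpy as np

from doctr.utils.geometry import detach_scores

straight = np.array([[0.1, 0.1, 0.4, 0.5, 0.9]], dtype=np.float32)  # (N, 5): xyxy + score
rotated = np.concatenate(
    [np.random.rand(2, 4, 2), np.full((2, 1, 2), 0.8)], axis=1
).astype(np.float32)  # (N, 5, 2): 4 corner points plus a trailing score row
locs, scores = detach_scores([straight, rotated])
# locs[0].shape == (1, 4) with scores[0] == [0.9]; locs[1].shape == (2, 4, 2) with scores[1] == [0.8, 0.8]
```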
```diff
@@ -64,18 +87,18 @@ def resolve_enclosing_bbox(bboxes: Union[List[BoundingBox], np.ndarray]) -> Union[BoundingBox, np.ndarray]:
     ----
         bboxes: boxes in one of the following formats:
 
-        - an array of boxes: (*, 5), where boxes have this shape:
-        (xmin, ymin, xmax, ymax, score)
+        - an array of boxes: (*, 4), where boxes have this shape:
+        (xmin, ymin, xmax, ymax)
 
         - a list of BoundingBox
 
     Returns:
     -------
-        a (1, 5) array (enclosing boxarray), or a BoundingBox
+        a (1, 4) array (enclosing boxarray), or a BoundingBox
     """
     if isinstance(bboxes, np.ndarray):
-        xmin, ymin, xmax, ymax, score = np.split(bboxes, 5, axis=1)
-        return np.array([xmin.min(), ymin.min(), xmax.max(), ymax.max(), score.mean()])
+        xmin, ymin, xmax, ymax = np.split(bboxes, 4, axis=1)
+        return np.array([xmin.min(), ymin.min(), xmax.max(), ymax.max()])
     else:
         x, y = zip(*[point for box in bboxes for point in box])
         return (min(x), min(y)), (max(x), max(y))
```
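With the score column gone, `resolve_enclosing_bbox` now splits plain xyxy arrays; a minimal check (illustrative values):

```python
import numpy as np

from doctr.utils.geometry import resolve_enclosing_bbox

boxes = np.array([[0.1, 0.2, 0.4, 0.5], [0.3, 0.1, 0.6, 0.4]], dtype=np.float32)
enclosing = resolve_enclosing_bbox(boxes)  # array([0.1, 0.1, 0.6, 0.5])
```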
```diff
@@ -88,21 +111,21 @@ def resolve_enclosing_rbbox(rbboxes: List[np.ndarray], intermed_size: int = 1024) -> np.ndarray:
     ----
         rbboxes: boxes in one of the following formats:
 
-        - an array of boxes: (*,
-        (
+        - an array of boxes: (*, 4, 2), where boxes have this shape:
+        (x1, y1), (x2, y2), (x3, y3), (x4, y4)
 
         - a list of BoundingBox
         intermed_size: size of the intermediate image
 
     Returns:
     -------
-        a (
+        a (4, 2) array (enclosing rotated box)
     """
     cloud: np.ndarray = np.concatenate(rbboxes, axis=0)
     # Convert to absolute for minAreaRect
     cloud *= intermed_size
     rect = cv2.minAreaRect(cloud.astype(np.int32))
-    return cv2.boxPoints(rect) / intermed_size  # type: ignore[
+    return cv2.boxPoints(rect) / intermed_size  # type: ignore[return-value]
 
 
 def rotate_abs_points(points: np.ndarray, angle: float = 0.0) -> np.ndarray:
@@ -232,7 +255,7 @@ def rotate_boxes(
 
     Args:
     ----
-        loc_preds: (N,
+        loc_preds: (N, 4) or (N, 4, 2) array of RELATIVE boxes
         angle: angle between -90 and +90 degrees
         orig_shape: shape of the origin image
         min_angle: minimum angle to rotate boxes
@@ -320,7 +343,7 @@ def rotate_image(
     # Pad height
     else:
         h_pad, w_pad = int(rot_img.shape[1] * image.shape[0] / image.shape[1] - rot_img.shape[0]), 0
-    rot_img = np.pad(rot_img, ((h_pad // 2, h_pad - h_pad // 2), (w_pad // 2, w_pad - w_pad // 2), (0, 0)))
+    rot_img = np.pad(rot_img, ((h_pad // 2, h_pad - h_pad // 2), (w_pad // 2, w_pad - w_pad // 2), (0, 0)))  # type: ignore[assignment]
     if preserve_origin_shape:
         # rescale
         rot_img = cv2.resize(rot_img, image.shape[:-1][::-1], interpolation=cv2.INTER_LINEAR)
@@ -453,4 +476,4 @@ def extract_rcrops(
         )
         for idx in range(_boxes.shape[0])
     ]
-    return crops
+    return crops  # type: ignore[return-value]
```