python-doctr 0.10.0__py3-none-any.whl → 0.11.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- doctr/contrib/__init__.py +1 -0
- doctr/contrib/artefacts.py +7 -9
- doctr/contrib/base.py +8 -17
- doctr/datasets/cord.py +8 -7
- doctr/datasets/datasets/__init__.py +4 -4
- doctr/datasets/datasets/base.py +16 -16
- doctr/datasets/datasets/pytorch.py +12 -12
- doctr/datasets/datasets/tensorflow.py +10 -10
- doctr/datasets/detection.py +6 -9
- doctr/datasets/doc_artefacts.py +3 -4
- doctr/datasets/funsd.py +7 -6
- doctr/datasets/generator/__init__.py +4 -4
- doctr/datasets/generator/base.py +16 -17
- doctr/datasets/generator/pytorch.py +1 -3
- doctr/datasets/generator/tensorflow.py +1 -3
- doctr/datasets/ic03.py +4 -5
- doctr/datasets/ic13.py +4 -5
- doctr/datasets/iiit5k.py +6 -5
- doctr/datasets/iiithws.py +4 -5
- doctr/datasets/imgur5k.py +6 -5
- doctr/datasets/loader.py +4 -7
- doctr/datasets/mjsynth.py +6 -5
- doctr/datasets/ocr.py +3 -4
- doctr/datasets/orientation.py +3 -4
- doctr/datasets/recognition.py +3 -4
- doctr/datasets/sroie.py +6 -5
- doctr/datasets/svhn.py +6 -5
- doctr/datasets/svt.py +4 -5
- doctr/datasets/synthtext.py +4 -5
- doctr/datasets/utils.py +34 -29
- doctr/datasets/vocabs.py +17 -7
- doctr/datasets/wildreceipt.py +14 -10
- doctr/file_utils.py +2 -7
- doctr/io/elements.py +59 -79
- doctr/io/html.py +1 -3
- doctr/io/image/__init__.py +3 -3
- doctr/io/image/base.py +2 -5
- doctr/io/image/pytorch.py +3 -12
- doctr/io/image/tensorflow.py +2 -11
- doctr/io/pdf.py +5 -7
- doctr/io/reader.py +5 -11
- doctr/models/_utils.py +14 -22
- doctr/models/builder.py +30 -48
- doctr/models/classification/magc_resnet/__init__.py +3 -3
- doctr/models/classification/magc_resnet/pytorch.py +10 -13
- doctr/models/classification/magc_resnet/tensorflow.py +8 -11
- doctr/models/classification/mobilenet/__init__.py +3 -3
- doctr/models/classification/mobilenet/pytorch.py +5 -17
- doctr/models/classification/mobilenet/tensorflow.py +8 -21
- doctr/models/classification/predictor/__init__.py +4 -4
- doctr/models/classification/predictor/pytorch.py +6 -8
- doctr/models/classification/predictor/tensorflow.py +6 -8
- doctr/models/classification/resnet/__init__.py +4 -4
- doctr/models/classification/resnet/pytorch.py +21 -31
- doctr/models/classification/resnet/tensorflow.py +20 -31
- doctr/models/classification/textnet/__init__.py +3 -3
- doctr/models/classification/textnet/pytorch.py +10 -17
- doctr/models/classification/textnet/tensorflow.py +8 -15
- doctr/models/classification/vgg/__init__.py +3 -3
- doctr/models/classification/vgg/pytorch.py +5 -7
- doctr/models/classification/vgg/tensorflow.py +9 -12
- doctr/models/classification/vit/__init__.py +3 -3
- doctr/models/classification/vit/pytorch.py +8 -14
- doctr/models/classification/vit/tensorflow.py +6 -12
- doctr/models/classification/zoo.py +19 -14
- doctr/models/core.py +3 -3
- doctr/models/detection/_utils/__init__.py +4 -4
- doctr/models/detection/_utils/base.py +4 -7
- doctr/models/detection/_utils/pytorch.py +1 -5
- doctr/models/detection/_utils/tensorflow.py +1 -5
- doctr/models/detection/core.py +2 -8
- doctr/models/detection/differentiable_binarization/__init__.py +4 -4
- doctr/models/detection/differentiable_binarization/base.py +7 -17
- doctr/models/detection/differentiable_binarization/pytorch.py +27 -30
- doctr/models/detection/differentiable_binarization/tensorflow.py +15 -25
- doctr/models/detection/fast/__init__.py +4 -4
- doctr/models/detection/fast/base.py +6 -14
- doctr/models/detection/fast/pytorch.py +24 -31
- doctr/models/detection/fast/tensorflow.py +14 -26
- doctr/models/detection/linknet/__init__.py +4 -4
- doctr/models/detection/linknet/base.py +6 -15
- doctr/models/detection/linknet/pytorch.py +24 -27
- doctr/models/detection/linknet/tensorflow.py +14 -23
- doctr/models/detection/predictor/__init__.py +5 -5
- doctr/models/detection/predictor/pytorch.py +6 -7
- doctr/models/detection/predictor/tensorflow.py +5 -6
- doctr/models/detection/zoo.py +27 -7
- doctr/models/factory/hub.py +3 -7
- doctr/models/kie_predictor/__init__.py +5 -5
- doctr/models/kie_predictor/base.py +4 -5
- doctr/models/kie_predictor/pytorch.py +18 -19
- doctr/models/kie_predictor/tensorflow.py +13 -14
- doctr/models/modules/layers/__init__.py +3 -3
- doctr/models/modules/layers/pytorch.py +6 -9
- doctr/models/modules/layers/tensorflow.py +5 -7
- doctr/models/modules/transformer/__init__.py +3 -3
- doctr/models/modules/transformer/pytorch.py +12 -13
- doctr/models/modules/transformer/tensorflow.py +9 -10
- doctr/models/modules/vision_transformer/__init__.py +3 -3
- doctr/models/modules/vision_transformer/pytorch.py +2 -3
- doctr/models/modules/vision_transformer/tensorflow.py +3 -3
- doctr/models/predictor/__init__.py +5 -5
- doctr/models/predictor/base.py +28 -29
- doctr/models/predictor/pytorch.py +12 -13
- doctr/models/predictor/tensorflow.py +8 -9
- doctr/models/preprocessor/__init__.py +4 -4
- doctr/models/preprocessor/pytorch.py +13 -17
- doctr/models/preprocessor/tensorflow.py +10 -14
- doctr/models/recognition/core.py +3 -7
- doctr/models/recognition/crnn/__init__.py +4 -4
- doctr/models/recognition/crnn/pytorch.py +20 -28
- doctr/models/recognition/crnn/tensorflow.py +11 -23
- doctr/models/recognition/master/__init__.py +3 -3
- doctr/models/recognition/master/base.py +3 -7
- doctr/models/recognition/master/pytorch.py +22 -24
- doctr/models/recognition/master/tensorflow.py +12 -22
- doctr/models/recognition/parseq/__init__.py +3 -3
- doctr/models/recognition/parseq/base.py +3 -7
- doctr/models/recognition/parseq/pytorch.py +26 -26
- doctr/models/recognition/parseq/tensorflow.py +16 -22
- doctr/models/recognition/predictor/__init__.py +5 -5
- doctr/models/recognition/predictor/_utils.py +7 -10
- doctr/models/recognition/predictor/pytorch.py +6 -6
- doctr/models/recognition/predictor/tensorflow.py +5 -6
- doctr/models/recognition/sar/__init__.py +4 -4
- doctr/models/recognition/sar/pytorch.py +20 -21
- doctr/models/recognition/sar/tensorflow.py +12 -21
- doctr/models/recognition/utils.py +5 -10
- doctr/models/recognition/vitstr/__init__.py +4 -4
- doctr/models/recognition/vitstr/base.py +3 -7
- doctr/models/recognition/vitstr/pytorch.py +18 -20
- doctr/models/recognition/vitstr/tensorflow.py +12 -20
- doctr/models/recognition/zoo.py +22 -11
- doctr/models/utils/__init__.py +4 -4
- doctr/models/utils/pytorch.py +14 -17
- doctr/models/utils/tensorflow.py +17 -16
- doctr/models/zoo.py +1 -5
- doctr/transforms/functional/__init__.py +3 -3
- doctr/transforms/functional/base.py +4 -11
- doctr/transforms/functional/pytorch.py +20 -28
- doctr/transforms/functional/tensorflow.py +10 -22
- doctr/transforms/modules/__init__.py +4 -4
- doctr/transforms/modules/base.py +48 -55
- doctr/transforms/modules/pytorch.py +58 -22
- doctr/transforms/modules/tensorflow.py +18 -32
- doctr/utils/common_types.py +8 -9
- doctr/utils/data.py +8 -12
- doctr/utils/fonts.py +2 -7
- doctr/utils/geometry.py +16 -47
- doctr/utils/metrics.py +17 -37
- doctr/utils/multithreading.py +4 -6
- doctr/utils/reconstitution.py +9 -13
- doctr/utils/repr.py +2 -3
- doctr/utils/visualization.py +16 -29
- doctr/version.py +1 -1
- {python_doctr-0.10.0.dist-info → python_doctr-0.11.0.dist-info}/METADATA +54 -52
- python_doctr-0.11.0.dist-info/RECORD +173 -0
- {python_doctr-0.10.0.dist-info → python_doctr-0.11.0.dist-info}/WHEEL +1 -1
- python_doctr-0.10.0.dist-info/RECORD +0 -173
- {python_doctr-0.10.0.dist-info → python_doctr-0.11.0.dist-info}/LICENSE +0 -0
- {python_doctr-0.10.0.dist-info → python_doctr-0.11.0.dist-info}/top_level.txt +0 -0
- {python_doctr-0.10.0.dist-info → python_doctr-0.11.0.dist-info}/zip-safe +0 -0
|
@@ -1,21 +1,29 @@
|
|
|
1
|
-
# Copyright (C) 2021-
|
|
1
|
+
# Copyright (C) 2021-2025, Mindee.
|
|
2
2
|
|
|
3
3
|
# This program is licensed under the Apache License 2.0.
|
|
4
4
|
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
|
|
5
5
|
|
|
6
6
|
import math
|
|
7
|
-
from typing import Optional, Tuple, Union
|
|
8
7
|
|
|
9
8
|
import numpy as np
|
|
10
9
|
import torch
|
|
11
10
|
from PIL.Image import Image
|
|
11
|
+
from scipy.ndimage import gaussian_filter
|
|
12
12
|
from torch.nn.functional import pad
|
|
13
13
|
from torchvision.transforms import functional as F
|
|
14
14
|
from torchvision.transforms import transforms as T
|
|
15
15
|
|
|
16
16
|
from ..functional.pytorch import random_shadow
|
|
17
17
|
|
|
18
|
-
__all__ = [
|
|
18
|
+
__all__ = [
|
|
19
|
+
"Resize",
|
|
20
|
+
"GaussianNoise",
|
|
21
|
+
"ChannelShuffle",
|
|
22
|
+
"RandomHorizontalFlip",
|
|
23
|
+
"RandomShadow",
|
|
24
|
+
"RandomResize",
|
|
25
|
+
"GaussianBlur",
|
|
26
|
+
]
|
|
19
27
|
|
|
20
28
|
|
|
21
29
|
class Resize(T.Resize):
|
|
@@ -23,7 +31,7 @@ class Resize(T.Resize):
|
|
|
23
31
|
|
|
24
32
|
def __init__(
|
|
25
33
|
self,
|
|
26
|
-
size:
|
|
34
|
+
size: int | tuple[int, int],
|
|
27
35
|
interpolation=F.InterpolationMode.BILINEAR,
|
|
28
36
|
preserve_aspect_ratio: bool = False,
|
|
29
37
|
symmetric_pad: bool = False,
|
|
@@ -38,8 +46,8 @@ class Resize(T.Resize):
|
|
|
38
46
|
def forward(
|
|
39
47
|
self,
|
|
40
48
|
img: torch.Tensor,
|
|
41
|
-
target:
|
|
42
|
-
) ->
|
|
49
|
+
target: np.ndarray | None = None,
|
|
50
|
+
) -> torch.Tensor | tuple[torch.Tensor, np.ndarray]:
|
|
43
51
|
if isinstance(self.size, int):
|
|
44
52
|
target_ratio = img.shape[-2] / img.shape[-1]
|
|
45
53
|
else:
|
|
@@ -122,7 +130,6 @@ class GaussianNoise(torch.nn.Module):
|
|
|
122
130
|
>>> out = transfo(torch.rand((3, 224, 224)))
|
|
123
131
|
|
|
124
132
|
Args:
|
|
125
|
-
----
|
|
126
133
|
mean : mean of the gaussian distribution
|
|
127
134
|
std : std of the gaussian distribution
|
|
128
135
|
"""
|
|
@@ -136,14 +143,47 @@ class GaussianNoise(torch.nn.Module):
|
|
|
136
143
|
# Reshape the distribution
|
|
137
144
|
noise = self.mean + 2 * self.std * torch.rand(x.shape, device=x.device) - self.std
|
|
138
145
|
if x.dtype == torch.uint8:
|
|
139
|
-
return (x + 255 * noise).round().clamp(0, 255).to(dtype=torch.uint8)
|
|
146
|
+
return (x + 255 * noise).round().clamp(0, 255).to(dtype=torch.uint8) # type: ignore[attr-defined]
|
|
140
147
|
else:
|
|
141
|
-
return (x + noise.to(dtype=x.dtype)).clamp(0, 1)
|
|
148
|
+
return (x + noise.to(dtype=x.dtype)).clamp(0, 1) # type: ignore[attr-defined]
|
|
142
149
|
|
|
143
150
|
def extra_repr(self) -> str:
|
|
144
151
|
return f"mean={self.mean}, std={self.std}"
|
|
145
152
|
|
|
146
153
|
|
|
154
|
+
class GaussianBlur(torch.nn.Module):
    """Apply Gaussian Blur to the input tensor

    >>> import torch
    >>> from doctr.transforms import GaussianBlur
    >>> transfo = GaussianBlur(sigma=(0.0, 1.0))
    >>> out = transfo(torch.rand((3, 64, 64)))

    Args:
        sigma : standard deviation range for the gaussian kernel
    """

    def __init__(self, sigma: tuple[float, float]) -> None:
        super().__init__()
        self.sigma_range = sigma

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Blur the trailing (up to two) axes of `x` with a randomly sampled sigma.

        Args:
            x : input tensor; assumes a channel-first (..., H, W) layout, so only
                the last two axes are treated as spatial

        Returns:
            a new tensor with the same shape, dtype and device as `x`
        """
        # Sample a random sigma value within the specified range
        sigma = torch.empty(1).uniform_(*self.sigma_range).item()

        # Apply Gaussian blur along spatial dimensions only: a scalar sigma would make
        # scipy smooth every axis (mixing channels / batch items), so a per-axis sigma
        # is passed instead, with 0 on the leading axes to leave them unfiltered.
        num_spatial = min(x.ndim, 2)
        per_axis_sigma = (0.0,) * (x.ndim - num_spatial) + (sigma,) * num_spatial

        blurred = torch.tensor(
            gaussian_filter(
                # scipy works on CPU numpy arrays: detach from the autograd graph and
                # move to host memory first, otherwise `.numpy()` raises.
                x.detach().cpu().numpy(),
                sigma=per_axis_sigma,
                mode="reflect",
                truncate=4.0,
            ),
            dtype=x.dtype,
            device=x.device,
        )
        return blurred
|
|
185
|
+
|
|
186
|
+
|
|
147
187
|
class ChannelShuffle(torch.nn.Module):
|
|
148
188
|
"""Randomly shuffle channel order of a given image"""
|
|
149
189
|
|
|
@@ -159,9 +199,7 @@ class ChannelShuffle(torch.nn.Module):
|
|
|
159
199
|
class RandomHorizontalFlip(T.RandomHorizontalFlip):
|
|
160
200
|
"""Randomly flip the input image horizontally"""
|
|
161
201
|
|
|
162
|
-
def forward(
|
|
163
|
-
self, img: Union[torch.Tensor, Image], target: np.ndarray
|
|
164
|
-
) -> Tuple[Union[torch.Tensor, Image], np.ndarray]:
|
|
202
|
+
def forward(self, img: torch.Tensor | Image, target: np.ndarray) -> tuple[torch.Tensor | Image, np.ndarray]:
|
|
165
203
|
if torch.rand(1) < self.p:
|
|
166
204
|
_img = F.hflip(img)
|
|
167
205
|
_target = target.copy()
|
|
@@ -183,11 +221,10 @@ class RandomShadow(torch.nn.Module):
|
|
|
183
221
|
>>> out = transfo(torch.rand((3, 64, 64)))
|
|
184
222
|
|
|
185
223
|
Args:
|
|
186
|
-
----
|
|
187
224
|
opacity_range : minimum and maximum opacity of the shade
|
|
188
225
|
"""
|
|
189
226
|
|
|
190
|
-
def __init__(self, opacity_range:
|
|
227
|
+
def __init__(self, opacity_range: tuple[float, float] | None = None) -> None:
|
|
191
228
|
super().__init__()
|
|
192
229
|
self.opacity_range = opacity_range if isinstance(opacity_range, tuple) else (0.2, 0.8)
|
|
193
230
|
|
|
@@ -196,7 +233,7 @@ class RandomShadow(torch.nn.Module):
|
|
|
196
233
|
try:
|
|
197
234
|
if x.dtype == torch.uint8:
|
|
198
235
|
return (
|
|
199
|
-
(
|
|
236
|
+
( # type: ignore[attr-defined]
|
|
200
237
|
255
|
|
201
238
|
* random_shadow(
|
|
202
239
|
x.to(dtype=torch.float32) / 255,
|
|
@@ -225,20 +262,19 @@ class RandomResize(torch.nn.Module):
|
|
|
225
262
|
>>> out = transfo(torch.rand((3, 64, 64)))
|
|
226
263
|
|
|
227
264
|
Args:
|
|
228
|
-
----
|
|
229
265
|
scale_range: range of the resizing factor for width and height (independently)
|
|
230
266
|
preserve_aspect_ratio: whether to preserve the aspect ratio of the image,
|
|
231
|
-
|
|
267
|
+
given a float value, the aspect ratio will be preserved with this probability
|
|
232
268
|
symmetric_pad: whether to symmetrically pad the image,
|
|
233
|
-
|
|
269
|
+
given a float value, the symmetric padding will be applied with this probability
|
|
234
270
|
p: probability to apply the transformation
|
|
235
271
|
"""
|
|
236
272
|
|
|
237
273
|
def __init__(
|
|
238
274
|
self,
|
|
239
|
-
scale_range:
|
|
240
|
-
preserve_aspect_ratio:
|
|
241
|
-
symmetric_pad:
|
|
275
|
+
scale_range: tuple[float, float] = (0.3, 0.9),
|
|
276
|
+
preserve_aspect_ratio: bool | float = False,
|
|
277
|
+
symmetric_pad: bool | float = False,
|
|
242
278
|
p: float = 0.5,
|
|
243
279
|
) -> None:
|
|
244
280
|
super().__init__()
|
|
@@ -248,7 +284,7 @@ class RandomResize(torch.nn.Module):
|
|
|
248
284
|
self.p = p
|
|
249
285
|
self._resize = Resize
|
|
250
286
|
|
|
251
|
-
def forward(self, img: torch.Tensor, target: np.ndarray) ->
|
|
287
|
+
def forward(self, img: torch.Tensor, target: np.ndarray) -> tuple[torch.Tensor, np.ndarray]:
|
|
252
288
|
if torch.rand(1) < self.p:
|
|
253
289
|
scale_h = np.random.uniform(*self.scale_range)
|
|
254
290
|
scale_w = np.random.uniform(*self.scale_range)
|
|
@@ -1,10 +1,11 @@
|
|
|
1
|
-
# Copyright (C) 2021-
|
|
1
|
+
# Copyright (C) 2021-2025, Mindee.
|
|
2
2
|
|
|
3
3
|
# This program is licensed under the Apache License 2.0.
|
|
4
4
|
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
|
|
5
5
|
|
|
6
6
|
import random
|
|
7
|
-
from
|
|
7
|
+
from collections.abc import Callable, Iterable
|
|
8
|
+
from typing import Any
|
|
8
9
|
|
|
9
10
|
import numpy as np
|
|
10
11
|
import tensorflow as tf
|
|
@@ -43,13 +44,12 @@ class Compose(NestedObject):
|
|
|
43
44
|
>>> out = transfos(tf.random.uniform(shape=[64, 64, 3], minval=0, maxval=1))
|
|
44
45
|
|
|
45
46
|
Args:
|
|
46
|
-
----
|
|
47
47
|
transforms: list of transformation modules
|
|
48
48
|
"""
|
|
49
49
|
|
|
50
|
-
_children_names:
|
|
50
|
+
_children_names: list[str] = ["transforms"]
|
|
51
51
|
|
|
52
|
-
def __init__(self, transforms:
|
|
52
|
+
def __init__(self, transforms: list[Callable[[Any], Any]]) -> None:
|
|
53
53
|
self.transforms = transforms
|
|
54
54
|
|
|
55
55
|
def __call__(self, x: Any) -> Any:
|
|
@@ -68,7 +68,6 @@ class Resize(NestedObject):
|
|
|
68
68
|
>>> out = transfo(tf.random.uniform(shape=[64, 64, 3], minval=0, maxval=1))
|
|
69
69
|
|
|
70
70
|
Args:
|
|
71
|
-
----
|
|
72
71
|
output_size: expected output size
|
|
73
72
|
method: interpolation method
|
|
74
73
|
preserve_aspect_ratio: if `True`, preserve aspect ratio and pad the rest with zeros
|
|
@@ -77,7 +76,7 @@ class Resize(NestedObject):
|
|
|
77
76
|
|
|
78
77
|
def __init__(
|
|
79
78
|
self,
|
|
80
|
-
output_size:
|
|
79
|
+
output_size: int | tuple[int, int],
|
|
81
80
|
method: str = "bilinear",
|
|
82
81
|
preserve_aspect_ratio: bool = False,
|
|
83
82
|
symmetric_pad: bool = False,
|
|
@@ -104,8 +103,8 @@ class Resize(NestedObject):
|
|
|
104
103
|
def __call__(
|
|
105
104
|
self,
|
|
106
105
|
img: tf.Tensor,
|
|
107
|
-
target:
|
|
108
|
-
) ->
|
|
106
|
+
target: np.ndarray | None = None,
|
|
107
|
+
) -> tf.Tensor | tuple[tf.Tensor, np.ndarray]:
|
|
109
108
|
input_dtype = img.dtype
|
|
110
109
|
self.output_size = (
|
|
111
110
|
(self.output_size, self.output_size) if isinstance(self.output_size, int) else self.output_size
|
|
@@ -164,12 +163,11 @@ class Normalize(NestedObject):
|
|
|
164
163
|
>>> out = transfo(tf.random.uniform(shape=[8, 64, 64, 3], minval=0, maxval=1))
|
|
165
164
|
|
|
166
165
|
Args:
|
|
167
|
-
----
|
|
168
166
|
mean: average value per channel
|
|
169
167
|
std: standard deviation per channel
|
|
170
168
|
"""
|
|
171
169
|
|
|
172
|
-
def __init__(self, mean:
|
|
170
|
+
def __init__(self, mean: tuple[float, float, float], std: tuple[float, float, float]) -> None:
|
|
173
171
|
self.mean = tf.constant(mean)
|
|
174
172
|
self.std = tf.constant(std)
|
|
175
173
|
|
|
@@ -191,7 +189,6 @@ class LambdaTransformation(NestedObject):
|
|
|
191
189
|
>>> out = transfo(tf.random.uniform(shape=[8, 64, 64, 3], minval=0, maxval=1))
|
|
192
190
|
|
|
193
191
|
Args:
|
|
194
|
-
----
|
|
195
192
|
fn: the function to be applied to the input tensor
|
|
196
193
|
"""
|
|
197
194
|
|
|
@@ -229,7 +226,6 @@ class RandomBrightness(NestedObject):
|
|
|
229
226
|
>>> out = transfo(tf.random.uniform(shape=[8, 64, 64, 3], minval=0, maxval=1))
|
|
230
227
|
|
|
231
228
|
Args:
|
|
232
|
-
----
|
|
233
229
|
max_delta: offset to add to each pixel is randomly picked in [-max_delta, max_delta]
|
|
234
230
|
p: probability to apply transformation
|
|
235
231
|
"""
|
|
@@ -254,7 +250,6 @@ class RandomContrast(NestedObject):
|
|
|
254
250
|
>>> out = transfo(tf.random.uniform(shape=[8, 64, 64, 3], minval=0, maxval=1))
|
|
255
251
|
|
|
256
252
|
Args:
|
|
257
|
-
----
|
|
258
253
|
delta: multiplicative factor is picked in [1-delta, 1+delta] (reduce contrast if factor<1)
|
|
259
254
|
"""
|
|
260
255
|
|
|
@@ -278,7 +273,6 @@ class RandomSaturation(NestedObject):
|
|
|
278
273
|
>>> out = transfo(tf.random.uniform(shape=[8, 64, 64, 3], minval=0, maxval=1))
|
|
279
274
|
|
|
280
275
|
Args:
|
|
281
|
-
----
|
|
282
276
|
delta: multiplicative factor is picked in [1-delta, 1+delta] (reduce saturation if factor<1)
|
|
283
277
|
"""
|
|
284
278
|
|
|
@@ -301,7 +295,6 @@ class RandomHue(NestedObject):
|
|
|
301
295
|
>>> out = transfo(tf.random.uniform(shape=[8, 64, 64, 3], minval=0, maxval=1))
|
|
302
296
|
|
|
303
297
|
Args:
|
|
304
|
-
----
|
|
305
298
|
max_delta: offset to add to each pixel is randomly picked in [-max_delta, max_delta]
|
|
306
299
|
"""
|
|
307
300
|
|
|
@@ -324,7 +317,6 @@ class RandomGamma(NestedObject):
|
|
|
324
317
|
>>> out = transfo(tf.random.uniform(shape=[8, 64, 64, 3], minval=0, maxval=1))
|
|
325
318
|
|
|
326
319
|
Args:
|
|
327
|
-
----
|
|
328
320
|
min_gamma: non-negative real number, lower bound for gamma param
|
|
329
321
|
max_gamma: non-negative real number, upper bound for gamma
|
|
330
322
|
min_gain: lower bound for constant multiplier
|
|
@@ -362,7 +354,6 @@ class RandomJpegQuality(NestedObject):
|
|
|
362
354
|
>>> out = transfo(tf.random.uniform(shape=[64, 64, 3], minval=0, maxval=1))
|
|
363
355
|
|
|
364
356
|
Args:
|
|
365
|
-
----
|
|
366
357
|
min_quality: int between [0, 100]
|
|
367
358
|
max_quality: int between [0, 100]
|
|
368
359
|
"""
|
|
@@ -387,12 +378,11 @@ class GaussianBlur(NestedObject):
|
|
|
387
378
|
>>> out = transfo(tf.random.uniform(shape=[64, 64, 3], minval=0, maxval=1))
|
|
388
379
|
|
|
389
380
|
Args:
|
|
390
|
-
----
|
|
391
381
|
kernel_shape: size of the blurring kernel
|
|
392
382
|
std: min and max value of the standard deviation
|
|
393
383
|
"""
|
|
394
384
|
|
|
395
|
-
def __init__(self, kernel_shape:
|
|
385
|
+
def __init__(self, kernel_shape: int | Iterable[int], std: tuple[float, float]) -> None:
|
|
396
386
|
self.kernel_shape = kernel_shape
|
|
397
387
|
self.std = std
|
|
398
388
|
|
|
@@ -430,7 +420,6 @@ class GaussianNoise(NestedObject):
|
|
|
430
420
|
>>> out = transfo(tf.random.uniform(shape=[64, 64, 3], minval=0, maxval=1))
|
|
431
421
|
|
|
432
422
|
Args:
|
|
433
|
-
----
|
|
434
423
|
mean : mean of the gaussian distribution
|
|
435
424
|
std : std of the gaussian distribution
|
|
436
425
|
"""
|
|
@@ -465,7 +454,6 @@ class RandomHorizontalFlip(NestedObject):
|
|
|
465
454
|
>>> out = transfo(image, target)
|
|
466
455
|
|
|
467
456
|
Args:
|
|
468
|
-
----
|
|
469
457
|
p : probability of Horizontal Flip
|
|
470
458
|
"""
|
|
471
459
|
|
|
@@ -473,7 +461,7 @@ class RandomHorizontalFlip(NestedObject):
|
|
|
473
461
|
super().__init__()
|
|
474
462
|
self.p = p
|
|
475
463
|
|
|
476
|
-
def __call__(self, img:
|
|
464
|
+
def __call__(self, img: tf.Tensor | np.ndarray, target: np.ndarray) -> tuple[tf.Tensor, np.ndarray]:
|
|
477
465
|
if np.random.rand(1) <= self.p:
|
|
478
466
|
_img = tf.image.flip_left_right(img)
|
|
479
467
|
_target = target.copy()
|
|
@@ -495,11 +483,10 @@ class RandomShadow(NestedObject):
|
|
|
495
483
|
>>> out = transfo(tf.random.uniform(shape=[64, 64, 3], minval=0, maxval=1))
|
|
496
484
|
|
|
497
485
|
Args:
|
|
498
|
-
----
|
|
499
486
|
opacity_range : minimum and maximum opacity of the shade
|
|
500
487
|
"""
|
|
501
488
|
|
|
502
|
-
def __init__(self, opacity_range:
|
|
489
|
+
def __init__(self, opacity_range: tuple[float, float] | None = None) -> None:
|
|
503
490
|
super().__init__()
|
|
504
491
|
self.opacity_range = opacity_range if isinstance(opacity_range, tuple) else (0.2, 0.8)
|
|
505
492
|
|
|
@@ -530,20 +517,19 @@ class RandomResize(NestedObject):
|
|
|
530
517
|
>>> out = transfo(tf.random.uniform(shape=[64, 64, 3], minval=0, maxval=1))
|
|
531
518
|
|
|
532
519
|
Args:
|
|
533
|
-
----
|
|
534
520
|
scale_range: range of the resizing factor for width and height (independently)
|
|
535
521
|
preserve_aspect_ratio: whether to preserve the aspect ratio of the image,
|
|
536
|
-
|
|
522
|
+
given a float value, the aspect ratio will be preserved with this probability
|
|
537
523
|
symmetric_pad: whether to symmetrically pad the image,
|
|
538
|
-
|
|
524
|
+
given a float value, the symmetric padding will be applied with this probability
|
|
539
525
|
p: probability to apply the transformation
|
|
540
526
|
"""
|
|
541
527
|
|
|
542
528
|
def __init__(
|
|
543
529
|
self,
|
|
544
|
-
scale_range:
|
|
545
|
-
preserve_aspect_ratio:
|
|
546
|
-
symmetric_pad:
|
|
530
|
+
scale_range: tuple[float, float] = (0.3, 0.9),
|
|
531
|
+
preserve_aspect_ratio: bool | float = False,
|
|
532
|
+
symmetric_pad: bool | float = False,
|
|
547
533
|
p: float = 0.5,
|
|
548
534
|
):
|
|
549
535
|
super().__init__()
|
|
@@ -553,7 +539,7 @@ class RandomResize(NestedObject):
|
|
|
553
539
|
self.p = p
|
|
554
540
|
self._resize = Resize
|
|
555
541
|
|
|
556
|
-
def __call__(self, img: tf.Tensor, target: np.ndarray) ->
|
|
542
|
+
def __call__(self, img: tf.Tensor, target: np.ndarray) -> tuple[tf.Tensor, np.ndarray]:
|
|
557
543
|
if np.random.rand(1) <= self.p:
|
|
558
544
|
scale_h = random.uniform(*self.scale_range)
|
|
559
545
|
scale_w = random.uniform(*self.scale_range)
|
doctr/utils/common_types.py
CHANGED
|
@@ -1,18 +1,17 @@
|
|
|
1
|
-
# Copyright (C) 2021-
|
|
1
|
+
# Copyright (C) 2021-2025, Mindee.
|
|
2
2
|
|
|
3
3
|
# This program is licensed under the Apache License 2.0.
|
|
4
4
|
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
|
|
5
5
|
|
|
6
6
|
from pathlib import Path
|
|
7
|
-
from typing import List, Tuple, Union
|
|
8
7
|
|
|
9
8
|
__all__ = ["Point2D", "BoundingBox", "Polygon4P", "Polygon", "Bbox"]
|
|
10
9
|
|
|
11
10
|
|
|
12
|
-
Point2D =
|
|
13
|
-
BoundingBox =
|
|
14
|
-
Polygon4P =
|
|
15
|
-
Polygon =
|
|
16
|
-
AbstractPath =
|
|
17
|
-
AbstractFile =
|
|
18
|
-
Bbox =
|
|
11
|
+
Point2D = tuple[float, float]
|
|
12
|
+
BoundingBox = tuple[Point2D, Point2D]
|
|
13
|
+
Polygon4P = tuple[Point2D, Point2D, Point2D, Point2D]
|
|
14
|
+
Polygon = list[Point2D]
|
|
15
|
+
AbstractPath = str | Path
|
|
16
|
+
AbstractFile = AbstractPath | bytes
|
|
17
|
+
Bbox = tuple[float, float, float, float]
|
doctr/utils/data.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# Copyright (C) 2021-
|
|
1
|
+
# Copyright (C) 2021-2025, Mindee.
|
|
2
2
|
|
|
3
3
|
# This program is licensed under the Apache License 2.0.
|
|
4
4
|
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
|
|
@@ -13,7 +13,6 @@ import urllib
|
|
|
13
13
|
import urllib.error
|
|
14
14
|
import urllib.request
|
|
15
15
|
from pathlib import Path
|
|
16
|
-
from typing import Optional, Union
|
|
17
16
|
|
|
18
17
|
from tqdm.auto import tqdm
|
|
19
18
|
|
|
@@ -25,7 +24,7 @@ HASH_REGEX = re.compile(r"-([a-f0-9]*)\.")
|
|
|
25
24
|
USER_AGENT = "mindee/doctr"
|
|
26
25
|
|
|
27
26
|
|
|
28
|
-
def _urlretrieve(url: str, filename: Union[Path, str], chunk_size: int = 1024) -> None:
|
|
27
|
+
def _urlretrieve(url: str, filename: Path | str, chunk_size: int = 1024) -> None:
|
|
29
28
|
with open(filename, "wb") as fh:
|
|
30
29
|
with urllib.request.urlopen(urllib.request.Request(url, headers={"User-Agent": USER_AGENT})) as response:
|
|
31
30
|
with tqdm(total=response.length) as pbar:
|
|
@@ -36,7 +35,7 @@ def _urlretrieve(url: str, filename: Union[Path, str], chunk_size: int = 1024) -
|
|
|
36
35
|
fh.write(chunk)
|
|
37
36
|
|
|
38
37
|
|
|
39
|
-
def _check_integrity(file_path: Union[str, Path], hash_prefix: str) -> bool:
|
|
38
|
+
def _check_integrity(file_path: str | Path, hash_prefix: str) -> bool:
|
|
40
39
|
with open(file_path, "rb") as f:
|
|
41
40
|
sha_hash = hashlib.sha256(f.read()).hexdigest()
|
|
42
41
|
|
|
@@ -45,10 +44,10 @@ def _check_integrity(file_path: Union[str, Path], hash_prefix: str) -> bool:
|
|
|
45
44
|
|
|
46
45
|
def download_from_url(
|
|
47
46
|
url: str,
|
|
48
|
-
file_name:
|
|
49
|
-
hash_prefix:
|
|
50
|
-
cache_dir:
|
|
51
|
-
cache_subdir:
|
|
47
|
+
file_name: str | None = None,
|
|
48
|
+
hash_prefix: str | None = None,
|
|
49
|
+
cache_dir: str | None = None,
|
|
50
|
+
cache_subdir: str | None = None,
|
|
52
51
|
) -> Path:
|
|
53
52
|
"""Download a file using its URL
|
|
54
53
|
|
|
@@ -56,7 +55,6 @@ def download_from_url(
|
|
|
56
55
|
>>> download_from_url("https://yoursource.com/yourcheckpoint-yourhash.zip")
|
|
57
56
|
|
|
58
57
|
Args:
|
|
59
|
-
----
|
|
60
58
|
url: the URL of the file to download
|
|
61
59
|
file_name: optional name of the file once downloaded
|
|
62
60
|
hash_prefix: optional expected SHA256 hash of the file
|
|
@@ -64,11 +62,9 @@ def download_from_url(
|
|
|
64
62
|
cache_subdir: subfolder to use in the cache
|
|
65
63
|
|
|
66
64
|
Returns:
|
|
67
|
-
-------
|
|
68
65
|
the location of the downloaded file
|
|
69
66
|
|
|
70
67
|
Note:
|
|
71
|
-
----
|
|
72
68
|
You can change cache directory location by using `DOCTR_CACHE_DIR` environment variable.
|
|
73
69
|
"""
|
|
74
70
|
if not isinstance(file_name, str):
|
|
@@ -112,7 +108,7 @@ def download_from_url(
|
|
|
112
108
|
except (urllib.error.URLError, IOError) as e:
|
|
113
109
|
if url[:5] == "https":
|
|
114
110
|
url = url.replace("https:", "http:")
|
|
115
|
-
print("Failed download. Trying https -> http instead.
|
|
111
|
+
print(f"Failed download. Trying https -> http instead. Downloading {url} to {file_path}")
|
|
116
112
|
_urlretrieve(url, file_path)
|
|
117
113
|
else:
|
|
118
114
|
raise e
|
doctr/utils/fonts.py
CHANGED
|
@@ -1,29 +1,24 @@
|
|
|
1
|
-
# Copyright (C) 2021-
|
|
1
|
+
# Copyright (C) 2021-2025, Mindee.
|
|
2
2
|
|
|
3
3
|
# This program is licensed under the Apache License 2.0.
|
|
4
4
|
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
|
|
5
5
|
|
|
6
6
|
import logging
|
|
7
7
|
import platform
|
|
8
|
-
from typing import Optional, Union
|
|
9
8
|
|
|
10
9
|
from PIL import ImageFont
|
|
11
10
|
|
|
12
11
|
__all__ = ["get_font"]
|
|
13
12
|
|
|
14
13
|
|
|
15
|
-
def get_font(
|
|
16
|
-
font_family: Optional[str] = None, font_size: int = 13
|
|
17
|
-
) -> Union[ImageFont.FreeTypeFont, ImageFont.ImageFont]:
|
|
14
|
+
def get_font(font_family: str | None = None, font_size: int = 13) -> ImageFont.FreeTypeFont | ImageFont.ImageFont:
|
|
18
15
|
"""Resolves a compatible ImageFont for the system
|
|
19
16
|
|
|
20
17
|
Args:
|
|
21
|
-
----
|
|
22
18
|
font_family: the font family to use
|
|
23
19
|
font_size: the size of the font upon rendering
|
|
24
20
|
|
|
25
21
|
Returns:
|
|
26
|
-
-------
|
|
27
22
|
the Pillow font
|
|
28
23
|
"""
|
|
29
24
|
# Font selection
|