python-doctr 0.7.0__py3-none-any.whl → 0.8.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- doctr/datasets/__init__.py +2 -0
- doctr/datasets/cord.py +6 -4
- doctr/datasets/datasets/base.py +3 -2
- doctr/datasets/datasets/pytorch.py +4 -2
- doctr/datasets/datasets/tensorflow.py +4 -2
- doctr/datasets/detection.py +6 -3
- doctr/datasets/doc_artefacts.py +2 -1
- doctr/datasets/funsd.py +7 -8
- doctr/datasets/generator/base.py +3 -2
- doctr/datasets/generator/pytorch.py +3 -1
- doctr/datasets/generator/tensorflow.py +3 -1
- doctr/datasets/ic03.py +3 -2
- doctr/datasets/ic13.py +2 -1
- doctr/datasets/iiit5k.py +6 -4
- doctr/datasets/iiithws.py +2 -1
- doctr/datasets/imgur5k.py +3 -2
- doctr/datasets/loader.py +4 -2
- doctr/datasets/mjsynth.py +2 -1
- doctr/datasets/ocr.py +2 -1
- doctr/datasets/orientation.py +40 -0
- doctr/datasets/recognition.py +3 -2
- doctr/datasets/sroie.py +2 -1
- doctr/datasets/svhn.py +2 -1
- doctr/datasets/svt.py +3 -2
- doctr/datasets/synthtext.py +2 -1
- doctr/datasets/utils.py +27 -11
- doctr/datasets/vocabs.py +26 -1
- doctr/datasets/wildreceipt.py +111 -0
- doctr/file_utils.py +3 -1
- doctr/io/elements.py +52 -35
- doctr/io/html.py +5 -3
- doctr/io/image/base.py +5 -4
- doctr/io/image/pytorch.py +12 -7
- doctr/io/image/tensorflow.py +11 -6
- doctr/io/pdf.py +5 -4
- doctr/io/reader.py +13 -5
- doctr/models/_utils.py +30 -53
- doctr/models/artefacts/barcode.py +4 -3
- doctr/models/artefacts/face.py +4 -2
- doctr/models/builder.py +58 -43
- doctr/models/classification/__init__.py +1 -0
- doctr/models/classification/magc_resnet/pytorch.py +5 -2
- doctr/models/classification/magc_resnet/tensorflow.py +5 -2
- doctr/models/classification/mobilenet/pytorch.py +16 -4
- doctr/models/classification/mobilenet/tensorflow.py +29 -20
- doctr/models/classification/predictor/pytorch.py +3 -2
- doctr/models/classification/predictor/tensorflow.py +2 -1
- doctr/models/classification/resnet/pytorch.py +23 -13
- doctr/models/classification/resnet/tensorflow.py +33 -26
- doctr/models/classification/textnet/__init__.py +6 -0
- doctr/models/classification/textnet/pytorch.py +275 -0
- doctr/models/classification/textnet/tensorflow.py +267 -0
- doctr/models/classification/vgg/pytorch.py +4 -2
- doctr/models/classification/vgg/tensorflow.py +5 -2
- doctr/models/classification/vit/pytorch.py +9 -3
- doctr/models/classification/vit/tensorflow.py +9 -3
- doctr/models/classification/zoo.py +7 -2
- doctr/models/core.py +1 -1
- doctr/models/detection/__init__.py +1 -0
- doctr/models/detection/_utils/pytorch.py +7 -1
- doctr/models/detection/_utils/tensorflow.py +7 -3
- doctr/models/detection/core.py +9 -3
- doctr/models/detection/differentiable_binarization/base.py +37 -25
- doctr/models/detection/differentiable_binarization/pytorch.py +80 -104
- doctr/models/detection/differentiable_binarization/tensorflow.py +74 -55
- doctr/models/detection/fast/__init__.py +6 -0
- doctr/models/detection/fast/base.py +256 -0
- doctr/models/detection/fast/pytorch.py +442 -0
- doctr/models/detection/fast/tensorflow.py +428 -0
- doctr/models/detection/linknet/base.py +12 -5
- doctr/models/detection/linknet/pytorch.py +28 -15
- doctr/models/detection/linknet/tensorflow.py +68 -88
- doctr/models/detection/predictor/pytorch.py +16 -6
- doctr/models/detection/predictor/tensorflow.py +13 -5
- doctr/models/detection/zoo.py +19 -16
- doctr/models/factory/hub.py +20 -10
- doctr/models/kie_predictor/base.py +2 -1
- doctr/models/kie_predictor/pytorch.py +28 -36
- doctr/models/kie_predictor/tensorflow.py +27 -27
- doctr/models/modules/__init__.py +1 -0
- doctr/models/modules/layers/__init__.py +6 -0
- doctr/models/modules/layers/pytorch.py +166 -0
- doctr/models/modules/layers/tensorflow.py +175 -0
- doctr/models/modules/transformer/pytorch.py +24 -22
- doctr/models/modules/transformer/tensorflow.py +6 -4
- doctr/models/modules/vision_transformer/pytorch.py +2 -4
- doctr/models/modules/vision_transformer/tensorflow.py +2 -4
- doctr/models/obj_detection/faster_rcnn/pytorch.py +4 -2
- doctr/models/predictor/base.py +14 -3
- doctr/models/predictor/pytorch.py +26 -29
- doctr/models/predictor/tensorflow.py +25 -22
- doctr/models/preprocessor/pytorch.py +14 -9
- doctr/models/preprocessor/tensorflow.py +10 -5
- doctr/models/recognition/core.py +4 -1
- doctr/models/recognition/crnn/pytorch.py +23 -16
- doctr/models/recognition/crnn/tensorflow.py +25 -17
- doctr/models/recognition/master/base.py +4 -1
- doctr/models/recognition/master/pytorch.py +20 -9
- doctr/models/recognition/master/tensorflow.py +20 -8
- doctr/models/recognition/parseq/base.py +4 -1
- doctr/models/recognition/parseq/pytorch.py +28 -22
- doctr/models/recognition/parseq/tensorflow.py +22 -11
- doctr/models/recognition/predictor/_utils.py +3 -2
- doctr/models/recognition/predictor/pytorch.py +3 -2
- doctr/models/recognition/predictor/tensorflow.py +2 -1
- doctr/models/recognition/sar/pytorch.py +14 -7
- doctr/models/recognition/sar/tensorflow.py +23 -14
- doctr/models/recognition/utils.py +5 -1
- doctr/models/recognition/vitstr/base.py +4 -1
- doctr/models/recognition/vitstr/pytorch.py +22 -13
- doctr/models/recognition/vitstr/tensorflow.py +21 -10
- doctr/models/recognition/zoo.py +4 -2
- doctr/models/utils/pytorch.py +24 -6
- doctr/models/utils/tensorflow.py +22 -3
- doctr/models/zoo.py +21 -3
- doctr/transforms/functional/base.py +8 -3
- doctr/transforms/functional/pytorch.py +23 -6
- doctr/transforms/functional/tensorflow.py +25 -5
- doctr/transforms/modules/base.py +12 -5
- doctr/transforms/modules/pytorch.py +10 -12
- doctr/transforms/modules/tensorflow.py +17 -9
- doctr/utils/common_types.py +1 -1
- doctr/utils/data.py +4 -2
- doctr/utils/fonts.py +3 -2
- doctr/utils/geometry.py +95 -26
- doctr/utils/metrics.py +36 -22
- doctr/utils/multithreading.py +5 -3
- doctr/utils/repr.py +3 -1
- doctr/utils/visualization.py +31 -8
- doctr/version.py +1 -1
- {python_doctr-0.7.0.dist-info → python_doctr-0.8.1.dist-info}/METADATA +67 -31
- python_doctr-0.8.1.dist-info/RECORD +173 -0
- {python_doctr-0.7.0.dist-info → python_doctr-0.8.1.dist-info}/WHEEL +1 -1
- python_doctr-0.7.0.dist-info/RECORD +0 -161
- {python_doctr-0.7.0.dist-info → python_doctr-0.8.1.dist-info}/LICENSE +0 -0
- {python_doctr-0.7.0.dist-info → python_doctr-0.8.1.dist-info}/top_level.txt +0 -0
- {python_doctr-0.7.0.dist-info → python_doctr-0.8.1.dist-info}/zip-safe +0 -0
doctr/transforms/functional/pytorch.py
CHANGED

```diff
@@ -1,4 +1,4 @@
-# Copyright (C) 2021-
+# Copyright (C) 2021-2024, Mindee.
 
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
@@ -18,15 +18,26 @@ __all__ = ["invert_colors", "rotate_sample", "crop_detection", "random_shadow"]
 
 
 def invert_colors(img: torch.Tensor, min_val: float = 0.6) -> torch.Tensor:
+    """Invert the colors of an image
+
+    Args:
+    ----
+        img : torch.Tensor, the image to invert
+        min_val : minimum value of the random shift
+
+    Returns:
+    -------
+        the inverted image
+    """
     out = F.rgb_to_grayscale(img, num_output_channels=3)
     # Random RGB shift
     shift_shape = [img.shape[0], 3, 1, 1] if img.ndim == 4 else [3, 1, 1]
     rgb_shift = min_val + (1 - min_val) * torch.rand(shift_shape)
     # Inverse the color
     if out.dtype == torch.uint8:
-        out = (out.to(dtype=rgb_shift.dtype) * rgb_shift).to(dtype=torch.uint8)
+        out = (out.to(dtype=rgb_shift.dtype) * rgb_shift).to(dtype=torch.uint8)  # type: ignore[attr-defined]
    else:
-        out = out * rgb_shift.to(dtype=out.dtype)
+        out = out * rgb_shift.to(dtype=out.dtype)  # type: ignore[attr-defined]
     # Inverse the color
     out = 255 - out if out.dtype == torch.uint8 else 1 - out
     return out
```
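A quick usage sketch of the newly documented helper (PyTorch backend assumed; shapes and values are illustrative):

```python
import torch

from doctr.transforms.functional import invert_colors

# uint8 input takes the integer branch above and stays uint8
img = torch.randint(0, 256, (3, 32, 32), dtype=torch.uint8)
out = invert_colors(img, min_val=0.6)
print(out.shape, out.dtype)  # torch.Size([3, 32, 32]) torch.uint8
```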
```diff
@@ -41,12 +52,14 @@ def rotate_sample(
     """Rotate image around the center, interpolation=NEAREST, pad with 0 (black)
 
     Args:
+    ----
         img: image to rotate
         geoms: array of geometries of shape (N, 4) or (N, 4, 2)
         angle: angle in degrees. +: counter-clockwise, -: clockwise
         expand: whether the image should be padded before the rotation
 
     Returns:
+    -------
         A tuple of rotated img (tensor), rotated geometries of shape (N, 4, 2)
     """
     rotated_img = F.rotate(img, angle=angle, fill=0, expand=expand)  # Interpolation NEAREST by default
@@ -68,7 +81,7 @@ def rotate_sample(
     rotated_geoms: np.ndarray = rotate_abs_geoms(
         _geoms,
         angle,
-        img.shape[1:],
+        img.shape[1:],
         expand,
     ).astype(np.float32)
 
@@ -85,11 +98,13 @@ def crop_detection(
     """Crop and image and associated bboxes
 
     Args:
+    ----
         img: image to crop
         boxes: array of boxes to clip, absolute (int) or relative (float)
         crop_box: box (xmin, ymin, xmax, ymax) to crop the image. Relative coords.
 
     Returns:
+    -------
         A tuple of cropped image, cropped boxes, where the image is not resized.
     """
     if any(val < 0 or val > 1 for val in crop_box):
@@ -108,14 +123,16 @@ def random_shadow(img: torch.Tensor, opacity_range: Tuple[float, float], **kwarg
     """Crop and image and associated bboxes
 
     Args:
+    ----
         img: image to modify
         opacity_range: the minimum and maximum desired opacity of the shadow
+        **kwargs: additional arguments to pass to `create_shadow_mask`
 
     Returns:
+    -------
         shaded image
     """
-
-    shadow_mask = create_shadow_mask(img.shape[1:], **kwargs)  # type: ignore[arg-type]
+    shadow_mask = create_shadow_mask(img.shape[1:], **kwargs)
 
     opacity = np.random.uniform(*opacity_range)
     shadow_tensor = 1 - torch.from_numpy(shadow_mask[None, ...])
```
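A short sketch of `rotate_sample` on an image/boxes pair (doctr's relative box convention is assumed here; values are illustrative):

```python
import numpy as np
import torch

from doctr.transforms.functional import rotate_sample

img = torch.rand(3, 64, 64)  # C, H, W float image in [0, 1]
geoms = np.array([[0.1, 0.1, 0.4, 0.3]], dtype=np.float32)  # (N, 4) boxes
rotated_img, rotated_geoms = rotate_sample(img, geoms, angle=15.0, expand=True)
print(rotated_img.shape, rotated_geoms.shape)  # geometries come back as (N, 4, 2)
```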
doctr/transforms/functional/tensorflow.py
CHANGED

```diff
@@ -1,4 +1,4 @@
-# Copyright (C) 2021-
+# Copyright (C) 2021-2024, Mindee.
 
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
@@ -15,10 +15,21 @@ from doctr.utils.geometry import compute_expanded_shape, rotate_abs_geoms
 
 from .base import create_shadow_mask, crop_boxes
 
-__all__ = ["invert_colors", "rotate_sample", "crop_detection", "random_shadow"]
+__all__ = ["invert_colors", "rotate_sample", "crop_detection", "random_shadow", "rotated_img_tensor"]
 
 
 def invert_colors(img: tf.Tensor, min_val: float = 0.6) -> tf.Tensor:
+    """Invert the colors of an image
+
+    Args:
+    ----
+        img : tf.Tensor, the image to invert
+        min_val : minimum value of the random shift
+
+    Returns:
+    -------
+        the inverted image
+    """
     out = tf.image.rgb_to_grayscale(img)  # Convert to gray
     # Random RGB shift
     shift_shape = [img.shape[0], 1, 1, 3] if img.ndim == 4 else [1, 1, 3]
@@ -37,11 +48,13 @@ def rotated_img_tensor(img: tf.Tensor, angle: float, expand: bool = False) -> tf
     """Rotate image around the center, interpolation=NEAREST, pad with 0 (black)
 
     Args:
+    ----
         img: image to rotate
         angle: angle in degrees. +: counter-clockwise, -: clockwise
         expand: whether the image should be padded before the rotation
 
     Returns:
+    -------
         the rotated image (tensor)
     """
     # Compute the expanded padding
@@ -94,12 +107,14 @@ def rotate_sample(
     """Rotate image around the center, interpolation=NEAREST, pad with 0 (black)
 
     Args:
+    ----
         img: image to rotate
         geoms: array of geometries of shape (N, 4) or (N, 4, 2)
         angle: angle in degrees. +: counter-clockwise, -: clockwise
         expand: whether the image should be padded before the rotation
 
     Returns:
+    -------
         A tuple of rotated img (tensor), rotated boxes (np array)
     """
     # Rotated the image
@@ -134,11 +149,13 @@ def crop_detection(
     """Crop and image and associated bboxes
 
     Args:
+    ----
         img: image to crop
         boxes: array of boxes to clip, absolute (int) or relative (float)
         crop_box: box (xmin, ymin, xmax, ymax) to crop the image. Relative coords.
 
     Returns:
+    -------
         A tuple of cropped image, cropped boxes, where the image is not resized.
     """
     if any(val < 0 or val > 1 for val in crop_box):
```
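`rotated_img_tensor` is now part of the public `__all__`; a minimal TensorFlow sketch (shapes illustrative):

```python
import tensorflow as tf

from doctr.transforms.functional import rotated_img_tensor

img = tf.random.uniform(shape=[64, 64, 3], minval=0, maxval=1)  # H, W, C
rotated = rotated_img_tensor(img, angle=12.5, expand=True)  # pads before rotating
print(rotated.shape)
```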
```diff
@@ -164,14 +181,15 @@ def _gaussian_filter(
     Adapted from: https://github.com/tensorflow/addons/blob/master/tensorflow_addons/image/filters.py
 
     Args:
-
-
+    ----
+        img: image to filter of shape (N, H, W, C)
         kernel_size: kernel size of the filter
         sigma: standard deviation of the Gaussian filter
         mode: padding mode, one of "CONSTANT", "REFLECT", "SYMMETRIC"
         pad_value: value to pad the image with
 
     Returns:
+    -------
         A tensor of shape (N, H, W, C)
     """
     ksize = tf.convert_to_tensor(tf.broadcast_to(kernel_size, [2]), dtype=tf.int32)
```
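For context on what `_gaussian_filter` composes, here is a standalone sketch of a normalized 1-D Gaussian kernel in TensorFlow; this illustrates the idea and is not the doctr helper itself:

```python
import tensorflow as tf


def gaussian_kernel_1d(ksize: int, sigma: float) -> tf.Tensor:
    # Sample a Gaussian at integer offsets around the kernel center,
    # then normalize so the weights sum to 1
    x = tf.range(ksize, dtype=tf.float32) - (ksize - 1) / 2.0
    g = tf.exp(-(x**2) / (2.0 * sigma**2))
    return g / tf.reduce_sum(g)


print(gaussian_kernel_1d(5, 1.0))  # 5-tap blur weights
```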
```diff
@@ -221,13 +239,15 @@ def random_shadow(img: tf.Tensor, opacity_range: Tuple[float, float], **kwargs)
     """Apply a random shadow to a given image
 
     Args:
+    ----
         img: image to modify
         opacity_range: the minimum and maximum desired opacity of the shadow
+        **kwargs: additional arguments to pass to `create_shadow_mask`
 
     Returns:
+    -------
         shadowed image
     """
-
     shadow_mask = create_shadow_mask(img.shape[:2], **kwargs)
 
     opacity = np.random.uniform(*opacity_range)
```
doctr/transforms/modules/base.py
CHANGED

```diff
@@ -1,11 +1,11 @@
-# Copyright (C) 2021-
+# Copyright (C) 2021-2024, Mindee.
 
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
 
 import math
 import random
-from typing import Any, Callable, Dict, List, Tuple
+from typing import Any, Callable, Dict, List, Optional, Tuple, Union
 
 import numpy as np
 
@@ -42,6 +42,7 @@ class SampleCompose(NestedObject):
     >>> out, out_boxes = transfos(torch.rand(8, 64, 64, 3), np.zeros((2, 4)))
 
     Args:
+    ----
         transforms: list of transformation modules
     """
 
@@ -81,6 +82,7 @@ class ImageTransform(NestedObject):
     >>> out, _ = transfo(torch.rand(8, 64, 64, 3), None)
 
     Args:
+    ----
         transform: the image transformation module to wrap
     """
 
@@ -119,6 +121,7 @@ class ColorInversion(NestedObject):
     >>> out = transfo(torch.rand(8, 64, 64, 3))
 
     Args:
+    ----
         min_val: range [min_val, 1] to colorize RGB pixels
     """
 
@@ -156,6 +159,7 @@ class OneOf(NestedObject):
     >>> out = transfo(torch.rand(1, 64, 64, 3))
 
     Args:
+    ----
         transforms: list of transformations, one only will be picked
     """
 
@@ -195,6 +199,7 @@ class RandomApply(NestedObject):
     >>> out = transfo(torch.rand(1, 64, 64, 3))
 
     Args:
+    ----
         transform: transformation to apply
         p: probability to apply
     """
@@ -206,10 +211,10 @@ class RandomApply(NestedObject):
     def extra_repr(self) -> str:
         return f"transform={self.transform}, p={self.p}"
 
-    def __call__(self, img: Any) -> Any:
+    def __call__(self, img: Any, target: Optional[np.ndarray] = None) -> Union[Any, Tuple[Any, np.ndarray]]:
         if random.random() < self.p:
-            return self.transform(img)
-        return img
+            return self.transform(img) if target is None else self.transform(img, target)  # type: ignore[call-arg]
+        return img if target is None else (img, target)
 
 
 class RandomRotate(NestedObject):
```
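`RandomApply.__call__` now optionally forwards a target array, returning it untouched when the wrapped transform is skipped. A minimal sketch with a target-aware transform (assuming doctr's `RandomRotate` accepts and returns an image/target pair, as its use of `rotate_sample` suggests):

```python
import numpy as np
import torch

from doctr.transforms import RandomApply, RandomRotate

transfo = RandomApply(RandomRotate(max_angle=10.0, expand=False), p=0.5)

# Image-only call: unchanged 0.7.x behaviour
out = transfo(torch.rand(3, 64, 64))

# New: image + target, returned as a tuple whether or not the transform fires
img, boxes = transfo(torch.rand(3, 64, 64), np.zeros((2, 4), dtype=np.float32))
```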
```diff
@@ -219,6 +224,7 @@ class RandomRotate(NestedObject):
     :align: center
 
     Args:
+    ----
         max_angle: maximum angle for rotation, in degrees. Angles will be uniformly picked in
             [-max_angle, max_angle]
         expand: whether the image should be padded before the rotation
@@ -243,6 +249,7 @@ class RandomCrop(NestedObject):
     """Randomly crop a tensor image and its boxes
 
     Args:
+    ----
         scale: tuple of floats, relative (min_area, max_area) of the crop
         ratio: tuple of float, relative (min_ratio, max_ratio) where ratio = h/w
     """
```
doctr/transforms/modules/pytorch.py
CHANGED

```diff
@@ -1,4 +1,4 @@
-# Copyright (C) 2021-
+# Copyright (C) 2021-2024, Mindee.
 
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
@@ -19,6 +19,8 @@ __all__ = ["Resize", "GaussianNoise", "ChannelShuffle", "RandomHorizontalFlip",
 
 
 class Resize(T.Resize):
+    """Resize the input image to the given size"""
+
     def __init__(
         self,
         size: Union[int, Tuple[int, int]],
@@ -119,6 +121,7 @@ class GaussianNoise(torch.nn.Module):
     >>> out = transfo(torch.rand((3, 224, 224)))
 
     Args:
+    ----
         mean : mean of the gaussian distribution
         std : std of the gaussian distribution
     """
@@ -132,9 +135,9 @@ class GaussianNoise(torch.nn.Module):
         # Reshape the distribution
         noise = self.mean + 2 * self.std * torch.rand(x.shape, device=x.device) - self.std
         if x.dtype == torch.uint8:
-            return (x + 255 * noise).round().clamp(0, 255).to(dtype=torch.uint8)
+            return (x + 255 * noise).round().clamp(0, 255).to(dtype=torch.uint8)  # type: ignore[attr-defined]
         else:
-            return (x + noise.to(dtype=x.dtype)).clamp(0, 1)
+            return (x + noise.to(dtype=x.dtype)).clamp(0, 1)  # type: ignore[attr-defined]
 
     def extra_repr(self) -> str:
         return f"mean={self.mean}, std={self.std}"
```
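Worth noting from the hunk above: despite the class name, the noise is drawn uniformly from [mean - std, mean + std] (`mean + 2 * std * rand - std`). A usage sketch covering both dtype branches (constructor arguments passed explicitly, since defaults are not shown in this diff):

```python
import torch

from doctr.transforms import GaussianNoise

transfo = GaussianNoise(mean=0.0, std=0.1)

out_f = transfo(torch.rand(3, 224, 224))  # float input, clamped back to [0, 1]
out_u8 = transfo(torch.randint(0, 256, (3, 224, 224), dtype=torch.uint8))  # stays uint8
```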
```diff
@@ -153,17 +156,11 @@ class ChannelShuffle(torch.nn.Module):
 
 
 class RandomHorizontalFlip(T.RandomHorizontalFlip):
+    """Randomly flip the input image horizontally"""
+
     def forward(
         self, img: Union[torch.Tensor, Image], target: Dict[str, Any]
     ) -> Tuple[Union[torch.Tensor, Image], Dict[str, Any]]:
-        """
-        Args:
-            img: Image to be flipped.
-            target: Dictionary with boxes (in relative coordinates of shape (N, 4)) and labels as keys
-
-        Returns:
-            Tuple of PIL Image or Tensor and target
-        """
         if torch.rand(1) < self.p:
             _img = F.hflip(img)
             _target = target.copy()
@@ -182,6 +179,7 @@ class RandomShadow(torch.nn.Module):
     >>> out = transfo(torch.rand((3, 64, 64)))
 
     Args:
+    ----
         opacity_range : minimum and maximum opacity of the shade
     """
 
@@ -201,7 +199,7 @@ class RandomShadow(torch.nn.Module):
                     self.opacity_range,
                 )
             )
-            .round()
+            .round()  # type: ignore[attr-defined]
             .clip(0, 255)
             .to(dtype=torch.uint8)
         )
```
doctr/transforms/modules/tensorflow.py
CHANGED

```diff
@@ -1,4 +1,4 @@
-# Copyright (C) 2021-
+# Copyright (C) 2021-2024, Mindee.
 
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
@@ -42,6 +42,7 @@ class Compose(NestedObject):
     >>> out = transfos(tf.random.uniform(shape=[64, 64, 3], minval=0, maxval=1))
 
     Args:
+    ----
         transforms: list of transformation modules
     """
 
@@ -66,6 +67,7 @@ class Resize(NestedObject):
     >>> out = transfo(tf.random.uniform(shape=[64, 64, 3], minval=0, maxval=1))
 
     Args:
+    ----
         output_size: expected output size
         method: interpolation method
         preserve_aspect_ratio: if `True`, preserve aspect ratio and pad the rest with zeros
@@ -83,6 +85,7 @@ class Resize(NestedObject):
         self.method = method
         self.preserve_aspect_ratio = preserve_aspect_ratio
         self.symmetric_pad = symmetric_pad
+        self.antialias = True
 
         if isinstance(self.output_size, int):
             self.wanted_size = (self.output_size, self.output_size)
@@ -104,7 +107,7 @@ class Resize(NestedObject):
     ) -> Union[tf.Tensor, Tuple[tf.Tensor, np.ndarray]]:
         input_dtype = img.dtype
 
-        img = tf.image.resize(img, self.wanted_size, self.method, self.preserve_aspect_ratio)
+        img = tf.image.resize(img, self.wanted_size, self.method, self.preserve_aspect_ratio, self.antialias)
         # It will produce an un-padded resized image, with a side shorter than wanted if we preserve aspect ratio
         raw_shape = img.shape[:2]
         if self.preserve_aspect_ratio:
```
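The TensorFlow `Resize` now hard-codes `antialias=True` and forwards it as the fifth positional argument of `tf.image.resize`. A sketch of the equivalent low-level call:

```python
import tensorflow as tf

img = tf.random.uniform(shape=[64, 64, 3], minval=0, maxval=1)
# What the resize step above now delegates to (antialiasing enabled)
resized = tf.image.resize(img, (32, 32), method="bilinear", preserve_aspect_ratio=True, antialias=True)
print(resized.shape)
```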
```diff
@@ -156,6 +159,7 @@ class Normalize(NestedObject):
     >>> out = transfo(tf.random.uniform(shape=[8, 64, 64, 3], minval=0, maxval=1))
 
     Args:
+    ----
         mean: average value per channel
         std: standard deviation per channel
     """
@@ -182,6 +186,7 @@ class LambdaTransformation(NestedObject):
     >>> out = transfo(tf.random.uniform(shape=[8, 64, 64, 3], minval=0, maxval=1))
 
     Args:
+    ----
         fn: the function to be applied to the input tensor
     """
 
@@ -219,6 +224,7 @@ class RandomBrightness(NestedObject):
     >>> out = transfo(tf.random.uniform(shape=[8, 64, 64, 3], minval=0, maxval=1))
 
     Args:
+    ----
         max_delta: offset to add to each pixel is randomly picked in [-max_delta, max_delta]
         p: probability to apply transformation
     """
@@ -243,6 +249,7 @@ class RandomContrast(NestedObject):
     >>> out = transfo(tf.random.uniform(shape=[8, 64, 64, 3], minval=0, maxval=1))
 
     Args:
+    ----
         delta: multiplicative factor is picked in [1-delta, 1+delta] (reduce contrast if factor<1)
     """
 
@@ -266,6 +273,7 @@ class RandomSaturation(NestedObject):
     >>> out = transfo(tf.random.uniform(shape=[8, 64, 64, 3], minval=0, maxval=1))
 
     Args:
+    ----
         delta: multiplicative factor is picked in [1-delta, 1+delta] (reduce saturation if factor<1)
     """
 
@@ -288,6 +296,7 @@ class RandomHue(NestedObject):
     >>> out = transfo(tf.random.uniform(shape=[8, 64, 64, 3], minval=0, maxval=1))
 
     Args:
+    ----
         max_delta: offset to add to each pixel is randomly picked in [-max_delta, max_delta]
     """
 
@@ -310,6 +319,7 @@ class RandomGamma(NestedObject):
     >>> out = transfo(tf.random.uniform(shape=[8, 64, 64, 3], minval=0, maxval=1))
 
     Args:
+    ----
         min_gamma: non-negative real number, lower bound for gamma param
         max_gamma: non-negative real number, upper bound for gamma
         min_gain: lower bound for constant multiplier
@@ -347,6 +357,7 @@ class RandomJpegQuality(NestedObject):
     >>> out = transfo(tf.random.uniform(shape=[64, 64, 3], minval=0, maxval=1))
 
     Args:
+    ----
         min_quality: int between [0, 100]
         max_quality: int between [0, 100]
     """
@@ -371,6 +382,7 @@ class GaussianBlur(NestedObject):
     >>> out = transfo(tf.random.uniform(shape=[64, 64, 3], minval=0, maxval=1))
 
     Args:
+    ----
         kernel_shape: size of the blurring kernel
         std: min and max value of the standard deviation
     """
@@ -414,6 +426,7 @@ class GaussianNoise(NestedObject):
     >>> out = transfo(tf.random.uniform(shape=[64, 64, 3], minval=0, maxval=1))
 
     Args:
+    ----
         mean : mean of the gaussian distribution
         std : std of the gaussian distribution
     """
@@ -451,6 +464,7 @@ class RandomHorizontalFlip(NestedObject):
     >>> out = transfo(image, target)
 
     Args:
+    ----
         p : probability of Horizontal Flip
     """
 
@@ -459,13 +473,6 @@ class RandomHorizontalFlip(NestedObject):
         self.p = p
 
     def __call__(self, img: Union[tf.Tensor, np.ndarray], target: Dict[str, Any]) -> Tuple[tf.Tensor, Dict[str, Any]]:
-        """
-        Args:
-            img: Image to be flipped.
-            target: Dictionary with boxes (in relative coordinates of shape (N, 4)) and labels as keys
-        Returns:
-            Tuple of numpy nd-array or Tensor and target
-        """
         if np.random.rand(1) <= self.p:
             _img = tf.image.flip_left_right(img)
             _target = target.copy()
```
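The removed `__call__` docstring described the expected target format (relative boxes of shape (N, 4) plus labels); a usage sketch preserving that contract:

```python
import numpy as np
import tensorflow as tf

from doctr.transforms import RandomHorizontalFlip

transfo = RandomHorizontalFlip(p=0.5)
image = tf.random.uniform(shape=[64, 64, 3], minval=0, maxval=1)
target = {
    "boxes": np.array([[0.1, 0.1, 0.4, 0.5]], dtype=np.float32),  # relative (N, 4)
    "labels": np.zeros(1, dtype=np.int64),
}
image, target = transfo(image, target)
```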
```diff
@@ -484,6 +491,7 @@ class RandomShadow(NestedObject):
     >>> out = transfo(tf.random.uniform(shape=[64, 64, 3], minval=0, maxval=1))
 
     Args:
+    ----
         opacity_range : minimum and maximum opacity of the shade
     """
 
```
doctr/utils/common_types.py
CHANGED

doctr/utils/data.py
CHANGED

```diff
@@ -1,4 +1,4 @@
-# Copyright (C) 2021-
+# Copyright (C) 2021-2024, Mindee.
 
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
@@ -56,6 +56,7 @@ def download_from_url(
     >>> download_from_url("https://yoursource.com/yourcheckpoint-yourhash.zip")
 
     Args:
+    ----
         url: the URL of the file to download
         file_name: optional name of the file once downloaded
         hash_prefix: optional expected SHA256 hash of the file
@@ -63,12 +64,13 @@ def download_from_url(
         cache_subdir: subfolder to use in the cache
 
     Returns:
+    -------
         the location of the downloaded file
 
     Note:
+    ----
         You can change cache directory location by using `DOCTR_CACHE_DIR` environment variable.
     """
-
     if not isinstance(file_name, str):
         file_name = url.rpartition("/")[-1].split("&")[0]
 
```
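A quick sketch of the documented entry point (the URL is the placeholder from the docstring and will not resolve; `cache_subdir` is among the documented arguments):

```python
from doctr.utils.data import download_from_url

# Downloads into the doctr cache; the cache root can be relocated
# via the DOCTR_CACHE_DIR environment variable (see the Note above)
path = download_from_url(
    "https://yoursource.com/yourcheckpoint-yourhash.zip",
    cache_subdir="checkpoints",
)
print(path)
```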
doctr/utils/fonts.py
CHANGED

```diff
@@ -1,4 +1,4 @@
-# Copyright (C) 2021-
+# Copyright (C) 2021-2024, Mindee.
 
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
@@ -16,13 +16,14 @@ def get_font(font_family: Optional[str] = None, font_size: int = 13) -> ImageFon
     """Resolves a compatible ImageFont for the system
 
     Args:
+    ----
         font_family: the font family to use
         font_size: the size of the font upon rendering
 
     Returns:
+    -------
         the Pillow font
     """
-
     # Font selection
     if font_family is None:
         try:
```