python-doctr 0.10.0__py3-none-any.whl → 0.12.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (172) hide show
  1. doctr/contrib/__init__.py +1 -0
  2. doctr/contrib/artefacts.py +7 -9
  3. doctr/contrib/base.py +8 -17
  4. doctr/datasets/__init__.py +1 -0
  5. doctr/datasets/coco_text.py +139 -0
  6. doctr/datasets/cord.py +10 -8
  7. doctr/datasets/datasets/__init__.py +4 -4
  8. doctr/datasets/datasets/base.py +16 -16
  9. doctr/datasets/datasets/pytorch.py +12 -12
  10. doctr/datasets/datasets/tensorflow.py +10 -10
  11. doctr/datasets/detection.py +6 -9
  12. doctr/datasets/doc_artefacts.py +3 -4
  13. doctr/datasets/funsd.py +9 -8
  14. doctr/datasets/generator/__init__.py +4 -4
  15. doctr/datasets/generator/base.py +16 -17
  16. doctr/datasets/generator/pytorch.py +1 -3
  17. doctr/datasets/generator/tensorflow.py +1 -3
  18. doctr/datasets/ic03.py +5 -6
  19. doctr/datasets/ic13.py +6 -6
  20. doctr/datasets/iiit5k.py +10 -6
  21. doctr/datasets/iiithws.py +4 -5
  22. doctr/datasets/imgur5k.py +15 -7
  23. doctr/datasets/loader.py +4 -7
  24. doctr/datasets/mjsynth.py +6 -5
  25. doctr/datasets/ocr.py +3 -4
  26. doctr/datasets/orientation.py +3 -4
  27. doctr/datasets/recognition.py +4 -5
  28. doctr/datasets/sroie.py +6 -5
  29. doctr/datasets/svhn.py +7 -6
  30. doctr/datasets/svt.py +6 -7
  31. doctr/datasets/synthtext.py +19 -7
  32. doctr/datasets/utils.py +41 -35
  33. doctr/datasets/vocabs.py +1107 -49
  34. doctr/datasets/wildreceipt.py +14 -10
  35. doctr/file_utils.py +11 -7
  36. doctr/io/elements.py +96 -82
  37. doctr/io/html.py +1 -3
  38. doctr/io/image/__init__.py +3 -3
  39. doctr/io/image/base.py +2 -5
  40. doctr/io/image/pytorch.py +3 -12
  41. doctr/io/image/tensorflow.py +2 -11
  42. doctr/io/pdf.py +5 -7
  43. doctr/io/reader.py +5 -11
  44. doctr/models/_utils.py +15 -23
  45. doctr/models/builder.py +30 -48
  46. doctr/models/classification/__init__.py +1 -0
  47. doctr/models/classification/magc_resnet/__init__.py +3 -3
  48. doctr/models/classification/magc_resnet/pytorch.py +11 -15
  49. doctr/models/classification/magc_resnet/tensorflow.py +11 -14
  50. doctr/models/classification/mobilenet/__init__.py +3 -3
  51. doctr/models/classification/mobilenet/pytorch.py +20 -18
  52. doctr/models/classification/mobilenet/tensorflow.py +19 -23
  53. doctr/models/classification/predictor/__init__.py +4 -4
  54. doctr/models/classification/predictor/pytorch.py +7 -9
  55. doctr/models/classification/predictor/tensorflow.py +6 -8
  56. doctr/models/classification/resnet/__init__.py +4 -4
  57. doctr/models/classification/resnet/pytorch.py +47 -34
  58. doctr/models/classification/resnet/tensorflow.py +45 -35
  59. doctr/models/classification/textnet/__init__.py +3 -3
  60. doctr/models/classification/textnet/pytorch.py +20 -18
  61. doctr/models/classification/textnet/tensorflow.py +19 -17
  62. doctr/models/classification/vgg/__init__.py +3 -3
  63. doctr/models/classification/vgg/pytorch.py +21 -8
  64. doctr/models/classification/vgg/tensorflow.py +20 -14
  65. doctr/models/classification/vip/__init__.py +4 -0
  66. doctr/models/classification/vip/layers/__init__.py +4 -0
  67. doctr/models/classification/vip/layers/pytorch.py +615 -0
  68. doctr/models/classification/vip/pytorch.py +505 -0
  69. doctr/models/classification/vit/__init__.py +3 -3
  70. doctr/models/classification/vit/pytorch.py +18 -15
  71. doctr/models/classification/vit/tensorflow.py +15 -12
  72. doctr/models/classification/zoo.py +23 -14
  73. doctr/models/core.py +3 -3
  74. doctr/models/detection/_utils/__init__.py +4 -4
  75. doctr/models/detection/_utils/base.py +4 -7
  76. doctr/models/detection/_utils/pytorch.py +1 -5
  77. doctr/models/detection/_utils/tensorflow.py +1 -5
  78. doctr/models/detection/core.py +2 -8
  79. doctr/models/detection/differentiable_binarization/__init__.py +4 -4
  80. doctr/models/detection/differentiable_binarization/base.py +10 -21
  81. doctr/models/detection/differentiable_binarization/pytorch.py +37 -31
  82. doctr/models/detection/differentiable_binarization/tensorflow.py +26 -29
  83. doctr/models/detection/fast/__init__.py +4 -4
  84. doctr/models/detection/fast/base.py +8 -17
  85. doctr/models/detection/fast/pytorch.py +37 -35
  86. doctr/models/detection/fast/tensorflow.py +24 -28
  87. doctr/models/detection/linknet/__init__.py +4 -4
  88. doctr/models/detection/linknet/base.py +8 -18
  89. doctr/models/detection/linknet/pytorch.py +34 -28
  90. doctr/models/detection/linknet/tensorflow.py +24 -25
  91. doctr/models/detection/predictor/__init__.py +5 -5
  92. doctr/models/detection/predictor/pytorch.py +6 -7
  93. doctr/models/detection/predictor/tensorflow.py +5 -6
  94. doctr/models/detection/zoo.py +27 -7
  95. doctr/models/factory/hub.py +6 -10
  96. doctr/models/kie_predictor/__init__.py +5 -5
  97. doctr/models/kie_predictor/base.py +4 -5
  98. doctr/models/kie_predictor/pytorch.py +19 -20
  99. doctr/models/kie_predictor/tensorflow.py +14 -15
  100. doctr/models/modules/layers/__init__.py +3 -3
  101. doctr/models/modules/layers/pytorch.py +55 -10
  102. doctr/models/modules/layers/tensorflow.py +5 -7
  103. doctr/models/modules/transformer/__init__.py +3 -3
  104. doctr/models/modules/transformer/pytorch.py +12 -13
  105. doctr/models/modules/transformer/tensorflow.py +9 -10
  106. doctr/models/modules/vision_transformer/__init__.py +3 -3
  107. doctr/models/modules/vision_transformer/pytorch.py +2 -3
  108. doctr/models/modules/vision_transformer/tensorflow.py +3 -3
  109. doctr/models/predictor/__init__.py +5 -5
  110. doctr/models/predictor/base.py +28 -29
  111. doctr/models/predictor/pytorch.py +13 -14
  112. doctr/models/predictor/tensorflow.py +9 -10
  113. doctr/models/preprocessor/__init__.py +4 -4
  114. doctr/models/preprocessor/pytorch.py +13 -17
  115. doctr/models/preprocessor/tensorflow.py +10 -14
  116. doctr/models/recognition/__init__.py +1 -0
  117. doctr/models/recognition/core.py +3 -7
  118. doctr/models/recognition/crnn/__init__.py +4 -4
  119. doctr/models/recognition/crnn/pytorch.py +30 -29
  120. doctr/models/recognition/crnn/tensorflow.py +21 -24
  121. doctr/models/recognition/master/__init__.py +3 -3
  122. doctr/models/recognition/master/base.py +3 -7
  123. doctr/models/recognition/master/pytorch.py +32 -25
  124. doctr/models/recognition/master/tensorflow.py +22 -25
  125. doctr/models/recognition/parseq/__init__.py +3 -3
  126. doctr/models/recognition/parseq/base.py +3 -7
  127. doctr/models/recognition/parseq/pytorch.py +47 -29
  128. doctr/models/recognition/parseq/tensorflow.py +29 -27
  129. doctr/models/recognition/predictor/__init__.py +5 -5
  130. doctr/models/recognition/predictor/_utils.py +111 -52
  131. doctr/models/recognition/predictor/pytorch.py +9 -9
  132. doctr/models/recognition/predictor/tensorflow.py +8 -9
  133. doctr/models/recognition/sar/__init__.py +4 -4
  134. doctr/models/recognition/sar/pytorch.py +30 -22
  135. doctr/models/recognition/sar/tensorflow.py +22 -24
  136. doctr/models/recognition/utils.py +57 -53
  137. doctr/models/recognition/viptr/__init__.py +4 -0
  138. doctr/models/recognition/viptr/pytorch.py +277 -0
  139. doctr/models/recognition/vitstr/__init__.py +4 -4
  140. doctr/models/recognition/vitstr/base.py +3 -7
  141. doctr/models/recognition/vitstr/pytorch.py +28 -21
  142. doctr/models/recognition/vitstr/tensorflow.py +22 -23
  143. doctr/models/recognition/zoo.py +27 -11
  144. doctr/models/utils/__init__.py +4 -4
  145. doctr/models/utils/pytorch.py +41 -34
  146. doctr/models/utils/tensorflow.py +31 -23
  147. doctr/models/zoo.py +1 -5
  148. doctr/transforms/functional/__init__.py +3 -3
  149. doctr/transforms/functional/base.py +4 -11
  150. doctr/transforms/functional/pytorch.py +20 -28
  151. doctr/transforms/functional/tensorflow.py +10 -22
  152. doctr/transforms/modules/__init__.py +4 -4
  153. doctr/transforms/modules/base.py +48 -55
  154. doctr/transforms/modules/pytorch.py +58 -22
  155. doctr/transforms/modules/tensorflow.py +18 -32
  156. doctr/utils/common_types.py +8 -9
  157. doctr/utils/data.py +9 -13
  158. doctr/utils/fonts.py +2 -7
  159. doctr/utils/geometry.py +17 -48
  160. doctr/utils/metrics.py +17 -37
  161. doctr/utils/multithreading.py +4 -6
  162. doctr/utils/reconstitution.py +9 -13
  163. doctr/utils/repr.py +2 -3
  164. doctr/utils/visualization.py +16 -29
  165. doctr/version.py +1 -1
  166. {python_doctr-0.10.0.dist-info → python_doctr-0.12.0.dist-info}/METADATA +70 -52
  167. python_doctr-0.12.0.dist-info/RECORD +180 -0
  168. {python_doctr-0.10.0.dist-info → python_doctr-0.12.0.dist-info}/WHEEL +1 -1
  169. python_doctr-0.10.0.dist-info/RECORD +0 -173
  170. {python_doctr-0.10.0.dist-info → python_doctr-0.12.0.dist-info/licenses}/LICENSE +0 -0
  171. {python_doctr-0.10.0.dist-info → python_doctr-0.12.0.dist-info}/top_level.txt +0 -0
  172. {python_doctr-0.10.0.dist-info → python_doctr-0.12.0.dist-info}/zip-safe +0 -0
@@ -1,11 +1,12 @@
1
- # Copyright (C) 2021-2024, Mindee.
1
+ # Copyright (C) 2021-2025, Mindee.
2
2
 
3
3
  # This program is licensed under the Apache License 2.0.
4
4
  # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
5
5
 
6
6
  import math
7
7
  import random
8
- from typing import Any, Callable, List, Optional, Tuple, Union
8
+ from collections.abc import Callable
9
+ from typing import Any
9
10
 
10
11
  import numpy as np
11
12
 
@@ -21,37 +22,36 @@ class SampleCompose(NestedObject):
21
22
 
22
23
  .. tabs::
23
24
 
24
- .. tab:: TensorFlow
25
+ .. tab:: PyTorch
25
26
 
26
27
  .. code:: python
27
28
 
28
29
  >>> import numpy as np
29
- >>> import tensorflow as tf
30
+ >>> import torch
30
31
  >>> from doctr.transforms import SampleCompose, ImageTransform, ColorInversion, RandomRotate
31
- >>> transfo = SampleCompose([ImageTransform(ColorInversion((32, 32))), RandomRotate(30)])
32
- >>> out, out_boxes = transfo(tf.random.uniform(shape=[64, 64, 3], minval=0, maxval=1), np.zeros((2, 4)))
32
+ >>> transfos = SampleCompose([ImageTransform(ColorInversion((32, 32))), RandomRotate(30)])
33
+ >>> out, out_boxes = transfos(torch.rand(8, 64, 64, 3), np.zeros((2, 4)))
33
34
 
34
- .. tab:: PyTorch
35
+ .. tab:: TensorFlow
35
36
 
36
37
  .. code:: python
37
38
 
38
39
  >>> import numpy as np
39
- >>> import torch
40
+ >>> import tensorflow as tf
40
41
  >>> from doctr.transforms import SampleCompose, ImageTransform, ColorInversion, RandomRotate
41
- >>> transfos = SampleCompose([ImageTransform(ColorInversion((32, 32))), RandomRotate(30)])
42
- >>> out, out_boxes = transfos(torch.rand(8, 64, 64, 3), np.zeros((2, 4)))
42
+ >>> transfo = SampleCompose([ImageTransform(ColorInversion((32, 32))), RandomRotate(30)])
43
+ >>> out, out_boxes = transfo(tf.random.uniform(shape=[64, 64, 3], minval=0, maxval=1), np.zeros((2, 4)))
43
44
 
44
45
  Args:
45
- ----
46
46
  transforms: list of transformation modules
47
47
  """
48
48
 
49
- _children_names: List[str] = ["sample_transforms"]
49
+ _children_names: list[str] = ["sample_transforms"]
50
50
 
51
- def __init__(self, transforms: List[Callable[[Any, Any], Tuple[Any, Any]]]) -> None:
51
+ def __init__(self, transforms: list[Callable[[Any, Any], tuple[Any, Any]]]) -> None:
52
52
  self.sample_transforms = transforms
53
53
 
54
- def __call__(self, x: Any, target: Any) -> Tuple[Any, Any]:
54
+ def __call__(self, x: Any, target: Any) -> tuple[Any, Any]:
55
55
  for t in self.sample_transforms:
56
56
  x, target = t(x, target)
57
57
 
@@ -63,35 +63,34 @@ class ImageTransform(NestedObject):
63
63
 
64
64
  .. tabs::
65
65
 
66
- .. tab:: TensorFlow
66
+ .. tab:: PyTorch
67
67
 
68
68
  .. code:: python
69
69
 
70
- >>> import tensorflow as tf
70
+ >>> import torch
71
71
  >>> from doctr.transforms import ImageTransform, ColorInversion
72
72
  >>> transfo = ImageTransform(ColorInversion((32, 32)))
73
- >>> out, _ = transfo(tf.random.uniform(shape=[64, 64, 3], minval=0, maxval=1), None)
73
+ >>> out, _ = transfo(torch.rand(8, 64, 64, 3), None)
74
74
 
75
- .. tab:: PyTorch
75
+ .. tab:: TensorFlow
76
76
 
77
77
  .. code:: python
78
78
 
79
- >>> import torch
79
+ >>> import tensorflow as tf
80
80
  >>> from doctr.transforms import ImageTransform, ColorInversion
81
81
  >>> transfo = ImageTransform(ColorInversion((32, 32)))
82
- >>> out, _ = transfo(torch.rand(8, 64, 64, 3), None)
82
+ >>> out, _ = transfo(tf.random.uniform(shape=[64, 64, 3], minval=0, maxval=1), None)
83
83
 
84
84
  Args:
85
- ----
86
85
  transform: the image transformation module to wrap
87
86
  """
88
87
 
89
- _children_names: List[str] = ["img_transform"]
88
+ _children_names: list[str] = ["img_transform"]
90
89
 
91
90
  def __init__(self, transform: Callable[[Any], Any]) -> None:
92
91
  self.img_transform = transform
93
92
 
94
- def __call__(self, img: Any, target: Any) -> Tuple[Any, Any]:
93
+ def __call__(self, img: Any, target: Any) -> tuple[Any, Any]:
95
94
  img = self.img_transform(img)
96
95
  return img, target
97
96
 
@@ -102,26 +101,25 @@ class ColorInversion(NestedObject):
102
101
 
103
102
  .. tabs::
104
103
 
105
- .. tab:: TensorFlow
104
+ .. tab:: PyTorch
106
105
 
107
106
  .. code:: python
108
107
 
109
- >>> import tensorflow as tf
108
+ >>> import torch
110
109
  >>> from doctr.transforms import ColorInversion
111
110
  >>> transfo = ColorInversion(min_val=0.6)
112
- >>> out = transfo(tf.random.uniform(shape=[8, 64, 64, 3], minval=0, maxval=1))
111
+ >>> out = transfo(torch.rand(8, 64, 64, 3))
113
112
 
114
- .. tab:: PyTorch
113
+ .. tab:: TensorFlow
115
114
 
116
115
  .. code:: python
117
116
 
118
- >>> import torch
117
+ >>> import tensorflow as tf
119
118
  >>> from doctr.transforms import ColorInversion
120
119
  >>> transfo = ColorInversion(min_val=0.6)
121
- >>> out = transfo(torch.rand(8, 64, 64, 3))
120
+ >>> out = transfo(tf.random.uniform(shape=[8, 64, 64, 3], minval=0, maxval=1))
122
121
 
123
122
  Args:
124
- ----
125
123
  min_val: range [min_val, 1] to colorize RGB pixels
126
124
  """
127
125
 
@@ -140,35 +138,34 @@ class OneOf(NestedObject):
140
138
 
141
139
  .. tabs::
142
140
 
143
- .. tab:: TensorFlow
141
+ .. tab:: PyTorch
144
142
 
145
143
  .. code:: python
146
144
 
147
- >>> import tensorflow as tf
145
+ >>> import torch
148
146
  >>> from doctr.transforms import OneOf
149
147
  >>> transfo = OneOf([JpegQuality(), Gamma()])
150
- >>> out = transfo(tf.random.uniform(shape=[64, 64, 3], minval=0, maxval=1))
148
+ >>> out = transfo(torch.rand(1, 64, 64, 3))
151
149
 
152
- .. tab:: PyTorch
150
+ .. tab:: TensorFlow
153
151
 
154
152
  .. code:: python
155
153
 
156
- >>> import torch
154
+ >>> import tensorflow as tf
157
155
  >>> from doctr.transforms import OneOf
158
156
  >>> transfo = OneOf([JpegQuality(), Gamma()])
159
- >>> out = transfo(torch.rand(1, 64, 64, 3))
157
+ >>> out = transfo(tf.random.uniform(shape=[64, 64, 3], minval=0, maxval=1))
160
158
 
161
159
  Args:
162
- ----
163
160
  transforms: list of transformations, one only will be picked
164
161
  """
165
162
 
166
- _children_names: List[str] = ["transforms"]
163
+ _children_names: list[str] = ["transforms"]
167
164
 
168
- def __init__(self, transforms: List[Callable[[Any], Any]]) -> None:
165
+ def __init__(self, transforms: list[Callable[[Any], Any]]) -> None:
169
166
  self.transforms = transforms
170
167
 
171
- def __call__(self, img: Any, target: Optional[np.ndarray] = None) -> Union[Any, Tuple[Any, np.ndarray]]:
168
+ def __call__(self, img: Any, target: np.ndarray | None = None) -> Any | tuple[Any, np.ndarray]:
172
169
  # Pick transformation
173
170
  transfo = self.transforms[int(random.random() * len(self.transforms))]
174
171
  # Apply
@@ -180,26 +177,25 @@ class RandomApply(NestedObject):
180
177
 
181
178
  .. tabs::
182
179
 
183
- .. tab:: TensorFlow
180
+ .. tab:: PyTorch
184
181
 
185
182
  .. code:: python
186
183
 
187
- >>> import tensorflow as tf
184
+ >>> import torch
188
185
  >>> from doctr.transforms import RandomApply
189
186
  >>> transfo = RandomApply(Gamma(), p=.5)
190
- >>> out = transfo(tf.random.uniform(shape=[64, 64, 3], minval=0, maxval=1))
187
+ >>> out = transfo(torch.rand(1, 64, 64, 3))
191
188
 
192
- .. tab:: PyTorch
189
+ .. tab:: TensorFlow
193
190
 
194
191
  .. code:: python
195
192
 
196
- >>> import torch
193
+ >>> import tensorflow as tf
197
194
  >>> from doctr.transforms import RandomApply
198
195
  >>> transfo = RandomApply(Gamma(), p=.5)
199
- >>> out = transfo(torch.rand(1, 64, 64, 3))
196
+ >>> out = transfo(tf.random.uniform(shape=[64, 64, 3], minval=0, maxval=1))
200
197
 
201
198
  Args:
202
- ----
203
199
  transform: transformation to apply
204
200
  p: probability to apply
205
201
  """
@@ -211,7 +207,7 @@ class RandomApply(NestedObject):
211
207
  def extra_repr(self) -> str:
212
208
  return f"transform={self.transform}, p={self.p}"
213
209
 
214
- def __call__(self, img: Any, target: Optional[np.ndarray] = None) -> Union[Any, Tuple[Any, np.ndarray]]:
210
+ def __call__(self, img: Any, target: np.ndarray | None = None) -> Any | tuple[Any, np.ndarray]:
215
211
  if random.random() < self.p:
216
212
  return self.transform(img) if target is None else self.transform(img, target) # type: ignore[call-arg]
217
213
  return img if target is None else (img, target)
@@ -224,9 +220,7 @@ class RandomRotate(NestedObject):
224
220
  :align: center
225
221
 
226
222
  Args:
227
- ----
228
- max_angle: maximum angle for rotation, in degrees. Angles will be uniformly picked in
229
- [-max_angle, max_angle]
223
+ max_angle: maximum angle for rotation, in degrees. Angles will be uniformly picked in [-max_angle, max_angle]
230
224
  expand: whether the image should be padded before the rotation
231
225
  """
232
226
 
@@ -237,7 +231,7 @@ class RandomRotate(NestedObject):
237
231
  def extra_repr(self) -> str:
238
232
  return f"max_angle={self.max_angle}, expand={self.expand}"
239
233
 
240
- def __call__(self, img: Any, target: np.ndarray) -> Tuple[Any, np.ndarray]:
234
+ def __call__(self, img: Any, target: np.ndarray) -> tuple[Any, np.ndarray]:
241
235
  angle = random.uniform(-self.max_angle, self.max_angle)
242
236
  r_img, r_polys = F.rotate_sample(img, target, angle, self.expand)
243
237
  # Removes deleted boxes
@@ -249,19 +243,18 @@ class RandomCrop(NestedObject):
249
243
  """Randomly crop a tensor image and its boxes
250
244
 
251
245
  Args:
252
- ----
253
246
  scale: tuple of floats, relative (min_area, max_area) of the crop
254
247
  ratio: tuple of float, relative (min_ratio, max_ratio) where ratio = h/w
255
248
  """
256
249
 
257
- def __init__(self, scale: Tuple[float, float] = (0.08, 1.0), ratio: Tuple[float, float] = (0.75, 1.33)) -> None:
250
+ def __init__(self, scale: tuple[float, float] = (0.08, 1.0), ratio: tuple[float, float] = (0.75, 1.33)) -> None:
258
251
  self.scale = scale
259
252
  self.ratio = ratio
260
253
 
261
254
  def extra_repr(self) -> str:
262
255
  return f"scale={self.scale}, ratio={self.ratio}"
263
256
 
264
- def __call__(self, img: Any, target: np.ndarray) -> Tuple[Any, np.ndarray]:
257
+ def __call__(self, img: Any, target: np.ndarray) -> tuple[Any, np.ndarray]:
265
258
  scale = random.uniform(self.scale[0], self.scale[1])
266
259
  ratio = random.uniform(self.ratio[0], self.ratio[1])
267
260
 
@@ -1,21 +1,29 @@
1
- # Copyright (C) 2021-2024, Mindee.
1
+ # Copyright (C) 2021-2025, Mindee.
2
2
 
3
3
  # This program is licensed under the Apache License 2.0.
4
4
  # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
5
5
 
6
6
  import math
7
- from typing import Optional, Tuple, Union
8
7
 
9
8
  import numpy as np
10
9
  import torch
11
10
  from PIL.Image import Image
11
+ from scipy.ndimage import gaussian_filter
12
12
  from torch.nn.functional import pad
13
13
  from torchvision.transforms import functional as F
14
14
  from torchvision.transforms import transforms as T
15
15
 
16
16
  from ..functional.pytorch import random_shadow
17
17
 
18
- __all__ = ["Resize", "GaussianNoise", "ChannelShuffle", "RandomHorizontalFlip", "RandomShadow", "RandomResize"]
18
+ __all__ = [
19
+ "Resize",
20
+ "GaussianNoise",
21
+ "ChannelShuffle",
22
+ "RandomHorizontalFlip",
23
+ "RandomShadow",
24
+ "RandomResize",
25
+ "GaussianBlur",
26
+ ]
19
27
 
20
28
 
21
29
  class Resize(T.Resize):
@@ -23,7 +31,7 @@ class Resize(T.Resize):
23
31
 
24
32
  def __init__(
25
33
  self,
26
- size: Union[int, Tuple[int, int]],
34
+ size: int | tuple[int, int],
27
35
  interpolation=F.InterpolationMode.BILINEAR,
28
36
  preserve_aspect_ratio: bool = False,
29
37
  symmetric_pad: bool = False,
@@ -38,8 +46,8 @@ class Resize(T.Resize):
38
46
  def forward(
39
47
  self,
40
48
  img: torch.Tensor,
41
- target: Optional[np.ndarray] = None,
42
- ) -> Union[torch.Tensor, Tuple[torch.Tensor, np.ndarray]]:
49
+ target: np.ndarray | None = None,
50
+ ) -> torch.Tensor | tuple[torch.Tensor, np.ndarray]:
43
51
  if isinstance(self.size, int):
44
52
  target_ratio = img.shape[-2] / img.shape[-1]
45
53
  else:
@@ -122,7 +130,6 @@ class GaussianNoise(torch.nn.Module):
122
130
  >>> out = transfo(torch.rand((3, 224, 224)))
123
131
 
124
132
  Args:
125
- ----
126
133
  mean : mean of the gaussian distribution
127
134
  std : std of the gaussian distribution
128
135
  """
@@ -136,14 +143,47 @@ class GaussianNoise(torch.nn.Module):
136
143
  # Reshape the distribution
137
144
  noise = self.mean + 2 * self.std * torch.rand(x.shape, device=x.device) - self.std
138
145
  if x.dtype == torch.uint8:
139
- return (x + 255 * noise).round().clamp(0, 255).to(dtype=torch.uint8)
146
+ return (x + 255 * noise).round().clamp(0, 255).to(dtype=torch.uint8) # type: ignore[attr-defined]
140
147
  else:
141
- return (x + noise.to(dtype=x.dtype)).clamp(0, 1)
148
+ return (x + noise.to(dtype=x.dtype)).clamp(0, 1) # type: ignore[attr-defined]
142
149
 
143
150
  def extra_repr(self) -> str:
144
151
  return f"mean={self.mean}, std={self.std}"
145
152
 
146
153
 
154
+ class GaussianBlur(torch.nn.Module):
155
+ """Apply Gaussian Blur to the input tensor
156
+
157
+ >>> import torch
158
+ >>> from doctr.transforms import GaussianBlur
159
+ >>> transfo = GaussianBlur(sigma=(0.0, 1.0))
160
+
161
+ Args:
162
+ sigma : standard deviation range for the gaussian kernel
163
+ """
164
+
165
+ def __init__(self, sigma: tuple[float, float]) -> None:
166
+ super().__init__()
167
+ self.sigma_range = sigma
168
+
169
+ def forward(self, x: torch.Tensor) -> torch.Tensor:
170
+ # Sample a random sigma value within the specified range
171
+ sigma = torch.empty(1).uniform_(*self.sigma_range).item()
172
+
173
+ # Apply Gaussian blur along spatial dimensions only
174
+ blurred = torch.tensor(
175
+ gaussian_filter(
176
+ x.numpy(),
177
+ sigma=sigma,
178
+ mode="reflect",
179
+ truncate=4.0,
180
+ ),
181
+ dtype=x.dtype,
182
+ device=x.device,
183
+ )
184
+ return blurred
185
+
186
+
147
187
  class ChannelShuffle(torch.nn.Module):
148
188
  """Randomly shuffle channel order of a given image"""
149
189
 
@@ -159,9 +199,7 @@ class ChannelShuffle(torch.nn.Module):
159
199
  class RandomHorizontalFlip(T.RandomHorizontalFlip):
160
200
  """Randomly flip the input image horizontally"""
161
201
 
162
- def forward(
163
- self, img: Union[torch.Tensor, Image], target: np.ndarray
164
- ) -> Tuple[Union[torch.Tensor, Image], np.ndarray]:
202
+ def forward(self, img: torch.Tensor | Image, target: np.ndarray) -> tuple[torch.Tensor | Image, np.ndarray]:
165
203
  if torch.rand(1) < self.p:
166
204
  _img = F.hflip(img)
167
205
  _target = target.copy()
@@ -183,11 +221,10 @@ class RandomShadow(torch.nn.Module):
183
221
  >>> out = transfo(torch.rand((3, 64, 64)))
184
222
 
185
223
  Args:
186
- ----
187
224
  opacity_range : minimum and maximum opacity of the shade
188
225
  """
189
226
 
190
- def __init__(self, opacity_range: Optional[Tuple[float, float]] = None) -> None:
227
+ def __init__(self, opacity_range: tuple[float, float] | None = None) -> None:
191
228
  super().__init__()
192
229
  self.opacity_range = opacity_range if isinstance(opacity_range, tuple) else (0.2, 0.8)
193
230
 
@@ -196,7 +233,7 @@ class RandomShadow(torch.nn.Module):
196
233
  try:
197
234
  if x.dtype == torch.uint8:
198
235
  return (
199
- (
236
+ ( # type: ignore[attr-defined]
200
237
  255
201
238
  * random_shadow(
202
239
  x.to(dtype=torch.float32) / 255,
@@ -225,20 +262,19 @@ class RandomResize(torch.nn.Module):
225
262
  >>> out = transfo(torch.rand((3, 64, 64)))
226
263
 
227
264
  Args:
228
- ----
229
265
  scale_range: range of the resizing factor for width and height (independently)
230
266
  preserve_aspect_ratio: whether to preserve the aspect ratio of the image,
231
- given a float value, the aspect ratio will be preserved with this probability
267
+ given a float value, the aspect ratio will be preserved with this probability
232
268
  symmetric_pad: whether to symmetrically pad the image,
233
- given a float value, the symmetric padding will be applied with this probability
269
+ given a float value, the symmetric padding will be applied with this probability
234
270
  p: probability to apply the transformation
235
271
  """
236
272
 
237
273
  def __init__(
238
274
  self,
239
- scale_range: Tuple[float, float] = (0.3, 0.9),
240
- preserve_aspect_ratio: Union[bool, float] = False,
241
- symmetric_pad: Union[bool, float] = False,
275
+ scale_range: tuple[float, float] = (0.3, 0.9),
276
+ preserve_aspect_ratio: bool | float = False,
277
+ symmetric_pad: bool | float = False,
242
278
  p: float = 0.5,
243
279
  ) -> None:
244
280
  super().__init__()
@@ -248,7 +284,7 @@ class RandomResize(torch.nn.Module):
248
284
  self.p = p
249
285
  self._resize = Resize
250
286
 
251
- def forward(self, img: torch.Tensor, target: np.ndarray) -> Tuple[torch.Tensor, np.ndarray]:
287
+ def forward(self, img: torch.Tensor, target: np.ndarray) -> tuple[torch.Tensor, np.ndarray]:
252
288
  if torch.rand(1) < self.p:
253
289
  scale_h = np.random.uniform(*self.scale_range)
254
290
  scale_w = np.random.uniform(*self.scale_range)
@@ -1,10 +1,11 @@
1
- # Copyright (C) 2021-2024, Mindee.
1
+ # Copyright (C) 2021-2025, Mindee.
2
2
 
3
3
  # This program is licensed under the Apache License 2.0.
4
4
  # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
5
5
 
6
6
  import random
7
- from typing import Any, Callable, Iterable, List, Optional, Tuple, Union
7
+ from collections.abc import Callable, Iterable
8
+ from typing import Any
8
9
 
9
10
  import numpy as np
10
11
  import tensorflow as tf
@@ -43,13 +44,12 @@ class Compose(NestedObject):
43
44
  >>> out = transfos(tf.random.uniform(shape=[64, 64, 3], minval=0, maxval=1))
44
45
 
45
46
  Args:
46
- ----
47
47
  transforms: list of transformation modules
48
48
  """
49
49
 
50
- _children_names: List[str] = ["transforms"]
50
+ _children_names: list[str] = ["transforms"]
51
51
 
52
- def __init__(self, transforms: List[Callable[[Any], Any]]) -> None:
52
+ def __init__(self, transforms: list[Callable[[Any], Any]]) -> None:
53
53
  self.transforms = transforms
54
54
 
55
55
  def __call__(self, x: Any) -> Any:
@@ -68,7 +68,6 @@ class Resize(NestedObject):
68
68
  >>> out = transfo(tf.random.uniform(shape=[64, 64, 3], minval=0, maxval=1))
69
69
 
70
70
  Args:
71
- ----
72
71
  output_size: expected output size
73
72
  method: interpolation method
74
73
  preserve_aspect_ratio: if `True`, preserve aspect ratio and pad the rest with zeros
@@ -77,7 +76,7 @@ class Resize(NestedObject):
77
76
 
78
77
  def __init__(
79
78
  self,
80
- output_size: Union[int, Tuple[int, int]],
79
+ output_size: int | tuple[int, int],
81
80
  method: str = "bilinear",
82
81
  preserve_aspect_ratio: bool = False,
83
82
  symmetric_pad: bool = False,
@@ -104,8 +103,8 @@ class Resize(NestedObject):
104
103
  def __call__(
105
104
  self,
106
105
  img: tf.Tensor,
107
- target: Optional[np.ndarray] = None,
108
- ) -> Union[tf.Tensor, Tuple[tf.Tensor, np.ndarray]]:
106
+ target: np.ndarray | None = None,
107
+ ) -> tf.Tensor | tuple[tf.Tensor, np.ndarray]:
109
108
  input_dtype = img.dtype
110
109
  self.output_size = (
111
110
  (self.output_size, self.output_size) if isinstance(self.output_size, int) else self.output_size
@@ -164,12 +163,11 @@ class Normalize(NestedObject):
164
163
  >>> out = transfo(tf.random.uniform(shape=[8, 64, 64, 3], minval=0, maxval=1))
165
164
 
166
165
  Args:
167
- ----
168
166
  mean: average value per channel
169
167
  std: standard deviation per channel
170
168
  """
171
169
 
172
- def __init__(self, mean: Tuple[float, float, float], std: Tuple[float, float, float]) -> None:
170
+ def __init__(self, mean: tuple[float, float, float], std: tuple[float, float, float]) -> None:
173
171
  self.mean = tf.constant(mean)
174
172
  self.std = tf.constant(std)
175
173
 
@@ -191,7 +189,6 @@ class LambdaTransformation(NestedObject):
191
189
  >>> out = transfo(tf.random.uniform(shape=[8, 64, 64, 3], minval=0, maxval=1))
192
190
 
193
191
  Args:
194
- ----
195
192
  fn: the function to be applied to the input tensor
196
193
  """
197
194
 
@@ -229,7 +226,6 @@ class RandomBrightness(NestedObject):
229
226
  >>> out = transfo(tf.random.uniform(shape=[8, 64, 64, 3], minval=0, maxval=1))
230
227
 
231
228
  Args:
232
- ----
233
229
  max_delta: offset to add to each pixel is randomly picked in [-max_delta, max_delta]
234
230
  p: probability to apply transformation
235
231
  """
@@ -254,7 +250,6 @@ class RandomContrast(NestedObject):
254
250
  >>> out = transfo(tf.random.uniform(shape=[8, 64, 64, 3], minval=0, maxval=1))
255
251
 
256
252
  Args:
257
- ----
258
253
  delta: multiplicative factor is picked in [1-delta, 1+delta] (reduce contrast if factor<1)
259
254
  """
260
255
 
@@ -278,7 +273,6 @@ class RandomSaturation(NestedObject):
278
273
  >>> out = transfo(tf.random.uniform(shape=[8, 64, 64, 3], minval=0, maxval=1))
279
274
 
280
275
  Args:
281
- ----
282
276
  delta: multiplicative factor is picked in [1-delta, 1+delta] (reduce saturation if factor<1)
283
277
  """
284
278
 
@@ -301,7 +295,6 @@ class RandomHue(NestedObject):
301
295
  >>> out = transfo(tf.random.uniform(shape=[8, 64, 64, 3], minval=0, maxval=1))
302
296
 
303
297
  Args:
304
- ----
305
298
  max_delta: offset to add to each pixel is randomly picked in [-max_delta, max_delta]
306
299
  """
307
300
 
@@ -324,7 +317,6 @@ class RandomGamma(NestedObject):
324
317
  >>> out = transfo(tf.random.uniform(shape=[8, 64, 64, 3], minval=0, maxval=1))
325
318
 
326
319
  Args:
327
- ----
328
320
  min_gamma: non-negative real number, lower bound for gamma param
329
321
  max_gamma: non-negative real number, upper bound for gamma
330
322
  min_gain: lower bound for constant multiplier
@@ -362,7 +354,6 @@ class RandomJpegQuality(NestedObject):
362
354
  >>> out = transfo(tf.random.uniform(shape=[64, 64, 3], minval=0, maxval=1))
363
355
 
364
356
  Args:
365
- ----
366
357
  min_quality: int between [0, 100]
367
358
  max_quality: int between [0, 100]
368
359
  """
@@ -387,12 +378,11 @@ class GaussianBlur(NestedObject):
387
378
  >>> out = transfo(tf.random.uniform(shape=[64, 64, 3], minval=0, maxval=1))
388
379
 
389
380
  Args:
390
- ----
391
381
  kernel_shape: size of the blurring kernel
392
382
  std: min and max value of the standard deviation
393
383
  """
394
384
 
395
- def __init__(self, kernel_shape: Union[int, Iterable[int]], std: Tuple[float, float]) -> None:
385
+ def __init__(self, kernel_shape: int | Iterable[int], std: tuple[float, float]) -> None:
396
386
  self.kernel_shape = kernel_shape
397
387
  self.std = std
398
388
 
@@ -430,7 +420,6 @@ class GaussianNoise(NestedObject):
430
420
  >>> out = transfo(tf.random.uniform(shape=[64, 64, 3], minval=0, maxval=1))
431
421
 
432
422
  Args:
433
- ----
434
423
  mean : mean of the gaussian distribution
435
424
  std : std of the gaussian distribution
436
425
  """
@@ -465,7 +454,6 @@ class RandomHorizontalFlip(NestedObject):
465
454
  >>> out = transfo(image, target)
466
455
 
467
456
  Args:
468
- ----
469
457
  p : probability of Horizontal Flip
470
458
  """
471
459
 
@@ -473,7 +461,7 @@ class RandomHorizontalFlip(NestedObject):
473
461
  super().__init__()
474
462
  self.p = p
475
463
 
476
- def __call__(self, img: Union[tf.Tensor, np.ndarray], target: np.ndarray) -> Tuple[tf.Tensor, np.ndarray]:
464
+ def __call__(self, img: tf.Tensor | np.ndarray, target: np.ndarray) -> tuple[tf.Tensor, np.ndarray]:
477
465
  if np.random.rand(1) <= self.p:
478
466
  _img = tf.image.flip_left_right(img)
479
467
  _target = target.copy()
@@ -495,11 +483,10 @@ class RandomShadow(NestedObject):
495
483
  >>> out = transfo(tf.random.uniform(shape=[64, 64, 3], minval=0, maxval=1))
496
484
 
497
485
  Args:
498
- ----
499
486
  opacity_range : minimum and maximum opacity of the shade
500
487
  """
501
488
 
502
- def __init__(self, opacity_range: Optional[Tuple[float, float]] = None) -> None:
489
+ def __init__(self, opacity_range: tuple[float, float] | None = None) -> None:
503
490
  super().__init__()
504
491
  self.opacity_range = opacity_range if isinstance(opacity_range, tuple) else (0.2, 0.8)
505
492
 
@@ -530,20 +517,19 @@ class RandomResize(NestedObject):
530
517
  >>> out = transfo(tf.random.uniform(shape=[64, 64, 3], minval=0, maxval=1))
531
518
 
532
519
  Args:
533
- ----
534
520
  scale_range: range of the resizing factor for width and height (independently)
535
521
  preserve_aspect_ratio: whether to preserve the aspect ratio of the image,
536
- given a float value, the aspect ratio will be preserved with this probability
522
+ given a float value, the aspect ratio will be preserved with this probability
537
523
  symmetric_pad: whether to symmetrically pad the image,
538
- given a float value, the symmetric padding will be applied with this probability
524
+ given a float value, the symmetric padding will be applied with this probability
539
525
  p: probability to apply the transformation
540
526
  """
541
527
 
542
528
  def __init__(
543
529
  self,
544
- scale_range: Tuple[float, float] = (0.3, 0.9),
545
- preserve_aspect_ratio: Union[bool, float] = False,
546
- symmetric_pad: Union[bool, float] = False,
530
+ scale_range: tuple[float, float] = (0.3, 0.9),
531
+ preserve_aspect_ratio: bool | float = False,
532
+ symmetric_pad: bool | float = False,
547
533
  p: float = 0.5,
548
534
  ):
549
535
  super().__init__()
@@ -553,7 +539,7 @@ class RandomResize(NestedObject):
553
539
  self.p = p
554
540
  self._resize = Resize
555
541
 
556
- def __call__(self, img: tf.Tensor, target: np.ndarray) -> Tuple[tf.Tensor, np.ndarray]:
542
+ def __call__(self, img: tf.Tensor, target: np.ndarray) -> tuple[tf.Tensor, np.ndarray]:
557
543
  if np.random.rand(1) <= self.p:
558
544
  scale_h = random.uniform(*self.scale_range)
559
545
  scale_w = random.uniform(*self.scale_range)
@@ -1,18 +1,17 @@
1
- # Copyright (C) 2021-2024, Mindee.
1
+ # Copyright (C) 2021-2025, Mindee.
2
2
 
3
3
  # This program is licensed under the Apache License 2.0.
4
4
  # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
5
5
 
6
6
  from pathlib import Path
7
- from typing import List, Tuple, Union
8
7
 
9
8
  __all__ = ["Point2D", "BoundingBox", "Polygon4P", "Polygon", "Bbox"]
10
9
 
11
10
 
12
- Point2D = Tuple[float, float]
13
- BoundingBox = Tuple[Point2D, Point2D]
14
- Polygon4P = Tuple[Point2D, Point2D, Point2D, Point2D]
15
- Polygon = List[Point2D]
16
- AbstractPath = Union[str, Path]
17
- AbstractFile = Union[AbstractPath, bytes]
18
- Bbox = Tuple[float, float, float, float]
11
+ Point2D = tuple[float, float]
12
+ BoundingBox = tuple[Point2D, Point2D]
13
+ Polygon4P = tuple[Point2D, Point2D, Point2D, Point2D]
14
+ Polygon = list[Point2D]
15
+ AbstractPath = str | Path
16
+ AbstractFile = AbstractPath | bytes
17
+ Bbox = tuple[float, float, float, float]