python-doctr 0.12.0__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (116)
  1. doctr/__init__.py +0 -1
  2. doctr/datasets/__init__.py +0 -5
  3. doctr/datasets/datasets/__init__.py +1 -6
  4. doctr/datasets/datasets/pytorch.py +2 -2
  5. doctr/datasets/generator/__init__.py +1 -6
  6. doctr/datasets/vocabs.py +0 -2
  7. doctr/file_utils.py +2 -101
  8. doctr/io/image/__init__.py +1 -7
  9. doctr/io/image/pytorch.py +1 -1
  10. doctr/models/_utils.py +3 -3
  11. doctr/models/classification/magc_resnet/__init__.py +1 -6
  12. doctr/models/classification/magc_resnet/pytorch.py +2 -2
  13. doctr/models/classification/mobilenet/__init__.py +1 -6
  14. doctr/models/classification/predictor/__init__.py +1 -6
  15. doctr/models/classification/predictor/pytorch.py +1 -1
  16. doctr/models/classification/resnet/__init__.py +1 -6
  17. doctr/models/classification/textnet/__init__.py +1 -6
  18. doctr/models/classification/textnet/pytorch.py +1 -1
  19. doctr/models/classification/vgg/__init__.py +1 -6
  20. doctr/models/classification/vip/__init__.py +1 -4
  21. doctr/models/classification/vip/layers/__init__.py +1 -4
  22. doctr/models/classification/vip/layers/pytorch.py +1 -1
  23. doctr/models/classification/vit/__init__.py +1 -6
  24. doctr/models/classification/vit/pytorch.py +2 -2
  25. doctr/models/classification/zoo.py +6 -11
  26. doctr/models/detection/_utils/__init__.py +1 -6
  27. doctr/models/detection/core.py +1 -1
  28. doctr/models/detection/differentiable_binarization/__init__.py +1 -6
  29. doctr/models/detection/differentiable_binarization/base.py +4 -12
  30. doctr/models/detection/differentiable_binarization/pytorch.py +3 -3
  31. doctr/models/detection/fast/__init__.py +1 -6
  32. doctr/models/detection/fast/base.py +4 -14
  33. doctr/models/detection/fast/pytorch.py +4 -4
  34. doctr/models/detection/linknet/__init__.py +1 -6
  35. doctr/models/detection/linknet/base.py +3 -12
  36. doctr/models/detection/linknet/pytorch.py +2 -2
  37. doctr/models/detection/predictor/__init__.py +1 -6
  38. doctr/models/detection/predictor/pytorch.py +1 -1
  39. doctr/models/detection/zoo.py +15 -32
  40. doctr/models/factory/hub.py +8 -21
  41. doctr/models/kie_predictor/__init__.py +1 -6
  42. doctr/models/kie_predictor/pytorch.py +2 -6
  43. doctr/models/modules/layers/__init__.py +1 -6
  44. doctr/models/modules/layers/pytorch.py +3 -3
  45. doctr/models/modules/transformer/__init__.py +1 -6
  46. doctr/models/modules/transformer/pytorch.py +2 -2
  47. doctr/models/modules/vision_transformer/__init__.py +1 -6
  48. doctr/models/predictor/__init__.py +1 -6
  49. doctr/models/predictor/base.py +3 -8
  50. doctr/models/predictor/pytorch.py +2 -5
  51. doctr/models/preprocessor/__init__.py +1 -6
  52. doctr/models/preprocessor/pytorch.py +27 -32
  53. doctr/models/recognition/crnn/__init__.py +1 -6
  54. doctr/models/recognition/crnn/pytorch.py +6 -6
  55. doctr/models/recognition/master/__init__.py +1 -6
  56. doctr/models/recognition/master/pytorch.py +5 -5
  57. doctr/models/recognition/parseq/__init__.py +1 -6
  58. doctr/models/recognition/parseq/pytorch.py +5 -5
  59. doctr/models/recognition/predictor/__init__.py +1 -6
  60. doctr/models/recognition/predictor/_utils.py +7 -16
  61. doctr/models/recognition/predictor/pytorch.py +1 -2
  62. doctr/models/recognition/sar/__init__.py +1 -6
  63. doctr/models/recognition/sar/pytorch.py +3 -3
  64. doctr/models/recognition/viptr/__init__.py +1 -4
  65. doctr/models/recognition/viptr/pytorch.py +3 -3
  66. doctr/models/recognition/vitstr/__init__.py +1 -6
  67. doctr/models/recognition/vitstr/pytorch.py +3 -3
  68. doctr/models/recognition/zoo.py +13 -13
  69. doctr/models/utils/__init__.py +1 -6
  70. doctr/models/utils/pytorch.py +1 -1
  71. doctr/transforms/functional/__init__.py +1 -6
  72. doctr/transforms/functional/pytorch.py +4 -4
  73. doctr/transforms/modules/__init__.py +1 -7
  74. doctr/transforms/modules/base.py +26 -92
  75. doctr/transforms/modules/pytorch.py +28 -26
  76. doctr/utils/geometry.py +6 -10
  77. doctr/utils/visualization.py +1 -1
  78. doctr/version.py +1 -1
  79. {python_doctr-0.12.0.dist-info → python_doctr-1.0.0.dist-info}/METADATA +18 -75
  80. python_doctr-1.0.0.dist-info/RECORD +149 -0
  81. doctr/datasets/datasets/tensorflow.py +0 -59
  82. doctr/datasets/generator/tensorflow.py +0 -58
  83. doctr/datasets/loader.py +0 -94
  84. doctr/io/image/tensorflow.py +0 -101
  85. doctr/models/classification/magc_resnet/tensorflow.py +0 -196
  86. doctr/models/classification/mobilenet/tensorflow.py +0 -442
  87. doctr/models/classification/predictor/tensorflow.py +0 -60
  88. doctr/models/classification/resnet/tensorflow.py +0 -418
  89. doctr/models/classification/textnet/tensorflow.py +0 -275
  90. doctr/models/classification/vgg/tensorflow.py +0 -125
  91. doctr/models/classification/vit/tensorflow.py +0 -201
  92. doctr/models/detection/_utils/tensorflow.py +0 -34
  93. doctr/models/detection/differentiable_binarization/tensorflow.py +0 -421
  94. doctr/models/detection/fast/tensorflow.py +0 -427
  95. doctr/models/detection/linknet/tensorflow.py +0 -377
  96. doctr/models/detection/predictor/tensorflow.py +0 -70
  97. doctr/models/kie_predictor/tensorflow.py +0 -187
  98. doctr/models/modules/layers/tensorflow.py +0 -171
  99. doctr/models/modules/transformer/tensorflow.py +0 -235
  100. doctr/models/modules/vision_transformer/tensorflow.py +0 -100
  101. doctr/models/predictor/tensorflow.py +0 -155
  102. doctr/models/preprocessor/tensorflow.py +0 -122
  103. doctr/models/recognition/crnn/tensorflow.py +0 -317
  104. doctr/models/recognition/master/tensorflow.py +0 -320
  105. doctr/models/recognition/parseq/tensorflow.py +0 -516
  106. doctr/models/recognition/predictor/tensorflow.py +0 -79
  107. doctr/models/recognition/sar/tensorflow.py +0 -423
  108. doctr/models/recognition/vitstr/tensorflow.py +0 -285
  109. doctr/models/utils/tensorflow.py +0 -189
  110. doctr/transforms/functional/tensorflow.py +0 -254
  111. doctr/transforms/modules/tensorflow.py +0 -562
  112. python_doctr-0.12.0.dist-info/RECORD +0 -180
  113. {python_doctr-0.12.0.dist-info → python_doctr-1.0.0.dist-info}/WHEEL +0 -0
  114. {python_doctr-0.12.0.dist-info → python_doctr-1.0.0.dist-info}/licenses/LICENSE +0 -0
  115. {python_doctr-0.12.0.dist-info → python_doctr-1.0.0.dist-info}/top_level.txt +0 -0
  116. {python_doctr-0.12.0.dist-info → python_doctr-1.0.0.dist-info}/zip-safe +0 -0
@@ -1,562 +0,0 @@
1
- # Copyright (C) 2021-2025, Mindee.
2
-
3
- # This program is licensed under the Apache License 2.0.
4
- # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
5
-
6
- import random
7
- from collections.abc import Callable, Iterable
8
- from typing import Any
9
-
10
- import numpy as np
11
- import tensorflow as tf
12
-
13
- from doctr.utils.repr import NestedObject
14
-
15
- from ..functional.tensorflow import _gaussian_filter, random_shadow
16
-
17
# Names exported by `from ... import *`: the transformation classes of this module
__all__ = [
    "Compose", "Resize", "Normalize", "LambdaTransformation", "ToGray",
    "RandomBrightness", "RandomContrast", "RandomSaturation", "RandomHue", "RandomGamma",
    "RandomJpegQuality", "GaussianBlur", "ChannelShuffle", "GaussianNoise",
    "RandomHorizontalFlip", "RandomShadow", "RandomResize",
]  # fmt: skip
36
-
37
-
38
class Compose(NestedObject):
    """Chains several transformations, feeding the output of each one into the next

    >>> import tensorflow as tf
    >>> from doctr.transforms import Compose, Resize
    >>> transfos = Compose([Resize((32, 32))])
    >>> out = transfos(tf.random.uniform(shape=[64, 64, 3], minval=0, maxval=1))

    Args:
        transforms: callables applied in list order, each receiving the previous one's output
    """

    _children_names: list[str] = ["transforms"]

    def __init__(self, transforms: list[Callable[[Any], Any]]) -> None:
        self.transforms = transforms

    def __call__(self, x: Any) -> Any:
        # Thread the value through the pipeline, one stage at a time
        out = x
        for transform in self.transforms:
            out = transform(out)
        return out
60
-
61
-
62
class Resize(NestedObject):
    """Resizes a tensor to a target size

    >>> import tensorflow as tf
    >>> from doctr.transforms import Resize
    >>> transfo = Resize((32, 32))
    >>> out = transfo(tf.random.uniform(shape=[64, 64, 3], minval=0, maxval=1))

    Args:
        output_size: expected output size, either a single int (used for both sides) or a (height, width) pair
        method: interpolation method
        preserve_aspect_ratio: if `True`, preserve aspect ratio and pad the rest with zeros
        symmetric_pad: if `True` while preserving aspect ratio, the padding will be done symmetrically

    Raises:
        AssertionError: if `output_size` is neither an int, a tuple nor a list
    """

    def __init__(
        self,
        output_size: int | tuple[int, int],
        method: str = "bilinear",
        preserve_aspect_ratio: bool = False,
        symmetric_pad: bool = False,
    ) -> None:
        self.output_size = output_size
        self.method = method
        self.preserve_aspect_ratio = preserve_aspect_ratio
        self.symmetric_pad = symmetric_pad
        self.antialias = True

        # `wanted_size` is the 2-element size handed to `tf.image.resize`
        if isinstance(self.output_size, int):
            self.wanted_size = (self.output_size, self.output_size)
        elif isinstance(self.output_size, (tuple, list)):
            self.wanted_size = self.output_size
        else:
            raise AssertionError("Output size should be either a list, a tuple or an int")

    def extra_repr(self) -> str:
        _repr = f"output_size={self.output_size}, method='{self.method}'"
        if self.preserve_aspect_ratio:
            _repr += f", preserve_aspect_ratio={self.preserve_aspect_ratio}, symmetric_pad={self.symmetric_pad}"
        return _repr

    def __call__(
        self,
        img: tf.Tensor,
        target: np.ndarray | None = None,
    ) -> tf.Tensor | tuple[tf.Tensor, np.ndarray]:
        """Resize (and optionally pad) the image, rescaling the boxes accordingly

        Args:
            img: image tensor to resize
            target: optional boxes of shape (n_boxes, 4) or (n_boxes, 4, 2); when the aspect ratio is
                preserved they are rescaled in place to account for the padding

        Returns:
            the resized image cast back to the input dtype, plus the boxes clipped to [0, 1]
            when a target was given

        Raises:
            AssertionError: if the aspect ratio is preserved and the boxes have an unexpected shape
        """
        input_dtype = img.dtype
        # Normalise into a local instead of overwriting `self.output_size`: calling the transform
        # must not alter its configuration (nor what `extra_repr` reports)
        output_size = (
            (self.output_size, self.output_size) if isinstance(self.output_size, int) else self.output_size
        )

        img = tf.image.resize(img, self.wanted_size, self.method, self.preserve_aspect_ratio, self.antialias)
        # It will produce an un-padded resized image, with a side shorter than wanted if we preserve aspect ratio
        raw_shape = img.shape[:2]
        if self.symmetric_pad:
            half_pad = (int((output_size[0] - img.shape[0]) / 2), 0)
        if self.preserve_aspect_ratio:
            # We need to pad because we want to enforce both width and height
            if not self.symmetric_pad:
                half_pad = (0, 0)
            elif output_size[0] == img.shape[0]:
                # Height already matches: the missing pixels are on the width axis
                half_pad = (0, int((output_size[1] - img.shape[1]) / 2))
            # Pad image
            img = tf.image.pad_to_bounding_box(img, *half_pad, *output_size)

        # In case boxes are provided, resize boxes if needed (for detection task if preserve aspect ratio)
        if target is not None:
            if self.symmetric_pad:
                # Padding offset expressed relatively to the padded image
                offset = half_pad[0] / img.shape[0], half_pad[1] / img.shape[1]

            if self.preserve_aspect_ratio:
                # Get absolute coords
                if target.shape[1:] == (4,):
                    if self.symmetric_pad:
                        target[:, [0, 2]] = offset[1] + target[:, [0, 2]] * raw_shape[1] / img.shape[1]
                        target[:, [1, 3]] = offset[0] + target[:, [1, 3]] * raw_shape[0] / img.shape[0]
                    else:
                        target[:, [0, 2]] *= raw_shape[1] / img.shape[1]
                        target[:, [1, 3]] *= raw_shape[0] / img.shape[0]
                elif target.shape[1:] == (4, 2):
                    if self.symmetric_pad:
                        target[..., 0] = offset[1] + target[..., 0] * raw_shape[1] / img.shape[1]
                        target[..., 1] = offset[0] + target[..., 1] * raw_shape[0] / img.shape[0]
                    else:
                        target[..., 0] *= raw_shape[1] / img.shape[1]
                        target[..., 1] *= raw_shape[0] / img.shape[0]
                else:
                    raise AssertionError("Boxes should be in the format (n_boxes, 4, 2) or (n_boxes, 4)")

            return tf.cast(img, dtype=input_dtype), np.clip(target, 0, 1)

        return tf.cast(img, dtype=input_dtype)
155
-
156
-
157
class Normalize(NestedObject):
    """Standardizes a tensor channel-wise: subtracts `mean`, then divides by `std`

    >>> import tensorflow as tf
    >>> from doctr.transforms import Normalize
    >>> transfo = Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    >>> out = transfo(tf.random.uniform(shape=[8, 64, 64, 3], minval=0, maxval=1))

    Args:
        mean: average value per channel
        std: standard deviation per channel
    """

    def __init__(self, mean: tuple[float, float, float], std: tuple[float, float, float]) -> None:
        # Stored as constant tensors; they are cast to the input dtype at call time
        self.std = tf.constant(std)
        self.mean = tf.constant(mean)

    def extra_repr(self) -> str:
        mean_values = self.mean.numpy().tolist()
        std_values = self.std.numpy().tolist()
        return f"mean={mean_values}, std={std_values}"

    def __call__(self, img: tf.Tensor) -> tf.Tensor:
        # Center first, then rescale; the statistics follow the dtype of the input
        shift = tf.cast(self.mean, dtype=img.dtype)
        img -= shift
        scale = tf.cast(self.std, dtype=img.dtype)
        img /= scale
        return img
181
-
182
-
183
class LambdaTransformation(NestedObject):
    """Wraps an arbitrary callable so that it can be used as a transformation

    >>> import tensorflow as tf
    >>> from doctr.transforms import LambdaTransformation
    >>> transfo = LambdaTransformation(lambda x: x/ 255.)
    >>> out = transfo(tf.random.uniform(shape=[8, 64, 64, 3], minval=0, maxval=1))

    Args:
        fn: the function to be applied to the input tensor
    """

    def __init__(self, fn: Callable[[tf.Tensor], tf.Tensor]) -> None:
        self.fn = fn

    def __call__(self, img: tf.Tensor) -> tf.Tensor:
        # Pure delegation: the result of `fn` is returned untouched
        transformed = self.fn(img)
        return transformed
200
-
201
-
202
class ToGray(NestedObject):
    """Convert a RGB tensor (batch of images or image) to grayscale, optionally repeating the
    gray channel to obtain a multi-channel output

    >>> import tensorflow as tf
    >>> from doctr.transforms import ToGray
    >>> transfo = ToGray()
    >>> out = transfo(tf.random.uniform(shape=[8, 64, 64, 3], minval=0, maxval=1))

    Args:
        num_output_channels: number of channels of the output; when different from 1, the gray
            channel is repeated that many times along the last axis
    """

    def __init__(self, num_output_channels: int = 1):
        self.num_output_channels = num_output_channels

    def __call__(self, img: tf.Tensor) -> tf.Tensor:
        gray = tf.image.rgb_to_grayscale(img)
        if self.num_output_channels == 1:
            return gray
        # Duplicate the gray channel along the channel axis
        return tf.repeat(gray, self.num_output_channels, axis=-1)
217
-
218
-
219
class RandomBrightness(NestedObject):
    """Randomly shifts the brightness of a tensor (batch of images or image): the same random
    delta is added to all pixels

    >>> import tensorflow as tf
    >>> from doctr.transforms import RandomBrightness
    >>> transfo = RandomBrightness()
    >>> out = transfo(tf.random.uniform(shape=[8, 64, 64, 3], minval=0, maxval=1))

    Args:
        max_delta: offset to add to each pixel is randomly picked in [-max_delta, max_delta]
    """

    def __init__(self, max_delta: float = 0.3) -> None:
        self.max_delta = max_delta

    def extra_repr(self) -> str:
        return f"max_delta={self.max_delta}"

    def __call__(self, img: tf.Tensor) -> tf.Tensor:
        # The random draw is delegated to TensorFlow
        delta_bound = self.max_delta
        return tf.image.random_brightness(img, max_delta=delta_bound)
241
-
242
-
243
class RandomContrast(NestedObject):
    """Randomly adjust contrast of a tensor (batch of images or image) by adjusting
    each pixel: (img - mean) * contrast_factor + mean.

    >>> import tensorflow as tf
    >>> from doctr.transforms import RandomContrast
    >>> transfo = RandomContrast()
    >>> out = transfo(tf.random.uniform(shape=[8, 64, 64, 3], minval=0, maxval=1))

    Args:
        delta: the contrast factor is picked between 1 - delta and 1 / (1 - delta)
            (reduce contrast if factor<1); the upper bound is undefined for delta == 1
    """

    def __init__(self, delta: float = 0.3) -> None:
        self.delta = delta

    def extra_repr(self) -> str:
        return f"delta={self.delta}"

    def __call__(self, img: tf.Tensor) -> tf.Tensor:
        # The bounds are reciprocal of each other rather than symmetric around 1
        lowest_factor = 1 - self.delta
        highest_factor = 1 / (1 - self.delta)
        return tf.image.random_contrast(img, lower=lowest_factor, upper=highest_factor)
264
-
265
-
266
class RandomSaturation(NestedObject):
    """Randomly rescales the saturation of a tensor (batch of images or image) by a
    multiplicative factor, using `tf.image.random_saturation`.

    >>> import tensorflow as tf
    >>> from doctr.transforms import RandomSaturation
    >>> transfo = RandomSaturation()
    >>> out = transfo(tf.random.uniform(shape=[8, 64, 64, 3], minval=0, maxval=1))

    Args:
        delta: multiplicative factor is picked in [1-delta, 1+delta] (reduce saturation if factor<1)
    """

    def __init__(self, delta: float = 0.5) -> None:
        self.delta = delta

    def extra_repr(self) -> str:
        return f"delta={self.delta}"

    def __call__(self, img: tf.Tensor) -> tf.Tensor:
        # Symmetric range around the identity factor 1
        lowest_factor = 1 - self.delta
        highest_factor = 1 + self.delta
        return tf.image.random_saturation(img, lower=lowest_factor, upper=highest_factor)
287
-
288
-
289
class RandomHue(NestedObject):
    """Randomly shifts the hue of a tensor (batch of images or image), using `tf.image.random_hue`

    >>> import tensorflow as tf
    >>> from doctr.transforms import RandomHue
    >>> transfo = RandomHue()
    >>> out = transfo(tf.random.uniform(shape=[8, 64, 64, 3], minval=0, maxval=1))

    Args:
        max_delta: offset to add to each pixel is randomly picked in [-max_delta, max_delta]
    """

    def __init__(self, max_delta: float = 0.3) -> None:
        self.max_delta = max_delta

    def extra_repr(self) -> str:
        return f"max_delta={self.max_delta}"

    def __call__(self, img: tf.Tensor) -> tf.Tensor:
        # The random draw is delegated to TensorFlow
        delta_bound = self.max_delta
        return tf.image.random_hue(img, max_delta=delta_bound)
309
-
310
-
311
class RandomGamma(NestedObject):
    """randomly performs gamma correction for a tensor (batch of images or image)

    >>> import tensorflow as tf
    >>> from doctr.transforms import RandomGamma
    >>> transfo = RandomGamma()
    >>> out = transfo(tf.random.uniform(shape=[8, 64, 64, 3], minval=0, maxval=1))

    Args:
        min_gamma: non-negative real number, lower bound for gamma param
        max_gamma: non-negative real number, upper bound for gamma
        min_gain: lower bound for constant multiplier
        max_gain: upper bound for constant multiplier
    """

    def __init__(
        self,
        min_gamma: float = 0.5,
        max_gamma: float = 1.5,
        min_gain: float = 0.8,
        max_gain: float = 1.2,
    ) -> None:
        self.min_gamma = min_gamma
        self.max_gamma = max_gamma
        self.min_gain = min_gain
        self.max_gain = max_gain

    def extra_repr(self) -> str:
        # Single-line repr, like the other transformations: a triple-quoted f-string would leak
        # a newline and the source indentation into the output
        return (
            f"gamma_range=({self.min_gamma}, {self.max_gamma}), "
            f"gain_range=({self.min_gain}, {self.max_gain})"
        )

    def __call__(self, img: tf.Tensor) -> tf.Tensor:
        # Both parameters are drawn with Python's `random` module, once per call
        gamma = random.uniform(self.min_gamma, self.max_gamma)
        gain = random.uniform(self.min_gain, self.max_gain)
        return tf.image.adjust_gamma(img, gamma=gamma, gain=gain)
346
-
347
-
348
class RandomJpegQuality(NestedObject):
    """Randomly adjust jpeg quality of a 3 dimensional RGB image

    >>> import tensorflow as tf
    >>> from doctr.transforms import RandomJpegQuality
    >>> transfo = RandomJpegQuality()
    >>> out = transfo(tf.random.uniform(shape=[64, 64, 3], minval=0, maxval=1))

    Args:
        min_quality: int between [0, 100]
        max_quality: int between [0, 100]
    """

    def __init__(self, min_quality: int = 60, max_quality: int = 100) -> None:
        self.min_quality = min_quality
        self.max_quality = max_quality

    def extra_repr(self) -> str:
        # Report both bounds so that the repr reflects the whole configuration
        return f"min_quality={self.min_quality}, max_quality={self.max_quality}"

    def __call__(self, img: tf.Tensor) -> tf.Tensor:
        return tf.image.random_jpeg_quality(img, min_jpeg_quality=self.min_quality, max_jpeg_quality=self.max_quality)
370
-
371
-
372
class GaussianBlur(NestedObject):
    """Blurs a single image with a gaussian filter whose standard deviation is randomly drawn

    >>> import tensorflow as tf
    >>> from doctr.transforms import GaussianBlur
    >>> transfo = GaussianBlur(3, (.1, 5))
    >>> out = transfo(tf.random.uniform(shape=[64, 64, 3], minval=0, maxval=1))

    Args:
        kernel_shape: size of the blurring kernel
        std: min and max value of the standard deviation
    """

    def __init__(self, kernel_shape: int | Iterable[int], std: tuple[float, float]) -> None:
        self.kernel_shape = kernel_shape
        self.std = std

    def extra_repr(self) -> str:
        return f"kernel_shape={self.kernel_shape}, std={self.std}"

    def __call__(self, img: tf.Tensor) -> tf.Tensor:
        # A leading axis is added before filtering and squeezed out afterwards
        batched = img[tf.newaxis, ...]
        sigma = random.uniform(self.std[0], self.std[1])
        blurred = _gaussian_filter(
            batched,
            kernel_size=self.kernel_shape,
            sigma=sigma,
            mode="REFLECT",
        )
        return tf.squeeze(blurred, axis=0)
402
-
403
-
404
class ChannelShuffle(NestedObject):
    """Randomly permutes the channels (last axis) of a single 3-dimensional image"""

    def __init__(self):
        pass

    def __call__(self, img: tf.Tensor) -> tf.Tensor:
        # Bring the channel axis first, shuffle, then restore the original layout
        channels_first = tf.transpose(img, perm=[2, 0, 1])
        shuffled = tf.random.shuffle(channels_first)
        return tf.transpose(shuffled, perm=[1, 2, 0])
412
-
413
-
414
class GaussianNoise(NestedObject):
    """Adds random noise to the input tensor and clips the result to the valid value range

    NOTE: despite the class name, the noise is built from `tf.random.uniform`, i.e. it is
    uniformly distributed in [mean - std, mean + std].

    >>> import tensorflow as tf
    >>> from doctr.transforms import GaussianNoise
    >>> transfo = GaussianNoise(0., 1.)
    >>> out = transfo(tf.random.uniform(shape=[64, 64, 3], minval=0, maxval=1))

    Args:
        mean : center of the noise distribution
        std : half-width of the noise distribution
    """

    def __init__(self, mean: float = 0.0, std: float = 1.0) -> None:
        super().__init__()
        self.mean = mean
        self.std = std

    def __call__(self, x: tf.Tensor) -> tf.Tensor:
        # Rescale a [0, 1) uniform sample to [mean - std, mean + std)
        noise = self.mean + 2 * self.std * tf.random.uniform(x.shape) - self.std
        if x.dtype != tf.uint8:
            # Non-uint8 inputs are clipped to [0, 1]
            return tf.cast(tf.clip_by_value(x + noise, 0, 1), dtype=x.dtype)

        # uint8 images: scale the noise to the [0, 255] range, round and clip before casting back
        noisy = tf.math.round(tf.cast(x, dtype=tf.float32) + 255 * noise)
        return tf.cast(tf.clip_by_value(noisy, 0, 255), dtype=tf.uint8)

    def extra_repr(self) -> str:
        return f"mean={self.mean}, std={self.std}"
444
-
445
-
446
class RandomHorizontalFlip(NestedObject):
    """Randomly flips the input left-right and mirrors the relative box coordinates accordingly

    >>> import tensorflow as tf
    >>> from doctr.transforms import RandomHorizontalFlip
    >>> transfo = RandomHorizontalFlip(p=0.5)
    >>> image = tf.random.uniform(shape=[64, 64, 3], minval=0, maxval=1)
    >>> target = np.array([[0.1, 0.1, 0.4, 0.5] ], dtype= np.float32)
    >>> out = transfo(image, target)

    Args:
        p : probability of Horizontal Flip
    """

    def __init__(self, p: float) -> None:
        super().__init__()
        self.p = p

    def __call__(self, img: tf.Tensor | np.ndarray, target: np.ndarray) -> tuple[tf.Tensor, np.ndarray]:
        # No flip drawn: hand the inputs back untouched
        if not np.random.rand(1) <= self.p:
            return img, target

        flipped_img = tf.image.flip_left_right(img)
        # Work on a copy so that the caller's boxes are left unmodified
        flipped_target = target.copy()
        if target.shape[1:] == (4,):
            # Columns 0 and 2 are mirrored and swapped, which keeps column 0 <= column 2
            flipped_target[:, ::2] = 1 - target[:, [2, 0]]
        else:
            # Otherwise only the first coordinate of the last axis is mirrored
            flipped_target[..., 0] = 1 - target[..., 0]
        return flipped_img, flipped_target
475
-
476
-
477
class RandomShadow(NestedObject):
    """Adds a random shade to the input image and clips the result to the valid value range

    >>> import tensorflow as tf
    >>> from doctr.transforms import RandomShadow
    >>> transfo = RandomShadow((0., 1.))
    >>> out = transfo(tf.random.uniform(shape=[64, 64, 3], minval=0, maxval=1))

    Args:
        opacity_range : minimum and maximum opacity of the shade; any value that is not a tuple
            (including `None`) falls back to (0.2, 0.8)
    """

    def __init__(self, opacity_range: tuple[float, float] | None = None) -> None:
        super().__init__()
        if isinstance(opacity_range, tuple):
            self.opacity_range = opacity_range
        else:
            self.opacity_range = (0.2, 0.8)

    def __call__(self, x: tf.Tensor) -> tf.Tensor:
        if x.dtype != tf.uint8:
            # Non-uint8 inputs are shaded as is and clipped to [0, 1]
            return tf.clip_by_value(random_shadow(x, self.opacity_range), 0, 1)

        # uint8 images: shade a [0, 1] float version, then map back to [0, 255]
        as_unit_float = tf.cast(x, dtype=tf.float32) / 255
        shaded = tf.math.round(255 * random_shadow(as_unit_float, self.opacity_range))
        return tf.cast(tf.clip_by_value(shaded, 0, 255), dtype=tf.uint8)

    def extra_repr(self) -> str:
        return f"opacity_range={self.opacity_range}"
509
-
510
-
511
class RandomResize(NestedObject):
    """Randomly resize the input image and align corresponding targets

    >>> import numpy as np
    >>> import tensorflow as tf
    >>> from doctr.transforms import RandomResize
    >>> transfo = RandomResize((0.3, 0.9), preserve_aspect_ratio=True, symmetric_pad=True, p=0.5)
    >>> target = np.array([[0.1, 0.1, 0.4, 0.5]], dtype=np.float32)
    >>> out = transfo(tf.random.uniform(shape=[64, 64, 3], minval=0, maxval=1), target)

    Args:
        scale_range: range of the resizing factor for width and height (independently)
        preserve_aspect_ratio: whether to preserve the aspect ratio of the image,
            given a float value, the aspect ratio will be preserved with this probability
        symmetric_pad: whether to symmetrically pad the image,
            given a float value, the symmetric padding will be applied with this probability
        p: probability to apply the transformation
    """

    def __init__(
        self,
        scale_range: tuple[float, float] = (0.3, 0.9),
        preserve_aspect_ratio: bool | float = False,
        symmetric_pad: bool | float = False,
        p: float = 0.5,
    ):
        super().__init__()
        self.scale_range = scale_range
        self.preserve_aspect_ratio = preserve_aspect_ratio
        self.symmetric_pad = symmetric_pad
        self.p = p
        # The class (not an instance) is stored: a new resizer is built for every random size
        self._resize = Resize

    @staticmethod
    def _draw_flag(option: bool | float) -> bool:
        """Return a boolean option as is, or draw `True` with probability `option` when it is a float"""
        if isinstance(option, bool):
            return option
        return bool(np.random.rand(1) <= option)

    def __call__(self, img: tf.Tensor, target: np.ndarray) -> tuple[tf.Tensor, np.ndarray]:
        """Resize the image and its target with probability `p`, otherwise return them unchanged"""
        if np.random.rand(1) <= self.p:
            # Height and width factors are drawn independently
            scale_h = random.uniform(*self.scale_range)
            scale_w = random.uniform(*self.scale_range)
            new_size = (int(img.shape[-3] * scale_h), int(img.shape[-2] * scale_w))

            _img, _target = self._resize(
                new_size,
                # Each option is resolved against its OWN probability (the aspect-ratio draw
                # used to be compared with `symmetric_pad` by mistake)
                preserve_aspect_ratio=self._draw_flag(self.preserve_aspect_ratio),
                symmetric_pad=self._draw_flag(self.symmetric_pad),
            )(img, target)

            return _img, _target
        return img, target

    def extra_repr(self) -> str:
        return f"scale_range={self.scale_range}, preserve_aspect_ratio={self.preserve_aspect_ratio}, symmetric_pad={self.symmetric_pad}, p={self.p}"  # noqa: E501