python-doctr 0.7.0__py3-none-any.whl → 0.8.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (137) hide show
  1. doctr/datasets/__init__.py +2 -0
  2. doctr/datasets/cord.py +6 -4
  3. doctr/datasets/datasets/base.py +3 -2
  4. doctr/datasets/datasets/pytorch.py +4 -2
  5. doctr/datasets/datasets/tensorflow.py +4 -2
  6. doctr/datasets/detection.py +6 -3
  7. doctr/datasets/doc_artefacts.py +2 -1
  8. doctr/datasets/funsd.py +7 -8
  9. doctr/datasets/generator/base.py +3 -2
  10. doctr/datasets/generator/pytorch.py +3 -1
  11. doctr/datasets/generator/tensorflow.py +3 -1
  12. doctr/datasets/ic03.py +3 -2
  13. doctr/datasets/ic13.py +2 -1
  14. doctr/datasets/iiit5k.py +6 -4
  15. doctr/datasets/iiithws.py +2 -1
  16. doctr/datasets/imgur5k.py +3 -2
  17. doctr/datasets/loader.py +4 -2
  18. doctr/datasets/mjsynth.py +2 -1
  19. doctr/datasets/ocr.py +2 -1
  20. doctr/datasets/orientation.py +40 -0
  21. doctr/datasets/recognition.py +3 -2
  22. doctr/datasets/sroie.py +2 -1
  23. doctr/datasets/svhn.py +2 -1
  24. doctr/datasets/svt.py +3 -2
  25. doctr/datasets/synthtext.py +2 -1
  26. doctr/datasets/utils.py +27 -11
  27. doctr/datasets/vocabs.py +26 -1
  28. doctr/datasets/wildreceipt.py +111 -0
  29. doctr/file_utils.py +3 -1
  30. doctr/io/elements.py +52 -35
  31. doctr/io/html.py +5 -3
  32. doctr/io/image/base.py +5 -4
  33. doctr/io/image/pytorch.py +12 -7
  34. doctr/io/image/tensorflow.py +11 -6
  35. doctr/io/pdf.py +5 -4
  36. doctr/io/reader.py +13 -5
  37. doctr/models/_utils.py +30 -53
  38. doctr/models/artefacts/barcode.py +4 -3
  39. doctr/models/artefacts/face.py +4 -2
  40. doctr/models/builder.py +58 -43
  41. doctr/models/classification/__init__.py +1 -0
  42. doctr/models/classification/magc_resnet/pytorch.py +5 -2
  43. doctr/models/classification/magc_resnet/tensorflow.py +5 -2
  44. doctr/models/classification/mobilenet/pytorch.py +16 -4
  45. doctr/models/classification/mobilenet/tensorflow.py +29 -20
  46. doctr/models/classification/predictor/pytorch.py +3 -2
  47. doctr/models/classification/predictor/tensorflow.py +2 -1
  48. doctr/models/classification/resnet/pytorch.py +23 -13
  49. doctr/models/classification/resnet/tensorflow.py +33 -26
  50. doctr/models/classification/textnet/__init__.py +6 -0
  51. doctr/models/classification/textnet/pytorch.py +275 -0
  52. doctr/models/classification/textnet/tensorflow.py +267 -0
  53. doctr/models/classification/vgg/pytorch.py +4 -2
  54. doctr/models/classification/vgg/tensorflow.py +5 -2
  55. doctr/models/classification/vit/pytorch.py +9 -3
  56. doctr/models/classification/vit/tensorflow.py +9 -3
  57. doctr/models/classification/zoo.py +7 -2
  58. doctr/models/core.py +1 -1
  59. doctr/models/detection/__init__.py +1 -0
  60. doctr/models/detection/_utils/pytorch.py +7 -1
  61. doctr/models/detection/_utils/tensorflow.py +7 -3
  62. doctr/models/detection/core.py +9 -3
  63. doctr/models/detection/differentiable_binarization/base.py +37 -25
  64. doctr/models/detection/differentiable_binarization/pytorch.py +80 -104
  65. doctr/models/detection/differentiable_binarization/tensorflow.py +74 -55
  66. doctr/models/detection/fast/__init__.py +6 -0
  67. doctr/models/detection/fast/base.py +256 -0
  68. doctr/models/detection/fast/pytorch.py +442 -0
  69. doctr/models/detection/fast/tensorflow.py +428 -0
  70. doctr/models/detection/linknet/base.py +12 -5
  71. doctr/models/detection/linknet/pytorch.py +28 -15
  72. doctr/models/detection/linknet/tensorflow.py +68 -88
  73. doctr/models/detection/predictor/pytorch.py +16 -6
  74. doctr/models/detection/predictor/tensorflow.py +13 -5
  75. doctr/models/detection/zoo.py +19 -16
  76. doctr/models/factory/hub.py +20 -10
  77. doctr/models/kie_predictor/base.py +2 -1
  78. doctr/models/kie_predictor/pytorch.py +28 -36
  79. doctr/models/kie_predictor/tensorflow.py +27 -27
  80. doctr/models/modules/__init__.py +1 -0
  81. doctr/models/modules/layers/__init__.py +6 -0
  82. doctr/models/modules/layers/pytorch.py +166 -0
  83. doctr/models/modules/layers/tensorflow.py +175 -0
  84. doctr/models/modules/transformer/pytorch.py +24 -22
  85. doctr/models/modules/transformer/tensorflow.py +6 -4
  86. doctr/models/modules/vision_transformer/pytorch.py +2 -4
  87. doctr/models/modules/vision_transformer/tensorflow.py +2 -4
  88. doctr/models/obj_detection/faster_rcnn/pytorch.py +4 -2
  89. doctr/models/predictor/base.py +14 -3
  90. doctr/models/predictor/pytorch.py +26 -29
  91. doctr/models/predictor/tensorflow.py +25 -22
  92. doctr/models/preprocessor/pytorch.py +14 -9
  93. doctr/models/preprocessor/tensorflow.py +10 -5
  94. doctr/models/recognition/core.py +4 -1
  95. doctr/models/recognition/crnn/pytorch.py +23 -16
  96. doctr/models/recognition/crnn/tensorflow.py +25 -17
  97. doctr/models/recognition/master/base.py +4 -1
  98. doctr/models/recognition/master/pytorch.py +20 -9
  99. doctr/models/recognition/master/tensorflow.py +20 -8
  100. doctr/models/recognition/parseq/base.py +4 -1
  101. doctr/models/recognition/parseq/pytorch.py +28 -22
  102. doctr/models/recognition/parseq/tensorflow.py +22 -11
  103. doctr/models/recognition/predictor/_utils.py +3 -2
  104. doctr/models/recognition/predictor/pytorch.py +3 -2
  105. doctr/models/recognition/predictor/tensorflow.py +2 -1
  106. doctr/models/recognition/sar/pytorch.py +14 -7
  107. doctr/models/recognition/sar/tensorflow.py +23 -14
  108. doctr/models/recognition/utils.py +5 -1
  109. doctr/models/recognition/vitstr/base.py +4 -1
  110. doctr/models/recognition/vitstr/pytorch.py +22 -13
  111. doctr/models/recognition/vitstr/tensorflow.py +21 -10
  112. doctr/models/recognition/zoo.py +4 -2
  113. doctr/models/utils/pytorch.py +24 -6
  114. doctr/models/utils/tensorflow.py +22 -3
  115. doctr/models/zoo.py +21 -3
  116. doctr/transforms/functional/base.py +8 -3
  117. doctr/transforms/functional/pytorch.py +23 -6
  118. doctr/transforms/functional/tensorflow.py +25 -5
  119. doctr/transforms/modules/base.py +12 -5
  120. doctr/transforms/modules/pytorch.py +10 -12
  121. doctr/transforms/modules/tensorflow.py +17 -9
  122. doctr/utils/common_types.py +1 -1
  123. doctr/utils/data.py +4 -2
  124. doctr/utils/fonts.py +3 -2
  125. doctr/utils/geometry.py +95 -26
  126. doctr/utils/metrics.py +36 -22
  127. doctr/utils/multithreading.py +5 -3
  128. doctr/utils/repr.py +3 -1
  129. doctr/utils/visualization.py +31 -8
  130. doctr/version.py +1 -1
  131. {python_doctr-0.7.0.dist-info → python_doctr-0.8.1.dist-info}/METADATA +67 -31
  132. python_doctr-0.8.1.dist-info/RECORD +173 -0
  133. {python_doctr-0.7.0.dist-info → python_doctr-0.8.1.dist-info}/WHEEL +1 -1
  134. python_doctr-0.7.0.dist-info/RECORD +0 -161
  135. {python_doctr-0.7.0.dist-info → python_doctr-0.8.1.dist-info}/LICENSE +0 -0
  136. {python_doctr-0.7.0.dist-info → python_doctr-0.8.1.dist-info}/top_level.txt +0 -0
  137. {python_doctr-0.7.0.dist-info → python_doctr-0.8.1.dist-info}/zip-safe +0 -0
@@ -1,4 +1,4 @@
1
- # Copyright (C) 2021-2023, Mindee.
1
+ # Copyright (C) 2021-2024, Mindee.
2
2
 
3
3
  # This program is licensed under the Apache License 2.0.
4
4
  # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
@@ -18,15 +18,26 @@ __all__ = ["invert_colors", "rotate_sample", "crop_detection", "random_shadow"]
18
18
 
19
19
 
20
20
  def invert_colors(img: torch.Tensor, min_val: float = 0.6) -> torch.Tensor:
21
+ """Invert the colors of an image
22
+
23
+ Args:
24
+ ----
25
+ img : torch.Tensor, the image to invert
26
+ min_val : minimum value of the random shift
27
+
28
+ Returns:
29
+ -------
30
+ the inverted image
31
+ """
21
32
  out = F.rgb_to_grayscale(img, num_output_channels=3)
22
33
  # Random RGB shift
23
34
  shift_shape = [img.shape[0], 3, 1, 1] if img.ndim == 4 else [3, 1, 1]
24
35
  rgb_shift = min_val + (1 - min_val) * torch.rand(shift_shape)
25
36
  # Apply the random RGB shift
26
37
  if out.dtype == torch.uint8:
27
- out = (out.to(dtype=rgb_shift.dtype) * rgb_shift).to(dtype=torch.uint8)
38
+ out = (out.to(dtype=rgb_shift.dtype) * rgb_shift).to(dtype=torch.uint8) # type: ignore[attr-defined]
28
39
  else:
29
- out = out * rgb_shift.to(dtype=out.dtype)
40
+ out = out * rgb_shift.to(dtype=out.dtype) # type: ignore[attr-defined]
30
41
  # Invert the color
31
42
  out = 255 - out if out.dtype == torch.uint8 else 1 - out
32
43
  return out
@@ -41,12 +52,14 @@ def rotate_sample(
41
52
  """Rotate image around the center, interpolation=NEAREST, pad with 0 (black)
42
53
 
43
54
  Args:
55
+ ----
44
56
  img: image to rotate
45
57
  geoms: array of geometries of shape (N, 4) or (N, 4, 2)
46
58
  angle: angle in degrees. +: counter-clockwise, -: clockwise
47
59
  expand: whether the image should be padded before the rotation
48
60
 
49
61
  Returns:
62
+ -------
50
63
  A tuple of rotated img (tensor), rotated geometries of shape (N, 4, 2)
51
64
  """
52
65
  rotated_img = F.rotate(img, angle=angle, fill=0, expand=expand) # Interpolation NEAREST by default
@@ -68,7 +81,7 @@ def rotate_sample(
68
81
  rotated_geoms: np.ndarray = rotate_abs_geoms(
69
82
  _geoms,
70
83
  angle,
71
- img.shape[1:], # type: ignore[arg-type]
84
+ img.shape[1:],
72
85
  expand,
73
86
  ).astype(np.float32)
74
87
 
@@ -85,11 +98,13 @@ def crop_detection(
85
98
  """Crop an image and associated bboxes
86
99
 
87
100
  Args:
101
+ ----
88
102
  img: image to crop
89
103
  boxes: array of boxes to clip, absolute (int) or relative (float)
90
104
  crop_box: box (xmin, ymin, xmax, ymax) to crop the image. Relative coords.
91
105
 
92
106
  Returns:
107
+ -------
93
108
  A tuple of cropped image, cropped boxes, where the image is not resized.
94
109
  """
95
110
  if any(val < 0 or val > 1 for val in crop_box):
@@ -108,14 +123,16 @@ def random_shadow(img: torch.Tensor, opacity_range: Tuple[float, float], **kwarg
108
123
  """Apply a random shadow to a given image
109
124
 
110
125
  Args:
126
+ ----
111
127
  img: image to modify
112
128
  opacity_range: the minimum and maximum desired opacity of the shadow
129
+ **kwargs: additional arguments to pass to `create_shadow_mask`
113
130
 
114
131
  Returns:
132
+ -------
115
133
  shaded image
116
134
  """
117
-
118
- shadow_mask = create_shadow_mask(img.shape[1:], **kwargs) # type: ignore[arg-type]
135
+ shadow_mask = create_shadow_mask(img.shape[1:], **kwargs)
119
136
 
120
137
  opacity = np.random.uniform(*opacity_range)
121
138
  shadow_tensor = 1 - torch.from_numpy(shadow_mask[None, ...])
@@ -1,4 +1,4 @@
1
- # Copyright (C) 2021-2023, Mindee.
1
+ # Copyright (C) 2021-2024, Mindee.
2
2
 
3
3
  # This program is licensed under the Apache License 2.0.
4
4
  # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
@@ -15,10 +15,21 @@ from doctr.utils.geometry import compute_expanded_shape, rotate_abs_geoms
15
15
 
16
16
  from .base import create_shadow_mask, crop_boxes
17
17
 
18
- __all__ = ["invert_colors", "rotate_sample", "crop_detection", "random_shadow"]
18
+ __all__ = ["invert_colors", "rotate_sample", "crop_detection", "random_shadow", "rotated_img_tensor"]
19
19
 
20
20
 
21
21
  def invert_colors(img: tf.Tensor, min_val: float = 0.6) -> tf.Tensor:
22
+ """Invert the colors of an image
23
+
24
+ Args:
25
+ ----
26
+ img : tf.Tensor, the image to invert
27
+ min_val : minimum value of the random shift
28
+
29
+ Returns:
30
+ -------
31
+ the inverted image
32
+ """
22
33
  out = tf.image.rgb_to_grayscale(img) # Convert to gray
23
34
  # Random RGB shift
24
35
  shift_shape = [img.shape[0], 1, 1, 3] if img.ndim == 4 else [1, 1, 3]
@@ -37,11 +48,13 @@ def rotated_img_tensor(img: tf.Tensor, angle: float, expand: bool = False) -> tf
37
48
  """Rotate image around the center, interpolation=NEAREST, pad with 0 (black)
38
49
 
39
50
  Args:
51
+ ----
40
52
  img: image to rotate
41
53
  angle: angle in degrees. +: counter-clockwise, -: clockwise
42
54
  expand: whether the image should be padded before the rotation
43
55
 
44
56
  Returns:
57
+ -------
45
58
  the rotated image (tensor)
46
59
  """
47
60
  # Compute the expanded padding
@@ -94,12 +107,14 @@ def rotate_sample(
94
107
  """Rotate image around the center, interpolation=NEAREST, pad with 0 (black)
95
108
 
96
109
  Args:
110
+ ----
97
111
  img: image to rotate
98
112
  geoms: array of geometries of shape (N, 4) or (N, 4, 2)
99
113
  angle: angle in degrees. +: counter-clockwise, -: clockwise
100
114
  expand: whether the image should be padded before the rotation
101
115
 
102
116
  Returns:
117
+ -------
103
118
  A tuple of rotated img (tensor), rotated boxes (np array)
104
119
  """
105
120
  # Rotate the image
@@ -134,11 +149,13 @@ def crop_detection(
134
149
  """Crop an image and associated bboxes
135
150
 
136
151
  Args:
152
+ ----
137
153
  img: image to crop
138
154
  boxes: array of boxes to clip, absolute (int) or relative (float)
139
155
  crop_box: box (xmin, ymin, xmax, ymax) to crop the image. Relative coords.
140
156
 
141
157
  Returns:
158
+ -------
142
159
  A tuple of cropped image, cropped boxes, where the image is not resized.
143
160
  """
144
161
  if any(val < 0 or val > 1 for val in crop_box):
@@ -164,14 +181,15 @@ def _gaussian_filter(
164
181
  Adapted from: https://github.com/tensorflow/addons/blob/master/tensorflow_addons/image/filters.py
165
182
 
166
183
  Args:
167
-
168
- input: image to filter of shape (N, H, W, C)
184
+ ----
185
+ img: image to filter of shape (N, H, W, C)
169
186
  kernel_size: kernel size of the filter
170
187
  sigma: standard deviation of the Gaussian filter
171
188
  mode: padding mode, one of "CONSTANT", "REFLECT", "SYMMETRIC"
172
189
  pad_value: value to pad the image with
173
190
 
174
191
  Returns:
192
+ -------
175
193
  A tensor of shape (N, H, W, C)
176
194
  """
177
195
  ksize = tf.convert_to_tensor(tf.broadcast_to(kernel_size, [2]), dtype=tf.int32)
@@ -221,13 +239,15 @@ def random_shadow(img: tf.Tensor, opacity_range: Tuple[float, float], **kwargs)
221
239
  """Apply a random shadow to a given image
222
240
 
223
241
  Args:
242
+ ----
224
243
  img: image to modify
225
244
  opacity_range: the minimum and maximum desired opacity of the shadow
245
+ **kwargs: additional arguments to pass to `create_shadow_mask`
226
246
 
227
247
  Returns:
248
+ -------
228
249
  shadowed image
229
250
  """
230
-
231
251
  shadow_mask = create_shadow_mask(img.shape[:2], **kwargs)
232
252
 
233
253
  opacity = np.random.uniform(*opacity_range)
@@ -1,11 +1,11 @@
1
- # Copyright (C) 2021-2023, Mindee.
1
+ # Copyright (C) 2021-2024, Mindee.
2
2
 
3
3
  # This program is licensed under the Apache License 2.0.
4
4
  # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
5
5
 
6
6
  import math
7
7
  import random
8
- from typing import Any, Callable, Dict, List, Tuple
8
+ from typing import Any, Callable, Dict, List, Optional, Tuple, Union
9
9
 
10
10
  import numpy as np
11
11
 
@@ -42,6 +42,7 @@ class SampleCompose(NestedObject):
42
42
  >>> out, out_boxes = transfos(torch.rand(8, 64, 64, 3), np.zeros((2, 4)))
43
43
 
44
44
  Args:
45
+ ----
45
46
  transforms: list of transformation modules
46
47
  """
47
48
 
@@ -81,6 +82,7 @@ class ImageTransform(NestedObject):
81
82
  >>> out, _ = transfo(torch.rand(8, 64, 64, 3), None)
82
83
 
83
84
  Args:
85
+ ----
84
86
  transform: the image transformation module to wrap
85
87
  """
86
88
 
@@ -119,6 +121,7 @@ class ColorInversion(NestedObject):
119
121
  >>> out = transfo(torch.rand(8, 64, 64, 3))
120
122
 
121
123
  Args:
124
+ ----
122
125
  min_val: range [min_val, 1] to colorize RGB pixels
123
126
  """
124
127
 
@@ -156,6 +159,7 @@ class OneOf(NestedObject):
156
159
  >>> out = transfo(torch.rand(1, 64, 64, 3))
157
160
 
158
161
  Args:
162
+ ----
159
163
  transforms: list of transformations, one only will be picked
160
164
  """
161
165
 
@@ -195,6 +199,7 @@ class RandomApply(NestedObject):
195
199
  >>> out = transfo(torch.rand(1, 64, 64, 3))
196
200
 
197
201
  Args:
202
+ ----
198
203
  transform: transformation to apply
199
204
  p: probability to apply
200
205
  """
@@ -206,10 +211,10 @@ class RandomApply(NestedObject):
206
211
  def extra_repr(self) -> str:
207
212
  return f"transform={self.transform}, p={self.p}"
208
213
 
209
- def __call__(self, img: Any) -> Any:
214
+ def __call__(self, img: Any, target: Optional[np.ndarray] = None) -> Union[Any, Tuple[Any, np.ndarray]]:
210
215
  if random.random() < self.p:
211
- return self.transform(img)
212
- return img
216
+ return self.transform(img) if target is None else self.transform(img, target) # type: ignore[call-arg]
217
+ return img if target is None else (img, target)
213
218
 
214
219
 
215
220
  class RandomRotate(NestedObject):
@@ -219,6 +224,7 @@ class RandomRotate(NestedObject):
219
224
  :align: center
220
225
 
221
226
  Args:
227
+ ----
222
228
  max_angle: maximum angle for rotation, in degrees. Angles will be uniformly picked in
223
229
  [-max_angle, max_angle]
224
230
  expand: whether the image should be padded before the rotation
@@ -243,6 +249,7 @@ class RandomCrop(NestedObject):
243
249
  """Randomly crop a tensor image and its boxes
244
250
 
245
251
  Args:
252
+ ----
246
253
  scale: tuple of floats, relative (min_area, max_area) of the crop
247
254
  ratio: tuple of float, relative (min_ratio, max_ratio) where ratio = h/w
248
255
  """
@@ -1,4 +1,4 @@
1
- # Copyright (C) 2021-2023, Mindee.
1
+ # Copyright (C) 2021-2024, Mindee.
2
2
 
3
3
  # This program is licensed under the Apache License 2.0.
4
4
  # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
@@ -19,6 +19,8 @@ __all__ = ["Resize", "GaussianNoise", "ChannelShuffle", "RandomHorizontalFlip",
19
19
 
20
20
 
21
21
  class Resize(T.Resize):
22
+ """Resize the input image to the given size"""
23
+
22
24
  def __init__(
23
25
  self,
24
26
  size: Union[int, Tuple[int, int]],
@@ -119,6 +121,7 @@ class GaussianNoise(torch.nn.Module):
119
121
  >>> out = transfo(torch.rand((3, 224, 224)))
120
122
 
121
123
  Args:
124
+ ----
122
125
  mean : mean of the gaussian distribution
123
126
  std : std of the gaussian distribution
124
127
  """
@@ -132,9 +135,9 @@ class GaussianNoise(torch.nn.Module):
132
135
  # Reshape the distribution
133
136
  noise = self.mean + 2 * self.std * torch.rand(x.shape, device=x.device) - self.std
134
137
  if x.dtype == torch.uint8:
135
- return (x + 255 * noise).round().clamp(0, 255).to(dtype=torch.uint8)
138
+ return (x + 255 * noise).round().clamp(0, 255).to(dtype=torch.uint8) # type: ignore[attr-defined]
136
139
  else:
137
- return (x + noise.to(dtype=x.dtype)).clamp(0, 1)
140
+ return (x + noise.to(dtype=x.dtype)).clamp(0, 1) # type: ignore[attr-defined]
138
141
 
139
142
  def extra_repr(self) -> str:
140
143
  return f"mean={self.mean}, std={self.std}"
@@ -153,17 +156,11 @@ class ChannelShuffle(torch.nn.Module):
153
156
 
154
157
 
155
158
  class RandomHorizontalFlip(T.RandomHorizontalFlip):
159
+ """Randomly flip the input image horizontally"""
160
+
156
161
  def forward(
157
162
  self, img: Union[torch.Tensor, Image], target: Dict[str, Any]
158
163
  ) -> Tuple[Union[torch.Tensor, Image], Dict[str, Any]]:
159
- """
160
- Args:
161
- img: Image to be flipped.
162
- target: Dictionary with boxes (in relative coordinates of shape (N, 4)) and labels as keys
163
-
164
- Returns:
165
- Tuple of PIL Image or Tensor and target
166
- """
167
164
  if torch.rand(1) < self.p:
168
165
  _img = F.hflip(img)
169
166
  _target = target.copy()
@@ -182,6 +179,7 @@ class RandomShadow(torch.nn.Module):
182
179
  >>> out = transfo(torch.rand((3, 64, 64)))
183
180
 
184
181
  Args:
182
+ ----
185
183
  opacity_range : minimum and maximum opacity of the shade
186
184
  """
187
185
 
@@ -201,7 +199,7 @@ class RandomShadow(torch.nn.Module):
201
199
  self.opacity_range,
202
200
  )
203
201
  )
204
- .round()
202
+ .round() # type: ignore[attr-defined]
205
203
  .clip(0, 255)
206
204
  .to(dtype=torch.uint8)
207
205
  )
@@ -1,4 +1,4 @@
1
- # Copyright (C) 2021-2023, Mindee.
1
+ # Copyright (C) 2021-2024, Mindee.
2
2
 
3
3
  # This program is licensed under the Apache License 2.0.
4
4
  # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
@@ -42,6 +42,7 @@ class Compose(NestedObject):
42
42
  >>> out = transfos(tf.random.uniform(shape=[64, 64, 3], minval=0, maxval=1))
43
43
 
44
44
  Args:
45
+ ----
45
46
  transforms: list of transformation modules
46
47
  """
47
48
 
@@ -66,6 +67,7 @@ class Resize(NestedObject):
66
67
  >>> out = transfo(tf.random.uniform(shape=[64, 64, 3], minval=0, maxval=1))
67
68
 
68
69
  Args:
70
+ ----
69
71
  output_size: expected output size
70
72
  method: interpolation method
71
73
  preserve_aspect_ratio: if `True`, preserve aspect ratio and pad the rest with zeros
@@ -83,6 +85,7 @@ class Resize(NestedObject):
83
85
  self.method = method
84
86
  self.preserve_aspect_ratio = preserve_aspect_ratio
85
87
  self.symmetric_pad = symmetric_pad
88
+ self.antialias = True
86
89
 
87
90
  if isinstance(self.output_size, int):
88
91
  self.wanted_size = (self.output_size, self.output_size)
@@ -104,7 +107,7 @@ class Resize(NestedObject):
104
107
  ) -> Union[tf.Tensor, Tuple[tf.Tensor, np.ndarray]]:
105
108
  input_dtype = img.dtype
106
109
 
107
- img = tf.image.resize(img, self.wanted_size, self.method, self.preserve_aspect_ratio)
110
+ img = tf.image.resize(img, self.wanted_size, self.method, self.preserve_aspect_ratio, self.antialias)
108
111
  # It will produce an un-padded resized image, with a side shorter than wanted if we preserve aspect ratio
109
112
  raw_shape = img.shape[:2]
110
113
  if self.preserve_aspect_ratio:
@@ -156,6 +159,7 @@ class Normalize(NestedObject):
156
159
  >>> out = transfo(tf.random.uniform(shape=[8, 64, 64, 3], minval=0, maxval=1))
157
160
 
158
161
  Args:
162
+ ----
159
163
  mean: average value per channel
160
164
  std: standard deviation per channel
161
165
  """
@@ -182,6 +186,7 @@ class LambdaTransformation(NestedObject):
182
186
  >>> out = transfo(tf.random.uniform(shape=[8, 64, 64, 3], minval=0, maxval=1))
183
187
 
184
188
  Args:
189
+ ----
185
190
  fn: the function to be applied to the input tensor
186
191
  """
187
192
 
@@ -219,6 +224,7 @@ class RandomBrightness(NestedObject):
219
224
  >>> out = transfo(tf.random.uniform(shape=[8, 64, 64, 3], minval=0, maxval=1))
220
225
 
221
226
  Args:
227
+ ----
222
228
  max_delta: offset to add to each pixel is randomly picked in [-max_delta, max_delta]
223
229
  p: probability to apply transformation
224
230
  """
@@ -243,6 +249,7 @@ class RandomContrast(NestedObject):
243
249
  >>> out = transfo(tf.random.uniform(shape=[8, 64, 64, 3], minval=0, maxval=1))
244
250
 
245
251
  Args:
252
+ ----
246
253
  delta: multiplicative factor is picked in [1-delta, 1+delta] (reduce contrast if factor<1)
247
254
  """
248
255
 
@@ -266,6 +273,7 @@ class RandomSaturation(NestedObject):
266
273
  >>> out = transfo(tf.random.uniform(shape=[8, 64, 64, 3], minval=0, maxval=1))
267
274
 
268
275
  Args:
276
+ ----
269
277
  delta: multiplicative factor is picked in [1-delta, 1+delta] (reduce saturation if factor<1)
270
278
  """
271
279
 
@@ -288,6 +296,7 @@ class RandomHue(NestedObject):
288
296
  >>> out = transfo(tf.random.uniform(shape=[8, 64, 64, 3], minval=0, maxval=1))
289
297
 
290
298
  Args:
299
+ ----
291
300
  max_delta: offset to add to each pixel is randomly picked in [-max_delta, max_delta]
292
301
  """
293
302
 
@@ -310,6 +319,7 @@ class RandomGamma(NestedObject):
310
319
  >>> out = transfo(tf.random.uniform(shape=[8, 64, 64, 3], minval=0, maxval=1))
311
320
 
312
321
  Args:
322
+ ----
313
323
  min_gamma: non-negative real number, lower bound for gamma param
314
324
  max_gamma: non-negative real number, upper bound for gamma
315
325
  min_gain: lower bound for constant multiplier
@@ -347,6 +357,7 @@ class RandomJpegQuality(NestedObject):
347
357
  >>> out = transfo(tf.random.uniform(shape=[64, 64, 3], minval=0, maxval=1))
348
358
 
349
359
  Args:
360
+ ----
350
361
  min_quality: int between [0, 100]
351
362
  max_quality: int between [0, 100]
352
363
  """
@@ -371,6 +382,7 @@ class GaussianBlur(NestedObject):
371
382
  >>> out = transfo(tf.random.uniform(shape=[64, 64, 3], minval=0, maxval=1))
372
383
 
373
384
  Args:
385
+ ----
374
386
  kernel_shape: size of the blurring kernel
375
387
  std: min and max value of the standard deviation
376
388
  """
@@ -414,6 +426,7 @@ class GaussianNoise(NestedObject):
414
426
  >>> out = transfo(tf.random.uniform(shape=[64, 64, 3], minval=0, maxval=1))
415
427
 
416
428
  Args:
429
+ ----
417
430
  mean : mean of the gaussian distribution
418
431
  std : std of the gaussian distribution
419
432
  """
@@ -451,6 +464,7 @@ class RandomHorizontalFlip(NestedObject):
451
464
  >>> out = transfo(image, target)
452
465
 
453
466
  Args:
467
+ ----
454
468
  p : probability of Horizontal Flip
455
469
  """
456
470
 
@@ -459,13 +473,6 @@ class RandomHorizontalFlip(NestedObject):
459
473
  self.p = p
460
474
 
461
475
  def __call__(self, img: Union[tf.Tensor, np.ndarray], target: Dict[str, Any]) -> Tuple[tf.Tensor, Dict[str, Any]]:
462
- """
463
- Args:
464
- img: Image to be flipped.
465
- target: Dictionary with boxes (in relative coordinates of shape (N, 4)) and labels as keys
466
- Returns:
467
- Tuple of numpy nd-array or Tensor and target
468
- """
469
476
  if np.random.rand(1) <= self.p:
470
477
  _img = tf.image.flip_left_right(img)
471
478
  _target = target.copy()
@@ -484,6 +491,7 @@ class RandomShadow(NestedObject):
484
491
  >>> out = transfo(tf.random.uniform(shape=[64, 64, 3], minval=0, maxval=1))
485
492
 
486
493
  Args:
494
+ ----
487
495
  opacity_range : minimum and maximum opacity of the shade
488
496
  """
489
497
 
@@ -1,4 +1,4 @@
1
- # Copyright (C) 2021-2023, Mindee.
1
+ # Copyright (C) 2021-2024, Mindee.
2
2
 
3
3
  # This program is licensed under the Apache License 2.0.
4
4
  # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
doctr/utils/data.py CHANGED
@@ -1,4 +1,4 @@
1
- # Copyright (C) 2021-2023, Mindee.
1
+ # Copyright (C) 2021-2024, Mindee.
2
2
 
3
3
  # This program is licensed under the Apache License 2.0.
4
4
  # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
@@ -56,6 +56,7 @@ def download_from_url(
56
56
  >>> download_from_url("https://yoursource.com/yourcheckpoint-yourhash.zip")
57
57
 
58
58
  Args:
59
+ ----
59
60
  url: the URL of the file to download
60
61
  file_name: optional name of the file once downloaded
61
62
  hash_prefix: optional expected SHA256 hash of the file
@@ -63,12 +64,13 @@ def download_from_url(
63
64
  cache_subdir: subfolder to use in the cache
64
65
 
65
66
  Returns:
67
+ -------
66
68
  the location of the downloaded file
67
69
 
68
70
  Note:
71
+ ----
69
72
  You can change cache directory location by using `DOCTR_CACHE_DIR` environment variable.
70
73
  """
71
-
72
74
  if not isinstance(file_name, str):
73
75
  file_name = url.rpartition("/")[-1].split("&")[0]
74
76
 
doctr/utils/fonts.py CHANGED
@@ -1,4 +1,4 @@
1
- # Copyright (C) 2021-2023, Mindee.
1
+ # Copyright (C) 2021-2024, Mindee.
2
2
 
3
3
  # This program is licensed under the Apache License 2.0.
4
4
  # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
@@ -16,13 +16,14 @@ def get_font(font_family: Optional[str] = None, font_size: int = 13) -> ImageFon
16
16
  """Resolves a compatible ImageFont for the system
17
17
 
18
18
  Args:
19
+ ----
19
20
  font_family: the font family to use
20
21
  font_size: the size of the font upon rendering
21
22
 
22
23
  Returns:
24
+ -------
23
25
  the Pillow font
24
26
  """
25
-
26
27
  # Font selection
27
28
  if font_family is None:
28
29
  try: