python-doctr 0.8.0__py3-none-any.whl → 0.9.0__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to a supported public registry. It is provided for informational purposes only.
Files changed (85)
  1. doctr/__init__.py +1 -1
  2. doctr/contrib/__init__.py +0 -0
  3. doctr/contrib/artefacts.py +131 -0
  4. doctr/contrib/base.py +105 -0
  5. doctr/datasets/datasets/pytorch.py +2 -2
  6. doctr/datasets/generator/base.py +6 -5
  7. doctr/datasets/imgur5k.py +1 -1
  8. doctr/datasets/loader.py +1 -6
  9. doctr/datasets/utils.py +2 -1
  10. doctr/datasets/vocabs.py +9 -2
  11. doctr/file_utils.py +26 -12
  12. doctr/io/elements.py +40 -6
  13. doctr/io/html.py +2 -2
  14. doctr/io/image/pytorch.py +6 -8
  15. doctr/io/image/tensorflow.py +1 -1
  16. doctr/io/pdf.py +5 -2
  17. doctr/io/reader.py +6 -0
  18. doctr/models/__init__.py +0 -1
  19. doctr/models/_utils.py +57 -20
  20. doctr/models/builder.py +71 -13
  21. doctr/models/classification/mobilenet/pytorch.py +45 -9
  22. doctr/models/classification/mobilenet/tensorflow.py +38 -7
  23. doctr/models/classification/predictor/pytorch.py +18 -11
  24. doctr/models/classification/predictor/tensorflow.py +16 -10
  25. doctr/models/classification/textnet/pytorch.py +3 -3
  26. doctr/models/classification/textnet/tensorflow.py +3 -3
  27. doctr/models/classification/zoo.py +39 -15
  28. doctr/models/detection/__init__.py +1 -0
  29. doctr/models/detection/_utils/__init__.py +1 -0
  30. doctr/models/detection/_utils/base.py +66 -0
  31. doctr/models/detection/differentiable_binarization/base.py +4 -3
  32. doctr/models/detection/differentiable_binarization/pytorch.py +2 -2
  33. doctr/models/detection/differentiable_binarization/tensorflow.py +14 -18
  34. doctr/models/detection/fast/__init__.py +6 -0
  35. doctr/models/detection/fast/base.py +257 -0
  36. doctr/models/detection/fast/pytorch.py +442 -0
  37. doctr/models/detection/fast/tensorflow.py +428 -0
  38. doctr/models/detection/linknet/base.py +4 -3
  39. doctr/models/detection/predictor/pytorch.py +15 -1
  40. doctr/models/detection/predictor/tensorflow.py +15 -1
  41. doctr/models/detection/zoo.py +21 -4
  42. doctr/models/factory/hub.py +3 -12
  43. doctr/models/kie_predictor/base.py +9 -3
  44. doctr/models/kie_predictor/pytorch.py +41 -20
  45. doctr/models/kie_predictor/tensorflow.py +36 -16
  46. doctr/models/modules/layers/pytorch.py +89 -10
  47. doctr/models/modules/layers/tensorflow.py +88 -10
  48. doctr/models/modules/transformer/pytorch.py +2 -2
  49. doctr/models/predictor/base.py +77 -50
  50. doctr/models/predictor/pytorch.py +31 -20
  51. doctr/models/predictor/tensorflow.py +27 -17
  52. doctr/models/preprocessor/pytorch.py +4 -4
  53. doctr/models/preprocessor/tensorflow.py +3 -2
  54. doctr/models/recognition/master/pytorch.py +2 -2
  55. doctr/models/recognition/parseq/pytorch.py +4 -3
  56. doctr/models/recognition/parseq/tensorflow.py +4 -3
  57. doctr/models/recognition/sar/pytorch.py +7 -6
  58. doctr/models/recognition/sar/tensorflow.py +3 -9
  59. doctr/models/recognition/vitstr/pytorch.py +1 -1
  60. doctr/models/recognition/zoo.py +1 -1
  61. doctr/models/zoo.py +2 -2
  62. doctr/py.typed +0 -0
  63. doctr/transforms/functional/base.py +1 -1
  64. doctr/transforms/functional/pytorch.py +4 -4
  65. doctr/transforms/modules/base.py +37 -15
  66. doctr/transforms/modules/pytorch.py +66 -8
  67. doctr/transforms/modules/tensorflow.py +63 -7
  68. doctr/utils/fonts.py +7 -5
  69. doctr/utils/geometry.py +35 -12
  70. doctr/utils/metrics.py +33 -174
  71. doctr/utils/reconstitution.py +126 -0
  72. doctr/utils/visualization.py +5 -118
  73. doctr/version.py +1 -1
  74. {python_doctr-0.8.0.dist-info → python_doctr-0.9.0.dist-info}/METADATA +96 -91
  75. {python_doctr-0.8.0.dist-info → python_doctr-0.9.0.dist-info}/RECORD +79 -75
  76. {python_doctr-0.8.0.dist-info → python_doctr-0.9.0.dist-info}/WHEEL +1 -1
  77. doctr/models/artefacts/__init__.py +0 -2
  78. doctr/models/artefacts/barcode.py +0 -74
  79. doctr/models/artefacts/face.py +0 -63
  80. doctr/models/obj_detection/__init__.py +0 -1
  81. doctr/models/obj_detection/faster_rcnn/__init__.py +0 -4
  82. doctr/models/obj_detection/faster_rcnn/pytorch.py +0 -81
  83. {python_doctr-0.8.0.dist-info → python_doctr-0.9.0.dist-info}/LICENSE +0 -0
  84. {python_doctr-0.8.0.dist-info → python_doctr-0.9.0.dist-info}/top_level.txt +0 -0
  85. {python_doctr-0.8.0.dist-info → python_doctr-0.9.0.dist-info}/zip-safe +0 -0
doctr/transforms/functional/pytorch.py CHANGED
@@ -35,9 +35,9 @@ def invert_colors(img: torch.Tensor, min_val: float = 0.6) -> torch.Tensor:
     rgb_shift = min_val + (1 - min_val) * torch.rand(shift_shape)
     # Inverse the color
     if out.dtype == torch.uint8:
-        out = (out.to(dtype=rgb_shift.dtype) * rgb_shift).to(dtype=torch.uint8)  # type: ignore[attr-defined]
+        out = (out.to(dtype=rgb_shift.dtype) * rgb_shift).to(dtype=torch.uint8)
     else:
-        out = out * rgb_shift.to(dtype=out.dtype)  # type: ignore[attr-defined]
+        out = out * rgb_shift.to(dtype=out.dtype)
     # Inverse the color
     out = 255 - out if out.dtype == torch.uint8 else 1 - out
     return out
@@ -81,7 +81,7 @@ def rotate_sample(
     rotated_geoms: np.ndarray = rotate_abs_geoms(
         _geoms,
         angle,
-        img.shape[1:],
+        img.shape[1:],  # type: ignore[arg-type]
         expand,
     ).astype(np.float32)

@@ -132,7 +132,7 @@ def random_shadow(img: torch.Tensor, opacity_range: Tuple[float, float], **kwarg
     -------
         shaded image
     """
-    shadow_mask = create_shadow_mask(img.shape[1:], **kwargs)
+    shadow_mask = create_shadow_mask(img.shape[1:], **kwargs)  # type: ignore[arg-type]

     opacity = np.random.uniform(*opacity_range)
     shadow_tensor = 1 - torch.from_numpy(shadow_mask[None, ...])
doctr/transforms/modules/base.py CHANGED
@@ -5,7 +5,7 @@

 import math
 import random
-from typing import Any, Callable, Dict, List, Optional, Tuple, Union
+from typing import Any, Callable, List, Optional, Tuple, Union

 import numpy as np

@@ -168,11 +168,11 @@ class OneOf(NestedObject):
     def __init__(self, transforms: List[Callable[[Any], Any]]) -> None:
         self.transforms = transforms

-    def __call__(self, img: Any) -> Any:
+    def __call__(self, img: Any, target: Optional[np.ndarray] = None) -> Union[Any, Tuple[Any, np.ndarray]]:
         # Pick transformation
         transfo = self.transforms[int(random.random() * len(self.transforms))]
         # Apply
-        return transfo(img)
+        return transfo(img) if target is None else transfo(img, target)  # type: ignore[call-arg]


 class RandomApply(NestedObject):
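
With this change, OneOf can forward an optional detection target to whichever transform it samples. A minimal sketch of both call patterns, assuming the PyTorch backend (the chosen transforms and box values are illustrative):

>>> import numpy as np
>>> import torch
>>> from doctr.transforms import GaussianNoise, OneOf, RandomHorizontalFlip
>>> img = torch.rand(3, 64, 64)
>>> out = OneOf([GaussianNoise(0.0, 0.1)])(img)  # image-only call, unchanged behavior
>>> boxes = np.array([[0.1, 0.1, 0.4, 0.5]], dtype=np.float32)  # relative (xmin, ymin, xmax, ymax)
>>> out, boxes = OneOf([RandomHorizontalFlip(p=1.0)])(img, boxes)  # target forwarded to the sampled transform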
@@ -261,17 +261,39 @@ class RandomCrop(NestedObject):
     def extra_repr(self) -> str:
         return f"scale={self.scale}, ratio={self.ratio}"

-    def __call__(self, img: Any, target: Dict[str, np.ndarray]) -> Tuple[Any, Dict[str, np.ndarray]]:
+    def __call__(self, img: Any, target: np.ndarray) -> Tuple[Any, np.ndarray]:
         scale = random.uniform(self.scale[0], self.scale[1])
         ratio = random.uniform(self.ratio[0], self.ratio[1])
-        # Those might overflow
-        crop_h = math.sqrt(scale * ratio)
-        crop_w = math.sqrt(scale / ratio)
-        xmin, ymin = random.uniform(0, 1 - crop_w), random.uniform(0, 1 - crop_h)
-        xmax, ymax = xmin + crop_w, ymin + crop_h
-        # Clip them
-        xmin, ymin = max(xmin, 0), max(ymin, 0)
-        xmax, ymax = min(xmax, 1), min(ymax, 1)
-
-        croped_img, crop_boxes = F.crop_detection(img, target["boxes"], (xmin, ymin, xmax, ymax))
-        return croped_img, dict(boxes=crop_boxes)
+
+        height, width = img.shape[:2]
+
+        # Calculate crop size
+        crop_area = scale * width * height
+        aspect_ratio = ratio * (width / height)
+        crop_width = int(round(math.sqrt(crop_area * aspect_ratio)))
+        crop_height = int(round(math.sqrt(crop_area / aspect_ratio)))
+
+        # Ensure crop size does not exceed image dimensions
+        crop_width = min(crop_width, width)
+        crop_height = min(crop_height, height)
+
+        # Randomly select crop position
+        x = random.randint(0, width - crop_width)
+        y = random.randint(0, height - crop_height)
+
+        # relative crop box
+        crop_box = (x / width, y / height, (x + crop_width) / width, (y + crop_height) / height)
+        if target.shape[1:] == (4, 2):
+            min_xy = np.min(target, axis=1)
+            max_xy = np.max(target, axis=1)
+            _target = np.concatenate((min_xy, max_xy), axis=1)
+        else:
+            _target = target
+
+        # Crop image and targets
+        croped_img, crop_boxes = F.crop_detection(img, _target, crop_box)
+        # hard fallback if no box is kept
+        if crop_boxes.shape[0] == 0:
+            return img, target
+        # clip boxes
+        return croped_img, np.clip(crop_boxes, 0, 1)
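
RandomCrop now consumes a bare array of relative boxes, either (N, 4) boxes or (N, 4, 2) polygons, instead of a dict with "boxes" and "labels" keys, and returns the untouched input when no box survives the crop. A minimal sketch, assuming the TensorFlow backend with channels-last images and the default scale/ratio ranges:

>>> import numpy as np
>>> import tensorflow as tf
>>> from doctr.transforms import RandomCrop
>>> img = tf.random.uniform(shape=[64, 64, 3], minval=0, maxval=1)
>>> boxes = np.array([[0.1, 0.2, 0.6, 0.7]], dtype=np.float32)
>>> cropped_img, cropped_boxes = RandomCrop(scale=(0.08, 1.0), ratio=(0.75, 1.33))(img, boxes)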
doctr/transforms/modules/pytorch.py CHANGED
@@ -4,7 +4,7 @@
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.

 import math
-from typing import Any, Dict, Optional, Tuple, Union
+from typing import Optional, Tuple, Union

 import numpy as np
 import torch
@@ -15,7 +15,7 @@ from torchvision.transforms import transforms as T

 from ..functional.pytorch import random_shadow

-__all__ = ["Resize", "GaussianNoise", "ChannelShuffle", "RandomHorizontalFlip", "RandomShadow"]
+__all__ = ["Resize", "GaussianNoise", "ChannelShuffle", "RandomHorizontalFlip", "RandomShadow", "RandomResize"]


 class Resize(T.Resize):
@@ -135,9 +135,9 @@ class GaussianNoise(torch.nn.Module):
         # Reshape the distribution
         noise = self.mean + 2 * self.std * torch.rand(x.shape, device=x.device) - self.std
         if x.dtype == torch.uint8:
-            return (x + 255 * noise).round().clamp(0, 255).to(dtype=torch.uint8)  # type: ignore[attr-defined]
+            return (x + 255 * noise).round().clamp(0, 255).to(dtype=torch.uint8)
         else:
-            return (x + noise.to(dtype=x.dtype)).clamp(0, 1)  # type: ignore[attr-defined]
+            return (x + noise.to(dtype=x.dtype)).clamp(0, 1)

     def extra_repr(self) -> str:
         return f"mean={self.mean}, std={self.std}"
@@ -159,13 +159,16 @@ class RandomHorizontalFlip(T.RandomHorizontalFlip):
     """Randomly flip the input image horizontally"""

     def forward(
-        self, img: Union[torch.Tensor, Image], target: Dict[str, Any]
-    ) -> Tuple[Union[torch.Tensor, Image], Dict[str, Any]]:
+        self, img: Union[torch.Tensor, Image], target: np.ndarray
+    ) -> Tuple[Union[torch.Tensor, Image], np.ndarray]:
         if torch.rand(1) < self.p:
             _img = F.hflip(img)
             _target = target.copy()
             # Changing the relative bbox coordinates
-            _target["boxes"][:, ::2] = 1 - target["boxes"][:, [2, 0]]
+            if target.shape[1:] == (4,):
+                _target[:, ::2] = 1 - target[:, [2, 0]]
+            else:
+                _target[..., 0] = 1 - target[..., 0]
             return _img, _target
         return img, target
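
As in the TensorFlow counterpart further below, the flip target moves from a dict to a bare array, and both accepted layouts are realigned. A sketch of the two target shapes (PyTorch backend; values illustrative):

>>> import numpy as np
>>> import torch
>>> from doctr.transforms import RandomHorizontalFlip
>>> transfo = RandomHorizontalFlip(p=1.0)
>>> img = torch.rand(3, 64, 64)
>>> boxes = np.array([[0.1, 0.1, 0.4, 0.5]], dtype=np.float32)  # (N, 4): xmin becomes 1 - xmax
>>> _, flipped_boxes = transfo(img, boxes)
>>> polys = np.array([[[0.1, 0.1], [0.4, 0.1], [0.4, 0.5], [0.1, 0.5]]], dtype=np.float32)  # (N, 4, 2)
>>> _, flipped_polys = transfo(img, polys)  # x-coordinates mirrored pointwise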
@@ -199,7 +202,7 @@ class RandomShadow(torch.nn.Module):
                         self.opacity_range,
                     )
                 )
-                .round()  # type: ignore[attr-defined]
+                .round()
                 .clip(0, 255)
                 .to(dtype=torch.uint8)
             )
@@ -210,3 +213,58 @@

     def extra_repr(self) -> str:
         return f"opacity_range={self.opacity_range}"
+
+
+class RandomResize(torch.nn.Module):
+    """Randomly resize the input image and align corresponding targets
+
+    >>> import torch
+    >>> from doctr.transforms import RandomResize
+    >>> transfo = RandomResize((0.3, 0.9), preserve_aspect_ratio=True, symmetric_pad=True, p=0.5)
+    >>> out = transfo(torch.rand((3, 64, 64)))
+
+    Args:
+    ----
+        scale_range: range of the resizing factor for width and height (independently)
+        preserve_aspect_ratio: whether to preserve the aspect ratio of the image,
+            given a float value, the aspect ratio will be preserved with this probability
+        symmetric_pad: whether to symmetrically pad the image,
+            given a float value, the symmetric padding will be applied with this probability
+        p: probability to apply the transformation
+    """
+
+    def __init__(
+        self,
+        scale_range: Tuple[float, float] = (0.3, 0.9),
+        preserve_aspect_ratio: Union[bool, float] = False,
+        symmetric_pad: Union[bool, float] = False,
+        p: float = 0.5,
+    ) -> None:
+        super().__init__()
+        self.scale_range = scale_range
+        self.preserve_aspect_ratio = preserve_aspect_ratio
+        self.symmetric_pad = symmetric_pad
+        self.p = p
+        self._resize = Resize
+
+    def forward(self, img: torch.Tensor, target: np.ndarray) -> Tuple[torch.Tensor, np.ndarray]:
+        if torch.rand(1) < self.p:
+            scale_h = np.random.uniform(*self.scale_range)
+            scale_w = np.random.uniform(*self.scale_range)
+            new_size = (int(img.shape[-2] * scale_h), int(img.shape[-1] * scale_w))
+
+            _img, _target = self._resize(
+                new_size,
+                preserve_aspect_ratio=self.preserve_aspect_ratio
+                if isinstance(self.preserve_aspect_ratio, bool)
+                else bool(torch.rand(1) <= self.symmetric_pad),
+                symmetric_pad=self.symmetric_pad
+                if isinstance(self.symmetric_pad, bool)
+                else bool(torch.rand(1) <= self.symmetric_pad),
+            )(img, target)
+
+            return _img, _target
+        return img, target
+
+    def extra_repr(self) -> str:
+        return f"scale_range={self.scale_range}, preserve_aspect_ratio={self.preserve_aspect_ratio}, symmetric_pad={self.symmetric_pad}, p={self.p}"  # noqa: E501
doctr/transforms/modules/tensorflow.py CHANGED
@@ -4,7 +4,7 @@
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.

 import random
-from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Union
+from typing import Any, Callable, Iterable, List, Optional, Tuple, Union

 import numpy as np
 import tensorflow as tf
@@ -30,6 +30,7 @@ __all__ = [
     "GaussianNoise",
     "RandomHorizontalFlip",
     "RandomShadow",
+    "RandomResize",
 ]

@@ -457,10 +458,7 @@ class RandomHorizontalFlip(NestedObject):
     >>> from doctr.transforms import RandomHorizontalFlip
     >>> transfo = RandomHorizontalFlip(p=0.5)
     >>> image = tf.random.uniform(shape=[64, 64, 3], minval=0, maxval=1)
-    >>> target = {
-    >>>     "boxes": np.array([[0.1, 0.1, 0.4, 0.5] ], dtype= np.float32),
-    >>>     "labels": np.ones(1, dtype= np.int64)
-    >>> }
+    >>> target = np.array([[0.1, 0.1, 0.4, 0.5] ], dtype= np.float32)
     >>> out = transfo(image, target)

     Args:
@@ -472,12 +470,15 @@
         super().__init__()
         self.p = p

-    def __call__(self, img: Union[tf.Tensor, np.ndarray], target: Dict[str, Any]) -> Tuple[tf.Tensor, Dict[str, Any]]:
+    def __call__(self, img: Union[tf.Tensor, np.ndarray], target: np.ndarray) -> Tuple[tf.Tensor, np.ndarray]:
         if np.random.rand(1) <= self.p:
             _img = tf.image.flip_left_right(img)
             _target = target.copy()
             # Changing the relative bbox coordinates
-            _target["boxes"][:, ::2] = 1 - target["boxes"][:, [2, 0]]
+            if target.shape[1:] == (4,):
+                _target[:, ::2] = 1 - target[:, [2, 0]]
+            else:
+                _target[..., 0] = 1 - target[..., 0]
             return _img, _target
         return img, target

@@ -515,3 +516,58 @@ class RandomShadow(NestedObject):

     def extra_repr(self) -> str:
         return f"opacity_range={self.opacity_range}"
+
+
+class RandomResize(NestedObject):
+    """Randomly resize the input image and align corresponding targets
+
+    >>> import tensorflow as tf
+    >>> from doctr.transforms import RandomResize
+    >>> transfo = RandomResize((0.3, 0.9), preserve_aspect_ratio=True, symmetric_pad=True, p=0.5)
+    >>> out = transfo(tf.random.uniform(shape=[64, 64, 3], minval=0, maxval=1))
+
+    Args:
+    ----
+        scale_range: range of the resizing factor for width and height (independently)
+        preserve_aspect_ratio: whether to preserve the aspect ratio of the image,
+            given a float value, the aspect ratio will be preserved with this probability
+        symmetric_pad: whether to symmetrically pad the image,
+            given a float value, the symmetric padding will be applied with this probability
+        p: probability to apply the transformation
+    """
+
+    def __init__(
+        self,
+        scale_range: Tuple[float, float] = (0.3, 0.9),
+        preserve_aspect_ratio: Union[bool, float] = False,
+        symmetric_pad: Union[bool, float] = False,
+        p: float = 0.5,
+    ):
+        super().__init__()
+        self.scale_range = scale_range
+        self.preserve_aspect_ratio = preserve_aspect_ratio
+        self.symmetric_pad = symmetric_pad
+        self.p = p
+        self._resize = Resize
+
+    def __call__(self, img: tf.Tensor, target: np.ndarray) -> Tuple[tf.Tensor, np.ndarray]:
+        if np.random.rand(1) <= self.p:
+            scale_h = random.uniform(*self.scale_range)
+            scale_w = random.uniform(*self.scale_range)
+            new_size = (int(img.shape[-3] * scale_h), int(img.shape[-2] * scale_w))
+
+            _img, _target = self._resize(
+                new_size,
+                preserve_aspect_ratio=self.preserve_aspect_ratio
+                if isinstance(self.preserve_aspect_ratio, bool)
+                else bool(np.random.rand(1) <= self.symmetric_pad),
+                symmetric_pad=self.symmetric_pad
+                if isinstance(self.symmetric_pad, bool)
+                else bool(np.random.rand(1) <= self.symmetric_pad),
+            )(img, target)
+
+            return _img, _target
+        return img, target
+
+    def extra_repr(self) -> str:
+        return f"scale_range={self.scale_range}, preserve_aspect_ratio={self.preserve_aspect_ratio}, symmetric_pad={self.symmetric_pad}, p={self.p}"  # noqa: E501
doctr/utils/fonts.py CHANGED
@@ -5,14 +5,16 @@

 import logging
 import platform
-from typing import Optional
+from typing import Optional, Union

 from PIL import ImageFont

 __all__ = ["get_font"]


-def get_font(font_family: Optional[str] = None, font_size: int = 13) -> ImageFont.ImageFont:
+def get_font(
+    font_family: Optional[str] = None, font_size: int = 13
+) -> Union[ImageFont.FreeTypeFont, ImageFont.ImageFont]:
     """Resolves a compatible ImageFont for the system

     Args:
@@ -28,14 +30,14 @@ def get_font(font_family: Optional[str] = None, font_size: int = 13) -> ImageFon
     if font_family is None:
         try:
             font = ImageFont.truetype("FreeMono.ttf" if platform.system() == "Linux" else "Arial.ttf", font_size)
-        except OSError:
-            font = ImageFont.load_default()
+        except OSError:  # pragma: no cover
+            font = ImageFont.load_default()  # type: ignore[assignment]
             logging.warning(
                 "unable to load recommended font family. Loading default PIL font,"
                 "font size issues may be expected."
                 "To prevent this, it is recommended to specify the value of 'font_family'."
             )
-    else:
+    else:  # pragma: no cover
         font = ImageFont.truetype(font_family, font_size)

     return font
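
The widened return annotation covers both outcomes: a FreeTypeFont from ImageFont.truetype, or the bitmap fallback from ImageFont.load_default. A small sketch (the explicit font file is illustrative and must be resolvable by PIL):

>>> from doctr.utils.fonts import get_font
>>> font = get_font(font_size=24)  # FreeMono.ttf on Linux, Arial.ttf elsewhere, PIL default as fallback
>>> font = get_font("DejaVuSans.ttf", 24)  # an explicit family skips the fallback path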
doctr/utils/geometry.py CHANGED
@@ -25,6 +25,7 @@ __all__ = [
     "rotate_abs_geoms",
     "extract_crops",
     "extract_rcrops",
+    "detach_scores",
 ]

@@ -57,6 +58,28 @@ def polygon_to_bbox(polygon: Polygon4P) -> BoundingBox:
     return (min(x), min(y)), (max(x), max(y))


+def detach_scores(boxes: List[np.ndarray]) -> Tuple[List[np.ndarray], List[np.ndarray]]:
+    """Detach the objectness scores from box predictions
+
+    Args:
+    ----
+        boxes: list of arrays with boxes of shape (N, 5) or (N, 5, 2)
+
+    Returns:
+    -------
+        a tuple of two lists: the first one contains the boxes without the objectness scores,
+        the second one contains the objectness scores
+    """
+
+    def _detach(boxes: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
+        if boxes.ndim == 2:
+            return boxes[:, :-1], boxes[:, -1]
+        return boxes[:, :-1], boxes[:, -1, -1]
+
+    loc_preds, obj_scores = zip(*(_detach(box) for box in boxes))
+    return list(loc_preds), list(obj_scores)
+
+
 def resolve_enclosing_bbox(bboxes: Union[List[BoundingBox], np.ndarray]) -> Union[BoundingBox, np.ndarray]:
     """Compute enclosing bbox either from:
@@ -64,18 +87,18 @@ def resolve_enclosing_bbox(bboxes: Union[List[BoundingBox], np.ndarray]) -> Unio
     ----
         bboxes: boxes in one of the following formats:

-        - an array of boxes: (*, 5), where boxes have this shape:
-            (xmin, ymin, xmax, ymax, score)
+        - an array of boxes: (*, 4), where boxes have this shape:
+            (xmin, ymin, xmax, ymax)

         - a list of BoundingBox

     Returns:
     -------
-        a (1, 5) array (enclosing boxarray), or a BoundingBox
+        a (1, 4) array (enclosing boxarray), or a BoundingBox
     """
     if isinstance(bboxes, np.ndarray):
-        xmin, ymin, xmax, ymax, score = np.split(bboxes, 5, axis=1)
-        return np.array([xmin.min(), ymin.min(), xmax.max(), ymax.max(), score.mean()])
+        xmin, ymin, xmax, ymax = np.split(bboxes, 4, axis=1)
+        return np.array([xmin.min(), ymin.min(), xmax.max(), ymax.max()])
     else:
         x, y = zip(*[point for box in bboxes for point in box])
         return (min(x), min(y)), (max(x), max(y))
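
Since objectness scores are now detached upstream (see detach_scores above), the enclosing bbox is computed from plain 4-column arrays; a quick sketch:

>>> import numpy as np
>>> from doctr.utils.geometry import resolve_enclosing_bbox
>>> boxes = np.array([[0.1, 0.1, 0.4, 0.5], [0.3, 0.2, 0.8, 0.6]], dtype=np.float32)
>>> resolve_enclosing_bbox(boxes)
array([0.1, 0.1, 0.8, 0.6], dtype=float32)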
@@ -88,21 +111,21 @@ def resolve_enclosing_rbbox(rbboxes: List[np.ndarray], intermed_size: int = 1024
     ----
         rbboxes: boxes in one of the following formats:

-        - an array of boxes: (*, 5), where boxes have this shape:
-            (xmin, ymin, xmax, ymax, score)
+        - an array of boxes: (*, 4, 2), where boxes have this shape:
+            (x1, y1), (x2, y2), (x3, y3), (x4, y4)

         - a list of BoundingBox
         intermed_size: size of the intermediate image

     Returns:
     -------
-        a (1, 5) array (enclosing boxarray), or a BoundingBox
+        a (4, 2) array (enclosing rotated box)
     """
     cloud: np.ndarray = np.concatenate(rbboxes, axis=0)
     # Convert to absolute for minAreaRect
     cloud *= intermed_size
     rect = cv2.minAreaRect(cloud.astype(np.int32))
-    return cv2.boxPoints(rect) / intermed_size  # type: ignore[operator]
+    return cv2.boxPoints(rect) / intermed_size  # type: ignore[return-value]


 def rotate_abs_points(points: np.ndarray, angle: float = 0.0) -> np.ndarray:
@@ -232,7 +255,7 @@ def rotate_boxes(

     Args:
     ----
-        loc_preds: (N, 5) or (N, 4, 2) array of RELATIVE boxes
+        loc_preds: (N, 4) or (N, 4, 2) array of RELATIVE boxes
         angle: angle between -90 and +90 degrees
         orig_shape: shape of the origin image
         min_angle: minimum angle to rotate boxes
@@ -320,7 +343,7 @@ def rotate_image(
         # Pad height
         else:
             h_pad, w_pad = int(rot_img.shape[1] * image.shape[0] / image.shape[1] - rot_img.shape[0]), 0
-        rot_img = np.pad(rot_img, ((h_pad // 2, h_pad - h_pad // 2), (w_pad // 2, w_pad - w_pad // 2), (0, 0)))
+        rot_img = np.pad(rot_img, ((h_pad // 2, h_pad - h_pad // 2), (w_pad // 2, w_pad - w_pad // 2), (0, 0)))  # type: ignore[assignment]
         if preserve_origin_shape:
             # rescale
             rot_img = cv2.resize(rot_img, image.shape[:-1][::-1], interpolation=cv2.INTER_LINEAR)
@@ -453,4 +476,4 @@ def extract_rcrops(
         )
         for idx in range(_boxes.shape[0])
     ]
-    return crops
+    return crops  # type: ignore[return-value]