nrtk-albumentations 2.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of nrtk-albumentations might be problematic.
- albumentations/__init__.py +21 -0
- albumentations/augmentations/__init__.py +23 -0
- albumentations/augmentations/blur/__init__.py +0 -0
- albumentations/augmentations/blur/functional.py +438 -0
- albumentations/augmentations/blur/transforms.py +1633 -0
- albumentations/augmentations/crops/__init__.py +0 -0
- albumentations/augmentations/crops/functional.py +494 -0
- albumentations/augmentations/crops/transforms.py +3647 -0
- albumentations/augmentations/dropout/__init__.py +0 -0
- albumentations/augmentations/dropout/channel_dropout.py +134 -0
- albumentations/augmentations/dropout/coarse_dropout.py +567 -0
- albumentations/augmentations/dropout/functional.py +1017 -0
- albumentations/augmentations/dropout/grid_dropout.py +166 -0
- albumentations/augmentations/dropout/mask_dropout.py +274 -0
- albumentations/augmentations/dropout/transforms.py +461 -0
- albumentations/augmentations/dropout/xy_masking.py +186 -0
- albumentations/augmentations/geometric/__init__.py +0 -0
- albumentations/augmentations/geometric/distortion.py +1238 -0
- albumentations/augmentations/geometric/flip.py +752 -0
- albumentations/augmentations/geometric/functional.py +4151 -0
- albumentations/augmentations/geometric/pad.py +676 -0
- albumentations/augmentations/geometric/resize.py +956 -0
- albumentations/augmentations/geometric/rotate.py +864 -0
- albumentations/augmentations/geometric/transforms.py +1962 -0
- albumentations/augmentations/mixing/__init__.py +0 -0
- albumentations/augmentations/mixing/domain_adaptation.py +787 -0
- albumentations/augmentations/mixing/domain_adaptation_functional.py +453 -0
- albumentations/augmentations/mixing/functional.py +878 -0
- albumentations/augmentations/mixing/transforms.py +832 -0
- albumentations/augmentations/other/__init__.py +0 -0
- albumentations/augmentations/other/lambda_transform.py +180 -0
- albumentations/augmentations/other/type_transform.py +261 -0
- albumentations/augmentations/pixel/__init__.py +0 -0
- albumentations/augmentations/pixel/functional.py +4226 -0
- albumentations/augmentations/pixel/transforms.py +7556 -0
- albumentations/augmentations/spectrogram/__init__.py +0 -0
- albumentations/augmentations/spectrogram/transform.py +220 -0
- albumentations/augmentations/text/__init__.py +0 -0
- albumentations/augmentations/text/functional.py +272 -0
- albumentations/augmentations/text/transforms.py +299 -0
- albumentations/augmentations/transforms3d/__init__.py +0 -0
- albumentations/augmentations/transforms3d/functional.py +393 -0
- albumentations/augmentations/transforms3d/transforms.py +1422 -0
- albumentations/augmentations/utils.py +249 -0
- albumentations/core/__init__.py +0 -0
- albumentations/core/bbox_utils.py +920 -0
- albumentations/core/composition.py +1885 -0
- albumentations/core/hub_mixin.py +299 -0
- albumentations/core/keypoints_utils.py +521 -0
- albumentations/core/label_manager.py +339 -0
- albumentations/core/pydantic.py +239 -0
- albumentations/core/serialization.py +352 -0
- albumentations/core/transforms_interface.py +976 -0
- albumentations/core/type_definitions.py +127 -0
- albumentations/core/utils.py +605 -0
- albumentations/core/validation.py +129 -0
- albumentations/pytorch/__init__.py +1 -0
- albumentations/pytorch/transforms.py +189 -0
- nrtk_albumentations-2.1.0.dist-info/METADATA +196 -0
- nrtk_albumentations-2.1.0.dist-info/RECORD +62 -0
- nrtk_albumentations-2.1.0.dist-info/WHEEL +4 -0
- nrtk_albumentations-2.1.0.dist-info/licenses/LICENSE +21 -0
albumentations/core/bbox_utils.py
@@ -0,0 +1,920 @@
"""Utilities for handling bounding box operations during image augmentation.

This module provides tools for processing bounding boxes in various formats (COCO, Pascal VOC, YOLO),
converting between coordinate systems, normalizing and denormalizing coordinates, filtering
boxes based on visibility and size criteria, and performing transformations on boxes to match
image augmentations. It forms the core functionality for all bounding box-related operations
in the albumentations library.
"""

from __future__ import annotations

from collections.abc import Sequence
from typing import Any, Literal

import numpy as np

from albumentations.augmentations.utils import handle_empty_array
from albumentations.core.type_definitions import MONO_CHANNEL_DIMENSIONS, NUM_BBOXES_COLUMNS_IN_ALBUMENTATIONS

from .utils import DataProcessor, Params, ShapeType

__all__ = [
    "BboxParams",
    "BboxProcessor",
    "check_bboxes",
    "convert_bboxes_from_albumentations",
    "convert_bboxes_to_albumentations",
    "denormalize_bboxes",
    "filter_bboxes",
    "normalize_bboxes",
    "union_of_bboxes",
]

BBOX_WITH_LABEL_SHAPE = 5

class BboxParams(Params):
    """Parameters for bounding box transforms.

    Args:
        format (Literal["coco", "pascal_voc", "albumentations", "yolo"]): Format of bounding boxes.
            Should be one of:
            - 'coco': [x_min, y_min, width, height], e.g. [97, 12, 150, 200].
            - 'pascal_voc': [x_min, y_min, x_max, y_max], e.g. [97, 12, 247, 212].
            - 'albumentations': like pascal_voc but normalized in [0, 1] range, e.g. [0.2, 0.3, 0.4, 0.5].
            - 'yolo': [x_center, y_center, width, height] normalized in [0, 1] range, e.g. [0.1, 0.2, 0.3, 0.4].

        label_fields (Sequence[str] | None): List of fields that are joined with boxes,
            e.g., ['class_labels', 'scores']. Default: None.

        min_area (float): Minimum area of a bounding box. All bounding boxes whose visible area in pixels is less than
            this value will be removed. Default: 0.0.

        min_visibility (float): Minimum fraction of area for a bounding box to remain in the result.
            Should be in [0.0, 1.0] range. Default: 0.0.

        min_width (float): Minimum width of a bounding box in pixels or normalized units. Bounding boxes with width
            less than this value will be removed. Default: 0.0.

        min_height (float): Minimum height of a bounding box in pixels or normalized units. Bounding boxes with height
            less than this value will be removed. Default: 0.0.

        check_each_transform (bool): If True, performs checks for each dual transform. Default: True.

        clip (bool): If True, clips bounding boxes to image boundaries before applying any transform. Default: False.

        filter_invalid_bboxes (bool): If True, filters out invalid bounding boxes (e.g., boxes with negative dimensions
            or boxes where x_max < x_min or y_max < y_min) at the beginning of the pipeline. If clip=True, filtering
            is applied after clipping. Default: False.

        max_accept_ratio (float | None): Maximum allowed aspect ratio for bounding boxes. The aspect ratio is calculated
            as max(width/height, height/width), so it's always >= 1. Boxes with aspect ratio greater than this value
            will be filtered out. For example, if max_accept_ratio=3.0, boxes with width:height or height:width ratios
            greater than 3:1 will be removed. Set to None to disable aspect ratio filtering. Default: None.

    Note:
        The processing order for bounding boxes is:
        1. Convert to albumentations format (normalized pascal_voc)
        2. Clip boxes to image boundaries (if clip=True)
        3. Filter invalid boxes (if filter_invalid_bboxes=True)
        4. Apply transformations
        5. Filter boxes based on min_area, min_visibility, min_width, min_height
        6. Convert back to the original format

    Examples:
        >>> # Create BboxParams for COCO format with class labels
        >>> bbox_params = BboxParams(
        ...     format='coco',
        ...     label_fields=['class_labels'],
        ...     min_area=1024,
        ...     min_visibility=0.1
        ... )

        >>> # Create BboxParams that clips and filters invalid boxes
        >>> bbox_params = BboxParams(
        ...     format='pascal_voc',
        ...     clip=True,
        ...     filter_invalid_bboxes=True
        ... )
        >>> # Create BboxParams that filters extremely elongated boxes
        >>> bbox_params = BboxParams(
        ...     format='yolo',
        ...     max_accept_ratio=5.0,  # Filter boxes with aspect ratio > 5:1
        ...     clip=True
        ... )

    """

    def __init__(
        self,
        format: Literal["coco", "pascal_voc", "albumentations", "yolo"],  # noqa: A002
        label_fields: Sequence[Any] | None = None,
        min_area: float = 0.0,
        min_visibility: float = 0.0,
        min_width: float = 0.0,
        min_height: float = 0.0,
        check_each_transform: bool = True,
        clip: bool = False,
        filter_invalid_bboxes: bool = False,
        max_accept_ratio: float | None = None,
    ):
        super().__init__(format, label_fields)
        self.min_area = min_area
        self.min_visibility = min_visibility
        self.min_width = min_width
        self.min_height = min_height
        self.check_each_transform = check_each_transform
        self.clip = clip
        self.filter_invalid_bboxes = filter_invalid_bboxes
        if max_accept_ratio is not None and max_accept_ratio < 1.0:
            raise ValueError(
                "max_accept_ratio must be >= 1.0 when provided, as aspect ratio is calculated as max(w/h, h/w)",
            )
        self.max_accept_ratio = max_accept_ratio  # e.g., 5.0

    def to_dict_private(self) -> dict[str, Any]:
        """Get the private dictionary representation of bounding box parameters.

        Returns:
            dict[str, Any]: Dictionary containing the bounding box parameters.

        """
        data = super().to_dict_private()
        data.update(
            {
                "min_area": self.min_area,
                "min_visibility": self.min_visibility,
                "min_width": self.min_width,
                "min_height": self.min_height,
                "check_each_transform": self.check_each_transform,
                "clip": self.clip,
                "max_accept_ratio": self.max_accept_ratio,
            },
        )
        return data

    @classmethod
    def is_serializable(cls) -> bool:
        """Check if the bounding box parameters are serializable.

        Returns:
            bool: Always returns True as BboxParams is serializable.

        """
        return True

    @classmethod
    def get_class_fullname(cls) -> str:
        """Get the full name of the class.

        Returns:
            str: The string "BboxParams".

        """
        return "BboxParams"

    def __repr__(self) -> str:
        return (
            f"BboxParams(format={self.format}, label_fields={self.label_fields}, min_area={self.min_area},"
            f" min_visibility={self.min_visibility}, min_width={self.min_width}, min_height={self.min_height},"
            f" check_each_transform={self.check_each_transform}, clip={self.clip})"
        )
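Usage sketch (illustrative, not part of the packaged file): BboxParams is normally passed to A.Compose, which builds the bounding-box processing pipeline internally; the transform choice and box values below are made up for illustration.

import albumentations as A
import numpy as np

# Clip and filter COCO boxes while flipping the image (illustrative pipeline).
transform = A.Compose(
    [A.HorizontalFlip(p=1.0)],
    bbox_params=A.BboxParams(format="coco", label_fields=["class_labels"], clip=True, min_visibility=0.1),
)

image = np.zeros((100, 100, 3), dtype=np.uint8)
result = transform(image=image, bboxes=[[10, 20, 30, 40]], class_labels=[1])
print(result["bboxes"], result["class_labels"])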

class BboxProcessor(DataProcessor):
    """Processor for bounding box transformations.

    This class handles the preprocessing and postprocessing of bounding boxes during augmentation pipeline,
    including format conversion, validation, clipping, and filtering.

    Args:
        params (BboxParams): Parameters that control bounding box processing.
            See BboxParams class for details.
        additional_targets (dict[str, str] | None): Dictionary with additional targets to process.
            Keys are names of additional targets, values are their types.
            For example: {'bbox2': 'bboxes'} will handle 'bbox2' as another bounding box target.
            Default: None.

    Note:
        The processing order for bounding boxes is:
        1. Convert to albumentations format (normalized pascal_voc)
        2. Clip boxes to image boundaries (if params.clip=True)
        3. Filter invalid boxes (if params.filter_invalid_bboxes=True)
        4. Apply transformations
        5. Filter boxes based on min_area, min_visibility, min_width, min_height
        6. Convert back to the original format

    Examples:
        >>> import albumentations as A
        >>> # Process COCO format bboxes with class labels
        >>> params = A.BboxParams(
        ...     format='coco',
        ...     label_fields=['class_labels'],
        ...     min_area=1024,
        ...     min_visibility=0.1
        ... )
        >>> processor = BboxProcessor(params)
        >>>
        >>> # Process multiple bbox fields
        >>> params = A.BboxParams('pascal_voc')
        >>> processor = BboxProcessor(
        ...     params,
        ...     additional_targets={'bbox2': 'bboxes'}
        ... )

    """

    def __init__(self, params: BboxParams, additional_targets: dict[str, str] | None = None):
        super().__init__(params, additional_targets)

    @property
    def default_data_name(self) -> str:
        """Returns the default key for bounding box data in transformations.

        Returns:
            str: The string 'bboxes'.

        """
        return "bboxes"

    def ensure_data_valid(self, data: dict[str, Any]) -> None:
        """Validates the input bounding box data.

        Checks that:
        - Bounding boxes have labels (either in the bbox array or in label_fields)
        - All specified label_fields exist in the data

        Args:
            data (dict[str, Any]): Dict with bounding boxes and optional label fields.

        Raises:
            ValueError: If bounding boxes don't have labels or if label_fields are invalid.

        """
        if self.params.label_fields and not all(i in data for i in self.params.label_fields):
            msg = "Your 'label_fields' are not valid - them must have same names as params in dict"
            raise ValueError(msg)

    def filter(self, data: np.ndarray, shape: ShapeType) -> np.ndarray:
        """Filter bounding boxes based on size and visibility criteria.

        Args:
            data (np.ndarray): Array of bounding boxes in Albumentations format.
            shape (ShapeType): Shape information for validation.

        Returns:
            np.ndarray: Filtered bounding boxes that meet the criteria.

        """
        self.params: BboxParams
        return filter_bboxes(
            data,
            shape,
            min_area=self.params.min_area,
            min_visibility=self.params.min_visibility,
            min_width=self.params.min_width,
            min_height=self.params.min_height,
            max_accept_ratio=self.params.max_accept_ratio,
        )

    def check_and_convert(
        self,
        data: np.ndarray,
        shape: ShapeType,
        direction: Literal["to", "from"] = "to",
    ) -> np.ndarray:
        """Converts bounding boxes between formats and applies preprocessing/postprocessing.

        Args:
            data (np.ndarray): Array of bounding boxes to process.
            shape (ShapeType): Image shape as dict with height and width keys.
            direction (Literal["to", "from"]): Direction of conversion:
                - "to": Convert from original format to albumentations format
                - "from": Convert from albumentations format to original format
                Default: "to".

        Returns:
            np.ndarray: Processed bounding boxes.

        Note:
            When direction="to":
            1. Converts to albumentations format
            2. Clips boxes if params.clip=True
            3. Filters invalid boxes if params.filter_invalid_bboxes=True
            4. Validates remaining boxes

            When direction="from":
            1. Validates boxes
            2. Converts back to original format

        """
        if direction == "to":
            # First convert to albumentations format
            if self.params.format == "albumentations":
                converted_data = data
            else:
                converted_data = convert_bboxes_to_albumentations(
                    data,
                    self.params.format,
                    shape,
                    check_validity=False,  # Don't check validity yet
                )

            if self.params.clip and converted_data.size > 0:
                converted_data[:, :4] = np.clip(converted_data[:, :4], 0, 1)

            # Then filter invalid boxes if requested
            if self.params.filter_invalid_bboxes:
                converted_data = filter_bboxes(
                    converted_data,
                    shape,
                    min_area=0,
                    min_visibility=0,
                    min_width=0,
                    min_height=0,
                )

            # Finally check the remaining boxes
            self.check(converted_data, shape)
            return converted_data
        self.check(data, shape)
        if self.params.format == "albumentations":
            return data
        return convert_bboxes_from_albumentations(data, self.params.format, shape)

    def check(self, data: np.ndarray, shape: ShapeType) -> None:
        """Check if bounding boxes are valid.

        Args:
            data (np.ndarray): Array of bounding boxes to validate.
            shape (ShapeType): Shape to check against.

        """
        check_bboxes(data)

    def convert_from_albumentations(self, data: np.ndarray, shape: ShapeType) -> np.ndarray:
        """Convert bounding boxes from internal Albumentations format to the specified format.

        Args:
            data (np.ndarray): Bounding boxes in Albumentations format.
            shape (ShapeType): Shape information for validation.

        Returns:
            np.ndarray: Converted bounding boxes in the target format.

        """
        return np.array(
            convert_bboxes_from_albumentations(data, self.params.format, shape, check_validity=True),
            dtype=data.dtype,
        )

    def convert_to_albumentations(self, data: np.ndarray, shape: ShapeType) -> np.ndarray:
        """Convert bounding boxes from the specified format to internal Albumentations format.

        Args:
            data (np.ndarray): Bounding boxes in source format.
            shape (ShapeType): Shape information for validation.

        Returns:
            np.ndarray: Converted bounding boxes in Albumentations format.

        """
        if self.params.clip:
            data_np = convert_bboxes_to_albumentations(data, self.params.format, shape, check_validity=False)
            data_np = filter_bboxes(data_np, shape, min_area=0, min_visibility=0, min_width=0, min_height=0)
            check_bboxes(data_np)
            return data_np

        return convert_bboxes_to_albumentations(data, self.params.format, shape, check_validity=True)
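A small sketch of the processor's check_and_convert round trip, called directly on a NumPy array (not part of the packaged file; box values are made up):

import numpy as np
import albumentations as A
from albumentations.core.bbox_utils import BboxProcessor

processor = BboxProcessor(A.BboxParams(format="coco", clip=True))
shape = {"height": 100, "width": 200}

coco_boxes = np.array([[10.0, 20.0, 30.0, 40.0]])                          # x_min, y_min, width, height
internal = processor.check_and_convert(coco_boxes, shape, direction="to")   # normalized pascal_voc
restored = processor.check_and_convert(internal, shape, direction="from")   # back to coco
print(internal, restored)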
@handle_empty_array("bboxes")
|
|
394
|
+
def normalize_bboxes(bboxes: np.ndarray, shape: ShapeType | tuple[int, int]) -> np.ndarray:
|
|
395
|
+
"""Normalize array of bounding boxes.
|
|
396
|
+
|
|
397
|
+
Args:
|
|
398
|
+
bboxes (np.ndarray): Denormalized bounding boxes `[(x_min, y_min, x_max, y_max, ...)]`.
|
|
399
|
+
shape (ShapeType | tuple[int, int]): Image shape `(height, width)`.
|
|
400
|
+
|
|
401
|
+
Returns:
|
|
402
|
+
np.ndarray: Normalized bounding boxes `[(x_min, y_min, x_max, y_max, ...)]`.
|
|
403
|
+
|
|
404
|
+
"""
|
|
405
|
+
if isinstance(shape, tuple):
|
|
406
|
+
rows, cols = shape[:2]
|
|
407
|
+
else:
|
|
408
|
+
rows, cols = shape["height"], shape["width"]
|
|
409
|
+
|
|
410
|
+
normalized = bboxes.copy().astype(float)
|
|
411
|
+
normalized[:, [0, 2]] /= cols
|
|
412
|
+
normalized[:, [1, 3]] /= rows
|
|
413
|
+
return normalized
|
|
414
|
+
|
|
415
|
+
|
|
416
|
+
@handle_empty_array("bboxes")
|
|
417
|
+
def denormalize_bboxes(
|
|
418
|
+
bboxes: np.ndarray,
|
|
419
|
+
shape: ShapeType | tuple[int, int],
|
|
420
|
+
) -> np.ndarray:
|
|
421
|
+
"""Denormalize array of bounding boxes.
|
|
422
|
+
|
|
423
|
+
Args:
|
|
424
|
+
bboxes (np.ndarray): Normalized bounding boxes `[(x_min, y_min, x_max, y_max, ...)]`.
|
|
425
|
+
shape (ShapeType | tuple[int, int]): Image shape `(height, width)`.
|
|
426
|
+
|
|
427
|
+
Returns:
|
|
428
|
+
np.ndarray: Denormalized bounding boxes `[(x_min, y_min, x_max, y_max, ...)]`.
|
|
429
|
+
|
|
430
|
+
"""
|
|
431
|
+
scale_factors = (shape[1], shape[0]) if isinstance(shape, tuple) else (shape["width"], shape["height"])
|
|
432
|
+
|
|
433
|
+
# Vectorized scaling of bbox coordinates
|
|
434
|
+
return bboxes * np.array([*scale_factors, *scale_factors, *[1] * (bboxes.shape[1] - 4)], dtype=float)
|
|
435
|
+
|
|
436
|
+
|
|
437
|
+
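A quick numeric sketch of the normalize/denormalize round trip (illustrative values, not part of the packaged file):

import numpy as np
from albumentations.core.bbox_utils import denormalize_bboxes, normalize_bboxes

shape = {"height": 100, "width": 200}
pixel_boxes = np.array([[20.0, 30.0, 60.0, 90.0]])   # pascal_voc in pixels
normalized = normalize_bboxes(pixel_boxes, shape)     # -> [[0.1, 0.3, 0.3, 0.9]]
restored = denormalize_bboxes(normalized, shape)      # -> [[20., 30., 60., 90.]]
print(normalized, restored)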
def calculate_bbox_areas_in_pixels(bboxes: np.ndarray, shape: ShapeType) -> np.ndarray:
|
|
438
|
+
"""Calculate areas for multiple bounding boxes.
|
|
439
|
+
This function computes the areas of bounding boxes given their normalized coordinates
|
|
440
|
+
and the dimensions of the image they belong to. The bounding boxes are expected to be
|
|
441
|
+
in the format [x_min, y_min, x_max, y_max] with normalized coordinates (0 to 1).
|
|
442
|
+
|
|
443
|
+
Args:
|
|
444
|
+
bboxes (np.ndarray): A numpy array of shape (N, 4+) where N is the number of bounding boxes.
|
|
445
|
+
Each row contains [x_min, y_min, x_max, y_max] in normalized coordinates.
|
|
446
|
+
Additional columns beyond the first 4 are ignored.
|
|
447
|
+
shape (ShapeType): A tuple containing the height and width of the image (height, width).
|
|
448
|
+
|
|
449
|
+
Returns:
|
|
450
|
+
np.ndarray: A 1D numpy array of shape (N,) containing the areas of the bounding boxes in pixels.
|
|
451
|
+
Returns an empty array if the input `bboxes` is empty.
|
|
452
|
+
|
|
453
|
+
Note:
|
|
454
|
+
- The function assumes that the input bounding boxes are valid (i.e., x_max > x_min and y_max > y_min).
|
|
455
|
+
Invalid bounding boxes may result in negative areas.
|
|
456
|
+
- The function preserves the input array and creates a copy for internal calculations.
|
|
457
|
+
- The returned areas are in pixel units, not normalized.
|
|
458
|
+
|
|
459
|
+
Examples:
|
|
460
|
+
>>> bboxes = np.array([[0.1, 0.1, 0.5, 0.5], [0.2, 0.2, 0.8, 0.8]])
|
|
461
|
+
>>> image_shape = (100, 100)
|
|
462
|
+
>>> areas = calculate_bbox_areas(bboxes, image_shape)
|
|
463
|
+
>>> print(areas)
|
|
464
|
+
[1600. 3600.]
|
|
465
|
+
|
|
466
|
+
"""
|
|
467
|
+
if len(bboxes) == 0:
|
|
468
|
+
return np.array([], dtype=np.float32)
|
|
469
|
+
|
|
470
|
+
# Unpack shape to variables
|
|
471
|
+
height, width = shape["height"], shape["width"]
|
|
472
|
+
|
|
473
|
+
# Directly compute denormalized bbox dimensions and areas
|
|
474
|
+
widths = (bboxes[:, 2] - bboxes[:, 0]) * width
|
|
475
|
+
heights = (bboxes[:, 3] - bboxes[:, 1]) * height
|
|
476
|
+
|
|
477
|
+
return widths * heights
|
|
478
|
+
|
|
479
|
+
|
|
480
|
+
@handle_empty_array("bboxes")
|
|
481
|
+
def convert_bboxes_to_albumentations(
|
|
482
|
+
bboxes: np.ndarray,
|
|
483
|
+
source_format: Literal["coco", "pascal_voc", "yolo"],
|
|
484
|
+
shape: ShapeType,
|
|
485
|
+
check_validity: bool = False,
|
|
486
|
+
) -> np.ndarray:
|
|
487
|
+
"""Convert bounding boxes from a specified format to the format used by albumentations:
|
|
488
|
+
normalized coordinates of top-left and bottom-right corners of the bounding box in the form of
|
|
489
|
+
`(x_min, y_min, x_max, y_max)` e.g. `(0.15, 0.27, 0.67, 0.5)`.
|
|
490
|
+
|
|
491
|
+
Args:
|
|
492
|
+
bboxes (np.ndarray): A numpy array of bounding boxes with shape (num_bboxes, 4+).
|
|
493
|
+
source_format (Literal["coco", "pascal_voc", "yolo"]): Format of the input bounding boxes.
|
|
494
|
+
shape (ShapeType): Image shape (height, width).
|
|
495
|
+
check_validity (bool): Check if all boxes are valid boxes.
|
|
496
|
+
|
|
497
|
+
Returns:
|
|
498
|
+
np.ndarray: An array of bounding boxes in albumentations format with shape (num_bboxes, 4+).
|
|
499
|
+
|
|
500
|
+
Raises:
|
|
501
|
+
ValueError: If `source_format` is not 'coco', 'pascal_voc', or 'yolo'.
|
|
502
|
+
ValueError: If in YOLO format, any coordinates are not in the range (0, 1].
|
|
503
|
+
|
|
504
|
+
"""
|
|
505
|
+
if source_format not in {"coco", "pascal_voc", "yolo"}:
|
|
506
|
+
raise ValueError(
|
|
507
|
+
f"Unknown source_format {source_format}. Supported formats are: 'coco', 'pascal_voc' and 'yolo'",
|
|
508
|
+
)
|
|
509
|
+
|
|
510
|
+
bboxes = bboxes.copy().astype(np.float32)
|
|
511
|
+
converted_bboxes = np.zeros_like(bboxes)
|
|
512
|
+
converted_bboxes[:, 4:] = bboxes[:, 4:] # Preserve additional columns
|
|
513
|
+
|
|
514
|
+
if source_format == "coco":
|
|
515
|
+
converted_bboxes[:, 0] = bboxes[:, 0] # x_min
|
|
516
|
+
converted_bboxes[:, 1] = bboxes[:, 1] # y_min
|
|
517
|
+
converted_bboxes[:, 2] = bboxes[:, 0] + bboxes[:, 2] # x_max
|
|
518
|
+
converted_bboxes[:, 3] = bboxes[:, 1] + bboxes[:, 3] # y_max
|
|
519
|
+
elif source_format == "yolo":
|
|
520
|
+
if check_validity and np.any((bboxes[:, :4] <= 0) | (bboxes[:, :4] > 1)):
|
|
521
|
+
raise ValueError(f"In YOLO format all coordinates must be float and in range (0, 1], got {bboxes}")
|
|
522
|
+
|
|
523
|
+
w_half, h_half = bboxes[:, 2] / 2, bboxes[:, 3] / 2
|
|
524
|
+
converted_bboxes[:, 0] = bboxes[:, 0] - w_half # x_min
|
|
525
|
+
converted_bboxes[:, 1] = bboxes[:, 1] - h_half # y_min
|
|
526
|
+
converted_bboxes[:, 2] = bboxes[:, 0] + w_half # x_max
|
|
527
|
+
converted_bboxes[:, 3] = bboxes[:, 1] + h_half # y_max
|
|
528
|
+
else: # pascal_voc
|
|
529
|
+
converted_bboxes[:, :4] = bboxes[:, :4]
|
|
530
|
+
|
|
531
|
+
if source_format != "yolo":
|
|
532
|
+
converted_bboxes[:, :4] = normalize_bboxes(converted_bboxes[:, :4], shape)
|
|
533
|
+
|
|
534
|
+
if check_validity:
|
|
535
|
+
check_bboxes(converted_bboxes)
|
|
536
|
+
|
|
537
|
+
return converted_bboxes
|
|
538
|
+
|
|
539
|
+
|
|
540
|
+
@handle_empty_array("bboxes")
|
|
541
|
+
def convert_bboxes_from_albumentations(
|
|
542
|
+
bboxes: np.ndarray,
|
|
543
|
+
target_format: Literal["coco", "pascal_voc", "yolo"],
|
|
544
|
+
shape: ShapeType,
|
|
545
|
+
check_validity: bool = False,
|
|
546
|
+
) -> np.ndarray:
|
|
547
|
+
"""Convert bounding boxes from the format used by albumentations to a specified format.
|
|
548
|
+
|
|
549
|
+
Args:
|
|
550
|
+
bboxes (np.ndarray): A numpy array of albumentations bounding boxes with shape (num_bboxes, 4+).
|
|
551
|
+
The first 4 columns are [x_min, y_min, x_max, y_max].
|
|
552
|
+
target_format (Literal["coco", "pascal_voc", "yolo"]): Required format of the output bounding boxes.
|
|
553
|
+
shape (ShapeType): Image shape (height, width).
|
|
554
|
+
check_validity (bool): Check if all boxes are valid boxes.
|
|
555
|
+
|
|
556
|
+
Returns:
|
|
557
|
+
np.ndarray: An array of bounding boxes in the target format with shape (num_bboxes, 4+).
|
|
558
|
+
|
|
559
|
+
Raises:
|
|
560
|
+
ValueError: If `target_format` is not 'coco', 'pascal_voc' or 'yolo'.
|
|
561
|
+
|
|
562
|
+
"""
|
|
563
|
+
if target_format not in {"coco", "pascal_voc", "yolo"}:
|
|
564
|
+
raise ValueError(
|
|
565
|
+
f"Unknown target_format {target_format}. Supported formats are: 'coco', 'pascal_voc' and 'yolo'",
|
|
566
|
+
)
|
|
567
|
+
|
|
568
|
+
if check_validity:
|
|
569
|
+
check_bboxes(bboxes)
|
|
570
|
+
|
|
571
|
+
converted_bboxes = np.zeros_like(bboxes)
|
|
572
|
+
converted_bboxes[:, 4:] = bboxes[:, 4:] # Preserve additional columns
|
|
573
|
+
|
|
574
|
+
denormalized_bboxes = denormalize_bboxes(bboxes[:, :4], shape) if target_format != "yolo" else bboxes[:, :4]
|
|
575
|
+
|
|
576
|
+
if target_format == "coco":
|
|
577
|
+
converted_bboxes[:, 0] = denormalized_bboxes[:, 0] # x_min
|
|
578
|
+
converted_bboxes[:, 1] = denormalized_bboxes[:, 1] # y_min
|
|
579
|
+
converted_bboxes[:, 2] = denormalized_bboxes[:, 2] - denormalized_bboxes[:, 0] # width
|
|
580
|
+
converted_bboxes[:, 3] = denormalized_bboxes[:, 3] - denormalized_bboxes[:, 1] # height
|
|
581
|
+
elif target_format == "yolo":
|
|
582
|
+
converted_bboxes[:, 0] = (denormalized_bboxes[:, 0] + denormalized_bboxes[:, 2]) / 2 # x_center
|
|
583
|
+
converted_bboxes[:, 1] = (denormalized_bboxes[:, 1] + denormalized_bboxes[:, 3]) / 2 # y_center
|
|
584
|
+
converted_bboxes[:, 2] = denormalized_bboxes[:, 2] - denormalized_bboxes[:, 0] # width
|
|
585
|
+
converted_bboxes[:, 3] = denormalized_bboxes[:, 3] - denormalized_bboxes[:, 1] # height
|
|
586
|
+
else: # pascal_voc
|
|
587
|
+
converted_bboxes[:, :4] = denormalized_bboxes
|
|
588
|
+
|
|
589
|
+
return converted_bboxes
|
|
590
|
+
|
|
591
|
+
|
|
592
|
+
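An illustrative round trip through the internal format using the two converters above (values are made up, not part of the packaged file):

import numpy as np
from albumentations.core.bbox_utils import (
    convert_bboxes_from_albumentations,
    convert_bboxes_to_albumentations,
)

shape = {"height": 100, "width": 100}
coco = np.array([[10.0, 20.0, 30.0, 40.0]])                         # x_min, y_min, width, height
internal = convert_bboxes_to_albumentations(coco, "coco", shape)     # [[0.1, 0.2, 0.4, 0.6]]
yolo = convert_bboxes_from_albumentations(internal, "yolo", shape)   # [[0.25, 0.4, 0.3, 0.4]]
print(internal, yolo)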
@handle_empty_array("bboxes")
|
|
593
|
+
def check_bboxes(bboxes: np.ndarray) -> None:
|
|
594
|
+
"""Check if bounding boxes are valid.
|
|
595
|
+
|
|
596
|
+
Args:
|
|
597
|
+
bboxes (np.ndarray): A numpy array of bounding boxes with shape (num_bboxes, 4+).
|
|
598
|
+
|
|
599
|
+
Raises:
|
|
600
|
+
ValueError: If any bounding box is invalid.
|
|
601
|
+
|
|
602
|
+
"""
|
|
603
|
+
# Check if all values are in range [0, 1]
|
|
604
|
+
in_range = (bboxes[:, :4] >= 0) & (bboxes[:, :4] <= 1)
|
|
605
|
+
close_to_zero = np.isclose(bboxes[:, :4], 0)
|
|
606
|
+
close_to_one = np.isclose(bboxes[:, :4], 1)
|
|
607
|
+
valid_range = in_range | close_to_zero | close_to_one
|
|
608
|
+
|
|
609
|
+
if not np.all(valid_range):
|
|
610
|
+
invalid_idx = np.where(~np.all(valid_range, axis=1))[0][0]
|
|
611
|
+
invalid_bbox = bboxes[invalid_idx]
|
|
612
|
+
invalid_coord = ["x_min", "y_min", "x_max", "y_max"][np.where(~valid_range[invalid_idx])[0][0]]
|
|
613
|
+
invalid_value = invalid_bbox[np.where(~valid_range[invalid_idx])[0][0]]
|
|
614
|
+
raise ValueError(
|
|
615
|
+
f"Expected {invalid_coord} for bbox {invalid_bbox} to be in the range [0.0, 1.0], got {invalid_value}.",
|
|
616
|
+
)
|
|
617
|
+
|
|
618
|
+
# Check if x_max > x_min and y_max > y_min
|
|
619
|
+
valid_order = (bboxes[:, 2] > bboxes[:, 0]) & (bboxes[:, 3] > bboxes[:, 1])
|
|
620
|
+
|
|
621
|
+
if not np.all(valid_order):
|
|
622
|
+
invalid_idx = np.where(~valid_order)[0][0]
|
|
623
|
+
invalid_bbox = bboxes[invalid_idx]
|
|
624
|
+
if invalid_bbox[2] <= invalid_bbox[0]:
|
|
625
|
+
raise ValueError(f"x_max is less than or equal to x_min for bbox {invalid_bbox}.")
|
|
626
|
+
|
|
627
|
+
raise ValueError(f"y_max is less than or equal to y_min for bbox {invalid_bbox}.")
|
|
628
|
+
|
|
629
|
+
|
|
630
|
+
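A small sketch of what check_bboxes accepts and rejects (illustrative values, not part of the packaged file):

import numpy as np
from albumentations.core.bbox_utils import check_bboxes

check_bboxes(np.array([[0.1, 0.1, 0.5, 0.5]]))        # valid: returns None
try:
    check_bboxes(np.array([[0.5, 0.1, 0.2, 0.4]]))    # invalid: x_max < x_min
except ValueError as err:
    print(err)  # "x_max is less than or equal to x_min for bbox ..."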
@handle_empty_array("bboxes")
|
|
631
|
+
def clip_bboxes(bboxes: np.ndarray, shape: ShapeType) -> np.ndarray:
|
|
632
|
+
"""Clip bounding boxes to the image shape.
|
|
633
|
+
|
|
634
|
+
Args:
|
|
635
|
+
bboxes (np.ndarray): A numpy array of bounding boxes with shape (num_bboxes, 4+).
|
|
636
|
+
shape (ShapeType): The shape of the image/volume:
|
|
637
|
+
- For 2D: {'height': int, 'width': int}
|
|
638
|
+
- For 3D: {'height': int, 'width': int, 'depth': int}
|
|
639
|
+
|
|
640
|
+
Returns:
|
|
641
|
+
np.ndarray: A numpy array of bounding boxes with shape (num_bboxes, 4+).
|
|
642
|
+
|
|
643
|
+
"""
|
|
644
|
+
height, width = shape["height"], shape["width"]
|
|
645
|
+
|
|
646
|
+
# Denormalize bboxes
|
|
647
|
+
denorm_bboxes = denormalize_bboxes(bboxes, shape)
|
|
648
|
+
|
|
649
|
+
## Note:
|
|
650
|
+
# It could be tempting to use cols - 1 and rows - 1 as the upper bounds for the clipping
|
|
651
|
+
|
|
652
|
+
# But this would cause the bounding box to be clipped to the image dimensions - 1 which is not what we want.
|
|
653
|
+
# Bounding box lives not in the middle of pixels but between them.
|
|
654
|
+
|
|
655
|
+
# Examples: for image with height 100, width 100, the pixel values are in the range [0, 99]
|
|
656
|
+
# but if we want bounding box to be 1 pixel width and height and lie on the boundary of the image
|
|
657
|
+
# it will be described as [99, 99, 100, 100] => clip by image_size - 1 will lead to [99, 99, 99, 99]
|
|
658
|
+
# which is incorrect
|
|
659
|
+
|
|
660
|
+
# It could be also tempting to clip `x_min`` to `cols - 1`` and `y_min` to `rows - 1`, but this also leads
|
|
661
|
+
# to another error. If image fully lies outside of the visible area and min_area is set to 0, then
|
|
662
|
+
# the bounding box will be clipped to the image size - 1 and will be 1 pixel in size and fully visible,
|
|
663
|
+
# but it should be completely removed.
|
|
664
|
+
|
|
665
|
+
# Clip coordinates
|
|
666
|
+
denorm_bboxes[:, [0, 2]] = np.clip(denorm_bboxes[:, [0, 2]], 0, width, out=denorm_bboxes[:, [0, 2]])
|
|
667
|
+
denorm_bboxes[:, [1, 3]] = np.clip(denorm_bboxes[:, [1, 3]], 0, height, out=denorm_bboxes[:, [1, 3]])
|
|
668
|
+
|
|
669
|
+
# Normalize clipped bboxes
|
|
670
|
+
return normalize_bboxes(denorm_bboxes, shape)
|
|
671
|
+
|
|
672
|
+
|
|
673
|
+
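An illustrative clip of a normalized box that overflows the image on both sides (values made up, not part of the packaged file):

import numpy as np
from albumentations.core.bbox_utils import clip_bboxes

shape = {"height": 100, "width": 100}
print(clip_bboxes(np.array([[-0.1, 0.2, 1.3, 0.8]]), shape))  # -> [[0.  0.2 1.  0.8]]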

def filter_bboxes(
    bboxes: np.ndarray,
    shape: ShapeType,
    min_area: float = 0.0,
    min_visibility: float = 0.0,
    min_width: float = 1.0,
    min_height: float = 1.0,
    max_accept_ratio: float | None = None,
) -> np.ndarray:
    """Remove bounding boxes that either lie outside of the visible area by more than min_visibility
    or whose area in pixels is under the threshold set by `min_area`. Also crops boxes to final image size.

    Args:
        bboxes (np.ndarray): A numpy array of bounding boxes with shape (num_bboxes, 4+).
        shape (ShapeType): The shape of the image/volume:
            - For 2D: {'height': int, 'width': int}
            - For 3D: {'height': int, 'width': int, 'depth': int}
        min_area (float): Minimum area of a bounding box in pixels. Default: 0.0.
        min_visibility (float): Minimum fraction of area for a bounding box to remain. Default: 0.0.
        min_width (float): Minimum width of a bounding box in pixels. Default: 1.0.
        min_height (float): Minimum height of a bounding box in pixels. Default: 1.0.
        max_accept_ratio (float | None): Maximum allowed aspect ratio, calculated as max(width/height, height/width).
            Boxes with higher ratios will be filtered out. Default: None.

    Returns:
        np.ndarray: Filtered bounding boxes.

    """
    epsilon = 1e-7

    if len(bboxes) == 0:
        return np.array([], dtype=np.float32).reshape(0, 4)

    # Calculate areas of bounding boxes before clipping in pixels
    denormalized_box_areas = calculate_bbox_areas_in_pixels(bboxes, shape)

    # Clip bounding boxes in ratio
    clipped_bboxes = clip_bboxes(bboxes, shape)

    # Calculate areas of clipped bounding boxes in pixels
    clipped_box_areas = calculate_bbox_areas_in_pixels(clipped_bboxes, shape)

    # Calculate width and height of the clipped bounding boxes
    denormalized_bboxes = denormalize_bboxes(clipped_bboxes[:, :4], shape)

    clipped_widths = denormalized_bboxes[:, 2] - denormalized_bboxes[:, 0]
    clipped_heights = denormalized_bboxes[:, 3] - denormalized_bboxes[:, 1]

    # Calculate aspect ratios if needed
    if max_accept_ratio is not None:
        aspect_ratios = np.maximum(
            clipped_widths / (clipped_heights + epsilon),
            clipped_heights / (clipped_widths + epsilon),
        )
        valid_ratios = aspect_ratios <= max_accept_ratio
    else:
        valid_ratios = np.ones_like(denormalized_box_areas, dtype=bool)

    # Create a mask for bboxes that meet all criteria
    mask = (
        (denormalized_box_areas >= epsilon)
        & (clipped_box_areas >= min_area - epsilon)
        & (clipped_box_areas / (denormalized_box_areas + epsilon) >= min_visibility)
        & (clipped_widths >= min_width - epsilon)
        & (clipped_heights >= min_height - epsilon)
        & valid_ratios
    )

    # Apply the mask to get the filtered bboxes
    filtered_bboxes = clipped_bboxes[mask]

    return np.array([], dtype=np.float32).reshape(0, 4) if len(filtered_bboxes) == 0 else filtered_bboxes
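A short sketch of filter_bboxes clipping one box and dropping a mostly-out-of-frame one (illustrative values, not part of the packaged file):

import numpy as np
from albumentations.core.bbox_utils import filter_bboxes

shape = {"height": 100, "width": 100}
bboxes = np.array(
    [
        [0.1, 0.1, 0.5, 0.5],  # kept: 40x40 px, fully visible
        [0.9, 0.9, 1.4, 1.4],  # clipped to [0.9, 0.9, 1.0, 1.0]; visibility 0.04, dropped below min_visibility=0.1
    ],
)
print(filter_bboxes(bboxes, shape, min_width=2, min_height=2, min_visibility=0.1))  # -> [[0.1 0.1 0.5 0.5]]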
def union_of_bboxes(bboxes: np.ndarray, erosion_rate: float) -> np.ndarray | None:
|
|
748
|
+
"""Calculate union of bounding boxes. Boxes could be in albumentations or Pascal Voc format.
|
|
749
|
+
|
|
750
|
+
Args:
|
|
751
|
+
bboxes (np.ndarray): List of bounding boxes
|
|
752
|
+
erosion_rate (float): How much each bounding box can be shrunk, useful for erosive cropping.
|
|
753
|
+
Set this in range [0, 1]. 0 will not be erosive at all, 1.0 can make any bbox lose its volume.
|
|
754
|
+
|
|
755
|
+
Returns:
|
|
756
|
+
np.ndarray | None: A bounding box `(x_min, y_min, x_max, y_max)` or None if no bboxes are given or if
|
|
757
|
+
the bounding boxes become invalid after erosion.
|
|
758
|
+
|
|
759
|
+
"""
|
|
760
|
+
if not bboxes.size:
|
|
761
|
+
return None
|
|
762
|
+
|
|
763
|
+
if erosion_rate == 1:
|
|
764
|
+
return None
|
|
765
|
+
|
|
766
|
+
if bboxes.shape[0] == 1:
|
|
767
|
+
return bboxes[0][:4]
|
|
768
|
+
|
|
769
|
+
epsilon = 1e-6
|
|
770
|
+
|
|
771
|
+
x_min, y_min = np.min(bboxes[:, :2], axis=0)
|
|
772
|
+
x_max, y_max = np.max(bboxes[:, 2:4], axis=0)
|
|
773
|
+
|
|
774
|
+
width = x_max - x_min
|
|
775
|
+
height = y_max - y_min
|
|
776
|
+
|
|
777
|
+
erosion_x = width * erosion_rate * 0.5
|
|
778
|
+
erosion_y = height * erosion_rate * 0.5
|
|
779
|
+
|
|
780
|
+
x_min += erosion_x
|
|
781
|
+
y_min += erosion_y
|
|
782
|
+
x_max -= erosion_x
|
|
783
|
+
y_max -= erosion_y
|
|
784
|
+
|
|
785
|
+
if abs(x_max - x_min) < epsilon or abs(y_max - y_min) < epsilon:
|
|
786
|
+
return None
|
|
787
|
+
|
|
788
|
+
return np.array([x_min, y_min, x_max, y_max], dtype=np.float32)
|
|
789
|
+
|
|
790
|
+
|
|
791
|
+
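An illustrative call to union_of_bboxes with two normalized pascal_voc boxes and no erosion (values made up, not part of the packaged file):

import numpy as np
from albumentations.core.bbox_utils import union_of_bboxes

boxes = np.array([[0.1, 0.1, 0.4, 0.4], [0.3, 0.2, 0.8, 0.6]])
print(union_of_bboxes(boxes, erosion_rate=0.0))  # -> [0.1 0.1 0.8 0.6]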
def bboxes_from_masks(masks: np.ndarray) -> np.ndarray:
|
|
792
|
+
"""Create bounding boxes from binary masks (fast version)
|
|
793
|
+
|
|
794
|
+
Args:
|
|
795
|
+
masks (np.ndarray): Binary masks of shape (H, W) or (N, H, W) where N is the number of masks,
|
|
796
|
+
and H, W are the height and width of each mask.
|
|
797
|
+
|
|
798
|
+
Returns:
|
|
799
|
+
np.ndarray: An array of bounding boxes with shape (N, 4), where each row is
|
|
800
|
+
(x_min, y_min, x_max, y_max).
|
|
801
|
+
|
|
802
|
+
"""
|
|
803
|
+
# Handle single mask case by adding batch dimension
|
|
804
|
+
if len(masks.shape) == MONO_CHANNEL_DIMENSIONS:
|
|
805
|
+
masks = masks[np.newaxis, ...]
|
|
806
|
+
|
|
807
|
+
rows = np.any(masks, axis=2)
|
|
808
|
+
cols = np.any(masks, axis=1)
|
|
809
|
+
|
|
810
|
+
bboxes = np.zeros((masks.shape[0], 4), dtype=np.int32)
|
|
811
|
+
|
|
812
|
+
for i, (row, col) in enumerate(zip(rows, cols)):
|
|
813
|
+
if not np.any(row) or not np.any(col):
|
|
814
|
+
bboxes[i] = [-1, -1, -1, -1]
|
|
815
|
+
else:
|
|
816
|
+
y_min, y_max = np.where(row)[0][[0, -1]]
|
|
817
|
+
x_min, x_max = np.where(col)[0][[0, -1]]
|
|
818
|
+
bboxes[i] = [x_min, y_min, x_max + 1, y_max + 1]
|
|
819
|
+
|
|
820
|
+
return bboxes
|
|
821
|
+
|
|
822
|
+
|
|
823
|
+
def masks_from_bboxes(bboxes: np.ndarray, shape: ShapeType | tuple[int, int]) -> np.ndarray:
|
|
824
|
+
"""Convert bounding boxes to masks.
|
|
825
|
+
|
|
826
|
+
Args:
|
|
827
|
+
bboxes (np.ndarray): A numpy array of bounding boxes with shape (num_bboxes, 4+).
|
|
828
|
+
shape (ShapeType | tuple[int, int]): Image shape (height, width).
|
|
829
|
+
|
|
830
|
+
Returns:
|
|
831
|
+
np.ndarray: A numpy array of masks with shape (num_bboxes, height, width).
|
|
832
|
+
|
|
833
|
+
"""
|
|
834
|
+
if isinstance(shape, dict):
|
|
835
|
+
height, width = shape["height"], shape["width"]
|
|
836
|
+
else:
|
|
837
|
+
height, width = shape[:2]
|
|
838
|
+
|
|
839
|
+
masks = np.zeros((len(bboxes), height, width), dtype=np.uint8)
|
|
840
|
+
y, x = np.ogrid[:height, :width]
|
|
841
|
+
|
|
842
|
+
for i, (x_min, y_min, x_max, y_max) in enumerate(bboxes[:, :4].astype(int)):
|
|
843
|
+
masks[i] = (x_min <= x) & (x < x_max) & (y_min <= y) & (y < y_max)
|
|
844
|
+
|
|
845
|
+
return masks
|
|
846
|
+
|
|
847
|
+
|
|
848
|
+
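A sketch of the mask/box round trip using the two helpers above, with pixel-unit pascal_voc boxes (illustrative values, not part of the packaged file):

import numpy as np
from albumentations.core.bbox_utils import bboxes_from_masks, masks_from_bboxes

shape = {"height": 50, "width": 50}
boxes = np.array([[5, 5, 15, 20]])          # x_min, y_min, x_max, y_max in pixels
masks = masks_from_bboxes(boxes, shape)      # (1, 50, 50) binary masks
recovered = bboxes_from_masks(masks)         # -> [[ 5  5 15 20]]
print(masks.shape, recovered)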

def bboxes_to_mask(
    bboxes: np.ndarray,
    image_shape: tuple[int, int],
) -> np.ndarray:
    """Convert bounding boxes to a multi-channel mask, one channel per box.

    Args:
        bboxes (np.ndarray): A numpy array of bounding boxes with shape (num_bboxes, 4+).
        image_shape (tuple[int, int]): Image shape (height, width).

    Returns:
        np.ndarray: A numpy array of shape (height, width, num_bboxes) with 1s inside each bounding box's channel.

    """
    height, width = image_shape[:2]
    num_boxes = len(bboxes)

    # Create multi-channel mask where each channel represents one bbox
    bbox_masks = np.zeros((height, width, num_boxes), dtype=np.uint8)

    # Fill each bbox in its channel
    for idx, box in enumerate(bboxes):
        x_min, y_min, x_max, y_max = map(round, box[:4])
        x_min = max(0, min(width - 1, x_min))
        x_max = max(0, min(width - 1, x_max))
        y_min = max(0, min(height - 1, y_min))
        y_max = max(0, min(height - 1, y_max))
        bbox_masks[y_min : y_max + 1, x_min : x_max + 1, idx] = 1

    return bbox_masks

def mask_to_bboxes(
    masks: np.ndarray,
    original_bboxes: np.ndarray,
) -> np.ndarray:
    """Convert masks back to bounding boxes.

    Args:
        masks (np.ndarray): A numpy array of masks with shape (height, width, num_masks).
        original_bboxes (np.ndarray): Original bounding boxes with shape (num_bboxes, 4+).

    Returns:
        np.ndarray: A numpy array of bounding boxes with shape (num_masks, 4+).

    """
    num_boxes = masks.shape[-1]
    new_bboxes = []

    if num_boxes == 0:
        # Return empty array with correct shape
        return np.zeros((0, original_bboxes.shape[1]), dtype=original_bboxes.dtype)

    for idx in range(num_boxes):
        mask = masks[..., idx]
        if np.any(mask):
            y_coords, x_coords = np.where(mask)
            x_min, x_max = x_coords.min(), x_coords.max()
            y_min, y_max = y_coords.min(), y_coords.max()
            new_bboxes.append([x_min, y_min, x_max, y_max])
        else:
            # If bbox disappeared, use original coords
            new_bboxes.append(original_bboxes[idx, :4])

    new_bboxes = np.array(new_bboxes)

    return (
        np.column_stack([new_bboxes, original_bboxes[:, 4:]])
        if original_bboxes.shape[1] > NUM_BBOXES_COLUMNS_IN_ALBUMENTATIONS
        else new_bboxes
    )
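A closing sketch of the channelized pair bboxes_to_mask / mask_to_bboxes, as used when boxes must survive a mask-only transform (illustrative values, not part of the packaged file):

import numpy as np
from albumentations.core.bbox_utils import bboxes_to_mask, mask_to_bboxes

original = np.array([[10, 10, 20, 30]])        # pixel pascal_voc box
channels = bboxes_to_mask(original, (64, 64))  # (64, 64, 1), one channel per box
# ... a mask-only transform could be applied to `channels` here ...
print(mask_to_bboxes(channels, original))      # -> [[10 10 20 30]]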