nrtk-albumentations 2.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of nrtk-albumentations might be problematic. Click here for more details.
- albumentations/__init__.py +21 -0
- albumentations/augmentations/__init__.py +23 -0
- albumentations/augmentations/blur/__init__.py +0 -0
- albumentations/augmentations/blur/functional.py +438 -0
- albumentations/augmentations/blur/transforms.py +1633 -0
- albumentations/augmentations/crops/__init__.py +0 -0
- albumentations/augmentations/crops/functional.py +494 -0
- albumentations/augmentations/crops/transforms.py +3647 -0
- albumentations/augmentations/dropout/__init__.py +0 -0
- albumentations/augmentations/dropout/channel_dropout.py +134 -0
- albumentations/augmentations/dropout/coarse_dropout.py +567 -0
- albumentations/augmentations/dropout/functional.py +1017 -0
- albumentations/augmentations/dropout/grid_dropout.py +166 -0
- albumentations/augmentations/dropout/mask_dropout.py +274 -0
- albumentations/augmentations/dropout/transforms.py +461 -0
- albumentations/augmentations/dropout/xy_masking.py +186 -0
- albumentations/augmentations/geometric/__init__.py +0 -0
- albumentations/augmentations/geometric/distortion.py +1238 -0
- albumentations/augmentations/geometric/flip.py +752 -0
- albumentations/augmentations/geometric/functional.py +4151 -0
- albumentations/augmentations/geometric/pad.py +676 -0
- albumentations/augmentations/geometric/resize.py +956 -0
- albumentations/augmentations/geometric/rotate.py +864 -0
- albumentations/augmentations/geometric/transforms.py +1962 -0
- albumentations/augmentations/mixing/__init__.py +0 -0
- albumentations/augmentations/mixing/domain_adaptation.py +787 -0
- albumentations/augmentations/mixing/domain_adaptation_functional.py +453 -0
- albumentations/augmentations/mixing/functional.py +878 -0
- albumentations/augmentations/mixing/transforms.py +832 -0
- albumentations/augmentations/other/__init__.py +0 -0
- albumentations/augmentations/other/lambda_transform.py +180 -0
- albumentations/augmentations/other/type_transform.py +261 -0
- albumentations/augmentations/pixel/__init__.py +0 -0
- albumentations/augmentations/pixel/functional.py +4226 -0
- albumentations/augmentations/pixel/transforms.py +7556 -0
- albumentations/augmentations/spectrogram/__init__.py +0 -0
- albumentations/augmentations/spectrogram/transform.py +220 -0
- albumentations/augmentations/text/__init__.py +0 -0
- albumentations/augmentations/text/functional.py +272 -0
- albumentations/augmentations/text/transforms.py +299 -0
- albumentations/augmentations/transforms3d/__init__.py +0 -0
- albumentations/augmentations/transforms3d/functional.py +393 -0
- albumentations/augmentations/transforms3d/transforms.py +1422 -0
- albumentations/augmentations/utils.py +249 -0
- albumentations/core/__init__.py +0 -0
- albumentations/core/bbox_utils.py +920 -0
- albumentations/core/composition.py +1885 -0
- albumentations/core/hub_mixin.py +299 -0
- albumentations/core/keypoints_utils.py +521 -0
- albumentations/core/label_manager.py +339 -0
- albumentations/core/pydantic.py +239 -0
- albumentations/core/serialization.py +352 -0
- albumentations/core/transforms_interface.py +976 -0
- albumentations/core/type_definitions.py +127 -0
- albumentations/core/utils.py +605 -0
- albumentations/core/validation.py +129 -0
- albumentations/pytorch/__init__.py +1 -0
- albumentations/pytorch/transforms.py +189 -0
- nrtk_albumentations-2.1.0.dist-info/METADATA +196 -0
- nrtk_albumentations-2.1.0.dist-info/RECORD +62 -0
- nrtk_albumentations-2.1.0.dist-info/WHEEL +4 -0
- nrtk_albumentations-2.1.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,1017 @@
|
|
|
1
|
+
"""Functional implementations of dropout operations for image augmentation.
|
|
2
|
+
|
|
3
|
+
This module provides low-level functions for various dropout techniques used in image
|
|
4
|
+
augmentation, including channel dropout, grid dropout, mask dropout, and coarse dropout.
|
|
5
|
+
These functions create and apply dropout patterns to images, masks, bounding boxes, and
|
|
6
|
+
keypoints, with support for different filling methods and hole generation strategies.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
from typing import Literal, cast
|
|
12
|
+
|
|
13
|
+
import cv2
|
|
14
|
+
import numpy as np
|
|
15
|
+
from albucore import (
|
|
16
|
+
MAX_VALUES_BY_DTYPE,
|
|
17
|
+
NUM_MULTI_CHANNEL_DIMENSIONS,
|
|
18
|
+
get_num_channels,
|
|
19
|
+
is_grayscale_image,
|
|
20
|
+
preserve_channel_dim,
|
|
21
|
+
uint8_io,
|
|
22
|
+
)
|
|
23
|
+
|
|
24
|
+
from albumentations.augmentations.geometric.functional import split_uniform_grid
|
|
25
|
+
from albumentations.augmentations.utils import handle_empty_array
|
|
26
|
+
from albumentations.core.type_definitions import MONO_CHANNEL_DIMENSIONS
|
|
27
|
+
|
|
28
|
+
# Public names exported by a star-import of this module; any other function
# defined here has to be imported explicitly by name.
__all__ = [
    "calculate_grid_dimensions",
    "channel_dropout",
    "cutout",
    "filter_bboxes_by_holes",
    "filter_keypoints_in_holes",
    "generate_grid_holes",
    "generate_random_fill",
]
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
@preserve_channel_dim
def channel_dropout(
    img: np.ndarray,
    channels_to_drop: int | tuple[int, ...] | np.ndarray,
    fill: tuple[float, ...] | float = 0,
) -> np.ndarray:
    """Overwrite selected channels of an image with a fill value.

    The input array is left untouched; the replacement is written into a copy.

    Args:
        img (np.ndarray): Input image. Grayscale images are rejected.
        channels_to_drop (int | tuple[int, ...] | np.ndarray): Index or indices, along the
            last axis, of the channels to overwrite.
        fill (tuple[float, ...] | float): Value written into the selected channels.

    Returns:
        np.ndarray: Copy of ``img`` whose selected channels are set to ``fill``.

    Raises:
        NotImplementedError: If ``is_grayscale_image(img)`` reports a grayscale image.

    """
    if is_grayscale_image(img):
        raise NotImplementedError("Only one channel. ChannelDropout is not defined.")

    dropped = img.copy()
    dropped[..., channels_to_drop] = fill
    return dropped
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def generate_random_fill(
    dtype: np.dtype,
    shape: tuple[int, ...],
    random_generator: np.random.Generator,
) -> np.ndarray:
    """Generate a random fill array based on the given dtype and target shape.

    Integer dtypes are sampled with ``random_generator.integers`` from ``0`` up to and
    including the dtype's entry in ``MAX_VALUES_BY_DTYPE``. Floating-point dtypes are sampled
    with ``random_generator.uniform`` between ``0`` and that entry, then cast to ``dtype``.

    Args:
        dtype (np.dtype): The data type of the array to be generated.
        shape (tuple[int, ...]): The shape of the array to be generated.
        random_generator (np.random.Generator): The random generator used to draw the values.
            It is required; there is no fallback generator.

    Returns:
        np.ndarray: A numpy array of the specified shape and dtype, filled with random values.

    Raises:
        ValueError: If the input dtype is neither integer nor floating-point.

    Examples:
        >>> import numpy as np
        >>> rng = np.random.default_rng(42)
        >>> result = generate_random_fill(np.dtype('uint8'), (2, 2), rng)
        >>> result.shape, result.dtype
        ((2, 2), dtype('uint8'))

    """
    is_integer = np.issubdtype(dtype, np.integer)
    # Validate the dtype family before the table lookup so that unsupported dtypes raise the
    # documented ValueError instead of failing inside the lookup.
    if not is_integer and not np.issubdtype(dtype, np.floating):
        raise ValueError(f"Unsupported dtype: {dtype}")

    max_value = MAX_VALUES_BY_DTYPE[dtype]
    if is_integer:
        # ``integers`` excludes the upper bound, hence the +1 to make max_value reachable.
        return random_generator.integers(0, max_value + 1, size=shape, dtype=dtype)
    return random_generator.uniform(0, max_value, size=shape).astype(dtype)
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
@uint8_io
def apply_inpainting(img: np.ndarray, holes: np.ndarray, method: Literal["inpaint_telea", "inpaint_ns"]) -> np.ndarray:
    """Apply OpenCV inpainting to fill the holes in the image.

    Single-channel input is expanded to three channels for ``cv2.inpaint`` and converted back
    afterwards. The result has the same layout as the input: ``(H, W)`` stays ``(H, W)`` and
    ``(H, W, 1)`` stays ``(H, W, 1)``.

    Args:
        img (np.ndarray): Input image (grayscale or BGR)
        holes (np.ndarray): Array of [x1, y1, x2, y2] coordinates
        method (Literal["inpaint_telea", "inpaint_ns"]): Inpainting method to use. Any value
            other than "inpaint_telea" selects ``cv2.INPAINT_NS``.

    Returns:
        np.ndarray: Inpainted image

    Notes:
        Images whose channel count is not 1 are handed to ``cv2.inpaint`` unchanged; this
        function performs no channel-count validation of its own.

    """
    # Binary mask marking the pixels OpenCV has to reconstruct.
    mask = np.zeros(img.shape[:2], dtype=np.uint8)
    for x_min, y_min, x_max, y_max in holes:
        mask[y_min:y_max, x_min:x_max] = 255

    inpaint_method = cv2.INPAINT_TELEA if method == "inpaint_telea" else cv2.INPAINT_NS

    if get_num_channels(img) != 1:
        return cv2.inpaint(img, mask, 3, inpaint_method)

    # Record the layout before dropping the channel axis so it can be restored on return.
    has_channel_axis = img.ndim == NUM_MULTI_CHANNEL_DIMENSIONS
    # Drop only the trailing channel axis; squeeze() would also remove a height or width of 1.
    gray = img.reshape(img.shape[:2]) if has_channel_axis else img

    img_3ch = cv2.cvtColor(gray, cv2.COLOR_GRAY2BGR)
    inpainted = cv2.cvtColor(cv2.inpaint(img_3ch, mask, 3, inpaint_method), cv2.COLOR_BGR2GRAY)
    return inpainted[..., None] if has_channel_axis else inpainted
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def fill_holes_with_value(img: np.ndarray, holes: np.ndarray, fill: np.ndarray) -> np.ndarray:
    """Paint every hole rectangle of an image with a constant value, in place.

    Args:
        img (np.ndarray): Image to modify; it is written to directly.
        holes (np.ndarray): Array of [x1, y1, x2, y2] coordinates
        fill (np.ndarray): Value assigned to every pixel inside each hole

    Returns:
        np.ndarray: The same ``img`` object that was passed in.

    """
    for left, top, right, bottom in holes:
        rows = slice(top, bottom)
        cols = slice(left, right)
        img[rows, cols] = fill
    return img
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
def fill_volume_holes_with_value(volume: np.ndarray, holes: np.ndarray, fill: np.ndarray) -> np.ndarray:
    """Paint every hole rectangle with a constant value across the whole first axis, in place.

    The second and third axes are indexed as rows (y) and columns (x); every index along the
    first axis receives the same fill.

    Args:
        volume (np.ndarray): Volume to modify; it is written to directly.
        holes (np.ndarray): Array of [x1, y1, x2, y2] coordinates
        fill (np.ndarray): Value assigned to every element inside each hole

    Returns:
        np.ndarray: The same ``volume`` object that was passed in.

    """
    for left, top, right, bottom in holes:
        rows = slice(top, bottom)
        cols = slice(left, right)
        volume[:, rows, cols] = fill
    return volume
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
def fill_volumes_holes_with_value(volumes: np.ndarray, holes: np.ndarray, fill: np.ndarray) -> np.ndarray:
    """Paint every hole rectangle with a constant value across the first two axes, in place.

    The third and fourth axes are indexed as rows (y) and columns (x); every index along the
    first two axes receives the same fill.

    Args:
        volumes (np.ndarray): Batch of volumes to modify; it is written to directly.
        holes (np.ndarray): Array of [x1, y1, x2, y2] coordinates
        fill (np.ndarray): Value assigned to every element inside each hole

    Returns:
        np.ndarray: The same ``volumes`` object that was passed in.

    """
    for left, top, right, bottom in holes:
        rows = slice(top, bottom)
        cols = slice(left, right)
        volumes[:, :, rows, cols] = fill
    return volumes
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
def fill_holes_with_random(
    img: np.ndarray,
    holes: np.ndarray,
    random_generator: np.random.Generator,
    uniform: bool,
) -> np.ndarray:
    """Overwrite every hole rectangle of an image with random values, in place.

    Args:
        img (np.ndarray): Image to modify; it is written to directly.
        holes (np.ndarray): Array of [x1, y1, x2, y2] coordinates
        random_generator (np.random.Generator): Random number generator
        uniform (bool): If True, draw a single value per hole (one per channel when the image
            has a channel axis) and broadcast it over the hole. If False, draw an independent
            value for every element of the hole.

    Returns:
        np.ndarray: The same ``img`` object that was passed in.

    """
    has_channel_axis = img.ndim != MONO_CHANNEL_DIMENSIONS

    for left, top, right, bottom in holes:
        if uniform:
            # A length-1 leading axis broadcasts over the whole rectangle on assignment.
            shape = (1, img.shape[2]) if has_channel_axis else (1,)
        elif has_channel_axis:
            shape = (bottom - top, right - left, img.shape[2])
        else:
            shape = (bottom - top, right - left)

        img[top:bottom, left:right] = generate_random_fill(img.dtype, shape, random_generator)
    return img
|
|
211
|
+
|
|
212
|
+
|
|
213
|
+
def fill_volume_holes_with_random(
    volume: np.ndarray,
    holes: np.ndarray,
    random_generator: np.random.Generator,
    uniform: bool,
) -> np.ndarray:
    """Overwrite every hole rectangle of a volume with random values, in place.

    Args:
        volume (np.ndarray): Input volume of shape (D, H, W, C) or (D, H, W); it is written
            to directly.
        holes (np.ndarray): Array of [x1, y1, x2, y2] coordinates
        random_generator (np.random.Generator): Random number generator
        uniform (bool): If True, draw one value per slice along the first axis (and per
            channel, when present) and broadcast it over the hole. If False, draw an
            independent value for every element of the hole.

    Returns:
        np.ndarray: The same ``volume`` object that was passed in.

    """
    depth = volume.shape[0]
    has_channel_axis = volume.ndim != 3

    for left, top, right, bottom in holes:
        # Singleton spatial axes broadcast over the whole rectangle on assignment.
        spatial = (1, 1) if uniform else (bottom - top, right - left)
        shape = (depth, *spatial)
        if has_channel_axis:
            shape = (*shape, volume.shape[3])

        volume[:, top:bottom, left:right] = generate_random_fill(volume.dtype, shape, random_generator)
    return volume
|
|
236
|
+
|
|
237
|
+
|
|
238
|
+
def fill_volumes_holes_with_random(
    volumes: np.ndarray,
    holes: np.ndarray,
    random_generator: np.random.Generator,
    uniform: bool,
) -> np.ndarray:
    """Overwrite every hole rectangle of a batch of volumes with random values, in place.

    Args:
        volumes (np.ndarray): Input batch of shape (N, D, H, W, C) or (N, D, H, W); it is
            written to directly.
        holes (np.ndarray): Array of [x1, y1, x2, y2] coordinates
        random_generator (np.random.Generator): Random number generator
        uniform (bool): If True, draw one value per (N, D) slice (and per channel, when
            present) and broadcast it over the hole. If False, draw an independent value for
            every element of the hole.

    Returns:
        np.ndarray: The same ``volumes`` object that was passed in.

    """
    batch, depth = volumes.shape[0], volumes.shape[1]
    has_channel_axis = volumes.ndim != 4

    for left, top, right, bottom in holes:
        # Singleton spatial axes broadcast over the whole rectangle on assignment.
        spatial = (1, 1) if uniform else (bottom - top, right - left)
        shape = (batch, depth, *spatial)
        if has_channel_axis:
            shape = (*shape, volumes.shape[4])

        volumes[:, :, top:bottom, left:right] = generate_random_fill(volumes.dtype, shape, random_generator)
    return volumes
|
|
266
|
+
|
|
267
|
+
|
|
268
|
+
def cutout(
    img: np.ndarray,
    holes: np.ndarray,
    fill: tuple[float, ...] | float | Literal["random", "random_uniform", "inpaint_telea", "inpaint_ns"],
    random_generator: np.random.Generator,
) -> np.ndarray:
    """Cut rectangular holes out of an image and fill them.

    The work is done on a copy, so the caller's array is not modified.

    Args:
        img (np.ndarray): The image to augment
        holes (np.ndarray): Array of [x1, y1, x2, y2] coordinates
        fill (tuple[float, ...] | float | Literal["random", "random_uniform", "inpaint_telea", "inpaint_ns"]):
            How to fill the holes:
            - number (int/float): Converted to the image dtype and broadcast to all channels
            - sequence (tuple/list/ndarray): For 3-dimensional images its flattened length
              must equal the number of channels
            - "random": Independent random value for each element of a hole
            - "random_uniform": One random value per hole
            - "inpaint_telea"/"inpaint_ns": OpenCV inpainting methods
        random_generator (np.random.Generator): Random number generator for random fills

    Returns:
        np.ndarray: The augmented copy of the image.

    Raises:
        ValueError: If ``fill`` is an unrecognised string, or if a sequence fill does not
            match the number of channels of a 3-dimensional image.

    """
    img = img.copy()

    # String fills select a strategy rather than a value.
    if isinstance(fill, str):
        if fill in {"inpaint_telea", "inpaint_ns"}:
            return apply_inpainting(img, holes, cast("Literal['inpaint_telea', 'inpaint_ns']", fill))
        if fill in {"random", "random_uniform"}:
            return fill_holes_with_random(img, holes, random_generator, uniform=fill == "random_uniform")
        raise ValueError(f"Unsupported string fill: {fill}")

    fill_array = np.array(fill, dtype=img.dtype)

    # Plain Python numbers broadcast as-is; only sequence fills are checked against the
    # channel count, and only when the image has a channel axis.
    is_plain_number = isinstance(fill, (int, float))
    if not is_plain_number and img.ndim == NUM_MULTI_CHANNEL_DIMENSIONS:
        fill_array = fill_array.ravel()
        if fill_array.size != img.shape[2]:
            raise ValueError(
                f"Fill value must have same number of channels as image. "
                f"Got {fill_array.size}, expected {img.shape[2]}",
            )

    return fill_holes_with_value(img, holes, fill_array)
|
|
322
|
+
|
|
323
|
+
|
|
324
|
+
def cutout_on_volume(
    volume: np.ndarray,
    holes: np.ndarray,
    fill: tuple[float, ...] | float | Literal["random", "random_uniform", "inpaint_telea", "inpaint_ns"],
    random_generator: np.random.Generator,
) -> np.ndarray:
    """Cut rectangular holes out of a volume of shape (D, H, W) or (D, H, W, C) and fill them.

    The same holes are applied to every slice along the first axis. The work is done on a
    copy, so the caller's array is not modified.

    Args:
        volume (np.ndarray): The volume to augment
        holes (np.ndarray): Array of [x1, y1, x2, y2] coordinates
        fill (tuple[float, ...] | float | Literal["random", "random_uniform", "inpaint_telea", "inpaint_ns"]):
            How to fill the holes:
            - number (int/float): Converted to the volume dtype and broadcast to all channels
            - sequence (tuple/list/ndarray): For 4-dimensional volumes its flattened length
              must equal the number of channels
            - "random": Independent random value for each element of a hole
            - "random_uniform": One random value per hole, different values across slices
            - "inpaint_telea"/"inpaint_ns": OpenCV inpainting methods, applied slice by slice
        random_generator (np.random.Generator): Random number generator for random fills

    Returns:
        np.ndarray: The augmented copy, with the same shape as ``volume``.

    Raises:
        ValueError: If ``fill`` is an unrecognised string, or if a sequence fill does not
            match the number of channels of a 4-dimensional volume.

    """
    volume = volume.copy()

    # String fills select a strategy rather than a value.
    if isinstance(fill, str):
        if fill in {"inpaint_telea", "inpaint_ns"}:
            method = cast("Literal['inpaint_telea', 'inpaint_ns']", fill)
            inpainted = np.array([apply_inpainting(img, holes, method) for img in volume])
            # Restore the original layout: (D, H, W, C) or (D, H, W).
            return inpainted.reshape(volume.shape)
        if fill in {"random", "random_uniform"}:
            return fill_volume_holes_with_random(volume, holes, random_generator, uniform=fill == "random_uniform")
        raise ValueError(f"Unsupported string fill: {fill}")

    fill_array = np.array(fill, dtype=volume.dtype)

    # Plain Python numbers broadcast as-is; only sequence fills are checked against the
    # channel count, and only when the volume has a channel axis.
    is_plain_number = isinstance(fill, (int, float))
    if not is_plain_number and volume.ndim == 4:
        fill_array = fill_array.ravel()
        if fill_array.size != volume.shape[3]:
            raise ValueError(
                f"Fill value must have same number of channels as image. "
                f"Got {fill_array.size}, expected {volume.shape[3]}",
            )

    return fill_volume_holes_with_value(volume, holes, fill_array)
|
|
383
|
+
|
|
384
|
+
|
|
385
|
+
def cutout_on_volumes(
    volumes: np.ndarray,
    holes: np.ndarray,
    fill: tuple[float, ...] | float | Literal["random", "random_uniform", "inpaint_telea", "inpaint_ns"],
    random_generator: np.random.Generator,
) -> np.ndarray:
    """Cut rectangular holes out of a batch of volumes of shape (N, D, H, W) or (N, D, H, W, C).

    The same holes are applied to every slice of every volume. The work is done on a copy,
    so the caller's array is not modified.

    Args:
        volumes (np.ndarray): The batch of volumes to augment
        holes (np.ndarray): Array of [x1, y1, x2, y2] coordinates
        fill (tuple[float, ...] | float | Literal["random", "random_uniform", "inpaint_telea", "inpaint_ns"]):
            How to fill the holes:
            - number (int/float): Converted to the batch dtype and broadcast to all channels
            - sequence (tuple/list/ndarray): For 5-dimensional batches its flattened length
              must equal the number of channels
            - "random": Independent random value for each element of a hole
            - "random_uniform": One random value per hole, different values across slices
            - "inpaint_telea"/"inpaint_ns": OpenCV inpainting methods, applied slice by slice
        random_generator (np.random.Generator): Random number generator for random fills

    Returns:
        np.ndarray: The augmented copy, with the same shape as ``volumes``.

    Raises:
        ValueError: If ``fill`` is an unrecognised string, or if a sequence fill does not
            match the number of channels of a 5-dimensional batch.

    """
    volumes = volumes.copy()

    # String fills select a strategy rather than a value.
    if isinstance(fill, str):
        if fill in {"inpaint_telea", "inpaint_ns"}:
            method = cast("Literal['inpaint_telea', 'inpaint_ns']", fill)
            inpainted = np.array(
                [apply_inpainting(img, holes, method) for volume in volumes for img in volume],
            )
            # Restore the original layout: (N, D, H, W, C) or (N, D, H, W).
            return inpainted.reshape(volumes.shape)
        if fill in {"random", "random_uniform"}:
            return fill_volumes_holes_with_random(volumes, holes, random_generator, uniform=fill == "random_uniform")
        raise ValueError(f"Unsupported string fill: {fill}")

    fill_array = np.array(fill, dtype=volumes.dtype)

    # Plain Python numbers broadcast as-is; only sequence fills are checked against the
    # channel count, and only when the batch has a channel axis.
    is_plain_number = isinstance(fill, (int, float))
    if not is_plain_number and volumes.ndim == 5:
        fill_array = fill_array.ravel()
        if fill_array.size != volumes.shape[4]:
            raise ValueError(
                f"Fill value must have same number of channels as image. "
                f"Got {fill_array.size}, expected {volumes.shape[4]}",
            )

    return fill_volumes_holes_with_value(volumes, holes, fill_array)
|
|
446
|
+
|
|
447
|
+
|
|
448
|
+
@handle_empty_array("keypoints")
def filter_keypoints_in_holes(keypoints: np.ndarray, holes: np.ndarray) -> np.ndarray:
    """Drop the keypoints that fall inside at least one hole.

    A keypoint is inside a hole when ``x1 <= x < x2`` and ``y1 <= y < y2``; the lower edges
    are inclusive and the upper edges exclusive.

    Args:
        keypoints (np.ndarray): Array of keypoints with shape (num_keypoints, 2+).
            The first two columns are x and y coordinates.
        holes (np.ndarray): Array of holes with shape (num_holes, 4).
            Each hole is represented as [x1, y1, x2, y2].

    Returns:
        np.ndarray: The rows of ``keypoints`` that lie outside every hole.

    """
    # Column vectors of shape (num_keypoints, 1) broadcast against (num_holes,) rows.
    xs = keypoints[:, 0, np.newaxis]
    ys = keypoints[:, 1, np.newaxis]

    left, top = holes[:, 0], holes[:, 1]
    right, bottom = holes[:, 2], holes[:, 3]

    within_x = (xs >= left) & (xs < right)
    within_y = (ys >= top) & (ys < bottom)

    # covered[i] is True when keypoint i lies in any hole.
    covered = (within_x & within_y).any(axis=1)
    return keypoints[~covered]
|
|
478
|
+
|
|
479
|
+
|
|
480
|
+
@handle_empty_array("bboxes")
def resize_boxes_to_visible_area(
    boxes: np.ndarray,
    hole_mask: np.ndarray,
) -> np.ndarray:
    """Shrink each box to the extent of its pixels that are not covered by holes.

    Box coordinates are truncated to integers and used to slice ``hole_mask``. Within that
    slice, the first and last rows/columns containing any uncovered pixel define the new
    box. Boxes without a single uncovered pixel are dropped.

    Args:
        boxes (np.ndarray): Array of boxes; the first four columns are read as
            [x_min, y_min, x_max, y_max]. Any further columns are carried over unchanged.
        hole_mask (np.ndarray): 2D mask in which covered pixels are 1 and visible pixels 0.

    Returns:
        np.ndarray: The resized boxes, or an empty ``(0, boxes.shape[1])`` array of the
            input dtype when no box remains.

    """
    left, top = boxes[:, 0].astype(int), boxes[:, 1].astype(int)
    right, bottom = boxes[:, 2].astype(int), boxes[:, 3].astype(int)

    resized: list[np.ndarray] = []
    for idx, box in enumerate(boxes):
        x0, y0 = left[idx], top[idx]
        # Invert the mask crop so that visible pixels become non-zero.
        visible = 1 - hole_mask[y0 : bottom[idx], x0 : right[idx]]
        if not visible.any():
            continue

        rows = np.nonzero(visible.any(axis=1))[0]
        cols = np.nonzero(visible.any(axis=0))[0]

        # Only the four coordinate entries change; the rest of the row is preserved.
        shrunk = box.copy()
        shrunk[0] = x0 + cols[0]  # x_min
        shrunk[1] = y0 + rows[0]  # y_min
        shrunk[2] = x0 + cols[-1] + 1  # x_max
        shrunk[3] = y0 + rows[-1] + 1  # y_max
        resized.append(shrunk)

    if not resized:
        return np.zeros((0, boxes.shape[1]), dtype=boxes.dtype)
    return np.array(resized)
|
|
521
|
+
|
|
522
|
+
|
|
523
|
+
def filter_bboxes_by_holes(
    bboxes: np.ndarray,
    holes: np.ndarray,
    image_shape: tuple[int, int],
    min_area: float,
    min_visibility: float,
) -> np.ndarray:
    """Filter bounding boxes by how much of them the holes leave visible.

    A box is kept only if its uncovered area is positive, at least ``min_area``, and at
    least ``min_visibility`` as a fraction of the box area. Surviving boxes are then passed
    to ``resize_boxes_to_visible_area``.

    Args:
        bboxes (np.ndarray): Array of bounding boxes; the first four columns are read as
            [x_min, y_min, x_max, y_max] and truncated to integers for the area computation.
        holes (np.ndarray): Array of holes, each given as [x_min, y_min, x_max, y_max].
        image_shape (tuple[int, int]): Shape of the hole mask to build.
        min_area (float): Minimum uncovered area a box must retain.
        min_visibility (float): Minimum ratio of uncovered area to box area.

    Returns:
        np.ndarray: Filtered bounding boxes. ``bboxes`` itself is returned when there are no
            boxes or no holes; an empty ``(0, bboxes.shape[1])`` array of the input dtype is
            returned when every box is filtered out.

    """
    if len(bboxes) == 0 or len(holes) == 0:
        return bboxes

    # Rasterise the holes: 1 marks a covered pixel.
    hole_mask = np.zeros(image_shape, dtype=np.uint8)
    for hole in holes:
        x_min, y_min, x_max, y_max = hole.astype(int)
        hole_mask[y_min:y_max, x_min:x_max] = 1

    bboxes_int = bboxes.astype(int)
    box_areas = (bboxes_int[:, 2] - bboxes_int[:, 0]) * (bboxes_int[:, 3] - bboxes_int[:, 1])
    intersection_areas = np.array([np.sum(hole_mask[y:y2, x:x2]) for x, y, x2, y2 in bboxes_int[:, :4]])
    remaining_areas = box_areas - intersection_areas

    # Zero-area boxes would give 0/0 and a RuntimeWarning; their ratio is left at 0 and they
    # are rejected by the ``remaining_areas > 0`` condition below.
    visibility_ratios = np.divide(
        remaining_areas,
        box_areas,
        out=np.zeros(len(bboxes), dtype=np.float64),
        where=box_areas != 0,
    )
    mask = (remaining_areas >= min_area) & (visibility_ratios >= min_visibility) & (remaining_areas > 0)

    valid_boxes = bboxes[mask]
    if len(valid_boxes) == 0:
        # Keep the input dtype, matching the empty result of resize_boxes_to_visible_area.
        return np.empty((0, bboxes.shape[1]), dtype=bboxes.dtype)

    # Shrink the surviving boxes to their visible extent.
    return resize_boxes_to_visible_area(valid_boxes, hole_mask)
|
|
568
|
+
|
|
569
|
+
|
|
570
|
+
def calculate_grid_dimensions(
    image_shape: tuple[int, int],
    unit_size_range: tuple[int, int] | None,
    holes_number_xy: tuple[int, int] | None,
    random_generator: np.random.Generator,
) -> tuple[int, int]:
    """Calculate the dimensions of grid units for GridDropout.

    This function determines the size of grid units based on the input parameters.
    It supports three modes of operation:
    1. Using a range of unit sizes
    2. Using a specified number of holes in x and y directions
    3. Falling back to a default calculation

    Args:
        image_shape (tuple[int, int]): The shape of the image as (height, width).
        unit_size_range (tuple[int, int] | None): A range of possible unit sizes.
            If provided, one size is drawn with ``random_generator.integers(low, high)`` and
            used for both height and width; the upper bound is exclusive.
        holes_number_xy (tuple[int, int] | None): The number of holes in the x and y directions.
            If provided, the unit size is the image size floor-divided by these counts.
        random_generator (np.random.Generator): The random generator to use for generating random values.

    Returns:
        tuple[int, int]: The calculated grid unit dimensions as (unit_height, unit_width).

    Raises:
        ValueError: If the upper limit of unit_size_range is greater than the shortest image edge.

    Notes:
        - If both unit_size_range and holes_number_xy are None, the function falls back to a default calculation,
          where the grid unit size is set to max(2, image_dimension // 10) for both height and width.
        - The function prioritizes unit_size_range over holes_number_xy if both are provided.
        - When using holes_number_xy, the actual number of holes may be slightly different due to integer division.

    Examples:
        >>> import numpy as np
        >>> rng = np.random.default_rng(0)
        >>> image_shape = (100, 200)
        >>> size = calculate_grid_dimensions(image_shape, (10, 20), None, rng)
        >>> size[0] == size[1] and 10 <= size[0] < 20
        True

        >>> calculate_grid_dimensions(image_shape, None, (5, 10), rng)
        (10, 40)

        >>> calculate_grid_dimensions(image_shape, None, None, rng)
        (10, 20)

    """
    height, width = image_shape[:2]

    if unit_size_range is not None:
        if unit_size_range[1] > min(height, width):
            raise ValueError("Grid size limits must be within the shortest image edge.")
        # Convert the NumPy scalar so the result matches the annotated tuple[int, int].
        unit_size = int(random_generator.integers(*unit_size_range))
        return unit_size, unit_size

    if holes_number_xy:
        holes_number_x, holes_number_y = holes_number_xy
        unit_width = width // holes_number_x
        unit_height = height // holes_number_y
        return unit_height, unit_width

    # Default fallback: a tenth of each dimension, but never smaller than 2 pixels.
    return max(2, height // 10), max(2, width // 10)
|
|
634
|
+
|
|
635
|
+
|
|
636
|
+
def generate_grid_holes(
    image_shape: tuple[int, int],
    grid: tuple[int, int],
    ratio: float,
    random_offset: bool,
    shift_xy: tuple[int, int],
    random_generator: np.random.Generator,
) -> np.ndarray:
    """Build the rectangular holes used by GridDropout from a uniform grid of cells.

    The image is split into cells by ``split_uniform_grid``; one hole is then placed
    per returned cell. Cell rows are read as ``(y_min, x_min, y_max, x_max)``.

    Args:
        image_shape (tuple[int, int]): Image shape; only the first two entries
            (height, width) are used.
        grid (tuple[int, int]): Grid specification forwarded unchanged to
            ``split_uniform_grid``.
        ratio (float): Hole size as a fraction of the cell size. The resulting
            size is clipped to ``[1, cell_size - 1]`` and truncated to an integer,
            so a hole never covers its whole cell.
        random_offset (bool): If True, every hole gets its own random offset inside
            its cell. If False, ``shift_xy`` is used as the offset for all holes.
        shift_xy (tuple[int, int]): Global ``(shift_x, shift_y)`` offset. Only read
            when ``random_offset`` is False.
        random_generator (np.random.Generator): Generator passed to
            ``split_uniform_grid`` and used for the per-hole offsets. It is
            required; no fallback generator is created here.

    Returns:
        np.ndarray: Array with one row ``[x_min, y_min, x_max, y_max]`` per cell.

    Notes:
        - Hole origins are clipped so that the whole hole fits inside the image,
          and the far edges are additionally capped at the image width/height.
        - With ``random_offset=False`` the shift is not limited to the cell; only
          the image-boundary clipping applies.

    Examples:
        >>> rng = np.random.default_rng(42)
        >>> holes = generate_grid_holes(
        ...     image_shape=(100, 100),
        ...     grid=(5, 5),
        ...     ratio=0.5,
        ...     random_offset=True,
        ...     shift_xy=(0, 0),
        ...     random_generator=rng,
        ... )
        >>> holes.shape[1]
        4

    """
    img_h, img_w = image_shape[:2]

    # Uniform grid of cells, each row being (y_min, x_min, y_max, x_max).
    cells = split_uniform_grid(image_shape, grid, random_generator)
    top, left = cells[:, 0], cells[:, 1]
    cell_h = cells[:, 2] - top
    cell_w = cells[:, 3] - left

    # Hole extent per cell: scaled by ratio, kept within [1, cell - 1].
    hole_h = np.clip(cell_h * ratio, 1, cell_h - 1).astype(int)
    hole_w = np.clip(cell_w * ratio, 1, cell_w - 1).astype(int)

    # Free room left in each cell once the hole is placed.
    slack_y = cell_h - hole_h
    slack_x = cell_w - hole_w

    if not random_offset:
        # Same global shift for every hole.
        dy = np.full_like(slack_y, shift_xy[1])
        dx = np.full_like(slack_x, shift_xy[0])
    else:
        # Independent offset per hole; y is drawn before x to keep the draw order stable.
        dy = random_generator.integers(0, slack_y + 1)
        dx = random_generator.integers(0, slack_x + 1)

    # Top-left corners, clipped so the full hole stays inside the image.
    x1 = np.clip(left + dx, 0, img_w - hole_w)
    y1 = np.clip(top + dy, 0, img_h - hole_h)
    # Bottom-right corners, capped at the image border.
    x2 = np.minimum(x1 + hole_w, img_w)
    y2 = np.minimum(y1 + hole_h, img_h)

    return np.column_stack((x1, y1, x2, y2))
|
|
719
|
+
|
|
720
|
+
|
|
721
|
+
@handle_empty_array("bboxes")
def mask_dropout_bboxes(
    bboxes: np.ndarray,
    dropout_mask: np.ndarray,
    image_shape: tuple[int, int],
    min_area: float,
    min_visibility: float,
) -> np.ndarray:
    """Filter bounding boxes by how much of them remains outside a dropout mask.

    Boxes are only filtered, never modified: the returned rows are the original
    rows of ``bboxes`` that pass both thresholds.

    Args:
        bboxes (np.ndarray): Array of bounding boxes with shape (num_boxes, 4+).
            The first four columns are read as ``x_min, y_min, x_max, y_max`` and
            compared directly against pixel indices.
        dropout_mask (np.ndarray): Mask of dropped pixels. Any non-zero value is
            treated as "dropped". A 2D mask is used as-is; a 3D mask is collapsed
            to 2D (see Notes).
        image_shape (tuple[int, int]): Shape of the image; only the first two
            entries (height, width) are used.
        min_area (float): Minimum number of visible pixels a box must keep.
        min_visibility (float): Minimum ratio of visible pixels to box area.

    Returns:
        np.ndarray: The rows of ``bboxes`` that satisfy both thresholds.

    Notes:
        - 3D masks: a leading axis of length 1 is squeezed; otherwise a trailing
          axis of length <= 4 is treated as channels and reduced with ``any``;
          otherwise the leading axis is reduced with ``any``.
        - Pixel membership uses inclusive bounds while the box area is
          ``(x_max - x_min) * (y_max - y_min)``, so the ratio can exceed 1.
        - Zero-area boxes yield a ``nan`` ratio (no visible pixels, box dropped)
          or an ``inf`` ratio (visibility check passes); no warning is emitted.
        - ``handle_empty_array`` is defined outside this block; presumably it
          short-circuits empty input - confirm against its definition.

    """
    # Slice so that a (height, width, channels) shape is accepted as well.
    height, width = image_shape[:2]

    # Ensure dropout_mask is 2D
    if dropout_mask.ndim > 2:
        if dropout_mask.shape[0] == 1:  # Shape is (1, H, W)
            dropout_mask = dropout_mask.squeeze(0)
        elif dropout_mask.shape[-1] <= 4:  # Shape is (H, W, C)
            dropout_mask = np.any(dropout_mask, axis=-1)
        else:  # Shape is (C, H, W)
            dropout_mask = np.any(dropout_mask, axis=0)

    # `~` is a bitwise NOT on integer arrays (e.g. ~2 == 253 for uint8), which
    # would count some dropped pixels as visible. Work on a boolean mask instead.
    dropout_mask = dropout_mask.astype(bool, copy=False)

    # Per-box boolean masks of shape (num_boxes, height, width), inclusive bounds.
    y, x = np.ogrid[:height, :width]
    box_masks = (
        (x[None, :] >= bboxes[:, 0, None, None])
        & (x[None, :] <= bboxes[:, 2, None, None])
        & (y[None, :] >= bboxes[:, 1, None, None])
        & (y[None, :] <= bboxes[:, 3, None, None])
    )

    # Geometric area of each bounding box
    box_areas = (bboxes[:, 2] - bboxes[:, 0]) * (bboxes[:, 3] - bboxes[:, 1])

    # Number of box pixels that are not covered by the dropout mask
    visible_areas = np.sum(box_masks & ~dropout_mask, axis=(1, 2))

    # Visibility ratio (visible area / total box area). Degenerate zero-area
    # boxes give nan/inf here; the resulting keep/drop decision is unchanged,
    # only the divide warnings are suppressed.
    with np.errstate(divide="ignore", invalid="ignore"):
        visibility_ratio = visible_areas / box_areas

    # A box is kept only if it passes both thresholds (nan compares as False).
    keep_mask = (visible_areas >= min_area) & (visibility_ratio >= min_visibility)

    return bboxes[keep_mask]
|
|
775
|
+
|
|
776
|
+
|
|
777
|
+
@handle_empty_array("keypoints")
def mask_dropout_keypoints(
    keypoints: np.ndarray,
    dropout_mask: np.ndarray,
) -> np.ndarray:
    """Filter keypoints, keeping those that lie on non-dropped pixels of the mask.

    Args:
        keypoints (np.ndarray): Array of keypoints with shape (num_keypoints, 2+).
            The first two columns are read as ``x, y`` and truncated to integers
            to index the mask.
        dropout_mask (np.ndarray): Mask of dropped pixels. Any non-zero value is
            treated as "dropped". A 2D mask is used as-is; a 3D mask is collapsed
            to 2D (see Notes).

    Returns:
        np.ndarray: The rows of ``keypoints`` whose integer position is inside the
        mask and not on a dropped pixel.

    Notes:
        - 3D masks: a leading axis of length 1 is squeezed; otherwise a trailing
          axis of length <= 4 is treated as channels and reduced with ``any``;
          otherwise the leading axis is reduced with ``any``.
        - Keypoints outside the mask extent are removed.
        - ``handle_empty_array`` is defined outside this block; presumably it
          short-circuits empty input - confirm against its definition.

    """
    # Ensure dropout_mask is 2D
    if dropout_mask.ndim > 2:
        if dropout_mask.shape[0] == 1:  # Shape is (1, H, W)
            dropout_mask = dropout_mask.squeeze(0)
        elif dropout_mask.shape[-1] <= 4:  # Shape is (H, W, C)
            dropout_mask = np.any(dropout_mask, axis=-1)
        else:  # Shape is (C, H, W)
            dropout_mask = np.any(dropout_mask, axis=0)

    # `~` on an integer mask is a bitwise NOT (~1 == 254 for uint8), which is
    # truthy and would keep keypoints on dropped pixels. Use a boolean mask.
    dropout_mask = dropout_mask.astype(bool, copy=False)

    # Integer pixel position of every keypoint
    coords = keypoints[:, :2].astype(int)
    xs, ys = coords[:, 0], coords[:, 1]

    # Keypoints that fall inside the mask extent
    in_bounds = (xs >= 0) & (xs < dropout_mask.shape[1]) & (ys >= 0) & (ys < dropout_mask.shape[0])

    # Among in-bounds keypoints, keep only those on non-dropped pixels
    keep = in_bounds.copy()
    keep[in_bounds] = ~dropout_mask[ys[in_bounds], xs[in_bounds]]

    return keypoints[keep]
|
|
818
|
+
|
|
819
|
+
|
|
820
|
+
def label(mask: np.ndarray, return_num: bool = False, connectivity: int = 2) -> np.ndarray | tuple[np.ndarray, int]:
    """Label connected regions of an integer array.

    This function uses OpenCV's connectedComponents under the hood but is meant to
    mimic the behavior of scikit-image's label function: every distinct non-zero
    value of ``mask`` is processed separately, and each connected region of equal
    value receives its own label. Zero is treated as background.

    Args:
        mask (np.ndarray): The array to label. Must be of integer type and in a
            layout accepted by ``cv2.connectedComponents`` (a single-channel image).
        return_num (bool): If True, also return the number of labels (default: False).
        connectivity (int): 1 selects 4-connectivity; any other value selects
            8-connectivity. Default is 2.

    Returns:
        np.ndarray | tuple[np.ndarray, int]: An int32 array shaped like ``mask`` in
        which background is 0 and regions are numbered consecutively from 1, in
        ascending order of the original value and then of OpenCV's component index.
        If return_num is True, a tuple ``(labeled, number_of_labels)`` is returned.

    """
    # Output starts as all background.
    labeled = np.zeros_like(mask, dtype=np.int32)

    # Distinct foreground values, ascending.
    unique_values = np.unique(mask[mask != 0])

    # Connectivity for OpenCV (4 or 8); it does not depend on the loop variable.
    cv2_connectivity = 4 if connectivity == 1 else 8

    next_label = 1
    for value in unique_values:
        binary_mask = (mask == value).astype(np.uint8)

        # OpenCV numbers components 1..num_components - 1; 0 is background.
        num_components, components = cv2.connectedComponents(binary_mask, connectivity=cv2_connectivity)

        # Shift this value's component ids into the global label range in one pass
        # instead of rescanning the image once per component.
        foreground = components > 0
        labeled[foreground] = components[foreground] + (next_label - 1)
        next_label += num_components - 1

    num_labels = next_label - 1

    return (labeled, num_labels) if return_num else labeled
|
|
862
|
+
|
|
863
|
+
|
|
864
|
+
def get_holes_from_boxes(
    target_boxes: np.ndarray,
    num_holes_per_box: int,
    hole_height_range: tuple[float, float],
    hole_width_range: tuple[float, float],
    random_generator: np.random.Generator,
) -> np.ndarray:
    """Generate holes based on bounding boxes.

    For every box, ``num_holes_per_box`` holes are sampled. Hole sizes are a random
    fraction of the box size and positions are sampled so that the hole origin lies
    within the room left over inside the box.

    Args:
        target_boxes (np.ndarray): Boxes with shape (num_boxes, 4+); the first four
            columns are read as ``x_min, y_min, x_max, y_max``.
        num_holes_per_box (int): Number of holes sampled for each box.
        hole_height_range (tuple[float, float]): ``(low, high)`` bounds of the
            uniform distribution for hole height as a fraction of box height.
        hole_width_range (tuple[float, float]): ``(low, high)`` bounds of the
            uniform distribution for hole width as a fraction of box width.
        random_generator (np.random.Generator): Source of randomness.

    Returns:
        np.ndarray: int32 array of shape (num_boxes * num_holes_per_box, 4) with
        rows ``[x_min, y_min, x_max, y_max]``, grouped by box.

    """
    sample_shape = (len(target_boxes), num_holes_per_box)

    # Box extents as column vectors so they broadcast against (N, num_holes).
    widths = (target_boxes[:, 2] - target_boxes[:, 0])[:, None]
    heights = (target_boxes[:, 3] - target_boxes[:, 1])[:, None]

    # Hole sizes: random fraction of the box size, truncated to whole pixels.
    # Draw order (heights, widths, x offsets, y offsets) is part of the behavior.
    height_fraction = random_generator.uniform(hole_height_range[0], hole_height_range[1], size=sample_shape)
    hole_h = (height_fraction * heights).astype(np.int32)

    width_fraction = random_generator.uniform(hole_width_range[0], hole_width_range[1], size=sample_shape)
    hole_w = (width_fraction * widths).astype(np.int32)

    # Relative position of each hole inside the space its box leaves free.
    rel_x = random_generator.uniform(0, 1, size=sample_shape)
    rel_y = random_generator.uniform(0, 1, size=sample_shape)

    left = target_boxes[:, 0, None] + rel_x * (widths - hole_w)
    top = target_boxes[:, 1, None] + rel_y * (heights - hole_h)

    corners = np.stack([left, top, left + hole_w, top + hole_h], axis=-1)
    return corners.astype(np.int32).reshape(-1, 4)
|
|
912
|
+
|
|
913
|
+
|
|
914
|
+
def sample_points_from_components(
    mask: np.ndarray,
    num_points: int,
    random_generator: np.random.Generator,
) -> tuple[np.ndarray, np.ndarray] | None:
    """Sample points from connected components in a mask.

    The mask is cast to uint8 and split into connected components with
    ``cv2.connectedComponents``. From each component, ``num_points`` pixels are
    drawn with replacement.

    Args:
        mask (np.ndarray): Binary mask
        num_points (int): Number of points to sample per component
        random_generator (np.random.Generator): Random number generator

    Returns:
        tuple[np.ndarray, np.ndarray] | None: Tuple ``(centers, obj_sizes)`` where
        ``centers`` has shape (num_sampled, 2) with rows ``(x, y)`` and
        ``obj_sizes`` has shape (num_sampled,) holding, for each sampled point, the
        square root of the pixel count of its component. None is returned when
        the mask has no foreground component or when no point was sampled.

    """
    num_labels, labels = cv2.connectedComponents(mask.astype(np.uint8))

    if num_labels == 1:  # Only background
        return None

    center_chunks = []
    size_chunks = []
    # Named `component_id` so the module-level `label` function is not shadowed.
    for component_id in range(1, num_labels):  # Skip background (0)
        points = np.argwhere(labels == component_id)  # Rows are (y, x)
        if len(points) == 0:
            continue

        # Object size is computed once per component
        obj_size = np.sqrt(len(points))

        # Randomly sample points from the component, allowing repeats
        indices = random_generator.choice(len(points), size=num_points, replace=True)

        # Convert from (y, x) to (x, y)
        center_chunks.append(points[indices][:, ::-1])
        # One object size entry per sampled point
        size_chunks.append(np.full(num_points, obj_size))

    if not center_chunks:
        return None

    centers = np.concatenate(center_chunks)
    if len(centers) == 0:  # e.g. num_points == 0: nothing was sampled
        return None

    return centers, np.concatenate(size_chunks)
|
|
954
|
+
|
|
955
|
+
|
|
956
|
+
def get_holes_from_mask(
    mask: np.ndarray,
    num_holes_per_obj: int,
    mask_indices: list[int],
    hole_height_range: tuple[float, float],
    hole_width_range: tuple[float, float],
    random_generator: np.random.Generator,
) -> np.ndarray:
    """Generate holes based on segmentation mask.

    Pixels whose value is in ``mask_indices`` form the target region. Hole centers
    are obtained from ``sample_points_from_components``; each hole is sized as a
    random fraction of the per-point object size, centered on its point, clipped
    to the image and discarded if it ends up with zero width or height.

    Args:
        mask (np.ndarray): Segmentation mask; its first two dimensions are used as
            (height, width).
        num_holes_per_obj (int): Number of points requested per connected object.
        mask_indices (list[int]): Mask values that mark target objects.
        hole_height_range (tuple[float, float]): ``(low, high)`` bounds of the
            uniform factor applied to the object size to get the hole height.
        hole_width_range (tuple[float, float]): ``(low, high)`` bounds of the
            uniform factor applied to the object size to get the hole width.
        random_generator (np.random.Generator): Source of randomness.

    Returns:
        np.ndarray: int32 array of shape (num_holes, 4) with rows
        ``[x_min, y_min, x_max, y_max]``; shape (0, 4) when there is nothing to drop.

    """
    no_holes = np.empty((0, 4), dtype=np.int32)

    # Pixels belonging to any of the requested classes.
    target_pixels = np.isin(mask, np.array(mask_indices))
    if not np.any(target_pixels):
        return no_holes

    sampled = sample_points_from_components(target_pixels, num_holes_per_obj, random_generator)
    if sampled is None:
        return no_holes

    centers, obj_sizes = sampled
    count = len(centers)
    img_h, img_w = mask.shape[:2]

    # Hole extents scale with the size of the object each center came from.
    # Heights are drawn before widths; the draw order is part of the behavior.
    heights = random_generator.uniform(hole_height_range[0], hole_height_range[1], size=count) * obj_sizes
    widths = random_generator.uniform(hole_width_range[0], hole_width_range[1], size=count) * obj_sizes

    half_h = np.floor_divide(heights, 2)
    half_w = np.floor_divide(widths, 2)
    cx, cy = centers[:, 0], centers[:, 1]

    # Boxes centered on the sampled points: [x_min, y_min, x_max, y_max].
    holes = np.column_stack([cx - half_w, cy - half_h, cx + half_w, cy + half_h]).astype(np.int32)

    # Keep every hole inside the image.
    holes[:, 0] = holes[:, 0].clip(0, img_w - 1)  # x_min
    holes[:, 1] = holes[:, 1].clip(0, img_h - 1)  # y_min
    holes[:, 2] = holes[:, 2].clip(0, img_w)  # x_max
    holes[:, 3] = holes[:, 3].clip(0, img_h)  # y_max

    # Discard holes that collapsed to zero width or height.
    has_extent = (holes[:, 2] > holes[:, 0]) & (holes[:, 3] > holes[:, 1])
    return holes[has_extent]
|