nrtk-albumentations 2.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of nrtk-albumentations might be problematic. Click here for more details.
- albumentations/__init__.py +21 -0
- albumentations/augmentations/__init__.py +23 -0
- albumentations/augmentations/blur/__init__.py +0 -0
- albumentations/augmentations/blur/functional.py +438 -0
- albumentations/augmentations/blur/transforms.py +1633 -0
- albumentations/augmentations/crops/__init__.py +0 -0
- albumentations/augmentations/crops/functional.py +494 -0
- albumentations/augmentations/crops/transforms.py +3647 -0
- albumentations/augmentations/dropout/__init__.py +0 -0
- albumentations/augmentations/dropout/channel_dropout.py +134 -0
- albumentations/augmentations/dropout/coarse_dropout.py +567 -0
- albumentations/augmentations/dropout/functional.py +1017 -0
- albumentations/augmentations/dropout/grid_dropout.py +166 -0
- albumentations/augmentations/dropout/mask_dropout.py +274 -0
- albumentations/augmentations/dropout/transforms.py +461 -0
- albumentations/augmentations/dropout/xy_masking.py +186 -0
- albumentations/augmentations/geometric/__init__.py +0 -0
- albumentations/augmentations/geometric/distortion.py +1238 -0
- albumentations/augmentations/geometric/flip.py +752 -0
- albumentations/augmentations/geometric/functional.py +4151 -0
- albumentations/augmentations/geometric/pad.py +676 -0
- albumentations/augmentations/geometric/resize.py +956 -0
- albumentations/augmentations/geometric/rotate.py +864 -0
- albumentations/augmentations/geometric/transforms.py +1962 -0
- albumentations/augmentations/mixing/__init__.py +0 -0
- albumentations/augmentations/mixing/domain_adaptation.py +787 -0
- albumentations/augmentations/mixing/domain_adaptation_functional.py +453 -0
- albumentations/augmentations/mixing/functional.py +878 -0
- albumentations/augmentations/mixing/transforms.py +832 -0
- albumentations/augmentations/other/__init__.py +0 -0
- albumentations/augmentations/other/lambda_transform.py +180 -0
- albumentations/augmentations/other/type_transform.py +261 -0
- albumentations/augmentations/pixel/__init__.py +0 -0
- albumentations/augmentations/pixel/functional.py +4226 -0
- albumentations/augmentations/pixel/transforms.py +7556 -0
- albumentations/augmentations/spectrogram/__init__.py +0 -0
- albumentations/augmentations/spectrogram/transform.py +220 -0
- albumentations/augmentations/text/__init__.py +0 -0
- albumentations/augmentations/text/functional.py +272 -0
- albumentations/augmentations/text/transforms.py +299 -0
- albumentations/augmentations/transforms3d/__init__.py +0 -0
- albumentations/augmentations/transforms3d/functional.py +393 -0
- albumentations/augmentations/transforms3d/transforms.py +1422 -0
- albumentations/augmentations/utils.py +249 -0
- albumentations/core/__init__.py +0 -0
- albumentations/core/bbox_utils.py +920 -0
- albumentations/core/composition.py +1885 -0
- albumentations/core/hub_mixin.py +299 -0
- albumentations/core/keypoints_utils.py +521 -0
- albumentations/core/label_manager.py +339 -0
- albumentations/core/pydantic.py +239 -0
- albumentations/core/serialization.py +352 -0
- albumentations/core/transforms_interface.py +976 -0
- albumentations/core/type_definitions.py +127 -0
- albumentations/core/utils.py +605 -0
- albumentations/core/validation.py +129 -0
- albumentations/pytorch/__init__.py +1 -0
- albumentations/pytorch/transforms.py +189 -0
- nrtk_albumentations-2.1.0.dist-info/METADATA +196 -0
- nrtk_albumentations-2.1.0.dist-info/RECORD +62 -0
- nrtk_albumentations-2.1.0.dist-info/WHEEL +4 -0
- nrtk_albumentations-2.1.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,461 @@
|
|
|
1
|
+
"""Transform classes for dropout-based augmentations.
|
|
2
|
+
|
|
3
|
+
This module contains transform classes for various dropout techniques used in image
|
|
4
|
+
augmentation. It provides the base dropout class and specialized implementations like
|
|
5
|
+
PixelDropout. These transforms randomly remove or modify pixels, channels, or regions
|
|
6
|
+
in images, which can help models become more robust to occlusions and missing information.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
from typing import Any, Literal, cast
|
|
12
|
+
|
|
13
|
+
import numpy as np
|
|
14
|
+
from albucore import get_num_channels
|
|
15
|
+
from pydantic import Field
|
|
16
|
+
|
|
17
|
+
from albumentations.augmentations.dropout import functional as fdropout
|
|
18
|
+
from albumentations.augmentations.dropout.functional import (
|
|
19
|
+
cutout,
|
|
20
|
+
cutout_on_volume,
|
|
21
|
+
cutout_on_volumes,
|
|
22
|
+
filter_bboxes_by_holes,
|
|
23
|
+
filter_keypoints_in_holes,
|
|
24
|
+
)
|
|
25
|
+
from albumentations.augmentations.pixel import functional as fpixel
|
|
26
|
+
from albumentations.core.bbox_utils import BboxProcessor, denormalize_bboxes, normalize_bboxes
|
|
27
|
+
from albumentations.core.keypoints_utils import KeypointsProcessor
|
|
28
|
+
from albumentations.core.transforms_interface import BaseTransformInitSchema, DualTransform
|
|
29
|
+
from albumentations.core.type_definitions import ALL_TARGETS, Targets
|
|
30
|
+
|
|
31
|
+
__all__ = ["PixelDropout"]
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class BaseDropout(DualTransform):
    """Base class for dropout-style transformations.

    This class provides common functionality for various dropout techniques,
    including applying cutouts to images and masks.

    Subclasses must implement ``get_params_dependent_on_data`` and return a dict
    containing the two parameters every ``apply*`` method of this class consumes:

    - ``"holes"``: array of ``[x_min, y_min, x_max, y_max]`` rows. An empty array
      (``holes.size == 0``) turns every ``apply*`` method into a no-op.
    - ``"seed"``: integer used to build ``np.random.default_rng(seed)`` for each target.

    Args:
        fill (tuple[float, ...] | float | Literal["random", "random_uniform", "inpaint_telea", "inpaint_ns"]):
            Value to fill dropped regions.
        fill_mask (tuple[float, ...] | float | None): Value to fill
            dropped regions in the mask. If None, the mask is not modified.
        p (float): Probability of applying the transform.

    Targets:
        image, mask, bboxes, keypoints, volume, mask3d

    Image types:
        uint8, float32

    Examples:
        >>> import numpy as np
        >>> import albumentations as A
        >>>
        >>> # Example of a custom dropout transform inheriting from BaseDropout
        >>> class CustomDropout(A.BaseDropout):
        ...     def __init__(self, num_holes_range=(4, 8), hole_size_range=(10, 20), *args, **kwargs):
        ...         super().__init__(*args, **kwargs)
        ...         self.num_holes_range = num_holes_range
        ...         self.hole_size_range = hole_size_range
        ...
        ...     def get_params_dependent_on_data(self, params, data):
        ...         img = data["image"]
        ...         height, width = img.shape[:2]
        ...
        ...         # Generate random holes
        ...         num_holes = self.py_random.randint(*self.num_holes_range)
        ...
        ...         holes = []
        ...         for _ in range(num_holes):
        ...             # Random size and position for each hole
        ...             hole_size = self.py_random.randint(*self.hole_size_range)
        ...             x1 = self.py_random.randint(0, max(1, width - hole_size))
        ...             y1 = self.py_random.randint(0, max(1, height - hole_size))
        ...             x2 = min(width, x1 + hole_size)
        ...             y2 = min(height, y1 + hole_size)
        ...             holes.append([x1, y1, x2, y2])
        ...
        ...         # Return holes and random seed
        ...         return {
        ...             "holes": np.array(holes) if holes else np.empty((0, 4), dtype=np.int32),
        ...             "seed": self.random_generator.integers(0, 2**32 - 1),
        ...         }
        >>>
        >>> # Prepare sample data
        >>> image = np.random.randint(0, 256, (100, 100, 3), dtype=np.uint8)
        >>> mask = np.random.randint(0, 2, (100, 100), dtype=np.uint8)
        >>> bboxes = np.array([[0.1, 0.1, 0.4, 0.4], [0.6, 0.6, 0.9, 0.9]])
        >>>
        >>> # Create a transform with custom dropout
        >>> transform = A.Compose([
        ...     CustomDropout(
        ...         num_holes_range=(3, 6),        # Generate 3-6 random holes
        ...         hole_size_range=(5, 15),       # Holes of size 5-15 pixels
        ...         fill=0,                        # Fill holes with black
        ...         fill_mask=1,                   # Fill mask holes with 1
        ...         p=1.0                          # Always apply for this example
        ...     )
        ... ], bbox_params=A.BboxParams(format='yolo', min_visibility=0.3))
        >>>
        >>> # Apply the transform
        >>> transformed = transform(image=image, mask=mask, bboxes=bboxes)
        >>>
        >>> # Get the transformed data
        >>> dropout_image = transformed["image"]    # Image with random holes filled with 0
        >>> dropout_mask = transformed["mask"]      # Mask with same holes filled with 1
        >>> dropout_bboxes = transformed["bboxes"]  # Bboxes filtered by visibility threshold

    """

    _targets: tuple[Targets, ...] | Targets = ALL_TARGETS

    class InitSchema(BaseTransformInitSchema):
        fill: tuple[float, ...] | float | Literal["random", "random_uniform", "inpaint_telea", "inpaint_ns"]
        fill_mask: tuple[float, ...] | float | None

    def __init__(
        self,
        fill: tuple[float, ...] | float | Literal["random", "random_uniform", "inpaint_telea", "inpaint_ns"],
        fill_mask: tuple[float, ...] | float | None,
        p: float,
    ):
        super().__init__(p=p)
        self.fill = fill  # type: ignore[assignment]
        self.fill_mask = fill_mask

    def _uses_inpainting(self) -> bool:
        """Return True when ``fill`` selects one of the inpainting modes."""
        return self.fill in {"inpaint_telea", "inpaint_ns"}

    @staticmethod
    def _require_inpaintable_channels(num_channels: int) -> None:
        """Raise ValueError unless ``num_channels`` is 1 or 3."""
        if num_channels not in {1, 3}:
            raise ValueError("Inpainting works only for 1 or 3 channel images")

    def apply(self, img: np.ndarray, holes: np.ndarray, seed: int, **params: Any) -> np.ndarray:
        """Fill the hole regions of a single image via ``cutout``.

        Args:
            img (np.ndarray): Image to modify.
            holes (np.ndarray): Hole rectangles; an empty array leaves the image untouched.
            seed (int): Seed for the random generator handed to ``cutout``.
            **params (Any): Additional parameters (unused).

        Returns:
            np.ndarray: The input image if there are no holes, otherwise the result of ``cutout``.

        Raises:
            ValueError: If an inpainting fill is requested for an image without 1 or 3 channels.

        """
        if holes.size == 0:
            return img
        # The channel count is only computed when an inpainting fill is selected.
        if self._uses_inpainting():
            self._require_inpaintable_channels(get_num_channels(img))
        return cutout(img, holes, self.fill, np.random.default_rng(seed))

    def apply_to_images(self, images: np.ndarray, holes: np.ndarray, seed: int, **params: Any) -> np.ndarray:
        """Fill the hole regions of a batch of images via ``cutout_on_volume``.

        Args:
            images (np.ndarray): Batch of images; a 4-D array is read as having channels on axis 3,
                anything else is treated as single-channel.
            holes (np.ndarray): Hole rectangles; an empty array leaves the batch untouched.
            seed (int): Seed for the random generator handed to ``cutout_on_volume``.
            **params (Any): Additional parameters (unused).

        Returns:
            np.ndarray: The input batch if there are no holes, otherwise the processed batch.

        Raises:
            ValueError: If an inpainting fill is requested for images without 1 or 3 channels.

        """
        if holes.size == 0:
            return images
        if self._uses_inpainting():
            self._require_inpaintable_channels(images.shape[3] if images.ndim == 4 else 1)
        # Images (N, H, W, C) have the same structure as volumes (D, H, W, C)
        return cutout_on_volume(images, holes, self.fill, np.random.default_rng(seed))

    def apply_to_volume(self, volume: np.ndarray, holes: np.ndarray, seed: int, **params: Any) -> np.ndarray:
        """Fill the hole regions of a volume by delegating to ``apply_to_images``."""
        # Volume (D, H, W, C) has the same structure as images (N, H, W, C)
        # We can reuse the same logic
        return self.apply_to_images(volume, holes, seed, **params)

    def apply_to_volumes(self, volumes: np.ndarray, holes: np.ndarray, seed: int, **params: Any) -> np.ndarray:
        """Fill the hole regions of a batch of volumes via ``cutout_on_volumes``.

        Args:
            volumes (np.ndarray): Batch of volumes; a 5-D array is read as having channels on axis 4,
                anything else is treated as single-channel.
            holes (np.ndarray): Hole rectangles; an empty array leaves the batch untouched.
            seed (int): Seed for the random generator handed to ``cutout_on_volumes``.
            **params (Any): Additional parameters (unused).

        Returns:
            np.ndarray: The input batch if there are no holes, otherwise the processed batch.

        Raises:
            ValueError: If an inpainting fill is requested for volumes without 1 or 3 channels.

        """
        if holes.size == 0:
            return volumes
        if self._uses_inpainting():
            self._require_inpaintable_channels(volumes.shape[4] if volumes.ndim == 5 else 1)
        return cutout_on_volumes(volumes, holes, self.fill, np.random.default_rng(seed))

    def apply_to_mask3d(self, mask: np.ndarray, holes: np.ndarray, seed: int, **params: Any) -> np.ndarray:
        """Fill the hole regions of a 3-D mask with ``fill_mask``; no-op if ``fill_mask`` is None."""
        if self.fill_mask is None or holes.size == 0:
            return mask
        return cutout_on_volume(mask, holes, self.fill_mask, np.random.default_rng(seed))

    def apply_to_masks3d(self, mask: np.ndarray, holes: np.ndarray, seed: int, **params: Any) -> np.ndarray:
        """Fill the hole regions of a batch of 3-D masks with ``fill_mask``; no-op if it is None."""
        if self.fill_mask is None or holes.size == 0:
            return mask
        return cutout_on_volumes(mask, holes, self.fill_mask, np.random.default_rng(seed))

    def apply_to_mask(self, mask: np.ndarray, holes: np.ndarray, seed: int, **params: Any) -> np.ndarray:
        """Fill the hole regions of a 2-D mask with ``fill_mask``; no-op if ``fill_mask`` is None."""
        if self.fill_mask is None or holes.size == 0:
            return mask
        return cutout(mask, holes, self.fill_mask, np.random.default_rng(seed))

    def apply_to_bboxes(
        self,
        bboxes: np.ndarray,
        holes: np.ndarray,
        **params: Any,
    ) -> np.ndarray:
        """Filter bounding boxes against the holes.

        Boxes are denormalized with ``params["shape"][:2]``, passed through
        ``filter_bboxes_by_holes`` with the bbox processor's ``min_area`` and
        ``min_visibility``, and normalized again.

        Args:
            bboxes (np.ndarray): Bounding boxes, normalized to the image size.
            holes (np.ndarray): Hole rectangles.
            **params (Any): Must contain ``"shape"``, the shape of the image.

        Returns:
            np.ndarray: The filtered boxes, or the input unchanged when there are no holes
                or no bbox processor is configured.

        """
        if holes.size == 0:
            return bboxes
        processor = cast("BboxProcessor", self.get_processor("bboxes"))
        if processor is None:
            return bboxes

        image_shape = params["shape"][:2]
        denormalized_bboxes = denormalize_bboxes(bboxes, image_shape)

        return normalize_bboxes(
            filter_bboxes_by_holes(
                denormalized_bboxes,
                holes,
                image_shape,
                min_area=processor.params.min_area,
                min_visibility=processor.params.min_visibility,
            ),
            image_shape,
        )

    def apply_to_keypoints(
        self,
        keypoints: np.ndarray,
        holes: np.ndarray,
        **params: Any,
    ) -> np.ndarray:
        """Filter keypoints against the holes.

        Args:
            keypoints (np.ndarray): Keypoints to filter.
            holes (np.ndarray): Hole rectangles.
            **params (Any): Additional parameters (unused).

        Returns:
            np.ndarray: Result of ``filter_keypoints_in_holes``, or the input unchanged when
                there are no holes, no keypoint processor is configured, or the processor's
                ``remove_invisible`` flag is falsy.

        """
        if holes.size == 0:
            return keypoints
        processor = cast("KeypointsProcessor", self.get_processor("keypoints"))

        if processor is None or not processor.params.remove_invisible:
            return keypoints

        return filter_keypoints_in_holes(keypoints, holes)

    def get_params_dependent_on_data(self, params: dict[str, Any], data: dict[str, Any]) -> dict[str, Any]:
        """Produce the ``holes`` and ``seed`` parameters; must be overridden by subclasses.

        Raises:
            NotImplementedError: Always, in this base class.

        """
        raise NotImplementedError("Subclasses must implement this method.")
|
|
220
|
+
|
|
221
|
+
|
|
222
|
+
class PixelDropout(DualTransform):
    """Drops random pixels from the image.

    This transform randomly sets pixels in the image to a specified value, effectively "dropping out" those pixels.
    It can be applied to both the image and its corresponding mask.

    Args:
        dropout_prob (float): Probability of dropping out each pixel. Should be in the range [0, 1].
            Default: 0.01

        per_channel (bool): If True, the dropout mask will be generated independently for each channel.
            If False, the same dropout mask will be applied to all channels.
            Default: False

        drop_value (float | tuple[float, ...] | None): Value to assign to the dropped pixels.
            If None, the value will be randomly sampled for each application:
                - For uint8 images: Random integer in [0, 255]
                - For float32 images: Random float in [0, 1]
            If a single number, that value will be used for all dropped pixels.
            If a sequence, it should contain one value per channel.
            Default: 0

        mask_drop_value (float | tuple[float, ...] | None): Value to assign to dropped pixels in the mask.
            If None, the mask will remain unchanged.
            If a single number, that value will be used for all dropped pixels in the mask.
            If a sequence, it should contain one value per channel.
            Default: None

        p (float): Probability of applying the transform. Should be in the range [0, 1].
            Default: 0.5

    Targets:
        image, mask, bboxes, keypoints, volume, mask3d

    Image types:
        uint8, float32

    Note:
        - Bounding boxes are filtered against the image dropout mask using the bbox processor's
          `min_area` and `min_visibility`. They are returned unchanged when `per_channel=True`
          or when no bbox processor is configured.
        - Keypoints are always returned unchanged by this transform.
        - The mask uses its own, independently sampled dropout mask, so dropped mask pixels
          do not necessarily coincide with dropped image pixels.

    Examples:
        >>> import numpy as np
        >>> import albumentations as A
        >>> image = np.random.randint(0, 256, (100, 100, 3), dtype=np.uint8)
        >>> mask = np.random.randint(0, 2, (100, 100), dtype=np.uint8)
        >>> transform = A.PixelDropout(dropout_prob=0.1, per_channel=True, p=1.0)
        >>> result = transform(image=image, mask=mask)
        >>> dropped_image, dropped_mask = result['image'], result['mask']

    """

    class InitSchema(BaseTransformInitSchema):
        dropout_prob: float = Field(ge=0, le=1)
        per_channel: bool
        drop_value: tuple[float, ...] | float | None
        mask_drop_value: tuple[float, ...] | float | None

    _targets = ALL_TARGETS

    def __init__(
        self,
        dropout_prob: float = 0.01,
        per_channel: bool = False,
        drop_value: tuple[float, ...] | float | None = 0,
        mask_drop_value: tuple[float, ...] | float | None = None,
        p: float = 0.5,
    ):
        super().__init__(p=p)
        self.dropout_prob = dropout_prob
        self.per_channel = per_channel
        self.drop_value = drop_value
        self.mask_drop_value = mask_drop_value

    def apply(
        self,
        img: np.ndarray,
        drop_mask: np.ndarray,
        drop_values: np.ndarray,
        **params: Any,
    ) -> np.ndarray:
        """Apply pixel dropout to the image.

        Args:
            img (np.ndarray): The image to apply the transform to.
            drop_mask (np.ndarray): The dropout mask.
            drop_values (np.ndarray): The values to assign to the dropped pixels.
            **params (Any): Additional parameters for the transform.

        Returns:
            np.ndarray: The transformed image.

        """
        return fpixel.pixel_dropout(img, drop_mask, drop_values)

    def apply_to_mask(
        self,
        mask: np.ndarray,
        mask_drop_mask: np.ndarray | None,
        mask_drop_values: float | np.ndarray | None,
        **params: Any,
    ) -> np.ndarray:
        """Apply pixel dropout to the mask.

        Args:
            mask (np.ndarray): The mask to apply the transform to.
            mask_drop_mask (np.ndarray | None): The dropout mask for the mask, or None when
                no mask dropout parameters were generated.
            mask_drop_values (float | np.ndarray | None): The values to assign to the dropped
                pixels in the mask, or None when no mask dropout parameters were generated.
            **params (Any): Additional parameters for the transform.

        Returns:
            np.ndarray: The transformed mask, or the input mask unchanged when mask dropout
                is disabled or no dropout parameters are available.

        """
        # get_params_dependent_on_data emits None for both mask params when it could not
        # generate them; treat that as "leave the mask alone".
        if self.mask_drop_value is None or mask_drop_mask is None or mask_drop_values is None:
            return mask

        return fpixel.pixel_dropout(mask, mask_drop_mask, mask_drop_values)

    def apply_to_bboxes(
        self,
        bboxes: np.ndarray,
        drop_mask: np.ndarray | None,
        **params: Any,
    ) -> np.ndarray:
        """Apply pixel dropout to the bounding boxes.

        Args:
            bboxes (np.ndarray): The bounding boxes to apply the transform to.
            drop_mask (np.ndarray | None): The dropout mask for the bounding boxes.
            **params (Any): Additional parameters for the transform. Must contain ``"shape"``.

        Returns:
            np.ndarray: The transformed bounding boxes. The input is returned unchanged when
                there is no dropout mask, when ``per_channel`` is enabled, or when no bbox
                processor is configured.

        """
        # Per-channel dropout never filters boxes, so no multi-channel mask handling is
        # needed past this guard.
        if drop_mask is None or self.per_channel:
            return bboxes

        processor = cast("BboxProcessor", self.get_processor("bboxes"))
        if processor is None:
            return bboxes

        image_shape = params["shape"][:2]

        denormalized_bboxes = denormalize_bboxes(bboxes, image_shape)

        result = fdropout.mask_dropout_bboxes(
            denormalized_bboxes,
            drop_mask,
            image_shape,
            processor.params.min_area,
            processor.params.min_visibility,
        )

        return normalize_bboxes(result, image_shape)

    def apply_to_keypoints(
        self,
        keypoints: np.ndarray,
        **params: Any,
    ) -> np.ndarray:
        """Apply pixel dropout to the keypoints.

        Args:
            keypoints (np.ndarray): The keypoints to apply the transform to.
            **params (Any): Additional parameters for the transform.

        Returns:
            np.ndarray: The keypoints, unchanged.

        """
        return keypoints

    def get_params_dependent_on_data(
        self,
        params: dict[str, Any],
        data: dict[str, Any],
    ) -> dict[str, Any]:
        """Generate parameters for pixel dropout based on input data.

        Args:
            params (dict[str, Any]): Transform parameters
            data (dict[str, Any]): Input data dictionary

        Returns:
            dict[str, Any]: Dictionary with ``drop_mask`` and ``drop_values`` for the image, and
                ``mask_drop_mask`` and ``mask_drop_values`` for the mask. The two mask entries
                are None when ``mask_drop_value`` is None or no mask array is found in ``data``.

        """
        # Use the single image when present, otherwise the first image of the batch.
        reference_array = data["image"] if "image" in data else data["images"][0]

        # Generate drop mask and values for all targets
        drop_mask = fpixel.get_drop_mask(
            reference_array.shape,
            self.per_channel,
            self.dropout_prob,
            self.random_generator,
        )
        drop_values = fpixel.prepare_drop_values(
            reference_array,
            self.drop_value,
            self.random_generator,
        )

        # Handle mask drop values if specified
        mask_drop_mask = None
        mask_drop_values = None
        mask = fpixel.get_mask_array(data)
        if self.mask_drop_value is not None and mask is not None:
            mask_drop_mask = fpixel.get_drop_mask(
                mask.shape,
                self.per_channel,
                self.dropout_prob,
                self.random_generator,
            )
            mask_drop_values = fpixel.prepare_drop_values(
                mask,
                self.mask_drop_value,
                self.random_generator,
            )

        return {
            "drop_mask": drop_mask,
            "drop_values": drop_values,
            "mask_drop_mask": mask_drop_mask,
            "mask_drop_values": mask_drop_values,
        }
|
|
@@ -0,0 +1,186 @@
|
|
|
1
|
+
"""Implementation of XY masking for time-frequency domain transformations.
|
|
2
|
+
|
|
3
|
+
This module provides the XYMasking transform, which applies masking strips along the X and Y axes
|
|
4
|
+
of an image. This is particularly useful for audio spectrograms, time-series data visualizations,
|
|
5
|
+
and other grid-like data representations where masking in specific directions (time or frequency)
|
|
6
|
+
can improve model robustness and generalization.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
from typing import Any, Literal, cast
|
|
12
|
+
|
|
13
|
+
import numpy as np
|
|
14
|
+
from pydantic import model_validator
|
|
15
|
+
from typing_extensions import Self
|
|
16
|
+
|
|
17
|
+
from albumentations.augmentations.dropout.transforms import BaseDropout
|
|
18
|
+
from albumentations.core.pydantic import NonNegativeIntRangeType
|
|
19
|
+
from albumentations.core.transforms_interface import BaseTransformInitSchema
|
|
20
|
+
|
|
21
|
+
__all__ = ["XYMasking"]
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class XYMasking(BaseDropout):
    """Applies masking strips to an image, either horizontally (X axis) or vertically (Y axis),
    simulating occlusions. This transform is useful for training models to recognize images
    with varied visibility conditions. It's particularly effective for spectrogram images,
    allowing spectral and frequency masking to improve model robustness.

    At least one of `mask_x_length` or `mask_y_length` must be specified, dictating the mask's
    maximum size along each axis.

    Args:
        num_masks_x (int | tuple[int, int]): Number or range of horizontal regions to mask. Defaults to 0.
        num_masks_y (int | tuple[int, int]): Number or range of vertical regions to mask. Defaults to 0.
        mask_x_length (int | tuple[int, int]): Specifies the length of the masks along
            the X (horizontal) axis. If an integer is provided, it sets a fixed mask length.
            If a tuple of two integers (min, max) is provided,
            the mask length is randomly chosen within this range for each mask.
            This allows for variable-length masks in the horizontal direction.
        mask_y_length (int | tuple[int, int]): Specifies the height of the masks along
            the Y (vertical) axis. Similar to `mask_x_length`, an integer sets a fixed mask height,
            while a tuple (min, max) allows for variable-height masks, chosen randomly
            within the specified range for each mask. This flexibility facilitates creating masks of various
            sizes in the vertical direction.
        fill (tuple[float, ...] | float | Literal["random", "random_uniform", "inpaint_telea", "inpaint_ns"]):
            Value for the dropped pixels. Can be:
            - int or float: all channels are filled with this value
            - tuple: tuple of values for each channel
            - 'random': each pixel is filled with random values
            - 'random_uniform': each hole is filled with a single random color
            - 'inpaint_telea': uses OpenCV Telea inpainting method
            - 'inpaint_ns': uses OpenCV Navier-Stokes inpainting method
            Default: 0
        fill_mask (tuple[float, ...] | float | None): Fill value for dropout regions in the mask.
            If None, mask regions corresponding to image dropouts are unchanged. Default: None
        p (float): Probability of applying the transform. Defaults to 0.5.

    Targets:
        image, mask, bboxes, keypoints, volume, mask3d

    Image types:
        uint8, float32

    Note: Either `mask_x_length` or `mask_y_length` or both must be defined.

    """

    class InitSchema(BaseTransformInitSchema):
        num_masks_x: NonNegativeIntRangeType
        num_masks_y: NonNegativeIntRangeType
        mask_x_length: NonNegativeIntRangeType
        mask_y_length: NonNegativeIntRangeType

        fill: tuple[float, ...] | float | Literal["random", "random_uniform", "inpaint_telea", "inpaint_ns"]
        fill_mask: tuple[float, ...] | float | None

        @model_validator(mode="after")
        def _check_mask_length(self) -> Self:
            # Rejects the configuration only when BOTH lengths are plain non-positive ints.
            # NOTE(review): if NonNegativeIntRangeType normalizes ints to tuples before this
            # "after" validator runs, the isinstance checks never match - confirm.
            if (
                isinstance(self.mask_x_length, int)
                and self.mask_x_length <= 0
                and isinstance(self.mask_y_length, int)
                and self.mask_y_length <= 0
            ):
                msg = "At least one of `mask_x_length` or `mask_y_length` Should be a positive number."
                raise ValueError(msg)

            return self

    def __init__(
        self,
        num_masks_x: tuple[int, int] | int = 0,
        num_masks_y: tuple[int, int] | int = 0,
        mask_x_length: tuple[int, int] | int = 0,
        mask_y_length: tuple[int, int] | int = 0,
        fill: tuple[float, ...] | float | Literal["random", "random_uniform", "inpaint_telea", "inpaint_ns"] = 0,
        fill_mask: tuple[float, ...] | float | None = None,
        p: float = 0.5,
    ):
        super().__init__(p=p, fill=fill, fill_mask=fill_mask)
        self.num_masks_x = cast("tuple[int, int]", num_masks_x)
        self.num_masks_y = cast("tuple[int, int]", num_masks_y)

        self.mask_x_length = cast("tuple[int, int]", mask_x_length)
        self.mask_y_length = cast("tuple[int, int]", mask_y_length)

    def _validate_mask_length(
        self,
        mask_length: tuple[int, int] | None,
        dimension_size: int,
        dimension_name: str,
    ) -> None:
        """Validate the mask length against the corresponding image dimension size.

        Args:
            mask_length (tuple[int, int] | None): Length range (or scalar length) to check;
                None skips validation.
            dimension_size (int): Size of the image along the masked axis.
            dimension_name (str): Name used in the error message.

        Raises:
            ValueError: If the lower bound is negative or the upper bound exceeds ``dimension_size``.

        """
        if mask_length is not None:
            if isinstance(mask_length, (tuple, list)):
                if mask_length[0] < 0 or mask_length[1] > dimension_size:
                    raise ValueError(
                        f"{dimension_name} range {mask_length} is out of valid range [0, {dimension_size}]",
                    )
            elif mask_length < 0 or mask_length > dimension_size:
                raise ValueError(f"{dimension_name} {mask_length} exceeds image {dimension_name} {dimension_size}")

    def get_params_dependent_on_data(
        self,
        params: dict[str, Any],
        data: dict[str, Any],
    ) -> dict[str, Any]:
        """Get parameters dependent on the data.

        Args:
            params (dict[str, Any]): Dictionary containing parameters; ``params["shape"]`` is read
                for the image height and width.
            data (dict[str, Any]): Dictionary containing data.

        Returns:
            dict[str, Any]: ``"holes"``, an ``(N, 4)`` integer array of
                ``(x_min, y_min, x_max, y_max)`` rows (``N`` may be 0), and ``"seed"``, a random
                integer drawn from ``self.random_generator``.

        Raises:
            ValueError: If a configured mask length does not fit the image.

        """
        image_shape = params["shape"][:2]

        height, width = image_shape

        self._validate_mask_length(self.mask_x_length, width, "mask_x_length")
        self._validate_mask_length(self.mask_y_length, height, "mask_y_length")

        masks_x = self._generate_masks(self.num_masks_x, image_shape, self.mask_x_length, axis="x")
        masks_y = self._generate_masks(self.num_masks_y, image_shape, self.mask_y_length, axis="y")

        # Force an (N, 4) integer array even when no masks were generated, so consumers
        # always see the same layout instead of an empty float array of shape (0,).
        holes = np.array(masks_x + masks_y, dtype=int).reshape(-1, 4)

        return {"holes": holes, "seed": self.random_generator.integers(0, 2**32 - 1)}

    def _generate_mask_size(self, mask_length: tuple[int, int]) -> int:
        """Draw a mask length from ``mask_length`` using ``self.py_random.randint``."""
        return self.py_random.randint(*mask_length)

    def _generate_masks(
        self,
        num_masks: tuple[int, int],
        image_shape: tuple[int, int],
        max_length: tuple[int, int] | None,
        axis: str,
    ) -> list[tuple[int, int, int, int]]:
        """Generate full-span masking strips along one axis.

        Args:
            num_masks (tuple[int, int]): Number of strips, or a range to draw the number from.
            image_shape (tuple[int, int]): Image ``(height, width)``.
            max_length (tuple[int, int] | None): Range the strip length is drawn from;
                None or 0 disables masking on this axis.
            axis (str): ``"x"`` for strips spanning the full height at a random x offset;
                any other value produces strips spanning the full width at a random y offset.

        Returns:
            list[tuple[int, int, int, int]]: ``(x_min, y_min, x_max, y_max)`` per strip.

        """
        if max_length is None or max_length == 0 or (isinstance(num_masks, (int, float)) and num_masks == 0):
            return []

        masks = []
        num_masks_integer = (
            num_masks if isinstance(num_masks, int) else self.py_random.randint(num_masks[0], num_masks[1])
        )

        height, width = image_shape

        for _ in range(num_masks_integer):
            length = self._generate_mask_size(max_length)

            if axis == "x":
                x_min = self.py_random.randint(0, width - length)
                y_min = 0
                x_max, y_max = x_min + length, height
            else:  # axis == 'y'
                y_min = self.py_random.randint(0, height - length)
                x_min = 0
                x_max, y_max = width, y_min + length

            masks.append((x_min, y_min, x_max, y_max))
        return masks
|
|
File without changes
|