nrtk-albumentations 2.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nrtk-albumentations might be problematic. Click here for more details.

Files changed (62) hide show
  1. albumentations/__init__.py +21 -0
  2. albumentations/augmentations/__init__.py +23 -0
  3. albumentations/augmentations/blur/__init__.py +0 -0
  4. albumentations/augmentations/blur/functional.py +438 -0
  5. albumentations/augmentations/blur/transforms.py +1633 -0
  6. albumentations/augmentations/crops/__init__.py +0 -0
  7. albumentations/augmentations/crops/functional.py +494 -0
  8. albumentations/augmentations/crops/transforms.py +3647 -0
  9. albumentations/augmentations/dropout/__init__.py +0 -0
  10. albumentations/augmentations/dropout/channel_dropout.py +134 -0
  11. albumentations/augmentations/dropout/coarse_dropout.py +567 -0
  12. albumentations/augmentations/dropout/functional.py +1017 -0
  13. albumentations/augmentations/dropout/grid_dropout.py +166 -0
  14. albumentations/augmentations/dropout/mask_dropout.py +274 -0
  15. albumentations/augmentations/dropout/transforms.py +461 -0
  16. albumentations/augmentations/dropout/xy_masking.py +186 -0
  17. albumentations/augmentations/geometric/__init__.py +0 -0
  18. albumentations/augmentations/geometric/distortion.py +1238 -0
  19. albumentations/augmentations/geometric/flip.py +752 -0
  20. albumentations/augmentations/geometric/functional.py +4151 -0
  21. albumentations/augmentations/geometric/pad.py +676 -0
  22. albumentations/augmentations/geometric/resize.py +956 -0
  23. albumentations/augmentations/geometric/rotate.py +864 -0
  24. albumentations/augmentations/geometric/transforms.py +1962 -0
  25. albumentations/augmentations/mixing/__init__.py +0 -0
  26. albumentations/augmentations/mixing/domain_adaptation.py +787 -0
  27. albumentations/augmentations/mixing/domain_adaptation_functional.py +453 -0
  28. albumentations/augmentations/mixing/functional.py +878 -0
  29. albumentations/augmentations/mixing/transforms.py +832 -0
  30. albumentations/augmentations/other/__init__.py +0 -0
  31. albumentations/augmentations/other/lambda_transform.py +180 -0
  32. albumentations/augmentations/other/type_transform.py +261 -0
  33. albumentations/augmentations/pixel/__init__.py +0 -0
  34. albumentations/augmentations/pixel/functional.py +4226 -0
  35. albumentations/augmentations/pixel/transforms.py +7556 -0
  36. albumentations/augmentations/spectrogram/__init__.py +0 -0
  37. albumentations/augmentations/spectrogram/transform.py +220 -0
  38. albumentations/augmentations/text/__init__.py +0 -0
  39. albumentations/augmentations/text/functional.py +272 -0
  40. albumentations/augmentations/text/transforms.py +299 -0
  41. albumentations/augmentations/transforms3d/__init__.py +0 -0
  42. albumentations/augmentations/transforms3d/functional.py +393 -0
  43. albumentations/augmentations/transforms3d/transforms.py +1422 -0
  44. albumentations/augmentations/utils.py +249 -0
  45. albumentations/core/__init__.py +0 -0
  46. albumentations/core/bbox_utils.py +920 -0
  47. albumentations/core/composition.py +1885 -0
  48. albumentations/core/hub_mixin.py +299 -0
  49. albumentations/core/keypoints_utils.py +521 -0
  50. albumentations/core/label_manager.py +339 -0
  51. albumentations/core/pydantic.py +239 -0
  52. albumentations/core/serialization.py +352 -0
  53. albumentations/core/transforms_interface.py +976 -0
  54. albumentations/core/type_definitions.py +127 -0
  55. albumentations/core/utils.py +605 -0
  56. albumentations/core/validation.py +129 -0
  57. albumentations/pytorch/__init__.py +1 -0
  58. albumentations/pytorch/transforms.py +189 -0
  59. nrtk_albumentations-2.1.0.dist-info/METADATA +196 -0
  60. nrtk_albumentations-2.1.0.dist-info/RECORD +62 -0
  61. nrtk_albumentations-2.1.0.dist-info/WHEEL +4 -0
  62. nrtk_albumentations-2.1.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,461 @@
1
+ """Transform classes for dropout-based augmentations.
2
+
3
+ This module contains transform classes for various dropout techniques used in image
4
+ augmentation. It provides the base dropout class and specialized implementations like
5
+ PixelDropout. These transforms randomly remove or modify pixels, channels, or regions
6
+ in images, which can help models become more robust to occlusions and missing information.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ from typing import Any, Literal, cast
12
+
13
+ import numpy as np
14
+ from albucore import get_num_channels
15
+ from pydantic import Field
16
+
17
+ from albumentations.augmentations.dropout import functional as fdropout
18
+ from albumentations.augmentations.dropout.functional import (
19
+ cutout,
20
+ cutout_on_volume,
21
+ cutout_on_volumes,
22
+ filter_bboxes_by_holes,
23
+ filter_keypoints_in_holes,
24
+ )
25
+ from albumentations.augmentations.pixel import functional as fpixel
26
+ from albumentations.core.bbox_utils import BboxProcessor, denormalize_bboxes, normalize_bboxes
27
+ from albumentations.core.keypoints_utils import KeypointsProcessor
28
+ from albumentations.core.transforms_interface import BaseTransformInitSchema, DualTransform
29
+ from albumentations.core.type_definitions import ALL_TARGETS, Targets
30
+
31
+ __all__ = ["PixelDropout"]
32
+
33
+
34
class BaseDropout(DualTransform):
    """Base class for dropout-style transformations.

    This class provides common functionality for various dropout techniques:
    it cuts a set of rectangular holes out of images, volumes and masks, and
    filters bounding boxes and keypoints against the same holes.

    Subclasses must implement `get_params_dependent_on_data` and return a dict
    with two entries that are forwarded to the `apply*` methods:

    - `holes`: array with one `[x_min, y_min, x_max, y_max]` row per hole
      (an empty array means "nothing to drop" and every target is returned untouched).
    - `seed`: integer used to seed the `np.random.default_rng` instance handed
      to the cutout functions.

    Args:
        fill (tuple[float, ...] | float | Literal["random", "random_uniform", "inpaint_telea", "inpaint_ns"]):
            Value to fill dropped regions.
        fill_mask (tuple[float, ...] | float | None): Value to fill
            dropped regions in the mask. If None, the mask is not modified.
        p (float): Probability of applying the transform.

    Targets:
        image, mask, bboxes, keypoints, volume, mask3d

    Image types:
        uint8, float32

    Examples:
        >>> import numpy as np
        >>> import albumentations as A
        >>> from albumentations.augmentations.dropout.transforms import BaseDropout
        >>>
        >>> # Example of a custom dropout transform inheriting from BaseDropout
        >>> class CustomDropout(BaseDropout):
        ...     def __init__(self, num_holes_range=(4, 8), hole_size_range=(10, 20), *args, **kwargs):
        ...         super().__init__(*args, **kwargs)
        ...         self.num_holes_range = num_holes_range
        ...         self.hole_size_range = hole_size_range
        ...
        ...     def get_params_dependent_on_data(self, params, data):
        ...         img = data["image"]
        ...         height, width = img.shape[:2]
        ...
        ...         # Generate random holes
        ...         num_holes = self.py_random.randint(*self.num_holes_range)
        ...
        ...         holes = []
        ...         for _ in range(num_holes):
        ...             size = self.py_random.randint(*self.hole_size_range)
        ...             # Random position for each hole
        ...             x1 = self.py_random.randint(0, max(1, width - size))
        ...             y1 = self.py_random.randint(0, max(1, height - size))
        ...             x2 = min(width, x1 + size)
        ...             y2 = min(height, y1 + size)
        ...             holes.append([x1, y1, x2, y2])
        ...
        ...         # Return holes and random seed
        ...         return {
        ...             "holes": np.array(holes) if holes else np.empty((0, 4), dtype=np.int32),
        ...             "seed": self.random_generator.integers(0, 2**32 - 1),
        ...         }
        >>>
        >>> # Prepare sample data
        >>> image = np.random.randint(0, 256, (100, 100, 3), dtype=np.uint8)
        >>> mask = np.random.randint(0, 2, (100, 100), dtype=np.uint8)
        >>> bboxes = np.array([[0.1, 0.1, 0.4, 0.4], [0.6, 0.6, 0.9, 0.9]])
        >>>
        >>> # Create a transform with custom dropout
        >>> transform = A.Compose([
        ...     CustomDropout(
        ...         num_holes_range=(3, 6),        # Generate 3-6 random holes
        ...         hole_size_range=(5, 15),       # Holes of size 5-15 pixels
        ...         fill=0,                        # Fill holes with black
        ...         fill_mask=1,                   # Fill mask holes with 1
        ...         p=1.0                          # Always apply for this example
        ...     )
        ... ], bbox_params=A.BboxParams(format='yolo', min_visibility=0.3))
        >>>
        >>> # Apply the transform
        >>> transformed = transform(image=image, mask=mask, bboxes=bboxes)
        >>>
        >>> # Get the transformed data
        >>> dropout_image = transformed["image"]    # Image with random holes filled with 0
        >>> dropout_mask = transformed["mask"]      # Mask with same holes filled with 1
        >>> dropout_bboxes = transformed["bboxes"]  # Bboxes filtered by visibility threshold

    """

    _targets: tuple[Targets, ...] | Targets = ALL_TARGETS

    class InitSchema(BaseTransformInitSchema):
        fill: tuple[float, ...] | float | Literal["random", "random_uniform", "inpaint_telea", "inpaint_ns"]
        fill_mask: tuple[float, ...] | float | None

    def __init__(
        self,
        fill: tuple[float, ...] | float | Literal["random", "random_uniform", "inpaint_telea", "inpaint_ns"],
        fill_mask: tuple[float, ...] | float | None,
        p: float,
    ):
        super().__init__(p=p)
        self.fill = fill  # type: ignore[assignment]
        self.fill_mask = fill_mask

    def _ensure_inpaintable(self, num_channels: int) -> None:
        """Reject inpainting fills for inputs that do not have 1 or 3 channels.

        Args:
            num_channels (int): Channel count of the array about to be processed.

        Raises:
            ValueError: If `fill` is an inpainting mode and `num_channels` is not 1 or 3.

        """
        if self.fill in {"inpaint_telea", "inpaint_ns"} and num_channels not in {1, 3}:
            raise ValueError("Inpainting works only for 1 or 3 channel images")

    def apply(self, img: np.ndarray, holes: np.ndarray, seed: int, **params: Any) -> np.ndarray:
        """Cut `holes` out of a single image, filling them according to `fill`.

        Args:
            img (np.ndarray): Image to process.
            holes (np.ndarray): Holes to cut; an empty array leaves the image untouched.
            seed (int): Seed for the random generator passed to `cutout`.
            **params (Any): Additional parameters (unused).

        Returns:
            np.ndarray: The image with holes applied, or `img` itself when there are no holes.

        Raises:
            ValueError: If an inpainting fill is used on an image without 1 or 3 channels.

        """
        if holes.size == 0:
            return img
        self._ensure_inpaintable(get_num_channels(img))
        return cutout(img, holes, self.fill, np.random.default_rng(seed))

    def apply_to_images(self, images: np.ndarray, holes: np.ndarray, seed: int, **params: Any) -> np.ndarray:
        """Cut `holes` out of a batch of images.

        Args:
            images (np.ndarray): Batch of images; the channel count is read from axis 3
                when the array is 4-dimensional and assumed to be 1 otherwise.
            holes (np.ndarray): Holes to cut; an empty array leaves the batch untouched.
            seed (int): Seed for the random generator passed to `cutout_on_volume`.
            **params (Any): Additional parameters (unused).

        Returns:
            np.ndarray: The batch with holes applied.

        Raises:
            ValueError: If an inpainting fill is used on images without 1 or 3 channels.

        """
        if holes.size == 0:
            return images
        self._ensure_inpaintable(images.shape[3] if images.ndim == 4 else 1)
        # Images (N, H, W, C) have the same structure as volumes (D, H, W, C)
        return cutout_on_volume(images, holes, self.fill, np.random.default_rng(seed))

    def apply_to_volume(self, volume: np.ndarray, holes: np.ndarray, seed: int, **params: Any) -> np.ndarray:
        """Cut `holes` out of a volume by reusing the image-batch implementation."""
        # Volume (D, H, W, C) has the same structure as images (N, H, W, C)
        # We can reuse the same logic
        return self.apply_to_images(volume, holes, seed, **params)

    def apply_to_volumes(self, volumes: np.ndarray, holes: np.ndarray, seed: int, **params: Any) -> np.ndarray:
        """Cut `holes` out of a batch of volumes.

        Args:
            volumes (np.ndarray): Batch of volumes; the channel count is read from axis 4
                when the array is 5-dimensional and assumed to be 1 otherwise.
            holes (np.ndarray): Holes to cut; an empty array leaves the batch untouched.
            seed (int): Seed for the random generator passed to `cutout_on_volumes`.
            **params (Any): Additional parameters (unused).

        Returns:
            np.ndarray: The volumes with holes applied.

        Raises:
            ValueError: If an inpainting fill is used on volumes without 1 or 3 channels.

        """
        if holes.size == 0:
            return volumes
        self._ensure_inpaintable(volumes.shape[4] if volumes.ndim == 5 else 1)
        return cutout_on_volumes(volumes, holes, self.fill, np.random.default_rng(seed))

    def apply_to_mask3d(self, mask: np.ndarray, holes: np.ndarray, seed: int, **params: Any) -> np.ndarray:
        """Fill `holes` in a 3D mask with `fill_mask`; a no-op when `fill_mask` is None."""
        if self.fill_mask is None or holes.size == 0:
            return mask
        return cutout_on_volume(mask, holes, self.fill_mask, np.random.default_rng(seed))

    def apply_to_masks3d(self, mask: np.ndarray, holes: np.ndarray, seed: int, **params: Any) -> np.ndarray:
        """Fill `holes` in a batch of 3D masks with `fill_mask`; a no-op when `fill_mask` is None."""
        if self.fill_mask is None or holes.size == 0:
            return mask
        return cutout_on_volumes(mask, holes, self.fill_mask, np.random.default_rng(seed))

    def apply_to_mask(self, mask: np.ndarray, holes: np.ndarray, seed: int, **params: Any) -> np.ndarray:
        """Fill `holes` in a 2D mask with `fill_mask`; a no-op when `fill_mask` is None."""
        if self.fill_mask is None or holes.size == 0:
            return mask
        return cutout(mask, holes, self.fill_mask, np.random.default_rng(seed))

    def apply_to_bboxes(
        self,
        bboxes: np.ndarray,
        holes: np.ndarray,
        **params: Any,
    ) -> np.ndarray:
        """Filter bounding boxes against the holes.

        Args:
            bboxes (np.ndarray): Normalized bounding boxes.
            holes (np.ndarray): Holes that were cut out of the image.
            **params (Any): Must contain `shape`, whose first two entries are the image height and width.

        Returns:
            np.ndarray: Normalized bounding boxes kept by `filter_bboxes_by_holes`. The input is
                returned unchanged when there are no holes or no bbox processor is configured.

        """
        if holes.size == 0:
            return bboxes
        processor = cast("BboxProcessor", self.get_processor("bboxes"))
        if processor is None:
            return bboxes

        image_shape = params["shape"][:2]
        # The filter works in pixel coordinates, so convert there and back.
        denormalized_bboxes = denormalize_bboxes(bboxes, image_shape)

        return normalize_bboxes(
            filter_bboxes_by_holes(
                denormalized_bboxes,
                holes,
                image_shape,
                min_area=processor.params.min_area,
                min_visibility=processor.params.min_visibility,
            ),
            image_shape,
        )

    def apply_to_keypoints(
        self,
        keypoints: np.ndarray,
        holes: np.ndarray,
        **params: Any,
    ) -> np.ndarray:
        """Filter keypoints against the holes.

        Keypoints are only filtered when a keypoint processor is configured with
        `remove_invisible` enabled; otherwise they are returned unchanged.

        Args:
            keypoints (np.ndarray): Keypoints to filter.
            holes (np.ndarray): Holes that were cut out of the image.
            **params (Any): Additional parameters (unused).

        Returns:
            np.ndarray: Keypoints kept by `filter_keypoints_in_holes`, or the input unchanged.

        """
        if holes.size == 0:
            return keypoints
        processor = cast("KeypointsProcessor", self.get_processor("keypoints"))

        if processor is None or not processor.params.remove_invisible:
            return keypoints

        return filter_keypoints_in_holes(keypoints, holes)

    def get_params_dependent_on_data(self, params: dict[str, Any], data: dict[str, Any]) -> dict[str, Any]:
        """Produce the `holes` and `seed` parameters; must be overridden by subclasses.

        Raises:
            NotImplementedError: Always, in this base class.

        """
        raise NotImplementedError("Subclasses must implement this method.")
220
+
221
+
222
class PixelDropout(DualTransform):
    """Drops random pixels from the image.

    This transform randomly sets pixels in the image to a specified value, effectively "dropping out" those pixels.
    It can be applied to both the image and its corresponding mask.

    Args:
        dropout_prob (float): Probability of dropping out each pixel. Should be in the range [0, 1].
            Default: 0.01

        per_channel (bool): If True, the dropout mask will be generated independently for each channel.
            If False, the same dropout mask will be applied to all channels.
            Default: False

        drop_value (float | tuple[float, ...] | None): Value to assign to the dropped pixels.
            If None, the value will be randomly sampled for each application:
                - For uint8 images: Random integer in [0, 255]
                - For float32 images: Random float in [0, 1]
            If a single number, that value will be used for all dropped pixels.
            If a sequence, it should contain one value per channel.
            Default: 0

        mask_drop_value (float | tuple[float, ...] | None): Value to assign to dropped pixels in the mask.
            If None, the mask will remain unchanged.
            If a single number, that value will be used for all dropped pixels in the mask.
            If a sequence, it should contain one value per channel.
            Default: None

        p (float): Probability of applying the transform. Should be in the range [0, 1].
            Default: 0.5

    Targets:
        image, mask, bboxes, keypoints, volume, mask3d

    Image types:
        uint8, float32

    Note:
        - The drop mask used for the mask target is drawn separately from the one used for the
          image, so the dropped positions in the mask are not guaranteed to match the image.
        - Bounding boxes are only filtered when `per_channel` is False and a bbox processor is
          configured; they are then filtered against the image drop mask using the processor's
          `min_area` and `min_visibility`. With `per_channel=True` they are returned unchanged.
        - Keypoints are always returned unchanged.

    Examples:
        >>> import numpy as np
        >>> import albumentations as A
        >>> image = np.random.randint(0, 256, (100, 100, 3), dtype=np.uint8)
        >>> mask = np.random.randint(0, 2, (100, 100), dtype=np.uint8)
        >>> transform = A.PixelDropout(dropout_prob=0.1, per_channel=True, p=1.0)
        >>> result = transform(image=image, mask=mask)
        >>> dropped_image, dropped_mask = result['image'], result['mask']

    """

    class InitSchema(BaseTransformInitSchema):
        dropout_prob: float = Field(ge=0, le=1)
        per_channel: bool
        drop_value: tuple[float, ...] | float | None
        mask_drop_value: tuple[float, ...] | float | None

    _targets = ALL_TARGETS

    def __init__(
        self,
        dropout_prob: float = 0.01,
        per_channel: bool = False,
        drop_value: tuple[float, ...] | float | None = 0,
        mask_drop_value: tuple[float, ...] | float | None = None,
        p: float = 0.5,
    ):
        super().__init__(p=p)
        self.dropout_prob = dropout_prob
        self.per_channel = per_channel
        self.drop_value = drop_value
        self.mask_drop_value = mask_drop_value

    def apply(
        self,
        img: np.ndarray,
        drop_mask: np.ndarray,
        drop_values: np.ndarray,
        **params: Any,
    ) -> np.ndarray:
        """Apply pixel dropout to the image.

        Args:
            img (np.ndarray): The image to apply the transform to.
            drop_mask (np.ndarray): The dropout mask.
            drop_values (np.ndarray): The values to assign to the dropped pixels.
            **params (Any): Additional parameters for the transform.

        Returns:
            np.ndarray: The transformed image.

        """
        return fpixel.pixel_dropout(img, drop_mask, drop_values)

    def apply_to_mask(
        self,
        mask: np.ndarray,
        mask_drop_mask: np.ndarray | None,
        mask_drop_values: float | np.ndarray | None,
        **params: Any,
    ) -> np.ndarray:
        """Apply pixel dropout to the mask.

        Args:
            mask (np.ndarray): The mask to apply the transform to.
            mask_drop_mask (np.ndarray | None): The dropout mask for the mask, or None when
                no mask drop parameters were generated.
            mask_drop_values (float | np.ndarray | None): The values to assign to the dropped
                pixels in the mask.
            **params (Any): Additional parameters for the transform.

        Returns:
            np.ndarray: The transformed mask, or the input mask unchanged when
                `mask_drop_value` is None or no drop mask is available.

        """
        # get_params_dependent_on_data emits None for the mask parameters when it could not
        # find a mask array, so there is nothing to apply in that case.
        if self.mask_drop_value is None or mask_drop_mask is None:
            return mask

        return fpixel.pixel_dropout(mask, mask_drop_mask, mask_drop_values)

    def apply_to_bboxes(
        self,
        bboxes: np.ndarray,
        drop_mask: np.ndarray | None,
        **params: Any,
    ) -> np.ndarray:
        """Apply pixel dropout to the bounding boxes.

        Args:
            bboxes (np.ndarray): The bounding boxes to apply the transform to.
            drop_mask (np.ndarray | None): The dropout mask for the bounding boxes.
            **params (Any): Additional parameters for the transform. Must contain `shape`,
                whose first two entries are the image height and width.

        Returns:
            np.ndarray: The transformed bounding boxes. The input is returned unchanged when
                there is no drop mask, when `per_channel` is True, or when no bbox processor
                is configured.

        """
        # Per-channel drop masks are not used for bbox filtering.
        if drop_mask is None or self.per_channel:
            return bboxes

        processor = cast("BboxProcessor", self.get_processor("bboxes"))
        if processor is None:
            return bboxes

        image_shape = params["shape"][:2]

        # The filter works in pixel coordinates, so convert there and back.
        denormalized_bboxes = denormalize_bboxes(bboxes, image_shape)

        result = fdropout.mask_dropout_bboxes(
            denormalized_bboxes,
            drop_mask,
            image_shape,
            processor.params.min_area,
            processor.params.min_visibility,
        )

        return normalize_bboxes(result, image_shape)

    def apply_to_keypoints(
        self,
        keypoints: np.ndarray,
        **params: Any,
    ) -> np.ndarray:
        """Apply pixel dropout to the keypoints.

        Args:
            keypoints (np.ndarray): The keypoints to apply the transform to.
            **params (Any): Additional parameters for the transform.

        Returns:
            np.ndarray: The keypoints, unchanged.

        """
        return keypoints

    def get_params_dependent_on_data(
        self,
        params: dict[str, Any],
        data: dict[str, Any],
    ) -> dict[str, Any]:
        """Generate parameters for pixel dropout based on input data.

        Args:
            params (dict[str, Any]): Transform parameters
            data (dict[str, Any]): Input data dictionary. Must contain `image`, or `images`
                whose first element is used as the reference array.

        Returns:
            dict[str, Any]: Dictionary with `drop_mask` and `drop_values` for the image, and
                `mask_drop_mask` / `mask_drop_values` for the mask. The mask entries are None
                when `mask_drop_value` is None or no mask array is found in `data`.

        """
        reference_array = data["image"] if "image" in data else data["images"][0]

        # Generate drop mask and values for all targets
        drop_mask = fpixel.get_drop_mask(
            reference_array.shape,
            self.per_channel,
            self.dropout_prob,
            self.random_generator,
        )
        drop_values = fpixel.prepare_drop_values(
            reference_array,
            self.drop_value,
            self.random_generator,
        )

        # Handle mask drop values if specified
        mask_drop_mask = None
        mask_drop_values = None
        mask = fpixel.get_mask_array(data)
        if self.mask_drop_value is not None and mask is not None:
            mask_drop_mask = fpixel.get_drop_mask(
                mask.shape,
                self.per_channel,
                self.dropout_prob,
                self.random_generator,
            )
            mask_drop_values = fpixel.prepare_drop_values(
                mask,
                self.mask_drop_value,
                self.random_generator,
            )

        return {
            "drop_mask": drop_mask,
            "drop_values": drop_values,
            "mask_drop_mask": mask_drop_mask,
            "mask_drop_values": mask_drop_values,
        }
@@ -0,0 +1,186 @@
1
+ """Implementation of XY masking for time-frequency domain transformations.
2
+
3
+ This module provides the XYMasking transform, which applies masking strips along the X and Y axes
4
+ of an image. This is particularly useful for audio spectrograms, time-series data visualizations,
5
+ and other grid-like data representations where masking in specific directions (time or frequency)
6
+ can improve model robustness and generalization.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ from typing import Any, Literal, cast
12
+
13
+ import numpy as np
14
+ from pydantic import model_validator
15
+ from typing_extensions import Self
16
+
17
+ from albumentations.augmentations.dropout.transforms import BaseDropout
18
+ from albumentations.core.pydantic import NonNegativeIntRangeType
19
+ from albumentations.core.transforms_interface import BaseTransformInitSchema
20
+
21
+ __all__ = ["XYMasking"]
22
+
23
+
24
class XYMasking(BaseDropout):
    """Applies masking strips to an image along the X axis, the Y axis, or both,
    simulating occlusions. This transform is useful for training models to recognize images
    with varied visibility conditions. It's particularly effective for spectrogram images,
    allowing spectral and frequency masking to improve model robustness.

    A strip placed along the X axis spans the full image height and is `mask_x_length` wide;
    a strip placed along the Y axis spans the full image width and is `mask_y_length` tall.
    At least one of `mask_x_length` or `mask_y_length` must be specified, dictating the mask's
    size along that axis.

    Args:
        num_masks_x (int | tuple[int, int]): Number or range of strips to place along the X axis.
            Defaults to 0.
        num_masks_y (int | tuple[int, int]): Number or range of strips to place along the Y axis.
            Defaults to 0.
        mask_x_length (int | tuple[int, int]): Specifies the length of the masks along
            the X (horizontal) axis. If an integer is provided, it sets a fixed mask length.
            If a tuple of two integers (min, max) is provided,
            the mask length is randomly chosen within this range for each mask.
            This allows for variable-length masks in the horizontal direction.
        mask_y_length (int | tuple[int, int]): Specifies the height of the masks along
            the Y (vertical) axis. Similar to `mask_x_length`, an integer sets a fixed mask height,
            while a tuple (min, max) allows for variable-height masks, chosen randomly
            within the specified range for each mask. This flexibility facilitates creating masks of various
            sizes in the vertical direction.
        fill (tuple[float, ...] | float | Literal["random", "random_uniform", "inpaint_telea", "inpaint_ns"]):
            Value for the dropped pixels. Can be:
            - int or float: all channels are filled with this value
            - tuple: tuple of values for each channel
            - 'random': each pixel is filled with random values
            - 'random_uniform': each hole is filled with a single random color
            - 'inpaint_telea': uses OpenCV Telea inpainting method
            - 'inpaint_ns': uses OpenCV Navier-Stokes inpainting method
            Default: 0
        fill_mask (tuple[float, ...] | float | None): Fill value for dropout regions in the mask.
            If None, mask regions corresponding to image dropouts are unchanged. Default: None
        p (float): Probability of applying the transform. Defaults to 0.5.

    Targets:
        image, mask, bboxes, keypoints, volume, mask3d

    Image types:
        uint8, float32

    Note: Either `mask_x_length` or `mask_y_length` or both must be defined. A mask length
        larger than the corresponding image dimension raises a ValueError when the transform
        is applied.

    """

    class InitSchema(BaseTransformInitSchema):
        num_masks_x: NonNegativeIntRangeType
        num_masks_y: NonNegativeIntRangeType
        mask_x_length: NonNegativeIntRangeType
        mask_y_length: NonNegativeIntRangeType

        fill: tuple[float, ...] | float | Literal["random", "random_uniform", "inpaint_telea", "inpaint_ns"]
        fill_mask: tuple[float, ...] | float | None

        @model_validator(mode="after")
        def _check_mask_length(self) -> Self:
            # NOTE(review): this only fires while both lengths are still plain ints at this
            # point. If NonNegativeIntRangeType has already normalized them to (min, max)
            # tuples, the check never triggers - confirm against its definition.
            if (
                isinstance(self.mask_x_length, int)
                and self.mask_x_length <= 0
                and isinstance(self.mask_y_length, int)
                and self.mask_y_length <= 0
            ):
                msg = "At least one of `mask_x_length` or `mask_y_length` Should be a positive number."
                raise ValueError(msg)

            return self

    def __init__(
        self,
        num_masks_x: tuple[int, int] | int = 0,
        num_masks_y: tuple[int, int] | int = 0,
        mask_x_length: tuple[int, int] | int = 0,
        mask_y_length: tuple[int, int] | int = 0,
        fill: tuple[float, ...] | float | Literal["random", "random_uniform", "inpaint_telea", "inpaint_ns"] = 0,
        fill_mask: tuple[float, ...] | float | None = None,
        p: float = 0.5,
    ):
        super().__init__(p=p, fill=fill, fill_mask=fill_mask)
        # `cast` only informs the type checker; the values are stored exactly as received.
        self.num_masks_x = cast("tuple[int, int]", num_masks_x)
        self.num_masks_y = cast("tuple[int, int]", num_masks_y)

        self.mask_x_length = cast("tuple[int, int]", mask_x_length)
        self.mask_y_length = cast("tuple[int, int]", mask_y_length)

    def _validate_mask_length(
        self,
        mask_length: tuple[int, int] | int | None,
        dimension_size: int,
        dimension_name: str,
    ) -> None:
        """Validate the mask length against the corresponding image dimension size.

        Args:
            mask_length (tuple[int, int] | int | None): Fixed length or (min, max) range; None skips validation.
            dimension_size (int): Size of the image along the masked dimension.
            dimension_name (str): Name used in the error message.

        Raises:
            ValueError: If the length (or either end of the range) is negative or exceeds `dimension_size`.

        """
        if mask_length is not None:
            if isinstance(mask_length, (tuple, list)):
                if mask_length[0] < 0 or mask_length[1] > dimension_size:
                    raise ValueError(
                        f"{dimension_name} range {mask_length} is out of valid range [0, {dimension_size}]",
                    )
            elif mask_length < 0 or mask_length > dimension_size:
                raise ValueError(f"{dimension_name} {mask_length} exceeds image {dimension_name} {dimension_size}")

    def get_params_dependent_on_data(
        self,
        params: dict[str, Any],
        data: dict[str, Any],
    ) -> dict[str, np.ndarray]:
        """Get parameters dependent on the data.

        Args:
            params (dict[str, Any]): Dictionary containing parameters. Must contain `shape`,
                whose first two entries are the image height and width.
            data (dict[str, Any]): Dictionary containing data.

        Returns:
            dict[str, np.ndarray]: Dictionary with `holes` (the X strips followed by the Y strips,
                one `(x_min, y_min, x_max, y_max)` row each) and a random `seed`.

        Raises:
            ValueError: If a configured mask length does not fit the image.

        """
        image_shape = params["shape"][:2]

        height, width = image_shape

        self._validate_mask_length(self.mask_x_length, width, "mask_x_length")
        self._validate_mask_length(self.mask_y_length, height, "mask_y_length")

        masks_x = self._generate_masks(self.num_masks_x, image_shape, self.mask_x_length, axis="x")
        masks_y = self._generate_masks(self.num_masks_y, image_shape, self.mask_y_length, axis="y")

        holes = np.array(masks_x + masks_y)

        return {"holes": holes, "seed": self.random_generator.integers(0, 2**32 - 1)}

    def _generate_mask_size(self, mask_length: tuple[int, int] | int) -> int:
        """Pick the size of one mask: a fixed int is used as is, a range is sampled."""
        # The sibling helpers accept a plain int as well as a (min, max) range, so do the
        # same here instead of failing when unpacking an int.
        if isinstance(mask_length, int):
            return mask_length
        return self.py_random.randint(*mask_length)

    def _generate_masks(
        self,
        num_masks: tuple[int, int] | int,
        image_shape: tuple[int, int],
        max_length: tuple[int, int] | int | None,
        axis: str,
    ) -> list[tuple[int, int, int, int]]:
        """Generate the masking strips for one axis.

        Args:
            num_masks (tuple[int, int] | int): Fixed count or (min, max) range to sample the count from.
            image_shape (tuple[int, int]): Image height and width.
            max_length (tuple[int, int] | int | None): Mask length specification for this axis.
            axis (str): "x" for full-height strips positioned along the width; any other value
                produces full-width strips positioned along the height.

        Returns:
            list[tuple[int, int, int, int]]: One `(x_min, y_min, x_max, y_max)` tuple per strip;
                empty when masking is disabled for this axis.

        """
        if max_length is None or max_length == 0 or (isinstance(num_masks, (int, float)) and num_masks == 0):
            return []

        masks = []
        num_masks_integer = (
            num_masks if isinstance(num_masks, int) else self.py_random.randint(num_masks[0], num_masks[1])
        )

        height, width = image_shape

        for _ in range(num_masks_integer):
            length = self._generate_mask_size(max_length)

            if axis == "x":
                x_min = self.py_random.randint(0, width - length)
                y_min = 0
                x_max, y_max = x_min + length, height
            else:  # axis == 'y'
                y_min = self.py_random.randint(0, height - length)
                x_min = 0
                x_max, y_max = width, y_min + length

            masks.append((x_min, y_min, x_max, y_max))
        return masks
File without changes