nrtk-albumentations 2.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nrtk-albumentations might be problematic. Click here for more details.

Files changed (62)
  1. albumentations/__init__.py +21 -0
  2. albumentations/augmentations/__init__.py +23 -0
  3. albumentations/augmentations/blur/__init__.py +0 -0
  4. albumentations/augmentations/blur/functional.py +438 -0
  5. albumentations/augmentations/blur/transforms.py +1633 -0
  6. albumentations/augmentations/crops/__init__.py +0 -0
  7. albumentations/augmentations/crops/functional.py +494 -0
  8. albumentations/augmentations/crops/transforms.py +3647 -0
  9. albumentations/augmentations/dropout/__init__.py +0 -0
  10. albumentations/augmentations/dropout/channel_dropout.py +134 -0
  11. albumentations/augmentations/dropout/coarse_dropout.py +567 -0
  12. albumentations/augmentations/dropout/functional.py +1017 -0
  13. albumentations/augmentations/dropout/grid_dropout.py +166 -0
  14. albumentations/augmentations/dropout/mask_dropout.py +274 -0
  15. albumentations/augmentations/dropout/transforms.py +461 -0
  16. albumentations/augmentations/dropout/xy_masking.py +186 -0
  17. albumentations/augmentations/geometric/__init__.py +0 -0
  18. albumentations/augmentations/geometric/distortion.py +1238 -0
  19. albumentations/augmentations/geometric/flip.py +752 -0
  20. albumentations/augmentations/geometric/functional.py +4151 -0
  21. albumentations/augmentations/geometric/pad.py +676 -0
  22. albumentations/augmentations/geometric/resize.py +956 -0
  23. albumentations/augmentations/geometric/rotate.py +864 -0
  24. albumentations/augmentations/geometric/transforms.py +1962 -0
  25. albumentations/augmentations/mixing/__init__.py +0 -0
  26. albumentations/augmentations/mixing/domain_adaptation.py +787 -0
  27. albumentations/augmentations/mixing/domain_adaptation_functional.py +453 -0
  28. albumentations/augmentations/mixing/functional.py +878 -0
  29. albumentations/augmentations/mixing/transforms.py +832 -0
  30. albumentations/augmentations/other/__init__.py +0 -0
  31. albumentations/augmentations/other/lambda_transform.py +180 -0
  32. albumentations/augmentations/other/type_transform.py +261 -0
  33. albumentations/augmentations/pixel/__init__.py +0 -0
  34. albumentations/augmentations/pixel/functional.py +4226 -0
  35. albumentations/augmentations/pixel/transforms.py +7556 -0
  36. albumentations/augmentations/spectrogram/__init__.py +0 -0
  37. albumentations/augmentations/spectrogram/transform.py +220 -0
  38. albumentations/augmentations/text/__init__.py +0 -0
  39. albumentations/augmentations/text/functional.py +272 -0
  40. albumentations/augmentations/text/transforms.py +299 -0
  41. albumentations/augmentations/transforms3d/__init__.py +0 -0
  42. albumentations/augmentations/transforms3d/functional.py +393 -0
  43. albumentations/augmentations/transforms3d/transforms.py +1422 -0
  44. albumentations/augmentations/utils.py +249 -0
  45. albumentations/core/__init__.py +0 -0
  46. albumentations/core/bbox_utils.py +920 -0
  47. albumentations/core/composition.py +1885 -0
  48. albumentations/core/hub_mixin.py +299 -0
  49. albumentations/core/keypoints_utils.py +521 -0
  50. albumentations/core/label_manager.py +339 -0
  51. albumentations/core/pydantic.py +239 -0
  52. albumentations/core/serialization.py +352 -0
  53. albumentations/core/transforms_interface.py +976 -0
  54. albumentations/core/type_definitions.py +127 -0
  55. albumentations/core/utils.py +605 -0
  56. albumentations/core/validation.py +129 -0
  57. albumentations/pytorch/__init__.py +1 -0
  58. albumentations/pytorch/transforms.py +189 -0
  59. nrtk_albumentations-2.1.0.dist-info/METADATA +196 -0
  60. nrtk_albumentations-2.1.0.dist-info/RECORD +62 -0
  61. nrtk_albumentations-2.1.0.dist-info/WHEEL +4 -0
  62. nrtk_albumentations-2.1.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,567 @@
1
+ """Implementation of coarse dropout and random erasing augmentations.
2
+
3
+ This module provides several variations of coarse dropout augmentations, which drop out
4
+ rectangular regions from images. It includes CoarseDropout for randomly placed dropouts,
5
+ ConstrainedCoarseDropout for dropping out regions based on masks or bounding boxes,
6
+ and Erasing for random erasing augmentation. These techniques help models become more
7
+ robust to occlusions and varying object completeness.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ from typing import Annotated, Any, Literal
13
+ from warnings import warn
14
+
15
+ import numpy as np
16
+ from pydantic import AfterValidator
17
+
18
+ import albumentations.augmentations.dropout.functional as fdropout
19
+ from albumentations.augmentations.dropout.transforms import BaseDropout
20
+ from albumentations.core.bbox_utils import denormalize_bboxes
21
+ from albumentations.core.pydantic import check_range_bounds, nondecreasing
22
+
23
+ __all__ = ["CoarseDropout", "ConstrainedCoarseDropout", "Erasing"]
24
+
25
+
26
class CoarseDropout(BaseDropout):
    """CoarseDropout randomly drops out rectangular regions from the image and optionally,
    the corresponding regions in an associated mask, to simulate occlusion and
    varied object sizes found in real-world settings.

    This transformation is an evolution of CutOut and RandomErasing, offering more
    flexibility in the size, number of dropout regions, and fill values.

    Args:
        num_holes_range (tuple[int, int]): Range (min, max) for the number of rectangular
            regions to drop out. Default: (1, 2)
        hole_height_range (tuple[int, int] | tuple[float, float]): Range (min, max) for the height
            of dropout regions. If int, specifies absolute pixel values. If float,
            interpreted as a fraction of the image height. Default: (0.1, 0.2)
        hole_width_range (tuple[int, int] | tuple[float, float]): Range (min, max) for the width
            of dropout regions. If int, specifies absolute pixel values. If float,
            interpreted as a fraction of the image width. Default: (0.1, 0.2)
        fill (tuple[float, ...] | float | Literal["random", "random_uniform", "inpaint_telea", "inpaint_ns"]):
            Value for the dropped pixels. Can be:
            - int or float: all channels are filled with this value
            - tuple: tuple of values for each channel
            - 'random': each pixel is filled with random values
            - 'random_uniform': each hole is filled with a single random color
            - 'inpaint_telea': uses OpenCV Telea inpainting method
            - 'inpaint_ns': uses OpenCV Navier-Stokes inpainting method
            Default: 0
        fill_mask (tuple[float, ...] | float | None): Fill value for dropout regions in the mask.
            If None, mask regions corresponding to image dropouts are unchanged. Default: None
        p (float): Probability of applying the transform. Default: 0.5

    Targets:
        image, mask, bboxes, keypoints, volume, mask3d

    Image types:
        uint8, float32

    Note:
        - The actual number and size of dropout regions are randomly chosen within the specified ranges for each
          application.
        - A size range whose upper bound is >= 1 is treated as absolute pixels and clipped to the image
          side; a range whose upper bound is < 1 is treated as a fraction of the image side. Height and
          width ranges are interpreted independently of each other.
        - When using float values for hole_height_range and hole_width_range, ensure they are between 0 and 1.
        - Inpainting methods ('inpaint_telea', 'inpaint_ns') work only with grayscale or RGB images.
        - For 'random_uniform' fill, each hole gets a single random color, unlike 'random' where each pixel
          gets its own random value.

    Examples:
        >>> import numpy as np
        >>> import albumentations as A
        >>> image = np.random.randint(0, 256, (100, 100, 3), dtype=np.uint8)
        >>> mask = np.random.randint(0, 2, (100, 100), dtype=np.uint8)
        >>> # Example with random uniform fill
        >>> aug_random = A.CoarseDropout(
        ...     num_holes_range=(3, 6),
        ...     hole_height_range=(10, 20),
        ...     hole_width_range=(10, 20),
        ...     fill="random_uniform",
        ...     p=1.0
        ... )
        >>> # Example with inpainting
        >>> aug_inpaint = A.CoarseDropout(
        ...     num_holes_range=(3, 6),
        ...     hole_height_range=(10, 20),
        ...     hole_width_range=(10, 20),
        ...     fill="inpaint_ns",
        ...     p=1.0
        ... )
        >>> transformed = aug_random(image=image, mask=mask)
        >>> transformed_image, transformed_mask = transformed["image"], transformed["mask"]

    References:
        - CutOut: https://arxiv.org/abs/1708.04552
        - Random Erasing: https://arxiv.org/abs/1708.04896
        - OpenCV Inpainting methods: https://docs.opencv.org/master/df/d3d/tutorial_py_inpainting.html

    """

    class InitSchema(BaseDropout.InitSchema):
        num_holes_range: Annotated[
            tuple[int, int],
            AfterValidator(check_range_bounds(1, None)),
            AfterValidator(nondecreasing),
        ]

        hole_height_range: Annotated[
            tuple[float, float] | tuple[int, int],
            AfterValidator(nondecreasing),
            AfterValidator(check_range_bounds(0, None)),
        ]

        hole_width_range: Annotated[
            tuple[float, float] | tuple[int, int],
            AfterValidator(nondecreasing),
            AfterValidator(check_range_bounds(0, None)),
        ]

    def __init__(
        self,
        num_holes_range: tuple[int, int] = (1, 2),
        hole_height_range: tuple[float, float] | tuple[int, int] = (0.1, 0.2),
        hole_width_range: tuple[float, float] | tuple[int, int] = (0.1, 0.2),
        fill: tuple[float, ...] | float | Literal["random", "random_uniform", "inpaint_telea", "inpaint_ns"] = 0,
        fill_mask: tuple[float, ...] | float | None = None,
        p: float = 0.5,
    ):
        super().__init__(fill=fill, fill_mask=fill_mask, p=p)
        self.num_holes_range = num_holes_range
        self.hole_height_range = hole_height_range
        self.hole_width_range = hole_width_range

    def _sample_hole_sizes(
        self,
        side_length: int,
        size_range: tuple[float, float] | tuple[int, int],
        size: int,
    ) -> np.ndarray:
        """Sample `size` hole extents along one image axis of length `side_length`."""
        lower, upper = size_range

        if upper >= 1:
            # Pixel mode: clip BOTH bounds to the image side so that the
            # half-open interval passed to `integers` is never empty.
            upper = min(upper, side_length)
            lower = min(lower, upper)
            return self.random_generator.integers(int(lower), int(upper + 1), size=size)

        # Fraction mode: scale the image side by a uniformly drawn fraction.
        return (side_length * self.random_generator.uniform(lower, upper, size=size)).astype(int)

    def calculate_hole_dimensions(
        self,
        image_shape: tuple[int, int],
        height_range: tuple[float, float] | tuple[int, int],
        width_range: tuple[float, float] | tuple[int, int],
        size: int,
    ) -> tuple[np.ndarray, np.ndarray]:
        """Calculate random hole dimensions based on the provided ranges.

        Each range is interpreted on its own: an upper bound >= 1 means absolute
        pixels (clipped to the image side), otherwise a fraction of the image side.

        Args:
            image_shape (tuple[int, int]): Image shape; only the first two entries (height, width) are used.
            height_range (tuple[float, float] | tuple[int, int]): Range (min, max) for hole heights.
            width_range (tuple[float, float] | tuple[int, int]): Range (min, max) for hole widths.
            size (int): Number of holes to sample dimensions for.

        Returns:
            tuple[np.ndarray, np.ndarray]: Integer arrays of length `size` with hole heights and hole widths.

        """
        height, width = image_shape[:2]

        # Heights are drawn before widths to keep the random stream order stable.
        hole_heights = self._sample_hole_sizes(height, height_range, size)
        hole_widths = self._sample_hole_sizes(width, width_range, size)

        return hole_heights, hole_widths

    def get_params_dependent_on_data(self, params: dict[str, Any], data: dict[str, Any]) -> dict[str, Any]:
        """Get parameters dependent on the data.

        Args:
            params (dict[str, Any]): Dictionary containing parameters; `params["shape"]` supplies
                the image height and width.
            data (dict[str, Any]): Dictionary containing data (not read by this transform).

        Returns:
            dict[str, Any]: Dictionary with `holes`, an (N, 4) array of `[x_min, y_min, x_max, y_max]`
                rows, and `seed`, a random integer drawn from the transform's generator.

        """
        height, width = params["shape"][:2]

        num_holes = self.py_random.randint(*self.num_holes_range)

        hole_heights, hole_widths = self.calculate_hole_dimensions(
            (height, width),
            self.hole_height_range,
            self.hole_width_range,
            size=num_holes,
        )

        # Top-left corners are chosen so that every hole lies fully inside the image.
        y_min = self.random_generator.integers(0, height - hole_heights + 1, size=num_holes)
        x_min = self.random_generator.integers(0, width - hole_widths + 1, size=num_holes)
        y_max = y_min + hole_heights
        x_max = x_min + hole_widths

        holes = np.stack([x_min, y_min, x_max, y_max], axis=-1)

        return {"holes": holes, "seed": self.random_generator.integers(0, 2**32 - 1)}
193
+
194
+
195
class Erasing(BaseDropout):
    """Randomly erases rectangular regions in an image, following the Random Erasing Data Augmentation technique.

    This augmentation helps improve model robustness by randomly masking out rectangular regions in the image,
    simulating occlusions and encouraging the model to learn from partial information. It's particularly
    effective for image classification and person re-identification tasks.

    Args:
        scale (tuple[float, float]): Range for the proportion of image area to erase.
            The actual area will be randomly sampled from (scale[0] * image_area, scale[1] * image_area).
            Default: (0.02, 0.33)
        ratio (tuple[float, float]): Range for the aspect ratio (width/height) of the erased region.
            The actual ratio will be randomly sampled from (ratio[0], ratio[1]).
            Default: (0.3, 3.3)
        fill (tuple[float, ...] | float | Literal["random", "random_uniform", "inpaint_telea", "inpaint_ns"]):
            Value used to fill the erased regions. Can be:
            - int or float: fills all channels with this value
            - tuple: fills each channel with corresponding value
            - "random": fills each pixel with random values
            - "random_uniform": fills entire erased region with a single random color
            - "inpaint_telea": uses OpenCV Telea inpainting method
            - "inpaint_ns": uses OpenCV Navier-Stokes inpainting method
            Default: 0
        fill_mask (tuple[float, ...] | float | None): Value used to fill erased regions in the mask.
            If None, mask regions are not modified. Default: None
        p (float): Probability of applying the transform. Default: 0.5

    Targets:
        image, mask, bboxes, keypoints, volume, mask3d

    Image types:
        uint8, float32

    Note:
        - The erased rectangle is derived directly from a sampled area and aspect ratio.
          If the scale and ratio ranges cannot be satisfied for the current image size
          (or describe a degenerate rectangle), no erasing is performed.
        - The actual erased area and aspect ratio are randomly sampled within
          the specified ranges for each application.
        - When using inpainting methods, only grayscale or RGB images are supported.

    Examples:
        >>> import numpy as np
        >>> import albumentations as A
        >>> image = np.random.randint(0, 256, (100, 100, 3), dtype=np.uint8)
        >>> # Basic usage with default parameters
        >>> transform = A.Erasing()
        >>> transformed = transform(image=image)
        >>> # Custom configuration
        >>> transform = A.Erasing(
        ...     scale=(0.1, 0.4),
        ...     ratio=(0.5, 2.0),
        ...     fill="random_uniform",
        ...     p=1.0
        ... )
        >>> transformed = transform(image=image)

    References:
        - Paper: https://arxiv.org/abs/1708.04896
        - Implementation inspired by torchvision:
          https://pytorch.org/vision/stable/transforms.html#torchvision.transforms.RandomErasing

    """

    class InitSchema(BaseDropout.InitSchema):
        scale: Annotated[
            tuple[float, float],
            AfterValidator(nondecreasing),
            AfterValidator(check_range_bounds(0, None)),
        ]
        ratio: Annotated[
            tuple[float, float],
            AfterValidator(nondecreasing),
            AfterValidator(check_range_bounds(0, None)),
        ]

    def __init__(
        self,
        scale: tuple[float, float] = (0.02, 0.33),
        ratio: tuple[float, float] = (0.3, 3.3),
        fill: tuple[float, ...] | float | Literal["random", "random_uniform", "inpaint_telea", "inpaint_ns"] = 0,
        fill_mask: tuple[float, ...] | float | None = None,
        p: float = 0.5,
    ):
        super().__init__(fill=fill, fill_mask=fill_mask, p=p)

        self.scale = scale
        self.ratio = ratio

    def _no_holes_params(self) -> dict[str, Any]:
        """Build the parameter dict for the 'nothing to erase' case.

        The `seed` key is included so that the returned dict has the same keys
        as on the regular path.
        """
        return {
            "holes": np.array([], dtype=np.int32).reshape((0, 4)),
            "seed": self.random_generator.integers(0, 2**32 - 1),
        }

    def get_params_dependent_on_data(self, params: dict[str, Any], data: dict[str, Any]) -> dict[str, Any]:
        """Calculate erasing parameters using direct mathematical derivation.

        Given:
        - Image dimensions (H, W)
        - Target area (A)
        - Aspect ratio (r = w/h)

        We know:
        - h * w = A (area equation)
        - w = r * h (aspect ratio equation)

        Therefore:
        - h * (r * h) = A
        - h² = A/r
        - h = sqrt(A/r)
        - w = r * sqrt(A/r) = sqrt(A*r)

        Args:
            params (dict[str, Any]): Dictionary containing parameters; `params["shape"]` supplies
                the image height and width.
            data (dict[str, Any]): Dictionary containing data (not read by this transform).

        Returns:
            dict[str, Any]: Dictionary with `holes`, a (1, 4) int32 array `[[left, top, right, bottom]]`
                (or an empty (0, 4) array when no valid rectangle exists), and `seed`.

        """
        height, width = params["shape"][:2]
        total_area = height * width

        # Requested area bounds in pixels
        max_area = total_area * self.scale[1]
        min_area = total_area * self.scale[0]

        r_min, r_max = self.ratio

        # A non-positive aspect ratio describes a degenerate rectangle and would
        # lead to a division by zero below.
        if r_min <= 0 or r_max <= 0:
            return self._no_holes_params()

        # For each aspect ratio r, the maximum area is constrained by:
        # h = sqrt(A/r) ≤ H and w = sqrt(A*r) ≤ W
        # Therefore: A ≤ min(r*H², W²/r)
        # Evaluating both constraints at both ends of the ratio range yields an
        # area that fits the image for every allowed ratio.
        max_valid_area = min(
            max_area,
            r_min * height * height,
            r_max * height * height,
            width * width / r_min,
            width * width / r_max,
        )

        if max_valid_area < min_area:
            return self._no_holes_params()

        # Sample valid area and aspect ratio
        erase_area = self.py_random.uniform(min_area, max_valid_area)

        # A zero area (e.g. scale=(0, 0)) leaves nothing to erase and would
        # divide by zero when bounding the aspect ratio.
        if erase_area <= 0:
            return self._no_holes_params()

        # Calculate valid aspect ratio range for this area
        max_r = min(r_max, width * width / erase_area)
        min_r = max(r_min, erase_area / (height * height))

        if min_r > max_r:
            return self._no_holes_params()

        aspect_ratio = self.py_random.uniform(min_r, max_r)

        # Calculate dimensions
        h = round(np.sqrt(erase_area / aspect_ratio))
        w = round(np.sqrt(erase_area * aspect_ratio))

        # Sample position (randint is inclusive on both ends)
        top = self.py_random.randint(0, height - h)
        left = self.py_random.randint(0, width - w)

        holes = np.array([[left, top, left + w, top + h]], dtype=np.int32)
        return {"holes": holes, "seed": self.random_generator.integers(0, 2**32 - 1)}
349
+
350
+
351
class ConstrainedCoarseDropout(BaseDropout):
    """Applies coarse dropout to regions containing specific objects in the image.

    This augmentation creates holes (dropout regions) for each target object in the image.
    Objects can be specified either by their class indices in a segmentation mask or
    by their labels in bounding box annotations.

    The hole generation differs between mask and box modes:

    Mask mode:
    1. For each connected component in the mask matching target indices:
        - Samples N points randomly from within the object region (with replacement)
        - Creates holes centered at these points
        - Hole sizes are proportional to sqrt(component area), not total object area
        - Each component's holes are sized based on its own area

    Box mode:
    1. For each bounding box matching target labels:
        - Creates N holes with random positions inside the box
        - Hole sizes are proportional to the box dimensions

    In both modes:
    - N is sampled once from num_holes_range and used for all objects
    - For example, if num_holes_range=(2,4) and 3 is sampled:
        * With 3 target objects, you'll get exactly 3 holes per object (9 total)
        * Holes may overlap within or between objects
        * All holes are clipped to image boundaries

    Args:
        num_holes_range (tuple[int, int]): Range for number of holes per object (min, max)
        hole_height_range (tuple[float, float]): Range for hole height as proportion
            of object height/size (min, max). E.g., (0.2, 0.4) means:
            - For boxes: 20-40% of box height
            - For masks: 20-40% of sqrt(component area)
        hole_width_range (tuple[float, float]): Range for hole width, similar to height
        fill (tuple[float, ...] | float | Literal["random", "random_uniform", "inpaint_telea", "inpaint_ns"]):
            Value used to fill the erased regions. Can be:
            - int or float: fills all channels with this value
            - tuple: fills each channel with corresponding value
            - "random": fills each pixel with random values
            - "random_uniform": fills entire erased region with a single random color
            - "inpaint_telea": uses OpenCV Telea inpainting method
            - "inpaint_ns": uses OpenCV Navier-Stokes inpainting method
            Default: 0
        fill_mask (tuple[float, ...] | float | None): Value used to fill erased regions in the mask.
            If None, mask regions are not modified. Default: None
        p (float): Probability of applying the transform
        mask_indices (list[int] | None): List of class indices in segmentation mask to target.
            Only objects of these classes will be considered for hole placement.
        bbox_labels (list[str | int | float] | None): List of object labels in bbox
            annotations to target. String labels will be automatically encoded.
            When multiple label fields are specified in BboxParams, only the first
            label field is used for filtering.

    Targets:
        image, mask, bboxes, keypoints, volume, mask3d

    Image types:
        uint8, float32

    Requires one of:
        - 'mask' key with segmentation mask where:
            * 0 represents background
            * Non-zero values represent different object instances/classes
            * Values must correspond to mask_indices
        - 'bboxes' key with bounding boxes in format [x_min, y_min, x_max, y_max, label, ...]

    Note:
        Provide at least one of mask_indices (together with a 'mask' input) or bbox_labels
        (together with a 'bboxes' input). If both are usable, mask_indices takes precedence.
        If neither is usable, a warning is emitted and no holes are created.

    Examples:
        >>> # Using segmentation mask
        >>> transform = ConstrainedCoarseDropout(
        ...     num_holes_range=(2, 4),             # 2-4 holes per object
        ...     hole_height_range=(0.2, 0.4),       # 20-40% of sqrt(object area)
        ...     hole_width_range=(0.2, 0.4),        # 20-40% of sqrt(object area)
        ...     mask_indices=[1, 2],                # Target objects of class 1 and 2
        ...     fill=0,                             # Fill holes with black
        ... )
        >>> # Apply to image and its segmentation mask
        >>> transformed = transform(image=image, mask=mask)

        >>> # Using bounding boxes with Compose
        >>> transform = A.Compose([
        ...     ConstrainedCoarseDropout(
        ...         num_holes_range=(1, 3),
        ...         hole_height_range=(0.3, 0.5),   # 30-50% of box height
        ...         hole_width_range=(0.3, 0.5),    # 30-50% of box width
        ...         bbox_labels=['person'],         # Target people
        ...         fill=127,                       # Fill holes with gray
        ...     )
        ... ], bbox_params=A.BboxParams(
        ...     format='pascal_voc',                # [x_min, y_min, x_max, y_max]
        ...     label_fields=['labels']             # Specify field containing labels
        ... ))
        >>> # Apply to image and its bounding boxes
        >>> transformed = transform(
        ...     image=image,
        ...     bboxes=[[0, 0, 100, 100, 'car'], [150, 150, 300, 300, 'person']],
        ...     labels=['car', 'person']
        ... )

    """

    class InitSchema(BaseDropout.InitSchema):
        num_holes_range: Annotated[
            tuple[int, int],
            AfterValidator(check_range_bounds(1, None)),
            AfterValidator(nondecreasing),
        ]

        hole_height_range: Annotated[
            tuple[float, float],
            AfterValidator(nondecreasing),
            AfterValidator(check_range_bounds(0.0, 1.0)),
        ]

        hole_width_range: Annotated[
            tuple[float, float],
            AfterValidator(nondecreasing),
            AfterValidator(check_range_bounds(0.0, 1.0)),
        ]

        mask_indices: Annotated[
            list[int] | None,
            AfterValidator(check_range_bounds(1, None)),
        ]

        bbox_labels: list[str | int | float] | None = None

    def __init__(
        self,
        num_holes_range: tuple[int, int] = (1, 1),
        hole_height_range: tuple[float, float] = (0.1, 0.1),
        hole_width_range: tuple[float, float] = (0.1, 0.1),
        fill: tuple[float, ...] | float | Literal["random", "random_uniform", "inpaint_telea", "inpaint_ns"] = 0,
        fill_mask: tuple[float, ...] | float | None = None,
        p: float = 0.5,
        mask_indices: list[int] | None = None,
        bbox_labels: list[str | int | float] | None = None,
    ):
        super().__init__(fill=fill, fill_mask=fill_mask, p=p)
        self.num_holes_range = num_holes_range
        self.hole_height_range = hole_height_range
        self.hole_width_range = hole_width_range
        self.mask_indices = mask_indices
        self.bbox_labels = bbox_labels

    def get_boxes_from_bboxes(self, bboxes: np.ndarray) -> np.ndarray | None:
        """Get bounding boxes that match specified labels.

        Uses BboxProcessor's label encoder if bbox_labels contain strings.

        Args:
            bboxes (np.ndarray): 2D array of boxes whose first four columns are coordinates
                and whose fifth column (index 4) holds the label used for filtering.

        Returns:
            np.ndarray | None: The coordinate columns of the matching boxes, or None when
                there is nothing to match (no boxes, no label column, no `bbox_labels`,
                no bbox processor, or no box carrying a target label).

        Raises:
            ValueError: If string labels are used but `BboxParams.label_fields` is not set,
                or no encoder is available for the first label field.

        """
        if len(bboxes) == 0 or self.bbox_labels is None:
            return None

        # Without a label column there is nothing to filter on.
        if bboxes.ndim < 2 or bboxes.shape[1] < 5:
            return None

        # Get label encoder from BboxProcessor if needed
        bbox_processor = self.get_processor("bboxes")
        if bbox_processor is None:
            return None

        if not all(isinstance(label, (int, float)) for label in self.bbox_labels):
            label_fields = bbox_processor.params.label_fields
            if label_fields is None:
                raise ValueError("BboxParams.label_fields must be specified when using string labels")

            first_class_label = label_fields[0]
            # Access encoder through label_manager's metadata
            metadata = bbox_processor.label_manager.metadata["bboxes"][first_class_label]
            if metadata.encoder is None:
                raise ValueError(f"No encoder found for label field {first_class_label}")

            target_labels = metadata.encoder.transform(self.bbox_labels)
        else:
            target_labels = np.array(self.bbox_labels)

        # Filter boxes by labels (usually in column 4)
        mask = np.isin(bboxes[:, 4], target_labels)
        filtered_boxes = bboxes[mask, :4]

        return filtered_boxes if len(filtered_boxes) > 0 else None

    def get_params_dependent_on_data(self, params: dict[str, Any], data: dict[str, Any]) -> dict[str, Any]:
        """Get hole parameters based on either mask indices or bbox labels.

        Args:
            params (dict[str, Any]): Dictionary containing parameters; `params["shape"]` supplies
                the image height and width used to denormalize bounding boxes.
            data (dict[str, Any]): Dictionary containing data; the `mask` and `bboxes` entries are
                read when present.

        Returns:
            dict[str, Any]: Dictionary with `holes` (an (N, 4) array, possibly empty) and `seed`.

        """
        num_holes_per_obj = self.py_random.randint(*self.num_holes_range)

        if self.mask_indices is not None and "mask" in data:
            holes = fdropout.get_holes_from_mask(
                data["mask"],
                num_holes_per_obj,
                self.mask_indices,
                self.hole_height_range,
                self.hole_width_range,
                self.random_generator,
            )
        elif self.bbox_labels is not None and "bboxes" in data:
            target_boxes = self.get_boxes_from_bboxes(data["bboxes"])
            if target_boxes is None:
                holes = np.array([], dtype=np.int32).reshape((0, 4))
            else:
                # Take the image size from params (as the other dropout transforms in this
                # module do) rather than from data["image"], which is absent when the
                # input is supplied under a different key.
                target_boxes = denormalize_bboxes(target_boxes, params["shape"][:2])
                holes = fdropout.get_holes_from_boxes(
                    target_boxes,
                    num_holes_per_obj,
                    self.hole_height_range,
                    self.hole_width_range,
                    self.random_generator,
                )
        else:
            warn("Neither valid mask nor bboxes provided, do not apply Constrained Coarse Dropout", stacklevel=2)
            holes = np.array([], dtype=np.int32).reshape((0, 4))

        return {
            "holes": holes,
            "seed": self.random_generator.integers(0, 2**32 - 1),
        }