nrtk-albumentations 2.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of nrtk-albumentations might be problematic.
- albumentations/__init__.py +21 -0
- albumentations/augmentations/__init__.py +23 -0
- albumentations/augmentations/blur/__init__.py +0 -0
- albumentations/augmentations/blur/functional.py +438 -0
- albumentations/augmentations/blur/transforms.py +1633 -0
- albumentations/augmentations/crops/__init__.py +0 -0
- albumentations/augmentations/crops/functional.py +494 -0
- albumentations/augmentations/crops/transforms.py +3647 -0
- albumentations/augmentations/dropout/__init__.py +0 -0
- albumentations/augmentations/dropout/channel_dropout.py +134 -0
- albumentations/augmentations/dropout/coarse_dropout.py +567 -0
- albumentations/augmentations/dropout/functional.py +1017 -0
- albumentations/augmentations/dropout/grid_dropout.py +166 -0
- albumentations/augmentations/dropout/mask_dropout.py +274 -0
- albumentations/augmentations/dropout/transforms.py +461 -0
- albumentations/augmentations/dropout/xy_masking.py +186 -0
- albumentations/augmentations/geometric/__init__.py +0 -0
- albumentations/augmentations/geometric/distortion.py +1238 -0
- albumentations/augmentations/geometric/flip.py +752 -0
- albumentations/augmentations/geometric/functional.py +4151 -0
- albumentations/augmentations/geometric/pad.py +676 -0
- albumentations/augmentations/geometric/resize.py +956 -0
- albumentations/augmentations/geometric/rotate.py +864 -0
- albumentations/augmentations/geometric/transforms.py +1962 -0
- albumentations/augmentations/mixing/__init__.py +0 -0
- albumentations/augmentations/mixing/domain_adaptation.py +787 -0
- albumentations/augmentations/mixing/domain_adaptation_functional.py +453 -0
- albumentations/augmentations/mixing/functional.py +878 -0
- albumentations/augmentations/mixing/transforms.py +832 -0
- albumentations/augmentations/other/__init__.py +0 -0
- albumentations/augmentations/other/lambda_transform.py +180 -0
- albumentations/augmentations/other/type_transform.py +261 -0
- albumentations/augmentations/pixel/__init__.py +0 -0
- albumentations/augmentations/pixel/functional.py +4226 -0
- albumentations/augmentations/pixel/transforms.py +7556 -0
- albumentations/augmentations/spectrogram/__init__.py +0 -0
- albumentations/augmentations/spectrogram/transform.py +220 -0
- albumentations/augmentations/text/__init__.py +0 -0
- albumentations/augmentations/text/functional.py +272 -0
- albumentations/augmentations/text/transforms.py +299 -0
- albumentations/augmentations/transforms3d/__init__.py +0 -0
- albumentations/augmentations/transforms3d/functional.py +393 -0
- albumentations/augmentations/transforms3d/transforms.py +1422 -0
- albumentations/augmentations/utils.py +249 -0
- albumentations/core/__init__.py +0 -0
- albumentations/core/bbox_utils.py +920 -0
- albumentations/core/composition.py +1885 -0
- albumentations/core/hub_mixin.py +299 -0
- albumentations/core/keypoints_utils.py +521 -0
- albumentations/core/label_manager.py +339 -0
- albumentations/core/pydantic.py +239 -0
- albumentations/core/serialization.py +352 -0
- albumentations/core/transforms_interface.py +976 -0
- albumentations/core/type_definitions.py +127 -0
- albumentations/core/utils.py +605 -0
- albumentations/core/validation.py +129 -0
- albumentations/pytorch/__init__.py +1 -0
- albumentations/pytorch/transforms.py +189 -0
- nrtk_albumentations-2.1.0.dist-info/METADATA +196 -0
- nrtk_albumentations-2.1.0.dist-info/RECORD +62 -0
- nrtk_albumentations-2.1.0.dist-info/WHEEL +4 -0
- nrtk_albumentations-2.1.0.dist-info/licenses/LICENSE +21 -0
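For orientation before the diff body, here is a minimal usage sketch assembled from the docstring examples visible in the hunk below. It is illustrative only and assumes the wheel installs under the upstream `albumentations` import name, as the file paths above indicate.

    import numpy as np
    import albumentations as A  # assumption: nrtk-albumentations installs under the upstream package name

    # Sample inputs matching the docstring examples in the diff below
    image = np.random.randint(0, 256, (100, 100, 3), dtype=np.uint8)
    mask = np.random.randint(0, 2, (100, 100), dtype=np.uint8)

    # Compose a pipeline with one of the crop transforms this file provides
    transform = A.Compose([A.RandomCrop(height=64, width=64)])
    result = transform(image=image, mask=mask)
    cropped_image = result["image"]  # 64x64 crop of the image
    cropped_mask = result["mask"]  # the mask cropped with the same coordinates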
albumentations/augmentations/crops/transforms.py
@@ -0,0 +1,3647 @@
+"""Transform classes for cropping operations on images and other data types.
+
+This module provides various crop transforms that can be applied to images, masks,
+bounding boxes, and keypoints. The transforms include simple cropping, random cropping,
+center cropping, cropping near bounding boxes, and other specialized cropping operations
+that maintain the integrity of bounding boxes. These transforms are designed to work within
+the albumentations pipeline and can be used for data augmentation in computer vision tasks.
+"""
+
+from __future__ import annotations
+
+import math
+from collections.abc import Sequence
+from typing import Annotated, Any, Literal, Union, cast
+
+import cv2
+import numpy as np
+from pydantic import AfterValidator, Field, model_validator
+from typing_extensions import Self
+
+from albumentations.augmentations.geometric import functional as fgeometric
+from albumentations.core.bbox_utils import denormalize_bboxes, normalize_bboxes, union_of_bboxes
+from albumentations.core.pydantic import (
+    OnePlusIntRangeType,
+    ZeroOneRangeType,
+    check_range_bounds,
+    nondecreasing,
+)
+from albumentations.core.transforms_interface import BaseTransformInitSchema, DualTransform
+from albumentations.core.type_definitions import (
+    ALL_TARGETS,
+    NUM_MULTI_CHANNEL_DIMENSIONS,
+    PAIR,
+    PercentType,
+    PxType,
+)
+
+from . import functional as fcrops
+
+__all__ = [
+    "AtLeastOneBBoxRandomCrop",
+    "BBoxSafeRandomCrop",
+    "CenterCrop",
+    "Crop",
+    "CropAndPad",
+    "CropNonEmptyMaskIfExists",
+    "RandomCrop",
+    "RandomCropFromBorders",
+    "RandomCropNearBBox",
+    "RandomResizedCrop",
+    "RandomSizedBBoxSafeCrop",
+    "RandomSizedCrop",
+]
+
+
+class CropSizeError(Exception):
+    pass
+
+
+class BaseCrop(DualTransform):
+    """Base class for transforms that only perform cropping.
+
+    This abstract class provides the foundation for all cropping transformations.
+    It handles cropping of different data types including images, masks, bounding boxes,
+    keypoints, and volumes while keeping their spatial relationships intact.
+
+    Child classes must implement the `get_params_dependent_on_data` method to determine
+    crop coordinates based on transform-specific logic. This method should return a dictionary
+    containing at least a 'crop_coords' key with a tuple value (x_min, y_min, x_max, y_max).
+
+    Args:
+        p (float): Probability of applying the transform. Default: 1.0.
+
+    Targets:
+        image, mask, bboxes, keypoints, volume, mask3d
+
+    Image types:
+        uint8, float32
+
+    Note:
+        This class is not meant to be used directly. Instead, use or create derived
+        transforms that implement the specific cropping behavior required.
+
+    Examples:
+        >>> import numpy as np
+        >>> import albumentations as A
+        >>> from albumentations.augmentations.crops.transforms import BaseCrop
+        >>>
+        >>> # Example of a custom crop transform that inherits from BaseCrop
+        >>> class CustomCenterCrop(BaseCrop):
+        ...     '''A simple custom center crop with configurable size'''
+        ...     def __init__(self, crop_height, crop_width, p=1.0):
+        ...         super().__init__(p=p)
+        ...         self.crop_height = crop_height
+        ...         self.crop_width = crop_width
+        ...
+        ...     def get_params_dependent_on_data(self, params, data):
+        ...         '''Calculate crop coordinates based on center of image'''
+        ...         image_height, image_width = params["shape"][:2]
+        ...
+        ...         # Calculate center crop coordinates
+        ...         x_min = max(0, (image_width - self.crop_width) // 2)
+        ...         y_min = max(0, (image_height - self.crop_height) // 2)
+        ...         x_max = min(image_width, x_min + self.crop_width)
+        ...         y_max = min(image_height, y_min + self.crop_height)
+        ...
+        ...         return {"crop_coords": (x_min, y_min, x_max, y_max)}
+        >>>
+        >>> # Prepare sample data
+        >>> image = np.random.randint(0, 256, (100, 100, 3), dtype=np.uint8)
+        >>> mask = np.random.randint(0, 2, (100, 100), dtype=np.uint8)
+        >>> bboxes = np.array([[10, 10, 50, 50], [40, 40, 80, 80]], dtype=np.float32)
+        >>> bbox_labels = [1, 2]
+        >>> keypoints = np.array([[20, 30], [60, 70]], dtype=np.float32)
+        >>> keypoint_labels = [0, 1]
+        >>>
+        >>> # Use the custom transform in a pipeline
+        >>> transform = A.Compose(
+        ...     [CustomCenterCrop(crop_height=80, crop_width=80)],
+        ...     bbox_params=A.BboxParams(format='pascal_voc', label_fields=['bbox_labels']),
+        ...     keypoint_params=A.KeypointParams(format='xy', label_fields=['keypoint_labels'])
+        ... )
+        >>>
+        >>> # Apply the transform to data
+        >>> result = transform(
+        ...     image=image,
+        ...     mask=mask,
+        ...     bboxes=bboxes,
+        ...     bbox_labels=bbox_labels,
+        ...     keypoints=keypoints,
+        ...     keypoint_labels=keypoint_labels
+        ... )
+        >>>
+        >>> # Get the transformed data
+        >>> transformed_image = result['image']  # Will be 80x80
+        >>> transformed_mask = result['mask']  # Will be 80x80
+        >>> transformed_bboxes = result['bboxes']  # Bounding boxes adjusted to the cropped area
+        >>> transformed_bbox_labels = result['bbox_labels']  # Labels for bboxes that remain after cropping
+        >>> transformed_keypoints = result['keypoints']  # Keypoints adjusted to the cropped area
+        >>> transformed_keypoint_labels = result['keypoint_labels']  # Labels for keypoints that remain after cropping
+
+    """
+
+    _targets = ALL_TARGETS
+
+    def apply(
+        self,
+        img: np.ndarray,
+        crop_coords: tuple[int, int, int, int],
+        **params: Any,
+    ) -> np.ndarray:
+        """Apply the crop transform to an image.
+
+        Args:
+            img (np.ndarray): The image to apply the crop transform to.
+            crop_coords (tuple[int, int, int, int]): The coordinates of the crop.
+            params (dict[str, Any]): Additional parameters for the transform.
+
+        Returns:
+            np.ndarray: The cropped image.
+
+        """
+        return fcrops.crop(img, x_min=crop_coords[0], y_min=crop_coords[1], x_max=crop_coords[2], y_max=crop_coords[3])
+
+    def apply_to_bboxes(
+        self,
+        bboxes: np.ndarray,
+        crop_coords: tuple[int, int, int, int],
+        **params: Any,
+    ) -> np.ndarray:
+        """Apply the crop transform to bounding boxes.
+
+        Args:
+            bboxes (np.ndarray): The bounding boxes to apply the crop transform to.
+            crop_coords (tuple[int, int, int, int]): The coordinates of the crop.
+            params (dict[str, Any]): Additional parameters for the transform.
+
+        Returns:
+            np.ndarray: The cropped bounding boxes.
+
+        """
+        return fcrops.crop_bboxes_by_coords(bboxes, crop_coords, params["shape"][:2])
+
+    def apply_to_keypoints(
+        self,
+        keypoints: np.ndarray,
+        crop_coords: tuple[int, int, int, int],
+        **params: Any,
+    ) -> np.ndarray:
+        """Apply the crop transform to keypoints.
+
+        Args:
+            keypoints (np.ndarray): The keypoints to apply the crop transform to.
+            crop_coords (tuple[int, int, int, int]): The coordinates of the crop.
+            params (dict[str, Any]): Additional parameters for the transform.
+
+        Returns:
+            np.ndarray: The cropped keypoints.
+
+        """
+        return fcrops.crop_keypoints_by_coords(keypoints, crop_coords)
+
+    def apply_to_images(
+        self,
+        images: np.ndarray,
+        crop_coords: tuple[int, int, int, int],
+        **params: Any,
+    ) -> np.ndarray:
+        return fcrops.volume_crop_yx(images, crop_coords[0], crop_coords[1], crop_coords[2], crop_coords[3])
+
+    def apply_to_volume(
+        self,
+        volume: np.ndarray,
+        crop_coords: tuple[int, int, int, int],
+        **params: Any,
+    ) -> np.ndarray:
+        return self.apply_to_images(volume, crop_coords, **params)
+
+    def apply_to_volumes(
+        self,
+        volumes: np.ndarray,
+        crop_coords: tuple[int, int, int, int],
+        **params: Any,
+    ) -> np.ndarray:
+        return fcrops.volumes_crop_yx(volumes, crop_coords[0], crop_coords[1], crop_coords[2], crop_coords[3])
+
+    def apply_to_mask3d(
+        self,
+        mask3d: np.ndarray,
+        crop_coords: tuple[int, int, int, int],
+        **params: Any,
+    ) -> np.ndarray:
+        return self.apply_to_images(mask3d, crop_coords, **params)
+
+    def apply_to_masks3d(
+        self,
+        masks3d: np.ndarray,
+        crop_coords: tuple[int, int, int, int],
+        **params: Any,
+    ) -> np.ndarray:
+        return self.apply_to_volumes(masks3d, crop_coords, **params)
+
+    @staticmethod
+    def _clip_bbox(bbox: tuple[int, int, int, int], image_shape: tuple[int, int]) -> tuple[int, int, int, int]:
+        height, width = image_shape[:2]
+        x_min, y_min, x_max, y_max = bbox
+        x_min = np.clip(x_min, 0, width)
+        y_min = np.clip(y_min, 0, height)
+
+        x_max = np.clip(x_max, x_min, width)
+        y_max = np.clip(y_max, y_min, height)
+        return x_min, y_min, x_max, y_max
+
+
+class BaseCropAndPad(BaseCrop):
+    """Base class for transforms that need both cropping and padding.
+
+    This abstract class extends BaseCrop by adding padding capabilities. It's the foundation
+    for transforms that may need to both crop parts of the input and add padding, such as when
+    converting inputs to a specific target size. The class handles the complexities of applying
+    these operations to different data types (images, masks, bounding boxes, keypoints) while
+    maintaining their spatial relationships.
+
+    Child classes must implement the `get_params_dependent_on_data` method to determine
+    crop coordinates and padding parameters based on transform-specific logic.
+
+    Args:
+        pad_if_needed (bool): Whether to pad the input if the crop size exceeds input dimensions.
+        border_mode (int): OpenCV border mode used for padding.
+        fill (tuple[float, ...] | float): Value to fill the padded area if border_mode is BORDER_CONSTANT.
+            For multi-channel images, this can be a tuple with a value for each channel.
+        fill_mask (tuple[float, ...] | float): Value to fill the padded area in masks.
+        pad_position (Literal["center", "top_left", "top_right", "bottom_left", "bottom_right", "random"]):
+            Position of padding when pad_if_needed is True.
+        p (float): Probability of applying the transform. Default: 1.0.
+
+    Targets:
+        image, mask, bboxes, keypoints, volume, mask3d
+
+    Image types:
+        uint8, float32
+
+    Note:
+        This class is not meant to be used directly. Instead, use or create derived
+        transforms that implement the specific cropping and padding behavior required.
+
+    Examples:
+        >>> import numpy as np
+        >>> import cv2
+        >>> import albumentations as A
+        >>> from albumentations.augmentations.crops.transforms import BaseCropAndPad
+        >>>
+        >>> # Example of a custom transform that inherits from BaseCropAndPad
+        >>> # This transform crops to a fixed size, padding if needed to maintain dimensions
+        >>> class CustomFixedSizeCrop(BaseCropAndPad):
+        ...     '''A custom fixed-size crop that pads if needed to maintain output size'''
+        ...     def __init__(
+        ...         self,
+        ...         height=224,
+        ...         width=224,
+        ...         offset_x=0,  # Offset for crop position
+        ...         offset_y=0,  # Offset for crop position
+        ...         pad_if_needed=True,
+        ...         border_mode=cv2.BORDER_CONSTANT,
+        ...         fill=0,
+        ...         fill_mask=0,
+        ...         pad_position="center",
+        ...         p=1.0,
+        ...     ):
+        ...         super().__init__(
+        ...             pad_if_needed=pad_if_needed,
+        ...             border_mode=border_mode,
+        ...             fill=fill,
+        ...             fill_mask=fill_mask,
+        ...             pad_position=pad_position,
+        ...             p=p,
+        ...         )
+        ...         self.height = height
+        ...         self.width = width
+        ...         self.offset_x = offset_x
+        ...         self.offset_y = offset_y
+        ...
+        ...     def get_params_dependent_on_data(self, params, data):
+        ...         '''Calculate crop coordinates and padding if needed'''
+        ...         image_shape = params["shape"][:2]
+        ...         image_height, image_width = image_shape
+        ...
+        ...         # Calculate crop coordinates with offsets
+        ...         x_min = self.offset_x
+        ...         y_min = self.offset_y
+        ...         x_max = min(x_min + self.width, image_width)
+        ...         y_max = min(y_min + self.height, image_height)
+        ...
+        ...         # Get padding params if needed
+        ...         pad_params = self._get_pad_params(
+        ...             image_shape,
+        ...             (self.height, self.width)
+        ...         ) if self.pad_if_needed else None
+        ...
+        ...         return {
+        ...             "crop_coords": (x_min, y_min, x_max, y_max),
+        ...             "pad_params": pad_params,
+        ...         }
+        >>>
+        >>> # Prepare sample data
+        >>> image = np.random.randint(0, 256, (100, 100, 3), dtype=np.uint8)
+        >>> mask = np.random.randint(0, 2, (100, 100), dtype=np.uint8)
+        >>> bboxes = np.array([[10, 10, 50, 50], [40, 40, 80, 80]], dtype=np.float32)
+        >>> bbox_labels = [1, 2]
+        >>> keypoints = np.array([[20, 30], [60, 70]], dtype=np.float32)
+        >>> keypoint_labels = [0, 1]
+        >>>
+        >>> # Use the custom transform in a pipeline
+        >>> # This will create a 224x224 crop with padding as needed
+        >>> transform = A.Compose(
+        ...     [CustomFixedSizeCrop(
+        ...         height=224,
+        ...         width=224,
+        ...         offset_x=20,
+        ...         offset_y=10,
+        ...         fill=127,  # Gray color for padding
+        ...         fill_mask=0
+        ...     )],
+        ...     bbox_params=A.BboxParams(format='pascal_voc', label_fields=['bbox_labels']),
+        ...     keypoint_params=A.KeypointParams(format='xy', label_fields=['keypoint_labels']))
+        >>>
+        >>> # Apply the transform to data
+        >>> result = transform(
+        ...     image=image,
+        ...     mask=mask,
+        ...     bboxes=bboxes,
+        ...     bbox_labels=bbox_labels,
+        ...     keypoints=keypoints,
+        ...     keypoint_labels=keypoint_labels
+        ... )
+        >>>
+        >>> # Get the transformed data
+        >>> transformed_image = result['image']  # Will be 224x224 with padding
+        >>> transformed_mask = result['mask']  # Will be 224x224 with padding
+        >>> transformed_bboxes = result['bboxes']  # Bounding boxes adjusted to the cropped and padded area
+        >>> transformed_bbox_labels = result['bbox_labels']  # Bounding box labels after crop
+        >>> transformed_keypoints = result['keypoints']  # Keypoints adjusted to the cropped and padded area
+        >>> transformed_keypoint_labels = result['keypoint_labels']  # Keypoint labels after crop
+
+    """
+
+    class InitSchema(BaseTransformInitSchema):
+        pad_if_needed: bool
+        border_mode: Literal[
+            cv2.BORDER_CONSTANT,
+            cv2.BORDER_REPLICATE,
+            cv2.BORDER_REFLECT,
+            cv2.BORDER_WRAP,
+            cv2.BORDER_REFLECT_101,
+        ]
+        fill: tuple[float, ...] | float
+        fill_mask: tuple[float, ...] | float
+        pad_position: Literal["center", "top_left", "top_right", "bottom_left", "bottom_right", "random"]
+
+    def __init__(
+        self,
+        pad_if_needed: bool,
+        border_mode: Literal[
+            cv2.BORDER_CONSTANT,
+            cv2.BORDER_REPLICATE,
+            cv2.BORDER_REFLECT,
+            cv2.BORDER_WRAP,
+            cv2.BORDER_REFLECT_101,
+        ],
+        fill: tuple[float, ...] | float,
+        fill_mask: tuple[float, ...] | float,
+        pad_position: Literal["center", "top_left", "top_right", "bottom_left", "bottom_right", "random"],
+        p: float,
+    ):
+        super().__init__(p=p)
+        self.pad_if_needed = pad_if_needed
+        self.border_mode = border_mode
+        self.fill = fill
+        self.fill_mask = fill_mask
+        self.pad_position = pad_position
+
+    def _get_pad_params(self, image_shape: tuple[int, int], target_shape: tuple[int, int]) -> dict[str, Any] | None:
+        """Calculate padding parameters if needed."""
+        if not self.pad_if_needed:
+            return None
+
+        h_pad_top, h_pad_bottom, w_pad_left, w_pad_right = fgeometric.get_padding_params(
+            image_shape=image_shape,
+            min_height=target_shape[0],
+            min_width=target_shape[1],
+            pad_height_divisor=None,
+            pad_width_divisor=None,
+        )
+
+        if h_pad_top == h_pad_bottom == w_pad_left == w_pad_right == 0:
+            return None
+
+        h_pad_top, h_pad_bottom, w_pad_left, w_pad_right = fgeometric.adjust_padding_by_position(
+            h_top=h_pad_top,
+            h_bottom=h_pad_bottom,
+            w_left=w_pad_left,
+            w_right=w_pad_right,
+            position=self.pad_position,
+            py_random=self.py_random,
+        )
+
+        return {
+            "pad_top": h_pad_top,
+            "pad_bottom": h_pad_bottom,
+            "pad_left": w_pad_left,
+            "pad_right": w_pad_right,
+        }
+
+    def apply(
+        self,
+        img: np.ndarray,
+        crop_coords: tuple[int, int, int, int],
+        **params: Any,
+    ) -> np.ndarray:
+        """Apply the crop and pad transform to an image.
+
+        Args:
+            img (np.ndarray): The image to apply the crop and pad transform to.
+            crop_coords (tuple[int, int, int, int]): The coordinates of the crop.
+            params (dict[str, Any]): Additional parameters for the transform.
+
+        Returns:
+            np.ndarray: The cropped and padded image.
+
+        """
+        pad_params = params.get("pad_params")
+        if pad_params is not None:
+            img = fgeometric.pad_with_params(
+                img,
+                pad_params["pad_top"],
+                pad_params["pad_bottom"],
+                pad_params["pad_left"],
+                pad_params["pad_right"],
+                border_mode=self.border_mode,
+                value=self.fill,
+            )
+        return BaseCrop.apply(self, img, crop_coords, **params)
+
+    def apply_to_mask(
+        self,
+        mask: np.ndarray,
+        crop_coords: Any,
+        **params: Any,
+    ) -> np.ndarray:
+        """Apply the crop and pad transform to a mask.
+
+        Args:
+            mask (np.ndarray): The mask to apply the crop and pad transform to.
+            crop_coords (tuple[int, int, int, int]): The coordinates of the crop.
+            params (dict[str, Any]): Additional parameters for the transform.
+
+        Returns:
+            np.ndarray: The cropped and padded mask.
+
+        """
+        pad_params = params.get("pad_params")
+        if pad_params is not None:
+            mask = fgeometric.pad_with_params(
+                mask,
+                pad_params["pad_top"],
+                pad_params["pad_bottom"],
+                pad_params["pad_left"],
+                pad_params["pad_right"],
+                border_mode=self.border_mode,
+                value=self.fill_mask,
+            )
+        # Note: super().apply would apply the padding twice here, as it loops back to this class's apply
+        return BaseCrop.apply(self, mask, crop_coords=crop_coords, **params)
+
515
|
+
def apply_to_images(
|
|
516
|
+
self,
|
|
517
|
+
images: np.ndarray,
|
|
518
|
+
crop_coords: tuple[int, int, int, int],
|
|
519
|
+
**params: Any,
|
|
520
|
+
) -> np.ndarray:
|
|
521
|
+
pad_params = params.get("pad_params")
|
|
522
|
+
if pad_params is not None:
|
|
523
|
+
images = fcrops.pad_along_axes(
|
|
524
|
+
images,
|
|
525
|
+
pad_params["pad_top"],
|
|
526
|
+
pad_params["pad_bottom"],
|
|
527
|
+
pad_params["pad_left"],
|
|
528
|
+
pad_params["pad_right"],
|
|
529
|
+
h_axis=1,
|
|
530
|
+
w_axis=2,
|
|
531
|
+
border_mode=self.border_mode,
|
|
532
|
+
pad_value=self.fill,
|
|
533
|
+
)
|
|
534
|
+
return BaseCrop.apply_to_images(self, images, crop_coords, **params)
|
|
535
|
+
|
|
536
|
+
def apply_to_volume(
|
|
537
|
+
self,
|
|
538
|
+
volume: np.ndarray,
|
|
539
|
+
crop_coords: tuple[int, int, int, int],
|
|
540
|
+
**params: Any,
|
|
541
|
+
) -> np.ndarray:
|
|
542
|
+
return self.apply_to_images(volume, crop_coords, **params)
|
|
543
|
+
|
|
544
|
+
def apply_to_volumes(
|
|
545
|
+
self,
|
|
546
|
+
volumes: np.ndarray,
|
|
547
|
+
crop_coords: tuple[int, int, int, int],
|
|
548
|
+
**params: Any,
|
|
549
|
+
) -> np.ndarray:
|
|
550
|
+
pad_params = params.get("pad_params")
|
|
551
|
+
if pad_params is not None:
|
|
552
|
+
volumes = fcrops.pad_along_axes(
|
|
553
|
+
volumes,
|
|
554
|
+
pad_params["pad_top"],
|
|
555
|
+
pad_params["pad_bottom"],
|
|
556
|
+
pad_params["pad_left"],
|
|
557
|
+
pad_params["pad_right"],
|
|
558
|
+
h_axis=2,
|
|
559
|
+
w_axis=3,
|
|
560
|
+
border_mode=self.border_mode,
|
|
561
|
+
pad_value=self.fill,
|
|
562
|
+
)
|
|
563
|
+
return BaseCrop.apply_to_volumes(self, volumes, crop_coords, **params)
|
|
564
|
+
|
|
565
|
+
def apply_to_mask3d(
|
|
566
|
+
self,
|
|
567
|
+
mask3d: np.ndarray,
|
|
568
|
+
crop_coords: tuple[int, int, int, int],
|
|
569
|
+
**params: Any,
|
|
570
|
+
) -> np.ndarray:
|
|
571
|
+
return self.apply_to_images(mask3d, crop_coords, **params)
|
|
572
|
+
|
|
573
|
+
def apply_to_masks3d(
|
|
574
|
+
self,
|
|
575
|
+
masks3d: np.ndarray,
|
|
576
|
+
crop_coords: tuple[int, int, int, int],
|
|
577
|
+
**params: Any,
|
|
578
|
+
) -> np.ndarray:
|
|
579
|
+
return self.apply_to_volumes(masks3d, crop_coords, **params)
|
|
580
|
+
|
|
581
|
+
def apply_to_bboxes(
|
|
582
|
+
self,
|
|
583
|
+
bboxes: np.ndarray,
|
|
584
|
+
crop_coords: tuple[int, int, int, int],
|
|
585
|
+
**params: Any,
|
|
586
|
+
) -> np.ndarray:
|
|
587
|
+
"""Apply the crop and pad transform to bounding boxes.
|
|
588
|
+
|
|
589
|
+
Args:
|
|
590
|
+
bboxes (np.ndarray): The bounding boxes to apply the crop and pad transform to.
|
|
591
|
+
crop_coords (tuple[int, int, int, int]): The coordinates of the crop.
|
|
592
|
+
params (dict[str, Any]): Additional parameters for the transform.
|
|
593
|
+
|
|
594
|
+
Returns:
|
|
595
|
+
np.ndarray: The cropped and padded bounding boxes.
|
|
596
|
+
|
|
597
|
+
"""
|
|
598
|
+
pad_params = params.get("pad_params")
|
|
599
|
+
image_shape = params["shape"][:2]
|
|
600
|
+
|
|
601
|
+
if pad_params is not None:
|
|
602
|
+
# First denormalize bboxes to absolute coordinates
|
|
603
|
+
bboxes_np = denormalize_bboxes(bboxes, image_shape)
|
|
604
|
+
|
|
605
|
+
# Apply padding to bboxes (already works with absolute coordinates)
|
|
606
|
+
bboxes_np = fgeometric.pad_bboxes(
|
|
607
|
+
bboxes_np,
|
|
608
|
+
pad_params["pad_top"],
|
|
609
|
+
pad_params["pad_bottom"],
|
|
610
|
+
pad_params["pad_left"],
|
|
611
|
+
pad_params["pad_right"],
|
|
612
|
+
self.border_mode,
|
|
613
|
+
image_shape=image_shape,
|
|
614
|
+
)
|
|
615
|
+
|
|
616
|
+
# Update shape to padded dimensions
|
|
617
|
+
padded_height = image_shape[0] + pad_params["pad_top"] + pad_params["pad_bottom"]
|
|
618
|
+
padded_width = image_shape[1] + pad_params["pad_left"] + pad_params["pad_right"]
|
|
619
|
+
padded_shape = (padded_height, padded_width)
|
|
620
|
+
|
|
621
|
+
bboxes_np = normalize_bboxes(bboxes_np, padded_shape)
|
|
622
|
+
|
|
623
|
+
params["shape"] = padded_shape
|
|
624
|
+
|
|
625
|
+
return BaseCrop.apply_to_bboxes(self, bboxes_np, crop_coords, **params)
|
|
626
|
+
|
|
627
|
+
# If no padding, use original function behavior
|
|
628
|
+
return BaseCrop.apply_to_bboxes(self, bboxes, crop_coords, **params)
|
|
629
|
+
|
|
630
|
+
def apply_to_keypoints(
|
|
631
|
+
self,
|
|
632
|
+
keypoints: np.ndarray,
|
|
633
|
+
crop_coords: tuple[int, int, int, int],
|
|
634
|
+
**params: Any,
|
|
635
|
+
) -> np.ndarray:
|
|
636
|
+
"""Apply the crop and pad transform to keypoints.
|
|
637
|
+
|
|
638
|
+
Args:
|
|
639
|
+
keypoints (np.ndarray): The keypoints to apply the crop and pad transform to.
|
|
640
|
+
crop_coords (tuple[int, int, int, int]): The coordinates of the crop.
|
|
641
|
+
params (dict[str, Any]): Additional parameters for the transform.
|
|
642
|
+
|
|
643
|
+
Returns:
|
|
644
|
+
np.ndarray: The cropped and padded keypoints.
|
|
645
|
+
|
|
646
|
+
"""
|
|
647
|
+
pad_params = params.get("pad_params")
|
|
648
|
+
image_shape = params["shape"][:2]
|
|
649
|
+
|
|
650
|
+
if pad_params is not None:
|
|
651
|
+
# Calculate padded dimensions
|
|
652
|
+
padded_height = image_shape[0] + pad_params["pad_top"] + pad_params["pad_bottom"]
|
|
653
|
+
padded_width = image_shape[1] + pad_params["pad_left"] + pad_params["pad_right"]
|
|
654
|
+
|
|
655
|
+
# First apply padding to keypoints using original image shape
|
|
656
|
+
keypoints = fgeometric.pad_keypoints(
|
|
657
|
+
keypoints,
|
|
658
|
+
pad_params["pad_top"],
|
|
659
|
+
pad_params["pad_bottom"],
|
|
660
|
+
pad_params["pad_left"],
|
|
661
|
+
pad_params["pad_right"],
|
|
662
|
+
self.border_mode,
|
|
663
|
+
image_shape=image_shape,
|
|
664
|
+
)
|
|
665
|
+
|
|
666
|
+
# Update image shape for subsequent crop operation
|
|
667
|
+
params = {**params, "shape": (padded_height, padded_width)}
|
|
668
|
+
|
|
669
|
+
return BaseCrop.apply_to_keypoints(self, keypoints, crop_coords, **params)
|
|
670
|
+
|
|
671
|
+
|
|
672
|
+
class RandomCrop(BaseCropAndPad):
|
|
673
|
+
"""Crop a random part of the input.
|
|
674
|
+
|
|
675
|
+
Args:
|
|
676
|
+
height (int): height of the crop.
|
|
677
|
+
width (int): width of the crop.
|
|
678
|
+
pad_if_needed (bool): Whether to pad if crop size exceeds image size. Default: False.
|
|
679
|
+
border_mode (OpenCV flag): OpenCV border mode used for padding. Default: cv2.BORDER_CONSTANT.
|
|
680
|
+
fill (tuple[float, ...] | float): Padding value for images if border_mode is
|
|
681
|
+
cv2.BORDER_CONSTANT. Default: 0.
|
|
682
|
+
fill_mask (tuple[float, ...] | float): Padding value for masks if border_mode is
|
|
683
|
+
cv2.BORDER_CONSTANT. Default: 0.
|
|
684
|
+
pad_position (Literal['center', 'top_left', 'top_right', 'bottom_left', 'bottom_right', 'random']):
|
|
685
|
+
Position of padding. Default: 'center'.
|
|
686
|
+
p (float): Probability of applying the transform. Default: 1.0.
|
|
687
|
+
|
|
688
|
+
Targets:
|
|
689
|
+
image, mask, bboxes, keypoints, volume, mask3d
|
|
690
|
+
|
|
691
|
+
Image types:
|
|
692
|
+
uint8, float32
|
|
693
|
+
|
|
694
|
+
Note:
|
|
695
|
+
If pad_if_needed is True and crop size exceeds image dimensions, the image will be padded
|
|
696
|
+
before applying the random crop.
|
|
697
|
+
|
|
698
|
+
Examples:
|
|
699
|
+
>>> import numpy as np
|
|
700
|
+
>>> import albumentations as A
|
|
701
|
+
>>> import cv2
|
|
702
|
+
>>>
|
|
703
|
+
>>> # Prepare sample data
|
|
704
|
+
>>> image = np.random.randint(0, 256, (100, 100, 3), dtype=np.uint8)
|
|
705
|
+
>>> mask = np.random.randint(0, 2, (100, 100), dtype=np.uint8)
|
|
706
|
+
>>> bboxes = np.array([[10, 10, 50, 50], [40, 40, 80, 80]], dtype=np.float32)
|
|
707
|
+
>>> bbox_labels = [1, 2]
|
|
708
|
+
>>> keypoints = np.array([[20, 30], [60, 70]], dtype=np.float32)
|
|
709
|
+
>>> keypoint_labels = [0, 1]
|
|
710
|
+
>>>
|
|
711
|
+
>>> # Example 1: Basic random crop
|
|
712
|
+
>>> transform = A.Compose([
|
|
713
|
+
... A.RandomCrop(height=64, width=64),
|
|
714
|
+
... ], bbox_params=A.BboxParams(format='pascal_voc', label_fields=['bbox_labels']),
|
|
715
|
+
... keypoint_params=A.KeypointParams(format='xy', label_fields=['keypoint_labels']))
|
|
716
|
+
>>>
|
|
717
|
+
>>> # Apply the transform
|
|
718
|
+
>>> transformed = transform(
|
|
719
|
+
... image=image,
|
|
720
|
+
... mask=mask,
|
|
721
|
+
... bboxes=bboxes,
|
|
722
|
+
... bbox_labels=bbox_labels,
|
|
723
|
+
... keypoints=keypoints,
|
|
724
|
+
... keypoint_labels=keypoint_labels
|
|
725
|
+
... )
|
|
726
|
+
>>>
|
|
727
|
+
>>> # Get the transformed data
|
|
728
|
+
>>> transformed_image = transformed['image'] # Will be 64x64
|
|
729
|
+
>>> transformed_mask = transformed['mask'] # Will be 64x64
|
|
730
|
+
>>> transformed_bboxes = transformed['bboxes'] # Bounding boxes adjusted to the cropped area
|
|
731
|
+
>>> transformed_bbox_labels = transformed['bbox_labels'] # Labels for boxes that remain after cropping
|
|
732
|
+
>>> transformed_keypoints = transformed['keypoints'] # Keypoints adjusted to the cropped area
|
|
733
|
+
>>> transformed_keypoint_labels = transformed['keypoint_labels'] # Labels for keypoints that remain
|
|
734
|
+
>>>
|
|
735
|
+
>>> # Example 2: Random crop with padding when needed
|
|
736
|
+
>>> # This is useful when you want to crop to a size larger than some images
|
|
737
|
+
>>> transform_padded = A.Compose([
|
|
738
|
+
... A.RandomCrop(
|
|
739
|
+
... height=120, # Larger than original image height
|
|
740
|
+
... width=120, # Larger than original image width
|
|
741
|
+
... pad_if_needed=True,
|
|
742
|
+
... border_mode=cv2.BORDER_CONSTANT,
|
|
743
|
+
... fill=0, # Black padding for image
|
|
744
|
+
... fill_mask=0 # Zero padding for mask
|
|
745
|
+
... ),
|
|
746
|
+
... ], bbox_params=A.BboxParams(format='pascal_voc', label_fields=['bbox_labels']),
|
|
747
|
+
... keypoint_params=A.KeypointParams(format='xy', label_fields=['keypoint_labels']))
|
|
748
|
+
>>>
|
|
749
|
+
>>> # Apply the padded transform
|
|
750
|
+
>>> padded_transformed = transform_padded(
|
|
751
|
+
... image=image,
|
|
752
|
+
... mask=mask,
|
|
753
|
+
... bboxes=bboxes,
|
|
754
|
+
... bbox_labels=bbox_labels,
|
|
755
|
+
... keypoints=keypoints,
|
|
756
|
+
... keypoint_labels=keypoint_labels
|
|
757
|
+
... )
|
|
758
|
+
>>>
|
|
759
|
+
>>> # The result will be 120x120 with padding
|
|
760
|
+
>>> padded_image = padded_transformed['image']
|
|
761
|
+
>>> padded_mask = padded_transformed['mask']
|
|
762
|
+
>>> padded_bboxes = padded_transformed['bboxes'] # Coordinates adjusted to the new dimensions
|
|
763
|
+
|
|
764
|
+
"""
|
|
765
|
+
|
|
766
|
+
class InitSchema(BaseCropAndPad.InitSchema):
|
|
767
|
+
height: Annotated[int, Field(ge=1)]
|
|
768
|
+
width: Annotated[int, Field(ge=1)]
|
|
769
|
+
border_mode: Literal[
|
|
770
|
+
cv2.BORDER_CONSTANT,
|
|
771
|
+
cv2.BORDER_REPLICATE,
|
|
772
|
+
cv2.BORDER_REFLECT,
|
|
773
|
+
cv2.BORDER_WRAP,
|
|
774
|
+
cv2.BORDER_REFLECT_101,
|
|
775
|
+
]
|
|
776
|
+
|
|
777
|
+
fill: tuple[float, ...] | float
|
|
778
|
+
fill_mask: tuple[float, ...] | float
|
|
779
|
+
|
|
780
|
+
def __init__(
|
|
781
|
+
self,
|
|
782
|
+
height: int,
|
|
783
|
+
width: int,
|
|
784
|
+
pad_if_needed: bool = False,
|
|
785
|
+
pad_position: Literal["center", "top_left", "top_right", "bottom_left", "bottom_right", "random"] = "center",
|
|
786
|
+
border_mode: Literal[
|
|
787
|
+
cv2.BORDER_CONSTANT,
|
|
788
|
+
cv2.BORDER_REPLICATE,
|
|
789
|
+
cv2.BORDER_REFLECT,
|
|
790
|
+
cv2.BORDER_WRAP,
|
|
791
|
+
cv2.BORDER_REFLECT_101,
|
|
792
|
+
] = cv2.BORDER_CONSTANT,
|
|
793
|
+
fill: tuple[float, ...] | float = 0.0,
|
|
794
|
+
fill_mask: tuple[float, ...] | float = 0.0,
|
|
795
|
+
p: float = 1.0,
|
|
796
|
+
):
|
|
797
|
+
super().__init__(
|
|
798
|
+
pad_if_needed=pad_if_needed,
|
|
799
|
+
border_mode=border_mode,
|
|
800
|
+
fill=fill,
|
|
801
|
+
fill_mask=fill_mask,
|
|
802
|
+
pad_position=pad_position,
|
|
803
|
+
p=p,
|
|
804
|
+
)
|
|
805
|
+
self.height = height
|
|
806
|
+
self.width = width
|
|
807
|
+
|
|
808
|
+
def get_params_dependent_on_data(
|
|
809
|
+
self,
|
|
810
|
+
params: dict[str, Any],
|
|
811
|
+
data: dict[str, Any],
|
|
812
|
+
) -> dict[str, Any]: # Changed return type to be more flexible
|
|
813
|
+
"""Get parameters that depend on input data.
|
|
814
|
+
|
|
815
|
+
Args:
|
|
816
|
+
params (dict[str, Any]): Parameters.
|
|
817
|
+
data (dict[str, Any]): Input data.
|
|
818
|
+
|
|
819
|
+
Returns:
|
|
820
|
+
dict[str, Any]: Dictionary with parameters.
|
|
821
|
+
|
|
822
|
+
"""
|
|
823
|
+
image_shape = params["shape"][:2]
|
|
824
|
+
image_height, image_width = image_shape
|
|
825
|
+
|
|
826
|
+
if not self.pad_if_needed and (self.height > image_height or self.width > image_width):
|
|
827
|
+
raise CropSizeError(
|
|
828
|
+
f"Crop size (height, width) exceeds image dimensions (height, width):"
|
|
829
|
+
f" {(self.height, self.width)} vs {image_shape[:2]}",
|
|
830
|
+
)
|
|
831
|
+
|
|
832
|
+
# Get padding params first if needed
|
|
833
|
+
pad_params = self._get_pad_params(image_shape, (self.height, self.width))
|
|
834
|
+
|
|
835
|
+
# If padding is needed, adjust the image shape for crop calculation
|
|
836
|
+
if pad_params is not None:
|
|
837
|
+
pad_top = pad_params["pad_top"]
|
|
838
|
+
pad_bottom = pad_params["pad_bottom"]
|
|
839
|
+
pad_left = pad_params["pad_left"]
|
|
840
|
+
pad_right = pad_params["pad_right"]
|
|
841
|
+
|
|
842
|
+
padded_height = image_height + pad_top + pad_bottom
|
|
843
|
+
padded_width = image_width + pad_left + pad_right
|
|
844
|
+
padded_shape = (padded_height, padded_width)
|
|
845
|
+
|
|
846
|
+
# Get random crop coordinates based on padded dimensions
|
|
847
|
+
h_start = self.py_random.random()
|
|
848
|
+
w_start = self.py_random.random()
|
|
849
|
+
crop_coords = fcrops.get_crop_coords(padded_shape, (self.height, self.width), h_start, w_start)
|
|
850
|
+
else:
|
|
851
|
+
# Get random crop coordinates based on original dimensions
|
|
852
|
+
h_start = self.py_random.random()
|
|
853
|
+
w_start = self.py_random.random()
|
|
854
|
+
crop_coords = fcrops.get_crop_coords(image_shape, (self.height, self.width), h_start, w_start)
|
|
855
|
+
|
|
856
|
+
return {
|
|
857
|
+
"crop_coords": crop_coords,
|
|
858
|
+
"pad_params": pad_params,
|
|
859
|
+
}
|
|
860
|
+
|
|
861
|
+
|
|
862
|
+
class CenterCrop(BaseCropAndPad):
|
|
863
|
+
"""Crop the central part of the input.
|
|
864
|
+
|
|
865
|
+
This transform crops the center of the input image, mask, bounding boxes, and keypoints to the specified dimensions.
|
|
866
|
+
It's useful when you want to focus on the central region of the input, discarding peripheral information.
|
|
867
|
+
|
|
868
|
+
Args:
|
|
869
|
+
height (int): The height of the crop. Must be greater than 0.
|
|
870
|
+
width (int): The width of the crop. Must be greater than 0.
|
|
871
|
+
pad_if_needed (bool): Whether to pad if crop size exceeds image size. Default: False.
|
|
872
|
+
border_mode (OpenCV flag): OpenCV border mode used for padding. Default: cv2.BORDER_CONSTANT.
|
|
873
|
+
fill (tuple[float, ...] | float): Padding value for images if border_mode is
|
|
874
|
+
cv2.BORDER_CONSTANT. Default: 0.
|
|
875
|
+
fill_mask (tuple[float, ...] | float): Padding value for masks if border_mode is
|
|
876
|
+
cv2.BORDER_CONSTANT. Default: 0.
|
|
877
|
+
pad_position (Literal['center', 'top_left', 'top_right', 'bottom_left', 'bottom_right', 'random']):
|
|
878
|
+
Position of padding. Default: 'center'.
|
|
879
|
+
p (float): Probability of applying the transform. Default: 1.0.
|
|
880
|
+
|
|
881
|
+
Targets:
|
|
882
|
+
image, mask, bboxes, keypoints, volume, mask3d
|
|
883
|
+
|
|
884
|
+
Image types:
|
|
885
|
+
uint8, float32
|
|
886
|
+
|
|
887
|
+
Note:
|
|
888
|
+
- If pad_if_needed is False and crop size exceeds image dimensions, it will raise a CropSizeError.
|
|
889
|
+
- If pad_if_needed is True and crop size exceeds image dimensions, the image will be padded.
|
|
890
|
+
- For bounding boxes and keypoints, coordinates are adjusted appropriately for both padding and cropping.
|
|
891
|
+
|
|
892
|
+
Examples:
|
|
893
|
+
>>> import numpy as np
|
|
894
|
+
>>> import albumentations as A
|
|
895
|
+
>>> import cv2
|
|
896
|
+
>>>
|
|
897
|
+
>>> # Prepare sample data
|
|
898
|
+
>>> image = np.random.randint(0, 256, (100, 100, 3), dtype=np.uint8)
|
|
899
|
+
>>> mask = np.random.randint(0, 2, (100, 100), dtype=np.uint8)
|
|
900
|
+
>>> bboxes = np.array([[10, 10, 50, 50], [40, 40, 80, 80]], dtype=np.float32)
|
|
901
|
+
>>> bbox_labels = [1, 2]
|
|
902
|
+
>>> keypoints = np.array([[20, 30], [60, 70]], dtype=np.float32)
|
|
903
|
+
>>> keypoint_labels = [0, 1]
|
|
904
|
+
>>>
|
|
905
|
+
>>> # Example 1: Basic center crop without padding
|
|
906
|
+
>>> transform = A.Compose([
|
|
907
|
+
... A.CenterCrop(height=64, width=64),
|
|
908
|
+
... ], bbox_params=A.BboxParams(format='pascal_voc', label_fields=['bbox_labels']),
|
|
909
|
+
... keypoint_params=A.KeypointParams(format='xy', label_fields=['keypoint_labels']))
|
|
910
|
+
>>>
|
|
911
|
+
>>> # Apply the transform
|
|
912
|
+
>>> transformed = transform(
|
|
913
|
+
... image=image,
|
|
914
|
+
... mask=mask,
|
|
915
|
+
... bboxes=bboxes,
|
|
916
|
+
... bbox_labels=bbox_labels,
|
|
917
|
+
... keypoints=keypoints,
|
|
918
|
+
... keypoint_labels=keypoint_labels
|
|
919
|
+
... )
|
|
920
|
+
>>>
|
|
921
|
+
>>> # Get the transformed data
|
|
922
|
+
>>> transformed_image = transformed['image'] # Will be 64x64
|
|
923
|
+
>>> transformed_mask = transformed['mask'] # Will be 64x64
|
|
924
|
+
>>> transformed_bboxes = transformed['bboxes'] # Bounding boxes adjusted to the cropped area
|
|
925
|
+
>>> transformed_bbox_labels = transformed['bbox_labels'] # Labels for boxes that remain after cropping
|
|
926
|
+
>>> transformed_keypoints = transformed['keypoints'] # Keypoints adjusted to the cropped area
|
|
927
|
+
>>> transformed_keypoint_labels = transformed['keypoint_labels'] # Labels for keypoints that remain
|
|
928
|
+
>>>
|
|
929
|
+
>>> # Example 2: Center crop with padding when needed
|
|
930
|
+
>>> transform_padded = A.Compose([
|
|
931
|
+
... A.CenterCrop(
|
|
932
|
+
... height=120, # Larger than original image height
|
|
933
|
+
... width=120, # Larger than original image width
|
|
934
|
+
... pad_if_needed=True,
|
|
935
|
+
... border_mode=cv2.BORDER_CONSTANT,
|
|
936
|
+
... fill=0, # Black padding for image
|
|
937
|
+
... fill_mask=0 # Zero padding for mask
|
|
938
|
+
... ),
|
|
939
|
+
... ], bbox_params=A.BboxParams(format='pascal_voc', label_fields=['bbox_labels']),
|
|
940
|
+
... keypoint_params=A.KeypointParams(format='xy', label_fields=['keypoint_labels']))
|
|
941
|
+
>>>
|
|
942
|
+
>>> # Apply the padded transform
|
|
943
|
+
>>> padded_transformed = transform_padded(
|
|
944
|
+
... image=image,
|
|
945
|
+
... mask=mask,
|
|
946
|
+
... bboxes=bboxes,
|
|
947
|
+
... bbox_labels=bbox_labels,
|
|
948
|
+
... keypoints=keypoints,
|
|
949
|
+
... keypoint_labels=keypoint_labels
|
|
950
|
+
... )
|
|
951
|
+
>>>
|
|
952
|
+
>>> # The result will be 120x120 with padding
|
|
953
|
+
>>> padded_image = padded_transformed['image']
|
|
954
|
+
>>> padded_mask = padded_transformed['mask']
|
|
955
|
+
>>> padded_bboxes = padded_transformed['bboxes'] # Coordinates adjusted to the new dimensions
|
|
956
|
+
>>> padded_keypoints = padded_transformed['keypoints'] # Coordinates adjusted to the new dimensions
|
|
957
|
+
|
|
958
|
+
"""
|
|
959
|
+
|
|
960
|
+
class InitSchema(BaseCropAndPad.InitSchema):
|
|
961
|
+
height: Annotated[int, Field(ge=1)]
|
|
962
|
+
width: Annotated[int, Field(ge=1)]
|
|
963
|
+
border_mode: Literal[
|
|
964
|
+
cv2.BORDER_CONSTANT,
|
|
965
|
+
cv2.BORDER_REPLICATE,
|
|
966
|
+
cv2.BORDER_REFLECT,
|
|
967
|
+
cv2.BORDER_WRAP,
|
|
968
|
+
cv2.BORDER_REFLECT_101,
|
|
969
|
+
]
|
|
970
|
+
|
|
971
|
+
fill: tuple[float, ...] | float
|
|
972
|
+
fill_mask: tuple[float, ...] | float
|
|
973
|
+
|
|
974
|
+
def __init__(
|
|
975
|
+
self,
|
|
976
|
+
height: int,
|
|
977
|
+
width: int,
|
|
978
|
+
pad_if_needed: bool = False,
|
|
979
|
+
pad_position: Literal["center", "top_left", "top_right", "bottom_left", "bottom_right", "random"] = "center",
|
|
980
|
+
border_mode: Literal[
|
|
981
|
+
cv2.BORDER_CONSTANT,
|
|
982
|
+
cv2.BORDER_REPLICATE,
|
|
983
|
+
cv2.BORDER_REFLECT,
|
|
984
|
+
cv2.BORDER_WRAP,
|
|
985
|
+
cv2.BORDER_REFLECT_101,
|
|
986
|
+
] = cv2.BORDER_CONSTANT,
|
|
987
|
+
fill: tuple[float, ...] | float = 0.0,
|
|
988
|
+
fill_mask: tuple[float, ...] | float = 0.0,
|
|
989
|
+
p: float = 1.0,
|
|
990
|
+
):
|
|
991
|
+
super().__init__(
|
|
992
|
+
pad_if_needed=pad_if_needed,
|
|
993
|
+
border_mode=border_mode,
|
|
994
|
+
fill=fill,
|
|
995
|
+
fill_mask=fill_mask,
|
|
996
|
+
pad_position=pad_position,
|
|
997
|
+
p=p,
|
|
998
|
+
)
|
|
999
|
+
self.height = height
|
|
1000
|
+
self.width = width
|
|
1001
|
+
|
|
1002
|
+
def get_params_dependent_on_data(
|
|
1003
|
+
self,
|
|
1004
|
+
params: dict[str, Any],
|
|
1005
|
+
data: dict[str, Any],
|
|
1006
|
+
) -> dict[str, Any]:
|
|
1007
|
+
"""Get the parameters dependent on the data.
|
|
1008
|
+
|
|
1009
|
+
Args:
|
|
1010
|
+
params (dict[str, Any]): The parameters of the transform.
|
|
1011
|
+
data (dict[str, Any]): The data of the transform.
|
|
1012
|
+
|
|
1013
|
+
"""
|
|
1014
|
+
image_shape = params["shape"][:2]
|
|
1015
|
+
image_height, image_width = image_shape
|
|
1016
|
+
|
|
1017
|
+
if not self.pad_if_needed and (self.height > image_height or self.width > image_width):
|
|
1018
|
+
raise CropSizeError(
|
|
1019
|
+
f"Crop size (height, width) exceeds image dimensions (height, width):"
|
|
1020
|
+
f" {(self.height, self.width)} vs {image_shape[:2]}",
|
|
1021
|
+
)
|
|
1022
|
+
|
|
1023
|
+
# Get padding params first if needed
|
|
1024
|
+
pad_params = self._get_pad_params(image_shape, (self.height, self.width))
|
|
1025
|
+
|
|
1026
|
+
# If padding is needed, adjust the image shape for crop calculation
|
|
1027
|
+
if pad_params is not None:
|
|
1028
|
+
pad_top = pad_params["pad_top"]
|
|
1029
|
+
pad_bottom = pad_params["pad_bottom"]
|
|
1030
|
+
pad_left = pad_params["pad_left"]
|
|
1031
|
+
pad_right = pad_params["pad_right"]
|
|
1032
|
+
|
|
1033
|
+
padded_height = image_height + pad_top + pad_bottom
|
|
1034
|
+
padded_width = image_width + pad_left + pad_right
|
|
1035
|
+
padded_shape = (padded_height, padded_width)
|
|
1036
|
+
|
|
1037
|
+
# Get crop coordinates based on padded dimensions
|
|
1038
|
+
crop_coords = fcrops.get_center_crop_coords(padded_shape, (self.height, self.width))
|
|
1039
|
+
else:
|
|
1040
|
+
# Get crop coordinates based on original dimensions
|
|
1041
|
+
crop_coords = fcrops.get_center_crop_coords(image_shape, (self.height, self.width))
|
|
1042
|
+
|
|
1043
|
+
return {
|
|
1044
|
+
"crop_coords": crop_coords,
|
|
1045
|
+
"pad_params": pad_params,
|
|
1046
|
+
}
|
|
1047
|
+
|
|
1048
|
+
|
|
1049
|
+
class Crop(BaseCropAndPad):
|
|
1050
|
+
"""Crop a specific region from the input image.
|
|
1051
|
+
|
|
1052
|
+
This transform crops a rectangular region from the input image, mask, bounding boxes, and keypoints
|
|
1053
|
+
based on specified coordinates. It's useful when you want to extract a specific area of interest
|
|
1054
|
+
from your inputs.
|
|
1055
|
+
|
|
1056
|
+
Args:
|
|
1057
|
+
x_min (int): Minimum x-coordinate of the crop region (left edge). Must be >= 0. Default: 0.
|
|
1058
|
+
y_min (int): Minimum y-coordinate of the crop region (top edge). Must be >= 0. Default: 0.
|
|
1059
|
+
x_max (int): Maximum x-coordinate of the crop region (right edge). Must be > x_min. Default: 1024.
|
|
1060
|
+
y_max (int): Maximum y-coordinate of the crop region (bottom edge). Must be > y_min. Default: 1024.
|
|
1061
|
+
pad_if_needed (bool): Whether to pad if crop coordinates exceed image dimensions. Default: False.
|
|
1062
|
+
border_mode (OpenCV flag): OpenCV border mode used for padding. Default: cv2.BORDER_CONSTANT.
|
|
1063
|
+
fill (tuple[float, ...] | float): Padding value if border_mode is cv2.BORDER_CONSTANT. Default: 0.
|
|
1064
|
+
fill_mask (tuple[float, ...] | float): Padding value for masks. Default: 0.
|
|
1065
|
+
pad_position (Literal['center', 'top_left', 'top_right', 'bottom_left', 'bottom_right', 'random']):
|
|
1066
|
+
Position of padding. Default: 'center'.
|
|
1067
|
+
p (float): Probability of applying the transform. Default: 1.0.
|
|
1068
|
+
|
|
1069
|
+
Targets:
|
|
1070
|
+
image, mask, bboxes, keypoints, volume, mask3d
|
|
1071
|
+
|
|
1072
|
+
Image types:
|
|
1073
|
+
uint8, float32
|
|
1074
|
+
|
|
1075
|
+
Note:
|
|
1076
|
+
- The crop coordinates are applied as follows: x_min <= x < x_max and y_min <= y < y_max.
|
|
1077
|
+
- If pad_if_needed is False and crop region extends beyond image boundaries, it will be clipped.
|
|
1078
|
+
- If pad_if_needed is True, image will be padded to accommodate the full crop region.
|
|
1079
|
+
- For bounding boxes and keypoints, coordinates are adjusted appropriately for both padding and cropping.
|
|
1080
|
+
|
|
1081
|
+
Examples:
|
|
1082
|
+
>>> import numpy as np
|
|
1083
|
+
>>> import albumentations as A
|
|
1084
|
+
>>> import cv2
|
|
1085
|
+
>>>
|
|
1086
|
+
>>> # Prepare sample data
|
|
1087
|
+
>>> image = np.random.randint(0, 256, (100, 100, 3), dtype=np.uint8)
|
|
1088
|
+
>>> mask = np.random.randint(0, 2, (100, 100), dtype=np.uint8)
|
|
1089
|
+
>>> bboxes = np.array([[10, 10, 50, 50], [40, 40, 80, 80]], dtype=np.float32)
|
|
1090
|
+
>>> bbox_labels = [1, 2]
|
|
1091
|
+
>>> keypoints = np.array([[20, 30], [60, 70]], dtype=np.float32)
|
|
1092
|
+
>>> keypoint_labels = [0, 1]
|
|
1093
|
+
>>>
|
|
1094
|
+
>>> # Example 1: Basic crop with fixed coordinates
|
|
1095
|
+
>>> transform = A.Compose([
|
|
1096
|
+
... A.Crop(x_min=20, y_min=20, x_max=80, y_max=80),
|
|
1097
|
+
... ], bbox_params=A.BboxParams(format='pascal_voc', label_fields=['bbox_labels']),
|
|
1098
|
+
... keypoint_params=A.KeypointParams(format='xy', label_fields=['keypoint_labels']))
|
|
1099
|
+
>>>
|
|
1100
|
+
>>> # Apply the transform
|
|
1101
|
+
>>> transformed = transform(
|
|
1102
|
+
... image=image,
|
|
1103
|
+
... mask=mask,
|
|
1104
|
+
... bboxes=bboxes,
|
|
1105
|
+
... bbox_labels=bbox_labels,
|
|
1106
|
+
... keypoints=keypoints,
|
|
1107
|
+
... keypoint_labels=keypoint_labels
|
|
1108
|
+
... )
|
|
1109
|
+
>>>
|
|
1110
|
+
>>> # Get the transformed data
|
|
1111
|
+
>>> transformed_image = transformed['image'] # Will be 60x60 - cropped from (20,20) to (80,80)
|
|
1112
|
+
>>> transformed_mask = transformed['mask'] # Will be 60x60
|
|
1113
|
+
>>> transformed_bboxes = transformed['bboxes'] # Bounding boxes adjusted to the cropped area
|
|
1114
|
+
>>> transformed_bbox_labels = transformed['bbox_labels'] # Labels for boxes that remain after cropping
|
|
1115
|
+
>>>
|
|
1116
|
+
>>> # Example 2: Crop with padding when the crop region extends beyond image dimensions
|
|
1117
|
+
>>> transform_padded = A.Compose([
|
|
1118
|
+
... A.Crop(
|
|
1119
|
+
... x_min=50, y_min=50, x_max=150, y_max=150, # Extends beyond the 100x100 image
|
|
1120
|
+
... pad_if_needed=True,
|
|
1121
|
+
... border_mode=cv2.BORDER_CONSTANT,
|
|
1122
|
+
... fill=0, # Black padding for image
|
|
1123
|
+
... fill_mask=0 # Zero padding for mask
|
|
1124
|
+
... ),
|
|
1125
|
+
... ], bbox_params=A.BboxParams(format='pascal_voc', label_fields=['bbox_labels']),
|
|
1126
|
+
        ... keypoint_params=A.KeypointParams(format='xy', label_fields=['keypoint_labels']))
        >>>
        >>> # Apply the padded transform
        >>> padded_transformed = transform_padded(
        ...     image=image,
        ...     mask=mask,
        ...     bboxes=bboxes,
        ...     bbox_labels=bbox_labels,
        ...     keypoints=keypoints,
        ...     keypoint_labels=keypoint_labels
        ... )
        >>>
        >>> # The result will be 100x100 (50:150, 50:150) with padding on right and bottom
        >>> padded_image = padded_transformed['image']  # 100x100 with 50 pixels of original + 50 pixels of padding
        >>> padded_mask = padded_transformed['mask']
        >>> padded_bboxes = padded_transformed['bboxes']  # Coordinates adjusted to the cropped and padded area
        >>>
        >>> # Example 3: Crop with reflection padding and custom position
        >>> transform_reflect = A.Compose([
        ...     A.Crop(
        ...         x_min=-20, y_min=-20, x_max=80, y_max=80,  # Negative coordinates (outside image)
        ...         pad_if_needed=True,
        ...         border_mode=cv2.BORDER_REFLECT_101,  # Reflect image for padding
        ...         pad_position="top_left"  # Apply padding at top-left
        ...     ),
        ... ], bbox_params=A.BboxParams(format='pascal_voc', label_fields=['bbox_labels']))
        >>>
        >>> # The resulting crop will use reflection padding for the negative coordinates
        >>> reflect_result = transform_reflect(
        ...     image=image,
        ...     bboxes=bboxes,
        ...     bbox_labels=bbox_labels
        ... )

    """

    class InitSchema(BaseCropAndPad.InitSchema):
        x_min: Annotated[int, Field(ge=0)]
        y_min: Annotated[int, Field(ge=0)]
        x_max: Annotated[int, Field(gt=0)]
        y_max: Annotated[int, Field(gt=0)]
        border_mode: Literal[
            cv2.BORDER_CONSTANT,
            cv2.BORDER_REPLICATE,
            cv2.BORDER_REFLECT,
            cv2.BORDER_WRAP,
            cv2.BORDER_REFLECT_101,
        ]

        fill: tuple[float, ...] | float
        fill_mask: tuple[float, ...] | float

        @model_validator(mode="after")
        def _validate_coordinates(self) -> Self:
            if not self.x_min < self.x_max:
                msg = "x_max must be greater than x_min"
                raise ValueError(msg)
            if not self.y_min < self.y_max:
                msg = "y_max must be greater than y_min"
                raise ValueError(msg)

            return self

    def __init__(
        self,
        x_min: int = 0,
        y_min: int = 0,
        x_max: int = 1024,
        y_max: int = 1024,
        pad_if_needed: bool = False,
        pad_position: Literal["center", "top_left", "top_right", "bottom_left", "bottom_right", "random"] = "center",
        border_mode: Literal[
            cv2.BORDER_CONSTANT,
            cv2.BORDER_REPLICATE,
            cv2.BORDER_REFLECT,
            cv2.BORDER_WRAP,
            cv2.BORDER_REFLECT_101,
        ] = cv2.BORDER_CONSTANT,
        fill: tuple[float, ...] | float = 0,
        fill_mask: tuple[float, ...] | float = 0,
        p: float = 1.0,
    ):
        super().__init__(
            pad_if_needed=pad_if_needed,
            border_mode=border_mode,
            fill=fill,
            fill_mask=fill_mask,
            pad_position=pad_position,
            p=p,
        )
        self.x_min = x_min
        self.y_min = y_min
        self.x_max = x_max
        self.y_max = y_max

    # Helper for computing the minimum padding required to cover the crop window
    def _compute_min_padding(self, image_height: int, image_width: int) -> tuple[int, int, int, int]:
        pad_top = 0
        pad_bottom = max(0, self.y_max - image_height)
        pad_left = 0
        pad_right = max(0, self.x_max - image_width)
        return pad_top, pad_bottom, pad_left, pad_right

    # Helper for distributing the padding according to pad_position and enforcing the minimum
    def _compute_adjusted_padding(self, pad_top: int, pad_bottom: int, pad_left: int, pad_right: int) -> dict[str, int]:
        delta_h = pad_top + pad_bottom
        delta_w = pad_left + pad_right
        pad_top_dist = delta_h // 2
        pad_bottom_dist = delta_h - pad_top_dist
        pad_left_dist = delta_w // 2
        pad_right_dist = delta_w - pad_left_dist

        (pad_top_adj, pad_bottom_adj, pad_left_adj, pad_right_adj) = fgeometric.adjust_padding_by_position(
            h_top=pad_top_dist,
            h_bottom=pad_bottom_dist,
            w_left=pad_left_dist,
            w_right=pad_right_dist,
            position=self.pad_position,
            py_random=self.py_random,
        )

        final_top = max(pad_top_adj, pad_top)
        final_bottom = max(pad_bottom_adj, pad_bottom)
        final_left = max(pad_left_adj, pad_left)
        final_right = max(pad_right_adj, pad_right)

        return {
            "pad_top": final_top,
            "pad_bottom": final_bottom,
            "pad_left": final_left,
            "pad_right": final_right,
        }

    def get_params_dependent_on_data(self, params: dict[str, Any], data: dict[str, Any]) -> dict[str, Any]:
        """Get parameters for crop.

        Args:
            params (dict): Dictionary with parameters for crop.
            data (dict): Dictionary with data.

        Returns:
            dict: Dictionary with parameters for crop.

        """
        image_shape = params["shape"][:2]
        image_height, image_width = image_shape

        if not self.pad_if_needed:
            return {"crop_coords": (self.x_min, self.y_min, self.x_max, self.y_max), "pad_params": None}

        pad_top, pad_bottom, pad_left, pad_right = self._compute_min_padding(image_height, image_width)
        pad_params = None

        if any([pad_top, pad_bottom, pad_left, pad_right]):
            pad_params = self._compute_adjusted_padding(pad_top, pad_bottom, pad_left, pad_right)

        return {"crop_coords": (self.x_min, self.y_min, self.x_max, self.y_max), "pad_params": pad_params}


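# A minimal standalone sketch of the `pad_if_needed` padding computation above.
# The free function below is illustrative only (hypothetical name, not part of
# this module): it mirrors `Crop._compute_min_padding`, where only the bottom
# and right edges can require padding because `x_min`/`y_min` are validated to
# be non-negative.
def _sketch_min_padding(
    crop_box: tuple[int, int, int, int],
    image_height: int,
    image_width: int,
) -> tuple[int, int, int, int]:
    _x_min, _y_min, x_max, y_max = crop_box
    pad_bottom = max(0, y_max - image_height)
    pad_right = max(0, x_max - image_width)
    return 0, pad_bottom, 0, pad_right


# For a 100x100 image and a crop window (50, 50, 150, 150) this yields
# (0, 50, 0, 50): 50 pixels of padding on the bottom and on the right.
assert _sketch_min_padding((50, 50, 150, 150), 100, 100) == (0, 50, 0, 50)

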
class CropNonEmptyMaskIfExists(BaseCrop):
    """Crop area with mask if mask is non-empty, else make random crop.

    This transform attempts to crop a region containing a mask (non-zero pixels). If the mask is empty or not provided,
    it falls back to a random crop. This is particularly useful for segmentation tasks where you want to focus on
    regions of interest defined by the mask.

    Args:
        height (int): Vertical size of crop in pixels. Must be > 0.
        width (int): Horizontal size of crop in pixels. Must be > 0.
        ignore_values (list of int, optional): Values to ignore in mask, `0` values are always ignored.
            For example, if background value is 5, set `ignore_values=[5]` to ignore it. Default: None.
        ignore_channels (list of int, optional): Channels to ignore in mask.
            For example, if background is the first channel, set `ignore_channels=[0]` to ignore it. Default: None.
        p (float): Probability of applying the transform. Default: 1.0.

    Targets:
        image, mask, bboxes, keypoints, volume, mask3d

    Image types:
        uint8, float32

    Note:
        - If a mask is provided, the transform will try to crop an area containing non-zero (or non-ignored) pixels.
        - If no suitable area is found in the mask or no mask is provided, it will perform a random crop.
        - The crop size (height, width) must not exceed the original image dimensions.
        - Bounding boxes and keypoints are also cropped along with the image and mask.

    Raises:
        ValueError: If the specified crop size is larger than the input image dimensions.

    Examples:
        >>> import numpy as np
        >>> import albumentations as A
        >>>
        >>> # Prepare sample data
        >>> image = np.random.randint(0, 256, (100, 100, 3), dtype=np.uint8)
        >>> # Create a mask with non-empty region in the center
        >>> mask = np.zeros((100, 100), dtype=np.uint8)
        >>> mask[25:75, 25:75] = 1  # Create a non-empty region in the mask
        >>>
        >>> # Create bounding boxes and keypoints in the mask region
        >>> bboxes = np.array([
        ...     [20, 20, 60, 60],  # Box overlapping with non-empty region
        ...     [30, 30, 70, 70],  # Box mostly inside non-empty region
        ... ], dtype=np.float32)
        >>> bbox_labels = ['cat', 'dog']
        >>>
        >>> # Add some keypoints inside mask region
        >>> keypoints = np.array([
        ...     [40, 40],  # Inside non-empty region
        ...     [60, 60],  # At edge of non-empty region
        ...     [90, 90]  # Outside non-empty region
        ... ], dtype=np.float32)
        >>> keypoint_labels = ['eye', 'nose', 'ear']
        >>>
        >>> # Define transform that will crop around the non-empty mask region
        >>> transform = A.Compose([
        ...     A.CropNonEmptyMaskIfExists(
        ...         height=50,
        ...         width=50,
        ...         ignore_values=None,
        ...         ignore_channels=None,
        ...         p=1.0
        ...     ),
        ... ], bbox_params=A.BboxParams(
        ...     format='pascal_voc',
        ...     label_fields=['bbox_labels']
        ... ), keypoint_params=A.KeypointParams(
        ...     format='xy',
        ...     label_fields=['keypoint_labels']
        ... ))
        >>>
        >>> # Apply the transform
        >>> transformed = transform(
        ...     image=image,
        ...     mask=mask,
        ...     bboxes=bboxes,
        ...     bbox_labels=bbox_labels,
        ...     keypoints=keypoints,
        ...     keypoint_labels=keypoint_labels
        ... )
        >>>
        >>> # Get the transformed data
        >>> transformed_image = transformed['image']  # 50x50 image centered on mask region
        >>> transformed_mask = transformed['mask']  # 50x50 mask showing part of non-empty region
        >>> transformed_bboxes = transformed['bboxes']  # Bounding boxes adjusted to new coordinates
        >>> transformed_bbox_labels = transformed['bbox_labels']  # Labels preserved for visible boxes
        >>> transformed_keypoints = transformed['keypoints']  # Keypoints adjusted to new coordinates
        >>> transformed_keypoint_labels = transformed['keypoint_labels']  # Labels for visible keypoints

    """

    class InitSchema(BaseCrop.InitSchema):
        ignore_values: list[int] | None
        ignore_channels: list[int] | None
        height: Annotated[int, Field(ge=1)]
        width: Annotated[int, Field(ge=1)]

    def __init__(
        self,
        height: int,
        width: int,
        ignore_values: list[int] | None = None,
        ignore_channels: list[int] | None = None,
        p: float = 1.0,
    ):
        super().__init__(p=p)

        self.height = height
        self.width = width
        self.ignore_values = ignore_values
        self.ignore_channels = ignore_channels

    def _preprocess_mask(self, mask: np.ndarray) -> np.ndarray:
        mask_height, mask_width = mask.shape[:2]

        if self.ignore_values is not None:
            ignore_values_np = np.array(self.ignore_values)
            mask = np.where(np.isin(mask, ignore_values_np), 0, mask)

        if mask.ndim == NUM_MULTI_CHANNEL_DIMENSIONS and self.ignore_channels is not None:
            target_channels = np.array([ch for ch in range(mask.shape[-1]) if ch not in self.ignore_channels])
            mask = np.take(mask, target_channels, axis=-1)

        if self.height > mask_height or self.width > mask_width:
            raise ValueError(
                f"Crop size ({self.height},{self.width}) is larger than image ({mask_height},{mask_width})",
            )

        return mask

    def get_params_dependent_on_data(
        self,
        params: dict[str, Any],
        data: dict[str, Any],
    ) -> dict[str, Any]:
        """Get crop coordinates based on mask content.

        Args:
            params (dict[str, Any]): The parameters of the transform.
            data (dict[str, Any]): The data of the transform.

        """
        if "mask" in data:
            mask = self._preprocess_mask(data["mask"])
        elif "masks" in data and len(data["masks"]):
            masks = data["masks"]
            mask = self._preprocess_mask(np.copy(masks[0]))
            for m in masks[1:]:
                mask |= self._preprocess_mask(m)
        else:
            msg = "Can not find mask for CropNonEmptyMaskIfExists"
            raise RuntimeError(msg)

        mask_height, mask_width = mask.shape[:2]

        if mask.any():
            # Find non-zero regions in mask
            mask_sum = mask.sum(axis=-1) if mask.ndim == NUM_MULTI_CHANNEL_DIMENSIONS else mask
            non_zero_yx = np.argwhere(mask_sum)
            y, x = self.py_random.choice(non_zero_yx)

            # Calculate crop coordinates centered around chosen point
            x_min = x - self.py_random.randint(0, self.width - 1)
            y_min = y - self.py_random.randint(0, self.height - 1)
            x_min = np.clip(x_min, 0, mask_width - self.width)
            y_min = np.clip(y_min, 0, mask_height - self.height)
        else:
            # Random crop if no non-zero regions
            x_min = self.py_random.randint(0, mask_width - self.width)
            y_min = self.py_random.randint(0, mask_height - self.height)

        x_max = x_min + self.width
        y_max = y_min + self.height

        return {"crop_coords": (x_min, y_min, x_max, y_max)}


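# A minimal sketch of the mask-guided coordinate selection used by
# CropNonEmptyMaskIfExists above: pick a random non-zero pixel, place a crop
# window of the requested size around it with a random offset, then clip the
# window to the image. Illustrative only (hypothetical helper name); it uses
# plain `random`/`numpy` instead of the transform's internal RNG state.
import random

import numpy as np


def _sketch_nonempty_crop(mask: np.ndarray, height: int, width: int) -> tuple[int, int, int, int]:
    mask_height, mask_width = mask.shape[:2]
    non_zero_yx = np.argwhere(mask)
    y, x = non_zero_yx[random.randrange(len(non_zero_yx))]
    # Random offset so the chosen pixel can land anywhere inside the crop
    x_min = int(np.clip(x - random.randint(0, width - 1), 0, mask_width - width))
    y_min = int(np.clip(y - random.randint(0, height - 1), 0, mask_height - height))
    return x_min, y_min, x_min + width, y_min + height


demo_mask = np.zeros((100, 100), dtype=np.uint8)
demo_mask[25:75, 25:75] = 1
x1, y1, x2, y2 = _sketch_nonempty_crop(demo_mask, 50, 50)
assert demo_mask[y1:y2, x1:x2].any()  # the crop always overlaps the non-empty region

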
class BaseRandomSizedCropInitSchema(BaseTransformInitSchema):
    size: Annotated[tuple[int, int], AfterValidator(check_range_bounds(1, None))]


class _BaseRandomSizedCrop(DualTransform):
    """Base class for transforms that crop an image randomly and resize it to a specific size.

    This abstract class provides the foundation for RandomSizedCrop and RandomResizedCrop transforms.
    It handles cropping and resizing for different data types (image, mask, bboxes, keypoints) while
    maintaining their spatial relationships.

    Child classes must implement the `get_params_dependent_on_data` method to determine how the
    crop coordinates are selected according to transform-specific parameters and logic.

    Args:
        size (tuple[int, int]): Target size (height, width) after cropping and resizing.
        interpolation (OpenCV flag): Flag that is used to specify the interpolation algorithm
            for image resizing. Default: cv2.INTER_LINEAR.
        mask_interpolation (OpenCV flag): Flag that is used to specify the interpolation
            algorithm for mask resizing. Default: cv2.INTER_NEAREST.
        area_for_downscale (Literal[None, "image", "image_mask"]): Controls automatic use of INTER_AREA interpolation
            for downscaling. Options:
            - None: No automatic interpolation selection, always use the specified interpolation method
            - "image": Use INTER_AREA when downscaling images, retain specified interpolation for upscaling and masks
            - "image_mask": Use INTER_AREA when downscaling both images and masks
            Default: None.
        p (float): Probability of applying the transform. Default: 1.0.

    Targets:
        image, mask, bboxes, keypoints, volume, mask3d

    Image types:
        uint8, float32

    Note:
        This class is not meant to be used directly. Instead, use derived transforms
        like RandomSizedCrop or RandomResizedCrop that implement specific crop selection
        strategies.
        When area_for_downscale is set, INTER_AREA interpolation will be used automatically for
        downscaling (when the crop is larger than the target size), which provides better quality for size reduction.

    Examples:
        >>> import numpy as np
        >>> import albumentations as A
        >>> import cv2
        >>>
        >>> # Example of a custom transform that inherits from _BaseRandomSizedCrop
        >>> class CustomRandomCrop(_BaseRandomSizedCrop):
        ...     def __init__(
        ...         self,
        ...         size=(224, 224),
        ...         custom_parameter=0.5,
        ...         interpolation=cv2.INTER_LINEAR,
        ...         mask_interpolation=cv2.INTER_NEAREST,
        ...         area_for_downscale="image",
        ...         p=1.0
        ...     ):
        ...         super().__init__(
        ...             size=size,
        ...             interpolation=interpolation,
        ...             mask_interpolation=mask_interpolation,
        ...             area_for_downscale=area_for_downscale,
        ...             p=p,
        ...         )
        ...         self.custom_parameter = custom_parameter
        ...
        ...     def get_params_dependent_on_data(self, params, data):
        ...         # Custom logic to select crop coordinates
        ...         image_height, image_width = params["shape"][:2]
        ...
        ...         # Simple example: calculate crop size based on custom_parameter
        ...         crop_height = int(image_height * self.custom_parameter)
        ...         crop_width = int(image_width * self.custom_parameter)
        ...
        ...         # Random position
        ...         y1 = self.py_random.randint(0, image_height - crop_height + 1)
        ...         x1 = self.py_random.randint(0, image_width - crop_width + 1)
        ...         y2 = y1 + crop_height
        ...         x2 = x1 + crop_width
        ...
        ...         return {"crop_coords": (x1, y1, x2, y2)}
        >>>
        >>> # Prepare sample data
        >>> image = np.random.randint(0, 256, (100, 100, 3), dtype=np.uint8)
        >>> mask = np.random.randint(0, 2, (100, 100), dtype=np.uint8)
        >>> bboxes = np.array([[10, 10, 50, 50], [40, 40, 80, 80]], dtype=np.float32)
        >>> bbox_labels = [1, 2]
        >>> keypoints = np.array([[20, 30], [60, 70]], dtype=np.float32)
        >>> keypoint_labels = [0, 1]
        >>>
        >>> # Create a pipeline with our custom transform
        >>> transform = A.Compose(
        ...     [CustomRandomCrop(size=(64, 64), custom_parameter=0.6, area_for_downscale="image")],
        ...     bbox_params=A.BboxParams(format='pascal_voc', label_fields=['bbox_labels']),
        ...     keypoint_params=A.KeypointParams(format='xy', label_fields=['keypoint_labels'])
        ... )
        >>>
        >>> # Apply the transform
        >>> transformed = transform(
        ...     image=image,
        ...     mask=mask,
        ...     bboxes=bboxes,
        ...     bbox_labels=bbox_labels,
        ...     keypoints=keypoints,
        ...     keypoint_labels=keypoint_labels
        ... )
        >>>
        >>> # Get the transformed data
        >>> transformed_image = transformed['image']  # Will be 64x64
        >>> transformed_mask = transformed['mask']  # Will be 64x64
        >>> transformed_bboxes = transformed['bboxes']  # Bounding boxes adjusted to new dimensions
        >>> transformed_bbox_labels = transformed['bbox_labels']  # Labels for bboxes that remain after cropping
        >>> transformed_keypoints = transformed['keypoints']  # Keypoints adjusted to new dimensions
        >>> transformed_keypoint_labels = transformed['keypoint_labels']  # Labels for keypoints that remain

    """

    class InitSchema(BaseRandomSizedCropInitSchema):
        interpolation: Literal[
            cv2.INTER_NEAREST,
            cv2.INTER_NEAREST_EXACT,
            cv2.INTER_LINEAR,
            cv2.INTER_CUBIC,
            cv2.INTER_AREA,
            cv2.INTER_LANCZOS4,
            cv2.INTER_LINEAR_EXACT,
        ]
        mask_interpolation: Literal[
            cv2.INTER_NEAREST,
            cv2.INTER_NEAREST_EXACT,
            cv2.INTER_LINEAR,
            cv2.INTER_CUBIC,
            cv2.INTER_AREA,
            cv2.INTER_LANCZOS4,
            cv2.INTER_LINEAR_EXACT,
        ]
        area_for_downscale: Literal[None, "image", "image_mask"]

    def __init__(
        self,
        size: tuple[int, int],
        interpolation: Literal[
            cv2.INTER_NEAREST,
            cv2.INTER_NEAREST_EXACT,
            cv2.INTER_LINEAR,
            cv2.INTER_CUBIC,
            cv2.INTER_AREA,
            cv2.INTER_LANCZOS4,
            cv2.INTER_LINEAR_EXACT,
        ] = cv2.INTER_LINEAR,
        mask_interpolation: Literal[
            cv2.INTER_NEAREST,
            cv2.INTER_NEAREST_EXACT,
            cv2.INTER_LINEAR,
            cv2.INTER_CUBIC,
            cv2.INTER_AREA,
            cv2.INTER_LANCZOS4,
            cv2.INTER_LINEAR_EXACT,
        ] = cv2.INTER_NEAREST,
        area_for_downscale: Literal[None, "image", "image_mask"] = None,
        p: float = 1.0,
    ):
        super().__init__(p=p)
        self.size = size
        self.interpolation = interpolation
        self.mask_interpolation = mask_interpolation
        self.area_for_downscale = area_for_downscale

    def _get_interpolation_for_resize(self, crop_shape: tuple[int, int], target_type: str) -> int:
        """Get the appropriate interpolation method for resizing.

        Args:
            crop_shape: Shape of the crop (height, width)
            target_type: Either "image" or "mask" to determine base interpolation

        Returns:
            OpenCV interpolation flag

        """
        crop_height, crop_width = crop_shape
        target_height, target_width = self.size

        # Determine if this is downscaling
        is_downscale = (crop_height > target_height) or (crop_width > target_width)

        # Use INTER_AREA for downscaling if configured
        if is_downscale and (
            (target_type == "image" and self.area_for_downscale in ["image", "image_mask"])
            or (target_type == "mask" and self.area_for_downscale == "image_mask")
        ):
            return cv2.INTER_AREA
        # Get base interpolation
        return self.interpolation if target_type == "image" else self.mask_interpolation

    def apply(
        self,
        img: np.ndarray,
        crop_coords: tuple[int, int, int, int],
        **params: Any,
    ) -> np.ndarray:
        """Apply the crop to the image.

        Args:
            img (np.ndarray): The image to crop.
            crop_coords (tuple[int, int, int, int]): The coordinates of the crop.
            **params (Any): Additional parameters.

        """
        crop = fcrops.crop(img, *crop_coords)
        interpolation = self._get_interpolation_for_resize(crop.shape[:2], "image")
        return fgeometric.resize(crop, self.size, interpolation)

    def apply_to_mask(
        self,
        mask: np.ndarray,
        crop_coords: tuple[int, int, int, int],
        **params: Any,
    ) -> np.ndarray:
        """Apply the crop to the mask.

        Args:
            mask (np.ndarray): The mask to crop.
            crop_coords (tuple[int, int, int, int]): The coordinates of the crop.
            **params (Any): Additional parameters.

        """
        crop = fcrops.crop(mask, *crop_coords)
        interpolation = self._get_interpolation_for_resize(crop.shape[:2], "mask")
        return fgeometric.resize(crop, self.size, interpolation)

    def apply_to_bboxes(
        self,
        bboxes: np.ndarray,
        crop_coords: tuple[int, int, int, int],
        **params: Any,
    ) -> np.ndarray:
        """Apply the crop to the bounding boxes.

        Args:
            bboxes (np.ndarray): The bounding boxes to crop.
            crop_coords (tuple[int, int, int, int]): The coordinates of the crop.
            **params (Any): Additional parameters.

        """
        return fcrops.crop_bboxes_by_coords(bboxes, crop_coords, params["shape"])

    def apply_to_keypoints(
        self,
        keypoints: np.ndarray,
        crop_coords: tuple[int, int, int, int],
        **params: Any,
    ) -> np.ndarray:
        """Apply the crop to the keypoints.

        Args:
            keypoints (np.ndarray): The keypoints to crop.
            crop_coords (tuple[int, int, int, int]): The coordinates of the crop.
            **params (Any): Additional parameters.

        """
        # First, crop the keypoints
        cropped_keypoints = fcrops.crop_keypoints_by_coords(keypoints, crop_coords)

        # Calculate the dimensions of the crop
        crop_height = crop_coords[3] - crop_coords[1]
        crop_width = crop_coords[2] - crop_coords[0]

        # Calculate scaling factors
        scale_x = self.size[1] / crop_width
        scale_y = self.size[0] / crop_height

        # Scale the cropped keypoints
        return fgeometric.keypoints_scale(cropped_keypoints, scale_x, scale_y)

    def apply_to_images(
        self,
        images: np.ndarray,
        crop_coords: tuple[int, int, int, int],
        **params: Any,
    ) -> np.ndarray:
        """Apply the crop and resize to a volume/images.

        This method crops the volume first (reducing data size), then resizes each
        2D slice of the cropped volume with the selected interpolation.

        Args:
            images (np.ndarray): The volume/images to crop and resize with shape (D, H, W) or (D, H, W, C).
            crop_coords (tuple[int, int, int, int]): The coordinates of the crop.
            **params (Any): Additional parameters.

        """
        # First crop the volume using volume_crop_yx (reduces data size)
        crop = fcrops.volume_crop_yx(images, *crop_coords)

        # Get interpolation method based on crop dimensions
        interpolation = self._get_interpolation_for_resize(crop.shape[1:3], "image")

        # Then resize the smaller cropped volume using the selected interpolation
        return np.stack([fgeometric.resize(crop[i], self.size, interpolation) for i in range(images.shape[0])])

    def apply_to_volume(
        self,
        volume: np.ndarray,
        crop_coords: tuple[int, int, int, int],
        **params: Any,
    ) -> np.ndarray:
        """Apply the crop and resize to a volume.

        Args:
            volume (np.ndarray): The volume to crop.
            crop_coords (tuple[int, int, int, int]): The coordinates of the crop.
            **params (Any): Additional parameters.

        """
        return self.apply_to_images(volume, crop_coords, **params)

    def apply_to_mask3d(
        self,
        mask3d: np.ndarray,
        crop_coords: tuple[int, int, int, int],
        **params: Any,
    ) -> np.ndarray:
        """Apply the crop and resize to a mask3d.

        Args:
            mask3d (np.ndarray): The mask3d to crop.
            crop_coords (tuple[int, int, int, int]): The coordinates of the crop.
            **params (Any): Additional parameters.

        """
        return self.apply_to_images(mask3d, crop_coords, **params)


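# A minimal sketch of the interpolation selection implemented by
# `_get_interpolation_for_resize` above: INTER_AREA is only substituted when
# the crop is being downscaled, and only for the targets that
# `area_for_downscale` opts in. Illustrative free function (hypothetical name,
# not part of this module's API).
import cv2


def _sketch_pick_interpolation(
    crop_shape: tuple[int, int],
    target_size: tuple[int, int],
    target_type: str,  # "image" or "mask"
    area_for_downscale: str | None,
    base_interpolation: int,
) -> int:
    is_downscale = crop_shape[0] > target_size[0] or crop_shape[1] > target_size[1]
    if is_downscale and (
        (target_type == "image" and area_for_downscale in ("image", "image_mask"))
        or (target_type == "mask" and area_for_downscale == "image_mask")
    ):
        return cv2.INTER_AREA
    return base_interpolation


# Downscaling a 200x200 crop to 64x64 with area_for_downscale="image":
assert _sketch_pick_interpolation((200, 200), (64, 64), "image", "image", cv2.INTER_LINEAR) == cv2.INTER_AREA
# Upscaling keeps the configured interpolation:
assert _sketch_pick_interpolation((32, 32), (64, 64), "image", "image", cv2.INTER_LINEAR) == cv2.INTER_LINEAR

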
class RandomSizedCrop(_BaseRandomSizedCrop):
    """Crop a random part of the input and rescale it to a specific size.

    This transform first crops a random portion of the input and then resizes it to a specified size.
    The size of the random crop is controlled by the 'min_max_height' parameter.

    Args:
        min_max_height (tuple[int, int]): Minimum and maximum height of the crop in pixels.
        size (tuple[int, int]): Target size for the output image, i.e. (height, width) after crop and resize.
        w2h_ratio (float): Aspect ratio (width/height) of crop. Default: 1.0
        interpolation (OpenCV flag): Flag that is used to specify the interpolation algorithm. Should be one of:
            cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4.
            Default: cv2.INTER_LINEAR.
        mask_interpolation (OpenCV flag): Flag that is used to specify the interpolation algorithm for mask.
            Should be one of: cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4.
            Default: cv2.INTER_NEAREST.
        area_for_downscale (Literal[None, "image", "image_mask"]): Controls automatic use of INTER_AREA interpolation
            for downscaling. Options:
            - None: No automatic interpolation selection, always use the specified interpolation method
            - "image": Use INTER_AREA when downscaling images, retain specified interpolation for upscaling and masks
            - "image_mask": Use INTER_AREA when downscaling both images and masks
            Default: None.
        p (float): Probability of applying the transform. Default: 1.0

    Targets:
        image, mask, bboxes, keypoints, volume, mask3d

    Image types:
        uint8, float32

    Note:
        - The crop size is randomly selected for each execution within the range specified by 'min_max_height'.
        - The aspect ratio of the crop is determined by the 'w2h_ratio' parameter.
        - After cropping, the result is resized to the specified 'size'.
        - Bounding boxes that end up fully outside the cropped area will be removed.
        - Keypoints that end up outside the cropped area will be removed.
        - This transform differs from RandomResizedCrop in that it allows more control over the crop size
          through the 'min_max_height' parameter, rather than using a scale parameter.
        - When area_for_downscale is set, INTER_AREA interpolation will be used automatically for
          downscaling (when the crop is larger than the target size), which provides better quality for size reduction.

    Mathematical Details:
        1. A random crop height h is sampled from the range [min_max_height[0], min_max_height[1]].
        2. The crop width w is calculated as: w = h * w2h_ratio
        3. A random location for the crop is selected within the input image.
        4. The image is cropped to the size (h, w).
        5. The crop is then resized to the specified 'size'.

    Examples:
        >>> import numpy as np
        >>> import albumentations as A
        >>> import cv2
        >>>
        >>> # Prepare sample data
        >>> image = np.random.randint(0, 256, (100, 100, 3), dtype=np.uint8)
        >>> mask = np.random.randint(0, 2, (100, 100), dtype=np.uint8)
        >>> bboxes = np.array([[10, 10, 50, 50], [40, 40, 80, 80]], dtype=np.float32)
        >>> bbox_labels = [1, 2]
        >>> keypoints = np.array([[20, 30], [60, 70]], dtype=np.float32)
        >>> keypoint_labels = [0, 1]
        >>>
        >>> # Define transform with parameters as tuples
        >>> transform = A.Compose([
        ...     A.RandomSizedCrop(
        ...         min_max_height=(50, 80),
        ...         size=(64, 64),
        ...         w2h_ratio=1.0,
        ...         interpolation=cv2.INTER_LINEAR,
        ...         mask_interpolation=cv2.INTER_NEAREST,
        ...         area_for_downscale="image",  # Use INTER_AREA for image downscaling
        ...         p=1.0
        ...     ),
        ... ], bbox_params=A.BboxParams(format='pascal_voc', label_fields=['bbox_labels']),
        ... keypoint_params=A.KeypointParams(format='xy', label_fields=['keypoint_labels']))
        >>>
        >>> # Apply the transform
        >>> transformed = transform(
        ...     image=image,
        ...     mask=mask,
        ...     bboxes=bboxes,
        ...     bbox_labels=bbox_labels,
        ...     keypoints=keypoints,
        ...     keypoint_labels=keypoint_labels
        ... )
        >>>
        >>> # Get the transformed data
        >>> transformed_image = transformed['image']  # Shape: (64, 64, 3)
        >>> transformed_mask = transformed['mask']  # Shape: (64, 64)
        >>> transformed_bboxes = transformed['bboxes']  # Bounding boxes adjusted to new crop and size
        >>> transformed_bbox_labels = transformed['bbox_labels']  # Labels for the preserved bboxes
        >>> transformed_keypoints = transformed['keypoints']  # Keypoints adjusted to new crop and size
        >>> transformed_keypoint_labels = transformed['keypoint_labels']  # Labels for the preserved keypoints

    """

    _targets = ALL_TARGETS

    class InitSchema(BaseTransformInitSchema):
        interpolation: Literal[
            cv2.INTER_NEAREST,
            cv2.INTER_NEAREST_EXACT,
            cv2.INTER_LINEAR,
            cv2.INTER_CUBIC,
            cv2.INTER_AREA,
            cv2.INTER_LANCZOS4,
            cv2.INTER_LINEAR_EXACT,
        ]
        mask_interpolation: Literal[
            cv2.INTER_NEAREST,
            cv2.INTER_NEAREST_EXACT,
            cv2.INTER_LINEAR,
            cv2.INTER_CUBIC,
            cv2.INTER_AREA,
            cv2.INTER_LANCZOS4,
            cv2.INTER_LINEAR_EXACT,
        ]
        min_max_height: OnePlusIntRangeType
        w2h_ratio: Annotated[float, Field(gt=0)]
        size: Annotated[tuple[int, int], AfterValidator(check_range_bounds(1, None))]
        area_for_downscale: Literal[None, "image", "image_mask"]

    def __init__(
        self,
        min_max_height: tuple[int, int],
        size: tuple[int, int],
        w2h_ratio: float = 1.0,
        interpolation: Literal[
            cv2.INTER_NEAREST,
            cv2.INTER_NEAREST_EXACT,
            cv2.INTER_LINEAR,
            cv2.INTER_CUBIC,
            cv2.INTER_AREA,
            cv2.INTER_LANCZOS4,
            cv2.INTER_LINEAR_EXACT,
        ] = cv2.INTER_LINEAR,
        mask_interpolation: Literal[
            cv2.INTER_NEAREST,
            cv2.INTER_NEAREST_EXACT,
            cv2.INTER_LINEAR,
            cv2.INTER_CUBIC,
            cv2.INTER_AREA,
            cv2.INTER_LANCZOS4,
            cv2.INTER_LINEAR_EXACT,
        ] = cv2.INTER_NEAREST,
        area_for_downscale: Literal[None, "image", "image_mask"] = None,
        p: float = 1.0,
    ):
        super().__init__(
            size=size,
            interpolation=interpolation,
            mask_interpolation=mask_interpolation,
            area_for_downscale=area_for_downscale,
            p=p,
        )
        self.min_max_height = min_max_height
        self.w2h_ratio = w2h_ratio

    def get_params_dependent_on_data(
        self,
        params: dict[str, Any],
        data: dict[str, Any],
    ) -> dict[str, tuple[int, int, int, int]]:
        """Get the parameters dependent on the data.

        Args:
            params (dict[str, Any]): The parameters of the transform.
            data (dict[str, Any]): The data of the transform.

        """
        image_shape = params["shape"][:2]

        crop_height = self.py_random.randint(*self.min_max_height)
        crop_width = int(crop_height * self.w2h_ratio)

        crop_shape = (crop_height, crop_width)

        h_start = self.py_random.random()
        w_start = self.py_random.random()

        crop_coords = fcrops.get_crop_coords(image_shape, crop_shape, h_start, w_start)

        return {"crop_coords": crop_coords}


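# A minimal sketch of how RandomSizedCrop above samples its crop shape: the
# height is drawn uniformly from `min_max_height` and the width follows from
# `w2h_ratio`. Illustrative only (hypothetical helper); the transform then
# resizes the resulting crop to `size`.
import random


def _sketch_sample_crop_shape(min_max_height: tuple[int, int], w2h_ratio: float) -> tuple[int, int]:
    crop_height = random.randint(*min_max_height)
    crop_width = int(crop_height * w2h_ratio)
    return crop_height, crop_width


h, w = _sketch_sample_crop_shape((50, 80), w2h_ratio=2.0)
assert 50 <= h <= 80 and w == int(h * 2.0)

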
class RandomResizedCrop(_BaseRandomSizedCrop):
    """Crop a random part of the input and rescale it to a specified size.

    This transform first crops a random portion of the input image (or mask, bounding boxes, keypoints)
    and then resizes the crop to a specified size. It's particularly useful for training neural networks
    on images of varying sizes and aspect ratios.

    Args:
        size (tuple[int, int]): Target size for the output image, i.e. (height, width) after crop and resize.
        scale (tuple[float, float]): Range of the random size of the crop relative to the input size.
            For example, (0.08, 1.0) means the crop size will be between 8% and 100% of the input size.
            Default: (0.08, 1.0)
        ratio (tuple[float, float]): Range of aspect ratios of the random crop.
            For example, (0.75, 1.3333) allows crop aspect ratios from 3:4 to 4:3.
            Default: (0.75, 1.3333333333333333)
        interpolation (OpenCV flag): Flag that is used to specify the interpolation algorithm. Should be one of:
            cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4.
            Default: cv2.INTER_LINEAR
        mask_interpolation (OpenCV flag): Flag that is used to specify the interpolation algorithm for mask.
            Should be one of: cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4.
            Default: cv2.INTER_NEAREST
        area_for_downscale (Literal[None, "image", "image_mask"]): Controls automatic use of INTER_AREA interpolation
            for downscaling. Options:
            - None: No automatic interpolation selection, always use the specified interpolation method
            - "image": Use INTER_AREA when downscaling images, retain specified interpolation for upscaling and masks
            - "image_mask": Use INTER_AREA when downscaling both images and masks
            Default: None.
        p (float): Probability of applying the transform. Default: 1.0

    Targets:
        image, mask, bboxes, keypoints, volume, mask3d

    Image types:
        uint8, float32

    Note:
        - This transform attempts to crop a random area with an aspect ratio and relative size
          specified by 'ratio' and 'scale' parameters. If it fails to find a suitable crop after
          10 attempts, it will return a crop from the center of the image.
        - The crop's aspect ratio is defined as width / height.
        - Bounding boxes that end up fully outside the cropped area will be removed.
        - Keypoints that end up outside the cropped area will be removed.
        - After cropping, the result is resized to the specified size.
        - When area_for_downscale is set, INTER_AREA interpolation will be used automatically for
          downscaling (when the crop is larger than the target size), which provides better quality for size reduction.

    Mathematical Details:
        1. A target area A is sampled from the range [scale[0] * input_area, scale[1] * input_area].
        2. A target aspect ratio r is sampled from the range [ratio[0], ratio[1]].
        3. The crop width and height are computed as:
           w = sqrt(A * r)
           h = sqrt(A / r)
        4. If w and h are within the input image dimensions, the crop is accepted.
           Otherwise, steps 1-3 are repeated (up to 10 times).
        5. If no valid crop is found after 10 attempts, a centered crop is taken.
        6. The crop is then resized to the specified size.

    Examples:
        >>> import numpy as np
        >>> import albumentations as A
        >>> import cv2
        >>>
        >>> # Prepare sample data
        >>> image = np.random.randint(0, 256, (100, 100, 3), dtype=np.uint8)
        >>> mask = np.random.randint(0, 2, (100, 100), dtype=np.uint8)
        >>> bboxes = np.array([[10, 10, 50, 50], [40, 40, 80, 80]], dtype=np.float32)
        >>> bbox_labels = [1, 2]
        >>> keypoints = np.array([[20, 30], [60, 70]], dtype=np.float32)
        >>> keypoint_labels = [0, 1]
        >>>
        >>> # Define transform with parameters as tuples
        >>> transform = A.Compose([
        ...     A.RandomResizedCrop(
        ...         size=(64, 64),
        ...         scale=(0.5, 0.9),  # Crop size will be 50-90% of original image
        ...         ratio=(0.75, 1.33),  # Aspect ratio will vary from 3:4 to 4:3
        ...         interpolation=cv2.INTER_LINEAR,
        ...         mask_interpolation=cv2.INTER_NEAREST,
        ...         area_for_downscale="image",  # Use INTER_AREA for image downscaling
        ...         p=1.0
        ...     ),
        ... ], bbox_params=A.BboxParams(format='pascal_voc', label_fields=['bbox_labels']),
        ... keypoint_params=A.KeypointParams(format='xy', label_fields=['keypoint_labels']))
        >>>
        >>> # Apply the transform
        >>> transformed = transform(
        ...     image=image,
        ...     mask=mask,
        ...     bboxes=bboxes,
        ...     bbox_labels=bbox_labels,
        ...     keypoints=keypoints,
        ...     keypoint_labels=keypoint_labels
        ... )
        >>>
        >>> # Get the transformed data
        >>> transformed_image = transformed['image']  # Shape: (64, 64, 3)
        >>> transformed_mask = transformed['mask']  # Shape: (64, 64)
        >>> transformed_bboxes = transformed['bboxes']  # Bounding boxes adjusted to new crop and size
        >>> transformed_bbox_labels = transformed['bbox_labels']  # Labels for the preserved bboxes
        >>> transformed_keypoints = transformed['keypoints']  # Keypoints adjusted to new crop and size
        >>> transformed_keypoint_labels = transformed['keypoint_labels']  # Labels for the preserved keypoints

    """

    _targets = ALL_TARGETS

    class InitSchema(BaseTransformInitSchema):
        scale: Annotated[tuple[float, float], AfterValidator(check_range_bounds(0, 1)), AfterValidator(nondecreasing)]
        ratio: Annotated[
            tuple[float, float],
            AfterValidator(check_range_bounds(0, None)),
            AfterValidator(nondecreasing),
        ]
        size: Annotated[tuple[int, int], AfterValidator(check_range_bounds(1, None))]
        interpolation: Literal[
            cv2.INTER_NEAREST,
            cv2.INTER_NEAREST_EXACT,
            cv2.INTER_LINEAR,
            cv2.INTER_CUBIC,
            cv2.INTER_AREA,
            cv2.INTER_LANCZOS4,
            cv2.INTER_LINEAR_EXACT,
        ]
        mask_interpolation: Literal[
            cv2.INTER_NEAREST,
            cv2.INTER_NEAREST_EXACT,
            cv2.INTER_LINEAR,
            cv2.INTER_CUBIC,
            cv2.INTER_AREA,
            cv2.INTER_LANCZOS4,
            cv2.INTER_LINEAR_EXACT,
        ]
        area_for_downscale: Literal[None, "image", "image_mask"]

    def __init__(
        self,
        size: tuple[int, int],
        scale: tuple[float, float] = (0.08, 1.0),
        ratio: tuple[float, float] = (0.75, 1.3333333333333333),
        interpolation: Literal[
            cv2.INTER_NEAREST,
            cv2.INTER_NEAREST_EXACT,
            cv2.INTER_LINEAR,
            cv2.INTER_CUBIC,
            cv2.INTER_AREA,
            cv2.INTER_LANCZOS4,
            cv2.INTER_LINEAR_EXACT,
        ] = cv2.INTER_LINEAR,
        mask_interpolation: Literal[
            cv2.INTER_NEAREST,
            cv2.INTER_NEAREST_EXACT,
            cv2.INTER_LINEAR,
            cv2.INTER_CUBIC,
            cv2.INTER_AREA,
            cv2.INTER_LANCZOS4,
            cv2.INTER_LINEAR_EXACT,
        ] = cv2.INTER_NEAREST,
        area_for_downscale: Literal[None, "image", "image_mask"] = None,
        p: float = 1.0,
    ):
        super().__init__(
            size=size,
            interpolation=interpolation,
            mask_interpolation=mask_interpolation,
            area_for_downscale=area_for_downscale,
            p=p,
        )
        self.scale = scale
        self.ratio = ratio

    def get_params_dependent_on_data(
        self,
        params: dict[str, Any],
        data: dict[str, Any],
    ) -> dict[str, tuple[int, int, int, int]]:
        """Get the parameters dependent on the data.

        Args:
            params (dict[str, Any]): The parameters of the transform.
            data (dict[str, Any]): The data of the transform.

        """
        image_shape = params["shape"][:2]
        image_height, image_width = image_shape

        area = image_height * image_width

        # Pre-compute constants to avoid repeated calculations
        scale_min_area = self.scale[0] * area
        scale_max_area = self.scale[1] * area
        log_ratio_min = math.log(self.ratio[0])
        log_ratio_max = math.log(self.ratio[1])

        for _ in range(10):
            target_area = self.py_random.uniform(scale_min_area, scale_max_area)
            aspect_ratio = math.exp(self.py_random.uniform(log_ratio_min, log_ratio_max))

            width = round(math.sqrt(target_area * aspect_ratio))
            height = round(math.sqrt(target_area / aspect_ratio))

            if 0 < width <= image_width and 0 < height <= image_height:
                h_start = self.py_random.random()
                w_start = self.py_random.random()
                crop_coords = fcrops.get_crop_coords(image_shape, (height, width), h_start, w_start)
                return {"crop_coords": crop_coords}

        # Fallback to a central crop
        in_ratio = image_width / image_height
        if in_ratio < self.ratio[0]:
            width = image_width
            height = round(image_width / self.ratio[0])
        elif in_ratio > self.ratio[1]:
            height = image_height
            width = round(height * self.ratio[1])
        else:  # whole image
            width = image_width
            height = image_height

        crop_coords = fcrops.get_center_crop_coords(image_shape, (height, width))
        return {"crop_coords": crop_coords}


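# A minimal sketch of the scale/ratio rejection sampling used by
# RandomResizedCrop above: sample a target area and a log-uniform aspect
# ratio, derive (h, w), and accept the first pair that fits in the image
# (up to 10 tries). Illustrative only (hypothetical helper); the fallback to
# a centered crop is omitted here.
import math
import random


def _sketch_sample_crop(
    image_height: int,
    image_width: int,
    scale: tuple[float, float],
    ratio: tuple[float, float],
) -> tuple[int, int] | None:
    area = image_height * image_width
    for _ in range(10):
        target_area = random.uniform(scale[0] * area, scale[1] * area)
        aspect_ratio = math.exp(random.uniform(math.log(ratio[0]), math.log(ratio[1])))
        width = round(math.sqrt(target_area * aspect_ratio))
        height = round(math.sqrt(target_area / aspect_ratio))
        if 0 < width <= image_width and 0 < height <= image_height:
            return height, width
    return None  # caller falls back to a center crop


shape = _sketch_sample_crop(100, 100, scale=(0.08, 1.0), ratio=(0.75, 1.3333))
assert shape is None or (shape[0] <= 100 and shape[1] <= 100)

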
class RandomCropNearBBox(BaseCrop):
    """Crop bbox from image with random shift by x,y coordinates.

    Args:
        max_part_shift (float, (float, float)): Max shift in `height` and `width` dimensions relative
            to `cropping_bbox` dimension.
            If max_part_shift is a single float, the range will be (0, max_part_shift).
            Default: (0, 0.3).
        cropping_bbox_key (str): Additional target key for cropping box. Default: `cropping_bbox`.
        p (float): probability of applying the transform. Default: 1.

    Targets:
        image, mask, bboxes, keypoints, volume, mask3d

    Image types:
        uint8, float32

    Examples:
        >>> aug = Compose([RandomCropNearBBox(max_part_shift=(0.1, 0.5), cropping_bbox_key='test_bbox')],
        ...               bbox_params=BboxParams("pascal_voc"))
        >>> result = aug(image=image, bboxes=bboxes, test_bbox=[0, 5, 10, 20])

    """

    _targets = ALL_TARGETS

    class InitSchema(BaseTransformInitSchema):
        max_part_shift: ZeroOneRangeType
        cropping_bbox_key: str

    def __init__(
        self,
        max_part_shift: tuple[float, float] | float = (0, 0.3),
        cropping_bbox_key: str = "cropping_bbox",
        p: float = 1.0,
    ):
        super().__init__(p=p)
        self.max_part_shift = cast("tuple[float, float]", max_part_shift)
        self.cropping_bbox_key = cropping_bbox_key

    def get_params_dependent_on_data(
        self,
        params: dict[str, Any],
        data: dict[str, Any],
    ) -> dict[str, tuple[float, ...]]:
        """Get the parameters dependent on the data.

        Args:
            params (dict[str, Any]): The parameters of the transform.
            data (dict[str, Any]): The data of the transform.

        """
        bbox = data[self.cropping_bbox_key]

        image_shape = params["shape"][:2]

        bbox = self._clip_bbox(bbox, image_shape)

        h_max_shift = round((bbox[3] - bbox[1]) * self.max_part_shift[0])
        w_max_shift = round((bbox[2] - bbox[0]) * self.max_part_shift[1])

        x_min = bbox[0] - self.py_random.randint(-w_max_shift, w_max_shift)
        x_max = bbox[2] + self.py_random.randint(-w_max_shift, w_max_shift)

        y_min = bbox[1] - self.py_random.randint(-h_max_shift, h_max_shift)
        y_max = bbox[3] + self.py_random.randint(-h_max_shift, h_max_shift)

        crop_coords = self._clip_bbox((x_min, y_min, x_max, y_max), image_shape)

        if crop_coords[0] == crop_coords[2] or crop_coords[1] == crop_coords[3]:
            crop_shape = (bbox[3] - bbox[1], bbox[2] - bbox[0])
            crop_coords = fcrops.get_center_crop_coords(image_shape, crop_shape)

        return {"crop_coords": crop_coords}

    @property
    def targets_as_params(self) -> list[str]:
        """Get the targets as parameters.

        Returns:
            list[str]: The targets as parameters.

        """
        return [self.cropping_bbox_key]


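# A minimal sketch of the random shift applied by RandomCropNearBBox above:
# each side of the reference bbox is displaced by up to `max_part_shift` of
# the bbox height/width, then the window is clipped to the image.
# Illustrative only (hypothetical helper name, not part of the library).
import random


def _sketch_shift_bbox(
    bbox: tuple[int, int, int, int],
    max_part_shift: tuple[float, float],
    image_height: int,
    image_width: int,
) -> tuple[int, int, int, int]:
    h_max_shift = round((bbox[3] - bbox[1]) * max_part_shift[0])
    w_max_shift = round((bbox[2] - bbox[0]) * max_part_shift[1])
    x_min = bbox[0] - random.randint(-w_max_shift, w_max_shift)
    x_max = bbox[2] + random.randint(-w_max_shift, w_max_shift)
    y_min = bbox[1] - random.randint(-h_max_shift, h_max_shift)
    y_max = bbox[3] + random.randint(-h_max_shift, h_max_shift)
    return (
        max(0, x_min),
        max(0, y_min),
        min(image_width, x_max),
        min(image_height, y_max),
    )


crop = _sketch_shift_bbox((10, 20, 60, 80), (0.1, 0.3), image_height=100, image_width=100)
assert 0 <= crop[0] <= crop[2] <= 100 and 0 <= crop[1] <= crop[3] <= 100

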
class BBoxSafeRandomCrop(BaseCrop):
|
|
2306
|
+
"""Crop an area from image while ensuring all bounding boxes are preserved in the crop.
|
|
2307
|
+
|
|
2308
|
+
Similar to AtLeastOneBboxRandomCrop, but with a key difference:
|
|
2309
|
+
- BBoxSafeRandomCrop ensures ALL bounding boxes are preserved in the crop when erosion_rate=0.0
|
|
2310
|
+
- AtLeastOneBboxRandomCrop ensures AT LEAST ONE bounding box is present in the crop
|
|
2311
|
+
|
|
2312
|
+
This makes BBoxSafeRandomCrop more suitable for scenarios where:
|
|
2313
|
+
- You need to preserve all objects in the scene
|
|
2314
|
+
- Losing any bounding box would be problematic (e.g., rare object classes)
|
|
2315
|
+
- You're training a model that needs to detect multiple objects simultaneously
|
|
2316
|
+
|
|
2317
|
+
The algorithm:
|
|
2318
|
+
1. If bounding boxes exist:
|
|
2319
|
+
- Computes the union of all bounding boxes
|
|
2320
|
+
- Applies erosion based on erosion_rate to this union
|
|
2321
|
+
- Clips the eroded union to valid image coordinates [0,1]
|
|
2322
|
+
- Randomly samples crop coordinates within the clipped union area
|
|
2323
|
+
2. If no bounding boxes exist:
|
|
2324
|
+
- Computes crop height based on erosion_rate
|
|
2325
|
+
- Sets crop width to maintain original aspect ratio
|
|
2326
|
+
- Randomly places the crop within the image
|
|
2327
|
+
|
|
2328
|
+
Args:
|
|
2329
|
+
erosion_rate (float): Controls how much the valid crop region can deviate from the bbox union.
|
|
2330
|
+
Must be in range [0.0, 1.0].
|
|
2331
|
+
- 0.0: crop must contain the exact bbox union (safest option that guarantees all boxes are preserved)
|
|
2332
|
+
- 1.0: crop can deviate maximally from the bbox union (increases likelihood of cutting off some boxes)
|
|
2333
|
+
Defaults to 0.0.
|
|
2334
|
+
p (float, optional): Probability of applying the transform. Defaults to 1.0.
|
|
2335
|
+
|
|
2336
|
+
Targets:
|
|
2337
|
+
image, mask, bboxes, keypoints, volume, mask3d
|
|
2338
|
+
|
|
2339
|
+
Image types:
|
|
2340
|
+
uint8, float32
|
|
2341
|
+
|
|
2342
|
+
Raises:
|
|
2343
|
+
CropSizeError: If requested crop size exceeds image dimensions
|
|
2344
|
+
|
|
2345
|
+
Examples:
|
|
2346
|
+
>>> import numpy as np
|
|
2347
|
+
>>> import albumentations as A
|
|
2348
|
+
>>>
|
|
2349
|
+
>>> # Prepare sample data
|
|
2350
|
+
>>> image = np.random.randint(0, 256, (100, 100, 3), dtype=np.uint8)
|
|
2351
|
+
>>> mask = np.random.randint(0, 2, (100, 100), dtype=np.uint8)
|
|
2352
|
+
>>> bboxes = np.array([[10, 10, 50, 50], [40, 40, 80, 80]], dtype=np.float32)
|
|
2353
|
+
>>> bbox_labels = [1, 2]
|
|
2354
|
+
>>> keypoints = np.array([[20, 30], [60, 70]], dtype=np.float32)
|
|
2355
|
+
>>> keypoint_labels = [0, 1]
|
|
2356
|
+
>>>
|
|
2357
|
+
>>> # Define transform with erosion_rate parameter
|
|
2358
|
+
>>> transform = A.Compose([
|
|
2359
|
+
... A.BBoxSafeRandomCrop(erosion_rate=0.2),
|
|
2360
|
+
... ], bbox_params=A.BboxParams(format='pascal_voc', label_fields=['bbox_labels']),
|
|
2361
|
+
... keypoint_params=A.KeypointParams(format='xy', label_fields=['keypoint_labels']))
|
|
2362
|
+
>>>
|
|
2363
|
+
>>> # Apply the transform
|
|
2364
|
+
>>> result = transform(
|
|
2365
|
+
... image=image,
|
|
2366
|
+
... mask=mask,
|
|
2367
|
+
... bboxes=bboxes,
|
|
2368
|
+
... bbox_labels=bbox_labels,
|
|
2369
|
+
... keypoints=keypoints,
|
|
2370
|
+
... keypoint_labels=keypoint_labels
|
|
2371
|
+
... )
|
|
2372
|
+
>>>
|
|
2373
|
+
>>> # Get the transformed data
|
|
2374
|
+
>>> transformed_image = result['image'] # Cropped image containing all bboxes
|
|
2375
|
+
>>> transformed_mask = result['mask'] # Cropped mask
|
|
2376
|
+
>>> transformed_bboxes = result['bboxes'] # All bounding boxes preserved with adjusted coordinates
|
|
2377
|
+
>>> transformed_bbox_labels = result['bbox_labels'] # Original labels preserved
|
|
2378
|
+
>>> transformed_keypoints = result['keypoints'] # Keypoints with adjusted coordinates
|
|
2379
|
+
>>> transformed_keypoint_labels = result['keypoint_labels'] # Original keypoint labels preserved
|
|
2380
|
+
>>>
|
|
2381
|
+
>>> # Example with a different erosion_rate
|
|
2382
|
+
>>> transform_more_flexible = A.Compose([
|
|
2383
|
+
... A.BBoxSafeRandomCrop(erosion_rate=0.5), # More flexibility in crop placement
|
|
2384
|
+
... ], bbox_params=A.BboxParams(format='pascal_voc', label_fields=['bbox_labels']))
|
|
2385
|
+
>>>
|
|
2386
|
+
>>> # Apply transform with only image and bboxes
|
|
2387
|
+
>>> result_bboxes_only = transform_more_flexible(
|
|
2388
|
+
... image=image,
|
|
2389
|
+
... bboxes=bboxes,
|
|
2390
|
+
... bbox_labels=bbox_labels
|
|
2391
|
+
... )
|
|
2392
|
+
>>> transformed_image = result_bboxes_only['image']
|
|
2393
|
+
>>> transformed_bboxes = result_bboxes_only['bboxes'] # All bboxes still preserved
|
|
2394
|
+
|
|
2395
|
+
Note:
|
|
2396
|
+
- IMPORTANT: Using erosion_rate > 0.0 may result in some bounding boxes being cut off,
|
|
2397
|
+
particularly narrow boxes at the boundary of the union area. For guaranteed preservation
|
|
2398
|
+
of all bounding boxes, use erosion_rate=0.0.
|
|
2399
|
+
- Aspect ratio is preserved only when no bounding boxes are present
|
|
2400
|
+
- May be more restrictive in crop placement compared to AtLeastOneBboxRandomCrop
|
|
2401
|
+
- The crop size is determined by the bounding boxes when present
|
|
2402
|
+
|
|
2403
|
+
"""
|
|
2404
|
+
|
|
2405
|
+
    _targets = ALL_TARGETS

    class InitSchema(BaseTransformInitSchema):
        erosion_rate: float = Field(
            ge=0.0,
            le=1.0,
        )

    def __init__(self, erosion_rate: float = 0.0, p: float = 1.0):
        super().__init__(p=p)
        self.erosion_rate = erosion_rate

    def _get_coords_no_bbox(self, image_shape: tuple[int, int]) -> tuple[int, int, int, int]:
        image_height, image_width = image_shape

        erosive_h = int(image_height * (1.0 - self.erosion_rate))
        crop_height = image_height if erosive_h >= image_height else self.py_random.randint(erosive_h, image_height)

        crop_width = int(crop_height * image_width / image_height)

        h_start = self.py_random.random()
        w_start = self.py_random.random()

        crop_shape = (crop_height, crop_width)

        return fcrops.get_crop_coords(image_shape, crop_shape, h_start, w_start)

    def get_params_dependent_on_data(
        self,
        params: dict[str, Any],
        data: dict[str, Any],
    ) -> dict[str, tuple[int, int, int, int]]:
        """Get the parameters dependent on the data.

        Args:
            params (dict[str, Any]): The parameters of the transform.
            data (dict[str, Any]): The data of the transform.

        """
        image_shape = params["shape"][:2]

        if len(data["bboxes"]) == 0:  # less likely, this class is for use with bboxes.
            crop_coords = self._get_coords_no_bbox(image_shape)
            return {"crop_coords": crop_coords}

        bbox_union = union_of_bboxes(bboxes=data["bboxes"], erosion_rate=self.erosion_rate)

        if bbox_union is None:
            crop_coords = self._get_coords_no_bbox(image_shape)
            return {"crop_coords": crop_coords}

        x_min, y_min, x_max, y_max = bbox_union

        x_min = np.clip(x_min, 0, 1)
        y_min = np.clip(y_min, 0, 1)
        x_max = np.clip(x_max, x_min, 1)
        y_max = np.clip(y_max, y_min, 1)

        image_height, image_width = image_shape

        crop_x_min = int(x_min * self.py_random.random() * image_width)
        crop_y_min = int(y_min * self.py_random.random() * image_height)

        bbox_xmax = x_max + (1 - x_max) * self.py_random.random()
        bbox_ymax = y_max + (1 - y_max) * self.py_random.random()
        crop_x_max = int(bbox_xmax * image_width)
        crop_y_max = int(bbox_ymax * image_height)

        return {"crop_coords": (crop_x_min, crop_y_min, crop_x_max, crop_y_max)}


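# A worked example of the sampling in BBoxSafeRandomCrop above (illustrative values,
# assuming a 100x100 image whose normalized bbox union is (0.2, 0.3, 0.6, 0.7) and
# erosion_rate=0.0): crop_x_min is drawn from [0, 20) and crop_x_max from [60, 100),
# so the crop always contains the full union [20, 60] along x; y behaves the same way.
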
class RandomSizedBBoxSafeCrop(BBoxSafeRandomCrop):
    """Crop a random part of the input and rescale it to a specific size without loss of bounding boxes.

    This transform first attempts to crop a random portion of the input image while ensuring that all bounding boxes
    remain within the cropped area. It then resizes the crop to the specified size. This is particularly useful for
    object detection tasks where preserving all objects in the image is crucial while also standardizing the image size.

    Args:
        height (int): Height of the output image after resizing.
        width (int): Width of the output image after resizing.
        erosion_rate (float): A value between 0.0 and 1.0 that determines the minimum allowable size of the crop
            as a fraction of the original image size. For example, an erosion_rate of 0.2 means the crop will be
            at least 80% of the original image height and width. Default: 0.0 (no minimum size).
        interpolation (OpenCV flag): Flag that is used to specify the interpolation algorithm. Should be one of:
            cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4.
            Default: cv2.INTER_LINEAR.
        mask_interpolation (OpenCV flag): Flag that is used to specify the interpolation algorithm for mask.
            Should be one of: cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4.
            Default: cv2.INTER_NEAREST.
        p (float): Probability of applying the transform. Default: 1.0.

    Targets:
        image, mask, bboxes, keypoints, volume, mask3d

    Image types:
        uint8, float32

    Note:
        - This transform ensures that all bounding boxes in the original image are fully contained within the
          cropped area. If it's not possible to find such a crop (e.g., when bounding boxes are too spread out),
          it will default to cropping the entire image.
        - After cropping, the result is resized to the specified (height, width) size.
        - Bounding box coordinates are adjusted to match the new image size.
        - Keypoints are moved along with the crop and scaled to the new image size.
        - If there are no bounding boxes in the image, it will fall back to a random crop.

    Mathematical Details:
        1. A crop region is selected that includes all bounding boxes.
        2. The crop size is determined by the erosion_rate:
           min_crop_size = (1 - erosion_rate) * original_size
        3. If the selected crop is smaller than min_crop_size, it's expanded to meet this requirement.
        4. The crop is then resized to the specified (height, width) size.
        5. Bounding box coordinates are transformed to match the new image size:
           new_coord = (old_coord - crop_start) * (new_size / crop_size)

    Examples:
        >>> import numpy as np
        >>> import albumentations as A
        >>> import cv2
        >>>
        >>> # Prepare sample data
        >>> image = np.random.randint(0, 256, (300, 300, 3), dtype=np.uint8)
        >>> mask = np.random.randint(0, 2, (300, 300), dtype=np.uint8)
        >>>
        >>> # Create bounding boxes with some overlap and separation
        >>> bboxes = np.array([
        ...     [10, 10, 80, 80],      # top-left box
        ...     [100, 100, 200, 200],  # center box
        ...     [210, 210, 290, 290]   # bottom-right box
        ... ], dtype=np.float32)
        >>> bbox_labels = ['cat', 'dog', 'bird']
        >>>
        >>> # Create keypoints inside the bounding boxes
        >>> keypoints = np.array([
        ...     [45, 45],    # inside first box
        ...     [150, 150],  # inside second box
        ...     [250, 250]   # inside third box
        ... ], dtype=np.float32)
        >>> keypoint_labels = ['nose', 'eye', 'tail']
        >>>
        >>> # Example 1: Basic usage with default parameters
        >>> transform_basic = A.Compose([
        ...     A.RandomSizedBBoxSafeCrop(height=224, width=224, p=1.0),
        ... ], bbox_params=A.BboxParams(
        ...     format='pascal_voc',
        ...     label_fields=['bbox_labels']
        ... ), keypoint_params=A.KeypointParams(
        ...     format='xy',
        ...     label_fields=['keypoint_labels']
        ... ))
        >>>
        >>> # Apply the transform
        >>> result_basic = transform_basic(
        ...     image=image,
        ...     mask=mask,
        ...     bboxes=bboxes,
        ...     bbox_labels=bbox_labels,
        ...     keypoints=keypoints,
        ...     keypoint_labels=keypoint_labels
        ... )
        >>>
        >>> # Access the transformed data
        >>> transformed_image = result_basic['image']  # Shape will be (224, 224, 3)
        >>> transformed_mask = result_basic['mask']  # Shape will be (224, 224)
        >>> transformed_bboxes = result_basic['bboxes']  # All original bounding boxes preserved
        >>> transformed_bbox_labels = result_basic['bbox_labels']  # Original labels preserved
        >>> transformed_keypoints = result_basic['keypoints']  # Keypoints adjusted to new coordinates
        >>> transformed_keypoint_labels = result_basic['keypoint_labels']  # Original labels preserved
        >>>
        >>> # Example 2: With erosion_rate for more flexibility in crop placement
        >>> transform_erosion = A.Compose([
        ...     A.RandomSizedBBoxSafeCrop(
        ...         height=256,
        ...         width=256,
        ...         erosion_rate=0.2,  # Allows 20% flexibility in crop placement
        ...         interpolation=cv2.INTER_CUBIC,  # Higher quality interpolation
        ...         mask_interpolation=cv2.INTER_NEAREST,  # Preserve mask edges
        ...         p=1.0
        ...     ),
        ... ], bbox_params=A.BboxParams(
        ...     format='pascal_voc',
        ...     label_fields=['bbox_labels'],
        ...     min_visibility=0.3  # Only keep bboxes with at least 30% visibility
        ... ), keypoint_params=A.KeypointParams(
        ...     format='xy',
        ...     label_fields=['keypoint_labels'],
        ...     remove_invisible=True  # Remove keypoints outside the crop
        ... ))
        >>>
        >>> # Apply the transform with erosion
        >>> result_erosion = transform_erosion(
        ...     image=image,
        ...     mask=mask,
        ...     bboxes=bboxes,
        ...     bbox_labels=bbox_labels,
        ...     keypoints=keypoints,
        ...     keypoint_labels=keypoint_labels
        ... )
        >>>
        >>> # With erosion_rate=0.2, the crop has more flexibility in placement
        >>> # while still ensuring all bounding boxes are included

    """

    _targets = ALL_TARGETS

    class InitSchema(BaseTransformInitSchema):
        height: Annotated[int, Field(ge=1)]
        width: Annotated[int, Field(ge=1)]
        erosion_rate: float = Field(
            ge=0.0,
            le=1.0,
        )
        interpolation: Literal[
            cv2.INTER_NEAREST,
            cv2.INTER_NEAREST_EXACT,
            cv2.INTER_LINEAR,
            cv2.INTER_CUBIC,
            cv2.INTER_AREA,
            cv2.INTER_LANCZOS4,
            cv2.INTER_LINEAR_EXACT,
        ]
        mask_interpolation: Literal[
            cv2.INTER_NEAREST,
            cv2.INTER_NEAREST_EXACT,
            cv2.INTER_LINEAR,
            cv2.INTER_CUBIC,
            cv2.INTER_AREA,
            cv2.INTER_LANCZOS4,
            cv2.INTER_LINEAR_EXACT,
        ]

    def __init__(
        self,
        height: int,
        width: int,
        erosion_rate: float = 0.0,
        interpolation: Literal[
            cv2.INTER_NEAREST,
            cv2.INTER_NEAREST_EXACT,
            cv2.INTER_LINEAR,
            cv2.INTER_CUBIC,
            cv2.INTER_AREA,
            cv2.INTER_LANCZOS4,
            cv2.INTER_LINEAR_EXACT,
        ] = cv2.INTER_LINEAR,
        mask_interpolation: Literal[
            cv2.INTER_NEAREST,
            cv2.INTER_NEAREST_EXACT,
            cv2.INTER_LINEAR,
            cv2.INTER_CUBIC,
            cv2.INTER_AREA,
            cv2.INTER_LANCZOS4,
            cv2.INTER_LINEAR_EXACT,
        ] = cv2.INTER_NEAREST,
        p: float = 1.0,
    ):
        super().__init__(erosion_rate=erosion_rate, p=p)
        self.height = height
        self.width = width
        self.interpolation = interpolation
        self.mask_interpolation = mask_interpolation

    def apply(
        self,
        img: np.ndarray,
        crop_coords: tuple[int, int, int, int],
        **params: Any,
    ) -> np.ndarray:
        """Apply the crop and resize transform to an image.

        Args:
            img (np.ndarray): The image to apply the crop and resize transform to.
            crop_coords (tuple[int, int, int, int]): The parameters for the crop.
            params (dict[str, Any]): Additional parameters for the transform.

        """
        crop = fcrops.crop(img, *crop_coords)
        return fgeometric.resize(crop, (self.height, self.width), self.interpolation)

    def apply_to_mask(
        self,
        mask: np.ndarray,
        crop_coords: tuple[int, int, int, int],
        **params: Any,
    ) -> np.ndarray:
        """Apply the crop and resize transform to a mask.

        Args:
            mask (np.ndarray): The mask to apply the crop and resize transform to.
            crop_coords (tuple[int, int, int, int]): The parameters for the crop.
            params (dict[str, Any]): Additional parameters for the transform.

        """
        crop = fcrops.crop(mask, *crop_coords)
        return fgeometric.resize(crop, (self.height, self.width), self.mask_interpolation)

    def apply_to_keypoints(
        self,
        keypoints: np.ndarray,
        crop_coords: tuple[int, int, int, int],
        **params: Any,
    ) -> np.ndarray:
        """Apply the crop and resize transform to keypoints.

        Args:
            keypoints (np.ndarray): The keypoints to apply the crop and resize transform to.
            crop_coords (tuple[int, int, int, int]): The parameters for the crop.
            params (dict[str, Any]): Additional parameters for the transform.

        Returns:
            np.ndarray: The keypoints after the crop and resize transform.

        """
        keypoints = fcrops.crop_keypoints_by_coords(keypoints, crop_coords)

        crop_height = crop_coords[3] - crop_coords[1]
        crop_width = crop_coords[2] - crop_coords[0]

        scale_y = self.height / crop_height
        scale_x = self.width / crop_width
        return fgeometric.keypoints_scale(keypoints, scale_x=scale_x, scale_y=scale_y)


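# A worked example of the keypoint rescaling in RandomSizedBBoxSafeCrop.apply_to_keypoints
# above (illustrative values only): with crop_coords=(50, 40, 250, 240) and a target size
# of height=100, width=100, the crop is 200x200, so scale_x = scale_y = 0.5 and a keypoint
# at (150, 150) maps to ((150 - 50) * 0.5, (150 - 40) * 0.5) = (50.0, 55.0).
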
class CropAndPad(DualTransform):
    """Crop and pad images by pixel amounts or fractions of image sizes.

    This transform allows for simultaneous cropping and padding of images. Cropping removes pixels from the sides
    (i.e., extracts a subimage), while padding adds pixels to the sides (e.g., black pixels). The amount of
    cropping/padding can be specified either in absolute pixels or as a fraction of the image size.

    Args:
        px (int, tuple of int, tuple of tuples of int, or None):
            The number of pixels to crop (negative values) or pad (positive values) on each side of the image.
            Either this or the parameter `percent` may be set, not both at the same time.
            - If int: crop/pad all sides by this value.
            - If tuple of 2 ints: crop/pad by (top/bottom, left/right).
            - If tuple of 4 ints: crop/pad by (top, right, bottom, left).
            - Each int can also be a tuple of 2 ints for a range, or a list of ints for discrete choices.
            Default: None.

        percent (float, tuple of float, tuple of tuples of float, or None):
            The fraction of the image size to crop (negative values) or pad (positive values) on each side.
            Either this or the parameter `px` may be set, not both at the same time.
            - If float: crop/pad all sides by this fraction.
            - If tuple of 2 floats: crop/pad by (top/bottom, left/right) fractions.
            - If tuple of 4 floats: crop/pad by (top, right, bottom, left) fractions.
            - Each float can also be a tuple of 2 floats for a range, or a list of floats for discrete choices.
            Default: None.

        border_mode (int):
            OpenCV border mode used for padding. Default: cv2.BORDER_CONSTANT.

        fill (tuple[float, ...] | float):
            The constant value to use for padding if border_mode is cv2.BORDER_CONSTANT.
            Default: 0.

        fill_mask (tuple[float, ...] | float):
            Same as fill but used for mask padding. Default: 0.

        keep_size (bool):
            If True, the output image will be resized to the input image size after cropping/padding.
            Default: True.

        sample_independently (bool):
            If True and ranges are used for px/percent, sample a value for each side independently.
            If False, sample one value and use it for all sides. Default: True.

        interpolation (int):
            OpenCV interpolation flag used for resizing if keep_size is True.
            Default: cv2.INTER_LINEAR.

        mask_interpolation (int):
            OpenCV interpolation flag used for resizing if keep_size is True.
            Should be one of: cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4.
            Default: cv2.INTER_NEAREST.

        p (float):
            Probability of applying the transform. Default: 1.0.

    Targets:
        image, mask, bboxes, keypoints, volume, mask3d

    Image types:
        uint8, float32

    Note:
        - This transform will never crop images below a height or width of 1.
        - When using pixel values (px), the image will be cropped/padded by exactly that many pixels.
        - When using percentages (percent), the amount of crop/pad will be calculated based on the image size.
        - Bounding boxes that end up fully outside the image after cropping will be removed.
        - Keypoints that end up outside the image after cropping will be removed.

    Examples:
        >>> import numpy as np
        >>> import albumentations as A
        >>> import cv2
        >>>
        >>> # Prepare sample data
        >>> image = np.random.randint(0, 256, (100, 100, 3), dtype=np.uint8)
        >>> mask = np.random.randint(0, 2, (100, 100), dtype=np.uint8)
        >>> bboxes = np.array([[10, 10, 50, 50], [40, 40, 80, 80]], dtype=np.float32)
        >>> bbox_labels = [1, 2]
        >>> keypoints = np.array([[20, 30], [60, 70]], dtype=np.float32)
        >>> keypoint_labels = [0, 1]
        >>>
        >>> # Example 1: Using px parameter with specific values for each side
        >>> # Crop 10px from top, pad 20px on right, pad 30px on bottom, crop 40px from left
        >>> transform_px = A.Compose([
        ...     A.CropAndPad(
        ...         px=(-10, 20, 30, -40),  # (top, right, bottom, left)
        ...         border_mode=cv2.BORDER_CONSTANT,
        ...         fill=128,  # Gray padding color
        ...         fill_mask=0,
        ...         keep_size=False,  # Don't resize back to original dimensions
        ...         p=1.0
        ...     ),
        ... ], bbox_params=A.BboxParams(format='pascal_voc', label_fields=['bbox_labels']),
        ...    keypoint_params=A.KeypointParams(format='xy', label_fields=['keypoint_labels']))
        >>>
        >>> # Apply the transform
        >>> result_px = transform_px(
        ...     image=image,
        ...     mask=mask,
        ...     bboxes=bboxes,
        ...     bbox_labels=bbox_labels,
        ...     keypoints=keypoints,
        ...     keypoint_labels=keypoint_labels
        ... )
        >>>
        >>> # Get the transformed data with px parameters
        >>> transformed_image_px = result_px['image']  # Shape will be different from original
        >>> transformed_mask_px = result_px['mask']
        >>> transformed_bboxes_px = result_px['bboxes']  # Adjusted to new dimensions
        >>> transformed_bbox_labels_px = result_px['bbox_labels']  # Bounding box labels after crop
        >>> transformed_keypoints_px = result_px['keypoints']  # Adjusted to new dimensions
        >>> transformed_keypoint_labels_px = result_px['keypoint_labels']  # Keypoint labels after crop
        >>>
        >>> # Example 2: Using percent parameter as a single value
        >>> # This will pad all sides by 10% of image dimensions
        >>> transform_percent = A.Compose([
        ...     A.CropAndPad(
        ...         percent=0.1,  # Pad all sides by 10%
        ...         border_mode=cv2.BORDER_REFLECT,  # Use reflection padding
        ...         keep_size=True,  # Resize back to original dimensions
        ...         p=1.0
        ...     ),
        ... ], bbox_params=A.BboxParams(format='pascal_voc', label_fields=['bbox_labels']),
        ...    keypoint_params=A.KeypointParams(format='xy', label_fields=['keypoint_labels']))
        >>>
        >>> # Apply the transform
        >>> result_percent = transform_percent(
        ...     image=image,
        ...     mask=mask,
        ...     bboxes=bboxes,
        ...     bbox_labels=bbox_labels,
        ...     keypoints=keypoints,
        ...     keypoint_labels=keypoint_labels
        ... )
        >>>
        >>> # Get the transformed data with percent parameters
        >>> # Since keep_size=True, image dimensions remain the same (100x100)
        >>> transformed_image_pct = result_percent['image']
        >>> transformed_mask_pct = result_percent['mask']
        >>> transformed_bboxes_pct = result_percent['bboxes']
        >>> transformed_bbox_labels_pct = result_percent['bbox_labels']
        >>> transformed_keypoints_pct = result_percent['keypoints']
        >>> transformed_keypoint_labels_pct = result_percent['keypoint_labels']
        >>>
        >>> # Example 3: Random padding within a range
        >>> # Pad top and bottom by 5-15%, left and right by 10-20%
        >>> transform_random = A.Compose([
        ...     A.CropAndPad(
        ...         percent=[(0.05, 0.15), (0.1, 0.2), (0.05, 0.15), (0.1, 0.2)],  # (top, right, bottom, left)
        ...         sample_independently=True,  # Sample each side independently
        ...         border_mode=cv2.BORDER_CONSTANT,
        ...         fill=0,  # Black padding
        ...         keep_size=False,
        ...         p=1.0
        ...     ),
        ... ], bbox_params=A.BboxParams(format='pascal_voc', label_fields=['bbox_labels']),
        ...    keypoint_params=A.KeypointParams(format='xy', label_fields=['keypoint_labels']))
        >>>
        >>> # Result dimensions will vary based on the random padding values chosen

    """

    _targets = ALL_TARGETS

    class InitSchema(BaseTransformInitSchema):
        px: PxType | None
        percent: PercentType | None
        keep_size: bool
        sample_independently: bool
        interpolation: Literal[
            cv2.INTER_NEAREST,
            cv2.INTER_NEAREST_EXACT,
            cv2.INTER_LINEAR,
            cv2.INTER_CUBIC,
            cv2.INTER_AREA,
            cv2.INTER_LANCZOS4,
            cv2.INTER_LINEAR_EXACT,
        ]
        mask_interpolation: Literal[
            cv2.INTER_NEAREST,
            cv2.INTER_NEAREST_EXACT,
            cv2.INTER_LINEAR,
            cv2.INTER_CUBIC,
            cv2.INTER_AREA,
            cv2.INTER_LANCZOS4,
            cv2.INTER_LINEAR_EXACT,
        ]
        fill: tuple[float, ...] | float
        fill_mask: tuple[float, ...] | float
        border_mode: Literal[
            cv2.BORDER_CONSTANT,
            cv2.BORDER_REPLICATE,
            cv2.BORDER_REFLECT,
            cv2.BORDER_WRAP,
            cv2.BORDER_REFLECT_101,
        ]

        @model_validator(mode="after")
        def _check_px_percent(self) -> Self:
            if self.px is None and self.percent is None:
                msg = "Both px and percent parameters cannot be None simultaneously."
                raise ValueError(msg)
            if self.px is not None and self.percent is not None:
                msg = "Only px or percent may be set!"
                raise ValueError(msg)

            return self

    def __init__(
        self,
        px: int | list[int] | None = None,
        percent: float | list[float] | None = None,
        keep_size: bool = True,
        sample_independently: bool = True,
        interpolation: Literal[
            cv2.INTER_NEAREST,
            cv2.INTER_NEAREST_EXACT,
            cv2.INTER_LINEAR,
            cv2.INTER_CUBIC,
            cv2.INTER_AREA,
            cv2.INTER_LANCZOS4,
            cv2.INTER_LINEAR_EXACT,
        ] = cv2.INTER_LINEAR,
        mask_interpolation: Literal[
            cv2.INTER_NEAREST,
            cv2.INTER_NEAREST_EXACT,
            cv2.INTER_LINEAR,
            cv2.INTER_CUBIC,
            cv2.INTER_AREA,
            cv2.INTER_LANCZOS4,
            cv2.INTER_LINEAR_EXACT,
        ] = cv2.INTER_NEAREST,
        border_mode: Literal[
            cv2.BORDER_CONSTANT,
            cv2.BORDER_REPLICATE,
            cv2.BORDER_REFLECT,
            cv2.BORDER_WRAP,
            cv2.BORDER_REFLECT_101,
        ] = cv2.BORDER_CONSTANT,
        fill: tuple[float, ...] | float = 0,
        fill_mask: tuple[float, ...] | float = 0,
        p: float = 1.0,
    ):
        super().__init__(p=p)

        self.px = px
        self.percent = percent

        self.border_mode = border_mode
        self.fill = fill
        self.fill_mask = fill_mask

        self.keep_size = keep_size
        self.sample_independently = sample_independently

        self.interpolation = interpolation
        self.mask_interpolation = mask_interpolation

    def apply(
        self,
        img: np.ndarray,
        crop_params: Sequence[int],
        pad_params: Sequence[int],
        fill: tuple[float, ...] | float,
        **params: Any,
    ) -> np.ndarray:
        """Apply the crop and pad transform to an image.

        Args:
            img (np.ndarray): The image to apply the crop and pad transform to.
            crop_params (Sequence[int]): The parameters for the crop.
            pad_params (Sequence[int]): The parameters for the pad.
            fill (tuple[float, ...] | float): The value to fill the image with.
            params (dict[str, Any]): Additional parameters for the transform.

        Returns:
            np.ndarray: The image after the crop and pad transform.

        """
        return fcrops.crop_and_pad(
            img,
            crop_params,
            pad_params,
            fill,
            params["shape"][:2],
            self.interpolation,
            self.border_mode,
            self.keep_size,
        )

    def apply_to_mask(
        self,
        mask: np.ndarray,
        crop_params: Sequence[int],
        pad_params: Sequence[int],
        fill_mask: tuple[float, ...] | float,
        **params: Any,
    ) -> np.ndarray:
        """Apply the crop and pad transform to a mask.

        Args:
            mask (np.ndarray): The mask to apply the crop and pad transform to.
            crop_params (Sequence[int]): The parameters for the crop.
            pad_params (Sequence[int]): The parameters for the pad.
            fill_mask (tuple[float, ...] | float): The value to fill the mask with.
            params (dict[str, Any]): Additional parameters for the transform.

        Returns:
            np.ndarray: The mask after the crop and pad transform.

        """
        return fcrops.crop_and_pad(
            mask,
            crop_params,
            pad_params,
            fill_mask,
            params["shape"][:2],
            self.mask_interpolation,
            self.border_mode,
            self.keep_size,
        )

    def apply_to_bboxes(
        self,
        bboxes: np.ndarray,
        crop_params: tuple[int, int, int, int],
        pad_params: tuple[int, int, int, int],
        result_shape: tuple[int, int],
        **params: Any,
    ) -> np.ndarray:
        """Apply the crop and pad transform to bounding boxes.

        Args:
            bboxes (np.ndarray): The bounding boxes to apply the crop and pad transform to.
            crop_params (tuple[int, int, int, int]): The parameters for the crop.
            pad_params (tuple[int, int, int, int]): The parameters for the pad.
            result_shape (tuple[int, int]): The shape of the result.
            params (dict[str, Any]): Additional parameters for the transform.

        Returns:
            np.ndarray: The bounding boxes after the crop and pad transform.

        """
        return fcrops.crop_and_pad_bboxes(bboxes, crop_params, pad_params, params["shape"][:2], result_shape)

    def apply_to_keypoints(
        self,
        keypoints: np.ndarray,
        crop_params: tuple[int, int, int, int],
        pad_params: tuple[int, int, int, int],
        result_shape: tuple[int, int],
        **params: Any,
    ) -> np.ndarray:
        """Apply the crop and pad transform to keypoints.

        Args:
            keypoints (np.ndarray): The keypoints to apply the crop and pad transform to.
            crop_params (tuple[int, int, int, int]): The parameters for the crop.
            pad_params (tuple[int, int, int, int]): The parameters for the pad.
            result_shape (tuple[int, int]): The shape of the result.
            params (dict[str, Any]): Additional parameters for the transform.

        Returns:
            np.ndarray: The keypoints after the crop and pad transform.

        """
        return fcrops.crop_and_pad_keypoints(
            keypoints,
            crop_params,
            pad_params,
            params["shape"][:2],
            result_shape,
            self.keep_size,
        )

    @staticmethod
    def __prevent_zero(val1: int, val2: int, max_val: int) -> tuple[int, int]:
        # Reduce the two opposing crop amounts so that at least one pixel of the
        # axis survives, splitting the reduction between both sides.
        regain = abs(max_val) + 1
        regain1 = regain // 2
        regain2 = regain // 2
        if regain1 + regain2 < regain:
            regain1 += 1

        if regain1 > val1:
            diff = regain1 - val1
            regain1 = val1
            regain2 += diff
        elif regain2 > val2:
            diff = regain2 - val2
            regain2 = val2
            regain1 += diff

        return val1 - regain1, val2 - regain2

    @staticmethod
    def _prevent_zero(crop_params: list[int], height: int, width: int) -> list[int]:
        # Clamp the sampled crop amounts so the crop never collapses an axis
        # below a height or width of 1 (see the Note in the class docstring).
        top, right, bottom, left = crop_params

        remaining_height = height - (top + bottom)
        remaining_width = width - (left + right)

        if remaining_height < 1:
            top, bottom = CropAndPad.__prevent_zero(top, bottom, height)
        if remaining_width < 1:
            left, right = CropAndPad.__prevent_zero(left, right, width)

        return [max(top, 0), max(right, 0), max(bottom, 0), max(left, 0)]

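    # Illustrative arithmetic for the helpers above (assuming height=100 and a
    # sampled crop of top=60, bottom=50, i.e. 110 rows requested from 100):
    #   remaining_height = 100 - 110 = -10, so __prevent_zero(60, 50, 100) computes
    #   regain = 101, regain1 = 51, regain2 = 50 and returns (60 - 51, 50 - 50) = (9, 0),
    #   leaving a valid 91-row image instead of an impossible negative height.
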
    def get_params_dependent_on_data(self, params: dict[str, Any], data: dict[str, Any]) -> dict[str, Any]:
        """Get the parameters for the crop.

        Args:
            params (dict[str, Any]): The parameters for the transform.
            data (dict[str, Any]): The data for the transform.

        Returns:
            dict[str, Any]: The parameters for the crop.

        """
        height, width = params["shape"][:2]

        if self.px is not None:
            new_params = self._get_px_params()
        else:
            percent_params = self._get_percent_params()
            new_params = [
                int(percent_params[0] * height),
                int(percent_params[1] * width),
                int(percent_params[2] * height),
                int(percent_params[3] * width),
            ]

        pad_params = [max(i, 0) for i in new_params]

        crop_params = self._prevent_zero([-min(i, 0) for i in new_params], height, width)

        top, right, bottom, left = crop_params
        crop_params = [left, top, width - right, height - bottom]
        result_rows = crop_params[3] - crop_params[1]
        result_cols = crop_params[2] - crop_params[0]
        if result_cols == width and result_rows == height:
            crop_params = []

        top, right, bottom, left = pad_params
        pad_params = [top, bottom, left, right]
        if any(pad_params):
            result_rows += top + bottom
            result_cols += left + right
        else:
            pad_params = []

        return {
            "crop_params": crop_params or None,
            "pad_params": pad_params or None,
            "fill": None if pad_params is None else self._get_pad_value(self.fill),
            "fill_mask": None
            if pad_params is None
            else self._get_pad_value(cast("Union[tuple[float, ...], float]", self.fill_mask)),
            "result_shape": (result_rows, result_cols),
        }

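    # A worked example of the parameter flow above (illustrative, assuming a 100x100
    # image and px=(-10, 20, 30, -40) as in the class docstring):
    #   new_params  = [-10, 20, 30, -40]          # (top, right, bottom, left)
    #   pad_params  = [0, 20, 30, 0]              # positive values pad
    #   raw crop    = [10, 0, 0, 40]              # negated negative values crop
    #   crop_params = [left, top, width - right, height - bottom] = [40, 10, 100, 100]
    #   result_shape = (100 - 10 + 30, 100 - 40 + 20) = (120, 80)
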
    def _get_px_params(self) -> list[int]:
        # Resolve self.px into four concrete pixel amounts in the same
        # (top, right, bottom, left) order as _get_percent_params below.
        if self.px is None:
            msg = "px is not set"
            raise ValueError(msg)

        if isinstance(self.px, int):
            return [self.px] * 4
        if len(self.px) == PAIR:
            if self.sample_independently:
                return [self.py_random.randrange(*self.px) for _ in range(4)]
            px = self.py_random.randrange(*self.px)
            return [px] * 4
        if isinstance(self.px[0], int):
            return self.px
        if len(self.px[0]) == PAIR:
            return [self.py_random.randrange(*i) for i in self.px]

        return [self.py_random.choice(i) for i in self.px]

    def _get_percent_params(self) -> list[float]:
        if self.percent is None:
            msg = "percent is not set"
            raise ValueError(msg)

        if isinstance(self.percent, float):
            params = [self.percent] * 4
        elif len(self.percent) == PAIR:
            if self.sample_independently:
                params = [self.py_random.uniform(*self.percent) for _ in range(4)]
            else:
                px = self.py_random.uniform(*self.percent)
                params = [px] * 4
        elif isinstance(self.percent[0], (int, float)):
            params = self.percent
        elif len(self.percent[0]) == PAIR:
            params = [self.py_random.uniform(*i) for i in self.percent]
        else:
            params = [self.py_random.choice(i) for i in self.percent]

        return params  # params = [top, right, bottom, left]

    def _get_pad_value(
        self,
        fill: Sequence[float] | float,
    ) -> int | float:
        if isinstance(fill, (list, tuple)):
            if len(fill) == PAIR:
                a, b = fill
                if isinstance(a, int) and isinstance(b, int):
                    return self.py_random.randint(a, b)
                return self.py_random.uniform(a, b)
            return self.py_random.choice(fill)

        if isinstance(fill, (int, float)):
            return fill

        msg = "fill should be a number or list, or tuple of two numbers."
        raise ValueError(msg)


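# Illustrative _get_pad_value behaviour for the class above (not exhaustive):
#   fill=128          -> 128 (fixed value)
#   fill=(100, 200)   -> random int drawn uniformly from [100, 200] (both ends ints)
#   fill=(0.1, 0.9)   -> random float drawn uniformly from [0.1, 0.9]
#   fill=[0, 64, 128] -> one of the listed values, chosen at random
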
class RandomCropFromBorders(BaseCrop):
    """Randomly crops the input from its borders without resizing.

    This transform randomly crops parts of the input (image, mask, bounding boxes, or keypoints)
    from each of its borders. The amount of cropping is specified as a fraction of the input's
    dimensions for each side independently.

    Args:
        crop_left (float): The maximum fraction of width to crop from the left side.
            Must be in the range [0.0, 1.0]. Default: 0.1
        crop_right (float): The maximum fraction of width to crop from the right side.
            Must be in the range [0.0, 1.0]. Default: 0.1
        crop_top (float): The maximum fraction of height to crop from the top.
            Must be in the range [0.0, 1.0]. Default: 0.1
        crop_bottom (float): The maximum fraction of height to crop from the bottom.
            Must be in the range [0.0, 1.0]. Default: 0.1
        p (float): Probability of applying the transform. Default: 1.0

    Targets:
        image, mask, bboxes, keypoints, volume, mask3d

    Image types:
        uint8, float32

    Note:
        - The actual amount of cropping for each side is randomly chosen between 0 and
          the specified maximum for each application of the transform.
        - The sum of crop_left and crop_right must not exceed 1.0, and the sum of
          crop_top and crop_bottom must not exceed 1.0. Otherwise, a ValueError will be raised.
        - This transform does not resize the input after cropping, so the output dimensions
          will be smaller than the input dimensions.
        - Bounding boxes that end up fully outside the cropped area will be removed.
        - Keypoints that end up outside the cropped area will be removed.

    Examples:
        >>> import numpy as np
        >>> import albumentations as A
        >>>
        >>> # Prepare sample data
        >>> image = np.random.randint(0, 256, (100, 100, 3), dtype=np.uint8)
        >>> mask = np.random.randint(0, 2, (100, 100), dtype=np.uint8)
        >>> bboxes = np.array([[10, 10, 50, 50], [40, 40, 80, 80]], dtype=np.float32)
        >>> bbox_labels = [1, 2]
        >>> keypoints = np.array([[20, 30], [60, 70]], dtype=np.float32)
        >>> keypoint_labels = [0, 1]
        >>>
        >>> # Define transform with crop fractions for each border
        >>> transform = A.Compose([
        ...     A.RandomCropFromBorders(
        ...         crop_left=0.1,     # Max 10% crop from left
        ...         crop_right=0.2,    # Max 20% crop from right
        ...         crop_top=0.15,     # Max 15% crop from top
        ...         crop_bottom=0.05,  # Max 5% crop from bottom
        ...         p=1.0
        ...     ),
        ... ], bbox_params=A.BboxParams(format='pascal_voc', label_fields=['bbox_labels']),
        ...    keypoint_params=A.KeypointParams(format='xy', label_fields=['keypoint_labels']))
        >>>
        >>> # Apply transform
        >>> result = transform(
        ...     image=image,
        ...     mask=mask,
        ...     bboxes=bboxes,
        ...     bbox_labels=bbox_labels,
        ...     keypoints=keypoints,
        ...     keypoint_labels=keypoint_labels
        ... )
        >>>
        >>> # Access transformed data
        >>> transformed_image = result['image']  # Reduced size image with borders cropped
        >>> transformed_mask = result['mask']  # Reduced size mask with borders cropped
        >>> transformed_bboxes = result['bboxes']  # Bounding boxes adjusted to new dimensions
        >>> transformed_bbox_labels = result['bbox_labels']  # Bounding box labels after crop
        >>> transformed_keypoints = result['keypoints']  # Keypoints adjusted to new dimensions
        >>> transformed_keypoint_labels = result['keypoint_labels']  # Keypoint labels after crop
        >>>
        >>> # The resulting output shapes will be smaller, with dimensions reduced by
        >>> # the random crop amounts from each side (within the specified maximums)
        >>> print(f"Original image shape: (100, 100, 3)")
        >>> print(f"Transformed image shape: {transformed_image.shape}")  # e.g., (85, 75, 3)

    """

    _targets = ALL_TARGETS

    class InitSchema(BaseTransformInitSchema):
        crop_left: float = Field(
            ge=0.0,
            le=1.0,
        )
        crop_right: float = Field(
            ge=0.0,
            le=1.0,
        )
        crop_top: float = Field(
            ge=0.0,
            le=1.0,
        )
        crop_bottom: float = Field(
            ge=0.0,
            le=1.0,
        )

        @model_validator(mode="after")
        def _validate_crop_values(self) -> Self:
            if self.crop_left + self.crop_right > 1.0:
                msg = "The sum of crop_left and crop_right must be <= 1."
                raise ValueError(msg)
            if self.crop_top + self.crop_bottom > 1.0:
                msg = "The sum of crop_top and crop_bottom must be <= 1."
                raise ValueError(msg)
            return self

    def __init__(
        self,
        crop_left: float = 0.1,
        crop_right: float = 0.1,
        crop_top: float = 0.1,
        crop_bottom: float = 0.1,
        p: float = 1.0,
    ):
        super().__init__(p=p)
        self.crop_left = crop_left
        self.crop_right = crop_right
        self.crop_top = crop_top
        self.crop_bottom = crop_bottom

    def get_params_dependent_on_data(
        self,
        params: dict[str, Any],
        data: dict[str, Any],
    ) -> dict[str, tuple[int, int, int, int]]:
        """Get the parameters for the crop.

        Args:
            params (dict[str, Any]): The parameters for the transform.
            data (dict[str, Any]): The data for the transform.

        Returns:
            dict[str, tuple[int, int, int, int]]: The parameters for the crop.

        """
        height, width = params["shape"][:2]

        x_min = self.py_random.randint(0, int(self.crop_left * width))
        x_max = self.py_random.randint(max(x_min + 1, int((1 - self.crop_right) * width)), width)

        y_min = self.py_random.randint(0, int(self.crop_top * height))
        y_max = self.py_random.randint(max(y_min + 1, int((1 - self.crop_bottom) * height)), height)

        crop_coords = x_min, y_min, x_max, y_max

        return {"crop_coords": crop_coords}


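# A worked example of the sampling above (illustrative, assuming a 100x100 input
# with crop_left=0.1, crop_right=0.2, crop_top=0.15, crop_bottom=0.05):
#   x_min is drawn from [0, 10] and x_max from [max(x_min + 1, 80), 100]
#   y_min is drawn from [0, 15] and y_max from [max(y_min + 1, 95), 100]
# so the output width falls in [70, 100] and the output height in [80, 100].
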
class AtLeastOneBBoxRandomCrop(BaseCrop):
    """Crop an area from image while ensuring at least one bounding box is present in the crop.

    Similar to BBoxSafeRandomCrop, but with a key difference:
    - BBoxSafeRandomCrop ensures ALL bounding boxes are preserved in the crop
    - AtLeastOneBBoxRandomCrop ensures AT LEAST ONE bounding box is present in the crop

    This makes AtLeastOneBBoxRandomCrop more flexible for scenarios where:
    - You want to focus on individual objects rather than all objects
    - You're willing to lose some bounding boxes to get more varied crops
    - The image has many bounding boxes and keeping all of them would be too restrictive

    The algorithm:
    1. If bounding boxes exist:
        - Randomly selects a reference bounding box from available boxes
        - Computes an eroded version of this box (shrunk by erosion_factor)
        - Calculates valid crop bounds that ensure overlap with the eroded box
        - Randomly samples crop coordinates within these bounds
    2. If no bounding boxes exist:
        - Uses full image dimensions as valid bounds
        - Randomly samples crop coordinates within these bounds

    Args:
        height (int): Fixed height of the crop
        width (int): Fixed width of the crop
        erosion_factor (float, optional): Factor by which to erode (shrink) the reference
            bounding box when computing valid crop regions. Must be in range [0.0, 1.0].
            - 0.0 means no erosion (crop must fully contain the reference box)
            - 1.0 means maximum erosion (crop can be anywhere that intersects the reference box)
            Defaults to 0.0.
        p (float, optional): Probability of applying the transform. Defaults to 1.0.

    Targets:
        image, mask, bboxes, keypoints, volume, mask3d

    Image types:
        uint8, float32

    Raises:
        CropSizeError: If requested crop size exceeds image dimensions

    Examples:
        >>> import numpy as np
        >>> import albumentations as A
        >>> import cv2
        >>>
        >>> # Prepare sample data
        >>> image = np.random.randint(0, 256, (300, 300, 3), dtype=np.uint8)
        >>> mask = np.random.randint(0, 2, (300, 300), dtype=np.uint8)
        >>> # Create multiple bounding boxes - the transform will ensure at least one is in the crop
        >>> bboxes = np.array([
        ...     [30, 50, 100, 140],    # first box
        ...     [150, 120, 270, 250],  # second box
        ...     [200, 30, 280, 90]     # third box
        ... ], dtype=np.float32)
        >>> bbox_labels = [1, 2, 3]
        >>> keypoints = np.array([
        ...     [50, 70],    # keypoint inside first box
        ...     [190, 170],  # keypoint inside second box
        ...     [240, 60]    # keypoint inside third box
        ... ], dtype=np.float32)
        >>> keypoint_labels = [0, 1, 2]
        >>>
        >>> # Define transform with different erosion_factor values
        >>> transform = A.Compose([
        ...     A.AtLeastOneBBoxRandomCrop(
        ...         height=200,
        ...         width=200,
        ...         erosion_factor=0.2,  # Allows moderate flexibility in crop placement
        ...         p=1.0
        ...     ),
        ... ], bbox_params=A.BboxParams(format='pascal_voc', label_fields=['bbox_labels']),
        ...    keypoint_params=A.KeypointParams(format='xy', label_fields=['keypoint_labels']))
        >>>
        >>> # Apply the transform
        >>> transformed = transform(
        ...     image=image,
        ...     mask=mask,
        ...     bboxes=bboxes,
        ...     bbox_labels=bbox_labels,
        ...     keypoints=keypoints,
        ...     keypoint_labels=keypoint_labels
        ... )
        >>>
        >>> # Get the transformed data
        >>> transformed_image = transformed['image']  # Shape: (200, 200, 3)
        >>> transformed_mask = transformed['mask']  # Shape: (200, 200)
        >>> transformed_bboxes = transformed['bboxes']  # At least one bbox is guaranteed
        >>> transformed_bbox_labels = transformed['bbox_labels']  # Labels for the preserved bboxes
        >>> transformed_keypoints = transformed['keypoints']  # Only keypoints in crop are kept
        >>> transformed_keypoint_labels = transformed['keypoint_labels']  # Their labels
        >>>
        >>> # Verify that at least one bounding box was preserved
        >>> assert len(transformed_bboxes) > 0, "Should have at least one bbox in the crop"
        >>>
        >>> # With erosion_factor=0.0, the crop must fully contain the selected reference bbox
        >>> conservative_transform = A.Compose([
        ...     A.AtLeastOneBBoxRandomCrop(
        ...         height=200,
        ...         width=200,
        ...         erosion_factor=0.0,  # No erosion - crop must fully contain a bbox
        ...         p=1.0
        ...     ),
        ... ], bbox_params=A.BboxParams(format='pascal_voc', label_fields=['bbox_labels']))
        >>>
        >>> # With erosion_factor=1.0, the crop must only intersect with the selected reference bbox
        >>> flexible_transform = A.Compose([
        ...     A.AtLeastOneBBoxRandomCrop(
        ...         height=200,
        ...         width=200,
        ...         erosion_factor=1.0,  # Maximum erosion - crop only needs to intersect a bbox
        ...         p=1.0
        ...     ),
        ... ], bbox_params=A.BboxParams(format='pascal_voc', label_fields=['bbox_labels']))

    Note:
        - Uses fixed crop dimensions (height and width)
        - Bounding boxes that end up partially outside the crop will be adjusted
        - Bounding boxes that end up completely outside the crop will be removed
        - If no bounding boxes are provided, acts as a regular random crop

    """

    _targets = ALL_TARGETS

    class InitSchema(BaseCrop.InitSchema):
        height: Annotated[int, Field(ge=1)]
        width: Annotated[int, Field(ge=1)]
        erosion_factor: Annotated[float, Field(ge=0.0, le=1.0)]

    def __init__(
        self,
        height: int,
        width: int,
        erosion_factor: float = 0.0,
        p: float = 1.0,
    ):
        super().__init__(p=p)
        self.height = height
        self.width = width
        self.erosion_factor = erosion_factor

    def get_params_dependent_on_data(
        self,
        params: dict[str, Any],
        data: dict[str, Any],
    ) -> dict[str, tuple[int, int, int, int]]:
        """Get the parameters for the crop.

        Args:
            params (dict[str, Any]): The parameters for the transform.
            data (dict[str, Any]): The data for the transform.

        """
        image_height, image_width = params["shape"][:2]
        bboxes = data.get("bboxes", [])

        if self.height > image_height or self.width > image_width:
            raise CropSizeError(
                f"Crop size (height, width) exceeds image dimensions (height, width):"
                f" {(self.height, self.width)} vs {image_height, image_width}",
            )

        if len(bboxes) > 0:
            bboxes = denormalize_bboxes(bboxes, shape=(image_height, image_width))

            # Pick a bbox amongst all possible as our reference bbox.
            reference_bbox = self.py_random.choice(bboxes)

            bbox_x1, bbox_y1, bbox_x2, bbox_y2 = reference_bbox[:4]

            # Compute valid crop bounds:
            # erosion_factor = 0.0: crop must fully contain the bbox
            # erosion_factor = 1.0: crop can be anywhere that intersects the bbox
            if self.erosion_factor < 1.0:
                # Regular case: compute eroded box dimensions
                bbox_width = bbox_x2 - bbox_x1
                bbox_height = bbox_y2 - bbox_y1
                eroded_width = bbox_width * (1.0 - self.erosion_factor)
                eroded_height = bbox_height * (1.0 - self.erosion_factor)

                min_crop_x = np.clip(
                    a=bbox_x1 + eroded_width - self.width,
                    a_min=0.0,
                    a_max=image_width - self.width,
                )
                max_crop_x = np.clip(
                    a=bbox_x2 - eroded_width,
                    a_min=0.0,
                    a_max=image_width - self.width,
                )

                min_crop_y = np.clip(
                    a=bbox_y1 + eroded_height - self.height,
                    a_min=0.0,
                    a_max=image_height - self.height,
                )
                max_crop_y = np.clip(
                    a=bbox_y2 - eroded_height,
                    a_min=0.0,
                    a_max=image_height - self.height,
                )
            else:
                # Maximum erosion case: crop can be anywhere that intersects the bbox
                min_crop_x = np.clip(
                    a=bbox_x1 - self.width,  # leftmost position that still intersects
                    a_min=0.0,
                    a_max=image_width - self.width,
                )
                max_crop_x = np.clip(
                    a=bbox_x2,  # rightmost position that still intersects
                    a_min=0.0,
                    a_max=image_width - self.width,
                )

                min_crop_y = np.clip(
                    a=bbox_y1 - self.height,  # topmost position that still intersects
                    a_min=0.0,
                    a_max=image_height - self.height,
                )
                max_crop_y = np.clip(
                    a=bbox_y2,  # bottommost position that still intersects
                    a_min=0.0,
                    a_max=image_height - self.height,
                )
        else:
            # If there are no bboxes, just crop anywhere in the image.
            min_crop_x = 0.0
            max_crop_x = image_width - self.width

            min_crop_y = 0.0
            max_crop_y = image_height - self.height

        # Randomly draw the upper-left corner of the crop.
        crop_x1 = int(self.py_random.uniform(a=min_crop_x, b=max_crop_x))
        crop_y1 = int(self.py_random.uniform(a=min_crop_y, b=max_crop_y))

        crop_x2 = crop_x1 + self.width
        crop_y2 = crop_y1 + self.height

        return {"crop_coords": (crop_x1, crop_y1, crop_x2, crop_y2)}
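
    # A worked example of the erosion_factor bounds above (illustrative values only):
    # for a 300x300 image, a 200x200 crop, and reference bbox (150, 120, 270, 250)
    # with erosion_factor=0.2, the eroded width is 120 * 0.8 = 96, so
    #   min_crop_x = clip(150 + 96 - 200, 0, 100) = 46
    #   max_crop_x = clip(270 - 96, 0, 100) = 100
    # and crop_x1 is drawn uniformly from [46, 100], guaranteeing at least 96 px of
    # x-overlap between the crop and the reference box.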