nrtk-albumentations 2.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of nrtk-albumentations might be problematic. Click here for more details.
- albumentations/__init__.py +21 -0
- albumentations/augmentations/__init__.py +23 -0
- albumentations/augmentations/blur/__init__.py +0 -0
- albumentations/augmentations/blur/functional.py +438 -0
- albumentations/augmentations/blur/transforms.py +1633 -0
- albumentations/augmentations/crops/__init__.py +0 -0
- albumentations/augmentations/crops/functional.py +494 -0
- albumentations/augmentations/crops/transforms.py +3647 -0
- albumentations/augmentations/dropout/__init__.py +0 -0
- albumentations/augmentations/dropout/channel_dropout.py +134 -0
- albumentations/augmentations/dropout/coarse_dropout.py +567 -0
- albumentations/augmentations/dropout/functional.py +1017 -0
- albumentations/augmentations/dropout/grid_dropout.py +166 -0
- albumentations/augmentations/dropout/mask_dropout.py +274 -0
- albumentations/augmentations/dropout/transforms.py +461 -0
- albumentations/augmentations/dropout/xy_masking.py +186 -0
- albumentations/augmentations/geometric/__init__.py +0 -0
- albumentations/augmentations/geometric/distortion.py +1238 -0
- albumentations/augmentations/geometric/flip.py +752 -0
- albumentations/augmentations/geometric/functional.py +4151 -0
- albumentations/augmentations/geometric/pad.py +676 -0
- albumentations/augmentations/geometric/resize.py +956 -0
- albumentations/augmentations/geometric/rotate.py +864 -0
- albumentations/augmentations/geometric/transforms.py +1962 -0
- albumentations/augmentations/mixing/__init__.py +0 -0
- albumentations/augmentations/mixing/domain_adaptation.py +787 -0
- albumentations/augmentations/mixing/domain_adaptation_functional.py +453 -0
- albumentations/augmentations/mixing/functional.py +878 -0
- albumentations/augmentations/mixing/transforms.py +832 -0
- albumentations/augmentations/other/__init__.py +0 -0
- albumentations/augmentations/other/lambda_transform.py +180 -0
- albumentations/augmentations/other/type_transform.py +261 -0
- albumentations/augmentations/pixel/__init__.py +0 -0
- albumentations/augmentations/pixel/functional.py +4226 -0
- albumentations/augmentations/pixel/transforms.py +7556 -0
- albumentations/augmentations/spectrogram/__init__.py +0 -0
- albumentations/augmentations/spectrogram/transform.py +220 -0
- albumentations/augmentations/text/__init__.py +0 -0
- albumentations/augmentations/text/functional.py +272 -0
- albumentations/augmentations/text/transforms.py +299 -0
- albumentations/augmentations/transforms3d/__init__.py +0 -0
- albumentations/augmentations/transforms3d/functional.py +393 -0
- albumentations/augmentations/transforms3d/transforms.py +1422 -0
- albumentations/augmentations/utils.py +249 -0
- albumentations/core/__init__.py +0 -0
- albumentations/core/bbox_utils.py +920 -0
- albumentations/core/composition.py +1885 -0
- albumentations/core/hub_mixin.py +299 -0
- albumentations/core/keypoints_utils.py +521 -0
- albumentations/core/label_manager.py +339 -0
- albumentations/core/pydantic.py +239 -0
- albumentations/core/serialization.py +352 -0
- albumentations/core/transforms_interface.py +976 -0
- albumentations/core/type_definitions.py +127 -0
- albumentations/core/utils.py +605 -0
- albumentations/core/validation.py +129 -0
- albumentations/pytorch/__init__.py +1 -0
- albumentations/pytorch/transforms.py +189 -0
- nrtk_albumentations-2.1.0.dist-info/METADATA +196 -0
- nrtk_albumentations-2.1.0.dist-info/RECORD +62 -0
- nrtk_albumentations-2.1.0.dist-info/WHEEL +4 -0
- nrtk_albumentations-2.1.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,956 @@
|
|
|
1
|
+
"""Transforms for resizing images and associated data.
|
|
2
|
+
|
|
3
|
+
This module provides transform classes for resizing operations, including uniform resizing,
|
|
4
|
+
scaling with aspect ratio preservation, and size-constrained transformations.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from collections.abc import Sequence
|
|
10
|
+
from typing import Any, Literal, cast
|
|
11
|
+
|
|
12
|
+
import cv2
|
|
13
|
+
import numpy as np
|
|
14
|
+
from albucore import batch_transform
|
|
15
|
+
from pydantic import Field, field_validator, model_validator
|
|
16
|
+
from typing_extensions import Self
|
|
17
|
+
|
|
18
|
+
from albumentations.core.transforms_interface import BaseTransformInitSchema, DualTransform
|
|
19
|
+
from albumentations.core.type_definitions import ALL_TARGETS
|
|
20
|
+
from albumentations.core.utils import to_tuple
|
|
21
|
+
|
|
22
|
+
from . import functional as fgeometric
|
|
23
|
+
|
|
24
|
+
__all__ = ["LongestMaxSize", "RandomScale", "Resize", "SmallestMaxSize"]
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class RandomScale(DualTransform):
    """Resize the input by a randomly sampled factor; the output size generally differs from the input size.

    Args:
        scale_limit (float or tuple[float, float]): Range of the scaling offset. A single float ``x`` is
            interpreted as the range ``(-x, x)``. The sampled offset is shifted by 1 to obtain the scale
            factor, so a tuple ``(low, high)`` produces factors in ``(1 + low, 1 + high)``.
            Default: (-0.1, 0.1).
        interpolation (OpenCV flag): Interpolation algorithm used for images. Should be one of:
            cv2.INTER_NEAREST, cv2.INTER_NEAREST_EXACT, cv2.INTER_LINEAR, cv2.INTER_LINEAR_EXACT,
            cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4.
            Default: cv2.INTER_LINEAR.
        mask_interpolation (OpenCV flag): Interpolation algorithm used for masks. Accepts the same flags
            as ``interpolation``.
            Default: cv2.INTER_NEAREST.
        area_for_downscale (Literal[None, "image", "image_mask"]): Controls automatic use of INTER_AREA
            interpolation when downscaling. Options:
            - None: Never override; always use the configured interpolation method
            - "image": Use INTER_AREA when downscaling images; masks and upscaling keep their configured method
            - "image_mask": Use INTER_AREA when downscaling both images and masks
            Default: None.
        p (float): Probability of applying the transform. Default: 0.5.

    Targets:
        image, mask, bboxes, keypoints, volume, mask3d

    Image types:
        uint8, float32

    Note:
        - The output image size is different from the input image size.
        - A single scale factor is sampled per call and used for both image sides (width and height).
        - Bounding boxes are returned unchanged: the transform treats their coordinates as scale invariant.
        - Keypoint coordinates are scaled by the sampled factor along both axes.
        - When area_for_downscale is set, INTER_AREA interpolation is used automatically for
          downscaling (scale < 1.0), which provides better quality for size reduction.

    Mathematical formulation:
        Let (W, H) be the original image dimensions and (W', H') be the output dimensions.
        The scale factor s is sampled from the range [1 + scale_limit[0], 1 + scale_limit[1]].
        Then, W' = W * s and H' = H * s.

    Examples:
        >>> import numpy as np
        >>> import albumentations as A
        >>> import cv2
        >>>
        >>> # Create sample data for demonstration
        >>> image = np.zeros((100, 100, 3), dtype=np.uint8)
        >>> # Add some shapes to visualize scaling effects
        >>> cv2.rectangle(image, (25, 25), (75, 75), (255, 0, 0), -1)  # Red square
        >>> cv2.circle(image, (50, 50), 10, (0, 255, 0), -1)  # Green circle
        >>>
        >>> # Create a mask for segmentation
        >>> mask = np.zeros((100, 100), dtype=np.uint8)
        >>> mask[25:75, 25:75] = 1  # Mask covering the red square
        >>>
        >>> # Create bounding boxes and keypoints
        >>> bboxes = np.array([[25, 25, 75, 75]])  # Box around the red square
        >>> bbox_labels = [1]
        >>> keypoints = np.array([[50, 50]])  # Center of circle
        >>> keypoint_labels = [0]
        >>>
        >>> # Apply RandomScale transform with comprehensive parameters
        >>> transform = A.Compose([
        ...     A.RandomScale(
        ...         scale_limit=(-0.3, 0.5),  # Scale between 0.7x and 1.5x
        ...         interpolation=cv2.INTER_LINEAR,
        ...         mask_interpolation=cv2.INTER_NEAREST,
        ...         area_for_downscale="image",  # Use INTER_AREA for image downscaling
        ...         p=1.0  # Always apply
        ...     )
        ... ], bbox_params=A.BboxParams(format='pascal_voc', label_fields=['bbox_labels']),
        ...    keypoint_params=A.KeypointParams(format='xy', label_fields=['keypoint_labels']))
        >>>
        >>> # Apply the transform to all targets
        >>> result = transform(
        ...     image=image,
        ...     mask=mask,
        ...     bboxes=bboxes,
        ...     bbox_labels=bbox_labels,
        ...     keypoints=keypoints,
        ...     keypoint_labels=keypoint_labels
        ... )
        >>>
        >>> # Get the transformed results
        >>> scaled_image = result['image']  # Dimensions will be between 70-150 pixels
        >>> scaled_mask = result['mask']  # Mask scaled with the same factor as the image
        >>> scaled_bboxes = result['bboxes']  # Bounding boxes for the rescaled image
        >>> scaled_bbox_labels = result['bbox_labels']  # Labels remain unchanged
        >>> scaled_keypoints = result['keypoints']  # Keypoints adjusted to new dimensions
        >>> scaled_keypoint_labels = result['keypoint_labels']  # Labels remain unchanged
        >>>
        >>> # The image dimensions will vary based on the randomly sampled scale factor
        >>> # With scale_limit=(-0.3, 0.5), dimensions could be anywhere from 70% to 150% of original

    """

    _targets = ALL_TARGETS

    class InitSchema(BaseTransformInitSchema):
        # Validation schema for the constructor arguments of RandomScale.
        scale_limit: tuple[float, float] | float
        area_for_downscale: Literal[None, "image", "image_mask"]
        interpolation: Literal[
            cv2.INTER_NEAREST,
            cv2.INTER_NEAREST_EXACT,
            cv2.INTER_LINEAR,
            cv2.INTER_CUBIC,
            cv2.INTER_AREA,
            cv2.INTER_LANCZOS4,
            cv2.INTER_LINEAR_EXACT,
        ]
        mask_interpolation: Literal[
            cv2.INTER_NEAREST,
            cv2.INTER_NEAREST_EXACT,
            cv2.INTER_LINEAR,
            cv2.INTER_CUBIC,
            cv2.INTER_AREA,
            cv2.INTER_LANCZOS4,
            cv2.INTER_LINEAR_EXACT,
        ]

        @field_validator("scale_limit")
        @classmethod
        def _check_scale_limit(cls, value: tuple[float, float] | float) -> tuple[float, float]:
            # Normalize a scalar or pair into a (low, high) tuple via the shared helper.
            return to_tuple(value)

    def __init__(
        self,
        scale_limit: tuple[float, float] | float = (-0.1, 0.1),
        interpolation: Literal[
            cv2.INTER_NEAREST,
            cv2.INTER_NEAREST_EXACT,
            cv2.INTER_LINEAR,
            cv2.INTER_CUBIC,
            cv2.INTER_AREA,
            cv2.INTER_LANCZOS4,
            cv2.INTER_LINEAR_EXACT,
        ] = cv2.INTER_LINEAR,
        mask_interpolation: Literal[
            cv2.INTER_NEAREST,
            cv2.INTER_NEAREST_EXACT,
            cv2.INTER_LINEAR,
            cv2.INTER_CUBIC,
            cv2.INTER_AREA,
            cv2.INTER_LANCZOS4,
            cv2.INTER_LINEAR_EXACT,
        ] = cv2.INTER_NEAREST,
        area_for_downscale: Literal[None, "image", "image_mask"] = None,
        p: float = 0.5,
    ):
        super().__init__(p=p)
        self.area_for_downscale = area_for_downscale
        self.mask_interpolation = mask_interpolation
        self.interpolation = interpolation
        # The cast only informs the type checker; the value is stored as received.
        self.scale_limit = cast("tuple[float, float]", scale_limit)

    def get_params(self) -> dict[str, float]:
        """Sample the scale factor for one application of the transform.

        Returns:
            dict[str, float]: ``{"scale": s}`` where ``s`` is 1 plus a value drawn uniformly
                from ``scale_limit``.

        """
        offset = self.py_random.uniform(*self.scale_limit)
        return {"scale": 1.0 + offset}

    def apply(
        self,
        img: np.ndarray,
        scale: float,
        **params: Any,
    ) -> np.ndarray:
        """Rescale an image by the sampled factor.

        Args:
            img (np.ndarray): Image to scale.
            scale (float): Scaling factor.
            **params (Any): Additional parameters (unused here).

        Returns:
            np.ndarray: Scaled image.

        """
        # Both non-None settings request INTER_AREA for images, but only when shrinking.
        force_area = self.area_for_downscale in ("image", "image_mask") and scale < 1.0
        if force_area:
            return fgeometric.scale(img, scale, cv2.INTER_AREA)
        return fgeometric.scale(img, scale, self.interpolation)

    def apply_to_mask(
        self,
        mask: np.ndarray,
        scale: float,
        **params: Any,
    ) -> np.ndarray:
        """Rescale a mask by the sampled factor.

        Args:
            mask (np.ndarray): Mask to scale.
            scale (float): Scaling factor.
            **params (Any): Additional parameters (unused here).

        Returns:
            np.ndarray: Scaled mask.

        """
        # Masks switch to INTER_AREA only under the "image_mask" setting, and only when shrinking.
        force_area = self.area_for_downscale == "image_mask" and scale < 1.0
        if force_area:
            return fgeometric.scale(mask, scale, cv2.INTER_AREA)
        return fgeometric.scale(mask, scale, self.mask_interpolation)

    def apply_to_bboxes(self, bboxes: np.ndarray, **params: Any) -> np.ndarray:
        """Return the bounding boxes untouched.

        Args:
            bboxes (np.ndarray): Bounding boxes to transform.
            **params (Any): Additional parameters (unused here).

        Returns:
            np.ndarray: The same bounding boxes; their coordinates are treated as scale invariant.

        """
        # Bounding box coordinates are scale invariant, so there is nothing to recompute.
        return bboxes

    def apply_to_keypoints(
        self,
        keypoints: np.ndarray,
        scale: float,
        **params: Any,
    ) -> np.ndarray:
        """Rescale keypoints by the sampled factor.

        Args:
            keypoints (np.ndarray): Keypoints to scale.
            scale (float): Scaling factor, used for both axes.
            **params (Any): Additional parameters (unused here).

        Returns:
            np.ndarray: Scaled keypoints.

        """
        # The same factor is passed for both scale arguments.
        return fgeometric.keypoints_scale(keypoints, scale, scale)
|
|
270
|
+
|
|
271
|
+
|
|
272
|
+
class MaxSizeTransform(DualTransform):
    """Shared base for transforms that resize according to a size constraint.

    Concrete transforms such as LongestMaxSize and SmallestMaxSize derive from this class. They compute
    a single ``scale`` factor from the input shape; this class applies that factor to images, masks,
    keypoints and volumes.

    Args:
        max_size (int, Sequence[int], optional): Maximum size constraint. The specific interpretation
            depends on the derived class. Default: None.
        max_size_hw (tuple[int | None, int | None], optional): Maximum (height, width) constraints.
            Either max_size or max_size_hw must be specified, but not both. Default: None.
        interpolation (OpenCV flag): Interpolation algorithm used for images. Should be one of:
            cv2.INTER_NEAREST, cv2.INTER_NEAREST_EXACT, cv2.INTER_LINEAR, cv2.INTER_LINEAR_EXACT,
            cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4.
            Default: cv2.INTER_LINEAR.
        mask_interpolation (OpenCV flag): Interpolation algorithm used for masks. Accepts the same flags
            as ``interpolation``.
            Default: cv2.INTER_NEAREST.
        area_for_downscale (Literal[None, "image", "image_mask"]): Controls automatic use of INTER_AREA
            interpolation when downscaling. Options:
            - None: Never override; always use the configured interpolation method
            - "image": Use INTER_AREA when downscaling images; masks and upscaling keep their configured method
            - "image_mask": Use INTER_AREA when downscaling both images and masks
            Default: None.
        p (float): Probability of applying the transform. Default: 1.

    Targets:
        image, mask, bboxes, keypoints, volume, mask3d

    Image types:
        uint8, float32

    Note:
        - This is a base class that should be extended by concrete resize transforms.
        - The scaling calculation is implemented in derived classes, which must provide ``scale``.
        - The same scale factor is applied to both dimensions; each output side is rounded to the
          nearest integer and never drops below 1 pixel.
        - Bounding boxes are returned unchanged: their coordinates are treated as scale invariant.
        - When area_for_downscale is set, INTER_AREA interpolation is used automatically for
          downscaling (scale < 1.0), which provides better quality for size reduction.

    Examples:
        >>> import numpy as np
        >>> import albumentations as A
        >>> import cv2
        >>> from albumentations.augmentations.geometric.resize import MaxSizeTransform
        >>>
        >>> # Example of creating a custom transform that extends MaxSizeTransform
        >>> class CustomMaxSize(MaxSizeTransform):
        ...     def get_params_dependent_on_data(self, params, data):
        ...         img_h, img_w = params["shape"][:2]
        ...         # Calculate scale factor - here we scale to make the image area constant
        ...         target_area = 300 * 300  # Target area of 300x300
        ...         current_area = img_h * img_w
        ...         scale = np.sqrt(target_area / current_area)
        ...         return {"scale": scale}
        >>>
        >>> # Prepare sample data
        >>> image = np.zeros((100, 200, 3), dtype=np.uint8)
        >>> # Add a rectangle to visualize the effect
        >>> cv2.rectangle(image, (50, 20), (150, 80), (255, 0, 0), -1)
        >>>
        >>> # Create a mask
        >>> mask = np.zeros((100, 200), dtype=np.uint8)
        >>> mask[20:80, 50:150] = 1
        >>>
        >>> # Create bounding boxes and keypoints
        >>> bboxes = np.array([[50, 20, 150, 80]])
        >>> bbox_labels = [1]
        >>> keypoints = np.array([[100, 50]])
        >>> keypoint_labels = [0]
        >>>
        >>> # Apply the custom transform
        >>> transform = A.Compose([
        ...     CustomMaxSize(
        ...         max_size=None,
        ...         max_size_hw=(None, None),  # Not used in our custom implementation
        ...         interpolation=cv2.INTER_LINEAR,
        ...         mask_interpolation=cv2.INTER_NEAREST,
        ...         area_for_downscale="image",  # Use INTER_AREA when downscaling images
        ...         p=1.0
        ...     )
        ... ], bbox_params=A.BboxParams(format='pascal_voc', label_fields=['bbox_labels']),
        ...    keypoint_params=A.KeypointParams(format='xy', label_fields=['keypoint_labels']))
        >>>
        >>> # Apply the transform
        >>> result = transform(
        ...     image=image,
        ...     mask=mask,
        ...     bboxes=bboxes,
        ...     bbox_labels=bbox_labels,
        ...     keypoints=keypoints,
        ...     keypoint_labels=keypoint_labels
        ... )
        >>>
        >>> # Get results (scale = sqrt(90000 / 20000), roughly 2.12)
        >>> transformed_image = result['image']  # Shape will be approximately (212, 424, 3)
        >>> transformed_mask = result['mask']  # Shape will be approximately (212, 424)
        >>> transformed_bboxes = result['bboxes']  # Bounding boxes are scale invariant
        >>> transformed_keypoints = result['keypoints']  # Keypoints scaled proportionally
        >>> transformed_bbox_labels = result['bbox_labels']  # Labels remain unchanged
        >>> transformed_keypoint_labels = result['keypoint_labels']  # Labels remain unchanged

    """

    _targets = ALL_TARGETS

    class InitSchema(BaseTransformInitSchema):
        # Validation schema for the constructor arguments shared by the max-size transforms.
        max_size: int | list[int] | None
        max_size_hw: tuple[int | None, int | None] | None
        area_for_downscale: Literal[None, "image", "image_mask"]
        interpolation: Literal[
            cv2.INTER_NEAREST,
            cv2.INTER_NEAREST_EXACT,
            cv2.INTER_LINEAR,
            cv2.INTER_CUBIC,
            cv2.INTER_AREA,
            cv2.INTER_LANCZOS4,
            cv2.INTER_LINEAR_EXACT,
        ]
        mask_interpolation: Literal[
            cv2.INTER_NEAREST,
            cv2.INTER_NEAREST_EXACT,
            cv2.INTER_LINEAR,
            cv2.INTER_CUBIC,
            cv2.INTER_AREA,
            cv2.INTER_LANCZOS4,
            cv2.INTER_LINEAR_EXACT,
        ]

        @model_validator(mode="after")
        def validate_size_parameters(self) -> Self:
            # Exactly one of the two size arguments must be supplied.
            has_max_size = self.max_size is not None
            has_max_size_hw = self.max_size_hw is not None
            if not has_max_size and not has_max_size_hw:
                raise ValueError("Either max_size or max_size_hw must be specified")
            if has_max_size and has_max_size_hw:
                raise ValueError("Only one of max_size or max_size_hw should be specified")
            return self

    def __init__(
        self,
        max_size: int | Sequence[int] | None = None,
        max_size_hw: tuple[int | None, int | None] | None = None,
        interpolation: Literal[
            cv2.INTER_NEAREST,
            cv2.INTER_NEAREST_EXACT,
            cv2.INTER_LINEAR,
            cv2.INTER_CUBIC,
            cv2.INTER_AREA,
            cv2.INTER_LANCZOS4,
            cv2.INTER_LINEAR_EXACT,
        ] = cv2.INTER_LINEAR,
        mask_interpolation: Literal[
            cv2.INTER_NEAREST,
            cv2.INTER_NEAREST_EXACT,
            cv2.INTER_LINEAR,
            cv2.INTER_CUBIC,
            cv2.INTER_AREA,
            cv2.INTER_LANCZOS4,
            cv2.INTER_LINEAR_EXACT,
        ] = cv2.INTER_NEAREST,
        area_for_downscale: Literal[None, "image", "image_mask"] = None,
        p: float = 1,
    ):
        super().__init__(p=p)
        self.area_for_downscale = area_for_downscale
        self.mask_interpolation = mask_interpolation
        self.interpolation = interpolation
        self.max_size_hw = max_size_hw
        self.max_size = max_size

    def apply(
        self,
        img: np.ndarray,
        scale: float,
        **params: Any,
    ) -> np.ndarray:
        """Resize an image by ``scale``.

        Args:
            img (np.ndarray): Image to resize; its first two axes are read as (height, width).
            scale (float): Scaling factor supplied by the derived class.
            **params (Any): Additional parameters (unused here).

        Returns:
            np.ndarray: Resized image.

        """
        src_h, src_w = img.shape[:2]
        # Round to whole pixels and keep each side at least 1 pixel long.
        target_hw = (max(1, round(src_h * scale)), max(1, round(src_w * scale)))

        # Both non-None settings request INTER_AREA for images, but only when shrinking.
        force_area = self.area_for_downscale in ("image", "image_mask") and scale < 1.0
        chosen = cv2.INTER_AREA if force_area else self.interpolation

        return fgeometric.resize(img, target_hw, interpolation=chosen)

    def apply_to_mask(
        self,
        mask: np.ndarray,
        scale: float,
        **params: Any,
    ) -> np.ndarray:
        """Resize a mask by ``scale``.

        Args:
            mask (np.ndarray): Mask to resize; its first two axes are read as (height, width).
            scale (float): Scaling factor supplied by the derived class.
            **params (Any): Additional parameters (unused here).

        Returns:
            np.ndarray: Resized mask.

        """
        src_h, src_w = mask.shape[:2]
        # Same rounding rule as for images so both targets end up with matching sizes.
        target_hw = (max(1, round(src_h * scale)), max(1, round(src_w * scale)))

        # Masks switch to INTER_AREA only under the "image_mask" setting, and only when shrinking.
        force_area = self.area_for_downscale == "image_mask" and scale < 1.0
        chosen = cv2.INTER_AREA if force_area else self.mask_interpolation

        return fgeometric.resize(mask, target_hw, interpolation=chosen)

    def apply_to_bboxes(self, bboxes: np.ndarray, **params: Any) -> np.ndarray:
        """Return the bounding boxes untouched; their coordinates are treated as scale invariant."""
        return bboxes

    def apply_to_keypoints(
        self,
        keypoints: np.ndarray,
        scale: float,
        **params: Any,
    ) -> np.ndarray:
        """Rescale keypoints, passing the same factor for both scale arguments."""
        return fgeometric.keypoints_scale(keypoints, scale, scale)

    @batch_transform("spatial", has_batch_dim=True, has_depth_dim=False)
    def apply_to_images(self, images: np.ndarray, *args: Any, **params: Any) -> np.ndarray:
        """Resize a batch of images by delegating to ``apply`` through ``batch_transform``."""
        return self.apply(images, *args, **params)

    @batch_transform("spatial", has_batch_dim=False, has_depth_dim=True)
    def apply_to_volume(self, volume: np.ndarray, *args: Any, **params: Any) -> np.ndarray:
        """Resize a single volume by delegating to ``apply`` through ``batch_transform``."""
        return self.apply(volume, *args, **params)

    @batch_transform("spatial", has_batch_dim=True, has_depth_dim=True)
    def apply_to_volumes(self, volumes: np.ndarray, *args: Any, **params: Any) -> np.ndarray:
        """Resize a batch of volumes by delegating to ``apply`` through ``batch_transform``."""
        return self.apply(volumes, *args, **params)

    # NOTE(review): apply_to_volume declares has_batch_dim=False for a single volume, whereas this
    # single-mask3d variant declares has_batch_dim=True - confirm that the difference is intended.
    @batch_transform("spatial", has_batch_dim=True, has_depth_dim=True)
    def apply_to_mask3d(self, mask3d: np.ndarray, *args: Any, **params: Any) -> np.ndarray:
        """Resize a 3D mask by delegating to ``apply_to_mask`` through ``batch_transform``."""
        return self.apply_to_mask(mask3d, *args, **params)

    @batch_transform("spatial", has_batch_dim=True, has_depth_dim=True)
    def apply_to_masks3d(self, masks3d: np.ndarray, *args: Any, **params: Any) -> np.ndarray:
        """Resize a batch of 3D masks by delegating to ``apply_to_mask`` through ``batch_transform``."""
        return self.apply_to_mask(masks3d, *args, **params)
|
|
499
|
+
|
|
500
|
+
|
|
501
|
+
class LongestMaxSize(MaxSizeTransform):
    """Rescale an image so that the longest side is equal to max_size or sides meet max_size_hw constraints,
    keeping the aspect ratio.

    Args:
        max_size (int, Sequence[int], optional): Maximum size of the longest side after the transformation.
            When using a list or tuple, the max size will be randomly selected from the values provided.
            Default: None.
        max_size_hw (tuple[int | None, int | None], optional): Maximum (height, width) constraints. Supports:
            - (height, width): Both dimensions must fit within these bounds
            - (height, None): Only height is constrained, width scales proportionally
            - (None, width): Only width is constrained, height scales proportionally
            At least one of the two entries must be set. If specified, max_size must be None. Default: None.
        interpolation (OpenCV flag): Interpolation algorithm used for images. Default: cv2.INTER_LINEAR.
        mask_interpolation (OpenCV flag): Interpolation algorithm used for masks.
            Should be one of: cv2.INTER_NEAREST, cv2.INTER_NEAREST_EXACT, cv2.INTER_LINEAR,
            cv2.INTER_LINEAR_EXACT, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4.
            Default: cv2.INTER_NEAREST.
        area_for_downscale (Literal[None, "image", "image_mask"]): Controls automatic use of INTER_AREA
            interpolation when downscaling. Options:
            - None: Never override; always use the configured interpolation method
            - "image": Use INTER_AREA when downscaling images; masks and upscaling keep their configured method
            - "image_mask": Use INTER_AREA when downscaling both images and masks
            Default: None.
        p (float): Probability of applying the transform. Default: 1.

    Targets:
        image, mask, bboxes, keypoints, volume, mask3d

    Image types:
        uint8, float32

    Note:
        - If the longest side of the image is already equal to max_size, the scale factor is 1 and the
          output size equals the input size.
        - This transform will not crop the image. The resulting image may be smaller than specified in
          both dimensions.
        - For non-square images, both sides are scaled by the same factor to maintain the aspect ratio.
        - Keypoints are scaled by the same factor; bounding boxes are returned unchanged by the base class.
        - When area_for_downscale is set, INTER_AREA will be used for downscaling, providing better quality.

    Mathematical Details:
        Let (W, H) be the original width and height of the image.

        When using max_size:
            1. The scaling factor s is calculated as:
               s = max_size / max(W, H)
            2. The new dimensions (W', H') are:
               W' = W * s
               H' = H * s

        When using max_size_hw=(H_target, W_target):
            1. For both dimensions specified:
               s = min(H_target/H, W_target/W)
               This ensures both dimensions fit within the specified bounds.

            2. For height only (W_target=None):
               s = H_target/H
               Width will scale proportionally.

            3. For width only (H_target=None):
               s = W_target/W
               Height will scale proportionally.

            4. The new dimensions (W', H') are:
               W' = W * s
               H' = H * s

    Examples:
        >>> import albumentations as A
        >>> import cv2
        >>> # Using max_size
        >>> transform1 = A.LongestMaxSize(max_size=1024, area_for_downscale="image")
        >>> # Input image (1500, 800) -> Output (1024, 546)
        >>>
        >>> # Using max_size_hw with both dimensions
        >>> transform2 = A.LongestMaxSize(max_size_hw=(800, 1024), area_for_downscale="image_mask")
        >>> # Input (1500, 800) -> Output (800, 427)
        >>> # Input (800, 1500) -> Output (546, 1024)
        >>>
        >>> # Using max_size_hw with only height
        >>> transform3 = A.LongestMaxSize(max_size_hw=(800, None))
        >>> # Input (1500, 800) -> Output (800, 427)
        >>>
        >>> # Common use case with padding
        >>> transform4 = A.Compose([
        ...     A.LongestMaxSize(max_size=1024, area_for_downscale="image"),
        ...     A.PadIfNeeded(min_height=1024, min_width=1024),
        ... ])

    """

    def get_params_dependent_on_data(self, params: dict[str, Any], data: dict[str, Any]) -> dict[str, Any]:
        """Compute the scale factor from the input shape and the configured size constraint.

        Args:
            params (dict[str, Any]): Parameters dictionary; ``params["shape"]`` provides the input
                height and width as its first two entries.
            data (dict[str, Any]): Dictionary containing input data (unused here).

        Returns:
            dict[str, Any]: ``{"scale": s}`` with the factor to apply to both image sides.

        Raises:
            ValueError: If neither ``max_size`` nor ``max_size_hw`` is set, or if ``max_size_hw``
                constrains neither height nor width.

        """
        img_h, img_w = params["shape"][:2]

        if self.max_size is not None:
            if isinstance(self.max_size, (list, tuple)):
                # Several candidate sizes: pick one at random for this call.
                max_size = self.py_random.choice(self.max_size)
            else:
                max_size = self.max_size
            return {"scale": max_size / max(img_h, img_w)}

        if self.max_size_hw is None:
            # Reachable only when the init-time validation was bypassed; fail with a clear message
            # instead of an unbound-variable error.
            raise ValueError("Either max_size or max_size_hw must be specified")

        max_h, max_w = self.max_size_hw
        if max_h is None and max_w is None:
            # The schema accepts (None, None), but it carries no constraint to derive a scale from.
            raise ValueError("max_size_hw must constrain at least one of height or width")

        # One ratio per constrained side; the smallest ratio keeps every constrained side in bounds.
        ratios = [limit / side for limit, side in ((max_h, img_h), (max_w, img_w)) if limit is not None]
        return {"scale": min(ratios)}
|
|
624
|
+
|
|
625
|
+
|
|
626
|
+
class SmallestMaxSize(MaxSizeTransform):
    """Resize an image, preserving its aspect ratio, so that the shortest side equals max_size
    or the sides satisfy the max_size_hw constraints.

    Args:
        max_size (int, list of int, optional): Size the smallest image side should have after the
            transformation. If a list is given, one of its values is picked at random. Default: None.
        max_size_hw (tuple[int | None, int | None], optional): (height, width) constraints. Accepted forms:
            - (height, width): Both dimensions end up at least this large
            - (height, None): Height is constrained, width follows proportionally
            - (None, width): Width is constrained, height follows proportionally
            Must not be combined with max_size. Default: None.
        interpolation (OpenCV flag): Interpolation algorithm used for the image. Should be one of:
            cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4.
            Default: cv2.INTER_LINEAR.
        mask_interpolation (OpenCV flag): Interpolation algorithm used for the mask.
            Should be one of: cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4.
            Default: cv2.INTER_NEAREST.
        area_for_downscale (Literal[None, "image", "image_mask"]): Whether INTER_AREA is selected automatically
            when downscaling. Options:
            - None: Never switch automatically; the configured interpolation is always used
            - "image": INTER_AREA for downscaled images; upscaling and masks keep their configured interpolation
            - "image_mask": INTER_AREA for both images and masks when downscaling
            Default: None.
        p (float): Probability of applying the transform. Default: 1.

    Targets:
        image, mask, bboxes, keypoints, volume, mask3d

    Image types:
        uint8, float32

    Note:
        - An image whose smallest side already equals max_size is not resized.
        - Nothing is cropped, so the output may exceed the requested size in both dimensions.
        - Non-square images have both sides scaled by the same factor, so the aspect ratio is kept.
        - Bounding boxes and keypoints are scaled together with the image.
        - Setting area_for_downscale makes downscaling use INTER_AREA, which gives better quality.

    Mathematical Details:
        Let (W, H) be the original width and height of the image.

        When using max_size:
            1. The scaling factor s is:
               s = max_size / min(W, H)
            2. The output dimensions (W', H') are:
               W' = W * s
               H' = H * s

        When using max_size_hw=(H_target, W_target):
            1. Both dimensions specified:
               s = max(H_target/H, W_target/W)
               so that both output dimensions are at least as large as requested.

            2. Height only (W_target=None):
               s = H_target/H
               and the width scales proportionally.

            3. Width only (H_target=None):
               s = W_target/W
               and the height scales proportionally.

            4. The output dimensions (W', H') are:
               W' = W * s
               H' = H * s

    Examples:
        >>> import numpy as np
        >>> import albumentations as A
        >>> # Using max_size
        >>> transform1 = A.SmallestMaxSize(max_size=120, area_for_downscale="image")
        >>> # Input image (100, 150) -> Output (120, 180)
        >>>
        >>> # Using max_size_hw with both dimensions
        >>> transform2 = A.SmallestMaxSize(max_size_hw=(100, 200), area_for_downscale="image_mask")
        >>> # Input (80, 160) -> Output (100, 200)
        >>> # Input (160, 80) -> Output (400, 200)
        >>>
        >>> # Using max_size_hw with only height
        >>> transform3 = A.SmallestMaxSize(max_size_hw=(100, None))
        >>> # Input (80, 160) -> Output (100, 200)

    """

    def get_params_dependent_on_data(self, params: dict[str, Any], data: dict[str, Any]) -> dict[str, Any]:
        """Work out the resize factor for the current input.

        Args:
            params (dict[str, Any]): Transform parameters. The first two entries of
                ``params["shape"]`` are read as the input height and width.
            data (dict[str, Any]): Input data dictionary (not read by this method).

        Returns:
            dict[str, Any]: A dictionary with a single ``"scale"`` entry.

        """
        src_h, src_w = params["shape"][:2]

        if self.max_size is not None:
            # A list/tuple of candidate sizes is resolved through self.py_random.choice.
            target = (
                self.py_random.choice(self.max_size)
                if isinstance(self.max_size, (list, tuple))
                else self.max_size
            )
            scale = target / min(src_h, src_w)
        elif self.max_size_hw is not None:
            bound_h, bound_w = self.max_size_hw
            if bound_h is None:
                # Height is unconstrained: the width bound alone drives the factor.
                scale = bound_w / src_w
            elif bound_w is None:
                # Width is unconstrained: the height bound alone drives the factor.
                scale = bound_h / src_h
            else:
                # Both bounds given: the larger ratio makes both sides reach their bound.
                scale = max(bound_h / src_h, bound_w / src_w)

        return {"scale": scale}
|
|
744
|
+
|
|
745
|
+
|
|
746
|
+
class Resize(DualTransform):
    """Resize the input to a fixed height and width.

    Args:
        height (int): Height of the output.
        width (int): Width of the output.
        interpolation (OpenCV flag): Interpolation algorithm used for the image. Should be one of:
            cv2.INTER_NEAREST, cv2.INTER_NEAREST_EXACT, cv2.INTER_LINEAR, cv2.INTER_LINEAR_EXACT,
            cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4.
            Default: cv2.INTER_LINEAR.
        mask_interpolation (OpenCV flag): Interpolation algorithm used for the mask.
            Accepts the same flags as interpolation.
            Default: cv2.INTER_NEAREST.
        area_for_downscale (Literal[None, "image", "image_mask"]): Whether INTER_AREA is selected automatically
            when downscaling. Options:
            - None: Never switch automatically; the configured interpolation is always used
            - "image": INTER_AREA for downscaled images; upscaling and masks keep their configured interpolation
            - "image_mask": INTER_AREA for both images and masks when downscaling
            Default: None.
        p (float): Probability of applying the transform. Default: 1.

    Targets:
        image, mask, bboxes, keypoints, volume, mask3d

    Image types:
        uint8, float32

    Examples:
        >>> import numpy as np
        >>> import albumentations as A
        >>> import cv2
        >>>
        >>> # Build a small sample image with a couple of shapes on it
        >>> image = np.zeros((100, 100, 3), dtype=np.uint8)
        >>> cv2.rectangle(image, (25, 25), (75, 75), (255, 0, 0), -1)  # Filled square
        >>> cv2.circle(image, (50, 50), 10, (0, 255, 0), -1)  # Filled circle
        >>>
        >>> # Segmentation mask covering the square
        >>> mask = np.zeros((100, 100), dtype=np.uint8)
        >>> mask[25:75, 25:75] = 1
        >>>
        >>> # One bounding box around the square and one keypoint at the circle center
        >>> bboxes = np.array([[25, 25, 75, 75]])
        >>> bbox_labels = [1]
        >>> keypoints = np.array([[50, 50]])
        >>> keypoint_labels = [0]
        >>>
        >>> # Resize every target to 224x224
        >>> transform = A.Compose([
        ...     A.Resize(
        ...         height=224,
        ...         width=224,
        ...         interpolation=cv2.INTER_LINEAR,
        ...         mask_interpolation=cv2.INTER_NEAREST,
        ...         area_for_downscale="image",  # INTER_AREA when an image is downscaled
        ...         p=1.0
        ...     )
        ... ], bbox_params=A.BboxParams(format='pascal_voc', label_fields=['bbox_labels']),
        ...    keypoint_params=A.KeypointParams(format='xy', label_fields=['keypoint_labels']))
        >>>
        >>> # Run the pipeline on all targets at once
        >>> result = transform(
        ...     image=image,
        ...     mask=mask,
        ...     bboxes=bboxes,
        ...     bbox_labels=bbox_labels,
        ...     keypoints=keypoints,
        ...     keypoint_labels=keypoint_labels
        ... )
        >>>
        >>> # Collect the outputs
        >>> resized_image = result['image']  # Shape will be (224, 224, 3)
        >>> resized_mask = result['mask']  # Shape will be (224, 224)
        >>> resized_bboxes = result['bboxes']  # Bounding boxes scaled to new dimensions
        >>> resized_bbox_labels = result['bbox_labels']  # Labels remain unchanged
        >>> resized_keypoints = result['keypoints']  # Keypoints scaled to new dimensions
        >>> resized_keypoint_labels = result['keypoint_labels']  # Labels remain unchanged
        >>>
        >>> # Going from 100x100 to 224x224:
        >>> # - the square moves from (25-75) to roughly (56-168)
        >>> # - the keypoint at (50, 50) lands at roughly (112, 112)
        >>> # - spatial relationships are kept; only the coordinates are rescaled

    """

    _targets = ALL_TARGETS

    class InitSchema(BaseTransformInitSchema):
        # Output size: both sides must be at least one pixel.
        height: int = Field(ge=1)
        width: int = Field(ge=1)
        area_for_downscale: Literal[None, "image", "image_mask"]
        # Interpolation flags accepted for the image.
        interpolation: Literal[
            cv2.INTER_NEAREST,
            cv2.INTER_NEAREST_EXACT,
            cv2.INTER_LINEAR,
            cv2.INTER_CUBIC,
            cv2.INTER_AREA,
            cv2.INTER_LANCZOS4,
            cv2.INTER_LINEAR_EXACT,
        ]
        # Interpolation flags accepted for the mask (same set as for the image).
        mask_interpolation: Literal[
            cv2.INTER_NEAREST,
            cv2.INTER_NEAREST_EXACT,
            cv2.INTER_LINEAR,
            cv2.INTER_CUBIC,
            cv2.INTER_AREA,
            cv2.INTER_LANCZOS4,
            cv2.INTER_LINEAR_EXACT,
        ]

    def __init__(
        self,
        height: int,
        width: int,
        interpolation: Literal[
            cv2.INTER_NEAREST,
            cv2.INTER_NEAREST_EXACT,
            cv2.INTER_LINEAR,
            cv2.INTER_CUBIC,
            cv2.INTER_AREA,
            cv2.INTER_LANCZOS4,
            cv2.INTER_LINEAR_EXACT,
        ] = cv2.INTER_LINEAR,
        mask_interpolation: Literal[
            cv2.INTER_NEAREST,
            cv2.INTER_NEAREST_EXACT,
            cv2.INTER_LINEAR,
            cv2.INTER_CUBIC,
            cv2.INTER_AREA,
            cv2.INTER_LANCZOS4,
            cv2.INTER_LINEAR_EXACT,
        ] = cv2.INTER_NEAREST,
        area_for_downscale: Literal[None, "image", "image_mask"] = None,
        p: float = 1,
    ):
        super().__init__(p=p)
        # Target size of every output.
        self.height, self.width = height, width
        # Interpolation settings, kept separately for images and masks.
        self.interpolation, self.mask_interpolation = interpolation, mask_interpolation
        self.area_for_downscale = area_for_downscale

    def apply(self, img: np.ndarray, **params: Any) -> np.ndarray:
        """Resize an image to the configured size.

        Args:
            img (np.ndarray): Image to resize.
            **params (Any): Additional parameters (unused).

        Returns:
            np.ndarray: The resized image.

        """
        src_h, src_w = img.shape[:2]
        # Shrinking along either axis counts as a downscale.
        shrinking = self.height < src_h or self.width < src_w
        auto_area = shrinking and self.area_for_downscale in ("image", "image_mask")

        chosen = cv2.INTER_AREA if auto_area else self.interpolation
        return fgeometric.resize(img, (self.height, self.width), interpolation=chosen)

    def apply_to_mask(self, mask: np.ndarray, **params: Any) -> np.ndarray:
        """Resize a mask to the configured size.

        Args:
            mask (np.ndarray): Mask to resize.
            **params (Any): Additional parameters (unused).

        Returns:
            np.ndarray: The resized mask.

        """
        src_h, src_w = mask.shape[:2]
        # Shrinking along either axis counts as a downscale.
        shrinking = self.height < src_h or self.width < src_w
        # Masks only switch to INTER_AREA in "image_mask" mode.
        auto_area = shrinking and self.area_for_downscale == "image_mask"

        chosen = cv2.INTER_AREA if auto_area else self.mask_interpolation
        return fgeometric.resize(mask, (self.height, self.width), interpolation=chosen)

    def apply_to_bboxes(self, bboxes: np.ndarray, **params: Any) -> np.ndarray:
        """Pass bounding boxes through untouched.

        Args:
            bboxes (np.ndarray): Bounding boxes to transform.
            **params (Any): Additional parameters (unused).

        Returns:
            np.ndarray: The same bounding boxes, which are treated as scale invariant.

        """
        # Bounding box coordinates are scale invariant, so there is nothing to do.
        return bboxes

    def apply_to_keypoints(self, keypoints: np.ndarray, **params: Any) -> np.ndarray:
        """Rescale keypoints to the configured size.

        Args:
            keypoints (np.ndarray): Keypoints to rescale.
            **params (Any): Additional parameters; ``params["shape"]`` provides the
                input height and width as its first two entries.

        Returns:
            np.ndarray: The rescaled keypoints.

        """
        src_h, src_w = params["shape"][:2]
        # Per-axis factors: x follows the width ratio, y the height ratio.
        return fgeometric.keypoints_scale(keypoints, self.width / src_w, self.height / src_h)
|