nrtk-albumentations 2.1.0 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of nrtk-albumentations might be problematic.

Files changed (62)
  1. albumentations/__init__.py +21 -0
  2. albumentations/augmentations/__init__.py +23 -0
  3. albumentations/augmentations/blur/__init__.py +0 -0
  4. albumentations/augmentations/blur/functional.py +438 -0
  5. albumentations/augmentations/blur/transforms.py +1633 -0
  6. albumentations/augmentations/crops/__init__.py +0 -0
  7. albumentations/augmentations/crops/functional.py +494 -0
  8. albumentations/augmentations/crops/transforms.py +3647 -0
  9. albumentations/augmentations/dropout/__init__.py +0 -0
  10. albumentations/augmentations/dropout/channel_dropout.py +134 -0
  11. albumentations/augmentations/dropout/coarse_dropout.py +567 -0
  12. albumentations/augmentations/dropout/functional.py +1017 -0
  13. albumentations/augmentations/dropout/grid_dropout.py +166 -0
  14. albumentations/augmentations/dropout/mask_dropout.py +274 -0
  15. albumentations/augmentations/dropout/transforms.py +461 -0
  16. albumentations/augmentations/dropout/xy_masking.py +186 -0
  17. albumentations/augmentations/geometric/__init__.py +0 -0
  18. albumentations/augmentations/geometric/distortion.py +1238 -0
  19. albumentations/augmentations/geometric/flip.py +752 -0
  20. albumentations/augmentations/geometric/functional.py +4151 -0
  21. albumentations/augmentations/geometric/pad.py +676 -0
  22. albumentations/augmentations/geometric/resize.py +956 -0
  23. albumentations/augmentations/geometric/rotate.py +864 -0
  24. albumentations/augmentations/geometric/transforms.py +1962 -0
  25. albumentations/augmentations/mixing/__init__.py +0 -0
  26. albumentations/augmentations/mixing/domain_adaptation.py +787 -0
  27. albumentations/augmentations/mixing/domain_adaptation_functional.py +453 -0
  28. albumentations/augmentations/mixing/functional.py +878 -0
  29. albumentations/augmentations/mixing/transforms.py +832 -0
  30. albumentations/augmentations/other/__init__.py +0 -0
  31. albumentations/augmentations/other/lambda_transform.py +180 -0
  32. albumentations/augmentations/other/type_transform.py +261 -0
  33. albumentations/augmentations/pixel/__init__.py +0 -0
  34. albumentations/augmentations/pixel/functional.py +4226 -0
  35. albumentations/augmentations/pixel/transforms.py +7556 -0
  36. albumentations/augmentations/spectrogram/__init__.py +0 -0
  37. albumentations/augmentations/spectrogram/transform.py +220 -0
  38. albumentations/augmentations/text/__init__.py +0 -0
  39. albumentations/augmentations/text/functional.py +272 -0
  40. albumentations/augmentations/text/transforms.py +299 -0
  41. albumentations/augmentations/transforms3d/__init__.py +0 -0
  42. albumentations/augmentations/transforms3d/functional.py +393 -0
  43. albumentations/augmentations/transforms3d/transforms.py +1422 -0
  44. albumentations/augmentations/utils.py +249 -0
  45. albumentations/core/__init__.py +0 -0
  46. albumentations/core/bbox_utils.py +920 -0
  47. albumentations/core/composition.py +1885 -0
  48. albumentations/core/hub_mixin.py +299 -0
  49. albumentations/core/keypoints_utils.py +521 -0
  50. albumentations/core/label_manager.py +339 -0
  51. albumentations/core/pydantic.py +239 -0
  52. albumentations/core/serialization.py +352 -0
  53. albumentations/core/transforms_interface.py +976 -0
  54. albumentations/core/type_definitions.py +127 -0
  55. albumentations/core/utils.py +605 -0
  56. albumentations/core/validation.py +129 -0
  57. albumentations/pytorch/__init__.py +1 -0
  58. albumentations/pytorch/transforms.py +189 -0
  59. nrtk_albumentations-2.1.0.dist-info/METADATA +196 -0
  60. nrtk_albumentations-2.1.0.dist-info/RECORD +62 -0
  61. nrtk_albumentations-2.1.0.dist-info/WHEEL +4 -0
  62. nrtk_albumentations-2.1.0.dist-info/licenses/LICENSE +21 -0
--- /dev/null
+++ albumentations/augmentations/geometric/resize.py
@@ -0,0 +1,956 @@
+ """Transforms for resizing images and associated data.
+
+ This module provides transform classes for resizing operations, including uniform resizing,
+ scaling with aspect ratio preservation, and size-constrained transformations.
+ """
+
+ from __future__ import annotations
+
+ from collections.abc import Sequence
+ from typing import Any, Literal, cast
+
+ import cv2
+ import numpy as np
+ from albucore import batch_transform
+ from pydantic import Field, field_validator, model_validator
+ from typing_extensions import Self
+
+ from albumentations.core.transforms_interface import BaseTransformInitSchema, DualTransform
+ from albumentations.core.type_definitions import ALL_TARGETS
+ from albumentations.core.utils import to_tuple
+
+ from . import functional as fgeometric
+
+ __all__ = ["LongestMaxSize", "RandomScale", "Resize", "SmallestMaxSize"]
+
+
+ class RandomScale(DualTransform):
+     """Randomly resize the input. Output image size is different from the input image size.
+
+     Args:
+         scale_limit (float or tuple[float, float]): scaling factor range. If scale_limit is a single float value, the
+             range will be (-scale_limit, scale_limit). Note that the scale_limit will be biased by 1.
+             If scale_limit is a tuple, like (low, high), sampling will be done from the range (1 + low, 1 + high).
+             Default: (-0.1, 0.1).
+         interpolation (OpenCV flag): flag that is used to specify the interpolation algorithm. Should be one of:
+             cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4.
+             Default: cv2.INTER_LINEAR.
+         mask_interpolation (OpenCV flag): flag that is used to specify the interpolation algorithm for mask.
+             Should be one of: cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4.
+             Default: cv2.INTER_NEAREST.
+         area_for_downscale (Literal[None, "image", "image_mask"]): Controls automatic use of INTER_AREA interpolation
+             for downscaling. Options:
+             - None: No automatic interpolation selection, always use the specified interpolation method
+             - "image": Use INTER_AREA when downscaling images, retain specified interpolation for upscaling and masks
+             - "image_mask": Use INTER_AREA when downscaling both images and masks
+             Default: None.
+         p (float): probability of applying the transform. Default: 0.5.
+
+     Targets:
+         image, mask, bboxes, keypoints, volume, mask3d
+
+     Image types:
+         uint8, float32
+
+     Note:
+         - The output image size is different from the input image size.
+         - A single scale factor is sampled per call and applied to both width and height,
+           so the aspect ratio is preserved.
+         - Bounding box coordinates are scaled accordingly.
+         - Keypoint coordinates are scaled accordingly.
+         - When area_for_downscale is set, INTER_AREA interpolation will be used automatically for
+           downscaling (scale < 1.0), which provides better quality for size reduction.
+
+     Mathematical formulation:
+         Let (W, H) be the original image dimensions and (W', H') be the output dimensions.
+         The scale factor s is sampled from the range [1 + scale_limit[0], 1 + scale_limit[1]].
+         Then, W' = W * s and H' = H * s.
+
+     Examples:
+         >>> import numpy as np
+         >>> import albumentations as A
+         >>> import cv2
+         >>>
+         >>> # Create sample data for demonstration
+         >>> image = np.zeros((100, 100, 3), dtype=np.uint8)
+         >>> # Add some shapes to visualize scaling effects
+         >>> cv2.rectangle(image, (25, 25), (75, 75), (255, 0, 0), -1) # Red square
+         >>> cv2.circle(image, (50, 50), 10, (0, 255, 0), -1) # Green circle
+         >>>
+         >>> # Create a mask for segmentation
+         >>> mask = np.zeros((100, 100), dtype=np.uint8)
+         >>> mask[25:75, 25:75] = 1 # Mask covering the red square
+         >>>
+         >>> # Create bounding boxes and keypoints
+         >>> bboxes = np.array([[25, 25, 75, 75]]) # Box around the red square
+         >>> bbox_labels = [1]
+         >>> keypoints = np.array([[50, 50]]) # Center of circle
+         >>> keypoint_labels = [0]
+         >>>
+         >>> # Apply RandomScale transform with comprehensive parameters
+         >>> transform = A.Compose([
+         ...     A.RandomScale(
+         ...         scale_limit=(-0.3, 0.5), # Scale between 0.7x and 1.5x
+         ...         interpolation=cv2.INTER_LINEAR,
+         ...         mask_interpolation=cv2.INTER_NEAREST,
+         ...         area_for_downscale="image", # Use INTER_AREA for image downscaling
+         ...         p=1.0 # Always apply
+         ...     )
+         ... ], bbox_params=A.BboxParams(format='pascal_voc', label_fields=['bbox_labels']),
+         ...    keypoint_params=A.KeypointParams(format='xy', label_fields=['keypoint_labels']))
+         >>>
+         >>> # Apply the transform to all targets
+         >>> result = transform(
+         ...     image=image,
+         ...     mask=mask,
+         ...     bboxes=bboxes,
+         ...     bbox_labels=bbox_labels,
+         ...     keypoints=keypoints,
+         ...     keypoint_labels=keypoint_labels
+         ... )
+         >>>
+         >>> # Get the transformed results
+         >>> scaled_image = result['image'] # Dimensions will be between 70-150 pixels
+         >>> scaled_mask = result['mask'] # Mask scaled proportionally to image
+         >>> scaled_bboxes = result['bboxes'] # Bounding boxes adjusted to new dimensions
+         >>> scaled_bbox_labels = result['bbox_labels'] # Labels remain unchanged
+         >>> scaled_keypoints = result['keypoints'] # Keypoints adjusted to new dimensions
+         >>> scaled_keypoint_labels = result['keypoint_labels'] # Labels remain unchanged
+         >>>
+         >>> # The image dimensions will vary based on the randomly sampled scale factor
+         >>> # With scale_limit=(-0.3, 0.5), dimensions could be anywhere from 70% to 150% of original
+
+     """
+
+     _targets = ALL_TARGETS
+
+     class InitSchema(BaseTransformInitSchema):
+         scale_limit: tuple[float, float] | float
+         area_for_downscale: Literal[None, "image", "image_mask"]
+         interpolation: Literal[
+             cv2.INTER_NEAREST,
+             cv2.INTER_NEAREST_EXACT,
+             cv2.INTER_LINEAR,
+             cv2.INTER_CUBIC,
+             cv2.INTER_AREA,
+             cv2.INTER_LANCZOS4,
+             cv2.INTER_LINEAR_EXACT,
+         ]
+         mask_interpolation: Literal[
+             cv2.INTER_NEAREST,
+             cv2.INTER_NEAREST_EXACT,
+             cv2.INTER_LINEAR,
+             cv2.INTER_CUBIC,
+             cv2.INTER_AREA,
+             cv2.INTER_LANCZOS4,
+             cv2.INTER_LINEAR_EXACT,
+         ]
+
+         @field_validator("scale_limit")
+         @classmethod
+         def _check_scale_limit(cls, v: tuple[float, float] | float) -> tuple[float, float]:
+             return to_tuple(v)
+
+     def __init__(
+         self,
+         scale_limit: tuple[float, float] | float = (-0.1, 0.1),
+         interpolation: Literal[
+             cv2.INTER_NEAREST,
+             cv2.INTER_NEAREST_EXACT,
+             cv2.INTER_LINEAR,
+             cv2.INTER_CUBIC,
+             cv2.INTER_AREA,
+             cv2.INTER_LANCZOS4,
+             cv2.INTER_LINEAR_EXACT,
+         ] = cv2.INTER_LINEAR,
+         mask_interpolation: Literal[
+             cv2.INTER_NEAREST,
+             cv2.INTER_NEAREST_EXACT,
+             cv2.INTER_LINEAR,
+             cv2.INTER_CUBIC,
+             cv2.INTER_AREA,
+             cv2.INTER_LANCZOS4,
+             cv2.INTER_LINEAR_EXACT,
+         ] = cv2.INTER_NEAREST,
+         area_for_downscale: Literal[None, "image", "image_mask"] = None,
+         p: float = 0.5,
+     ):
+         super().__init__(p=p)
+         self.scale_limit = cast("tuple[float, float]", scale_limit)
+         self.interpolation = interpolation
+         self.mask_interpolation = mask_interpolation
+         self.area_for_downscale = area_for_downscale
+
+     def get_params(self) -> dict[str, float]:
+         """Get parameters for the transform.
+
+         Returns:
+             dict[str, float]: Dictionary with parameters.
+
+         """
+         return {"scale": self.py_random.uniform(*self.scale_limit) + 1.0}
+
+     def apply(
+         self,
+         img: np.ndarray,
+         scale: float,
+         **params: Any,
+     ) -> np.ndarray:
+         """Apply scaling to the image.
+
+         Args:
+             img (np.ndarray): Image to scale.
+             scale (float): Scaling factor.
+             **params (Any): Additional parameters.
+
+         Returns:
+             np.ndarray: Scaled image.
+
+         """
+         interpolation = self.interpolation
+         if self.area_for_downscale in ["image", "image_mask"] and scale < 1.0:
+             interpolation = cv2.INTER_AREA
+
+         return fgeometric.scale(img, scale, interpolation)
+
+     def apply_to_mask(
+         self,
+         mask: np.ndarray,
+         scale: float,
+         **params: Any,
+     ) -> np.ndarray:
+         """Apply scaling to the mask.
+
+         Args:
+             mask (np.ndarray): Mask to scale.
+             scale (float): Scaling factor.
+             **params (Any): Additional parameters.
+
+         Returns:
+             np.ndarray: Scaled mask.
+
+         """
+         interpolation = self.mask_interpolation
+         if self.area_for_downscale == "image_mask" and scale < 1.0:
+             interpolation = cv2.INTER_AREA
+
+         return fgeometric.scale(mask, scale, interpolation)
+
+     def apply_to_bboxes(self, bboxes: np.ndarray, **params: Any) -> np.ndarray:
+         """Apply the transform to bounding boxes.
+
+         Args:
+             bboxes (np.ndarray): Bounding boxes to transform.
+             **params (Any): Additional parameters.
+
+         Returns:
+             np.ndarray: Transformed bounding boxes which are scale invariant.
+
+         """
+         # Bounding box coordinates are scale invariant
+         return bboxes
+
+     def apply_to_keypoints(
+         self,
+         keypoints: np.ndarray,
+         scale: float,
+         **params: Any,
+     ) -> np.ndarray:
+         """Apply scaling to keypoints.
+
+         Args:
+             keypoints (np.ndarray): Keypoints to scale.
+             scale (float): Scaling factor.
+             **params (Any): Additional parameters.
+
+         Returns:
+             np.ndarray: Scaled keypoints.
+
+         """
+         return fgeometric.keypoints_scale(keypoints, scale, scale)
+
+
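The sampling rule described in the RandomScale docstring is easy to verify in isolation. The standalone sketch below is not part of the diff; it mirrors get_params() using the standard-library random module in place of the transform's internal py_random generator, and assumes (as the docstring states) that a single float scale_limit f expands to the range (-f, f).

    import random

    scale_limit = (-0.1, 0.1)                   # a single float f would expand to (-f, f)
    scale = random.uniform(*scale_limit) + 1.0  # biased by 1 -> factor in [0.9, 1.1]

    h, w = 100, 200
    new_h, new_w = round(h * scale), round(w * scale)  # one factor for both sides
    print(scale, (new_h, new_w))                # e.g. 1.07, (107, 214)
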
+ class MaxSizeTransform(DualTransform):
+     """Base class for transforms that resize based on maximum size constraints.
+
+     This class provides common functionality for derived transforms like LongestMaxSize and
+     SmallestMaxSize that resize images based on size constraints while preserving aspect ratio.
+
+     Args:
+         max_size (int, Sequence[int], optional): Maximum size constraint. The specific interpretation
+             depends on the derived class. Default: None.
+         max_size_hw (tuple[int | None, int | None], optional): Maximum (height, width) constraints.
+             Either max_size or max_size_hw must be specified, but not both. Default: None.
+         interpolation (OpenCV flag): Flag for the interpolation algorithm. Should be one of:
+             cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4.
+             Default: cv2.INTER_LINEAR.
+         mask_interpolation (OpenCV flag): Flag for the mask interpolation algorithm.
+             Should be one of: cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4.
+             Default: cv2.INTER_NEAREST.
+         area_for_downscale (Literal[None, "image", "image_mask"]): Controls automatic use of INTER_AREA interpolation
+             for downscaling. Options:
+             - None: No automatic interpolation selection, always use the specified interpolation method
+             - "image": Use INTER_AREA when downscaling images, retain specified interpolation for upscaling and masks
+             - "image_mask": Use INTER_AREA when downscaling both images and masks
+             Default: None.
+         p (float): Probability of applying the transform. Default: 1.
+
+     Targets:
+         image, mask, bboxes, keypoints, volume, mask3d
+
+     Image types:
+         uint8, float32
+
+     Note:
+         - This is a base class that should be extended by concrete resize transforms.
+         - The scaling calculation is implemented in derived classes.
+         - Aspect ratio is preserved by applying the same scale factor to both dimensions.
+         - When area_for_downscale is set, INTER_AREA interpolation will be used automatically for
+           downscaling (scale < 1.0), which provides better quality for size reduction.
+
+     Examples:
+         >>> import numpy as np
+         >>> import albumentations as A
+         >>> import cv2
+         >>>
+         >>> # Example of creating a custom transform that extends MaxSizeTransform
+         >>> class CustomMaxSize(A.MaxSizeTransform):
+         ...     def get_params_dependent_on_data(self, params, data):
+         ...         img_h, img_w = params["shape"][:2]
+         ...         # Calculate scale factor - here we scale to make the image area constant
+         ...         target_area = 300 * 300 # Target area of 300x300
+         ...         current_area = img_h * img_w
+         ...         scale = np.sqrt(target_area / current_area)
+         ...         return {"scale": scale}
+         >>>
+         >>> # Prepare sample data
+         >>> image = np.zeros((100, 200, 3), dtype=np.uint8)
+         >>> # Add a rectangle to visualize the effect
+         >>> cv2.rectangle(image, (50, 20), (150, 80), (255, 0, 0), -1)
+         >>>
+         >>> # Create a mask
+         >>> mask = np.zeros((100, 200), dtype=np.uint8)
+         >>> mask[20:80, 50:150] = 1
+         >>>
+         >>> # Create bounding boxes and keypoints
+         >>> bboxes = np.array([[50, 20, 150, 80]])
+         >>> bbox_labels = [1]
+         >>> keypoints = np.array([[100, 50]])
+         >>> keypoint_labels = [0]
+         >>>
+         >>> # Apply the custom transform
+         >>> transform = A.Compose([
+         ...     CustomMaxSize(
+         ...         max_size=None,
+         ...         max_size_hw=(None, None), # Not used in our custom implementation
+         ...         interpolation=cv2.INTER_LINEAR,
+         ...         mask_interpolation=cv2.INTER_NEAREST,
+         ...         area_for_downscale="image", # Use INTER_AREA when downscaling images
+         ...         p=1.0
+         ...     )
+         ... ], bbox_params=A.BboxParams(format='pascal_voc', label_fields=['bbox_labels']),
+         ...    keypoint_params=A.KeypointParams(format='xy', label_fields=['keypoint_labels']))
+         >>>
+         >>> # Apply the transform
+         >>> result = transform(
+         ...     image=image,
+         ...     mask=mask,
+         ...     bboxes=bboxes,
+         ...     bbox_labels=bbox_labels,
+         ...     keypoints=keypoints,
+         ...     keypoint_labels=keypoint_labels
+         ... )
+         >>>
+         >>> # Get results
+         >>> transformed_image = result['image'] # Shape will be approximately (212, 424, 3)
+         >>> transformed_mask = result['mask'] # Shape will be approximately (212, 424)
+         >>> transformed_bboxes = result['bboxes'] # Bounding boxes are scale invariant
+         >>> transformed_keypoints = result['keypoints'] # Keypoints scaled proportionally
+         >>> transformed_bbox_labels = result['bbox_labels'] # Labels remain unchanged
+         >>> transformed_keypoint_labels = result['keypoint_labels'] # Labels remain unchanged
+
+     """
+
+     _targets = ALL_TARGETS
+
+     class InitSchema(BaseTransformInitSchema):
+         max_size: int | list[int] | None
+         max_size_hw: tuple[int | None, int | None] | None
+         area_for_downscale: Literal[None, "image", "image_mask"]
+         interpolation: Literal[
+             cv2.INTER_NEAREST,
+             cv2.INTER_NEAREST_EXACT,
+             cv2.INTER_LINEAR,
+             cv2.INTER_CUBIC,
+             cv2.INTER_AREA,
+             cv2.INTER_LANCZOS4,
+             cv2.INTER_LINEAR_EXACT,
+         ]
+         mask_interpolation: Literal[
+             cv2.INTER_NEAREST,
+             cv2.INTER_NEAREST_EXACT,
+             cv2.INTER_LINEAR,
+             cv2.INTER_CUBIC,
+             cv2.INTER_AREA,
+             cv2.INTER_LANCZOS4,
+             cv2.INTER_LINEAR_EXACT,
+         ]
+
+         @model_validator(mode="after")
+         def validate_size_parameters(self) -> Self:
+             if self.max_size is None and self.max_size_hw is None:
+                 raise ValueError("Either max_size or max_size_hw must be specified")
+             if self.max_size is not None and self.max_size_hw is not None:
+                 raise ValueError("Only one of max_size or max_size_hw should be specified")
+             return self
+
+     def __init__(
+         self,
+         max_size: int | Sequence[int] | None = None,
+         max_size_hw: tuple[int | None, int | None] | None = None,
+         interpolation: Literal[
+             cv2.INTER_NEAREST,
+             cv2.INTER_NEAREST_EXACT,
+             cv2.INTER_LINEAR,
+             cv2.INTER_CUBIC,
+             cv2.INTER_AREA,
+             cv2.INTER_LANCZOS4,
+             cv2.INTER_LINEAR_EXACT,
+         ] = cv2.INTER_LINEAR,
+         mask_interpolation: Literal[
+             cv2.INTER_NEAREST,
+             cv2.INTER_NEAREST_EXACT,
+             cv2.INTER_LINEAR,
+             cv2.INTER_CUBIC,
+             cv2.INTER_AREA,
+             cv2.INTER_LANCZOS4,
+             cv2.INTER_LINEAR_EXACT,
+         ] = cv2.INTER_NEAREST,
+         area_for_downscale: Literal[None, "image", "image_mask"] = None,
+         p: float = 1,
+     ):
+         super().__init__(p=p)
+         self.max_size = max_size
+         self.max_size_hw = max_size_hw
+         self.interpolation = interpolation
+         self.mask_interpolation = mask_interpolation
+         self.area_for_downscale = area_for_downscale
+
+     def apply(
+         self,
+         img: np.ndarray,
+         scale: float,
+         **params: Any,
+     ) -> np.ndarray:
+         height, width = img.shape[:2]
+         new_height, new_width = max(1, round(height * scale)), max(1, round(width * scale))
+
+         interpolation = self.interpolation
+         if self.area_for_downscale in ["image", "image_mask"] and scale < 1.0:
+             interpolation = cv2.INTER_AREA
+
+         return fgeometric.resize(img, (new_height, new_width), interpolation=interpolation)
+
+     def apply_to_mask(
+         self,
+         mask: np.ndarray,
+         scale: float,
+         **params: Any,
+     ) -> np.ndarray:
+         height, width = mask.shape[:2]
+         new_height, new_width = max(1, round(height * scale)), max(1, round(width * scale))
+
+         interpolation = self.mask_interpolation
+         if self.area_for_downscale == "image_mask" and scale < 1.0:
+             interpolation = cv2.INTER_AREA
+
+         return fgeometric.resize(mask, (new_height, new_width), interpolation=interpolation)
+
+     def apply_to_bboxes(self, bboxes: np.ndarray, **params: Any) -> np.ndarray:
+         # Bounding box coordinates are scale invariant
+         return bboxes
+
+     def apply_to_keypoints(
+         self,
+         keypoints: np.ndarray,
+         scale: float,
+         **params: Any,
+     ) -> np.ndarray:
+         return fgeometric.keypoints_scale(keypoints, scale, scale)
+
+     @batch_transform("spatial", has_batch_dim=True, has_depth_dim=False)
+     def apply_to_images(self, images: np.ndarray, *args: Any, **params: Any) -> np.ndarray:
+         return self.apply(images, *args, **params)
+
+     @batch_transform("spatial", has_batch_dim=False, has_depth_dim=True)
+     def apply_to_volume(self, volume: np.ndarray, *args: Any, **params: Any) -> np.ndarray:
+         return self.apply(volume, *args, **params)
+
+     @batch_transform("spatial", has_batch_dim=True, has_depth_dim=True)
+     def apply_to_volumes(self, volumes: np.ndarray, *args: Any, **params: Any) -> np.ndarray:
+         return self.apply(volumes, *args, **params)
+
+     @batch_transform("spatial", has_batch_dim=True, has_depth_dim=True)
+     def apply_to_mask3d(self, mask3d: np.ndarray, *args: Any, **params: Any) -> np.ndarray:
+         return self.apply_to_mask(mask3d, *args, **params)
+
+     @batch_transform("spatial", has_batch_dim=True, has_depth_dim=True)
+     def apply_to_masks3d(self, masks3d: np.ndarray, *args: Any, **params: Any) -> np.ndarray:
+         return self.apply_to_mask(masks3d, *args, **params)
+
+
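LongestMaxSize and SmallestMaxSize below differ only in how they derive the scale factor; the size rounding and interpolation choice in MaxSizeTransform.apply() above are shared by both. A minimal standalone sketch of that shared logic follows (resolve_resize is a hypothetical helper name, not an API of this package):

    import cv2

    def resolve_resize(height, width, scale, interpolation, area_for_downscale):
        # Round to the nearest pixel but never go below 1 px per side.
        new_hw = (max(1, round(height * scale)), max(1, round(width * scale)))
        # INTER_AREA is substituted only when downscaling and the option allows it.
        if area_for_downscale in ("image", "image_mask") and scale < 1.0:
            interpolation = cv2.INTER_AREA
        return new_hw, interpolation

    print(resolve_resize(100, 200, 0.5, cv2.INTER_LINEAR, "image"))  # ((50, 100), 3) -> INTER_AREA
    print(resolve_resize(100, 200, 2.0, cv2.INTER_LINEAR, "image"))  # ((200, 400), 1) -> INTER_LINEAR
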
+ class LongestMaxSize(MaxSizeTransform):
+     """Rescale an image so that the longest side is equal to max_size or sides meet max_size_hw constraints,
+     keeping the aspect ratio.
+
+     Args:
+         max_size (int, Sequence[int], optional): Maximum size of the longest side after the transformation.
+             When using a list or tuple, the max size will be randomly selected from the values provided. Default: None.
+         max_size_hw (tuple[int | None, int | None], optional): Maximum (height, width) constraints. Supports:
+             - (height, width): Both dimensions must fit within these bounds
+             - (height, None): Only height is constrained, width scales proportionally
+             - (None, width): Only width is constrained, height scales proportionally
+             If specified, max_size must be None. Default: None.
+         interpolation (OpenCV flag): interpolation method. Default: cv2.INTER_LINEAR.
+         mask_interpolation (OpenCV flag): flag that is used to specify the interpolation algorithm for mask.
+             Should be one of: cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4.
+             Default: cv2.INTER_NEAREST.
+         area_for_downscale (Literal[None, "image", "image_mask"]): Controls automatic use of INTER_AREA interpolation
+             for downscaling. Options:
+             - None: No automatic interpolation selection, always use the specified interpolation method
+             - "image": Use INTER_AREA when downscaling images, retain specified interpolation for upscaling and masks
+             - "image_mask": Use INTER_AREA when downscaling both images and masks
+             Default: None.
+         p (float): probability of applying the transform. Default: 1.
+
+     Targets:
+         image, mask, bboxes, keypoints, volume, mask3d
+
+     Image types:
+         uint8, float32
+
+     Note:
+         - If the longest side of the image is already equal to max_size, the image will not be resized.
+         - This transform will not crop the image. The resulting image may be smaller than specified in both dimensions.
+         - For non-square images, both sides will be scaled proportionally to maintain the aspect ratio.
+         - Bounding boxes and keypoints are scaled accordingly.
+         - When area_for_downscale is set, INTER_AREA will be used for downscaling, providing better quality.
+
+     Mathematical Details:
+         Let (W, H) be the original width and height of the image.
+
+         When using max_size:
+             1. The scaling factor s is calculated as:
+                s = max_size / max(W, H)
+             2. The new dimensions (W', H') are:
+                W' = W * s
+                H' = H * s
+
+         When using max_size_hw=(H_target, W_target):
+             1. For both dimensions specified:
+                s = min(H_target/H, W_target/W)
+                This ensures both dimensions fit within the specified bounds.
+
+             2. For height only (W_target=None):
+                s = H_target/H
+                Width will scale proportionally.
+
+             3. For width only (H_target=None):
+                s = W_target/W
+                Height will scale proportionally.
+
+             4. The new dimensions (W', H') are:
+                W' = W * s
+                H' = H * s
+
+     Examples:
+         >>> import albumentations as A
+         >>> import cv2
+         >>> # Using max_size
+         >>> transform1 = A.LongestMaxSize(max_size=1024, area_for_downscale="image")
+         >>> # Input image (1500, 800) -> Output (1024, 546)
+         >>>
+         >>> # Using max_size_hw with both dimensions
+         >>> transform2 = A.LongestMaxSize(max_size_hw=(800, 1024), area_for_downscale="image_mask")
+         >>> # Input (1500, 800) -> Output (800, 427)
+         >>> # Input (800, 1500) -> Output (546, 1024)
+         >>>
+         >>> # Using max_size_hw with only height
+         >>> transform3 = A.LongestMaxSize(max_size_hw=(800, None))
+         >>> # Input (1500, 800) -> Output (800, 427)
+         >>>
+         >>> # Common use case with padding
+         >>> transform4 = A.Compose([
+         ...     A.LongestMaxSize(max_size=1024, area_for_downscale="image"),
+         ...     A.PadIfNeeded(min_height=1024, min_width=1024),
+         ... ])
+
+     """
+
+     def get_params_dependent_on_data(self, params: dict[str, Any], data: dict[str, Any]) -> dict[str, Any]:
+         """Calculate parameters that depend on the input data.
+
+         Args:
+             params (dict[str, Any]): Parameters dictionary.
+             data (dict[str, Any]): Dictionary containing input data.
+
+         Returns:
+             dict[str, Any]: Dictionary with parameters calculated based on input data.
+
+         """
+         img_h, img_w = params["shape"][:2]
+
+         if self.max_size is not None:
+             if isinstance(self.max_size, (list, tuple)):
+                 max_size = self.py_random.choice(self.max_size)
+             else:
+                 max_size = self.max_size
+             scale = max_size / max(img_h, img_w)
+         elif self.max_size_hw is not None:
+             # We know max_size_hw is not None here due to model validator
+             max_h, max_w = self.max_size_hw
+             if max_h is not None and max_w is not None:
+                 # Scale based on longest side to maintain aspect ratio
+                 h_scale = max_h / img_h
+                 w_scale = max_w / img_w
+                 scale = min(h_scale, w_scale)
+             elif max_h is not None:
+                 # Only height specified
+                 scale = max_h / img_h
+             else:
+                 # Only width specified
+                 scale = max_w / img_w
+
+         return {"scale": scale}
+
+
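The output shapes quoted in the LongestMaxSize docstring follow directly from s = max_size / max(W, H), or from the minimum of the per-side ratios when max_size_hw is given. A quick standalone arithmetic check (not part of the package):

    # Arithmetic behind the LongestMaxSize docstring examples (H=1500, W=800).
    h, w = 1500, 800

    s = 1024 / max(h, w)               # max_size=1024
    print(round(h * s), round(w * s))  # 1024 546

    s = min(800 / h, 1024 / w)         # max_size_hw=(800, 1024)
    print(round(h * s), round(w * s))  # 800 427
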
+ class SmallestMaxSize(MaxSizeTransform):
+     """Rescale an image so that the smallest side is equal to max_size or sides meet max_size_hw constraints,
+     keeping the aspect ratio.
+
+     Args:
+         max_size (int, list of int, optional): Maximum size of smallest side of the image after the transformation.
+             When using a list, max size will be randomly selected from the values in the list. Default: None.
+         max_size_hw (tuple[int | None, int | None], optional): Maximum (height, width) constraints. Supports:
+             - (height, width): Both dimensions must be at least these values
+             - (height, None): Only height is constrained, width scales proportionally
+             - (None, width): Only width is constrained, height scales proportionally
+             If specified, max_size must be None. Default: None.
+         interpolation (OpenCV flag): Flag that is used to specify the interpolation algorithm. Should be one of:
+             cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4.
+             Default: cv2.INTER_LINEAR.
+         mask_interpolation (OpenCV flag): flag that is used to specify the interpolation algorithm for mask.
+             Should be one of: cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4.
+             Default: cv2.INTER_NEAREST.
+         area_for_downscale (Literal[None, "image", "image_mask"]): Controls automatic use of INTER_AREA interpolation
+             for downscaling. Options:
+             - None: No automatic interpolation selection, always use the specified interpolation method
+             - "image": Use INTER_AREA when downscaling images, retain specified interpolation for upscaling and masks
+             - "image_mask": Use INTER_AREA when downscaling both images and masks
+             Default: None.
+         p (float): Probability of applying the transform. Default: 1.
+
+     Targets:
+         image, mask, bboxes, keypoints, volume, mask3d
+
+     Image types:
+         uint8, float32
+
+     Note:
+         - If the smallest side of the image is already equal to max_size, the image will not be resized.
+         - This transform will not crop the image. The resulting image may be larger than specified in both dimensions.
+         - For non-square images, both sides will be scaled proportionally to maintain the aspect ratio.
+         - Bounding boxes and keypoints are scaled accordingly.
+         - When area_for_downscale is set, INTER_AREA will be used for downscaling, providing better quality.
+
+     Mathematical Details:
+         Let (W, H) be the original width and height of the image.
+
+         When using max_size:
+             1. The scaling factor s is calculated as:
+                s = max_size / min(W, H)
+             2. The new dimensions (W', H') are:
+                W' = W * s
+                H' = H * s
+
+         When using max_size_hw=(H_target, W_target):
+             1. For both dimensions specified:
+                s = max(H_target/H, W_target/W)
+                This ensures both dimensions are at least as large as specified.
+
+             2. For height only (W_target=None):
+                s = H_target/H
+                Width will scale proportionally.
+
+             3. For width only (H_target=None):
+                s = W_target/W
+                Height will scale proportionally.
+
+             4. The new dimensions (W', H') are:
+                W' = W * s
+                H' = H * s
+
+     Examples:
+         >>> import numpy as np
+         >>> import albumentations as A
+         >>> # Using max_size
+         >>> transform1 = A.SmallestMaxSize(max_size=120, area_for_downscale="image")
+         >>> # Input image (100, 150) -> Output (120, 180)
+         >>>
+         >>> # Using max_size_hw with both dimensions
+         >>> transform2 = A.SmallestMaxSize(max_size_hw=(100, 200), area_for_downscale="image_mask")
+         >>> # Input (80, 160) -> Output (100, 200)
+         >>> # Input (160, 80) -> Output (400, 200)
+         >>>
+         >>> # Using max_size_hw with only height
+         >>> transform3 = A.SmallestMaxSize(max_size_hw=(100, None))
+         >>> # Input (80, 160) -> Output (100, 200)
+
+     """
+
+     def get_params_dependent_on_data(self, params: dict[str, Any], data: dict[str, Any]) -> dict[str, Any]:
+         """Calculate parameters that depend on the input data.
+
+         Args:
+             params (dict[str, Any]): Parameters dictionary.
+             data (dict[str, Any]): Dictionary containing input data.
+
+         Returns:
+             dict[str, Any]: Dictionary with parameters calculated based on input data.
+
+         """
+         img_h, img_w = params["shape"][:2]
+
+         if self.max_size is not None:
+             if isinstance(self.max_size, (list, tuple)):
+                 max_size = self.py_random.choice(self.max_size)
+             else:
+                 max_size = self.max_size
+             scale = max_size / min(img_h, img_w)
+         elif self.max_size_hw is not None:
+             max_h, max_w = self.max_size_hw
+             if max_h is not None and max_w is not None:
+                 # Scale based on smallest side to maintain aspect ratio
+                 h_scale = max_h / img_h
+                 w_scale = max_w / img_w
+                 scale = max(h_scale, w_scale)
+             elif max_h is not None:
+                 # Only height specified
+                 scale = max_h / img_h
+             else:
+                 # Only width specified
+                 scale = max_w / img_w
+
+         return {"scale": scale}
+
+
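SmallestMaxSize flips the constraint: s = max_size / min(W, H), or the maximum of the per-side ratios for max_size_hw, so no side ends up smaller than requested. A quick standalone check of the shapes quoted in its docstring (not part of the package):

    # Arithmetic behind the SmallestMaxSize docstring examples.
    h, w = 100, 150
    s = 120 / min(h, w)                # max_size=120
    print(round(h * s), round(w * s))  # 120 180

    h, w = 160, 80
    s = max(100 / h, 200 / w)          # max_size_hw=(100, 200)
    print(round(h * s), round(w * s))  # 400 200
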
+ class Resize(DualTransform):
+     """Resize the input to the given height and width.
+
+     Args:
+         height (int): desired height of the output.
+         width (int): desired width of the output.
+         interpolation (OpenCV flag): flag that is used to specify the interpolation algorithm. Should be one of:
+             cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4.
+             Default: cv2.INTER_LINEAR.
+         mask_interpolation (OpenCV flag): flag that is used to specify the interpolation algorithm for mask.
+             Should be one of: cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4.
+             Default: cv2.INTER_NEAREST.
+         area_for_downscale (Literal[None, "image", "image_mask"]): Controls automatic use of INTER_AREA interpolation
+             for downscaling. Options:
+             - None: No automatic interpolation selection, always use the specified interpolation method
+             - "image": Use INTER_AREA when downscaling images, retain specified interpolation for upscaling and masks
+             - "image_mask": Use INTER_AREA when downscaling both images and masks
+             Default: None.
+         p (float): probability of applying the transform. Default: 1.
+
+     Targets:
+         image, mask, bboxes, keypoints, volume, mask3d
+
+     Image types:
+         uint8, float32
+
+     Examples:
+         >>> import numpy as np
+         >>> import albumentations as A
+         >>> import cv2
+         >>>
+         >>> # Create sample data for demonstration
+         >>> image = np.zeros((100, 100, 3), dtype=np.uint8)
+         >>> # Add some shapes to visualize resize effects
+         >>> cv2.rectangle(image, (25, 25), (75, 75), (255, 0, 0), -1) # Red square
+         >>> cv2.circle(image, (50, 50), 10, (0, 255, 0), -1) # Green circle
+         >>>
+         >>> # Create a mask for segmentation
+         >>> mask = np.zeros((100, 100), dtype=np.uint8)
+         >>> mask[25:75, 25:75] = 1 # Mask covering the red square
+         >>>
+         >>> # Create bounding boxes and keypoints
+         >>> bboxes = np.array([[25, 25, 75, 75]]) # Box around the red square
+         >>> bbox_labels = [1]
+         >>> keypoints = np.array([[50, 50]]) # Center of circle
+         >>> keypoint_labels = [0]
+         >>>
+         >>> # Resize all data to 224x224 (common input size for many CNNs)
+         >>> transform = A.Compose([
+         ...     A.Resize(
+         ...         height=224,
+         ...         width=224,
+         ...         interpolation=cv2.INTER_LINEAR,
+         ...         mask_interpolation=cv2.INTER_NEAREST,
+         ...         area_for_downscale="image", # Use INTER_AREA when downscaling images
+         ...         p=1.0
+         ...     )
+         ... ], bbox_params=A.BboxParams(format='pascal_voc', label_fields=['bbox_labels']),
+         ...    keypoint_params=A.KeypointParams(format='xy', label_fields=['keypoint_labels']))
+         >>>
+         >>> # Apply the transform to all targets
+         >>> result = transform(
+         ...     image=image,
+         ...     mask=mask,
+         ...     bboxes=bboxes,
+         ...     bbox_labels=bbox_labels,
+         ...     keypoints=keypoints,
+         ...     keypoint_labels=keypoint_labels
+         ... )
+         >>>
+         >>> # Get the transformed results
+         >>> resized_image = result['image'] # Shape will be (224, 224, 3)
+         >>> resized_mask = result['mask'] # Shape will be (224, 224)
+         >>> resized_bboxes = result['bboxes'] # Bounding boxes scaled to new dimensions
+         >>> resized_bbox_labels = result['bbox_labels'] # Labels remain unchanged
+         >>> resized_keypoints = result['keypoints'] # Keypoints scaled to new dimensions
+         >>> resized_keypoint_labels = result['keypoint_labels'] # Labels remain unchanged
+         >>>
+         >>> # Note: When resizing from 100x100 to 224x224:
+         >>> # - The red square will be scaled from (25-75) to approximately (56-168)
+         >>> # - The keypoint at (50, 50) will move to approximately (112, 112)
+         >>> # - All spatial relationships are preserved but coordinates are scaled
+
+     """
+
+     _targets = ALL_TARGETS
+
+     class InitSchema(BaseTransformInitSchema):
+         height: int = Field(ge=1)
+         width: int = Field(ge=1)
+         area_for_downscale: Literal[None, "image", "image_mask"]
+         interpolation: Literal[
+             cv2.INTER_NEAREST,
+             cv2.INTER_NEAREST_EXACT,
+             cv2.INTER_LINEAR,
+             cv2.INTER_CUBIC,
+             cv2.INTER_AREA,
+             cv2.INTER_LANCZOS4,
+             cv2.INTER_LINEAR_EXACT,
+         ]
+         mask_interpolation: Literal[
+             cv2.INTER_NEAREST,
+             cv2.INTER_NEAREST_EXACT,
+             cv2.INTER_LINEAR,
+             cv2.INTER_CUBIC,
+             cv2.INTER_AREA,
+             cv2.INTER_LANCZOS4,
+             cv2.INTER_LINEAR_EXACT,
+         ]
+
+     def __init__(
+         self,
+         height: int,
+         width: int,
+         interpolation: Literal[
+             cv2.INTER_NEAREST,
+             cv2.INTER_NEAREST_EXACT,
+             cv2.INTER_LINEAR,
+             cv2.INTER_CUBIC,
+             cv2.INTER_AREA,
+             cv2.INTER_LANCZOS4,
+             cv2.INTER_LINEAR_EXACT,
+         ] = cv2.INTER_LINEAR,
+         mask_interpolation: Literal[
+             cv2.INTER_NEAREST,
+             cv2.INTER_NEAREST_EXACT,
+             cv2.INTER_LINEAR,
+             cv2.INTER_CUBIC,
+             cv2.INTER_AREA,
+             cv2.INTER_LANCZOS4,
+             cv2.INTER_LINEAR_EXACT,
+         ] = cv2.INTER_NEAREST,
+         area_for_downscale: Literal[None, "image", "image_mask"] = None,
+         p: float = 1,
+     ):
+         super().__init__(p=p)
+         self.height = height
+         self.width = width
+         self.interpolation = interpolation
+         self.mask_interpolation = mask_interpolation
+         self.area_for_downscale = area_for_downscale
+
+     def apply(self, img: np.ndarray, **params: Any) -> np.ndarray:
+         """Apply resizing to the image.
+
+         Args:
+             img (np.ndarray): Image to resize.
+             **params (Any): Additional parameters.
+
+         Returns:
+             np.ndarray: Resized image.
+
+         """
+         height, width = img.shape[:2]
+         is_downscale = (self.height < height) or (self.width < width)
+
+         interpolation = self.interpolation
+         if self.area_for_downscale in ["image", "image_mask"] and is_downscale:
+             interpolation = cv2.INTER_AREA
+
+         return fgeometric.resize(img, (self.height, self.width), interpolation=interpolation)
+
+     def apply_to_mask(self, mask: np.ndarray, **params: Any) -> np.ndarray:
+         """Apply resizing to the mask.
+
+         Args:
+             mask (np.ndarray): Mask to resize.
+             **params (Any): Additional parameters.
+
+         Returns:
+             np.ndarray: Resized mask.
+
+         """
+         height, width = mask.shape[:2]
+         is_downscale = (self.height < height) or (self.width < width)
+
+         interpolation = self.mask_interpolation
+         if self.area_for_downscale == "image_mask" and is_downscale:
+             interpolation = cv2.INTER_AREA
+
+         return fgeometric.resize(mask, (self.height, self.width), interpolation=interpolation)
+
+     def apply_to_bboxes(self, bboxes: np.ndarray, **params: Any) -> np.ndarray:
+         """Apply the transform to bounding boxes.
+
+         Args:
+             bboxes (np.ndarray): Bounding boxes to transform.
+             **params (Any): Additional parameters.
+
+         Returns:
+             np.ndarray: Transformed bounding boxes which are scale invariant.
+
+         """
+         # Bounding box coordinates are scale invariant
+         return bboxes
+
+     def apply_to_keypoints(self, keypoints: np.ndarray, **params: Any) -> np.ndarray:
+         """Apply resizing to keypoints.
+
+         Args:
+             keypoints (np.ndarray): Keypoints to resize.
+             **params (Any): Additional parameters.
+
+         Returns:
+             np.ndarray: Resized keypoints.
+
+         """
+         height, width = params["shape"][:2]
+         scale_x = self.width / width
+         scale_y = self.height / height
+         return fgeometric.keypoints_scale(keypoints, scale_x, scale_y)
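
The three public transforms in this module cover the usual resizing patterns: Resize forces an exact output shape, LongestMaxSize caps the longest side, and SmallestMaxSize raises the smallest side. The sketch below is a minimal end-to-end check of that difference; it assumes the wheel is importable under the albumentations package name used throughout these docstrings, and the expected shapes follow from the scale formulas above.

    import numpy as np
    import albumentations as A

    image = np.zeros((480, 640, 3), dtype=np.uint8)  # H=480, W=640

    fixed = A.Resize(height=224, width=224, p=1.0)     # exact target shape, aspect ratio not preserved
    longest = A.LongestMaxSize(max_size=256, p=1.0)    # longest side capped at 256
    smallest = A.SmallestMaxSize(max_size=256, p=1.0)  # smallest side brought to 256

    print(fixed(image=image)["image"].shape)     # (224, 224, 3)
    print(longest(image=image)["image"].shape)   # (192, 256, 3): s = 256/640 = 0.4
    print(smallest(image=image)["image"].shape)  # (256, 341, 3): s = 256/480 ~= 0.533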