nrtk-albumentations 2.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nrtk-albumentations might be problematic. Click here for more details.

Files changed (62)
  1. albumentations/__init__.py +21 -0
  2. albumentations/augmentations/__init__.py +23 -0
  3. albumentations/augmentations/blur/__init__.py +0 -0
  4. albumentations/augmentations/blur/functional.py +438 -0
  5. albumentations/augmentations/blur/transforms.py +1633 -0
  6. albumentations/augmentations/crops/__init__.py +0 -0
  7. albumentations/augmentations/crops/functional.py +494 -0
  8. albumentations/augmentations/crops/transforms.py +3647 -0
  9. albumentations/augmentations/dropout/__init__.py +0 -0
  10. albumentations/augmentations/dropout/channel_dropout.py +134 -0
  11. albumentations/augmentations/dropout/coarse_dropout.py +567 -0
  12. albumentations/augmentations/dropout/functional.py +1017 -0
  13. albumentations/augmentations/dropout/grid_dropout.py +166 -0
  14. albumentations/augmentations/dropout/mask_dropout.py +274 -0
  15. albumentations/augmentations/dropout/transforms.py +461 -0
  16. albumentations/augmentations/dropout/xy_masking.py +186 -0
  17. albumentations/augmentations/geometric/__init__.py +0 -0
  18. albumentations/augmentations/geometric/distortion.py +1238 -0
  19. albumentations/augmentations/geometric/flip.py +752 -0
  20. albumentations/augmentations/geometric/functional.py +4151 -0
  21. albumentations/augmentations/geometric/pad.py +676 -0
  22. albumentations/augmentations/geometric/resize.py +956 -0
  23. albumentations/augmentations/geometric/rotate.py +864 -0
  24. albumentations/augmentations/geometric/transforms.py +1962 -0
  25. albumentations/augmentations/mixing/__init__.py +0 -0
  26. albumentations/augmentations/mixing/domain_adaptation.py +787 -0
  27. albumentations/augmentations/mixing/domain_adaptation_functional.py +453 -0
  28. albumentations/augmentations/mixing/functional.py +878 -0
  29. albumentations/augmentations/mixing/transforms.py +832 -0
  30. albumentations/augmentations/other/__init__.py +0 -0
  31. albumentations/augmentations/other/lambda_transform.py +180 -0
  32. albumentations/augmentations/other/type_transform.py +261 -0
  33. albumentations/augmentations/pixel/__init__.py +0 -0
  34. albumentations/augmentations/pixel/functional.py +4226 -0
  35. albumentations/augmentations/pixel/transforms.py +7556 -0
  36. albumentations/augmentations/spectrogram/__init__.py +0 -0
  37. albumentations/augmentations/spectrogram/transform.py +220 -0
  38. albumentations/augmentations/text/__init__.py +0 -0
  39. albumentations/augmentations/text/functional.py +272 -0
  40. albumentations/augmentations/text/transforms.py +299 -0
  41. albumentations/augmentations/transforms3d/__init__.py +0 -0
  42. albumentations/augmentations/transforms3d/functional.py +393 -0
  43. albumentations/augmentations/transforms3d/transforms.py +1422 -0
  44. albumentations/augmentations/utils.py +249 -0
  45. albumentations/core/__init__.py +0 -0
  46. albumentations/core/bbox_utils.py +920 -0
  47. albumentations/core/composition.py +1885 -0
  48. albumentations/core/hub_mixin.py +299 -0
  49. albumentations/core/keypoints_utils.py +521 -0
  50. albumentations/core/label_manager.py +339 -0
  51. albumentations/core/pydantic.py +239 -0
  52. albumentations/core/serialization.py +352 -0
  53. albumentations/core/transforms_interface.py +976 -0
  54. albumentations/core/type_definitions.py +127 -0
  55. albumentations/core/utils.py +605 -0
  56. albumentations/core/validation.py +129 -0
  57. albumentations/pytorch/__init__.py +1 -0
  58. albumentations/pytorch/transforms.py +189 -0
  59. nrtk_albumentations-2.1.0.dist-info/METADATA +196 -0
  60. nrtk_albumentations-2.1.0.dist-info/RECORD +62 -0
  61. nrtk_albumentations-2.1.0.dist-info/WHEEL +4 -0
  62. nrtk_albumentations-2.1.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,864 @@
1
+ """Transforms for rotating images and associated data.
2
+
3
+ This module provides classes for rotating images, masks, bounding boxes, and keypoints.
4
+ Includes transforms for 90-degree rotations and arbitrary angle rotations with various
5
+ border handling options.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import math
11
+ from typing import Any, cast
12
+
13
+ import cv2
14
+ import numpy as np
15
+ from typing_extensions import Literal
16
+
17
+ from albumentations.augmentations.crops import functional as fcrops
18
+ from albumentations.augmentations.geometric.transforms import Affine
19
+ from albumentations.core.pydantic import SymmetricRangeType
20
+ from albumentations.core.transforms_interface import (
21
+ BaseTransformInitSchema,
22
+ DualTransform,
23
+ )
24
+ from albumentations.core.type_definitions import ALL_TARGETS
25
+
26
+ from . import functional as fgeometric
27
+
28
+ __all__ = ["RandomRotate90", "Rotate", "SafeRotate"]
29
+
30
+ SMALL_NUMBER = 1e-10
31
+
32
+
33
class RandomRotate90(DualTransform):
    """Rotate the input by a randomly chosen multiple of 90 degrees.

    The number of quarter turns is drawn from {0, 1, 2, 3}, so one draw in four
    leaves the input untouched even when the transform fires:
        - With probability p * 1/4: 0 degrees (identity)
        - With probability p * 1/4: 90 degrees
        - With probability p * 1/4: 180 degrees
        - With probability p * 1/4: 270 degrees

    For example:
        - p=1.0: every angle (0° included) has probability 0.25
        - p=0.8: every angle has probability 0.2, and the transform is skipped with probability 0.2
        - p=0.5: every angle has probability 0.125, and the transform is skipped with probability 0.5

    Typical use cases:
        - Aerial/satellite imagery, where objects appear in any orientation
        - Medical imaging, where scans/slides have no consistent orientation
        - Document analysis, where pages or symbols may be rotated
        - Microscopy, where cell orientation is usually arbitrary
        - Game development, for sprites/textures that must work in several orientations

    Usually a poor fit for:
        - Natural scenes where gravity matters (e.g., landscape photography)
        - Face detection/recognition
        - Text recognition (unless rotated text is expected)
        - Any task where object orientation carries class information

    Note:
        When the domain is symmetric under both 90-degree rotations AND flips
        (e.g., satellite imagery, microscopy), prefer the `D4` transform. `D4`:
        - Samples uniformly from all 8 combinations of rotations and flips
        - Represents the dihedral group D4 symmetries properly
        - Avoids correlation between separately applied rotation and flip augmentations

    Args:
        p (float): probability of applying the transform. Default: 1.0.
            Even at p=1.0 there is a 0.25 chance of drawing the 0-degree
            rotation (identity transform).

    Targets:
        image, mask, bboxes, keypoints, volume, mask3d

    Image types:
        uint8, float32

    Examples:
        >>> import numpy as np
        >>> import albumentations as A
        >>> # Create example data
        >>> image = np.random.randint(0, 256, (100, 100, 3), dtype=np.uint8)
        >>> mask = np.random.randint(0, 2, (100, 100), dtype=np.uint8)
        >>> bboxes = np.array([[10, 10, 50, 50], [40, 40, 80, 80]], dtype=np.float32)
        >>> bbox_labels = [1, 2]  # Class labels for bounding boxes
        >>> keypoints = np.array([[20, 30], [60, 70]], dtype=np.float32)
        >>> keypoint_labels = [0, 1]  # Labels for keypoints
        >>> # Define the transform
        >>> transform = A.Compose([
        ...     A.RandomRotate90(p=1.0),
        ... ], bbox_params=A.BboxParams(format='pascal_voc', label_fields=['bbox_labels']),
        ...    keypoint_params=A.KeypointParams(format='xy', label_fields=['keypoint_labels']))
        >>> # Apply the transform to all targets
        >>> transformed = transform(
        ...     image=image,
        ...     mask=mask,
        ...     bboxes=bboxes,
        ...     bbox_labels=bbox_labels,
        ...     keypoints=keypoints,
        ...     keypoint_labels=keypoint_labels
        ... )
        >>> rotated_image = transformed["image"]
        >>> rotated_mask = transformed["mask"]
        >>> rotated_bboxes = transformed["bboxes"]
        >>> rotated_bbox_labels = transformed["bbox_labels"]
        >>> rotated_keypoints = transformed["keypoints"]
        >>> rotated_keypoint_labels = transformed["keypoint_labels"]

    """

    _targets = ALL_TARGETS

    def __init__(self, p: float = 1):
        super().__init__(p=p)

    def get_params(self) -> dict[str, int]:
        """Sample the number of quarter turns shared by every target of one call.

        Returns:
            dict[str, int]: ``{"factor": k}`` where ``k`` comes from
                ``self.py_random.randint(0, 3)``, i.e. an int in the range [0, 3].

        """
        quarter_turns = self.py_random.randint(0, 3)
        return {"factor": quarter_turns}

    # ------------------------------------------------------------------ 2D targets

    def apply(self, img: np.ndarray, factor: Literal[0, 1, 2, 3], **params: Any) -> np.ndarray:
        """Rotate a single image.

        Args:
            img (np.ndarray): Image to rotate.
            factor (Literal[0, 1, 2, 3]): Number of 90-degree rotations to apply.
            **params (Any): Additional parameters (unused here).

        Returns:
            np.ndarray: Result of ``fgeometric.rot90(img, factor)``.

        """
        rotated = fgeometric.rot90(img, factor)
        return rotated

    def apply_to_images(self, images: np.ndarray, factor: Literal[0, 1, 2, 3], **params: Any) -> np.ndarray:
        """Rotate a batch of images with one shared factor.

        Args:
            images (np.ndarray): Images to rotate.
            factor (Literal[0, 1, 2, 3]): Number of 90-degree rotations to apply.
            **params (Any): Additional parameters (unused here).

        Returns:
            np.ndarray: Result of ``fgeometric.rot90_images(images, factor)``.

        """
        rotated = fgeometric.rot90_images(images, factor)
        return rotated

    def apply_to_bboxes(
        self,
        bboxes: np.ndarray,
        factor: Literal[0, 1, 2, 3],
        **params: Any,
    ) -> np.ndarray:
        """Rotate bounding boxes consistently with the image.

        Args:
            bboxes (np.ndarray): Bounding boxes to rotate.
            factor (Literal[0, 1, 2, 3]): Number of 90-degree rotations to apply.
            **params (Any): Additional parameters (unused here).

        Returns:
            np.ndarray: Result of ``fgeometric.bboxes_rot90(bboxes, factor)``.

        """
        rotated = fgeometric.bboxes_rot90(bboxes, factor)
        return rotated

    def apply_to_keypoints(
        self,
        keypoints: np.ndarray,
        factor: Literal[0, 1, 2, 3],
        **params: Any,
    ) -> np.ndarray:
        """Rotate keypoints consistently with the image.

        Args:
            keypoints (np.ndarray): Keypoints to rotate.
            factor (Literal[0, 1, 2, 3]): Number of 90-degree rotations to apply.
            **params (Any): Additional parameters; ``params["shape"]`` is required
                and is forwarded unchanged to ``fgeometric.keypoints_rot90``.

        Returns:
            np.ndarray: Rotated keypoints.

        """
        # Unlike the other targets, keypoints need the input shape to be remapped.
        input_shape = params["shape"]
        return fgeometric.keypoints_rot90(keypoints, factor, input_shape)

    # ------------------------------------------------------------------ 3D targets

    def apply_to_volume(self, volume: np.ndarray, factor: Literal[0, 1, 2, 3], **params: Any) -> np.ndarray:
        """Rotate a single volume.

        Delegates to `apply_to_images`, i.e. the volume goes through the same code
        path as a batch of images.

        Args:
            volume (np.ndarray): Volume to rotate.
            factor (Literal[0, 1, 2, 3]): Number of 90-degree rotations to apply.
            **params (Any): Additional parameters, forwarded as-is.

        Returns:
            np.ndarray: Rotated volume.

        """
        return self.apply_to_images(volume, factor, **params)

    def apply_to_volumes(self, volumes: np.ndarray, factor: Literal[0, 1, 2, 3], **params: Any) -> np.ndarray:
        """Rotate a batch of volumes with one shared factor.

        Args:
            volumes (np.ndarray): Volumes to rotate.
            factor (Literal[0, 1, 2, 3]): Number of 90-degree rotations to apply.
            **params (Any): Additional parameters (unused here).

        Returns:
            np.ndarray: Result of ``fgeometric.volumes_rot90(volumes, factor)``.

        """
        rotated = fgeometric.volumes_rot90(volumes, factor)
        return rotated

    def apply_to_mask3d(self, mask3d: np.ndarray, factor: Literal[0, 1, 2, 3], **params: Any) -> np.ndarray:
        """Rotate a single 3D mask.

        Delegates to `apply_to_images`, the same path used for a single volume.

        Args:
            mask3d (np.ndarray): Mask3d to rotate.
            factor (Literal[0, 1, 2, 3]): Number of 90-degree rotations to apply.
            **params (Any): Additional parameters, forwarded as-is.

        Returns:
            np.ndarray: Rotated mask3d.

        """
        return self.apply_to_images(mask3d, factor, **params)

    def apply_to_masks3d(self, masks3d: np.ndarray, factor: Literal[0, 1, 2, 3], **params: Any) -> np.ndarray:
        """Rotate a batch of 3D masks.

        Delegates to `apply_to_volumes`, the same path used for a batch of volumes.

        Args:
            masks3d (np.ndarray): Masks3d to rotate.
            factor (Literal[0, 1, 2, 3]): Number of 90-degree rotations to apply.
            **params (Any): Additional parameters, forwarded as-is.

        Returns:
            np.ndarray: Rotated masks3d.

        """
        return self.apply_to_volumes(masks3d, factor, **params)
251
+
252
+
253
class RotateInitSchema(BaseTransformInitSchema):
    # Constructor-argument schema shared by the rotation transforms in this module;
    # `Rotate.InitSchema` and `SafeRotate.InitSchema` both derive from it.

    # Angle range in degrees; the rotation transforms sample from it with `uniform(*limit)`.
    limit: SymmetricRangeType

    # OpenCV interpolation flag used when warping images.
    interpolation: Literal[
        cv2.INTER_NEAREST,
        cv2.INTER_LINEAR,
        cv2.INTER_CUBIC,
        cv2.INTER_AREA,
        cv2.INTER_LANCZOS4,
    ]

    # OpenCV interpolation flag used when warping masks.
    mask_interpolation: Literal[
        cv2.INTER_NEAREST,
        cv2.INTER_LINEAR,
        cv2.INTER_CUBIC,
        cv2.INTER_AREA,
        cv2.INTER_LANCZOS4,
    ]

    # OpenCV pixel-extrapolation flag for areas the warped input does not cover.
    border_mode: Literal[
        cv2.BORDER_CONSTANT,
        cv2.BORDER_REPLICATE,
        cv2.BORDER_REFLECT,
        cv2.BORDER_WRAP,
        cv2.BORDER_REFLECT_101,
    ]

    # Padding values for images and masks; only `fill_mask` may be None at this level.
    fill: tuple[float, ...] | float
    fill_mask: tuple[float, ...] | float | None
276
+
277
+
278
class Rotate(DualTransform):
    """Rotate the input by an angle drawn uniformly from a configurable range.

    Args:
        limit (float | tuple[float, float]): Range the random angle is drawn from. A single float
            means the range (-limit, limit). Default: (-90, 90)
        interpolation (OpenCV flag): Interpolation algorithm for images. One of:
            cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4.
            Default: cv2.INTER_LINEAR.
        border_mode (OpenCV flag): Pixel extrapolation method. One of:
            cv2.BORDER_CONSTANT, cv2.BORDER_REPLICATE, cv2.BORDER_REFLECT, cv2.BORDER_WRAP, cv2.BORDER_REFLECT_101.
            Default: cv2.BORDER_CONSTANT
        fill (tuple[float, ...] | float): Padding value if border_mode is cv2.BORDER_CONSTANT.
        fill_mask (tuple[float, ...] | float): Padding value for masks if border_mode is cv2.BORDER_CONSTANT.
        rotate_method (Literal["largest_box", "ellipse"]): Method used to rotate bounding boxes.
            Should be 'largest_box' or 'ellipse'. Default: 'largest_box'
        crop_border (bool): Whether to crop the border after rotation. If True, the output size may differ
            from the input size. Default: False
        mask_interpolation (OpenCV flag): Interpolation algorithm for masks. One of:
            cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4.
            Default: cv2.INTER_NEAREST.
        p (float): Probability of applying the transform. Default: 0.5.

    Targets:
        image, mask, bboxes, keypoints, volume, mask3d

    Image types:
        uint8, float32

    Note:
        - A new angle is drawn from 'limit' on every execution.
        - With 'crop_border' False the output keeps the input size; with the default constant
          border and zero fill this leaves black triangles in the corners.
        - With 'crop_border' True the output is cropped to remove those triangles, so it may be
          smaller than the input.
        - Bounding boxes are rotated and may change size or shape.
        - Keypoints are rotated around the center of the image.

    Mathematical Details:
        1. An angle θ is randomly sampled from the range specified by 'limit'.
        2. The image is rotated around its center by θ degrees.
        3. The rotation matrix R is:
           R = [cos(θ)  -sin(θ)]
               [sin(θ)   cos(θ)]
        4. Each point (x, y) in the image is transformed to (x', y') by:
           [x']   [cos(θ)  -sin(θ)] [x - cx]   [cx]
           [y'] = [sin(θ)   cos(θ)] [y - cy] + [cy]
           where (cx, cy) is the center of the image.
        5. If 'crop_border' is True, the image is cropped to the largest axis-aligned rectangle
           that fits inside the rotated image.

    Examples:
        >>> import numpy as np
        >>> import albumentations as A
        >>> # Create example data
        >>> image = np.random.randint(0, 256, (100, 100, 3), dtype=np.uint8)
        >>> mask = np.random.randint(0, 2, (100, 100), dtype=np.uint8)
        >>> bboxes = np.array([[10, 10, 50, 50], [40, 40, 80, 80]], dtype=np.float32)
        >>> bbox_labels = [1, 2]  # Class labels for bounding boxes
        >>> keypoints = np.array([[20, 30], [60, 70]], dtype=np.float32)
        >>> keypoint_labels = [0, 1]  # Labels for keypoints
        >>> # Define the transform
        >>> transform = A.Compose([
        ...     A.Rotate(limit=45, p=1.0),
        ... ], bbox_params=A.BboxParams(format='pascal_voc', label_fields=['bbox_labels']),
        ...    keypoint_params=A.KeypointParams(format='xy', label_fields=['keypoint_labels']))
        >>> # Apply the transform to all targets
        >>> transformed = transform(
        ...     image=image,
        ...     mask=mask,
        ...     bboxes=bboxes,
        ...     bbox_labels=bbox_labels,
        ...     keypoints=keypoints,
        ...     keypoint_labels=keypoint_labels
        ... )
        >>> rotated_image = transformed["image"]
        >>> rotated_mask = transformed["mask"]
        >>> rotated_bboxes = transformed["bboxes"]
        >>> rotated_bbox_labels = transformed["bbox_labels"]
        >>> rotated_keypoints = transformed["keypoints"]
        >>> rotated_keypoint_labels = transformed["keypoint_labels"]

    """

    _targets = ALL_TARGETS

    class InitSchema(RotateInitSchema):
        # Strategy name forwarded to `fgeometric.bboxes_affine`.
        rotate_method: Literal["largest_box", "ellipse"]
        # Whether every target is cropped to the window computed for the sampled angle.
        crop_border: bool

        # Re-declared to narrow the base schema: `fill_mask` may not be None here.
        fill: tuple[float, ...] | float
        fill_mask: tuple[float, ...] | float

    def __init__(
        self,
        limit: tuple[float, float] | float = (-90, 90),
        interpolation: Literal[
            cv2.INTER_NEAREST,
            cv2.INTER_LINEAR,
            cv2.INTER_CUBIC,
            cv2.INTER_AREA,
            cv2.INTER_LANCZOS4,
        ] = cv2.INTER_LINEAR,
        border_mode: Literal[
            cv2.BORDER_CONSTANT,
            cv2.BORDER_REPLICATE,
            cv2.BORDER_REFLECT,
            cv2.BORDER_WRAP,
            cv2.BORDER_REFLECT_101,
        ] = cv2.BORDER_CONSTANT,
        rotate_method: Literal["largest_box", "ellipse"] = "largest_box",
        crop_border: bool = False,
        mask_interpolation: Literal[
            cv2.INTER_NEAREST,
            cv2.INTER_LINEAR,
            cv2.INTER_CUBIC,
            cv2.INTER_AREA,
            cv2.INTER_LANCZOS4,
        ] = cv2.INTER_NEAREST,
        fill: tuple[float, ...] | float = 0,
        fill_mask: tuple[float, ...] | float = 0,
        p: float = 0.5,
    ):
        super().__init__(p=p)

        # `cast` only informs the type checker; no runtime conversion happens here.
        # NOTE(review): `get_params_dependent_on_data` unpacks `self.limit`, so a scalar
        # must already have been expanded to a pair upstream (presumably by InitSchema
        # validation) - confirm.
        self.limit = cast("tuple[float, float]", limit)

        # Geometry / cropping behaviour.
        self.rotate_method = rotate_method
        self.crop_border = crop_border
        self.border_mode = border_mode

        # Resampling and padding, kept separately for images and masks.
        self.interpolation = interpolation
        self.mask_interpolation = mask_interpolation
        self.fill = fill
        self.fill_mask = fill_mask

    def _warp_then_crop(
        self,
        array: np.ndarray,
        matrix: np.ndarray,
        interpolation: int,
        fill: tuple[float, ...] | float,
        crop_coords: tuple[int, int, int, int],
        image_shape: tuple[int, int],
    ) -> np.ndarray:
        """Warp `array` with `matrix` and, when `crop_border` is set, crop the result.

        Shared by `apply` and `apply_to_mask`, which differ only in the interpolation
        flag and fill value they pass in. `crop_coords` is (x_min, y_min, x_max, y_max).
        """
        warped = fgeometric.warp_affine(
            array,
            matrix,
            interpolation,
            fill,
            self.border_mode,
            image_shape,
        )
        if not self.crop_border:
            return warped
        return fcrops.crop(warped, *crop_coords)

    def apply(
        self,
        img: np.ndarray,
        matrix: np.ndarray,
        x_min: int,
        x_max: int,
        y_min: int,
        y_max: int,
        **params: Any,
    ) -> np.ndarray:
        """Rotate the image, optionally cropping away the border.

        Args:
            img (np.ndarray): Image to transform.
            matrix (np.ndarray): Affine transformation matrix.
            x_min (int): Left edge of the crop window (used only if crop_border is True).
            x_max (int): Right edge of the crop window (used only if crop_border is True).
            y_min (int): Top edge of the crop window (used only if crop_border is True).
            y_max (int): Bottom edge of the crop window (used only if crop_border is True).
            **params (Any): Additional parameters; ``params["shape"]`` is required.

        Returns:
            np.ndarray: Transformed image.

        """
        return self._warp_then_crop(
            img,
            matrix,
            self.interpolation,
            self.fill,
            (x_min, y_min, x_max, y_max),
            params["shape"][:2],
        )

    def apply_to_mask(
        self,
        mask: np.ndarray,
        matrix: np.ndarray,
        x_min: int,
        x_max: int,
        y_min: int,
        y_max: int,
        **params: Any,
    ) -> np.ndarray:
        """Rotate the mask, optionally cropping away the border.

        Same as `apply`, but uses `mask_interpolation` and `fill_mask`.

        Args:
            mask (np.ndarray): Mask to transform.
            matrix (np.ndarray): Affine transformation matrix.
            x_min (int): Left edge of the crop window (used only if crop_border is True).
            x_max (int): Right edge of the crop window (used only if crop_border is True).
            y_min (int): Top edge of the crop window (used only if crop_border is True).
            y_max (int): Bottom edge of the crop window (used only if crop_border is True).
            **params (Any): Additional parameters; ``params["shape"]`` is required.

        Returns:
            np.ndarray: Transformed mask.

        """
        return self._warp_then_crop(
            mask,
            matrix,
            self.mask_interpolation,
            self.fill_mask,
            (x_min, y_min, x_max, y_max),
            params["shape"][:2],
        )

    def apply_to_bboxes(
        self,
        bboxes: np.ndarray,
        bbox_matrix: np.ndarray,
        x_min: int,
        x_max: int,
        y_min: int,
        y_max: int,
        **params: Any,
    ) -> np.ndarray:
        """Rotate bounding boxes, optionally re-expressing them in the cropped frame.

        Args:
            bboxes (np.ndarray): Bounding boxes to transform.
            bbox_matrix (np.ndarray): Affine transformation matrix for bounding boxes.
            x_min (int): Left edge of the crop window (used only if crop_border is True).
            x_max (int): Right edge of the crop window (used only if crop_border is True).
            y_min (int): Top edge of the crop window (used only if crop_border is True).
            y_max (int): Bottom edge of the crop window (used only if crop_border is True).
            **params (Any): Additional parameters; ``params["shape"]`` is required.

        Returns:
            np.ndarray: Transformed bounding boxes.

        """
        image_shape = params["shape"][:2]
        # The same shape is passed twice: once as the input shape, once as the output shape.
        rotated = fgeometric.bboxes_affine(
            bboxes,
            bbox_matrix,
            self.rotate_method,
            image_shape,
            self.border_mode,
            image_shape,
        )
        if not self.crop_border:
            return rotated

        crop_coords = (x_min, y_min, x_max, y_max)
        return fcrops.crop_bboxes_by_coords(rotated, crop_coords, image_shape)

    def apply_to_keypoints(
        self,
        keypoints: np.ndarray,
        matrix: np.ndarray,
        x_min: int,
        x_max: int,
        y_min: int,
        y_max: int,
        **params: Any,
    ) -> np.ndarray:
        """Rotate keypoints, optionally re-expressing them in the cropped frame.

        Args:
            keypoints (np.ndarray): Keypoints to transform.
            matrix (np.ndarray): Affine transformation matrix.
            x_min (int): Left edge of the crop window (used only if crop_border is True).
            x_max (int): Right edge of the crop window (used only if crop_border is True).
            y_min (int): Top edge of the crop window (used only if crop_border is True).
            y_max (int): Bottom edge of the crop window (used only if crop_border is True).
            **params (Any): Additional parameters; ``params["shape"]`` is required.

        Returns:
            np.ndarray: Transformed keypoints.

        """
        rotated = fgeometric.keypoints_affine(
            keypoints,
            matrix,
            params["shape"][:2],
            scale={"x": 1, "y": 1},  # a pure rotation never rescales
            border_mode=self.border_mode,
        )
        if not self.crop_border:
            return rotated

        crop_coords = (x_min, y_min, x_max, y_max)
        return fcrops.crop_keypoints_by_coords(rotated, crop_coords)

    @staticmethod
    def _rotated_rect_with_max_area(
        height: int,
        width: int,
        angle: float,
    ) -> dict[str, int]:
        """Compute the centered crop window of the largest axis-aligned rectangle
        (maximal area) that fits inside a `width` x `height` rectangle rotated by
        `angle` degrees.

        Returns:
            dict[str, int]: ``x_min``, ``x_max``, ``y_min``, ``y_max`` of the window,
                clamped to ``[0, width]`` and ``[0, height]`` respectively.

        References:
            Rotate image and crop out black borders: https://stackoverflow.com/questions/16702966/rotate-image-and-crop-out-black-borders

        """
        theta = math.radians(angle)
        # The solutions for angle, -angle and 180-angle coincide, so only the
        # absolute values of sin and cos (first quadrant) are needed.
        sin_a = abs(math.sin(theta))
        cos_a = abs(math.cos(theta))

        width_is_longer = width >= height
        if width_is_longer:
            side_long, side_short = width, height
        else:
            side_long, side_short = height, width

        half_constrained = side_short <= 2.0 * sin_a * cos_a * side_long or abs(sin_a - cos_a) < SMALL_NUMBER
        if half_constrained:
            # Two crop corners touch the longer side; the other two lie on the
            # mid-line parallel to the longer side.
            x = 0.5 * side_short
            if width_is_longer:
                wr, hr = x / sin_a, x / cos_a
            else:
                wr, hr = x / cos_a, x / sin_a
        else:
            # Fully constrained: the crop touches all four sides.
            cos_2a = cos_a * cos_a - sin_a * sin_a
            wr = (width * cos_a - height * sin_a) / cos_2a
            hr = (height * cos_a - width * sin_a) / cos_2a

        return {
            "x_min": max(0, int(width / 2 - wr / 2)),
            "x_max": min(width, int(width / 2 + wr / 2)),
            "y_min": max(0, int(height / 2 - hr / 2)),
            "y_max": min(height, int(height / 2 + hr / 2)),
        }

    def get_params_dependent_on_data(
        self,
        params: dict[str, Any],
        data: dict[str, Any],
    ) -> dict[str, Any]:
        """Sample the angle and build the matrices and crop window for one call.

        Args:
            params (dict[str, Any]): Dictionary containing parameters; ``params["shape"]`` is required.
            data (dict[str, Any]): Dictionary containing data (unused here).

        Returns:
            dict[str, Any]: ``x_min``, ``x_max``, ``y_min``, ``y_max`` (all -1 when
                crop_border is False), plus ``matrix`` and ``bbox_matrix``.

        """
        # Draw the angle before anything else so the random stream is consumed first.
        angle = self.py_random.uniform(*self.limit)
        image_shape = params["shape"][:2]

        if self.crop_border:
            height, width = image_shape
            crop_window = self._rotated_rect_with_max_area(height, width, angle)
        else:
            # Placeholders: the apply_* methods ignore them when crop_border is False.
            crop_window = {"x_min": -1, "x_max": -1, "y_min": -1, "y_max": -1}

        image_center = fgeometric.center(image_shape)
        bbox_center = fgeometric.center_bbox(image_shape)

        # Pure rotation: identity translation, shear and scale for both matrices.
        no_translate: dict[str, int] = {"x": 0, "y": 0}
        no_shear: dict[str, float] = {"x": 0, "y": 0}
        unit_scale: dict[str, float] = {"x": 1, "y": 1}

        # Images and bboxes use different pivots, hence two matrices.
        matrix = fgeometric.create_affine_transformation_matrix(
            no_translate,
            no_shear,
            unit_scale,
            angle,
            image_center,
        )
        bbox_matrix = fgeometric.create_affine_transformation_matrix(
            no_translate,
            no_shear,
            unit_scale,
            angle,
            bbox_center,
        )

        return {**crop_window, "matrix": matrix, "bbox_matrix": bbox_matrix}
655
+
656
+
657
class SafeRotate(Affine):
    """Rotate the input inside the input's frame by an angle selected randomly from the uniform distribution.

    The rotated content is rescaled so that its whole bounding box is mapped back onto the
    original frame. The output keeps the input dimensions, but because the horizontal and
    vertical scale factors are computed independently, the content is generally distorted
    (aspect ratio is not preserved).

    Args:
        limit (float | tuple[float, float]): Range from which a random angle is picked. If limit is a single float,
            an angle is picked from (-limit, limit). Default: (-90, 90)
        interpolation (OpenCV flag): Flag that is used to specify the interpolation algorithm. Should be one of:
            cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4.
            Default: cv2.INTER_LINEAR.
        border_mode (OpenCV flag): Flag that is used to specify the pixel extrapolation method. Should be one of:
            cv2.BORDER_CONSTANT, cv2.BORDER_REPLICATE, cv2.BORDER_REFLECT, cv2.BORDER_WRAP, cv2.BORDER_REFLECT_101.
            Default: cv2.BORDER_CONSTANT
        fill (tuple[float, ...] | float): Padding value if border_mode is cv2.BORDER_CONSTANT. Default: 0
        fill_mask (tuple[float, ...] | float): Padding value if border_mode is cv2.BORDER_CONSTANT applied
            for masks. Default: 0
        rotate_method (Literal["largest_box", "ellipse"]): Method to rotate bounding boxes.
            Should be 'largest_box' or 'ellipse'. Default: 'largest_box'
        mask_interpolation (OpenCV flag): flag that is used to specify the interpolation algorithm for mask.
            Should be one of: cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4.
            Default: cv2.INTER_NEAREST.
        p (float): Probability of applying the transform. Default: 0.5.

    Targets:
        image, mask, bboxes, keypoints, volume, mask3d

    Image types:
        uint8, float32

    Note:
        - The rotation is performed around the center of the image.
        - After rotation, the image is scaled to fit within the original frame, which may cause some distortion.
        - The output image will always have the same dimensions as the input image.
        - Bounding boxes and keypoints are transformed along with the image.

    Mathematical Details:
        1. An angle θ is randomly sampled from the range specified by 'limit'.
        2. A rotation about the center (cx, cy) is built with cv2.getRotationMatrix2D(center, θ, 1.0),
           which (per the OpenCV documentation) has the form:
           R = [ cos(θ)  sin(θ)  (1 - cos(θ))*cx - sin(θ)*cy ]
               [-sin(θ)  cos(θ)  sin(θ)*cx + (1 - cos(θ))*cy ]
        3. The size of the axis-aligned bounding box of the rotated frame is computed
           (truncated to integers):
           new_w = int(height * |sin(θ)| + width * |cos(θ)|)
           new_h = int(height * |cos(θ)| + width * |sin(θ)|)
        4. The translation column of R is shifted by (new_w / 2 - cx, new_h / 2 - cy) so that the
           rotated content is centered in the new_w x new_h box.
        5. The result is scaled independently per axis so the box maps back onto the original frame:
           scale_x = width / new_w,  scale_y = height / new_h
           T = diag(scale_x, scale_y, 1) @ [R; 0 0 1]
           T is the 3x3 matrix returned as 'matrix' (and, with the bbox center, as 'bbox_matrix').

    Examples:
        >>> import numpy as np
        >>> import albumentations as A
        >>> # Create example data
        >>> image = np.random.randint(0, 256, (100, 100, 3), dtype=np.uint8)
        >>> mask = np.random.randint(0, 2, (100, 100), dtype=np.uint8)
        >>> bboxes = np.array([[10, 10, 50, 50], [40, 40, 80, 80]], dtype=np.float32)
        >>> bbox_labels = [1, 2]  # Class labels for bounding boxes
        >>> keypoints = np.array([[20, 30], [60, 70]], dtype=np.float32)
        >>> keypoint_labels = [0, 1]  # Labels for keypoints
        >>> # Define the transform
        >>> transform = A.Compose([
        ...     A.SafeRotate(limit=45, p=1.0),
        ... ], bbox_params=A.BboxParams(format='pascal_voc', label_fields=['bbox_labels']),
        ...    keypoint_params=A.KeypointParams(format='xy', label_fields=['keypoint_labels']))
        >>> # Apply the transform to all targets
        >>> transformed = transform(
        ...     image=image,
        ...     mask=mask,
        ...     bboxes=bboxes,
        ...     bbox_labels=bbox_labels,
        ...     keypoints=keypoints,
        ...     keypoint_labels=keypoint_labels
        ... )
        >>> rotated_image = transformed["image"]
        >>> rotated_mask = transformed["mask"]
        >>> rotated_bboxes = transformed["bboxes"]
        >>> rotated_bbox_labels = transformed["bbox_labels"]
        >>> rotated_keypoints = transformed["keypoints"]
        >>> rotated_keypoint_labels = transformed["keypoint_labels"]

    """

    _targets = ALL_TARGETS

    class InitSchema(RotateInitSchema):
        # Strategy name for rotating bounding boxes; forwarded to the Affine base class.
        rotate_method: Literal["largest_box", "ellipse"]

    def __init__(
        self,
        limit: tuple[float, float] | float = (-90, 90),
        interpolation: Literal[
            cv2.INTER_NEAREST,
            cv2.INTER_LINEAR,
            cv2.INTER_CUBIC,
            cv2.INTER_AREA,
            cv2.INTER_LANCZOS4,
        ] = cv2.INTER_LINEAR,
        border_mode: Literal[
            cv2.BORDER_CONSTANT,
            cv2.BORDER_REPLICATE,
            cv2.BORDER_REFLECT,
            cv2.BORDER_WRAP,
            cv2.BORDER_REFLECT_101,
        ] = cv2.BORDER_CONSTANT,
        rotate_method: Literal["largest_box", "ellipse"] = "largest_box",
        mask_interpolation: Literal[
            cv2.INTER_NEAREST,
            cv2.INTER_LINEAR,
            cv2.INTER_CUBIC,
            cv2.INTER_AREA,
            cv2.INTER_LANCZOS4,
        ] = cv2.INTER_NEAREST,
        fill: tuple[float, ...] | float = 0,
        fill_mask: tuple[float, ...] | float = 0,
        p: float = 0.5,
    ):
        # The angle range is handed to Affine as its `rotate` argument; every other
        # Affine option keeps its default, except `fit_output`, which is forced on.
        super().__init__(
            rotate=limit,
            interpolation=interpolation,
            border_mode=border_mode,
            fill=fill,
            fill_mask=fill_mask,
            rotate_method=rotate_method,
            fit_output=True,
            mask_interpolation=mask_interpolation,
            p=p,
        )
        # `cast` only informs the type checker; no runtime conversion happens here.
        # NOTE(review): `get_params_dependent_on_data` unpacks `self.limit`, so a scalar
        # must already have been expanded to a pair upstream (presumably by InitSchema
        # validation) - confirm.
        self.limit = cast("tuple[float, float]", limit)

    def _create_safe_rotate_matrix(
        self,
        angle: float,
        center: tuple[float, float],
        image_shape: tuple[int, int],
    ) -> tuple[np.ndarray, dict[str, float]]:
        """Build the rotate-then-fit matrix for one pivot.

        Args:
            angle (float): Rotation angle in degrees, as expected by cv2.getRotationMatrix2D.
            center (tuple[float, float]): Pivot of the rotation, (x, y).
            image_shape (tuple[int, int]): (height, width) of the input.

        Returns:
            tuple[np.ndarray, dict[str, float]]: The 3x3 combined matrix and the per-axis
                scale factors as ``{"x": scale_x, "y": scale_y}``.

        """
        height, width = image_shape[:2]
        rotation_mat = cv2.getRotationMatrix2D(center, angle, 1.0)

        # With unit scale the first row of the matrix holds cos and sin of the angle.
        abs_cos = abs(rotation_mat[0, 0])
        abs_sin = abs(rotation_mat[0, 1])

        # Size of the axis-aligned box enclosing the rotated frame (truncated to ints).
        new_w = int(height * abs_sin + width * abs_cos)
        new_h = int(height * abs_cos + width * abs_sin)

        # Re-center the rotated content inside the enlarged box.
        rotation_mat[0, 2] += new_w / 2 - center[0]
        rotation_mat[1, 2] += new_h / 2 - center[1]

        # Independent per-axis factors that squeeze the enlarged box back into the
        # original frame; they differ in general, which is where the distortion comes from.
        scale_x = width / new_w
        scale_y = height / new_h
        scale_mat = np.array([[scale_x, 0, 0], [0, scale_y, 0], [0, 0, 1]])

        # Promote the 2x3 rotation to 3x3 so it can be composed with the scaling.
        matrix = scale_mat @ np.vstack([rotation_mat, [0, 0, 1]])

        return matrix, {"x": scale_x, "y": scale_y}

    def get_params_dependent_on_data(
        self,
        params: dict[str, Any],
        data: dict[str, Any],
    ) -> dict[str, Any]:
        """Sample the angle and build the matrices for one call.

        Args:
            params (dict[str, Any]): Dictionary containing parameters; ``params["shape"]`` is required.
            data (dict[str, Any]): Dictionary containing data (unused here).

        Returns:
            dict[str, Any]: ``rotate`` (sampled angle), ``scale`` (per-axis factors from the
                image matrix), ``matrix``, ``bbox_matrix`` and ``output_shape`` (the input
                height and width, i.e. the output size equals the input size).

        """
        image_shape = params["shape"][:2]
        angle = self.py_random.uniform(*self.limit)

        # Images and bboxes use different pivots, hence two matrices.
        image_center = fgeometric.center(image_shape)
        bbox_center = fgeometric.center_bbox(image_shape)

        matrix, scale = self._create_safe_rotate_matrix(
            angle,
            image_center,
            image_shape,
        )
        # The scale returned for the bbox matrix is discarded; only the image one is reported.
        bbox_matrix, _ = self._create_safe_rotate_matrix(
            angle,
            bbox_center,
            image_shape,
        )

        return {
            "rotate": angle,
            "scale": scale,
            "matrix": matrix,
            "bbox_matrix": bbox_matrix,
            "output_shape": image_shape,
        }