nrtk-albumentations 2.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of nrtk-albumentations might be problematic.
- albumentations/__init__.py +21 -0
- albumentations/augmentations/__init__.py +23 -0
- albumentations/augmentations/blur/__init__.py +0 -0
- albumentations/augmentations/blur/functional.py +438 -0
- albumentations/augmentations/blur/transforms.py +1633 -0
- albumentations/augmentations/crops/__init__.py +0 -0
- albumentations/augmentations/crops/functional.py +494 -0
- albumentations/augmentations/crops/transforms.py +3647 -0
- albumentations/augmentations/dropout/__init__.py +0 -0
- albumentations/augmentations/dropout/channel_dropout.py +134 -0
- albumentations/augmentations/dropout/coarse_dropout.py +567 -0
- albumentations/augmentations/dropout/functional.py +1017 -0
- albumentations/augmentations/dropout/grid_dropout.py +166 -0
- albumentations/augmentations/dropout/mask_dropout.py +274 -0
- albumentations/augmentations/dropout/transforms.py +461 -0
- albumentations/augmentations/dropout/xy_masking.py +186 -0
- albumentations/augmentations/geometric/__init__.py +0 -0
- albumentations/augmentations/geometric/distortion.py +1238 -0
- albumentations/augmentations/geometric/flip.py +752 -0
- albumentations/augmentations/geometric/functional.py +4151 -0
- albumentations/augmentations/geometric/pad.py +676 -0
- albumentations/augmentations/geometric/resize.py +956 -0
- albumentations/augmentations/geometric/rotate.py +864 -0
- albumentations/augmentations/geometric/transforms.py +1962 -0
- albumentations/augmentations/mixing/__init__.py +0 -0
- albumentations/augmentations/mixing/domain_adaptation.py +787 -0
- albumentations/augmentations/mixing/domain_adaptation_functional.py +453 -0
- albumentations/augmentations/mixing/functional.py +878 -0
- albumentations/augmentations/mixing/transforms.py +832 -0
- albumentations/augmentations/other/__init__.py +0 -0
- albumentations/augmentations/other/lambda_transform.py +180 -0
- albumentations/augmentations/other/type_transform.py +261 -0
- albumentations/augmentations/pixel/__init__.py +0 -0
- albumentations/augmentations/pixel/functional.py +4226 -0
- albumentations/augmentations/pixel/transforms.py +7556 -0
- albumentations/augmentations/spectrogram/__init__.py +0 -0
- albumentations/augmentations/spectrogram/transform.py +220 -0
- albumentations/augmentations/text/__init__.py +0 -0
- albumentations/augmentations/text/functional.py +272 -0
- albumentations/augmentations/text/transforms.py +299 -0
- albumentations/augmentations/transforms3d/__init__.py +0 -0
- albumentations/augmentations/transforms3d/functional.py +393 -0
- albumentations/augmentations/transforms3d/transforms.py +1422 -0
- albumentations/augmentations/utils.py +249 -0
- albumentations/core/__init__.py +0 -0
- albumentations/core/bbox_utils.py +920 -0
- albumentations/core/composition.py +1885 -0
- albumentations/core/hub_mixin.py +299 -0
- albumentations/core/keypoints_utils.py +521 -0
- albumentations/core/label_manager.py +339 -0
- albumentations/core/pydantic.py +239 -0
- albumentations/core/serialization.py +352 -0
- albumentations/core/transforms_interface.py +976 -0
- albumentations/core/type_definitions.py +127 -0
- albumentations/core/utils.py +605 -0
- albumentations/core/validation.py +129 -0
- albumentations/pytorch/__init__.py +1 -0
- albumentations/pytorch/transforms.py +189 -0
- nrtk_albumentations-2.1.0.dist-info/METADATA +196 -0
- nrtk_albumentations-2.1.0.dist-info/RECORD +62 -0
- nrtk_albumentations-2.1.0.dist-info/WHEEL +4 -0
- nrtk_albumentations-2.1.0.dist-info/licenses/LICENSE +21 -0
albumentations/augmentations/geometric/rotate.py

@@ -0,0 +1,864 @@

"""Transforms for rotating images and associated data.

This module provides classes for rotating images, masks, bounding boxes, and keypoints.
Includes transforms for 90-degree rotations and arbitrary angle rotations with various
border handling options.
"""

from __future__ import annotations

import math
from typing import Any, cast

import cv2
import numpy as np
from typing_extensions import Literal

from albumentations.augmentations.crops import functional as fcrops
from albumentations.augmentations.geometric.transforms import Affine
from albumentations.core.pydantic import SymmetricRangeType
from albumentations.core.transforms_interface import (
    BaseTransformInitSchema,
    DualTransform,
)
from albumentations.core.type_definitions import ALL_TARGETS

from . import functional as fgeometric

__all__ = ["RandomRotate90", "Rotate", "SafeRotate"]

SMALL_NUMBER = 1e-10


class RandomRotate90(DualTransform):
    """Randomly rotate the input by 90 degrees zero or more times.

    Even with p=1.0, the transform has a 1/4 probability of being identity:
    - With probability p * 1/4: no rotation (0 degrees)
    - With probability p * 1/4: rotate 90 degrees
    - With probability p * 1/4: rotate 180 degrees
    - With probability p * 1/4: rotate 270 degrees

    For example:
    - With p=1.0: Each rotation angle (including 0°) has 0.25 probability
    - With p=0.8: Each rotation angle has 0.2 probability, and no transform has 0.2 probability
    - With p=0.5: Each rotation angle has 0.125 probability, and no transform has 0.5 probability

    Common applications:
    - Aerial/satellite imagery: Objects can appear in any orientation
    - Medical imaging: Scans/slides may not have a consistent orientation
    - Document analysis: Pages or symbols might be rotated
    - Microscopy: Cell orientation is often arbitrary
    - Game development: Sprites/textures that should work in multiple orientations

    Not recommended for:
    - Natural scene images where gravity matters (e.g., landscape photography)
    - Face detection/recognition tasks
    - Text recognition (unless text can appear rotated)
    - Tasks where object orientation is important for classification

    Note:
        If your domain has both 90-degree rotation AND flip symmetries
        (e.g., satellite imagery, microscopy), consider using `D4` transform instead.
        `D4` is more efficient and mathematically correct as it:
        - Samples uniformly from all 8 possible combinations of rotations and flips
        - Properly represents the dihedral group D4 symmetries
        - Avoids potential correlation between separate rotation and flip augmentations

    Args:
        p (float): probability of applying the transform. Default: 1.0.
            Note that even with p=1.0, there's still a 0.25 probability
            of getting a 0-degree rotation (identity transform).

    Targets:
        image, mask, bboxes, keypoints, volume, mask3d

    Image types:
        uint8, float32

    Examples:
        >>> import numpy as np
        >>> import albumentations as A
        >>> # Create example data
        >>> image = np.random.randint(0, 256, (100, 100, 3), dtype=np.uint8)
        >>> mask = np.random.randint(0, 2, (100, 100), dtype=np.uint8)
        >>> bboxes = np.array([[10, 10, 50, 50], [40, 40, 80, 80]], dtype=np.float32)
        >>> bbox_labels = [1, 2]  # Class labels for bounding boxes
        >>> keypoints = np.array([[20, 30], [60, 70]], dtype=np.float32)
        >>> keypoint_labels = [0, 1]  # Labels for keypoints
        >>> # Define the transform
        >>> transform = A.Compose([
        ...     A.RandomRotate90(p=1.0),
        ... ], bbox_params=A.BboxParams(format='pascal_voc', label_fields=['bbox_labels']),
        ...    keypoint_params=A.KeypointParams(format='xy', label_fields=['keypoint_labels']))
        >>> # Apply the transform to all targets
        >>> transformed = transform(
        ...     image=image,
        ...     mask=mask,
        ...     bboxes=bboxes,
        ...     bbox_labels=bbox_labels,
        ...     keypoints=keypoints,
        ...     keypoint_labels=keypoint_labels
        ... )
        >>> rotated_image = transformed["image"]
        >>> rotated_mask = transformed["mask"]
        >>> rotated_bboxes = transformed["bboxes"]
        >>> rotated_bbox_labels = transformed["bbox_labels"]
        >>> rotated_keypoints = transformed["keypoints"]
        >>> rotated_keypoint_labels = transformed["keypoint_labels"]

    """

    _targets = ALL_TARGETS

    def __init__(
        self,
        p: float = 1,
    ):
        super().__init__(p=p)

    def apply(self, img: np.ndarray, factor: Literal[0, 1, 2, 3], **params: Any) -> np.ndarray:
        """Apply rotation to the input image.

        Args:
            img (np.ndarray): Image to rotate.
            factor (Literal[0, 1, 2, 3]): Number of times to rotate by 90 degrees.
            **params (Any): Additional parameters.

        Returns:
            np.ndarray: Rotated image.

        """
        return fgeometric.rot90(img, factor)

    def get_params(self) -> dict[str, int]:
        """Get parameters for the transform.

        Returns:
            dict[str, int]: Dictionary with the rotation factor.

        """
        # Random int in the range [0, 3]
        return {"factor": self.py_random.randint(0, 3)}

    def apply_to_bboxes(
        self,
        bboxes: np.ndarray,
        factor: Literal[0, 1, 2, 3],
        **params: Any,
    ) -> np.ndarray:
        """Apply rotation to bounding boxes.

        Args:
            bboxes (np.ndarray): Bounding boxes to rotate.
            factor (Literal[0, 1, 2, 3]): Number of times to rotate by 90 degrees.
            **params (Any): Additional parameters.

        Returns:
            np.ndarray: Rotated bounding boxes.

        """
        return fgeometric.bboxes_rot90(bboxes, factor)

    def apply_to_keypoints(
        self,
        keypoints: np.ndarray,
        factor: Literal[0, 1, 2, 3],
        **params: Any,
    ) -> np.ndarray:
        """Apply rotation to keypoints.

        Args:
            keypoints (np.ndarray): Keypoints to rotate.
            factor (Literal[0, 1, 2, 3]): Number of times to rotate by 90 degrees.
            **params (Any): Additional parameters.

        Returns:
            np.ndarray: Rotated keypoints.

        """
        return fgeometric.keypoints_rot90(keypoints, factor, params["shape"])

    def apply_to_images(self, images: np.ndarray, factor: Literal[0, 1, 2, 3], **params: Any) -> np.ndarray:
        """Apply rotation to a batch of images.

        Args:
            images (np.ndarray): Images to rotate.
            factor (Literal[0, 1, 2, 3]): Number of times to rotate by 90 degrees.
            **params (Any): Additional parameters.

        Returns:
            np.ndarray: Rotated images.

        """
        return fgeometric.rot90_images(images, factor)

    def apply_to_volume(self, volume: np.ndarray, factor: Literal[0, 1, 2, 3], **params: Any) -> np.ndarray:
        """Apply rotation to the input volume.

        Args:
            volume (np.ndarray): Volume to rotate.
            factor (Literal[0, 1, 2, 3]): Number of times to rotate by 90 degrees.
            **params (Any): Additional parameters.

        Returns:
            np.ndarray: Rotated volume.

        """
        return self.apply_to_images(volume, factor, **params)

    def apply_to_volumes(self, volumes: np.ndarray, factor: Literal[0, 1, 2, 3], **params: Any) -> np.ndarray:
        """Apply rotation to the input volumes.

        Args:
            volumes (np.ndarray): Volumes to rotate.
            factor (Literal[0, 1, 2, 3]): Number of times to rotate by 90 degrees.
            **params (Any): Additional parameters.

        Returns:
            np.ndarray: Rotated volumes.

        """
        return fgeometric.volumes_rot90(volumes, factor)

    def apply_to_mask3d(self, mask3d: np.ndarray, factor: Literal[0, 1, 2, 3], **params: Any) -> np.ndarray:
        """Apply rotation to the input mask3d.

        Args:
            mask3d (np.ndarray): Mask3d to rotate.
            factor (Literal[0, 1, 2, 3]): Number of times to rotate by 90 degrees.
            **params (Any): Additional parameters.

        Returns:
            np.ndarray: Rotated mask3d.

        """
        return self.apply_to_images(mask3d, factor, **params)

    def apply_to_masks3d(self, masks3d: np.ndarray, factor: Literal[0, 1, 2, 3], **params: Any) -> np.ndarray:
        """Apply rotation to the input masks3d.

        Args:
            masks3d (np.ndarray): Masks3d to rotate.
            factor (Literal[0, 1, 2, 3]): Number of times to rotate by 90 degrees.
            **params (Any): Additional parameters.

        Returns:
            np.ndarray: Rotated masks3d.

        """
        return self.apply_to_volumes(masks3d, factor, **params)


class RotateInitSchema(BaseTransformInitSchema):
    limit: SymmetricRangeType

    interpolation: Literal[cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4]

    mask_interpolation: Literal[
        cv2.INTER_NEAREST,
        cv2.INTER_LINEAR,
        cv2.INTER_CUBIC,
        cv2.INTER_AREA,
        cv2.INTER_LANCZOS4,
    ]

    border_mode: Literal[
        cv2.BORDER_CONSTANT,
        cv2.BORDER_REPLICATE,
        cv2.BORDER_REFLECT,
        cv2.BORDER_WRAP,
        cv2.BORDER_REFLECT_101,
    ]

    fill: tuple[float, ...] | float
    fill_mask: tuple[float, ...] | float | None


class Rotate(DualTransform):
    """Rotate the input by an angle selected randomly from the uniform distribution.

    Args:
        limit (float | tuple[float, float]): Range from which a random angle is picked. If limit is a single float,
            an angle is picked from (-limit, limit). Default: (-90, 90)
        interpolation (OpenCV flag): Flag that is used to specify the interpolation algorithm. Should be one of:
            cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4.
            Default: cv2.INTER_LINEAR.
        border_mode (OpenCV flag): Flag that is used to specify the pixel extrapolation method. Should be one of:
            cv2.BORDER_CONSTANT, cv2.BORDER_REPLICATE, cv2.BORDER_REFLECT, cv2.BORDER_WRAP, cv2.BORDER_REFLECT_101.
            Default: cv2.BORDER_CONSTANT
        fill (tuple[float, ...] | float): Padding value if border_mode is cv2.BORDER_CONSTANT.
        fill_mask (tuple[float, ...] | float): Padding value if border_mode is cv2.BORDER_CONSTANT applied for masks.
        rotate_method (Literal["largest_box", "ellipse"]): Method to rotate bounding boxes.
            Should be 'largest_box' or 'ellipse'. Default: 'largest_box'
        crop_border (bool): Whether to crop border after rotation. If True, the output image size might differ
            from the input. Default: False
        mask_interpolation (OpenCV flag): flag that is used to specify the interpolation algorithm for mask.
            Should be one of: cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4.
            Default: cv2.INTER_NEAREST.
        p (float): Probability of applying the transform. Default: 0.5.

    Targets:
        image, mask, bboxes, keypoints, volume, mask3d

    Image types:
        uint8, float32

    Note:
        - The rotation angle is randomly selected for each execution within the range specified by 'limit'.
        - When 'crop_border' is False, the output image will have the same size as the input, potentially
          introducing black triangles in the corners.
        - When 'crop_border' is True, the output image is cropped to remove black triangles, which may result
          in a smaller image.
        - Bounding boxes are rotated and may change size or shape.
        - Keypoints are rotated around the center of the image.

    Mathematical Details:
        1. An angle θ is randomly sampled from the range specified by 'limit'.
        2. The image is rotated around its center by θ degrees.
        3. The rotation matrix R is:
            R = [cos(θ)  -sin(θ)]
                [sin(θ)   cos(θ)]
        4. Each point (x, y) in the image is transformed to (x', y') by:
            [x']   [cos(θ)  -sin(θ)] [x - cx]   [cx]
            [y'] = [sin(θ)   cos(θ)] [y - cy] + [cy]
            where (cx, cy) is the center of the image.
        5. If 'crop_border' is True, the image is cropped to the largest rectangle that fits inside the rotated image.

    Examples:
        >>> import numpy as np
        >>> import albumentations as A
        >>> # Create example data
        >>> image = np.random.randint(0, 256, (100, 100, 3), dtype=np.uint8)
        >>> mask = np.random.randint(0, 2, (100, 100), dtype=np.uint8)
        >>> bboxes = np.array([[10, 10, 50, 50], [40, 40, 80, 80]], dtype=np.float32)
        >>> bbox_labels = [1, 2]  # Class labels for bounding boxes
        >>> keypoints = np.array([[20, 30], [60, 70]], dtype=np.float32)
        >>> keypoint_labels = [0, 1]  # Labels for keypoints
        >>> # Define the transform
        >>> transform = A.Compose([
        ...     A.Rotate(limit=45, p=1.0),
        ... ], bbox_params=A.BboxParams(format='pascal_voc', label_fields=['bbox_labels']),
        ...    keypoint_params=A.KeypointParams(format='xy', label_fields=['keypoint_labels']))
        >>> # Apply the transform to all targets
        >>> transformed = transform(
        ...     image=image,
        ...     mask=mask,
        ...     bboxes=bboxes,
        ...     bbox_labels=bbox_labels,
        ...     keypoints=keypoints,
        ...     keypoint_labels=keypoint_labels
        ... )
        >>> rotated_image = transformed["image"]
        >>> rotated_mask = transformed["mask"]
        >>> rotated_bboxes = transformed["bboxes"]
        >>> rotated_bbox_labels = transformed["bbox_labels"]
        >>> rotated_keypoints = transformed["keypoints"]
        >>> rotated_keypoint_labels = transformed["keypoint_labels"]

    """

    _targets = ALL_TARGETS

    class InitSchema(RotateInitSchema):
        rotate_method: Literal["largest_box", "ellipse"]
        crop_border: bool

        fill: tuple[float, ...] | float
        fill_mask: tuple[float, ...] | float

    def __init__(
        self,
        limit: tuple[float, float] | float = (-90, 90),
        interpolation: Literal[
            cv2.INTER_NEAREST,
            cv2.INTER_LINEAR,
            cv2.INTER_CUBIC,
            cv2.INTER_AREA,
            cv2.INTER_LANCZOS4,
        ] = cv2.INTER_LINEAR,
        border_mode: Literal[
            cv2.BORDER_CONSTANT,
            cv2.BORDER_REPLICATE,
            cv2.BORDER_REFLECT,
            cv2.BORDER_WRAP,
            cv2.BORDER_REFLECT_101,
        ] = cv2.BORDER_CONSTANT,
        rotate_method: Literal["largest_box", "ellipse"] = "largest_box",
        crop_border: bool = False,
        mask_interpolation: Literal[
            cv2.INTER_NEAREST,
            cv2.INTER_LINEAR,
            cv2.INTER_CUBIC,
            cv2.INTER_AREA,
            cv2.INTER_LANCZOS4,
        ] = cv2.INTER_NEAREST,
        fill: tuple[float, ...] | float = 0,
        fill_mask: tuple[float, ...] | float = 0,
        p: float = 0.5,
    ):
        super().__init__(p=p)
        self.limit = cast("tuple[float, float]", limit)
        self.interpolation = interpolation
        self.mask_interpolation = mask_interpolation
        self.border_mode = border_mode
        self.fill = fill
        self.fill_mask = fill_mask
        self.rotate_method = rotate_method
        self.crop_border = crop_border

    def apply(
        self,
        img: np.ndarray,
        matrix: np.ndarray,
        x_min: int,
        x_max: int,
        y_min: int,
        y_max: int,
        **params: Any,
    ) -> np.ndarray:
        """Apply affine transformation to the image.

        Args:
            img (np.ndarray): Image to transform.
            matrix (np.ndarray): Affine transformation matrix.
            x_min (int): Minimum x-coordinate for cropping (if crop_border is True).
            x_max (int): Maximum x-coordinate for cropping (if crop_border is True).
            y_min (int): Minimum y-coordinate for cropping (if crop_border is True).
            y_max (int): Maximum y-coordinate for cropping (if crop_border is True).
            **params (Any): Additional parameters.

        Returns:
            np.ndarray: Transformed image.

        """
        img_out = fgeometric.warp_affine(
            img,
            matrix,
            self.interpolation,
            self.fill,
            self.border_mode,
            params["shape"][:2],
        )
        if self.crop_border:
            return fcrops.crop(img_out, x_min, y_min, x_max, y_max)
        return img_out

    def apply_to_mask(
        self,
        mask: np.ndarray,
        matrix: np.ndarray,
        x_min: int,
        x_max: int,
        y_min: int,
        y_max: int,
        **params: Any,
    ) -> np.ndarray:
        """Apply affine transformation to the mask.

        Args:
            mask (np.ndarray): Mask to transform.
            matrix (np.ndarray): Affine transformation matrix.
            x_min (int): Minimum x-coordinate for cropping (if crop_border is True).
            x_max (int): Maximum x-coordinate for cropping (if crop_border is True).
            y_min (int): Minimum y-coordinate for cropping (if crop_border is True).
            y_max (int): Maximum y-coordinate for cropping (if crop_border is True).
            **params (Any): Additional parameters.

        Returns:
            np.ndarray: Transformed mask.

        """
        img_out = fgeometric.warp_affine(
            mask,
            matrix,
            self.mask_interpolation,
            self.fill_mask,
            self.border_mode,
            params["shape"][:2],
        )
        if self.crop_border:
            return fcrops.crop(img_out, x_min, y_min, x_max, y_max)
        return img_out

    def apply_to_bboxes(
        self,
        bboxes: np.ndarray,
        bbox_matrix: np.ndarray,
        x_min: int,
        x_max: int,
        y_min: int,
        y_max: int,
        **params: Any,
    ) -> np.ndarray:
        """Apply affine transformation to bounding boxes.

        Args:
            bboxes (np.ndarray): Bounding boxes to transform.
            bbox_matrix (np.ndarray): Affine transformation matrix for bounding boxes.
            x_min (int): Minimum x-coordinate for cropping (if crop_border is True).
            x_max (int): Maximum x-coordinate for cropping (if crop_border is True).
            y_min (int): Minimum y-coordinate for cropping (if crop_border is True).
            y_max (int): Maximum y-coordinate for cropping (if crop_border is True).
            **params (Any): Additional parameters.

        Returns:
            np.ndarray: Transformed bounding boxes.

        """
        image_shape = params["shape"][:2]
        bboxes_out = fgeometric.bboxes_affine(
            bboxes,
            bbox_matrix,
            self.rotate_method,
            image_shape,
            self.border_mode,
            image_shape,
        )
        if self.crop_border:
            return fcrops.crop_bboxes_by_coords(
                bboxes_out,
                (x_min, y_min, x_max, y_max),
                image_shape,
            )
        return bboxes_out

    def apply_to_keypoints(
        self,
        keypoints: np.ndarray,
        matrix: np.ndarray,
        x_min: int,
        x_max: int,
        y_min: int,
        y_max: int,
        **params: Any,
    ) -> np.ndarray:
        """Apply affine transformation to keypoints.

        Args:
            keypoints (np.ndarray): Keypoints to transform.
            matrix (np.ndarray): Affine transformation matrix.
            x_min (int): Minimum x-coordinate for cropping (if crop_border is True).
            x_max (int): Maximum x-coordinate for cropping (if crop_border is True).
            y_min (int): Minimum y-coordinate for cropping (if crop_border is True).
            y_max (int): Maximum y-coordinate for cropping (if crop_border is True).
            **params (Any): Additional parameters.

        Returns:
            np.ndarray: Transformed keypoints.

        """
        keypoints_out = fgeometric.keypoints_affine(
            keypoints,
            matrix,
            params["shape"][:2],
            scale={"x": 1, "y": 1},
            border_mode=self.border_mode,
        )
        if self.crop_border:
            return fcrops.crop_keypoints_by_coords(
                keypoints_out,
                (x_min, y_min, x_max, y_max),
            )
        return keypoints_out

    @staticmethod
    def _rotated_rect_with_max_area(
        height: int,
        width: int,
        angle: float,
    ) -> dict[str, int]:
        """Given a rectangle of size wxh that has been rotated by 'angle' (in
        degrees), computes the width and height of the largest possible
        axis-aligned rectangle (maximal area) within the rotated rectangle.

        References:
            Rotate image and crop out black borders: https://stackoverflow.com/questions/16702966/rotate-image-and-crop-out-black-borders

        """
        angle = math.radians(angle)
        width_is_longer = width >= height
        side_long, side_short = (width, height) if width_is_longer else (height, width)

        # since the solutions for angle, -angle and 180-angle are all the same,
        # it is sufficient to look at the first quadrant and the absolute values of sin,cos:
        sin_a, cos_a = abs(math.sin(angle)), abs(math.cos(angle))
        if side_short <= 2.0 * sin_a * cos_a * side_long or abs(sin_a - cos_a) < SMALL_NUMBER:
            # half constrained case: two crop corners touch the longer side,
            # the other two corners are on the mid-line parallel to the longer line
            x = 0.5 * side_short
            wr, hr = (x / sin_a, x / cos_a) if width_is_longer else (x / cos_a, x / sin_a)
        else:
            # fully constrained case: crop touches all 4 sides
            cos_2a = cos_a * cos_a - sin_a * sin_a
            wr, hr = (
                (width * cos_a - height * sin_a) / cos_2a,
                (height * cos_a - width * sin_a) / cos_2a,
            )

        return {
            "x_min": max(0, int(width / 2 - wr / 2)),
            "x_max": min(width, int(width / 2 + wr / 2)),
            "y_min": max(0, int(height / 2 - hr / 2)),
            "y_max": min(height, int(height / 2 + hr / 2)),
        }

    def get_params_dependent_on_data(
        self,
        params: dict[str, Any],
        data: dict[str, Any],
    ) -> dict[str, Any]:
        """Get parameters dependent on the data.

        Args:
            params (dict[str, Any]): Dictionary containing parameters.
            data (dict[str, Any]): Dictionary containing data.

        Returns:
            dict[str, Any]: Dictionary with parameters for transformation.

        """
        angle = self.py_random.uniform(*self.limit)

        if self.crop_border:
            height, width = params["shape"][:2]
            out_params = self._rotated_rect_with_max_area(height, width, angle)
        else:
            out_params = {"x_min": -1, "x_max": -1, "y_min": -1, "y_max": -1}

        center = fgeometric.center(params["shape"][:2])
        bbox_center = fgeometric.center_bbox(params["shape"][:2])

        translate: dict[str, int] = {"x": 0, "y": 0}
        shear: dict[str, float] = {"x": 0, "y": 0}
        scale: dict[str, float] = {"x": 1, "y": 1}
        rotate = angle

        matrix = fgeometric.create_affine_transformation_matrix(
            translate,
            shear,
            scale,
            rotate,
            center,
        )
        bbox_matrix = fgeometric.create_affine_transformation_matrix(
            translate,
            shear,
            scale,
            rotate,
            bbox_center,
        )
        out_params["matrix"] = matrix
        out_params["bbox_matrix"] = bbox_matrix

        return out_params


class SafeRotate(Affine):
    """Rotate the input inside the input's frame by an angle selected randomly from the uniform distribution.

    This transformation ensures that the entire rotated image fits within the original frame by scaling it
    down if necessary. The resulting image maintains its original dimensions but may contain artifacts due to the
    rotation and scaling process.

    Args:
        limit (float | tuple[float, float]): Range from which a random angle is picked. If limit is a single float,
            an angle is picked from (-limit, limit). Default: (-90, 90)
        interpolation (OpenCV flag): Flag that is used to specify the interpolation algorithm. Should be one of:
            cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4.
            Default: cv2.INTER_LINEAR.
        border_mode (OpenCV flag): Flag that is used to specify the pixel extrapolation method. Should be one of:
            cv2.BORDER_CONSTANT, cv2.BORDER_REPLICATE, cv2.BORDER_REFLECT, cv2.BORDER_WRAP, cv2.BORDER_REFLECT_101.
            Default: cv2.BORDER_REFLECT_101
        fill (tuple[float, float] | float): Padding value if border_mode is cv2.BORDER_CONSTANT.
        fill_mask (tuple[float, float] | float): Padding value if border_mode is cv2.BORDER_CONSTANT applied
            for masks.
        rotate_method (Literal["largest_box", "ellipse"]): Method to rotate bounding boxes.
            Should be 'largest_box' or 'ellipse'. Default: 'largest_box'
        mask_interpolation (OpenCV flag): flag that is used to specify the interpolation algorithm for mask.
            Should be one of: cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4.
            Default: cv2.INTER_NEAREST.
        p (float): Probability of applying the transform. Default: 0.5.

    Targets:
        image, mask, bboxes, keypoints, volume, mask3d

    Image types:
        uint8, float32

    Note:
        - The rotation is performed around the center of the image.
        - After rotation, the image is scaled to fit within the original frame, which may cause some distortion.
        - The output image will always have the same dimensions as the input image.
        - Bounding boxes and keypoints are transformed along with the image.

    Mathematical Details:
        1. An angle θ is randomly sampled from the range specified by 'limit'.
        2. The image is rotated around its center by θ degrees.
        3. The rotation matrix R is:
            R = [cos(θ)  -sin(θ)]
                [sin(θ)   cos(θ)]
        4. The scaling factor s is calculated to ensure the rotated image fits within the original frame:
            s = min(width / (width * |cos(θ)| + height * |sin(θ)|),
                    height / (width * |sin(θ)| + height * |cos(θ)|))
        5. The combined transformation matrix T is:
            T = [s*cos(θ)  -s*sin(θ)  tx]
                [s*sin(θ)   s*cos(θ)  ty]
            where tx and ty are translation factors to keep the image centered.
        6. Each point (x, y) in the image is transformed to (x', y') by:
            [x']   [s*cos(θ)   s*sin(θ)] [x - cx]   [cx]
            [y'] = [-s*sin(θ)  s*cos(θ)] [y - cy] + [cy]
            where (cx, cy) is the center of the image.

    Examples:
        >>> import numpy as np
        >>> import albumentations as A
        >>> # Create example data
        >>> image = np.random.randint(0, 256, (100, 100, 3), dtype=np.uint8)
        >>> mask = np.random.randint(0, 2, (100, 100), dtype=np.uint8)
        >>> bboxes = np.array([[10, 10, 50, 50], [40, 40, 80, 80]], dtype=np.float32)
        >>> bbox_labels = [1, 2]  # Class labels for bounding boxes
        >>> keypoints = np.array([[20, 30], [60, 70]], dtype=np.float32)
        >>> keypoint_labels = [0, 1]  # Labels for keypoints
        >>> # Define the transform
        >>> transform = A.Compose([
        ...     A.SafeRotate(limit=45, p=1.0),
        ... ], bbox_params=A.BboxParams(format='pascal_voc', label_fields=['bbox_labels']),
        ...    keypoint_params=A.KeypointParams(format='xy', label_fields=['keypoint_labels']))
        >>> # Apply the transform to all targets
        >>> transformed = transform(
        ...     image=image,
        ...     mask=mask,
        ...     bboxes=bboxes,
        ...     bbox_labels=bbox_labels,
        ...     keypoints=keypoints,
        ...     keypoint_labels=keypoint_labels
        ... )
        >>> rotated_image = transformed["image"]
        >>> rotated_mask = transformed["mask"]
        >>> rotated_bboxes = transformed["bboxes"]
        >>> rotated_bbox_labels = transformed["bbox_labels"]
        >>> rotated_keypoints = transformed["keypoints"]
        >>> rotated_keypoint_labels = transformed["keypoint_labels"]

    """

    _targets = ALL_TARGETS

    class InitSchema(RotateInitSchema):
        rotate_method: Literal["largest_box", "ellipse"]

    def __init__(
        self,
        limit: tuple[float, float] | float = (-90, 90),
        interpolation: Literal[
            cv2.INTER_NEAREST,
            cv2.INTER_LINEAR,
            cv2.INTER_CUBIC,
            cv2.INTER_AREA,
            cv2.INTER_LANCZOS4,
        ] = cv2.INTER_LINEAR,
        border_mode: Literal[
            cv2.BORDER_CONSTANT,
            cv2.BORDER_REPLICATE,
            cv2.BORDER_REFLECT,
            cv2.BORDER_WRAP,
            cv2.BORDER_REFLECT_101,
        ] = cv2.BORDER_CONSTANT,
        rotate_method: Literal["largest_box", "ellipse"] = "largest_box",
        mask_interpolation: Literal[
            cv2.INTER_NEAREST,
            cv2.INTER_LINEAR,
            cv2.INTER_CUBIC,
            cv2.INTER_AREA,
            cv2.INTER_LANCZOS4,
        ] = cv2.INTER_NEAREST,
        fill: tuple[float, ...] | float = 0,
        fill_mask: tuple[float, ...] | float = 0,
        p: float = 0.5,
    ):
        super().__init__(
            rotate=limit,
            interpolation=interpolation,
            border_mode=border_mode,
            fill=fill,
            fill_mask=fill_mask,
            rotate_method=rotate_method,
            fit_output=True,
            mask_interpolation=mask_interpolation,
            p=p,
        )
        self.limit = cast("tuple[float, float]", limit)

    def _create_safe_rotate_matrix(
        self,
        angle: float,
        center: tuple[float, float],
        image_shape: tuple[int, int],
    ) -> tuple[np.ndarray, dict[str, float]]:
        height, width = image_shape[:2]
        rotation_mat = cv2.getRotationMatrix2D(center, angle, 1.0)

        # Calculate new image size
        abs_cos = abs(rotation_mat[0, 0])
        abs_sin = abs(rotation_mat[0, 1])
        new_w = int(height * abs_sin + width * abs_cos)
        new_h = int(height * abs_cos + width * abs_sin)

        # Adjust the rotation matrix to take into account the new size
        rotation_mat[0, 2] += new_w / 2 - center[0]
        rotation_mat[1, 2] += new_h / 2 - center[1]

        # Calculate scaling factors
        scale_x = width / new_w
        scale_y = height / new_h

        # Create scaling matrix
        scale_mat = np.array([[scale_x, 0, 0], [0, scale_y, 0], [0, 0, 1]])

        # Combine rotation and scaling
        matrix = scale_mat @ np.vstack([rotation_mat, [0, 0, 1]])

        return matrix, {"x": scale_x, "y": scale_y}

    def get_params_dependent_on_data(
        self,
        params: dict[str, Any],
        data: dict[str, Any],
    ) -> dict[str, Any]:
        """Get parameters dependent on the data.

        Args:
            params (dict[str, Any]): Dictionary containing parameters.
            data (dict[str, Any]): Dictionary containing data.

        Returns:
            dict[str, Any]: Dictionary with parameters for transformation.

        """
        image_shape = params["shape"][:2]
        angle = self.py_random.uniform(*self.limit)

        # Calculate centers for image and bbox
        image_center = fgeometric.center(image_shape)
        bbox_center = fgeometric.center_bbox(image_shape)

        # Create matrices for image and bbox
        matrix, scale = self._create_safe_rotate_matrix(
            angle,
            image_center,
            image_shape,
        )
        bbox_matrix, _ = self._create_safe_rotate_matrix(
            angle,
            bbox_center,
            image_shape,
        )

        return {
            "rotate": angle,
            "scale": scale,
            "matrix": matrix,
            "bbox_matrix": bbox_matrix,
            "output_shape": image_shape,
        }
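
Note on the two arbitrary-angle transforms in this file (an illustrative sketch, not part of the package source): per the docstrings above, `Rotate(crop_border=True)` crops to the largest axis-aligned rectangle inside the rotated frame, so its output can be smaller than the input, while `SafeRotate` scales the rotated content down so the output keeps the input shape. A minimal usage sketch, assuming the wheel is installed and importable as `albumentations`:

    import numpy as np
    import albumentations as A

    image = np.zeros((100, 200, 3), dtype=np.uint8)

    # Rotate with crop_border=True: the output is cropped to the largest
    # axis-aligned rectangle inside the rotated frame, so it is usually
    # smaller than the 100x200 input.
    cropped = A.Rotate(limit=(30, 30), crop_border=True, p=1.0)(image=image)["image"]

    # SafeRotate: the rotated content is scaled down to fit, so the output
    # keeps the original 100x200 shape.
    safe = A.SafeRotate(limit=(30, 30), p=1.0)(image=image)["image"]

    print(cropped.shape)  # smaller than (100, 200, 3)
    print(safe.shape)     # (100, 200, 3)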
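
The scaling formula quoted in the `SafeRotate` docstring can also be checked in isolation. A standalone sketch of just that formula (the class itself derives the scale from `cv2.getRotationMatrix2D` and rounds the enlarged frame to integers in `_create_safe_rotate_matrix`, so actual values may differ slightly):

    import math

    def safe_rotate_scale(width: int, height: int, angle_deg: float) -> float:
        # s = min(w / (w*|cos θ| + h*|sin θ|), h / (w*|sin θ| + h*|cos θ|))
        theta = math.radians(angle_deg)
        cos_t, sin_t = abs(math.cos(theta)), abs(math.sin(theta))
        return min(
            width / (width * cos_t + height * sin_t),
            height / (width * sin_t + height * cos_t),
        )

    # A 45° rotation of a square must shrink by 1/sqrt(2) ≈ 0.707 to stay inside the frame.
    print(round(safe_rotate_scale(100, 100, 45.0), 3))  # 0.707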