nrtk-albumentations 2.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of nrtk-albumentations might be problematic. Click here for more details.
- albumentations/__init__.py +21 -0
- albumentations/augmentations/__init__.py +23 -0
- albumentations/augmentations/blur/__init__.py +0 -0
- albumentations/augmentations/blur/functional.py +438 -0
- albumentations/augmentations/blur/transforms.py +1633 -0
- albumentations/augmentations/crops/__init__.py +0 -0
- albumentations/augmentations/crops/functional.py +494 -0
- albumentations/augmentations/crops/transforms.py +3647 -0
- albumentations/augmentations/dropout/__init__.py +0 -0
- albumentations/augmentations/dropout/channel_dropout.py +134 -0
- albumentations/augmentations/dropout/coarse_dropout.py +567 -0
- albumentations/augmentations/dropout/functional.py +1017 -0
- albumentations/augmentations/dropout/grid_dropout.py +166 -0
- albumentations/augmentations/dropout/mask_dropout.py +274 -0
- albumentations/augmentations/dropout/transforms.py +461 -0
- albumentations/augmentations/dropout/xy_masking.py +186 -0
- albumentations/augmentations/geometric/__init__.py +0 -0
- albumentations/augmentations/geometric/distortion.py +1238 -0
- albumentations/augmentations/geometric/flip.py +752 -0
- albumentations/augmentations/geometric/functional.py +4151 -0
- albumentations/augmentations/geometric/pad.py +676 -0
- albumentations/augmentations/geometric/resize.py +956 -0
- albumentations/augmentations/geometric/rotate.py +864 -0
- albumentations/augmentations/geometric/transforms.py +1962 -0
- albumentations/augmentations/mixing/__init__.py +0 -0
- albumentations/augmentations/mixing/domain_adaptation.py +787 -0
- albumentations/augmentations/mixing/domain_adaptation_functional.py +453 -0
- albumentations/augmentations/mixing/functional.py +878 -0
- albumentations/augmentations/mixing/transforms.py +832 -0
- albumentations/augmentations/other/__init__.py +0 -0
- albumentations/augmentations/other/lambda_transform.py +180 -0
- albumentations/augmentations/other/type_transform.py +261 -0
- albumentations/augmentations/pixel/__init__.py +0 -0
- albumentations/augmentations/pixel/functional.py +4226 -0
- albumentations/augmentations/pixel/transforms.py +7556 -0
- albumentations/augmentations/spectrogram/__init__.py +0 -0
- albumentations/augmentations/spectrogram/transform.py +220 -0
- albumentations/augmentations/text/__init__.py +0 -0
- albumentations/augmentations/text/functional.py +272 -0
- albumentations/augmentations/text/transforms.py +299 -0
- albumentations/augmentations/transforms3d/__init__.py +0 -0
- albumentations/augmentations/transforms3d/functional.py +393 -0
- albumentations/augmentations/transforms3d/transforms.py +1422 -0
- albumentations/augmentations/utils.py +249 -0
- albumentations/core/__init__.py +0 -0
- albumentations/core/bbox_utils.py +920 -0
- albumentations/core/composition.py +1885 -0
- albumentations/core/hub_mixin.py +299 -0
- albumentations/core/keypoints_utils.py +521 -0
- albumentations/core/label_manager.py +339 -0
- albumentations/core/pydantic.py +239 -0
- albumentations/core/serialization.py +352 -0
- albumentations/core/transforms_interface.py +976 -0
- albumentations/core/type_definitions.py +127 -0
- albumentations/core/utils.py +605 -0
- albumentations/core/validation.py +129 -0
- albumentations/pytorch/__init__.py +1 -0
- albumentations/pytorch/transforms.py +189 -0
- nrtk_albumentations-2.1.0.dist-info/METADATA +196 -0
- nrtk_albumentations-2.1.0.dist-info/RECORD +62 -0
- nrtk_albumentations-2.1.0.dist-info/WHEEL +4 -0
- nrtk_albumentations-2.1.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,676 @@
|
|
|
1
|
+
"""Padding transformations for images and related data.
|
|
2
|
+
|
|
3
|
+
This module provides transformations for padding images and associated data. Padding is the process
|
|
4
|
+
of adding pixels to the borders of an image to increase its dimensions. Common use cases include:
|
|
5
|
+
|
|
6
|
+
- Ensuring uniform sizes for model inputs in a batch
|
|
7
|
+
- Making image dimensions divisible by specific values (often required by CNNs)
|
|
8
|
+
- Creating space around an image for annotations or visual purposes
|
|
9
|
+
- Standardizing data dimensions for processing pipelines
|
|
10
|
+
|
|
11
|
+
Padding transformations in this module support various border modes (constant, reflection, replication)
|
|
12
|
+
and properly handle all target types including images, masks, bounding boxes, and keypoints.
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
from numbers import Real
|
|
18
|
+
from typing import Any, Literal
|
|
19
|
+
|
|
20
|
+
import cv2
|
|
21
|
+
import numpy as np
|
|
22
|
+
from pydantic import (
|
|
23
|
+
Field,
|
|
24
|
+
model_validator,
|
|
25
|
+
)
|
|
26
|
+
from typing_extensions import Self
|
|
27
|
+
|
|
28
|
+
from albumentations.core.bbox_utils import (
|
|
29
|
+
denormalize_bboxes,
|
|
30
|
+
normalize_bboxes,
|
|
31
|
+
)
|
|
32
|
+
from albumentations.core.transforms_interface import (
|
|
33
|
+
BaseTransformInitSchema,
|
|
34
|
+
DualTransform,
|
|
35
|
+
)
|
|
36
|
+
from albumentations.core.type_definitions import ALL_TARGETS
|
|
37
|
+
|
|
38
|
+
from . import functional as fgeometric
|
|
39
|
+
|
|
40
|
+
# Public API of this module: only the two transform classes are exported.
__all__ = ["Pad", "PadIfNeeded"]

# Accepted lengths of a sequence-valued ``padding`` argument.
NUM_PADS_XY = 2  # (pad_x, pad_y): left/right share pad_x, top/bottom share pad_y
NUM_PADS_ALL_SIDES = 4  # (left, top, right, bottom): one value per side
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
class Pad(DualTransform):
    """Pad the sides of an image by specified number of pixels.

    Args:
        padding (int, tuple[int, int] or tuple[int, int, int, int]): Padding values. Can be:
            * int - pad all sides by this value
            * tuple[int, int] - (pad_x, pad_y) to pad left/right by pad_x and top/bottom by pad_y
            * tuple[int, int, int, int] - (left, top, right, bottom) specific padding per side
        fill (tuple[float, ...] | float): Padding value if border_mode is cv2.BORDER_CONSTANT
        fill_mask (tuple[float, ...] | float): Padding value for mask if border_mode is cv2.BORDER_CONSTANT
        border_mode (OpenCV flag): OpenCV border mode
        p (float): probability of applying the transform. Default: 1.0.

    Targets:
        image, mask, bboxes, keypoints, volume, mask3d

    Image types:
        uint8, float32

    Raises:
        TypeError: When the transform is applied and `padding` is neither a single number
            nor a sequence of exactly 2 or 4 values.

    References:
        PyTorch Pad: https://pytorch.org/vision/main/generated/torchvision.transforms.v2.Pad.html

    Examples:
        >>> import numpy as np
        >>> import albumentations as A
        >>> import cv2
        >>>
        >>> # Prepare sample data
        >>> image = np.random.randint(0, 256, (100, 100, 3), dtype=np.uint8)
        >>> mask = np.random.randint(0, 2, (100, 100), dtype=np.uint8)
        >>> bboxes = np.array([[10, 10, 50, 50], [40, 40, 80, 80]], dtype=np.float32)
        >>> bbox_labels = [1, 2]
        >>> keypoints = np.array([[20, 30], [60, 70]], dtype=np.float32)
        >>> keypoint_labels = [0, 1]
        >>>
        >>> # Example 1: Pad all sides by the same value
        >>> transform = A.Compose([
        ...     A.Pad(padding=20, border_mode=cv2.BORDER_CONSTANT, fill=0),
        ... ], bbox_params=A.BboxParams(format='pascal_voc', label_fields=['bbox_labels']),
        ...    keypoint_params=A.KeypointParams(format='xy', label_fields=['keypoint_labels']))
        >>>
        >>> # Apply the transform
        >>> padded = transform(
        ...     image=image,
        ...     mask=mask,
        ...     bboxes=bboxes,
        ...     bbox_labels=bbox_labels,
        ...     keypoints=keypoints,
        ...     keypoint_labels=keypoint_labels
        ... )
        >>>
        >>> # Get the padded data
        >>> padded_image = padded['image']  # Shape will be (140, 140, 3)
        >>> padded_mask = padded['mask']  # Shape will be (140, 140)
        >>> padded_bboxes = padded['bboxes']  # Bounding boxes coordinates adjusted to the padded image
        >>> padded_keypoints = padded['keypoints']  # Keypoints coordinates adjusted to the padded image
        >>>
        >>> # Example 2: Different padding for sides using (pad_x, pad_y)
        >>> transform_xy = A.Compose([
        ...     A.Pad(
        ...         padding=(10, 30),  # 10px padding on left/right, 30px on top/bottom
        ...         border_mode=cv2.BORDER_CONSTANT,
        ...         fill=128  # Gray padding color
        ...     ),
        ... ])
        >>>
        >>> padded_xy = transform_xy(image=image)
        >>> padded_xy_image = padded_xy['image']  # Shape will be (160, 120, 3)
        >>>
        >>> # Example 3: Different padding for each side
        >>> transform_sides = A.Compose([
        ...     A.Pad(
        ...         padding=(5, 10, 15, 20),  # (left, top, right, bottom)
        ...         border_mode=cv2.BORDER_CONSTANT,
        ...         fill=0,
        ...         fill_mask=0
        ...     ),
        ... ], bbox_params=A.BboxParams(format='pascal_voc', label_fields=['bbox_labels']))
        >>>
        >>> padded_sides = transform_sides(
        ...     image=image,
        ...     mask=mask,
        ...     bboxes=bboxes,
        ...     bbox_labels=bbox_labels
        ... )
        >>>
        >>> padded_sides_image = padded_sides['image']  # Shape will be (130, 120, 3)
        >>> padded_sides_bboxes = padded_sides['bboxes']  # Bounding boxes adjusted to the new coordinates
        >>>
        >>> # Example 4: Using different border_mode options
        >>> # Create a smaller image for better visualization of reflection/wrapping
        >>> small_image = np.random.randint(0, 256, (10, 10, 3), dtype=np.uint8)
        >>>
        >>> # Reflection padding
        >>> reflect_pad = A.Compose([
        ...     A.Pad(padding=5, border_mode=cv2.BORDER_REFLECT_101),
        ... ])
        >>> reflected = reflect_pad(image=small_image)
        >>> reflected_image = reflected['image']  # Shape will be (20, 20, 3) with reflected edges
        >>>
        >>> # Replicate padding
        >>> replicate_pad = A.Compose([
        ...     A.Pad(padding=5, border_mode=cv2.BORDER_REPLICATE),
        ... ])
        >>> replicated = replicate_pad(image=small_image)
        >>> replicated_image = replicated['image']  # Shape will be (20, 20, 3) with replicated edges
        >>>
        >>> # Example 5: Padding with masks and constant border mode
        >>> # The image and its mask must share the same height and width
        >>> mask_image = np.zeros((50, 50, 3), dtype=np.uint8)
        >>> binary_mask = np.zeros((50, 50), dtype=np.uint8)
        >>> binary_mask[10:40, 10:40] = 1  # Set center region to 1
        >>>
        >>> mask_transform = A.Compose([
        ...     A.Pad(
        ...         padding=10,
        ...         border_mode=cv2.BORDER_CONSTANT,
        ...         fill=0,  # Black padding for image
        ...         fill_mask=0  # Use 0 for mask padding (background)
        ...     ),
        ... ])
        >>>
        >>> padded_mask_result = mask_transform(image=mask_image, mask=binary_mask)
        >>> padded_binary_mask = padded_mask_result['mask']  # Shape will be (70, 70)

    """

    _targets = ALL_TARGETS

    class InitSchema(BaseTransformInitSchema):
        padding: int | tuple[int, int] | tuple[int, int, int, int]
        fill: tuple[float, ...] | float
        fill_mask: tuple[float, ...] | float
        border_mode: Literal[
            cv2.BORDER_CONSTANT,
            cv2.BORDER_REPLICATE,
            cv2.BORDER_REFLECT,
            cv2.BORDER_WRAP,
            cv2.BORDER_REFLECT_101,
        ]

    def __init__(
        self,
        padding: int | tuple[int, int] | tuple[int, int, int, int] = 0,
        fill: tuple[float, ...] | float = 0,
        fill_mask: tuple[float, ...] | float = 0,
        border_mode: Literal[
            cv2.BORDER_CONSTANT,
            cv2.BORDER_REPLICATE,
            cv2.BORDER_REFLECT,
            cv2.BORDER_WRAP,
            cv2.BORDER_REFLECT_101,
        ] = cv2.BORDER_CONSTANT,
        p: float = 1.0,
    ):
        super().__init__(p=p)
        self.padding = padding
        self.fill = fill
        self.fill_mask = fill_mask
        self.border_mode = border_mode

    def apply(
        self,
        img: np.ndarray,
        pad_top: int,
        pad_bottom: int,
        pad_left: int,
        pad_right: int,
        **params: Any,
    ) -> np.ndarray:
        """Apply the Pad transform to an image.

        Args:
            img (np.ndarray): Image to be transformed.
            pad_top (int): Top padding.
            pad_bottom (int): Bottom padding.
            pad_left (int): Left padding.
            pad_right (int): Right padding.
            **params (Any): Additional parameters.

        Returns:
            np.ndarray: Result of `pad_with_params` called with this transform's
                `border_mode` and `fill`.

        """
        return fgeometric.pad_with_params(
            img,
            pad_top,
            pad_bottom,
            pad_left,
            pad_right,
            border_mode=self.border_mode,
            value=self.fill,
        )

    def apply_to_mask(
        self,
        mask: np.ndarray,
        pad_top: int,
        pad_bottom: int,
        pad_left: int,
        pad_right: int,
        **params: Any,
    ) -> np.ndarray:
        """Apply the Pad transform to a mask.

        Args:
            mask (np.ndarray): Mask to be transformed.
            pad_top (int): Top padding.
            pad_bottom (int): Bottom padding.
            pad_left (int): Left padding.
            pad_right (int): Right padding.
            **params (Any): Additional parameters.

        Returns:
            np.ndarray: Result of `pad_with_params` called with this transform's
                `border_mode` and `fill_mask` (not `fill`).

        """
        return fgeometric.pad_with_params(
            mask,
            pad_top,
            pad_bottom,
            pad_left,
            pad_right,
            border_mode=self.border_mode,
            value=self.fill_mask,
        )

    def apply_to_bboxes(
        self,
        bboxes: np.ndarray,
        pad_top: int,
        pad_bottom: int,
        pad_left: int,
        pad_right: int,
        **params: Any,
    ) -> np.ndarray:
        """Apply the Pad transform to bounding boxes.

        Args:
            bboxes (np.ndarray): Bounding boxes to be transformed.
            pad_top (int): Top padding.
            pad_bottom (int): Bottom padding.
            pad_left (int): Left padding.
            pad_right (int): Right padding.
            **params (Any): Additional parameters. Must contain "shape", the shape
                of the input before padding.

        Returns:
            np.ndarray: Bounding boxes re-normalized against the padded image size.

        """
        image_shape = params["shape"][:2]
        rows, cols = image_shape

        # Boxes are denormalized with the original shape, shifted, then
        # normalized again with the enlarged (padded) shape.
        bboxes_np = denormalize_bboxes(bboxes, params["shape"])

        result = fgeometric.pad_bboxes(
            bboxes_np,
            pad_top,
            pad_bottom,
            pad_left,
            pad_right,
            self.border_mode,
            image_shape=image_shape,
        )

        return normalize_bboxes(
            result,
            (rows + pad_top + pad_bottom, cols + pad_left + pad_right),
        )

    def apply_to_keypoints(
        self,
        keypoints: np.ndarray,
        pad_top: int,
        pad_bottom: int,
        pad_left: int,
        pad_right: int,
        **params: Any,
    ) -> np.ndarray:
        """Apply the Pad transform to keypoints.

        Args:
            keypoints (np.ndarray): Keypoints to be transformed.
            pad_top (int): Top padding.
            pad_bottom (int): Bottom padding.
            pad_left (int): Left padding.
            pad_right (int): Right padding.
            **params (Any): Additional parameters. Must contain "shape", the shape
                of the input before padding.

        Returns:
            np.ndarray: Result of `pad_keypoints` for the given padding.

        """
        return fgeometric.pad_keypoints(
            keypoints,
            pad_top,
            pad_bottom,
            pad_left,
            pad_right,
            self.border_mode,
            image_shape=params["shape"][:2],
        )

    def apply_to_images(
        self,
        images: np.ndarray,
        pad_top: int,
        pad_bottom: int,
        pad_left: int,
        pad_right: int,
        **params: Any,
    ) -> np.ndarray:
        """Apply the Pad transform to a batch of images.

        Args:
            images (np.ndarray): Batch of images to be transformed.
            pad_top (int): Top padding.
            pad_bottom (int): Bottom padding.
            pad_left (int): Left padding.
            pad_right (int): Right padding.
            **params (Any): Additional parameters.

        Returns:
            np.ndarray: Result of `pad_images_with_params` called with this
                transform's `border_mode` and `fill`.

        """
        return fgeometric.pad_images_with_params(
            images,
            pad_top,
            pad_bottom,
            pad_left,
            pad_right,
            border_mode=self.border_mode,
            value=self.fill,
        )

    def get_params_dependent_on_data(
        self,
        params: dict[str, Any],
        data: dict[str, Any],
    ) -> dict[str, Any]:
        """Get the parameters dependent on the data.

        Expands `self.padding` into one value per side. Neither `params` nor
        `data` is read here; the padding is fully determined by the constructor
        argument.

        Args:
            params (dict[str, Any]): Parameters.
            data (dict[str, Any]): Data.

        Returns:
            dict[str, Any]: Parameters with keys "pad_top", "pad_bottom",
                "pad_left" and "pad_right".

        Raises:
            TypeError: If `padding` is not a single number or a tuple/list of
                length 2 or 4.

        """
        padding = self.padding
        is_sequence = isinstance(padding, (tuple, list))

        if isinstance(padding, Real):
            # Single number: the same padding on every side.
            pad_top = pad_bottom = pad_left = pad_right = padding
        elif is_sequence and len(padding) == NUM_PADS_XY:
            # (pad_x, pad_y): horizontal value first, vertical second.
            pad_left = pad_right = padding[0]
            pad_top = pad_bottom = padding[1]
        elif is_sequence and len(padding) == NUM_PADS_ALL_SIDES:
            pad_left, pad_top, pad_right, pad_bottom = padding  # type: ignore[misc]
        else:
            raise TypeError(
                "Padding must be a single number, a pair of numbers, or a quadruple of numbers",
            )

        return {
            "pad_top": pad_top,
            "pad_bottom": pad_bottom,
            "pad_left": pad_left,
            "pad_right": pad_right,
        }
|
|
405
|
+
|
|
406
|
+
|
|
407
|
+
class PadIfNeeded(Pad):
    """Pads the sides of an image if the image dimensions are less than the specified minimum dimensions.
    If the `pad_height_divisor` or `pad_width_divisor` is specified, the function additionally ensures
    that the image dimensions are divisible by these values.

    Args:
        min_height (int | None): Minimum desired height of the image. Ensures image height is at least this value.
            Default is 1024. Must be set to None when `pad_height_divisor` is used.
        min_width (int | None): Minimum desired width of the image. Ensures image width is at least this value.
            Default is 1024. Must be set to None when `pad_width_divisor` is used.
        pad_height_divisor (int | None): If set, pads the image height to make it divisible by this value.
            If not specified, min_height must be provided.
        pad_width_divisor (int | None): If set, pads the image width to make it divisible by this value.
            If not specified, min_width must be provided.
        position (Literal["center", "top_left", "top_right", "bottom_left", "bottom_right", "random"]):
            Position where the image is to be placed after padding. Default is 'center'.
        border_mode (int): Specifies the border mode to use if padding is required.
            The default is `cv2.BORDER_CONSTANT`.
        fill (tuple[float, ...] | float): Value to fill the border pixels if the border mode
            is `cv2.BORDER_CONSTANT`. Default is 0.
        fill_mask (tuple[float, ...] | float): Similar to `fill` but used for padding masks. Default is 0.
        p (float): Probability of applying the transform. Default is 1.0.

    Targets:
        image, mask, bboxes, keypoints, volume, mask3d

    Image types:
        uint8, float32

    Note:
        - Either `min_height` or `pad_height_divisor` must be set, but not both.
        - Either `min_width` or `pad_width_divisor` must be set, but not both.
        - `min_height` and `min_width` default to 1024, so pass `min_height=None` / `min_width=None`
          explicitly when using the divisor parameters; otherwise both would be set at once.
        - If `border_mode` is set to `cv2.BORDER_CONSTANT`, `fill` must be provided.
        - The transform will maintain consistency across all targets (image, mask, bboxes, keypoints, volume).
        - For bounding boxes, the coordinates will be adjusted to account for the padding.
        - For keypoints, their positions will be shifted according to the padding.

    Examples:
        >>> import numpy as np
        >>> import albumentations as A
        >>> import cv2
        >>>
        >>> # Prepare sample data
        >>> image = np.random.randint(0, 256, (100, 100, 3), dtype=np.uint8)
        >>> mask = np.random.randint(0, 2, (100, 100), dtype=np.uint8)
        >>> bboxes = np.array([[10, 10, 50, 50], [40, 40, 80, 80]], dtype=np.float32)
        >>> bbox_labels = [1, 2]
        >>> keypoints = np.array([[20, 30], [60, 70]], dtype=np.float32)
        >>> keypoint_labels = [0, 1]
        >>>
        >>> # Example 1: Basic usage with min_height and min_width
        >>> transform = A.Compose([
        ...     A.PadIfNeeded(min_height=150, min_width=200, border_mode=cv2.BORDER_CONSTANT, fill=0),
        ... ], bbox_params=A.BboxParams(format='pascal_voc', label_fields=['bbox_labels']),
        ...    keypoint_params=A.KeypointParams(format='xy', label_fields=['keypoint_labels']))
        >>>
        >>> # Apply the transform
        >>> padded = transform(
        ...     image=image,
        ...     mask=mask,
        ...     bboxes=bboxes,
        ...     bbox_labels=bbox_labels,
        ...     keypoints=keypoints,
        ...     keypoint_labels=keypoint_labels
        ... )
        >>>
        >>> # Get the padded data
        >>> padded_image = padded['image']  # Shape will be (150, 200, 3)
        >>> padded_mask = padded['mask']  # Shape will be (150, 200)
        >>> padded_bboxes = padded['bboxes']  # Bounding boxes adjusted for the padded image
        >>> padded_bbox_labels = padded['bbox_labels']  # Labels remain unchanged
        >>> padded_keypoints = padded['keypoints']  # Keypoints adjusted for the padded image
        >>> padded_keypoint_labels = padded['keypoint_labels']  # Labels remain unchanged
        >>>
        >>> # Example 2: Using pad_height_divisor and pad_width_divisor
        >>> # This ensures the output dimensions are divisible by the specified values.
        >>> # min_height/min_width are disabled explicitly because they default to 1024.
        >>> transform_divisor = A.Compose([
        ...     A.PadIfNeeded(
        ...         min_height=None,
        ...         min_width=None,
        ...         pad_height_divisor=32,
        ...         pad_width_divisor=32,
        ...         border_mode=cv2.BORDER_CONSTANT,
        ...         fill=0
        ...     ),
        ... ])
        >>>
        >>> padded_divisor = transform_divisor(image=image)
        >>> padded_divisor_image = padded_divisor['image']  # Shape will be (128, 128, 3) - divisible by 32
        >>>
        >>> # Example 3: Different position options
        >>> # Create a small recognizable image for better visualization of positioning
        >>> small_image = np.zeros((50, 50, 3), dtype=np.uint8)
        >>> small_image[20:30, 20:30, :] = 255  # White square in the middle
        >>>
        >>> # Top-left positioning
        >>> top_left_pad = A.Compose([
        ...     A.PadIfNeeded(
        ...         min_height=100,
        ...         min_width=100,
        ...         position="top_left",
        ...         border_mode=cv2.BORDER_CONSTANT,
        ...         fill=128  # Gray padding
        ...     ),
        ... ])
        >>> top_left_result = top_left_pad(image=small_image)
        >>> top_left_image = top_left_result['image']  # Image will be at top-left of 100x100 canvas
        >>>
        >>> # Center positioning (default)
        >>> center_pad = A.Compose([
        ...     A.PadIfNeeded(
        ...         min_height=100,
        ...         min_width=100,
        ...         position="center",
        ...         border_mode=cv2.BORDER_CONSTANT,
        ...         fill=128
        ...     ),
        ... ])
        >>> center_result = center_pad(image=small_image)
        >>> center_image = center_result['image']  # Image will be centered in 100x100 canvas
        >>>
        >>> # Example 4: Different border_mode options
        >>> # Reflection padding
        >>> reflect_pad = A.Compose([
        ...     A.PadIfNeeded(
        ...         min_height=100,
        ...         min_width=100,
        ...         border_mode=cv2.BORDER_REFLECT_101
        ...     ),
        ... ])
        >>> reflected = reflect_pad(image=small_image)
        >>> reflected_image = reflected['image']  # Will use reflection for padding
        >>>
        >>> # Replication padding
        >>> replicate_pad = A.Compose([
        ...     A.PadIfNeeded(
        ...         min_height=100,
        ...         min_width=100,
        ...         border_mode=cv2.BORDER_REPLICATE
        ...     ),
        ... ])
        >>> replicated = replicate_pad(image=small_image)
        >>> replicated_image = replicated['image']  # Will use edge replication for padding
        >>>
        >>> # Example 5: Working with masks and custom fill values
        >>> # The mask and boxes must match the 50x50 image they are passed with
        >>> binary_mask = np.zeros((50, 50), dtype=np.uint8)
        >>> binary_mask[10:40, 10:40] = 1  # Set center region to 1
        >>> small_bboxes = np.array([[10, 10, 40, 40]], dtype=np.float32)
        >>> small_bbox_labels = [1]
        >>>
        >>> mask_transform = A.Compose([
        ...     A.PadIfNeeded(
        ...         min_height=100,
        ...         min_width=100,
        ...         border_mode=cv2.BORDER_CONSTANT,
        ...         fill=0,  # Black padding for image
        ...         fill_mask=0  # Use 0 for mask padding (background)
        ...     ),
        ... ], bbox_params=A.BboxParams(format='pascal_voc', label_fields=['bbox_labels']))
        >>>
        >>> padded_mask_result = mask_transform(
        ...     image=small_image,
        ...     mask=binary_mask,
        ...     bboxes=small_bboxes,
        ...     bbox_labels=small_bbox_labels
        ... )
        >>> padded_binary_mask = padded_mask_result['mask']  # Shape will be (100, 100)
        >>> padded_result_bboxes = padded_mask_result['bboxes']  # Adjusted for padding
        >>> padded_result_bbox_labels = padded_mask_result['bbox_labels']  # Labels remain unchanged

    """

    class InitSchema(BaseTransformInitSchema):
        min_height: int | None = Field(ge=1)
        min_width: int | None = Field(ge=1)
        pad_height_divisor: int | None = Field(ge=1)
        pad_width_divisor: int | None = Field(ge=1)
        position: Literal["center", "top_left", "top_right", "bottom_left", "bottom_right", "random"]
        border_mode: Literal[
            cv2.BORDER_CONSTANT,
            cv2.BORDER_REPLICATE,
            cv2.BORDER_REFLECT,
            cv2.BORDER_WRAP,
            cv2.BORDER_REFLECT_101,
        ]

        fill: tuple[float, ...] | float
        fill_mask: tuple[float, ...] | float

        @model_validator(mode="after")
        def _validate_divisibility(self) -> Self:
            # For each axis exactly one of (minimum size, divisor) must be given:
            # the comparison is true both when neither is set and when both are.
            if (self.min_height is None) == (self.pad_height_divisor is None):
                msg = "Only one of 'min_height' and 'pad_height_divisor' parameters must be set"
                raise ValueError(msg)
            if (self.min_width is None) == (self.pad_width_divisor is None):
                msg = "Only one of 'min_width' and 'pad_width_divisor' parameters must be set"
                raise ValueError(msg)

            # NOTE(review): `fill` is annotated as non-optional, so this branch is
            # presumably unreachable after field validation - confirm before removing.
            if self.border_mode == cv2.BORDER_CONSTANT and self.fill is None:
                msg = "If 'border_mode' is set to 'BORDER_CONSTANT', 'fill' must be provided."
                raise ValueError(msg)

            return self

    def __init__(
        self,
        min_height: int | None = 1024,
        min_width: int | None = 1024,
        pad_height_divisor: int | None = None,
        pad_width_divisor: int | None = None,
        position: Literal["center", "top_left", "top_right", "bottom_left", "bottom_right", "random"] = "center",
        border_mode: Literal[
            cv2.BORDER_CONSTANT,
            cv2.BORDER_REPLICATE,
            cv2.BORDER_REFLECT,
            cv2.BORDER_WRAP,
            cv2.BORDER_REFLECT_101,
        ] = cv2.BORDER_CONSTANT,
        fill: tuple[float, ...] | float = 0,
        fill_mask: tuple[float, ...] | float = 0,
        p: float = 1.0,
    ):
        # Initialize with dummy padding that will be calculated later
        super().__init__(
            padding=0,
            fill=fill,
            fill_mask=fill_mask,
            border_mode=border_mode,
            p=p,
        )
        self.min_height = min_height
        self.min_width = min_width
        self.pad_height_divisor = pad_height_divisor
        self.pad_width_divisor = pad_width_divisor
        self.position = position

    def get_params_dependent_on_data(
        self,
        params: dict[str, Any],
        data: dict[str, Any],
    ) -> dict[str, Any]:
        """Get the parameters dependent on the data.

        Overrides `Pad.get_params_dependent_on_data`: the padding is derived from
        the input shape and the size/divisor constraints instead of `self.padding`,
        then redistributed between the sides according to `self.position`.

        Args:
            params (dict[str, Any]): Parameters. Must contain "shape", the shape of the input.
            data (dict[str, Any]): Data. Not read by this method.

        Returns:
            dict[str, Any]: Parameters with keys "pad_top", "pad_bottom",
                "pad_left" and "pad_right".

        """
        # Step 1: padding needed to satisfy the minimum-size / divisibility constraints.
        h_pad_top, h_pad_bottom, w_pad_left, w_pad_right = fgeometric.get_padding_params(
            image_shape=params["shape"][:2],
            min_height=self.min_height,
            min_width=self.min_width,
            pad_height_divisor=self.pad_height_divisor,
            pad_width_divisor=self.pad_width_divisor,
        )

        # Step 2: redistribute that padding between the sides according to `position`;
        # the transform's own RNG is passed for the "random" option.
        h_pad_top, h_pad_bottom, w_pad_left, w_pad_right = fgeometric.adjust_padding_by_position(
            h_top=h_pad_top,
            h_bottom=h_pad_bottom,
            w_left=w_pad_left,
            w_right=w_pad_right,
            position=self.position,
            py_random=self.py_random,
        )

        return {
            "pad_top": h_pad_top,
            "pad_bottom": h_pad_bottom,
            "pad_left": w_pad_left,
            "pad_right": w_pad_right,
        }
|