nrtk-albumentations 2.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.
Files changed (62)
  1. albumentations/__init__.py +21 -0
  2. albumentations/augmentations/__init__.py +23 -0
  3. albumentations/augmentations/blur/__init__.py +0 -0
  4. albumentations/augmentations/blur/functional.py +438 -0
  5. albumentations/augmentations/blur/transforms.py +1633 -0
  6. albumentations/augmentations/crops/__init__.py +0 -0
  7. albumentations/augmentations/crops/functional.py +494 -0
  8. albumentations/augmentations/crops/transforms.py +3647 -0
  9. albumentations/augmentations/dropout/__init__.py +0 -0
  10. albumentations/augmentations/dropout/channel_dropout.py +134 -0
  11. albumentations/augmentations/dropout/coarse_dropout.py +567 -0
  12. albumentations/augmentations/dropout/functional.py +1017 -0
  13. albumentations/augmentations/dropout/grid_dropout.py +166 -0
  14. albumentations/augmentations/dropout/mask_dropout.py +274 -0
  15. albumentations/augmentations/dropout/transforms.py +461 -0
  16. albumentations/augmentations/dropout/xy_masking.py +186 -0
  17. albumentations/augmentations/geometric/__init__.py +0 -0
  18. albumentations/augmentations/geometric/distortion.py +1238 -0
  19. albumentations/augmentations/geometric/flip.py +752 -0
  20. albumentations/augmentations/geometric/functional.py +4151 -0
  21. albumentations/augmentations/geometric/pad.py +676 -0
  22. albumentations/augmentations/geometric/resize.py +956 -0
  23. albumentations/augmentations/geometric/rotate.py +864 -0
  24. albumentations/augmentations/geometric/transforms.py +1962 -0
  25. albumentations/augmentations/mixing/__init__.py +0 -0
  26. albumentations/augmentations/mixing/domain_adaptation.py +787 -0
  27. albumentations/augmentations/mixing/domain_adaptation_functional.py +453 -0
  28. albumentations/augmentations/mixing/functional.py +878 -0
  29. albumentations/augmentations/mixing/transforms.py +832 -0
  30. albumentations/augmentations/other/__init__.py +0 -0
  31. albumentations/augmentations/other/lambda_transform.py +180 -0
  32. albumentations/augmentations/other/type_transform.py +261 -0
  33. albumentations/augmentations/pixel/__init__.py +0 -0
  34. albumentations/augmentations/pixel/functional.py +4226 -0
  35. albumentations/augmentations/pixel/transforms.py +7556 -0
  36. albumentations/augmentations/spectrogram/__init__.py +0 -0
  37. albumentations/augmentations/spectrogram/transform.py +220 -0
  38. albumentations/augmentations/text/__init__.py +0 -0
  39. albumentations/augmentations/text/functional.py +272 -0
  40. albumentations/augmentations/text/transforms.py +299 -0
  41. albumentations/augmentations/transforms3d/__init__.py +0 -0
  42. albumentations/augmentations/transforms3d/functional.py +393 -0
  43. albumentations/augmentations/transforms3d/transforms.py +1422 -0
  44. albumentations/augmentations/utils.py +249 -0
  45. albumentations/core/__init__.py +0 -0
  46. albumentations/core/bbox_utils.py +920 -0
  47. albumentations/core/composition.py +1885 -0
  48. albumentations/core/hub_mixin.py +299 -0
  49. albumentations/core/keypoints_utils.py +521 -0
  50. albumentations/core/label_manager.py +339 -0
  51. albumentations/core/pydantic.py +239 -0
  52. albumentations/core/serialization.py +352 -0
  53. albumentations/core/transforms_interface.py +976 -0
  54. albumentations/core/type_definitions.py +127 -0
  55. albumentations/core/utils.py +605 -0
  56. albumentations/core/validation.py +129 -0
  57. albumentations/pytorch/__init__.py +1 -0
  58. albumentations/pytorch/transforms.py +189 -0
  59. nrtk_albumentations-2.1.0.dist-info/METADATA +196 -0
  60. nrtk_albumentations-2.1.0.dist-info/RECORD +62 -0
  61. nrtk_albumentations-2.1.0.dist-info/WHEEL +4 -0
  62. nrtk_albumentations-2.1.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,787 @@
+ """Domain adaptation transforms for image augmentation.
+
+ This module provides transformations designed to bridge the domain gap between
+ datasets by adapting the style of an input image to match that of reference images
+ from a target domain. Adaptations are based on matching statistical properties
+ like histograms, frequency spectra, or overall pixel distributions.
+ """
+
+ from __future__ import annotations
+
+ import warnings
+ from collections.abc import Sequence
+ from typing import Annotated, Any, Callable, Literal, cast
+
+ import cv2
+ import numpy as np
+ from pydantic import AfterValidator, field_validator, model_validator
+ from typing_extensions import Self
+
+ from albumentations.augmentations.mixing.domain_adaptation_functional import (
+     adapt_pixel_distribution,
+     apply_histogram,
+     fourier_domain_adaptation,
+ )
+ from albumentations.augmentations.utils import read_rgb_image
+ from albumentations.core.pydantic import ZeroOneRangeType, check_range_bounds, nondecreasing
+ from albumentations.core.transforms_interface import BaseTransformInitSchema, ImageOnlyTransform
+
+ __all__ = [
+     "FDA",
+     "HistogramMatching",
+     "PixelDistributionAdaptation",
+ ]
+
+ MAX_BETA_LIMIT = 0.5
+
+
+ # Base class for Domain Adaptation Init Schema
+ class BaseDomainAdaptationInitSchema(BaseTransformInitSchema):
+     reference_images: Sequence[Any] | None
+     read_fn: Callable[[Any], np.ndarray] | None
+     metadata_key: str
+
+     @model_validator(mode="after")
+     def _check_deprecated_args(self) -> Self:
+         if self.reference_images is not None:
+             warnings.warn(
+                 "'reference_images' and 'read_fn' arguments are deprecated. "
+                 "Please pass pre-loaded reference images "
+                 f"using the '{self.metadata_key}' key in the input data dictionary.",
+                 DeprecationWarning,
+                 stacklevel=3,  # Adjust stacklevel as needed
+             )
+
+             if self.read_fn is None:
+                 msg = "read_fn cannot be None when using the deprecated 'reference_images' argument."
+                 raise ValueError(msg)
+
+         return self
+
+
+ class BaseDomainAdaptation(ImageOnlyTransform):
+     """Base class for domain adaptation transforms.
+
+     Domain adaptation transforms modify source images to match the characteristics of a target domain.
+     These transforms typically require an additional reference image or dataset from the target domain
+     to extract style information or domain-specific features.
+
+     This base class provides the framework for implementing various domain adaptation techniques such as
+     color transfer, style transfer, frequency domain adaptation, or histogram matching.
+
+     Args:
+         reference_images (Sequence[Any] | None): Deprecated. Sequence of references to images from the target
+             domain. Should be used with read_fn to load actual images. Prefer passing pre-loaded images via
+             metadata_key.
+         read_fn (Callable[[Any], np.ndarray] | None): Deprecated. Function to read an image from a reference.
+             Should be used with reference_images.
+         metadata_key (str): Key in the input data dictionary that contains pre-loaded target domain images.
+         p (float): Probability of applying the transform. Default: 0.5.
+
+     Targets:
+         image
+
+     Image types:
+         uint8, float32
+
+     Notes:
+         - Subclasses should implement the `apply` method to perform the actual adaptation.
+         - Use the `targets_as_params` property to define what additional data your transform needs.
+         - Override `get_params_dependent_on_data` to extract the target domain data.
+         - Domain adaptation often requires per-sample auxiliary data, which should be passed
+           through the main data dictionary rather than at initialization time.
+
+     Examples:
+         >>> import numpy as np
+         >>> import albumentations as A
+         >>> import cv2
+         >>> from typing import Any
+         >>> from pydantic import Field
+         >>>
+         >>> # Implement a simple color transfer domain adaptation transform
+         >>> class SimpleColorTransfer(A.BaseDomainAdaptation):
+         ...     class InitSchema(A.BaseTransformInitSchema):
+         ...         intensity: float = Field(gt=0, le=1)
+         ...         reference_key: str
+         ...
+         ...     def __init__(
+         ...         self,
+         ...         intensity: float = 0.5,
+         ...         reference_key: str = "target_image",
+         ...         p: float = 1.0
+         ...     ):
+         ...         super().__init__(p=p)
+         ...         self.intensity = intensity
+         ...         self.reference_key = reference_key
+         ...
+         ...     @property
+         ...     def targets_as_params(self) -> list[str]:
+         ...         return [self.reference_key]  # We need target domain image
+         ...
+         ...     def get_params_dependent_on_data(
+         ...         self,
+         ...         params: dict[str, Any],
+         ...         data: dict[str, Any]
+         ...     ) -> dict[str, Any]:
+         ...         target_image = data.get(self.reference_key)
+         ...         if target_image is None:
+         ...             # Fallback if target image is not provided
+         ...             return {"target_image": None}
+         ...         return {"target_image": target_image}
+         ...
+         ...     def apply(
+         ...         self,
+         ...         img: np.ndarray,
+         ...         target_image: np.ndarray | None = None,
+         ...         **params
+         ...     ) -> np.ndarray:
+         ...         if target_image is None:
+         ...             return img
+         ...
+         ...         # Simple color transfer implementation
+         ...         # Calculate mean and std of source and target images
+         ...         src_mean = np.mean(img, axis=(0, 1))
+         ...         src_std = np.std(img, axis=(0, 1))
+         ...         tgt_mean = np.mean(target_image, axis=(0, 1))
+         ...         tgt_std = np.std(target_image, axis=(0, 1))
+         ...
+         ...         # Normalize source image
+         ...         normalized = (img - src_mean) / (src_std + 1e-7)
+         ...
+         ...         # Scale by target statistics and blend with original
+         ...         transformed = normalized * tgt_std + tgt_mean
+         ...         transformed = np.clip(transformed, 0, 255).astype(np.uint8)
+         ...
+         ...         # Blend the result based on intensity
+         ...         result = cv2.addWeighted(img, 1 - self.intensity, transformed, self.intensity, 0)
+         ...         return result
+         >>>
+         >>> # Usage example with a target image from a different domain
+         >>> source_image = np.random.randint(0, 256, (100, 100, 3), dtype=np.uint8)
+         >>> target_image = np.random.randint(100, 200, (200, 200, 3), dtype=np.uint8)  # Different domain image
+         >>>
+         >>> # Create the transform with the pipeline
+         >>> transform = A.Compose([
+         ...     SimpleColorTransfer(intensity=0.7, reference_key="target_img", p=1.0),
+         ... ])
+         >>>
+         >>> # Apply the transform with the target image passed in the data dictionary
+         >>> result = transform(image=source_image, target_img=target_image)
+         >>> adapted_image = result["image"]  # Image with characteristics transferred from target domain
+
+     """
+
+     class InitSchema(BaseDomainAdaptationInitSchema):
+         pass
+
+     def __init__(
+         self,
+         reference_images: Sequence[Any] | None,
+         read_fn: Callable[[Any], np.ndarray] | None,
+         metadata_key: str,
+         p: float = 0.5,
+     ):
+         super().__init__(p=p)
+         self.reference_images = reference_images
+         self.read_fn = read_fn
+         self.metadata_key = metadata_key
+
+     @property
+     def targets_as_params(self) -> list[str]:
+         return [self.metadata_key]
+
+     def _get_reference_image(self, data: dict[str, Any]) -> np.ndarray:
+         """Retrieves the reference image from metadata or deprecated arguments."""
+         reference_image = None
+
+         if metadata_images := data.get(self.metadata_key):
+             if not isinstance(metadata_images, Sequence) or not metadata_images:
+                 raise ValueError(
+                     f"Metadata key '{self.metadata_key}' should contain a non-empty sequence of numpy arrays.",
+                 )
+             if not isinstance(metadata_images[0], np.ndarray):
+                 raise ValueError(
+                     f"Images in metadata key '{self.metadata_key}' should be numpy arrays.",
+                 )
+             reference_image = self.py_random.choice(metadata_images)
+
+             if self.reference_images is not None:
+                 warnings.warn(
+                     f"Both 'reference_images' (deprecated constructor argument) and metadata via "
+                     f"'{self.metadata_key}' were provided. Prioritizing metadata.",
+                     UserWarning,
+                     stacklevel=3,  # Adjust stacklevel as needed
+                 )
+
+         elif self.reference_images is not None:
+             # Deprecation warning is handled by the InitSchema validator
+             if self.read_fn is None:
+                 # This case should ideally be caught by InitSchema, but safety check
+                 msg = "read_fn cannot be None when using the deprecated 'reference_images' argument."
+                 raise ValueError(msg)
+             ref_source = self.py_random.choice(self.reference_images)
+             reference_image = self.read_fn(ref_source)
+         else:
+             raise ValueError(
+                 f"{self.__class__.__name__} requires reference images. Provide them via the `metadata_key` "
+                 f"'{self.metadata_key}' in the input data, or use the deprecated 'reference_images' argument.",
+             )
+
+         if reference_image is None:
+             # Should not happen if logic above is correct, but safety check
+             msg = "Could not obtain a reference image."
+             raise RuntimeError(msg)
+
+         return reference_image
+
+     def to_dict_private(self) -> dict[str, Any]:
+         """Convert the transform to a dictionary for serialization.
+
+         Raises:
+             NotImplementedError: Domain adaptation transforms cannot be reliably serialized
+                 when using metadata key or deprecated arguments.
+
+         """
+         if self.reference_images is not None:
+             msg = (
+                 f"{self.__class__.__name__} cannot be reliably serialized when using the deprecated 'reference_images'."
+             )
+             raise NotImplementedError(msg)
+
+         msg = (
+             f"{self.__class__.__name__} cannot be reliably serialized due to its dependency "
+             "on external data via metadata."
+         )
+         raise NotImplementedError(msg)
+
+
+ class HistogramMatching(BaseDomainAdaptation):
+     """Adjust the pixel value distribution of an input image to match a reference image.
+
+     This transform modifies the pixel intensities of the input image so that its histogram
+     matches the histogram of a provided reference image. This process is applied independently
+     to each channel of the image if it is multi-channel.
+
+     Why use Histogram Matching?
+
+     **Domain Adaptation:** Helps bridge the gap between images from different sources
+     (e.g., different cameras, lighting conditions, synthetic vs. real data) by aligning
+     their overall intensity and contrast characteristics.
+
+     *Use Case Example:* Imagine you have labeled training images from one source (e.g., daytime photos,
+     medical scans from Hospital A) but expect your model to work on images from a different
+     source at test time (e.g., nighttime photos, scans from Hospital B). You might only have
+     unlabeled images from the target (test) domain. HistogramMatching can be used to make your
+     labeled training images resemble the *style* (intensity and contrast distribution) of the
+     unlabeled target images. By training on these adapted images, your model may generalize
+     better to the target domain without needing labels for it.
+
+     How it works:
+         The core idea is to map the pixel values of the input image such that its cumulative
+         distribution function (CDF) matches the CDF of the reference image. This effectively
+         reshapes the input image's histogram to resemble the reference's histogram.
+
+     Args:
+         metadata_key (str): Key in the input `data` dictionary to retrieve the reference image(s).
+             The value should be a sequence (e.g., list) of numpy arrays (pre-loaded images).
+             Default: "hm_metadata".
+         blend_ratio (tuple[float, float]): Range for the blending factor between the original
+             and the histogram-matched image. A value of 0 means the original image is returned,
+             1 means the fully matched image is returned. A random value within this range [min, max]
+             is sampled for each application. This allows for varying degrees of adaptation.
+             Default: (0.5, 1.0).
+         p (float): Probability of applying the transform. Default: 0.5.
+
+     Targets:
+         image
+
+     Image types:
+         uint8, float32
+
+     Note:
+         - Requires at least one reference image to be provided via the `metadata_key` argument.
+         - The `reference_images` and `read_fn` constructor arguments are deprecated.
+
+     Examples:
+         >>> import numpy as np
+         >>> import albumentations as A
+         >>> import cv2
+         >>>
+         >>> # Create sample images for demonstration
+         >>> # Source image: dark image with low contrast
+         >>> source_image = np.ones((100, 100, 3), dtype=np.uint8) * 50  # Dark gray image
+         >>> source_image[30:70, 30:70] = 100  # Add slightly brighter square in center
+         >>>
+         >>> # Target image: higher brightness and contrast
+         >>> target_image = np.ones((100, 100, 3), dtype=np.uint8) * 150  # Bright image
+         >>> target_image[20:80, 20:80] = 200  # Add even brighter square
+         >>>
+         >>> # Initialize the histogram matching transform with custom settings
+         >>> transform = A.Compose([
+         ...     A.HistogramMatching(
+         ...         blend_ratio=(0.7, 0.9),  # Control the strength of histogram matching
+         ...         metadata_key="reference_imgs",  # Custom metadata key
+         ...         p=1.0
+         ...     )
+         ... ])
+         >>>
+         >>> # Apply the transform
+         >>> result = transform(
+         ...     image=source_image,
+         ...     reference_imgs=[target_image]  # Pass reference image via metadata key
+         ... )
+         >>>
+         >>> # Get the histogram-matched image
+         >>> matched_image = result["image"]
+         >>>
+         >>> # The matched_image will have brightness and contrast similar to target_image
+         >>> # while preserving the content of source_image
+         >>>
+         >>> # Multiple reference images can be provided:
+         >>> ref_imgs = [
+         ...     target_image,
+         ...     np.random.randint(100, 200, (100, 100, 3), dtype=np.uint8)  # Another reference image
+         ... ]
+         >>> multiple_refs_result = transform(image=source_image, reference_imgs=ref_imgs)
+         >>> # A random reference image from the list will be chosen for each transform application
+
+     References:
+         Histogram Matching in scikit-image:
+         https://scikit-image.org/docs/dev/auto_examples/color_exposure/plot_histogram_matching.html
+
+     """
+
+     class InitSchema(BaseDomainAdaptationInitSchema):
+         blend_ratio: Annotated[
+             tuple[float, float],
+             AfterValidator(nondecreasing),
+             AfterValidator(check_range_bounds(0, 1)),
+         ]
+
+     def __init__(
+         self,
+         reference_images: Sequence[Any] | None = None,
+         blend_ratio: tuple[float, float] = (0.5, 1.0),
+         read_fn: Callable[[Any], np.ndarray] | None = read_rgb_image,
+         metadata_key: str = "hm_metadata",
+         p: float = 0.5,
+     ):
+         super().__init__(reference_images=reference_images, read_fn=read_fn, metadata_key=metadata_key, p=p)
+         self.blend_ratio = blend_ratio
+
+     def get_params_dependent_on_data(self, params: dict[str, Any], data: dict[str, Any]) -> dict[str, Any]:
+         """Generate parameters for the transform based on input data.
+
+         Args:
+             params (dict[str, Any]): Parameters from the previous transform in the pipeline
+             data (dict[str, Any]): Input data dictionary containing the image and metadata
+
+         Returns:
+             dict[str, Any]: Dictionary containing the reference image and blend ratio
+
+         """
+         reference_image = self._get_reference_image(data)
+         return {
+             "reference_image": reference_image,
+             "blend_ratio": self.py_random.uniform(*self.blend_ratio),
+         }
+
+     def apply(
+         self,
+         img: np.ndarray,
+         reference_image: np.ndarray,
+         blend_ratio: float,
+         **params: Any,
+     ) -> np.ndarray:
+         """Apply histogram matching to the input image.
+
+         Args:
+             img (np.ndarray): Input image to be transformed
+             reference_image (np.ndarray): Reference image for histogram matching
+             blend_ratio (float): Blending factor between the original and matched image
+             **params (Any): Additional parameters
+
+         Returns:
+             np.ndarray: Transformed image with histogram matched to the reference image
+
+         """
+         return apply_histogram(img, reference_image, blend_ratio)
+
+
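
The `apply_histogram` helper called above is defined in `albumentations/augmentations/mixing/domain_adaptation_functional.py` (file 27 in the list above) and is not part of this hunk. As a rough numpy sketch of the CDF matching described in the HistogramMatching docstring — illustrative helper names, not the package's actual implementation:

import numpy as np

def match_channel(src, ref):
    # Empirical CDF position of each unique source and reference value.
    src_values, src_idx, src_counts = np.unique(src.ravel(), return_inverse=True, return_counts=True)
    ref_values, ref_counts = np.unique(ref.ravel(), return_counts=True)
    src_cdf = np.cumsum(src_counts) / src.size
    ref_cdf = np.cumsum(ref_counts) / ref.size
    # Map each source quantile to the reference value at the same quantile.
    matched = np.interp(src_cdf, ref_cdf, ref_values)
    return matched[src_idx].reshape(src.shape)

def simple_histogram_match(img, reference, blend_ratio):
    # Match channels independently, then blend with the original image,
    # mirroring the role of blend_ratio in HistogramMatching above.
    matched = np.stack(
        [match_channel(img[..., c], reference[..., c]) for c in range(img.shape[-1])],
        axis=-1,
    )
    return (blend_ratio * matched + (1 - blend_ratio) * img).astype(img.dtype)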
+ class FDA(BaseDomainAdaptation):
+     """Fourier Domain Adaptation (FDA).
+
+     Adapts the style of the input image to match the style of a reference image
+     by manipulating their frequency components in the Fourier domain. This is
+     particularly useful for unsupervised domain adaptation (UDA).
+
+     Why use FDA?
+
+     **Domain Adaptation:** FDA helps bridge the domain gap between source and target
+     datasets (e.g., synthetic vs. real, day vs. night) by aligning their low-frequency
+     Fourier spectrum components. This can improve model performance on the target domain
+     without requiring target labels.
+
+     *Use Case Example:* Imagine you have labeled training data acquired under certain conditions
+     (e.g., images from Hospital A using a specific scanner) but need your model to perform well
+     on data from a different distribution (e.g., unlabeled images from Hospital B with a different scanner).
+     FDA can adapt the labeled source images to match the *style* (frequency characteristics)
+     of the unlabeled target images, potentially improving the model's generalization to the
+     target domain at test time.
+
+     How it works:
+         FDA operates in the frequency domain. It replaces the low-frequency components
+         of the source image's Fourier transform with the low-frequency components from the
+         reference (target domain) image's Fourier transform. The `beta_limit` parameter
+         controls the size of the frequency window being swapped.
+
+     Args:
+         metadata_key (str): Key in the input `data` dictionary to retrieve the reference image(s).
+             The value should be a sequence (e.g., list) of numpy arrays (pre-loaded images).
+             Default: "fda_metadata".
+         beta_limit (tuple[float, float] | float): Controls the extent of the low-frequency
+             spectrum swap. A larger beta means more components are swapped. Corresponds to the L
+             parameter in the original paper. Should be in the range [0, 0.5]. Sampling is uniform
+             within the provided range [min, max]. Default: (0, 0.1).
+         p (float): Probability of applying the transform. Default: 0.5.
+
+     Targets:
+         image
+
+     Image types:
+         uint8, float32
+
+     Note:
+         - Requires at least one reference image to be provided via the `metadata_key` argument.
+         - The `reference_images` and `read_fn` constructor arguments are deprecated.
+
+     Examples:
+         >>> import numpy as np
+         >>> import albumentations as A
+         >>> import cv2
+         >>>
+         >>> # Create sample images for demonstration
+         >>> # Source image: synthetic or simulated image (e.g., from a rendered game environment)
+         >>> source_img = np.zeros((100, 100, 3), dtype=np.uint8)
+         >>> # Create a pattern in the source image
+         >>> source_img[20:80, 20:80, 0] = 200  # Red square
+         >>> source_img[40:60, 40:60, 1] = 200  # Green inner square
+         >>>
+         >>> # Target domain image: real-world image with different texture/frequency characteristics
+         >>> # For this example, we'll create an image with different frequency patterns
+         >>> target_img = np.zeros((100, 100, 3), dtype=np.uint8)
+         >>> for i in range(100):
+         ...     for j in range(100):
+         ...         # Create a high-frequency pattern
+         ...         target_img[i, j, 0] = ((i + j) % 8) * 30
+         ...         target_img[i, j, 1] = ((i - j) % 8) * 30
+         ...         target_img[i, j, 2] = ((i * j) % 8) * 30
+         >>>
+         >>> # Example 1: FDA with minimal adaptation (small beta value)
+         >>> # This will subtly adjust the frequency characteristics
+         >>> minimal_fda = A.Compose([
+         ...     A.FDA(
+         ...         beta_limit=(0.01, 0.05),  # Small beta range for subtle adaptation
+         ...         metadata_key="target_domain",  # Custom metadata key
+         ...         p=1.0
+         ...     )
+         ... ])
+         >>>
+         >>> # Apply the transform with minimal adaptation
+         >>> minimal_result = minimal_fda(
+         ...     image=source_img,
+         ...     target_domain=[target_img]  # Pass reference image via custom metadata key
+         ... )
+         >>> minimal_adapted_img = minimal_result["image"]
+         >>>
+         >>> # Example 2: FDA with moderate adaptation (medium beta value)
+         >>> moderate_fda = A.Compose([
+         ...     A.FDA(
+         ...         beta_limit=(0.1, 0.2),  # Medium beta range
+         ...         metadata_key="target_domain",
+         ...         p=1.0
+         ...     )
+         ... ])
+         >>>
+         >>> moderate_result = moderate_fda(image=source_img, target_domain=[target_img])
+         >>> moderate_adapted_img = moderate_result["image"]
+         >>>
+         >>> # Example 3: FDA with strong adaptation (larger beta value)
+         >>> strong_fda = A.Compose([
+         ...     A.FDA(
+         ...         beta_limit=(0.3, 0.5),  # Larger beta range (upper limit is MAX_BETA_LIMIT)
+         ...         metadata_key="target_domain",
+         ...         p=1.0
+         ...     )
+         ... ])
+         >>>
+         >>> strong_result = strong_fda(image=source_img, target_domain=[target_img])
+         >>> strong_adapted_img = strong_result["image"]
+         >>>
+         >>> # Example 4: Using multiple target domain images
+         >>> # Creating a list of target domain images with different characteristics
+         >>> target_imgs = [target_img]
+         >>>
+         >>> # Add another target image with different pattern
+         >>> another_target = np.zeros((100, 100, 3), dtype=np.uint8)
+         >>> for i in range(100):
+         ...     for j in range(100):
+         ...         another_target[i, j, 0] = (i // 10) * 25
+         ...         another_target[i, j, 1] = (j // 10) * 25
+         ...         another_target[i, j, 2] = ((i + j) // 10) * 25
+         >>> target_imgs.append(another_target)
+         >>>
+         >>> # Using default FDA settings with multiple target images
+         >>> multi_target_fda = A.Compose([
+         ...     A.FDA(p=1.0)  # Using default settings with default metadata_key="fda_metadata"
+         ... ])
+         >>>
+         >>> # A random target image will be selected from the list for each application
+         >>> multi_target_result = multi_target_fda(image=source_img, fda_metadata=target_imgs)
+         >>> adapted_image = multi_target_result["image"]
+
+     References:
+         - FDA code: https://github.com/YanchaoYang/FDA
+         - FDA paper: https://openaccess.thecvf.com/content_CVPR_2020/papers/Yang_FDA_Fourier_Domain_Adaptation_for_Semantic_Segmentation_CVPR_2020_paper.pdf
+
+     """
+
+     class InitSchema(BaseDomainAdaptationInitSchema):
+         beta_limit: ZeroOneRangeType
+
+         @field_validator("beta_limit")
+         @classmethod
+         def _check_ranges(cls, value: tuple[float, float]) -> tuple[float, float]:
+             bounds = 0, MAX_BETA_LIMIT
+             if not bounds[0] <= value[0] <= value[1] <= bounds[1]:
+                 raise ValueError(f"Values should be in the range {bounds}, got {value}")
+             return value
+
+     def __init__(
+         self,
+         reference_images: Sequence[Any] | None = None,
+         beta_limit: tuple[float, float] | float = (0, 0.1),
+         read_fn: Callable[[Any], np.ndarray] | None = read_rgb_image,
+         metadata_key: str = "fda_metadata",
+         p: float = 0.5,
+     ):
+         super().__init__(reference_images=reference_images, read_fn=read_fn, metadata_key=metadata_key, p=p)
+         self.beta_limit = cast("tuple[float, float]", beta_limit)
+
+     def get_params_dependent_on_data(self, params: dict[str, Any], data: dict[str, Any]) -> dict[str, Any]:
+         """Generate parameters for the transform based on input data."""
+         target_image = self._get_reference_image(data)
+         height, width = params["shape"][:2]
+
+         # Resize the target image to match the input image dimensions
+         target_image_resized = cv2.resize(target_image, dsize=(width, height))
+
+         return {"target_image": target_image_resized, "beta": self.py_random.uniform(*self.beta_limit)}
+
+     def apply(
+         self,
+         img: np.ndarray,
+         target_image: np.ndarray,
+         beta: float,
+         **params: Any,
+     ) -> np.ndarray:
+         """Apply Fourier Domain Adaptation to the input image.
+
+         Args:
+             img (np.ndarray): Input image to be transformed
+             target_image (np.ndarray): Target domain image for adaptation
+             beta (float): Coefficient controlling the extent of frequency component swapping
+             **params (Any): Additional parameters
+
+         Returns:
+             np.ndarray: Transformed image with adapted frequency components
+
+         """
+         return fourier_domain_adaptation(img, target_image, beta)
+
+
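
`fourier_domain_adaptation`, likewise defined in `domain_adaptation_functional.py` rather than in this hunk, performs the low-frequency amplitude swap from the FDA paper. A minimal numpy sketch of that idea, assuming uint8 inputs of equal size and not reproducing the packaged code exactly:

import numpy as np

def fda_sketch(src, ref, beta):
    src = src.astype(np.float32)
    ref = ref.astype(np.float32)
    out = np.zeros_like(src)
    h, w = src.shape[:2]
    b = int(min(h, w) * beta)  # half-size of the swapped window (the paper's L)
    cy, cx = h // 2, w // 2
    for c in range(src.shape[2]):
        src_fft = np.fft.fftshift(np.fft.fft2(src[..., c]))
        ref_fft = np.fft.fftshift(np.fft.fft2(ref[..., c]))
        amp_src, phase_src = np.abs(src_fft), np.angle(src_fft)
        # Replace the low-frequency amplitudes (center of the shifted spectrum)
        # with the reference amplitudes; the source phase is kept.
        amp_src[cy - b:cy + b, cx - b:cx + b] = np.abs(ref_fft)[cy - b:cy + b, cx - b:cx + b]
        out[..., c] = np.real(np.fft.ifft2(np.fft.ifftshift(amp_src * np.exp(1j * phase_src))))
    return np.clip(out, 0, 255).astype(np.uint8)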
+ class PixelDistributionAdaptation(BaseDomainAdaptation):
+     """Adapts the pixel value distribution of an input image to match a reference image
+     using statistical transformations (PCA, StandardScaler, or MinMaxScaler).
+
+     This transform aims to harmonize images from different domains by aligning their pixel-level
+     statistical properties.
+
+     Why use Pixel Distribution Adaptation?
+     **Domain Adaptation:** Useful for aligning images across domains with differing pixel statistics
+     (e.g., caused by different sensors, lighting, or post-processing).
+
+     *Use Case Example:* Consider having labeled data from Scanner A and needing the model to perform
+     well on unlabeled data from Scanner B, where images might have different overall brightness,
+     contrast, or color biases. This transform can adapt the labeled images from Scanner A to
+     mimic the pixel distribution *style* of the images from Scanner B, potentially improving
+     generalization without needing labels for Scanner B data.
+
+     How it works:
+         1. A chosen statistical transform (`transform_type`) is fitted to both the input (source) image
+            and the reference (target) image separately.
+         2. The input image is transformed using the transform fitted on it (moving it to a standardized space).
+         3. The inverse transform *fitted on the reference image* is applied to the result from step 2
+            (moving the standardized input into the reference image's statistical space).
+         4. The result is optionally blended with the original input image using `blend_ratio`.
+
+     Args:
+         metadata_key (str): Key in the input `data` dictionary to retrieve the reference image(s).
+             The value should be a sequence (e.g., list) of numpy arrays (pre-loaded images).
+             Default: "pda_metadata".
+         blend_ratio (tuple[float, float]): Specifies the minimum and maximum blend ratio for mixing
+             the adapted image with the original. A value of 0 means the original image is returned,
+             1 means the fully adapted image is returned. A random value within this range [min, max]
+             is sampled for each application. Default: (0.25, 1.0).
+         transform_type (Literal["pca", "standard", "minmax"]): Specifies the type of statistical
+             transformation to apply:
+             - "pca": Principal Component Analysis.
+             - "standard": StandardScaler (zero mean, unit variance).
+             - "minmax": MinMaxScaler (scales to [0, 1] range).
+             Default: "pca".
+         p (float): The probability of applying the transform. Default: 0.5.
+
+     Targets:
+         image
+
+     Image types:
+         uint8, float32
+
+     Note:
+         - Requires at least one reference image to be provided via the `metadata_key` argument.
+         - The `reference_images` and `read_fn` constructor arguments are deprecated.
+
+     Examples:
+         >>> import numpy as np
+         >>> import albumentations as A
+         >>> import cv2
+         >>>
+         >>> # Create sample images for demonstration
+         >>> # Source image: simulated image from domain A (e.g., medical scan from one scanner)
+         >>> source_image = np.random.normal(100, 20, (100, 100, 3)).clip(0, 255).astype(np.uint8)
+         >>>
+         >>> # Reference image: image from domain B with different statistical properties
+         >>> # (e.g., scan from a different scanner with different intensity distribution)
+         >>> reference_image = np.random.normal(150, 30, (100, 100, 3)).clip(0, 255).astype(np.uint8)
+         >>>
+         >>> # Example 1: Using PCA transformation (default)
+         >>> pca_transform = A.Compose([
+         ...     A.PixelDistributionAdaptation(
+         ...         transform_type="pca",
+         ...         blend_ratio=(0.8, 1.0),  # Strong adaptation
+         ...         metadata_key="reference_images",
+         ...         p=1.0
+         ...     )
+         ... ])
+         >>>
+         >>> # Apply the transform with the reference image
+         >>> pca_result = pca_transform(
+         ...     image=source_image,
+         ...     reference_images=[reference_image]
+         ... )
+         >>>
+         >>> # Get the adapted image
+         >>> pca_adapted_image = pca_result["image"]
+         >>>
+         >>> # Example 2: Using StandardScaler transformation
+         >>> standard_transform = A.Compose([
+         ...     A.PixelDistributionAdaptation(
+         ...         transform_type="standard",
+         ...         blend_ratio=(0.5, 0.7),  # Moderate adaptation
+         ...         metadata_key="reference_images",
+         ...         p=1.0
+         ...     )
+         ... ])
+         >>>
+         >>> standard_result = standard_transform(
+         ...     image=source_image,
+         ...     reference_images=[reference_image]
+         ... )
+         >>> standard_adapted_image = standard_result["image"]
+         >>>
+         >>> # Example 3: Using MinMaxScaler transformation
+         >>> minmax_transform = A.Compose([
+         ...     A.PixelDistributionAdaptation(
+         ...         transform_type="minmax",
+         ...         blend_ratio=(0.3, 0.5),  # Subtle adaptation
+         ...         metadata_key="reference_images",
+         ...         p=1.0
+         ...     )
+         ... ])
+         >>>
+         >>> minmax_result = minmax_transform(
+         ...     image=source_image,
+         ...     reference_images=[reference_image]
+         ... )
+         >>> minmax_adapted_image = minmax_result["image"]
+         >>>
+         >>> # Example 4: Using multiple reference images
+         >>> # When multiple reference images are provided, one is randomly selected for each transformation
+         >>> multiple_references = [
+         ...     reference_image,
+         ...     np.random.normal(180, 25, (100, 100, 3)).clip(0, 255).astype(np.uint8),
+         ...     np.random.normal(120, 40, (100, 100, 3)).clip(0, 255).astype(np.uint8)
+         ... ]
+         >>>
+         >>> multi_ref_transform = A.Compose([
+         ...     A.PixelDistributionAdaptation(p=1.0)  # Using default settings
+         ... ])
+         >>>
+         >>> # Each time the transform is applied, it randomly selects one of the reference images
+         >>> multi_ref_result = multi_ref_transform(
+         ...     image=source_image,
+         ...     pda_metadata=multiple_references  # Using the default metadata key
+         ... )
+         >>> adapted_image = multi_ref_result["image"]
+
+     References:
+         Qudida: https://github.com/arsenyinfo/qudida
+
+     """
+
+     class InitSchema(BaseDomainAdaptationInitSchema):
+         blend_ratio: Annotated[
+             tuple[float, float],
+             AfterValidator(nondecreasing),
+             AfterValidator(check_range_bounds(0, 1)),
+         ]
+         transform_type: Literal["pca", "standard", "minmax"]
+
+     def __init__(
+         self,
+         reference_images: Sequence[Any] | None = None,
+         blend_ratio: tuple[float, float] = (0.25, 1.0),
+         read_fn: Callable[[Any], np.ndarray] | None = read_rgb_image,
+         transform_type: Literal["pca", "standard", "minmax"] = "pca",
+         metadata_key: str = "pda_metadata",
+         p: float = 0.5,
+     ):
+         super().__init__(reference_images=reference_images, read_fn=read_fn, metadata_key=metadata_key, p=p)
+         self.blend_ratio = blend_ratio
+         self.transform_type = transform_type
+
+     def get_params_dependent_on_data(self, params: dict[str, Any], data: dict[str, Any]) -> dict[str, Any]:
+         """Get parameters for the transform."""
+         reference_image = self._get_reference_image(data)
+         return {
+             "reference_image": reference_image,
+             "blend_ratio": self.py_random.uniform(*self.blend_ratio),
+         }
+
+     def apply(self, img: np.ndarray, reference_image: np.ndarray, blend_ratio: float, **params: Any) -> np.ndarray:
+         """Apply pixel distribution adaptation to the input image.
+
+         Args:
+             img (np.ndarray): Input image to be transformed
+             reference_image (np.ndarray): Reference image for distribution adaptation
+             blend_ratio (float): Blending factor between the original and adapted image
+             **params (Any): Additional parameters
+
+         Returns:
+             np.ndarray: Transformed image with pixel distribution adapted to the reference image
+
+         """
+         return adapt_pixel_distribution(
+             img,
+             ref=reference_image,
+             weight=blend_ratio,
+             transform_type=self.transform_type,
+         )
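
`adapt_pixel_distribution` is also implemented in `domain_adaptation_functional.py` and does not appear in this hunk. For the "standard" transform_type, the four "How it works" steps above reduce to per-channel mean/std alignment; the following is a simplified sketch under a uint8 assumption, with illustrative names and without the PCA and MinMax branches:

import numpy as np

def standard_adapt_sketch(img, ref, weight):
    img_f = img.astype(np.float32)
    ref_f = ref.astype(np.float32)
    # Step 1: fit per-channel statistics on source and reference separately.
    src_mean, src_std = img_f.mean(axis=(0, 1)), img_f.std(axis=(0, 1)) + 1e-7
    ref_mean, ref_std = ref_f.mean(axis=(0, 1)), ref_f.std(axis=(0, 1))
    # Step 2: move the source into its own standardized space.
    standardized = (img_f - src_mean) / src_std
    # Step 3: apply the inverse transform fitted on the reference.
    adapted = standardized * ref_std + ref_mean
    # Step 4: blend adapted and original according to the sampled blend ratio.
    return np.clip(weight * adapted + (1 - weight) * img_f, 0, 255).astype(img.dtype)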