careamics 0.1.0rc6__py3-none-any.whl → 0.1.0rc8__py3-none-any.whl
This diff shows the changes between the two package versions as published to their public registry, and is provided for informational purposes only.
- careamics/__init__.py +1 -14
- careamics/careamist.py +212 -294
- careamics/config/__init__.py +0 -3
- careamics/config/algorithm_model.py +8 -15
- careamics/config/architectures/architecture_model.py +1 -0
- careamics/config/architectures/custom_model.py +5 -3
- careamics/config/architectures/unet_model.py +19 -0
- careamics/config/architectures/vae_model.py +1 -0
- careamics/config/callback_model.py +76 -34
- careamics/config/configuration_factory.py +18 -98
- careamics/config/configuration_model.py +23 -18
- careamics/config/data_model.py +103 -54
- careamics/config/inference_model.py +41 -19
- careamics/config/optimizer_models.py +13 -7
- careamics/config/support/supported_data.py +29 -4
- careamics/config/support/supported_transforms.py +0 -1
- careamics/config/tile_information.py +36 -58
- careamics/config/training_model.py +5 -1
- careamics/config/transformations/normalize_model.py +32 -4
- careamics/config/validators/validator_utils.py +1 -1
- careamics/dataset/__init__.py +12 -1
- careamics/dataset/dataset_utils/__init__.py +8 -7
- careamics/dataset/dataset_utils/file_utils.py +2 -2
- careamics/dataset/dataset_utils/iterate_over_files.py +83 -0
- careamics/dataset/dataset_utils/running_stats.py +186 -0
- careamics/dataset/in_memory_dataset.py +84 -173
- careamics/dataset/in_memory_pred_dataset.py +88 -0
- careamics/dataset/in_memory_tiled_pred_dataset.py +129 -0
- careamics/dataset/iterable_dataset.py +97 -250
- careamics/dataset/iterable_pred_dataset.py +122 -0
- careamics/dataset/iterable_tiled_pred_dataset.py +140 -0
- careamics/dataset/patching/patching.py +97 -52
- careamics/dataset/patching/random_patching.py +9 -4
- careamics/dataset/patching/validate_patch_dimension.py +5 -3
- careamics/dataset/tiling/__init__.py +10 -0
- careamics/dataset/tiling/collate_tiles.py +33 -0
- careamics/dataset/{patching → tiling}/tiled_patching.py +4 -4
- careamics/file_io/__init__.py +7 -0
- careamics/file_io/read/__init__.py +11 -0
- careamics/file_io/read/get_func.py +56 -0
- careamics/{dataset/dataset_utils/read_tiff.py → file_io/read/tiff.py} +3 -10
- careamics/file_io/write/__init__.py +9 -0
- careamics/file_io/write/get_func.py +59 -0
- careamics/file_io/write/tiff.py +39 -0
- careamics/lightning/__init__.py +17 -0
- careamics/{lightning_module.py → lightning/lightning_module.py} +69 -92
- careamics/{lightning_prediction_datamodule.py → lightning/predict_data_module.py} +120 -178
- careamics/{lightning_datamodule.py → lightning/train_data_module.py} +135 -220
- careamics/lvae_training/__init__.py +0 -0
- careamics/lvae_training/data_modules.py +1220 -0
- careamics/lvae_training/data_utils.py +618 -0
- careamics/lvae_training/eval_utils.py +905 -0
- careamics/lvae_training/get_config.py +84 -0
- careamics/lvae_training/lightning_module.py +701 -0
- careamics/lvae_training/metrics.py +214 -0
- careamics/lvae_training/train_lvae.py +339 -0
- careamics/lvae_training/train_utils.py +121 -0
- careamics/model_io/bioimage/model_description.py +40 -32
- careamics/model_io/bmz_io.py +2 -2
- careamics/model_io/model_io_utils.py +6 -3
- careamics/models/lvae/__init__.py +0 -0
- careamics/models/lvae/layers.py +1998 -0
- careamics/models/lvae/likelihoods.py +312 -0
- careamics/models/lvae/lvae.py +985 -0
- careamics/models/lvae/noise_models.py +409 -0
- careamics/models/lvae/utils.py +395 -0
- careamics/prediction_utils/__init__.py +10 -0
- careamics/prediction_utils/prediction_outputs.py +137 -0
- careamics/prediction_utils/stitch_prediction.py +103 -0
- careamics/transforms/n2v_manipulate.py +3 -1
- careamics/transforms/normalize.py +139 -68
- careamics/transforms/pixel_manipulation.py +33 -9
- careamics/transforms/tta.py +43 -29
- careamics/utils/__init__.py +2 -0
- careamics/utils/autocorrelation.py +40 -0
- careamics/utils/ram.py +2 -2
- {careamics-0.1.0rc6.dist-info → careamics-0.1.0rc8.dist-info}/METADATA +7 -6
- careamics-0.1.0rc8.dist-info/RECORD +135 -0
- {careamics-0.1.0rc6.dist-info → careamics-0.1.0rc8.dist-info}/WHEEL +1 -1
- careamics/config/configuration_example.py +0 -89
- careamics/dataset/dataset_utils/read_utils.py +0 -27
- careamics/lightning_prediction_loop.py +0 -118
- careamics/prediction/__init__.py +0 -7
- careamics/prediction/stitch_prediction.py +0 -70
- careamics/utils/running_stats.py +0 -43
- careamics-0.1.0rc6.dist-info/RECORD +0 -107
- /careamics/{dataset/dataset_utils/read_zarr.py → file_io/read/zarr.py} +0 -0
- /careamics/{callbacks → lightning/callbacks}/__init__.py +0 -0
- /careamics/{callbacks → lightning/callbacks}/hyperparameters_callback.py +0 -0
- /careamics/{callbacks → lightning/callbacks}/progress_bar_callback.py +0 -0
- {careamics-0.1.0rc6.dist-info → careamics-0.1.0rc8.dist-info}/licenses/LICENSE +0 -0
careamics/config/tile_information.py
CHANGED

```diff
@@ -2,9 +2,7 @@
 
 from __future__ import annotations
 
-from typing import Optional, Tuple
-
-from pydantic import BaseModel, ConfigDict, Field, ValidationInfo, field_validator
+from pydantic import BaseModel, ConfigDict, field_validator
 
 
 class TileInformation(BaseModel):
@@ -13,30 +11,43 @@ class TileInformation(BaseModel):
 
     This model is used to represent the information required to stitch back a tile into
     a larger image. It is used throughout the prediction pipeline of CAREamics.
+
+    Array shape should be (C)(Z)YX, where C and Z are optional dimensions, and must not
+    contain singleton dimensions.
     """
 
     model_config = ConfigDict(validate_default=True)
 
-    array_shape: Tuple[int, ...]
-    tiled: bool = False
+    array_shape: tuple[int, ...]
+    """Shape of the original (untiled) array."""
+
     last_tile: bool = False
-    overlap_crop_coords: Optional[Tuple[int, ...]] = None
-    stitch_coords: Optional[Tuple[int, ...]] = None
+    """Whether this tile is the last one of the array."""
+
+    overlap_crop_coords: tuple[tuple[int, ...], ...]
+    """Inner coordinates of the tile where to crop the prediction in order to stitch
+    it back into the original image."""
+
+    stitch_coords: tuple[tuple[int, ...], ...]
+    """Coordinates in the original image where to stitch the cropped tile back."""
+
+    sample_id: int
+    """Sample ID of the tile."""
 
     @field_validator("array_shape")
     @classmethod
-    def no_singleton_dimensions(cls, v: Tuple[int, ...]):
+    def no_singleton_dimensions(cls, v: tuple[int, ...]):
         """
         Check that the array shape does not have any singleton dimensions.
 
         Parameters
         ----------
-        v : Tuple[int, ...]
+        v : tuple of int
             Array shape to check.
 
         Returns
         -------
-        Tuple[int, ...]
+        tuple of int
            The array shape if it does not contain singleton dimensions.
 
        Raises
@@ -48,59 +59,26 @@ class TileInformation(BaseModel):
             raise ValueError("Array shape must not contain singleton dimensions.")
         return v
 
-    @field_validator("last_tile")
-    @classmethod
-    def only_if_tiled(cls, v: bool, values: ValidationInfo):
-        """
-        Check that the last tile flag is only set if tiling is enabled.
+    def __eq__(self, other_tile: object):
+        """Check if two tile information objects are equal.
 
         Parameters
         ----------
-        v : bool
-            Last tile flag.
-        values : ValidationInfo
-            Validation information.
+        other_tile : object
+            Tile information object to compare with.
 
         Returns
         -------
         bool
-            The last tile flag.
-        """
-        if not values.data["tiled"]:
-            return False
-        return v
-
-    @field_validator("overlap_crop_coords", "stitch_coords")
-    @classmethod
-    def mandatory_if_tiled(
-        cls, v: Optional[Tuple[int, ...]], values: ValidationInfo
-    ) -> Optional[Tuple[int, ...]]:
+            Whether the two tile information objects are equal.
         """
-        Check that the coordinates are specified if tiling is enabled.
-
-        Parameters
-        ----------
-        v : Optional[Tuple[int, ...]]
-            Coordinates to check.
-        values : ValidationInfo
-            Validation information.
-
-        Returns
-        -------
-        Optional[Tuple[int, ...]]
-            The coordinates if tiling is enabled, otherwise `None`.
-
-        Raises
-        ------
-        ValueError
-            If the coordinates are `None` and tiling is enabled.
-        """
-        if values.data["tiled"]:
-            if v is None:
-                raise ValueError("Value must be specified if tiling is enabled.")
-
-            return v
-        else:
-            return None
+        if not isinstance(other_tile, TileInformation):
+            return NotImplemented
+
+        return (
+            self.array_shape == other_tile.array_shape
+            and self.last_tile == other_tile.last_tile
+            and self.overlap_crop_coords == other_tile.overlap_crop_coords
+            and self.stitch_coords == other_tile.stitch_coords
+            and self.sample_id == other_tile.sample_id
+        )
```
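The new `__eq__` makes tiles comparable field by field, which is convenient for testing tiling and stitching round trips. A minimal sketch of the model in use, assuming pydantic v2 (consistent with the `ConfigDict`/`field_validator` imports above); the field values are made up for illustration:

```python
from careamics.config.tile_information import TileInformation

tile = TileInformation(
    array_shape=(64, 64),                  # YX, no singleton dimensions
    last_tile=False,
    overlap_crop_coords=((0, 56), (0, 56)),
    stitch_coords=((0, 56), (0, 56)),
    sample_id=0,
)

# __eq__ compares all five fields, so a copy rebuilt from the dumped
# fields is equal to the original
copy = TileInformation(**tile.model_dump())
assert tile == copy

# the array_shape validator rejects singleton dimensions
try:
    TileInformation(
        array_shape=(1, 64, 64),
        overlap_crop_coords=((0, 56), (0, 56)),
        stitch_coords=((0, 56), (0, 56)),
        sample_id=0,
    )
except ValueError:
    print("singleton dimensions are rejected")
```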
careamics/config/training_model.py
CHANGED

```diff
@@ -35,15 +35,19 @@ class TrainingConfig(BaseModel):
     )
 
     num_epochs: int = Field(default=20, ge=1)
+    """Number of epochs, greater than 0."""
 
     logger: Optional[Literal["wandb", "tensorboard"]] = None
+    """Logger to use during training. If None, no logger will be used. Available
+    loggers are defined in SupportedLogger."""
 
     checkpoint_callback: CheckpointModel = CheckpointModel()
+    """Checkpoint callback configuration."""
 
     early_stopping_callback: Optional[EarlyStoppingModel] = Field(
         default=None, validate_default=True
     )
-
+    """Early stopping callback configuration."""
 
     def __str__(self) -> str:
         """Pretty string reprensenting the configuration.
```
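The newly documented fields can be exercised with a quick instantiation. A sketch assuming `TrainingConfig` has no required fields beyond what this hunk shows:

```python
from careamics.config.training_model import TrainingConfig

cfg = TrainingConfig(num_epochs=50, logger="tensorboard")
print(cfg.num_epochs)               # 50
print(cfg.logger)                   # "tensorboard"
print(cfg.checkpoint_callback)      # default CheckpointModel()
print(cfg.early_stopping_callback)  # None unless configured

# num_epochs is validated with ge=1
try:
    TrainingConfig(num_epochs=0)
except ValueError:
    print("num_epochs must be >= 1")
```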
careamics/config/transformations/normalize_model.py
CHANGED

```diff
@@ -1,8 +1,9 @@
 """Pydantic model for the Normalize transform."""
 
-from typing import Literal
+from typing import Literal, Optional
 
-from pydantic import ConfigDict, Field
+from pydantic import ConfigDict, Field, model_validator
+from typing_extensions import Self
 
 from .transform_model import TransformModel
 
@@ -28,5 +29,32 @@ class NormalizeModel(TransformModel):
     )
 
     name: Literal["Normalize"] = "Normalize"
-    mean: float
-    std: float
+    image_means: list = Field(..., min_length=0, max_length=32)
+    image_stds: list = Field(..., min_length=0, max_length=32)
+    target_means: Optional[list] = Field(default=None, min_length=0, max_length=32)
+    target_stds: Optional[list] = Field(default=None, min_length=0, max_length=32)
+
+    @model_validator(mode="after")
+    def validate_means_stds(self: Self) -> Self:
+        """Validate that the means and stds have the same length.
+
+        Returns
+        -------
+        Self
+            The instance of the model.
+        """
+        if len(self.image_means) != len(self.image_stds):
+            raise ValueError("The number of image means and stds must be the same.")
+
+        if (self.target_means is None) != (self.target_stds is None):
+            raise ValueError(
+                "Both target means and stds must be provided together, or bot None."
+            )
+
+        if self.target_means is not None and self.target_stds is not None:
+            if len(self.target_means) != len(self.target_stds):
+                raise ValueError(
+                    "The number of target means and stds must be the same."
+                )
+
+        return self
```
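The `model_validator` ties the four statistics lists together: image means/stds must match in length, and target statistics must come as a pair. A minimal sketch of the constraints it enforces, assuming `NormalizeModel` has no other required fields:

```python
from careamics.config.transformations.normalize_model import NormalizeModel

# per-channel statistics: one mean and one std per channel
norm = NormalizeModel(image_means=[0.5, 0.8], image_stds=[0.1, 0.2])

# mismatched lengths are rejected by validate_means_stds
try:
    NormalizeModel(image_means=[0.5, 0.8], image_stds=[0.1])
except ValueError:
    print("image means/stds must have the same length")

# target statistics must be provided together or not at all
try:
    NormalizeModel(image_means=[0.5], image_stds=[0.1], target_means=[0.4])
except ValueError:
    print("target means and stds must be provided together")
```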
careamics/config/validators/validator_utils.py
CHANGED

```diff
@@ -72,7 +72,7 @@ def value_ge_than_8_power_of_2(
         If the value is not a power of 2.
     """
     if value < 8:
-        raise ValueError(f"Value must be
+        raise ValueError(f"Value must be greater than 8 (got {value}).")
 
     if (value & (value - 1)) != 0:
         raise ValueError(f"Value must be a power of 2 (got {value}).")
```
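The power-of-2 check relies on a classic bit trick: for a positive integer, `value & (value - 1)` clears the lowest set bit, so the result is zero exactly when a single bit is set. A standalone illustration of the same two tests:

```python
for value in (8, 12, 16, 100, 256):
    ge_8 = value >= 8
    pow2 = (value & (value - 1)) == 0
    print(f"{value}: >=8 {ge_8}, power of 2 {pow2}")
# 8, 16 and 256 pass both checks; 12 and 100 fail the power-of-2 test
```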
careamics/dataset/__init__.py
CHANGED

```diff
@@ -1,6 +1,17 @@
 """Dataset module."""
 
-__all__ = ["InMemoryDataset", "PathIterableDataset"]
+__all__ = [
+    "InMemoryDataset",
+    "InMemoryPredDataset",
+    "InMemoryTiledPredDataset",
+    "PathIterableDataset",
+    "IterableTiledPredDataset",
+    "IterablePredDataset",
+]
 
 from .in_memory_dataset import InMemoryDataset
+from .in_memory_pred_dataset import InMemoryPredDataset
+from .in_memory_tiled_pred_dataset import InMemoryTiledPredDataset
 from .iterable_dataset import PathIterableDataset
+from .iterable_pred_dataset import IterablePredDataset
+from .iterable_tiled_pred_dataset import IterableTiledPredDataset
```
careamics/dataset/dataset_utils/__init__.py
CHANGED

```diff
@@ -2,17 +2,18 @@
 
 __all__ = [
     "reshape_array",
+    "compute_normalization_stats",
     "get_files_size",
     "list_files",
     "validate_source_target_files",
-    "read_tiff",
-    "get_read_func",
-    "read_zarr",
+    "iterate_over_files",
+    "WelfordStatistics",
 ]
 
 
-from .dataset_utils import reshape_array
+from .dataset_utils import (
+    reshape_array,
+)
 from .file_utils import get_files_size, list_files, validate_source_target_files
-from .read_tiff import read_tiff
-from .read_utils import get_read_func
-from .read_zarr import read_zarr
+from .iterate_over_files import iterate_over_files
+from .running_stats import WelfordStatistics, compute_normalization_stats
```
careamics/dataset/dataset_utils/file_utils.py
CHANGED

```diff
@@ -33,7 +33,7 @@ def list_files(
     data_type: Union[str, SupportedData],
     extension_filter: str = "",
 ) -> List[Path]:
-    """
+    """List recursively files in `data_path` and return a sorted list.
 
     If `data_path` is a file, its name is validated against the `data_type` using
     `fnmatch`, and the method returns `data_path` itself.
@@ -75,7 +75,7 @@ def list_files(
         raise FileNotFoundError(f"Data path {data_path} does not exist.")
 
     # get extension compatible with fnmatch and rglob search
-    extension = SupportedData.get_extension(data_type)
+    extension = SupportedData.get_extension_pattern(data_type)
 
     if data_type == SupportedData.CUSTOM and extension_filter != "":
         extension = extension_filter
```
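For reference, a hypothetical call matching the signature above; the `"tiff"` data-type string is an assumption based on the `SupportedData` usage in this file, not a value confirmed by the diff:

```python
from pathlib import Path

from careamics.dataset.dataset_utils import list_files

# recursively collect files under data/ and return them sorted;
# data_type="tiff" is assumed to match SupportedData's tiff entry
files = list_files(Path("data"), data_type="tiff")
print(len(files))
```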
careamics/dataset/dataset_utils/iterate_over_files.py
ADDED

```diff
@@ -0,0 +1,83 @@
+"""Function to iterate over files."""
+
+from __future__ import annotations
+
+from pathlib import Path
+from typing import Callable, Generator, Optional, Union
+
+from numpy.typing import NDArray
+from torch.utils.data import get_worker_info
+
+from careamics.config import DataConfig, InferenceConfig
+from careamics.file_io.read import read_tiff
+from careamics.utils.logging import get_logger
+
+from .dataset_utils import reshape_array
+
+logger = get_logger(__name__)
+
+
+def iterate_over_files(
+    data_config: Union[DataConfig, InferenceConfig],
+    data_files: list[Path],
+    target_files: Optional[list[Path]] = None,
+    read_source_func: Callable = read_tiff,
+) -> Generator[tuple[NDArray, Optional[NDArray]], None, None]:
+    """Iterate over data source and yield whole reshaped images.
+
+    Parameters
+    ----------
+    data_config : CAREamics DataConfig or InferenceConfig
+        Configuration.
+    data_files : list of pathlib.Path
+        List of data files.
+    target_files : list of pathlib.Path, optional
+        List of target files, by default None.
+    read_source_func : Callable, optional
+        Function to read the source, by default read_tiff.
+
+    Yields
+    ------
+    NDArray
+        Image.
+    """
+    # When num_workers > 0, each worker process will have a different copy of the
+    # dataset object
+    # Configuring each copy independently to avoid having duplicate data returned
+    # from the workers
+    worker_info = get_worker_info()
+    worker_id = worker_info.id if worker_info is not None else 0
+    num_workers = worker_info.num_workers if worker_info is not None else 1
+
+    # iterate over the files
+    for i, filename in enumerate(data_files):
+        # retrieve file corresponding to the worker id
+        if i % num_workers == worker_id:
+            try:
+                # read data
+                sample = read_source_func(filename, data_config.axes)
+
+                # reshape array
+                reshaped_sample = reshape_array(sample, data_config.axes)
+
+                # read target, if available
+                if target_files is not None:
+                    if filename.name != target_files[i].name:
+                        raise ValueError(
+                            f"File {filename} does not match target file "
+                            f"{target_files[i]}. Have you passed sorted "
+                            f"arrays?"
+                        )
+
+                    # read target
+                    target = read_source_func(target_files[i], data_config.axes)
+
+                    # reshape target
+                    reshaped_target = reshape_array(target, data_config.axes)
+
+                    yield reshaped_sample, reshaped_target
+                else:
+                    yield reshaped_sample, None
+
+            except Exception as e:
+                logger.error(f"Error reading file {filename}: {e}")
```
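Because `iterate_over_files` shards files across DataLoader workers itself (file `i` goes to worker `i % num_workers`, via `get_worker_info`), a consuming dataset stays simple. A sketch of the pattern, not the package's actual `PathIterableDataset` code:

```python
from pathlib import Path

from torch.utils.data import IterableDataset

from careamics.config import DataConfig
from careamics.dataset.dataset_utils import iterate_over_files


class SketchDataset(IterableDataset):
    """Illustrative file-backed dataset yielding whole reshaped images."""

    def __init__(self, data_config: DataConfig, files: list[Path]) -> None:
        self.data_config = data_config
        self.files = files

    def __iter__(self):
        # no manual worker splitting needed: iterate_over_files already
        # assigns each file to exactly one DataLoader worker
        for sample, target in iterate_over_files(self.data_config, self.files):
            yield sample if target is None else (sample, target)
```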
careamics/dataset/dataset_utils/running_stats.py
ADDED

```diff
@@ -0,0 +1,186 @@
+"""Computing data statistics."""
+
+import numpy as np
+from numpy.typing import NDArray
+
+
+def compute_normalization_stats(image: NDArray) -> tuple[NDArray, NDArray]:
+    """
+    Compute mean and standard deviation of an array.
+
+    Expected input shape is (S, C, (Z), Y, X). The mean and standard deviation are
+    computed per channel.
+
+    Parameters
+    ----------
+    image : NDArray
+        Input array.
+
+    Returns
+    -------
+    tuple of (list of floats, list of floats)
+        Lists of mean and standard deviation values per channel.
+    """
+    # Define the list of axes excluding the channel axis
+    axes = tuple(np.delete(np.arange(image.ndim), 1))
+    return np.mean(image, axis=axes), np.std(image, axis=axes)
+
+
+def update_iterative_stats(
+    count: NDArray, mean: NDArray, m2: NDArray, new_values: NDArray
+) -> tuple[NDArray, NDArray, NDArray]:
+    """Update the mean and variance of an array iteratively.
+
+    Parameters
+    ----------
+    count : NDArray
+        Number of elements in the array.
+    mean : NDArray
+        Mean of the array.
+    m2 : NDArray
+        Variance of the array.
+    new_values : NDArray
+        New values to add to the mean and variance.
+
+    Returns
+    -------
+    tuple[NDArray, NDArray, NDArray]
+        Updated count, mean, and variance.
+    """
+    count += np.array([np.prod(channel.shape) for channel in new_values])
+    # newvalues - oldMean
+    delta = [
+        np.subtract(v.flatten(), [m] * len(v.flatten()))
+        for v, m in zip(new_values, mean)
+    ]
+
+    mean += np.array([np.sum(d / c) for d, c in zip(delta, count)])
+    # newvalues - newMeant
+    delta2 = [
+        np.subtract(v.flatten(), [m] * len(v.flatten()))
+        for v, m in zip(new_values, mean)
+    ]
+
+    m2 += np.array([np.sum(d * d2) for d, d2 in zip(delta, delta2)])
+
+    return (count, mean, m2)
+
+
+def finalize_iterative_stats(
+    count: NDArray, mean: NDArray, m2: NDArray
+) -> tuple[NDArray, NDArray]:
+    """Finalize the mean and variance computation.
+
+    Parameters
+    ----------
+    count : NDArray
+        Number of elements in the array.
+    mean : NDArray
+        Mean of the array.
+    m2 : NDArray
+        Variance of the array.
+
+    Returns
+    -------
+    tuple[NDArray, NDArray]
+        Final mean and standard deviation.
+    """
+    std = np.array([np.sqrt(m / c) for m, c in zip(m2, count)])
+    if any(c < 2 for c in count):
+        return np.full(mean.shape, np.nan), np.full(std.shape, np.nan)
+    else:
+        return mean, std
+
+
+class WelfordStatistics:
+    """Compute Welford statistics iteratively.
+
+    The Welford algorithm is used to compute the mean and variance of an array
+    iteratively. Based on the implementation from:
+    https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm
+    """
+
+    def update(self, array: NDArray, sample_idx: int) -> None:
+        """Update the Welford statistics.
+
+        Parameters
+        ----------
+        array : NDArray
+            Input array.
+        sample_idx : int
+            Current sample number.
+        """
+        self.sample_idx = sample_idx
+        sample_channels = np.array(np.split(array, array.shape[1], axis=1))
+
+        # Initialize the statistics
+        if self.sample_idx == 0:
+            # Compute the mean and standard deviation
+            self.mean, _ = compute_normalization_stats(array)
+            # Initialize the count and m2 with zero-valued arrays of shape (C,)
+            self.count, self.mean, self.m2 = update_iterative_stats(
+                count=np.zeros(array.shape[1]),
+                mean=self.mean,
+                m2=np.zeros(array.shape[1]),
+                new_values=sample_channels,
+            )
+        else:
+            # Update the statistics
+            self.count, self.mean, self.m2 = update_iterative_stats(
+                count=self.count, mean=self.mean, m2=self.m2, new_values=sample_channels
+            )
+
+        self.sample_idx += 1
+
+    def finalize(self) -> tuple[NDArray, NDArray]:
+        """Finalize the Welford statistics.
+
+        Returns
+        -------
+        tuple or numpy arrays
+            Final mean and standard deviation.
+        """
+        return finalize_iterative_stats(self.count, self.mean, self.m2)
+
+
+# from multiprocessing import Value
+# from typing import tuple
+
+# import numpy as np
+
+
+# class RunningStats:
+#     """Calculates running mean and std."""
+
+#     def __init__(self) -> None:
+#         self.reset()
+
+#     def reset(self) -> None:
+#         """Reset the running stats."""
+#         self.avg_mean = Value("d", 0)
+#         self.avg_std = Value("d", 0)
+#         self.m2 = Value("d", 0)
+#         self.count = Value("i", 0)
+
+#     def init(self, mean: float, std: float) -> None:
+#         """Initialize running stats."""
+#         with self.avg_mean.get_lock():
+#             self.avg_mean.value += mean
+#         with self.avg_std.get_lock():
+#             self.avg_std.value = std
+
+#     def compute_std(self) -> tuple[float, float]:
+#         """Compute std."""
+#         if self.count.value >= 2:
+#             self.avg_std.value = np.sqrt(self.m2.value / self.count.value)
+
+#     def update(self, value: float) -> None:
+#         """Update running stats."""
+#         with self.count.get_lock():
+#             self.count.value += 1
+#         delta = value - self.avg_mean.value
+#         with self.avg_mean.get_lock():
+#             self.avg_mean.value += delta / self.count.value
+#         delta2 = value - self.avg_mean.value
+#         with self.m2.get_lock():
+#             self.m2.value += delta * delta2
```
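`WelfordStatistics` accumulates per-channel mean and variance one sample batch at a time, so normalization statistics can be gathered over a dataset too large to hold in memory. A short usage sketch relying only on the API shown above (the random data is illustrative):

```python
import numpy as np

from careamics.dataset.dataset_utils import (
    WelfordStatistics,
    compute_normalization_stats,
)

rng = np.random.default_rng(seed=0)

# two batches of shape (S, C, Y, X) with C=3 channels
batch_a = rng.normal(5.0, 2.0, size=(2, 3, 32, 32))
batch_b = rng.normal(5.0, 2.0, size=(2, 3, 32, 32))

# incremental per-channel statistics
stats = WelfordStatistics()
stats.update(batch_a, sample_idx=0)  # sample_idx=0 initializes count/mean/m2
stats.update(batch_b, sample_idx=1)
mean, std = stats.finalize()
print(mean.shape, std.shape)         # (3,) (3,) — one value per channel

# one-shot statistics on the concatenated data, for comparison
full_mean, full_std = compute_normalization_stats(
    np.concatenate([batch_a, batch_b], axis=0)
)
```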