careamics 0.0.16__py3-none-any.whl → 0.0.17__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of careamics might be problematic.
- careamics/careamist.py +7 -4
- careamics/config/configuration.py +6 -55
- careamics/config/configuration_factories.py +22 -12
- careamics/config/data/data_model.py +49 -9
- careamics/config/data/ng_data_model.py +167 -2
- careamics/config/data/patch_filter/__init__.py +15 -0
- careamics/config/data/patch_filter/filter_model.py +16 -0
- careamics/config/data/patch_filter/mask_filter_model.py +17 -0
- careamics/config/data/patch_filter/max_filter_model.py +15 -0
- careamics/config/data/patch_filter/meanstd_filter_model.py +18 -0
- careamics/config/data/patch_filter/shannon_filter_model.py +15 -0
- careamics/config/support/supported_filters.py +17 -0
- careamics/dataset_ng/dataset.py +57 -5
- careamics/dataset_ng/factory.py +101 -18
- careamics/dataset_ng/patch_filter/__init__.py +20 -0
- careamics/dataset_ng/patch_filter/coordinate_filter_protocol.py +27 -0
- careamics/dataset_ng/patch_filter/filter_factory.py +94 -0
- careamics/dataset_ng/patch_filter/mask_filter.py +95 -0
- careamics/dataset_ng/patch_filter/max_filter.py +188 -0
- careamics/dataset_ng/patch_filter/mean_std_filter.py +218 -0
- careamics/dataset_ng/patch_filter/patch_filter_protocol.py +50 -0
- careamics/dataset_ng/patch_filter/shannon_filter.py +188 -0
- careamics/lightning/callbacks/data_stats_callback.py +13 -3
- careamics/lightning/dataset_ng/data_module.py +79 -2
- careamics/lightning/lightning_module.py +4 -3
- careamics/lightning/microsplit_data_module.py +15 -10
- careamics/lvae_training/eval_utils.py +46 -24
- careamics/models/lvae/likelihoods.py +2 -1
- careamics/prediction_utils/prediction_outputs.py +3 -2
- careamics/prediction_utils/stitch_prediction.py +17 -6
- careamics/utils/version.py +4 -4
- {careamics-0.0.16.dist-info → careamics-0.0.17.dist-info}/METADATA +5 -11
- {careamics-0.0.16.dist-info → careamics-0.0.17.dist-info}/RECORD +36 -21
- {careamics-0.0.16.dist-info → careamics-0.0.17.dist-info}/WHEEL +0 -0
- {careamics-0.0.16.dist-info → careamics-0.0.17.dist-info}/entry_points.txt +0 -0
- {careamics-0.0.16.dist-info → careamics-0.0.17.dist-info}/licenses/LICENSE +0 -0
careamics/careamist.py
CHANGED
@@ -41,6 +41,7 @@ logger = get_logger(__name__)
 LOGGER_TYPES = list[Union[TensorBoardLogger, WandbLogger, CSVLogger]]
 
 
+# TODO type ignore have been added because of the czi data type in data configuration
 class CAREamist:
     """Main CAREamics class, allowing training and prediction using various algorithms.
 
@@ -674,7 +675,7 @@ class CAREamist:
         # create the prediction
         self.pred_datamodule = create_predict_datamodule(
             pred_data=source,
-            data_type=data_type or self.cfg.data_config.data_type,
+            data_type=data_type or self.cfg.data_config.data_type,  # type: ignore
             axes=axes or self.cfg.data_config.axes,
             image_means=self.cfg.data_config.image_means,
             image_stds=self.cfg.data_config.image_stds,
@@ -817,14 +818,16 @@
 
         # extract file names
         source_path: Union[Path, str, NDArray]
-        source_data_type: Literal["array", "tiff", "
+        source_data_type: Literal["array", "tiff", "custom"]
         if isinstance(source, PredictDataModule):
             source_path = source.pred_data
-            source_data_type = source.data_type
+            source_data_type = source.data_type  # type: ignore
             extension_filter = source.extension_filter
         elif isinstance(source, (str | Path)):
             source_path = source
-            source_data_type =
+            source_data_type = (
+                data_type or self.cfg.data_config.data_type  # type: ignore
+            )
             extension_filter = SupportedData.get_extension_pattern(
                 SupportedData(source_data_type)
             )
careamics/config/configuration.py
CHANGED
@@ -3,14 +3,12 @@
 from __future__ import annotations
 
 import re
-from collections.abc import Callable
 from pprint import pformat
 from typing import Any, Literal, Self, Union
 
 import numpy as np
 from bioimageio.spec.generic.v0_3 import CiteEntry
 from pydantic import BaseModel, ConfigDict, Field, field_validator, model_validator
-from pydantic.main import IncEx
 
 from careamics.config.algorithms import (
     CAREAlgorithm,
@@ -343,19 +341,7 @@ class Configuration(BaseModel):
 
     def model_dump(
         self,
-
-        mode: Literal["json", "python"] | str = "python",
-        include: IncEx | None = None,
-        exclude: IncEx | None = None,
-        context: Any | None = None,
-        by_alias: bool | None = False,
-        exclude_unset: bool = False,
-        exclude_defaults: bool = False,
-        exclude_none: bool = True,
-        round_trip: bool = False,
-        warnings: bool | Literal["none", "warn", "error"] = True,
-        fallback: Callable[[Any], Any] | None = None,
-        serialize_as_any: bool = False,
+        **kwargs: Any,
     ) -> dict[str, Any]:
         """
         Override model_dump method in order to set default values.
@@ -365,50 +351,15 @@ class Configuration(BaseModel):
 
         Parameters
         ----------
-
-
-        include : Any | None, default=None
-            Attributes to include.
-        exclude : Any | None, default=None
-            Attributes to exclude.
-        context : Any | None, default=None
-            Additional context to pass to the serialization functions.
-        by_alias : bool, default=False
-            Whether to use attribute aliases.
-        exclude_unset : bool, default=False
-            Whether to exclude fields that are not set.
-        exclude_defaults : bool, default=False
-            Whether to exclude fields that have default values.
-        exclude_none : bool, default=true
-            Whether to exclude fields that have None values.
-        round_trip : bool, default=False
-            Whether to dump and load the data to ensure that the output is a valid
-            representation.
-        warnings : bool | Literal['none', 'warn', 'error'], default=True
-            Whether to emit warnings.
-        fallback : Callable[[Any], Any] | None, default=None
-            A function to call when an unknown value is encountered.
-        serialize_as_any : bool, default=False
-            Whether to serialize all types as Any.
+        **kwargs : Any
+            Additional arguments to pass to the parent model_dump method.
 
         Returns
        -------
         dict
             Dictionary containing the model parameters.
         """
-
-
-            include=include,
-            exclude=exclude,
-            context=context,
-            by_alias=by_alias,
-            exclude_unset=exclude_unset,
-            exclude_defaults=exclude_defaults,
-            exclude_none=exclude_none,
-            round_trip=round_trip,
-            warnings=warnings,
-            fallback=fallback,
-            serialize_as_any=serialize_as_any,
-        )
+        if "exclude_none" not in kwargs:
+            kwargs["exclude_none"] = True
 
-        return
+        return super().model_dump(**kwargs)
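The model_dump override above now simply forwards keyword arguments to pydantic's BaseModel.model_dump and injects exclude_none=True only when the caller has not set it, instead of re-declaring the full pydantic signature. A minimal sketch of the same pattern on a stand-alone pydantic v2 model (the Example class and its fields are illustrative, not part of careamics):

from typing import Any

from pydantic import BaseModel


class Example(BaseModel):
    name: str = "n2v"
    comment: str | None = None

    def model_dump(self, **kwargs: Any) -> dict[str, Any]:
        # default to dropping None-valued fields unless the caller overrides it
        if "exclude_none" not in kwargs:
            kwargs["exclude_none"] = True
        return super().model_dump(**kwargs)


Example().model_dump()                    # {'name': 'n2v'}
Example().model_dump(exclude_none=False)  # {'name': 'n2v', 'comment': None}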
careamics/config/configuration_factories.py
CHANGED
@@ -311,6 +311,10 @@ def _create_microsplit_data_configuration(
         Axes of the data.
     patch_size : list of int
         Size of the patches along the spatial dimensions.
+    grid_size : int
+        Grid size for patch extraction.
+    multiscale_count : int
+        Number of LC scales.
     batch_size : int
         Batch size.
     augmentations : list of transforms
@@ -1610,6 +1614,12 @@ def get_likelihood_config(
 ]:
     """Get the likelihood configuration for split models.
 
+    Returns a tuple containing the following optional entries:
+    - GaussianLikelihoodConfig: Gaussian likelihood configuration for musplit losses
+    - MultiChannelNMConfig: Multi-channel noise model configuration for denoisplit
+      losses
+    - NMLikelihoodConfig: Noise model likelihood configuration for denoisplit losses
+
     Parameters
     ----------
     loss_type : Literal["musplit", "denoisplit", "denoisplit_musplit"]
@@ -1629,15 +1639,12 @@ def get_likelihood_config(
 
     Returns
     -------
-
-
-
-
-
-
-    - MultiChannelNMConfig: Multi-channel noise model configuration for denoisplit
-      losses
-    - NMLikelihoodConfig: Noise model likelihood configuration for denoisplit losses
+    GaussianLikelihoodConfig or None
+        Configuration for the Gaussian likelihood model.
+    MultiChannelNMConfig or None
+        Configuration for the multi-channel noise model.
+    NMLikelihoodConfig or None
+        Configuration for the noise model likelihood.
 
     Raises
     ------
@@ -1647,7 +1654,7 @@ def get_likelihood_config(
     # gaussian likelihood
     if loss_type in ["musplit", "denoisplit_musplit"]:
         # if predict_logvar is None:
-        #
+        #     raise ValueError(f"predict_logvar is required for loss_type '{loss_type}'")
         # TODO validators should be in pydantic models
         gaussian_lik_config = GaussianLikelihoodConfig(
             predict_logvar=predict_logvar,
@@ -1903,7 +1910,7 @@ def create_microsplit_configuration(
     decoder_dropout: float = 0.0,
     nonlinearity: Literal[
         "None", "Sigmoid", "Softmax", "Tanh", "ReLU", "LeakyReLU", "ELU"
-    ] = "ReLU",
+    ] = "ReLU",  # TODO do we need all these?
    analytical_kl: bool = False,
     predict_logvar: Literal["pixelwise"] = "pixelwise",
     logvar_lowerbound: Union[float, None] = None,
@@ -1943,8 +1950,11 @@ def create_microsplit_configuration(
         Strides for the decoder convolutional layers, by default (2, 2).
     multiscale_count : int, optional
         Number of multiscale levels, by default 1.
+    grid_size : int, optional
+        Size of the grid for the lateral context, by default 32.
     z_dims : tuple[int, ...], optional
-        List of latent dimensions for each hierarchy level in the LVAE, by default
+        List of latent dimensions for each hierarchy level in the LVAE, by default
+        (128, 128).
     output_channels : int, optional
         Number of output channels for the model, by default 1.
     encoder_n_filters : int, optional
careamics/config/data/data_model.py
CHANGED
@@ -207,13 +207,12 @@ class DataConfig(BaseModel):
 
     @field_validator("train_dataloader_params", "val_dataloader_params", mode="before")
     @classmethod
-    def
+    def set_default_pin_memory(
         cls, dataloader_params: dict[str, Any]
     ) -> dict[str, Any]:
         """
-        Set default dataloader parameters if not provided.
+        Set default pin_memory for dataloader parameters if not provided.
 
-        - If 'num_workers' is not set, it defaults to the number of available CPU cores.
         - If 'pin_memory' is not set, it defaults to True if CUDA is available.
 
         Parameters
@@ -224,21 +223,62 @@ class DataConfig(BaseModel):
         Returns
         -------
         dict of {str: Any}
-            The dataloader parameters with
+            The dataloader parameters with pin_memory default applied.
+        """
+        if "pin_memory" not in dataloader_params:
+            import torch
+
+            dataloader_params["pin_memory"] = torch.cuda.is_available()
+
+        return dataloader_params
+
+    @field_validator("train_dataloader_params", mode="before")
+    @classmethod
+    def set_default_train_workers(
+        cls, dataloader_params: dict[str, Any]
+    ) -> dict[str, Any]:
+        """
+        Set default num_workers for training dataloader if not provided.
+
+        - If 'num_workers' is not set, it defaults to the number of available CPU cores.
+
+        Parameters
+        ----------
+        dataloader_params : dict of {str: Any}
+            The training dataloader parameters.
+
+        Returns
+        -------
+        dict of {str: Any}
+            The dataloader parameters with num_workers default applied.
         """
         if "num_workers" not in dataloader_params:
-            # Use
+            # Use 0 workers during tests, otherwise use all available CPU cores
             if "pytest" in sys.modules:
                 dataloader_params["num_workers"] = 0
             else:
                 dataloader_params["num_workers"] = os.cpu_count()
 
-
-            import torch
+        return dataloader_params
 
-
+    @model_validator(mode="after")
+    def set_val_workers_to_match_train(self: Self) -> Self:
+        """
+        Set validation dataloader num_workers to match training dataloader.
 
-
+        If num_workers is not specified in val_dataloader_params, it will be set to the
+        same value as train_dataloader_params["num_workers"].
+
+        Returns
+        -------
+        Self
+            Validated data model with synchronized num_workers.
+        """
+        if "num_workers" not in self.val_dataloader_params:
+            self.val_dataloader_params["num_workers"] = self.train_dataloader_params[
+                "num_workers"
+            ]
+        return self
 
     @field_validator("train_dataloader_params")
     @classmethod
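Taken together, the three validators now split the old combined defaulting into separate steps: pin_memory is defaulted on both training and validation parameters, num_workers is defaulted on the training parameters only, and an after-validator copies the training num_workers into the validation parameters when it was not set. A stand-alone sketch of the resulting defaulting behaviour, assuming torch is installed (apply_defaults is an illustrative helper, not a careamics function):

import os
import sys

import torch


def apply_defaults(train_params: dict, val_params: dict) -> tuple[dict, dict]:
    # pin_memory defaults to True only when CUDA is available
    for params in (train_params, val_params):
        params.setdefault("pin_memory", torch.cuda.is_available())
    # num_workers: 0 under pytest, otherwise all available CPU cores (train only)
    if "num_workers" not in train_params:
        train_params["num_workers"] = 0 if "pytest" in sys.modules else os.cpu_count()
    # validation inherits the training num_workers unless set explicitly
    val_params.setdefault("num_workers", train_params["num_workers"])
    return train_params, val_params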
careamics/config/data/ng_data_model.py
CHANGED
@@ -2,6 +2,9 @@
 
 from __future__ import annotations
 
+import os
+import random
+import sys
 from collections.abc import Sequence
 from pprint import pformat
 from typing import Annotated, Any, Literal, Self, Union
@@ -20,6 +23,12 @@ from pydantic import (
 
 from ..transformations import XYFlipModel, XYRandomRotate90Model
 from ..validators import check_axes_validity
+from .patch_filter import (
+    MaskFilterModel,
+    MaxFilterModel,
+    MeanSTDFilterModel,
+    ShannonFilterModel,
+)
 from .patching_strategies import (
     RandomPatchingModel,
     TiledPatchingModel,
@@ -37,6 +46,17 @@ from .patching_strategies import (
 # - or is the responsibility of the creator (e.g. conveneince functions)
 
 
+def generate_random_seed() -> int:
+    """Generate a random seed for reproducibility.
+
+    Returns
+    -------
+    int
+        A random integer between 1 and 2^31 - 1.
+    """
+    return random.randint(1, 2**31 - 1)
+
+
 def np_float_to_scientific_str(x: float) -> str:
     """Return a string scientific representation of a float.
 
@@ -67,6 +87,16 @@ PatchingStrategies = Union[
 ]
 """Patching strategies."""
 
+PatchFilters = Union[
+    MaxFilterModel,
+    MeanSTDFilterModel,
+    ShannonFilterModel,
+]
+"""Patch filters."""
+
+CoordFilters = Union[MaskFilterModel]  # add more here as needed
+"""Coordinate filters."""
+
 
 class NGDataConfig(BaseModel):
     """Next-Generation Dataset configuration.
@@ -105,6 +135,18 @@ class NGDataConfig(BaseModel):
     batch_size: int = Field(default=1, ge=1, validate_default=True)
     """Batch size for training."""
 
+    patch_filter: PatchFilters | None = Field(default=None, discriminator="name")
+    """Patch filter to apply when using random patching. Only available during
+    training."""
+
+    coord_filter: CoordFilters | None = Field(default=None, discriminator="name")
+    """Coordinate filter to apply when using random patching. Only available during
+    training."""
+
+    patch_filter_patience: int = Field(default=5, ge=1)
+    """Number of consecutive patches not passing the filter before accepting the next
+    patch."""
+
     image_means: list[Float] | None = Field(default=None, min_length=0, max_length=32)
     """Means of the data across channels, used for normalization."""
 
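Because patch_filter and coord_filter are discriminated unions keyed on each filter's name literal, they can be provided either as model instances or as plain dictionaries. A hedged sketch of the corresponding configuration fragment (only the new fields are shown; all values are illustrative and the other required NGDataConfig fields are omitted):

data_config_fragment = {
    # keep patches whose max-filtered response exceeds a threshold
    "patch_filter": {"name": "max", "threshold": 0.1, "p": 0.8},
    # keep patch coordinates where at least 30% of the pixels are masked
    "coord_filter": {"name": "mask", "coverage": 0.3},
    # accept a patch after 10 consecutive rejections by the filter
    "patch_filter_patience": 10,
}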
@@ -141,8 +183,8 @@ class NGDataConfig(BaseModel):
     test_dataloader_params: dict[str, Any] = Field(default={})
     """Dictionary of PyTorch test dataloader parameters."""
 
-    seed: int | None = Field(
-    """Random seed for reproducibility."""
+    seed: int | None = Field(default_factory=generate_random_seed, gt=0)
+    """Random seed for reproducibility. If not specified, a random seed is generated."""
 
     @field_validator("axes")
     @classmethod
@@ -296,6 +338,129 @@ class NGDataConfig(BaseModel):
 
         return self
 
+    @model_validator(mode="after")
+    def propagate_seed_to_filters(self: Self) -> Self:
+        """
+        Propagate the main seed to patch and coordinate filters that support seeds.
+
+        This ensures that all filters use the same seed for reproducibility,
+        unless they already have a seed explicitly set.
+
+        Returns
+        -------
+        Self
+            Data model with propagated seeds.
+        """
+        if self.seed is not None:
+            if self.patch_filter is not None:
+                if (
+                    hasattr(self.patch_filter, "seed")
+                    and self.patch_filter.seed is None
+                ):
+                    self.patch_filter.seed = self.seed
+
+            if self.coord_filter is not None:
+                if (
+                    hasattr(self.coord_filter, "seed")
+                    and self.coord_filter.seed is None
+                ):
+                    self.coord_filter.seed = self.seed
+
+        return self
+
+    @model_validator(mode="after")
+    def propagate_seed_to_transforms(self: Self) -> Self:
+        """
+        Propagate the main seed to all transforms that support seeds.
+
+        This ensures that all transforms use the same seed for reproducibility,
+        unless they already have a seed explicitly set.
+
+        Returns
+        -------
+        Self
+            Data model with propagated seeds.
+        """
+        if self.seed is not None:
+            for transform in self.transforms:
+                if hasattr(transform, "seed") and transform.seed is None:
+                    transform.seed = self.seed
+        return self
+
+    @field_validator("train_dataloader_params", "val_dataloader_params", mode="before")
+    @classmethod
+    def set_default_pin_memory(
+        cls, dataloader_params: dict[str, Any]
+    ) -> dict[str, Any]:
+        """
+        Set default pin_memory for dataloader parameters if not provided.
+
+        - If 'pin_memory' is not set, it defaults to True if CUDA is available.
+
+        Parameters
+        ----------
+        dataloader_params : dict of {str: Any}
+            The dataloader parameters.
+
+        Returns
+        -------
+        dict of {str: Any}
+            The dataloader parameters with pin_memory default applied.
+        """
+        if "pin_memory" not in dataloader_params:
+            import torch
+
+            dataloader_params["pin_memory"] = torch.cuda.is_available()
+        return dataloader_params
+
+    @field_validator("train_dataloader_params", mode="before")
+    @classmethod
+    def set_default_train_workers(
+        cls, dataloader_params: dict[str, Any]
+    ) -> dict[str, Any]:
+        """
+        Set default num_workers for training dataloader if not provided.
+
+        - If 'num_workers' is not set, it defaults to the number of available CPU cores.
+
+        Parameters
+        ----------
+        dataloader_params : dict of {str: Any}
+            The training dataloader parameters.
+
+        Returns
+        -------
+        dict of {str: Any}
+            The dataloader parameters with num_workers default applied.
+        """
+        if "num_workers" not in dataloader_params:
+            # Use 0 workers during tests, otherwise use all available CPU cores
+            if "pytest" in sys.modules:
+                dataloader_params["num_workers"] = 0
+            else:
+                dataloader_params["num_workers"] = os.cpu_count()
+
+        return dataloader_params
+
+    @model_validator(mode="after")
+    def set_val_workers_to_match_train(self: Self) -> Self:
+        """
+        Set validation dataloader num_workers to match training dataloader.
+
+        If num_workers is not specified in val_dataloader_params, it will be set to the
+        same value as train_dataloader_params["num_workers"].
+
+        Returns
+        -------
+        Self
+            Validated data model with synchronized num_workers.
+        """
+        if "num_workers" not in self.val_dataloader_params:
+            self.val_dataloader_params["num_workers"] = self.train_dataloader_params[
+                "num_workers"
+            ]
+        return self
+
     def __str__(self) -> str:
         """
         Pretty string reprensenting the configuration.
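The seed handling introduced in this file means a top-level seed is always present (generated by generate_random_seed when not supplied) and is then pushed down into every filter and transform that exposes a seed attribute still set to None. The propagation pattern in isolation, as an illustrative sketch (propagate_seed is not a careamics function):

import random


def propagate_seed(seed: int | None, components: list) -> None:
    # share the top-level seed with every seed-aware component
    # that has not been given one explicitly
    if seed is None:
        return
    for component in components:
        if hasattr(component, "seed") and component.seed is None:
            component.seed = seed


# e.g. seed = random.randint(1, 2**31 - 1) when the user did not provide one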
careamics/config/data/patch_filter/__init__.py
ADDED
@@ -0,0 +1,15 @@
+"""Pydantic models representing coordinate and patch filters."""
+
+__all__ = [
+    "FilterModel",
+    "MaskFilterModel",
+    "MaxFilterModel",
+    "MeanSTDFilterModel",
+    "ShannonFilterModel",
+]
+
+from .filter_model import FilterModel
+from .mask_filter_model import MaskFilterModel
+from .max_filter_model import MaxFilterModel
+from .meanstd_filter_model import MeanSTDFilterModel
+from .shannon_filter_model import ShannonFilterModel
careamics/config/data/patch_filter/filter_model.py
ADDED
@@ -0,0 +1,16 @@
+"""Base class for patch and coordinate filtering models."""
+
+from pydantic import BaseModel, Field
+
+
+class FilterModel(BaseModel):
+    """Base class for patch and coordinate filtering models."""
+
+    name: str
+    """Name of the filter."""
+
+    p: float = Field(1.0, ge=0.0, le=1.0)
+    """Probability of applying the filter to a patch or coordinate."""
+
+    seed: int | None = Field(default=None, gt=0)
+    """Seed for the random number generator for reproducibility."""
careamics/config/data/patch_filter/mask_filter_model.py
ADDED
@@ -0,0 +1,17 @@
+"""Pydantic model for the mask coordinate filter."""
+
+from typing import Literal
+
+from pydantic import Field
+
+from .filter_model import FilterModel
+
+
+class MaskFilterModel(FilterModel):
+    """Pydantic model for the mask coordinate filter."""
+
+    name: Literal["mask"] = "mask"
+    """Name of the filter."""
+
+    coverage: float = Field(0.5, ge=0.0, le=1.0)
+    """Percentage of masked pixels required to keep a patch."""
careamics/config/data/patch_filter/max_filter_model.py
ADDED
@@ -0,0 +1,15 @@
+"""Pydantic model for the max patch filter."""
+
+from typing import Literal
+
+from .filter_model import FilterModel
+
+
+class MaxFilterModel(FilterModel):
+    """Pydantic model for the max patch filter."""
+
+    name: Literal["max"] = "max"
+    """Name of the filter."""
+
+    threshold: float
+    """Threshold for the minimum of the max-filtered patch."""
careamics/config/data/patch_filter/meanstd_filter_model.py
ADDED
@@ -0,0 +1,18 @@
+"""Pydantic model for the mean std patch filter."""
+
+from typing import Literal
+
+from .filter_model import FilterModel
+
+
+class MeanSTDFilterModel(FilterModel):
+    """Pydantic model for the mean std patch filter."""
+
+    name: Literal["mean_std"] = "mean_std"
+    """Name of the filter."""
+
+    mean_threshold: float
+    """Minimum mean intensity required to keep a patch."""
+
+    std_threshold: float | None = None
+    """Minimum standard deviation required to keep a patch."""
careamics/config/data/patch_filter/shannon_filter_model.py
ADDED
@@ -0,0 +1,15 @@
+"""Pydantic model for the Shannon entropy patch filter."""
+
+from typing import Literal
+
+from .filter_model import FilterModel
+
+
+class ShannonFilterModel(FilterModel):
+    """Pydantic model for the Shannon entropy patch filter."""
+
+    name: Literal["shannon"] = "shannon"
+    """Name of the filter."""
+
+    threshold: float
+    """Minimum Shannon entropy required to keep a patch."""
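Each new filter model pairs a name discriminator with its filter-specific thresholds, plus the p and seed fields inherited from FilterModel. For instance, assuming the models are imported from the new careamics.config.data.patch_filter package (the threshold values below are illustrative):

from careamics.config.data.patch_filter import MaskFilterModel, MeanSTDFilterModel

# keep patches whose mean is above 0.05 and whose std is above 0.01,
# applying the filter to 90% of candidate patches
intensity_filter = MeanSTDFilterModel(mean_threshold=0.05, std_threshold=0.01, p=0.9)

# keep patch coordinates where at least half of the pixels are masked
mask_filter = MaskFilterModel(coverage=0.5)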
careamics/config/support/supported_filters.py
ADDED
@@ -0,0 +1,17 @@
+"""Coordinate and patch filters supported by CAREamics."""
+
+from careamics.utils import BaseEnum
+
+
+class SupportedPatchFilters(str, BaseEnum):
+    """Supported patch filters."""
+
+    MAX = "max"
+    MEANSTD = "mean_std"
+    SHANNON = "shannon"
+
+
+class SupportedCoordinateFilters(str, BaseEnum):
+    """Supported coordinate filters."""
+
+    MASK = "mask"