careamics 0.1.0rc2__py3-none-any.whl → 0.1.0rc4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (134)
  1. careamics/__init__.py +16 -4
  2. careamics/callbacks/__init__.py +6 -0
  3. careamics/callbacks/hyperparameters_callback.py +42 -0
  4. careamics/callbacks/progress_bar_callback.py +57 -0
  5. careamics/careamist.py +761 -0
  6. careamics/config/__init__.py +31 -3
  7. careamics/config/algorithm_model.py +167 -0
  8. careamics/config/architectures/__init__.py +17 -0
  9. careamics/config/architectures/architecture_model.py +29 -0
  10. careamics/config/architectures/custom_model.py +150 -0
  11. careamics/config/architectures/register_model.py +101 -0
  12. careamics/config/architectures/unet_model.py +96 -0
  13. careamics/config/architectures/vae_model.py +39 -0
  14. careamics/config/callback_model.py +92 -0
  15. careamics/config/configuration_example.py +89 -0
  16. careamics/config/configuration_factory.py +597 -0
  17. careamics/config/configuration_model.py +597 -0
  18. careamics/config/data_model.py +555 -0
  19. careamics/config/inference_model.py +283 -0
  20. careamics/config/noise_models.py +162 -0
  21. careamics/config/optimizer_models.py +181 -0
  22. careamics/config/references/__init__.py +45 -0
  23. careamics/config/references/algorithm_descriptions.py +131 -0
  24. careamics/config/references/references.py +38 -0
  25. careamics/config/support/__init__.py +33 -0
  26. careamics/config/support/supported_activations.py +24 -0
  27. careamics/config/support/supported_algorithms.py +18 -0
  28. careamics/config/support/supported_architectures.py +18 -0
  29. careamics/config/support/supported_data.py +82 -0
  30. careamics/{dataset/extraction_strategy.py → config/support/supported_extraction_strategies.py} +5 -2
  31. careamics/config/support/supported_loggers.py +8 -0
  32. careamics/config/support/supported_losses.py +25 -0
  33. careamics/config/support/supported_optimizers.py +55 -0
  34. careamics/config/support/supported_pixel_manipulations.py +15 -0
  35. careamics/config/support/supported_struct_axis.py +19 -0
  36. careamics/config/support/supported_transforms.py +23 -0
  37. careamics/config/tile_information.py +104 -0
  38. careamics/config/training_model.py +65 -0
  39. careamics/config/transformations/__init__.py +14 -0
  40. careamics/config/transformations/n2v_manipulate_model.py +63 -0
  41. careamics/config/transformations/nd_flip_model.py +32 -0
  42. careamics/config/transformations/normalize_model.py +31 -0
  43. careamics/config/transformations/transform_model.py +44 -0
  44. careamics/config/transformations/xy_random_rotate90_model.py +29 -0
  45. careamics/config/validators/__init__.py +5 -0
  46. careamics/config/validators/validator_utils.py +100 -0
  47. careamics/conftest.py +26 -0
  48. careamics/dataset/__init__.py +5 -0
  49. careamics/dataset/dataset_utils/__init__.py +19 -0
  50. careamics/dataset/dataset_utils/dataset_utils.py +100 -0
  51. careamics/dataset/dataset_utils/file_utils.py +140 -0
  52. careamics/dataset/dataset_utils/read_tiff.py +61 -0
  53. careamics/dataset/dataset_utils/read_utils.py +25 -0
  54. careamics/dataset/dataset_utils/read_zarr.py +56 -0
  55. careamics/dataset/in_memory_dataset.py +323 -134
  56. careamics/dataset/iterable_dataset.py +416 -0
  57. careamics/dataset/patching/__init__.py +8 -0
  58. careamics/dataset/patching/patch_transform.py +44 -0
  59. careamics/dataset/patching/patching.py +212 -0
  60. careamics/dataset/patching/random_patching.py +190 -0
  61. careamics/dataset/patching/sequential_patching.py +206 -0
  62. careamics/dataset/patching/tiled_patching.py +158 -0
  63. careamics/dataset/patching/validate_patch_dimension.py +60 -0
  64. careamics/dataset/zarr_dataset.py +149 -0
  65. careamics/lightning_datamodule.py +743 -0
  66. careamics/lightning_module.py +292 -0
  67. careamics/lightning_prediction_datamodule.py +396 -0
  68. careamics/lightning_prediction_loop.py +116 -0
  69. careamics/losses/__init__.py +4 -1
  70. careamics/losses/loss_factory.py +24 -14
  71. careamics/losses/losses.py +65 -5
  72. careamics/losses/noise_model_factory.py +40 -0
  73. careamics/losses/noise_models.py +524 -0
  74. careamics/model_io/__init__.py +8 -0
  75. careamics/model_io/bioimage/__init__.py +11 -0
  76. careamics/model_io/bioimage/_readme_factory.py +120 -0
  77. careamics/model_io/bioimage/bioimage_utils.py +48 -0
  78. careamics/model_io/bioimage/model_description.py +318 -0
  79. careamics/model_io/bmz_io.py +231 -0
  80. careamics/model_io/model_io_utils.py +80 -0
  81. careamics/models/__init__.py +4 -1
  82. careamics/models/activation.py +35 -0
  83. careamics/models/layers.py +244 -0
  84. careamics/models/model_factory.py +21 -221
  85. careamics/models/unet.py +46 -20
  86. careamics/prediction/__init__.py +1 -3
  87. careamics/prediction/stitch_prediction.py +73 -0
  88. careamics/transforms/__init__.py +41 -0
  89. careamics/transforms/n2v_manipulate.py +113 -0
  90. careamics/transforms/nd_flip.py +93 -0
  91. careamics/transforms/normalize.py +109 -0
  92. careamics/transforms/pixel_manipulation.py +383 -0
  93. careamics/transforms/struct_mask_parameters.py +18 -0
  94. careamics/transforms/tta.py +74 -0
  95. careamics/transforms/xy_random_rotate90.py +95 -0
  96. careamics/utils/__init__.py +10 -12
  97. careamics/utils/base_enum.py +32 -0
  98. careamics/utils/context.py +22 -2
  99. careamics/utils/metrics.py +0 -46
  100. careamics/utils/path_utils.py +24 -0
  101. careamics/utils/ram.py +13 -0
  102. careamics/utils/receptive_field.py +102 -0
  103. careamics/utils/running_stats.py +43 -0
  104. careamics/utils/torch_utils.py +112 -75
  105. careamics-0.1.0rc4.dist-info/METADATA +122 -0
  106. careamics-0.1.0rc4.dist-info/RECORD +110 -0
  107. {careamics-0.1.0rc2.dist-info → careamics-0.1.0rc4.dist-info}/WHEEL +1 -1
  108. careamics/bioimage/__init__.py +0 -15
  109. careamics/bioimage/docs/Noise2Void.md +0 -5
  110. careamics/bioimage/docs/__init__.py +0 -1
  111. careamics/bioimage/io.py +0 -182
  112. careamics/bioimage/rdf.py +0 -105
  113. careamics/config/algorithm.py +0 -231
  114. careamics/config/config.py +0 -297
  115. careamics/config/config_filter.py +0 -44
  116. careamics/config/data.py +0 -194
  117. careamics/config/torch_optim.py +0 -118
  118. careamics/config/training.py +0 -534
  119. careamics/dataset/dataset_utils.py +0 -111
  120. careamics/dataset/patching.py +0 -492
  121. careamics/dataset/prepare_dataset.py +0 -175
  122. careamics/dataset/tiff_dataset.py +0 -212
  123. careamics/engine.py +0 -1014
  124. careamics/manipulation/__init__.py +0 -4
  125. careamics/manipulation/pixel_manipulation.py +0 -158
  126. careamics/prediction/prediction_utils.py +0 -106
  127. careamics/utils/ascii_logo.txt +0 -9
  128. careamics/utils/augment.py +0 -65
  129. careamics/utils/normalization.py +0 -55
  130. careamics/utils/validators.py +0 -170
  131. careamics/utils/wandb.py +0 -121
  132. careamics-0.1.0rc2.dist-info/METADATA +0 -81
  133. careamics-0.1.0rc2.dist-info/RECORD +0 -47
  134. {careamics-0.1.0rc2.dist-info → careamics-0.1.0rc4.dist-info}/licenses/LICENSE +0 -0
careamics/lightning_datamodule.py (added)
@@ -0,0 +1,743 @@
1
+ """Training and validation Lightning data modules."""
2
+ from pathlib import Path
3
+ from typing import Any, Callable, Dict, List, Literal, Optional, Union
4
+
5
+ import numpy as np
6
+ import pytorch_lightning as L
7
+ from albumentations import Compose
8
+ from torch.utils.data import DataLoader
9
+
10
+ from careamics.config import DataConfig
11
+ from careamics.config.data_model import TRANSFORMS_UNION
12
+ from careamics.config.support import SupportedData
13
+ from careamics.dataset.dataset_utils import (
14
+ get_files_size,
15
+ get_read_func,
16
+ list_files,
17
+ validate_source_target_files,
18
+ )
19
+ from careamics.dataset.in_memory_dataset import (
20
+ InMemoryDataset,
21
+ )
22
+ from careamics.dataset.iterable_dataset import (
23
+ PathIterableDataset,
24
+ )
25
+ from careamics.utils import get_logger, get_ram_size
26
+
27
+ DatasetType = Union[InMemoryDataset, PathIterableDataset]
28
+
29
+ logger = get_logger(__name__)
30
+
31
+
32
+ class CAREamicsTrainData(L.LightningDataModule):
33
+ """
34
+ CAREamics Lightning training and validation data module.
35
+
36
+ The data module can be used with Path, str or numpy arrays. In the case of
37
+ numpy arrays, it loads and computes all the patches in memory. For Path and str
38
+ inputs, it calculates the total file size and estimates whether it can fit in
39
+ memory. If it does not, it iterates through the files. This behaviour can be
40
+ deactivated by setting `use_in_memory` to False, in which case it will
41
+ always use the iterable dataset to train on a Path or str.
42
+
43
+ The data can be either a folder containing images or a single file.
44
+
45
+ Validation can be omitted, in which case the validation data is extracted from
46
+ the training data. The percentage of the training data to use for validation,
47
+ as well as the minimum number of patches or files to split from the training
48
+ data can be set using `val_percentage` and `val_minimum_split`, respectively.
49
+
50
+ To read custom data types, you can set `data_type` to `custom` in `data_config`
51
+ and provide a function that returns a numpy array from a path as the
52
+ `read_source_func` parameter. The function will receive a Path object and
53
+ an axes string as arguments, the axes being derived from the `data_config`.
54
+
55
+ You can also provide a `fnmatch` and `Path.rglob` compatible expression (e.g.
56
+ "*.czi") to filter the files extension using `extension_filter`.
57
+
58
+ Parameters
59
+ ----------
60
+ data_config : DataModel
61
+ Pydantic model for CAREamics data configuration.
62
+ train_data : Union[Path, str, np.ndarray]
63
+ Training data, can be a path to a folder, a file or a numpy array.
64
+ val_data : Optional[Union[Path, str, np.ndarray]], optional
65
+ Validation data, can be a path to a folder, a file or a numpy array, by
66
+ default None.
67
+ train_data_target : Optional[Union[Path, str, np.ndarray]], optional
68
+ Training target data, can be a path to a folder, a file or a numpy array, by
69
+ default None.
70
+ val_data_target : Optional[Union[Path, str, np.ndarray]], optional
71
+ Validation target data, can be a path to a folder, a file or a numpy array,
72
+ by default None.
73
+ read_source_func : Optional[Callable], optional
74
+ Function to read the source data, by default None. Only used for `custom`
75
+ data type (see DataModel).
76
+ extension_filter : str, optional
77
+ Filter for file extensions, by default "". Only used for `custom` data types
78
+ (see DataModel).
79
+ val_percentage : float, optional
80
+ Percentage of the training data to use for validation, by default 0.1. Only
81
+ used if `val_data` is None.
82
+ val_minimum_split : int, optional
83
+ Minimum number of patches or files to split from the training data for
84
+ validation, by default 5. Only used if `val_data` is None.
85
+ use_in_memory : bool, optional
86
+ Use in memory dataset if possible, by default True.
87
+
88
+ Attributes
89
+ ----------
90
+ data_config : DataModel
91
+ CAREamics data configuration.
92
+ data_type : SupportedData
93
+ Expected data type, one of "tiff", "array" or "custom".
94
+ batch_size : int
95
+ Batch size.
96
+ use_in_memory : bool
97
+ Whether to use in memory dataset if possible.
98
+ train_data : Union[Path, str, np.ndarray]
99
+ Training data.
100
+ val_data : Optional[Union[Path, str, np.ndarray]]
101
+ Validation data.
102
+ train_data_target : Optional[Union[Path, str, np.ndarray]]
103
+ Training target data.
104
+ val_data_target : Optional[Union[Path, str, np.ndarray]]
105
+ Validation target data.
106
+ val_percentage : float
107
+ Percentage of the training data to use for validation, if no validation data is
108
+ provided.
109
+ val_minimum_split : int
110
+ Minimum number of patches or files to split from the training data for
111
+ validation, if no validation data is provided.
112
+ read_source_func : Optional[Callable]
113
+ Function to read the source data, used if `data_type` is `custom`.
114
+ extension_filter : str
115
+ Filter for file extensions, used if `data_type` is `custom`.
116
+ """
117
+
118
+ def __init__(
119
+ self,
120
+ data_config: DataConfig,
121
+ train_data: Union[Path, str, np.ndarray],
122
+ val_data: Optional[Union[Path, str, np.ndarray]] = None,
123
+ train_data_target: Optional[Union[Path, str, np.ndarray]] = None,
124
+ val_data_target: Optional[Union[Path, str, np.ndarray]] = None,
125
+ read_source_func: Optional[Callable] = None,
126
+ extension_filter: str = "",
127
+ val_percentage: float = 0.1,
128
+ val_minimum_split: int = 5,
129
+ use_in_memory: bool = True,
130
+ ) -> None:
131
+ """
132
+ Constructor.
133
+
134
+ Parameters
135
+ ----------
136
+ data_config : DataModel
137
+ Pydantic model for CAREamics data configuration.
138
+ train_data : Union[Path, str, np.ndarray]
139
+ Training data, can be a path to a folder, a file or a numpy array.
140
+ val_data : Optional[Union[Path, str, np.ndarray]], optional
141
+ Validation data, can be a path to a folder, a file or a numpy array, by
142
+ default None.
143
+ train_data_target : Optional[Union[Path, str, np.ndarray]], optional
144
+ Training target data, can be a path to a folder, a file or a numpy array, by
145
+ default None.
146
+ val_data_target : Optional[Union[Path, str, np.ndarray]], optional
147
+ Validation target data, can be a path to a folder, a file or a numpy array,
148
+ by default None.
149
+ read_source_func : Optional[Callable], optional
150
+ Function to read the source data, by default None. Only used for `custom`
151
+ data type (see DataModel).
152
+ extension_filter : str, optional
153
+ Filter for file extensions, by default "". Only used for `custom` data types
154
+ (see DataModel).
155
+ val_percentage : float, optional
156
+ Percentage of the training data to use for validation, by default 0.1. Only
157
+ used if `val_data` is None.
158
+ val_minimum_split : int, optional
159
+ Minimum number of patches or files to split from the training data for
160
+ validation, by default 5. Only used if `val_data` is None.
161
+ use_in_memory : bool, optional
162
+ Use in memory dataset if possible, by default True.
163
+
164
+ Raises
165
+ ------
166
+ NotImplementedError
167
+ Raised if target data is provided.
168
+ ValueError
169
+ If the input types are mixed (e.g. Path and np.ndarray).
170
+ ValueError
171
+ If the data type is `custom` and no `read_source_func` is provided.
172
+ ValueError
173
+ If the data type is `array` and the input is not a numpy array.
174
+ ValueError
175
+ If the data type is `tiff` and the input is neither a Path nor a str.
176
+ """
177
+ super().__init__()
178
+
179
+ # check input types coherence (no mixed types)
180
+ inputs = [train_data, val_data, train_data_target, val_data_target]
181
+ types_set = {type(i) for i in inputs}
182
+ if len(types_set) > 2: # None + expected type
183
+ raise ValueError(
184
+ f"Inputs for `train_data`, `val_data`, `train_data_target` and "
185
+ f"`val_data_target` must be of the same type or None. Got "
186
+ f"{types_set}."
187
+ )
188
+
189
+ # check that a read source function is provided for custom types
190
+ if data_config.data_type == SupportedData.CUSTOM and read_source_func is None:
191
+ raise ValueError(
192
+ f"Data type {SupportedData.CUSTOM} is not allowed without "
193
+ f"specifying a `read_source_func` and an `extension_filer`."
194
+ )
195
+
196
+ # check correct input type
197
+ if (
198
+ isinstance(train_data, np.ndarray)
199
+ and data_config.data_type != SupportedData.ARRAY
200
+ ):
201
+ raise ValueError(
202
+ f"Received a numpy array as input, but the data type was set to "
203
+ f"{data_config.data_type}. Set the data type in the configuration "
204
+ f"to {SupportedData.ARRAY} to train on numpy arrays."
205
+ )
206
+
207
+ # and that Path or str are passed, if tiff file type specified
208
+ elif (isinstance(train_data, Path) or isinstance(train_data, str)) and (
209
+ data_config.data_type != SupportedData.TIFF
210
+ and data_config.data_type != SupportedData.CUSTOM
211
+ ):
212
+ raise ValueError(
213
+ f"Received a path as input, but the data type was neither set to "
214
+ f"{SupportedData.TIFF} nor {SupportedData.CUSTOM}. Set the data type "
215
+ f"in the configuration to {SupportedData.TIFF} or "
216
+ f"{SupportedData.CUSTOM} to train on files."
217
+ )
218
+
219
+ # configuration
220
+ self.data_config = data_config
221
+ self.data_type = data_config.data_type
222
+ self.batch_size = data_config.batch_size
223
+ self.use_in_memory = use_in_memory
224
+
225
+ # data
226
+ self.train_data = train_data
227
+ self.val_data = val_data
228
+
229
+ self.train_data_target = train_data_target
230
+ self.val_data_target = val_data_target
231
+ self.val_percentage = val_percentage
232
+ self.val_minimum_split = val_minimum_split
233
+
234
+ # read source function corresponding to the requested type
235
+ if data_config.data_type == SupportedData.CUSTOM:
236
+ # mypy check
237
+ assert read_source_func is not None
238
+
239
+ self.read_source_func: Callable = read_source_func
240
+
241
+ elif data_config.data_type != SupportedData.ARRAY:
242
+ self.read_source_func = get_read_func(data_config.data_type)
243
+
244
+ self.extension_filter = extension_filter
245
+
246
+ # Pytorch dataloader parameters
247
+ self.dataloader_params = (
248
+ data_config.dataloader_params if data_config.dataloader_params else {}
249
+ )
250
+
251
+ def prepare_data(self) -> None:
252
+ """
253
+ Hook used to prepare the data before calling `setup`.
254
+
255
+ Here, we only need to examine the data if it was provided as a str or a Path.
256
+
257
+ TODO: from lightning doc:
258
+ prepare_data is called from the main process. It is not recommended to assign
259
+ state here (e.g. self.x = y) since it is called on a single process and if you
260
+ assign states here then they won't be available for other processes.
261
+
262
+ https://lightning.ai/docs/pytorch/stable/data/datamodule.html
263
+ """
264
+ # if the data is a Path or a str
265
+ if (
266
+ not isinstance(self.train_data, np.ndarray)
267
+ and not isinstance(self.val_data, np.ndarray)
268
+ and not isinstance(self.train_data_target, np.ndarray)
269
+ and not isinstance(self.val_data_target, np.ndarray)
270
+ ):
271
+ # list training files
272
+ self.train_files = list_files(
273
+ self.train_data, self.data_type, self.extension_filter
274
+ )
275
+ self.train_files_size = get_files_size(self.train_files)
276
+
277
+ # list validation files
278
+ if self.val_data is not None:
279
+ self.val_files = list_files(
280
+ self.val_data, self.data_type, self.extension_filter
281
+ )
282
+
283
+ # same for target data
284
+ if self.train_data_target is not None:
285
+ self.train_target_files: List[Path] = list_files(
286
+ self.train_data_target, self.data_type, self.extension_filter
287
+ )
288
+
289
+ # verify that they match the training data
290
+ validate_source_target_files(self.train_files, self.train_target_files)
291
+
292
+ if self.val_data_target is not None:
293
+ self.val_target_files = list_files(
294
+ self.val_data_target, self.data_type, self.extension_filter
295
+ )
296
+
297
+ # verify that they match the validation data
298
+ validate_source_target_files(self.val_files, self.val_target_files)
299
+
300
+ def setup(self, *args: Any, **kwargs: Any) -> None:
301
+ """Hook called at the beginning of fit, validate, or predict.
302
+
303
+ Parameters
304
+ ----------
305
+ *args : Any
306
+ Unused.
307
+ **kwargs : Any
308
+ Unused.
309
+ """
310
+ # if numpy array
311
+ if self.data_type == SupportedData.ARRAY:
312
+ # train dataset
313
+ self.train_dataset: DatasetType = InMemoryDataset(
314
+ data_config=self.data_config,
315
+ inputs=self.train_data,
316
+ data_target=self.train_data_target,
317
+ )
318
+
319
+ # validation dataset
320
+ if self.val_data is not None:
321
+ # create its own dataset
322
+ self.val_dataset: DatasetType = InMemoryDataset(
323
+ data_config=self.data_config,
324
+ inputs=self.val_data,
325
+ data_target=self.val_data_target,
326
+ )
327
+ else:
328
+ # extract validation from the training patches
329
+ self.val_dataset = self.train_dataset.split_dataset(
330
+ percentage=self.val_percentage,
331
+ minimum_patches=self.val_minimum_split,
332
+ )
333
+
334
+ # else we read files
335
+ else:
336
+ # Heuristic: if the file size is smaller than 80% of the RAM,
337
+ # we run the training in memory, otherwise we switch to an iterable dataset.
338
+ # The switch is deactivated if use_in_memory is False.
339
+ if self.use_in_memory and self.train_files_size < get_ram_size() * 0.8:
340
+ # train dataset
341
+ self.train_dataset = InMemoryDataset(
342
+ data_config=self.data_config,
343
+ inputs=self.train_files,
344
+ data_target=self.train_target_files
345
+ if self.train_data_target
346
+ else None,
347
+ read_source_func=self.read_source_func,
348
+ )
349
+
350
+ # validation dataset
351
+ if self.val_data is not None:
352
+ self.val_dataset = InMemoryDataset(
353
+ data_config=self.data_config,
354
+ inputs=self.val_files,
355
+ data_target=self.val_target_files
356
+ if self.val_data_target
357
+ else None,
358
+ read_source_func=self.read_source_func,
359
+ )
360
+ else:
361
+ # split dataset
362
+ self.val_dataset = self.train_dataset.split_dataset(
363
+ percentage=self.val_percentage,
364
+ minimum_patches=self.val_minimum_split,
365
+ )
366
+
367
+ # else if the data is too large, load file by file during training
368
+ else:
369
+ # create training dataset
370
+ self.train_dataset = PathIterableDataset(
371
+ data_config=self.data_config,
372
+ src_files=self.train_files,
373
+ target_files=self.train_target_files
374
+ if self.train_data_target
375
+ else None,
376
+ read_source_func=self.read_source_func,
377
+ )
378
+
379
+ # create validation dataset
380
+ if self.val_files is not None:
381
+ # create its own dataset
382
+ self.val_dataset = PathIterableDataset(
383
+ data_config=self.data_config,
384
+ src_files=self.val_files,
385
+ target_files=self.val_target_files
386
+ if self.val_data_target
387
+ else None,
388
+ read_source_func=self.read_source_func,
389
+ )
390
+ elif len(self.train_files) <= self.val_minimum_split:
391
+ raise ValueError(
392
+ f"Not enough files to split a minimum of "
393
+ f"{self.val_minimum_split} files, got {len(self.train_files)} "
394
+ f"files."
395
+ )
396
+ else:
397
+ # extract validation from the training patches
398
+ self.val_dataset = self.train_dataset.split_dataset(
399
+ percentage=self.val_percentage,
400
+ minimum_files=self.val_minimum_split,
401
+ )
402
+
403
+ def train_dataloader(self) -> Any:
404
+ """
405
+ Create a dataloader for training.
406
+
407
+ Returns
408
+ -------
409
+ Any
410
+ Training dataloader.
411
+ """
412
+ return DataLoader(
413
+ self.train_dataset, batch_size=self.batch_size, **self.dataloader_params
414
+ )
415
+
416
+ def val_dataloader(self) -> Any:
417
+ """
418
+ Create a dataloader for validation.
419
+
420
+ Returns
421
+ -------
422
+ Any
423
+ Validation dataloader.
424
+ """
425
+ return DataLoader(
426
+ self.val_dataset,
427
+ batch_size=self.batch_size,
428
+ )
429
+
430
+
431
+ class TrainingDataWrapper(CAREamicsTrainData):
432
+ """
433
+ Wrapper around the CAREamics Lightning training data module.
434
+
435
+ This class is used to explicitly pass the parameters usually contained in a
436
+ `data_model` configuration.
437
+
438
+ Since the Lightning datamodule has no access to the model, make sure that the
439
+ parameters passed to the datamodule are consistent with the model's
440
+ requirements.
441
+
442
+ The data module can be used with Path, str or numpy arrays. In the case of
443
+ numpy arrays, it loads and computes all the patches in memory. For Path and str
444
+ inputs, it calculates the total file size and estimates whether it can fit in
445
+ memory. If it does not, it iterates through the files. This behaviour can be
446
+ deactivated by setting `use_in_memory` to False, in which case it will
447
+ always use the iterable dataset to train on a Path or str.
448
+
449
+ To use array data, set `data_type` to `array` and pass a numpy array to
450
+ `train_data`.
451
+
452
+ Note that N2V requires a specific transformation (N2V manipulation), which is
453
+ not compatible with supervised training. The default transformations applied to the
454
+ training patches are defined in `careamics.config.data_model`. To use different
455
+ transformations, pass a list of transforms or an Albumentations `Compose` as
456
+ the `transforms` parameter. See examples for more details.
457
+
458
+ By default, CAREamics only supports types defined in
459
+ `careamics.config.support.SupportedData`. To read custom data types, you can set
460
+ `data_type` to `custom` and provide a function that returns a numpy array from a
461
+ path. Additionally, pass a `fnmatch` and `Path.rglob` compatible expression (e.g.
462
+ "*.jpeg") to filter the files extension using `extension_filter`.
463
+
464
+ In the absence of validation data, the validation data is extracted from the
465
+ training data. The percentage of the training data to use for validation, as well as
466
+ the minimum number of patches to split from the training data for validation can be
467
+ set using `val_percentage` and `val_minimum_patches`, respectively.
468
+
469
+ In `dataloader_params`, you can pass any parameter accepted by PyTorch dataloaders,
470
+ except for `batch_size`, which is set by the `batch_size` parameter.
471
+
472
+ Finally, if you intend to use the N2V family of algorithms, you can set `use_n2v2`
473
+ to use N2V2, and set the `struct_n2v_axis` and `struct_n2v_span` parameters to
474
+ define the axis and span of the structN2V mask. These parameters have no effect
475
+ if `train_target_data` or `transforms` are provided.
476
+
477
+ Parameters
478
+ ----------
479
+ train_data : Union[str, Path, np.ndarray]
480
+ Training data.
481
+ data_type : Union[str, SupportedData]
482
+ Data type, see `SupportedData` for available options.
483
+ patch_size : List[int]
484
+ Patch size, 2D or 3D patch size.
485
+ axes : str
486
+ Axes of the data, chosen among SCZYX.
487
+ batch_size : int
488
+ Batch size.
489
+ val_data : Optional[Union[str, Path]], optional
490
+ Validation data, by default None.
491
+ transforms : Optional[Union[List[TRANSFORMS_UNION], Compose]], optional
492
+ List of transforms to apply to training patches. If None, default transforms
493
+ are applied.
494
+ train_target_data : Optional[Union[str, Path]], optional
495
+ Training target data, by default None.
496
+ val_target_data : Optional[Union[str, Path]], optional
497
+ Validation target data, by default None.
498
+ read_source_func : Optional[Callable], optional
499
+ Function to read the source data, used if `data_type` is `custom`, by
500
+ default None.
501
+ extension_filter : str, optional
502
+ Filter for file extensions, used if `data_type` is `custom`, by default "".
503
+ val_percentage : float, optional
504
+ Percentage of the training data to use for validation if no validation data
505
+ is given, by default 0.1.
506
+ val_minimum_patches : int, optional
507
+ Minimum number of patches to split from the training data for validation if
508
+ no validation data is given, by default 5.
509
+ dataloader_params : dict, optional
510
+ Pytorch dataloader parameters, by default {}.
511
+ use_in_memory : bool, optional
512
+ Use in memory dataset if possible, by default True.
513
+ use_n2v2 : bool, optional
514
+ Use N2V2 transformation during training, by default False.
515
+ struct_n2v_axis : Literal["horizontal", "vertical", "none"], optional
516
+ Axis for the structN2V mask; no mask is applied if set to "none", by
517
+ default "none".
518
+ struct_n2v_span : int, optional
519
+ Span for the structN2V mask, by default 5.
520
+
521
+ Examples
522
+ --------
523
+ Create a TrainingDataWrapper with default transforms with a numpy array:
524
+ >>> import numpy as np
525
+ >>> from careamics import TrainingDataWrapper
526
+ >>> my_array = np.arange(256).reshape(16, 16)
527
+ >>> data_module = TrainingDataWrapper(
528
+ ... train_data=my_array,
529
+ ... data_type="array",
530
+ ... patch_size=(8, 8),
531
+ ... axes='YX',
532
+ ... batch_size=2,
533
+ ... )
534
+
535
+ For custom data types (those not supported by CAREamics), one can pass a read
536
+ function and a filter for the file extension:
537
+ >>> import numpy as np
538
+ >>> from careamics import TrainingDataWrapper
539
+ >>>
540
+ >>> def read_npy(path):
541
+ ... return np.load(path)
542
+ >>>
543
+ >>> data_module = TrainingDataWrapper(
544
+ ... train_data="path/to/data",
545
+ ... data_type="custom",
546
+ ... patch_size=(8, 8),
547
+ ... axes='YX',
548
+ ... batch_size=2,
549
+ ... read_source_func=read_npy,
550
+ ... extension_filter="*.npy",
551
+ ... )
552
+
553
+ If you want to use a different set of transformations, you can pass a list of
554
+ transforms:
555
+ >>> import numpy as np
556
+ >>> from careamics import TrainingDataWrapper
557
+ >>> from careamics.config.support import SupportedTransform
558
+ >>> my_array = np.arange(256).reshape(16, 16)
559
+ >>> my_transforms = [
560
+ ... {
561
+ ... "name": SupportedTransform.NORMALIZE.value,
562
+ ... "mean": 0,
563
+ ... "std": 1,
564
+ ... },
565
+ ... {
566
+ ... "name": SupportedTransform.N2V_MANIPULATE.value,
567
+ ... }
568
+ ... ]
569
+ >>> data_module = TrainingDataWrapper(
570
+ ... train_data=my_array,
571
+ ... data_type="array",
572
+ ... patch_size=(8, 8),
573
+ ... axes='YX',
574
+ ... batch_size=2,
575
+ ... transforms=my_transforms,
576
+ ... )
577
+ """
578
+
579
+ def __init__(
580
+ self,
581
+ train_data: Union[str, Path, np.ndarray],
582
+ data_type: Union[Literal["array", "tiff", "custom"], SupportedData],
583
+ patch_size: List[int],
584
+ axes: str,
585
+ batch_size: int,
586
+ val_data: Optional[Union[str, Path]] = None,
587
+ transforms: Optional[Union[List[TRANSFORMS_UNION], Compose]] = None,
588
+ train_target_data: Optional[Union[str, Path]] = None,
589
+ val_target_data: Optional[Union[str, Path]] = None,
590
+ read_source_func: Optional[Callable] = None,
591
+ extension_filter: str = "",
592
+ val_percentage: float = 0.1,
593
+ val_minimum_patches: int = 5,
594
+ dataloader_params: Optional[dict] = None,
595
+ use_in_memory: bool = True,
596
+ use_n2v2: bool = False,
597
+ struct_n2v_axis: Literal["horizontal", "vertical", "none"] = "none",
598
+ struct_n2v_span: int = 5,
599
+ ) -> None:
600
+ """
601
+ LightningDataModule wrapper for training and validation datasets.
602
+
603
+ Since the Lightning datamodule has no access to the model, make sure that the
604
+ parameters passed to the datamodule are consistent with the model's
605
+ requirements.
606
+
607
+ The data module can be used with Path, str or numpy arrays. In the case of
608
+ numpy arrays, it loads and computes all the patches in memory. For Path and str
609
+ inputs, it calculates the total file size and estimates whether it can fit in
610
+ memory. If it does not, it iterates through the files. This behaviour can be
611
+ deactivated by setting `use_in_memory` to False, in which case it will
612
+ always use the iterable dataset to train on a Path or str.
613
+
614
+ To use array data, set `data_type` to `array` and pass a numpy array to
615
+ `train_data`.
616
+
617
+ Note that N2V requires a specific transformation (N2V manipulation), which
618
+ is not compatible with supervised training. The default transformations applied
619
+ to the training patches are defined in `careamics.config.data_model`. To use
620
+ different transformations, pass a list of transforms or an Albumentations
621
+ `Compose` as the `transforms` parameter. See examples for more details.
622
+
623
+ By default, CAREamics only supports types defined in
624
+ `careamics.config.support.SupportedData`. To read custom data types, you can set
625
+ `data_type` to `custom` and provide a function that returns a numpy array from a
626
+ path. Additionally, pass a `fnmatch` and `Path.rglob` compatible expression
627
+ (e.g. "*.jpeg") to filter the files extension using `extension_filter`.
628
+
629
+ In the absence of validation data, the validation data is extracted from the
630
+ training data. The percentage of the training data to use for validation, as
631
+ well as the minimum number of patches to split from the training data for
632
+ validation can be set using `val_percentage` and `val_minimum_patches`,
633
+ respectively.
634
+
635
+ In `dataloader_params`, you can pass any parameter accepted by PyTorch
636
+ dataloaders, except for `batch_size`, which is set by the `batch_size`
637
+ parameter.
638
+
639
+ Finally, if you intend to use the N2V family of algorithms, you can set `use_n2v2`
640
+ to use N2V2, and set the `struct_n2v_axis` and `struct_n2v_span` parameters to
641
+ define the axis and span of the structN2V mask. These parameters have no
642
+ effect if `train_target_data` or `transforms` are provided.
643
+
644
+ Parameters
645
+ ----------
646
+ train_data : Union[str, Path, np.ndarray]
647
+ Training data.
648
+ data_type : Union[str, SupportedData]
649
+ Data type, see `SupportedData` for available options.
650
+ patch_size : List[int]
651
+ Patch size, 2D or 3D patch size.
652
+ axes : str
653
+ Axes of the data, chosen among SCZYX.
654
+ batch_size : int
655
+ Batch size.
656
+ val_data : Optional[Union[str, Path]], optional
657
+ Validation data, by default None.
658
+ transforms : Optional[Union[List[TRANSFORMS_UNION], Compose]], optional
659
+ List of transforms to apply to training patches. If None, default transforms
660
+ are applied.
661
+ train_target_data : Optional[Union[str, Path]], optional
662
+ Training target data, by default None.
663
+ val_target_data : Optional[Union[str, Path]], optional
664
+ Validation target data, by default None.
665
+ read_source_func : Optional[Callable], optional
666
+ Function to read the source data, used if `data_type` is `custom`, by
667
+ default None.
668
+ extension_filter : str, optional
669
+ Filter for file extensions, used if `data_type` is `custom`, by default "".
670
+ val_percentage : float, optional
671
+ Percentage of the training data to use for validation if no validation data
672
+ is given, by default 0.1.
673
+ val_minimum_patches : int, optional
674
+ Minimum number of patches to split from the training data for validation if
675
+ no validation data is given, by default 5.
676
+ dataloader_params : dict, optional
677
+ Pytorch dataloader parameters, by default {}.
678
+ use_in_memory : bool, optional
679
+ Use in memory dataset if possible, by default True.
680
+ use_n2v2 : bool, optional
681
+ Use N2V2 transformation during training, by default False.
682
+ struct_n2v_axis : Literal["horizontal", "vertical", "none"], optional
683
+ Axis for the structN2V mask, only applied if `struct_n2v_axis` is `none`, by
684
+ default "none".
685
+ struct_n2v_span : int, optional
686
+ Span for the structN2V mask, by default 5.
687
+
688
+ Raises
689
+ ------
690
+ ValueError
691
+ If a target is set and N2V manipulation is present in the transforms.
692
+ """
693
+ if dataloader_params is None:
694
+ dataloader_params = {}
695
+ data_dict: Dict[str, Any] = {
696
+ "mode": "train",
697
+ "data_type": data_type,
698
+ "patch_size": patch_size,
699
+ "axes": axes,
700
+ "batch_size": batch_size,
701
+ "dataloader_params": dataloader_params,
702
+ }
703
+
704
+ # if transforms are passed (otherwise it will use the default ones)
705
+ if transforms is not None:
706
+ data_dict["transforms"] = transforms
707
+
708
+ # validate configuration
709
+ self.data_config = DataConfig(**data_dict)
710
+
711
+ # N2V specific checks, N2V, structN2V, and transforms
712
+ if (
713
+ self.data_config.has_transform_list()
714
+ and self.data_config.has_n2v_manipulate()
715
+ ):
716
+ # there is no target, so N2V2 and structN2V settings can be changed
717
+ if train_target_data is None:
718
+ self.data_config.set_N2V2(use_n2v2)
719
+ self.data_config.set_structN2V_mask(struct_n2v_axis, struct_n2v_span)
720
+ else:
721
+ raise ValueError(
722
+ "Cannot have both supervised training (target data) and "
723
+ "N2V manipulation in the transforms. Pass a list of transforms "
724
+ "that is compatible with your supervised training."
725
+ )
726
+
727
+ # sanity check on the dataloader parameters
728
+ if "batch_size" in dataloader_params:
729
+ # remove it
730
+ del dataloader_params["batch_size"]
731
+
732
+ super().__init__(
733
+ data_config=self.data_config,
734
+ train_data=train_data,
735
+ val_data=val_data,
736
+ train_data_target=train_target_data,
737
+ val_data_target=val_target_data,
738
+ read_source_func=read_source_func,
739
+ extension_filter=extension_filter,
740
+ val_percentage=val_percentage,
741
+ val_minimum_split=val_minimum_patches,
742
+ use_in_memory=use_in_memory,
743
+ )
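
For orientation, below is a minimal, hedged sketch of how the `TrainingDataWrapper` added in this file might be exercised on its own. It relies only on the constructor parameters and LightningDataModule hooks visible in the diff above; the default transforms are assumed to work out of the box for array data, and the CAREamics LightningModule is not shown in this diff, so the actual training step is only indicated in a comment.

import numpy as np

from careamics import TrainingDataWrapper  # import path taken from the docstring examples above

# Toy 2D training data; 8x8 patches are extracted from it.
train_array = np.random.rand(64, 64).astype(np.float32)

data_module = TrainingDataWrapper(
    train_data=train_array,
    data_type="array",       # "array", "tiff" or "custom"
    patch_size=(8, 8),
    axes="YX",
    batch_size=4,
    val_percentage=0.1,      # validation split taken from the training patches
    val_minimum_patches=5,
)

# Exercise the LightningDataModule hooks directly.
data_module.prepare_data()
data_module.setup()
train_loader = data_module.train_dataloader()
val_loader = data_module.val_dataloader()

# In a real run, the datamodule would instead be passed to
# pytorch_lightning.Trainer.fit(model, datamodule=data_module), where `model`
# is the CAREamics LightningModule configured elsewhere (not shown in this diff).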