careamics 0.0.10__py3-none-any.whl → 0.0.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- careamics/careamist.py +20 -4
- careamics/config/configuration.py +10 -5
- careamics/config/data/data_model.py +38 -1
- careamics/config/optimizer_models.py +1 -3
- careamics/config/training_model.py +0 -2
- careamics/dataset/dataset_utils/running_stats.py +7 -3
- careamics/dataset_ng/README.md +212 -0
- careamics/dataset_ng/dataset.py +233 -0
- careamics/dataset_ng/demos/bsd68_demo.ipynb +356 -0
- careamics/dataset_ng/demos/care_U2OS_demo.ipynb +330 -0
- careamics/dataset_ng/demos/demo_custom_image_stack.ipynb +734 -0
- careamics/dataset_ng/demos/demo_datamodule.ipynb +443 -0
- careamics/dataset_ng/{demo_dataset.ipynb → demos/demo_dataset.ipynb} +39 -15
- careamics/dataset_ng/{demo_patch_extractor.py → demos/demo_patch_extractor.py} +7 -9
- careamics/dataset_ng/demos/mouse_nuclei_demo.ipynb +292 -0
- careamics/dataset_ng/factory.py +408 -0
- careamics/dataset_ng/legacy_interoperability.py +168 -0
- careamics/dataset_ng/patch_extractor/__init__.py +3 -8
- careamics/dataset_ng/patch_extractor/demo_custom_image_stack_loader.py +6 -4
- careamics/dataset_ng/patch_extractor/image_stack/__init__.py +2 -1
- careamics/dataset_ng/patch_extractor/image_stack/image_stack_protocol.py +5 -1
- careamics/dataset_ng/patch_extractor/image_stack_loader.py +5 -75
- careamics/dataset_ng/patch_extractor/patch_extractor.py +5 -4
- careamics/dataset_ng/patch_extractor/patch_extractor_factory.py +73 -106
- careamics/dataset_ng/patching_strategies/__init__.py +6 -1
- careamics/dataset_ng/patching_strategies/patching_strategy_protocol.py +31 -0
- careamics/dataset_ng/patching_strategies/random_patching.py +3 -1
- careamics/dataset_ng/patching_strategies/tiling_strategy.py +171 -0
- careamics/dataset_ng/patching_strategies/whole_sample.py +36 -0
- careamics/lightning/dataset_ng/data_module.py +488 -0
- careamics/lightning/dataset_ng/lightning_modules/__init__.py +9 -0
- careamics/lightning/dataset_ng/lightning_modules/care_module.py +58 -0
- careamics/lightning/dataset_ng/lightning_modules/n2v_module.py +67 -0
- careamics/lightning/dataset_ng/lightning_modules/unet_module.py +143 -0
- careamics/lightning/lightning_module.py +3 -0
- careamics/lvae_training/dataset/__init__.py +8 -3
- careamics/lvae_training/dataset/config.py +3 -3
- careamics/lvae_training/dataset/ms_dataset_ref.py +1067 -0
- careamics/lvae_training/dataset/multich_dataset.py +46 -17
- careamics/lvae_training/dataset/multicrop_dset.py +196 -0
- careamics/lvae_training/dataset/types.py +3 -3
- careamics/lvae_training/dataset/utils/index_manager.py +259 -0
- careamics/lvae_training/eval_utils.py +93 -3
- careamics/transforms/compose.py +1 -0
- careamics/transforms/normalize.py +18 -7
- careamics/utils/lightning_utils.py +25 -11
- {careamics-0.0.10.dist-info → careamics-0.0.12.dist-info}/METADATA +3 -3
- {careamics-0.0.10.dist-info → careamics-0.0.12.dist-info}/RECORD +51 -36
- careamics/dataset_ng/dataset/__init__.py +0 -3
- careamics/dataset_ng/dataset/dataset.py +0 -184
- careamics/dataset_ng/demo_patch_extractor_factory.py +0 -37
- {careamics-0.0.10.dist-info → careamics-0.0.12.dist-info}/WHEEL +0 -0
- {careamics-0.0.10.dist-info → careamics-0.0.12.dist-info}/entry_points.txt +0 -0
- {careamics-0.0.10.dist-info → careamics-0.0.12.dist-info}/licenses/LICENSE +0 -0
careamics/careamist.py CHANGED

```diff
@@ -52,6 +52,9 @@ class CAREamist:
         by default None.
     callbacks : list of Callback, optional
         List of callbacks to use during training and prediction, by default None.
+    enable_progress_bar : bool
+        Whether a progress bar will be displayed during training, validation and
+        prediction.
 
     Attributes
     ----------
@@ -77,6 +80,7 @@ class CAREamist:
         source: Union[Path, str],
         work_dir: Optional[Union[Path, str]] = None,
         callbacks: Optional[list[Callback]] = None,
+        enable_progress_bar: bool = True,
     ) -> None: ...
 
     @overload
@@ -85,6 +89,7 @@ class CAREamist:
         source: Configuration,
         work_dir: Optional[Union[Path, str]] = None,
         callbacks: Optional[list[Callback]] = None,
+        enable_progress_bar: bool = True,
     ) -> None: ...
 
     def __init__(
@@ -92,6 +97,7 @@ class CAREamist:
         source: Union[Path, str, Configuration],
         work_dir: Optional[Union[Path, str]] = None,
         callbacks: Optional[list[Callback]] = None,
+        enable_progress_bar: bool = True,
     ) -> None:
         """
         Initialize CAREamist with a configuration object or a path.
@@ -112,6 +118,9 @@ class CAREamist:
             by default None.
         callbacks : list of Callback, optional
             List of callbacks to use during training and prediction, by default None.
+        enable_progress_bar : bool
+            Whether a progress bar will be displayed during training, validation and
+            prediction.
 
         Raises
         ------
@@ -169,7 +178,7 @@ class CAREamist:
             self.model, self.cfg = load_pretrained(source)
 
         # define the checkpoint saving callback
-        self._define_callbacks(callbacks)
+        self._define_callbacks(callbacks, enable_progress_bar)
 
         # instantiate logger
         csv_logger = CSVLogger(
@@ -202,7 +211,7 @@ class CAREamist:
             precision=self.cfg.training_config.precision,
             max_steps=self.cfg.training_config.max_steps,
             check_val_every_n_epoch=self.cfg.training_config.check_val_every_n_epoch,
-            enable_progress_bar=self.cfg.training_config.enable_progress_bar,
+            enable_progress_bar=enable_progress_bar,
             accumulate_grad_batches=self.cfg.training_config.accumulate_grad_batches,
             gradient_clip_val=self.cfg.training_config.gradient_clip_val,
             gradient_clip_algorithm=self.cfg.training_config.gradient_clip_algorithm,
@@ -215,13 +224,19 @@ class CAREamist:
         self.train_datamodule: Optional[TrainDataModule] = None
         self.pred_datamodule: Optional[PredictDataModule] = None
 
-    def _define_callbacks(self, callbacks: Optional[list[Callback]]) -> None:
+    def _define_callbacks(
+        self, callbacks: Optional[list[Callback]], enable_progress_bar: bool
+    ) -> None:
         """Define the callbacks for the training loop.
 
         Parameters
         ----------
         callbacks : list of Callback, optional
             List of callbacks to use during training and prediction, by default None.
+        enable_progress_bar : bool
+            Whether a progress bar will be displayed during training, validation and
+            prediction. It controls whether a `ProgressBarCallback` is added to the
+            callback list.
         """
         self.callbacks = [] if callbacks is None else callbacks
@@ -251,9 +266,10 @@ class CAREamist:
                     filename=self.cfg.experiment_name,
                     **self.cfg.training_config.checkpoint_callback.model_dump(),
                 ),
-                ProgressBarCallback(),
             ]
         )
+        if enable_progress_bar:
+            self.callbacks.append(ProgressBarCallback())
 
         # early stopping callback
         if self.cfg.training_config.early_stopping_callback is not None:
```
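Taken together, these hunks move progress-bar control out of `TrainingConfig` (see the `careamics/config/training_model.py` hunk below) and into the `CAREamist` constructor. A minimal usage sketch of the new parameter, assuming a valid configuration lives at the placeholder path `config.yml`:

```python
from careamics import CAREamist

# The progress bar stays on by default; passing False now both sets
# enable_progress_bar=False on the Lightning Trainer and skips adding
# the ProgressBarCallback to the callback list.
careamist = CAREamist(
    source="config.yml",
    work_dir="runs",
    enable_progress_bar=False,
)
```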
careamics/config/configuration.py CHANGED

```diff
@@ -4,10 +4,11 @@ from __future__ import annotations
 
 import re
 from pprint import pformat
-from typing import Any, Literal, Union
+from typing import Any, Callable, Literal, Union
 
 from bioimageio.spec.generic.v0_3 import CiteEntry
 from pydantic import BaseModel, ConfigDict, Field, field_validator, model_validator
+from pydantic.main import IncEx
 from typing_extensions import Self
 
 from careamics.config.algorithms import (
@@ -297,17 +298,18 @@ class Configuration(BaseModel):
         self,
         *,
         mode: Literal["json", "python"] | str = "python",
-        include:
-        exclude:
+        include: IncEx | None = None,
+        exclude: IncEx | None = None,
         context: Any | None = None,
-        by_alias: bool = False,
+        by_alias: bool | None = False,
         exclude_unset: bool = False,
         exclude_defaults: bool = False,
         exclude_none: bool = True,
         round_trip: bool = False,
         warnings: bool | Literal["none", "warn", "error"] = True,
+        fallback: Callable[[Any], Any] | None = None,
         serialize_as_any: bool = False,
-    ) -> dict:
+    ) -> dict[str, Any]:
         """
         Override model_dump method in order to set default values.
 
@@ -337,6 +339,8 @@ class Configuration(BaseModel):
             representation.
         warnings : bool | Literal['none', 'warn', 'error'], default=True
             Whether to emit warnings.
+        fallback : Callable[[Any], Any] | None, default=None
+            A function to call when an unknown value is encountered.
         serialize_as_any : bool, default=False
             Whether to serialize all types as Any.
 
@@ -356,6 +360,7 @@ class Configuration(BaseModel):
             exclude_none=exclude_none,
             round_trip=round_trip,
             warnings=warnings,
+            fallback=fallback,
             serialize_as_any=serialize_as_any,
         )
 
```
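The new signature tracks recent pydantic `model_dump` APIs (`IncEx` includes/excludes and the `fallback` hook), which are now passed through to `super().model_dump`. A small sketch of the pass-through behavior, assuming `config` is an existing `Configuration` instance (the field name in the `include` set is illustrative):

```python
# Serialize the configuration, converting any value pydantic cannot
# handle (e.g. a custom object) to a string via the fallback hook.
config_dict = config.model_dump(fallback=str)

# include/exclude accept the usual pydantic IncEx forms.
subset = config.model_dump(include={"experiment_name"})
```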
careamics/config/data/data_model.py CHANGED

```diff
@@ -2,6 +2,8 @@
 
 from __future__ import annotations
 
+import os
+import sys
 from collections.abc import Sequence
 from pprint import pformat
 from typing import Annotated, Any, Literal, Optional, Union
@@ -143,7 +145,7 @@ class DataConfig(BaseModel):
     should include the `shuffle` key, which is set to `True` by default. We strongly
     recommend to keep it as `True` to ensure the best training results."""
 
-    val_dataloader_params: dict[str, Any] = Field(default={})
+    val_dataloader_params: dict[str, Any] = Field(default={}, validate_default=True)
     """Dictionary of PyTorch validation dataloader parameters."""
 
     @field_validator("patch_size")
@@ -210,6 +212,41 @@ class DataConfig(BaseModel):
 
         return axes
 
+    @field_validator("train_dataloader_params", "val_dataloader_params", mode="before")
+    @classmethod
+    def set_default_dataloader_params(
+        cls, dataloader_params: dict[str, Any]
+    ) -> dict[str, Any]:
+        """
+        Set default dataloader parameters if not provided.
+
+        - If 'num_workers' is not set, it defaults to the number of available CPU cores.
+        - If 'pin_memory' is not set, it defaults to True if CUDA is available.
+
+        Parameters
+        ----------
+        dataloader_params : dict of {str: Any}
+            The dataloader parameters.
+
+        Returns
+        -------
+        dict of {str: Any}
+            The dataloader parameters with defaults applied.
+        """
+        if "num_workers" not in dataloader_params:
+            # Use no workers during tests, otherwise use all available CPU cores
+            if "pytest" in sys.modules:
+                dataloader_params["num_workers"] = 0
+            else:
+                dataloader_params["num_workers"] = os.cpu_count()
+
+        if "pin_memory" not in dataloader_params:
+            import torch
+
+            dataloader_params["pin_memory"] = torch.cuda.is_available()
+
+        return dataloader_params
+
     @field_validator("train_dataloader_params")
     @classmethod
     def shuffle_train_dataloader(
```
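Because both fields are now validated with `validate_default=True` and a `mode="before"` validator, the defaults are filled in as soon as a `DataConfig` is constructed. A sketch of the resulting behavior; the required fields used here (`data_type`, `axes`, `patch_size`) are the usual `DataConfig` fields but are shown only for illustration:

```python
import os

from careamics.config import DataConfig

config = DataConfig(data_type="tiff", axes="YX", patch_size=[64, 64])

# Outside a pytest session, num_workers defaults to os.cpu_count() and
# pin_memory to torch.cuda.is_available(), for both dataloaders.
assert config.train_dataloader_params["num_workers"] == os.cpu_count()
assert "pin_memory" in config.val_dataloader_params
```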
careamics/config/optimizer_models.py CHANGED

```diff
@@ -51,9 +51,7 @@ class OptimizerModel(BaseModel):
 
     # Optional parameters, empty dict default value to allow filtering dictionary
     parameters: dict = Field(
-        default={
-            "lr": 1e-4,
-        },
+        default={},
         validate_default=True,
     )
     """Parameters of the optimizer, see PyTorch documentation for more details."""
```
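Dropping the `{"lr": 1e-4}` default means the learning rate now falls through to PyTorch's own optimizer defaults unless set explicitly. A sketch, assuming the model's `name` field accepts torch optimizer names such as `"Adam"`:

```python
from careamics.config.optimizer_models import OptimizerModel

# No "lr" in the config anymore: torch.optim.Adam's own default
# (lr=1e-3) applies unless a learning rate is passed explicitly.
optimizer_config = OptimizerModel(name="Adam")
assert optimizer_config.parameters == {}

explicit = OptimizerModel(name="Adam", parameters={"lr": 1e-4})
```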
careamics/config/training_model.py CHANGED

```diff
@@ -39,8 +39,6 @@ class TrainingConfig(BaseModel):
     """Maximum number of steps to train for. -1 means no limit."""
     check_val_every_n_epoch: int = Field(default=1, ge=1)
     """Validation step frequency."""
-    enable_progress_bar: bool = Field(default=True)
-    """Whether to enable the progress bar."""
     accumulate_grad_batches: int = Field(default=1, ge=1)
     """Number of batches to accumulate gradients over before stepping the optimizer."""
     gradient_clip_val: Optional[Union[int, float]] = None
```
careamics/dataset/dataset_utils/running_stats.py CHANGED

```diff
@@ -21,9 +21,13 @@ def compute_normalization_stats(image: NDArray) -> tuple[NDArray, NDArray]:
     tuple of (list of floats, list of floats)
         Lists of mean and standard deviation values per channel.
     """
-    # Define the
-
-
+    # Define the lists for storing mean and std values
+    means, stds = [], []
+    # Iterate over the channels dimension and compute mean and std
+    for ax in range(image.shape[1]):
+        means.append(image[:, ax, ...].mean())
+        stds.append(image[:, ax, ...].std())
+    return np.stack(means), np.stack(stds)
 
 
 def update_iterative_stats(
```
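The rewritten body computes per-channel statistics over an array whose channel axis is axis 1 (i.e. S, C, (Z), Y, X). A quick check of the per-channel output shapes:

```python
import numpy as np

from careamics.dataset.dataset_utils.running_stats import (
    compute_normalization_stats,
)

# A (S, C, Y, X) stack: 4 samples with 3 channels of 64x64 pixels.
image = np.random.rand(4, 3, 64, 64)

means, stds = compute_normalization_stats(image)
assert means.shape == (3,) and stds.shape == (3,)  # one value per channel
```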
careamics/dataset_ng/README.md ADDED

@@ -0,0 +1,212 @@

````markdown
# The CAREamics Dataset

Welcome to the CAREamics dataset!

A PyTorch-based dataset, designed to be used with microscopy data. It is universal across the training, validation and prediction stages of a machine learning pipeline.

The key ethos is to create a modular and maintainable dataset composed of swappable components that interact through interfaces. This should facilitate a smooth development process when extending the dataset with new features, and also enable advanced users to easily customize the dataset to their needs by writing custom components. This is achieved by following a few key software engineering principles, detailed at the end of this README file.


## Dataset Component overview

```mermaid
---
title: CAREamicsDataset
---
classDiagram
    class CAREamicsDataset{
        +PatchExtractor input_extractor
        +Optional[PatchExtractor] target_extractor
        +PatchingStrategy patching_strategy
        +list~Transform~ transforms
        +\_\_getitem\_\_(int index) NDArray
    }
    class PatchingStrategy{
        <<interface>>
        +n_patches int
        +get_patch_spec(index: int) PatchSpecs
    }
    class RandomPatchingStrategy{
    }
    class FixedRandomPatchingStrategy{
    }
    class SequentialPatchingStrategy{
    }
    class TilingStrategy{
        +get_patch_spec(index: int) TileSpecs
    }

    class PatchExtractor{
        +list~ImageStack~ image_stacks
        +extract_patch(PatchSpecs) NDArray
    }
    class PatchSpecs {
        <<TypedDict>>
        +int data_idx
        +int sample_idx
        +Sequence~int~ coords
        +Sequence~int~ patch_size
    }
    class TileSpecs {
        <<TypedDict>>
        +Sequence~int~ crop_coords
        +Sequence~int~ crop_size
        +Sequence~int~ stitch_coords
    }

    class ImageStack{
        <<interface>>
        +Union[Path, Literal["array"]] source
        +Sequence~int~ data_shape
        +DTypeLike data_type
        +extract_patch(sample_idx, coords, patch_size) NDArray
    }
    class InMemoryImageStack {
    }
    class ZarrImageStack {
        +Path source
    }

    CAREamicsDataset --* PatchExtractor: Is composed of
    CAREamicsDataset --* PatchingStrategy: Is composed of
    PatchExtractor --o ImageStack: Aggregates
    ImageStack <|-- InMemoryImageStack: Implements
    ImageStack <|-- ZarrImageStack: Implements
    PatchingStrategy <|-- RandomPatchingStrategy: Implements
    PatchingStrategy <|-- FixedRandomPatchingStrategy: Implements
    PatchingStrategy <|-- SequentialPatchingStrategy: Implements
    PatchingStrategy <|-- TilingStrategy: Implements
    PatchSpecs <|-- TileSpecs: Inherits from
```

### `ImageStack` and implementations

This interface represents a set of image data, which can be saved with any subset of the
axes STCZYX, in any order; see below for a description of the dimensions. The `ImageStack`
interface's job is to act as an adapter for different data storage types, so that higher-level
classes can access the image data without having to know the implementation details of
how to load or read data from each storage type. This means we can decide to support new storage
types by implementing a new concrete `ImageStack` class without having to change anything
in the `CAREamicsDataset` class. Advanced users can also choose to create their own
`ImageStack` class if they want to work with their own data storage type.

The interface provides an `extract_patch` method which will produce a patch from the image,
as a NumPy array, with the dimensions C(Z)YX. This method should be thought of as simply
a wrapper for the equivalent of NumPy slicing for each of the storage types.

#### Concrete implementations

- `InMemoryImageStack`: The underlying data is stored as a NumPy array in memory. It has some
additional constructor methods to load the data from known file formats such as TIFF files.
- `ZarrImageStack`: The underlying data is stored as a Zarr file on disk.

#### Axes description

- S is a generic sample dimension,
- T is a time dimension,
- C is a channel dimension,
- Z is a spatial dimension,
- Y is a spatial dimension,
- X is a spatial dimension.

### `PatchExtractor`

The `PatchExtractor` class aggregates many `ImageStack` instances; this allows multiple
images with different dimensions, and possibly different storage types, to be treated as a single entity.
The class has an `extract_patch` method to extract a patch from any one of its `ImageStack`
objects. It can also be extended when extra logic to extract patches is needed,
for example when constructing lateral-context inputs for the MicroSplit LVAE models.

### `PatchingStrategy`

The `PatchingStrategy` class is an interface for generating patch specifications, where each of the
concrete implementations produces a set of patch specifications using a different strategy.

It has an `n_patches` attribute that can be accessed to find out how many patches the
strategy will produce, given the shapes of the image stacks it has been initialized with.
This is needed by the `CAREamicsDataset` to return its length.

Most importantly, it has a `get_patch_spec` method that takes an index and returns a
patch specification. For deterministic patching strategies, this method will always
return the same patch specification given the same index, but there are also random strategies
where the returned patch specification will change every time. The given index must always
be less than `n_patches`.

#### Concrete implementations

- `RandomPatchingStrategy`: this strategy will produce random patches that will change
even if the `get_patch_spec` method is called with the same index.
- `FixedRandomPatchingStrategy`: this strategy will produce random patches, but the patch
will be the same if the `get_patch_spec` method is called with the same index. This is
useful for making sure validation is comparable epoch to epoch.
- `SequentialPatchingStrategy`: this strategy is deterministic and the patches will be
sequential with some specified overlap.
- `TilingStrategy`: this strategy is deterministic and the patches will be
sequential with some specified overlap. Rather than a `PatchSpecs` dictionary, it will
produce a `TileSpecs` dictionary, which includes some extra fields that are used for
stitching the tiles back together.

#### PatchSpecs

The `get_patch_spec` method returns a dictionary containing the keys `data_idx`, `sample_idx`, `coords` and `patch_size`.
These are the exact arguments that the `PatchExtractor.extract_patch` method takes. The patch specification
produced by the patching strategy is passed to the `PatchExtractor`, which in turn produces an image patch.

For type hinting, `PatchSpecs` is defined as a `TypedDict`.

## Key Principles

The aim of all these principles is to create a system of interacting classes that have
low coupling. This allows one section to be changed or extended without breaking functionality
elsewhere in the codebase.

### Composition over inheritance

The principle of composition over inheritance is: rather than using inheritance to
extend or change the behavior of a class, a class can be composed of modules
that can be swapped to extend or change behavior.

The reason to use composition is that it promotes the easy reuse of the underlying
components, it can prevent a subclass explosion, and it leads to a maintainable and
easily extendable design. A software architecture based on composition is normally
maintainable and extendable because, if a component needs to change, the whole class
shouldn't have to be refactored, and if a new feature needs to be added, usually an additional
component can be added to the class.

The `CAREamicsDataset` is composed of `PatchExtractor`, `PatchingStrategy` and `Transform` components.
The `PatchingStrategy` classes implement an interface so the dataset can switch between
different strategies. The `PatchExtractor` is composed of many `ImageStack` instances;
new image stacks can be added to extend the type of data that the dataset can read from.

### Dependency Inversion

The dependency inversion principle states:

1. High-level modules should not depend on low-level modules. Both high-level and
low-level modules should depend on abstractions (e.g. interfaces).
2. Abstractions should not depend on details (concrete implementations). Details should
depend on abstractions.

In other words, high-level modules that provide complex logic should be easily reusable
and not depend on implementation details of low-level modules that provide utility functionality.
This can be achieved by introducing abstractions that decouple high- and low-level modules.

An example of the dependency inversion principle in use is how the `PatchExtractor` only
depends on the `ImageStack` interface, and does not have to have any knowledge of the
concrete implementations. The concrete `ImageStack` implementations also do not have
any knowledge of the `PatchExtractor` or any other higher-level functionality that the
dataset needs.

### Single Responsibility Principle

Each component should have a small scope of responsibility that is easily defined. This
should make the code easier to maintain and hopefully reduce the number of places in the
code that have to change when introducing a new feature.

- `ImageStack` responsibility: to act as an adapter for loading and reading image data
from different underlying storage.
- `PatchExtractor` responsibility: to extract patches from a set of image stacks.
- `PatchingStrategy` responsibility: to produce patch specifications given an index, through
an interface that hides the underlying implementation.
- `CAREamicsDataset` responsibility: to orchestrate the interactions of its underlying
components to produce an input patch (and target patch when required) given an index.
````
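The `ImageStack` protocol described in this README is small enough to implement against directly. Below is a hypothetical sketch of a custom stack backed by an HDF5 file; the attribute names follow the class diagram (`source`, `data_shape`, `data_type`), while `HDF5ImageStack` itself and the `h5py` usage are illustrative and not part of CAREamics:

```python
from collections.abc import Sequence
from pathlib import Path

import h5py
from numpy.typing import DTypeLike, NDArray


class HDF5ImageStack:
    """Hypothetical ImageStack over a single HDF5 dataset stored as SCYX."""

    def __init__(self, path: Path, dataset_name: str) -> None:
        self.source = path
        self._file = h5py.File(path, "r")
        self._dataset = self._file[dataset_name]
        self.data_shape: Sequence[int] = self._dataset.shape
        self.data_type: DTypeLike = self._dataset.dtype

    def extract_patch(
        self, sample_idx: int, coords: Sequence[int], patch_size: Sequence[int]
    ) -> NDArray:
        # The equivalent of NumPy slicing for this storage type: read only
        # the requested CYX region of one sample from disk.
        y, x = coords
        height, width = patch_size
        return self._dataset[sample_idx, :, y : y + height, x : x + width]
```

Because `PatchExtractor` depends only on this interface, such a class could sit alongside the built-in `InMemoryImageStack` and `ZarrImageStack` without any change to the dataset code.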
careamics/dataset_ng/dataset.py ADDED

@@ -0,0 +1,233 @@

```python
from collections.abc import Sequence
from enum import Enum
from pathlib import Path
from typing import Any, Generic, Literal, NamedTuple, Optional, Union

import numpy as np
from numpy.typing import NDArray
from torch.utils.data import Dataset
from tqdm.auto import tqdm

from careamics.config import DataConfig, InferenceConfig
from careamics.config.transformations import NormalizeModel
from careamics.dataset.dataset_utils.running_stats import WelfordStatistics
from careamics.dataset.patching.patching import Stats
from careamics.dataset_ng.patch_extractor import GenericImageStack, PatchExtractor
from careamics.dataset_ng.patching_strategies import (
    FixedRandomPatchingStrategy,
    PatchingStrategy,
    PatchSpecs,
    RandomPatchingStrategy,
    TilingStrategy,
    WholeSamplePatchingStrategy,
)
from careamics.transforms import Compose


class Mode(str, Enum):
    TRAINING = "training"
    VALIDATING = "validating"
    PREDICTING = "predicting"


class ImageRegionData(NamedTuple):
    data: NDArray
    source: Union[str, Literal["array"]]
    data_shape: Sequence[int]
    dtype: str  # dtype should be str for collate
    axes: str
    region_spec: PatchSpecs


InputType = Union[Sequence[NDArray[Any]], Sequence[Path]]


class CareamicsDataset(Dataset, Generic[GenericImageStack]):
    def __init__(
        self,
        data_config: Union[DataConfig, InferenceConfig],
        mode: Mode,
        input_extractor: PatchExtractor[GenericImageStack],
        target_extractor: Optional[PatchExtractor[GenericImageStack]] = None,
    ):
        self.config = data_config
        self.mode = mode

        self.input_extractor = input_extractor
        self.target_extractor = target_extractor

        self.patching_strategy = self._initialize_patching_strategy()

        self.input_stats, self.target_stats = self._initialize_statistics()

        self.transforms = self._initialize_transforms()

    def _initialize_patching_strategy(self) -> PatchingStrategy:
        patching_strategy: PatchingStrategy
        if self.mode == Mode.TRAINING:
            if isinstance(self.config, InferenceConfig):
                raise ValueError("Inference config cannot be used for training.")
            patching_strategy = RandomPatchingStrategy(
                data_shapes=self.input_extractor.shape,
                patch_size=self.config.patch_size,
                # TODO: Add random seed to dataconfig
                seed=getattr(self.config, "random_seed", 42),
            )
        elif self.mode == Mode.VALIDATING:
            if isinstance(self.config, InferenceConfig):
                raise ValueError("Inference config cannot be used for validating.")
            patching_strategy = FixedRandomPatchingStrategy(
                data_shapes=self.input_extractor.shape,
                patch_size=self.config.patch_size,
                # TODO: Add random seed to dataconfig
                seed=getattr(self.config, "random_seed", 42),
            )
        elif self.mode == Mode.PREDICTING:
            if not isinstance(self.config, InferenceConfig):
                raise ValueError("Inference config must be used for predicting.")
            if (self.config.tile_size is not None) and (
                self.config.tile_overlap is not None
            ):
                patching_strategy = TilingStrategy(
                    data_shapes=self.input_extractor.shape,
                    tile_size=self.config.tile_size,
                    overlaps=self.config.tile_overlap,
                )
            else:
                patching_strategy = WholeSamplePatchingStrategy(
                    data_shapes=self.input_extractor.shape
                )
        else:
            raise ValueError(f"Unrecognised dataset mode {self.mode}.")

        return patching_strategy

    def _initialize_transforms(self) -> Optional[Compose]:
        if isinstance(self.config, DataConfig):
            if self.mode == Mode.TRAINING:
                # TODO: initialize normalization separately depending on configuration
                return Compose(
                    transform_list=[
                        NormalizeModel(
                            image_means=self.input_stats.means,
                            image_stds=self.input_stats.stds,
                            target_means=self.target_stats.means,
                            target_stds=self.target_stats.stds,
                        )
                    ]
                    + list(self.config.transforms)
                )

        # TODO: add TTA
        return Compose(
            transform_list=[
                NormalizeModel(
                    image_means=self.input_stats.means,
                    image_stds=self.input_stats.stds,
                    target_means=self.target_stats.means,
                    target_stds=self.target_stats.stds,
                )
            ]
        )

    def _calculate_stats(
        self, data_extractor: PatchExtractor[GenericImageStack]
    ) -> Stats:
        image_stats = WelfordStatistics()
        n_patches = self.patching_strategy.n_patches

        for idx in tqdm(range(n_patches), desc="Computing statistics"):
            patch_spec = self.patching_strategy.get_patch_spec(idx)
            patch = data_extractor.extract_patch(
                data_idx=patch_spec["data_idx"],
                sample_idx=patch_spec["sample_idx"],
                coords=patch_spec["coords"],
                patch_size=patch_spec["patch_size"],
            )
            # TODO: statistics accept SCYX format, while patch is CYX
            image_stats.update(patch[None, ...], sample_idx=idx)

        image_means, image_stds = image_stats.finalize()
        return Stats(image_means, image_stds)

    # TODO: add running stats
    def _initialize_statistics(self) -> tuple[Stats, Stats]:
        if self.config.image_means is not None and self.config.image_stds is not None:
            input_stats = Stats(self.config.image_means, self.config.image_stds)
        else:
            input_stats = self._calculate_stats(self.input_extractor)

        target_stats = Stats((), ())
        if isinstance(self.config, DataConfig):
            if (
                self.config.target_means is not None
                and self.config.target_stds is not None
            ):
                target_stats = Stats(self.config.target_means, self.config.target_stds)
            elif self.target_extractor is not None:
                target_stats = self._calculate_stats(self.target_extractor)

        return input_stats, target_stats

    def __len__(self):
        return self.patching_strategy.n_patches

    def _create_image_region(
        self, patch: np.ndarray, patch_spec: PatchSpecs, extractor: PatchExtractor
    ) -> ImageRegionData:
        data_idx = patch_spec["data_idx"]
        source = extractor.image_stacks[data_idx].source
        return ImageRegionData(
            data=patch,
            source=str(source),
            dtype=str(extractor.image_stacks[data_idx].data_dtype),
            data_shape=extractor.image_stacks[data_idx].data_shape,
            # TODO: should it be axes of the original image instead?
            axes=self.config.axes,
            region_spec=patch_spec,
        )

    def __getitem__(
        self, index: int
    ) -> Union[tuple[ImageRegionData], tuple[ImageRegionData, ImageRegionData]]:
        patch_spec = self.patching_strategy.get_patch_spec(index)
        input_patch = self.input_extractor.extract_patch(
            data_idx=patch_spec["data_idx"],
            sample_idx=patch_spec["sample_idx"],
            coords=patch_spec["coords"],
            patch_size=patch_spec["patch_size"],
        )

        target_patch = (
            self.target_extractor.extract_patch(
                data_idx=patch_spec["data_idx"],
                sample_idx=patch_spec["sample_idx"],
                coords=patch_spec["coords"],
                patch_size=patch_spec["patch_size"],
            )
            if self.target_extractor is not None
            else None
        )

        if self.transforms is not None:
            if self.target_extractor is not None:
                input_patch, target_patch = self.transforms(input_patch, target_patch)
            else:
                # TODO: compose doesn't return None for target patch anymore
                # so have to do this annoying if else
                (input_patch,) = self.transforms(input_patch, target_patch)
                target_patch = None

        input_data = self._create_image_region(
            patch=input_patch, patch_spec=patch_spec, extractor=self.input_extractor
        )

        if target_patch is not None and self.target_extractor is not None:
            target_data = self._create_image_region(
                patch=target_patch,
                patch_spec=patch_spec,
                extractor=self.target_extractor,
            )
            return input_data, target_data
        else:
            return (input_data,)
```
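A rough sketch of how the modes drive `CareamicsDataset`, assuming the input and target `PatchExtractor` instances have already been built (for example via the factories added in `careamics/dataset_ng/factory.py`, whose signatures are not shown in this diff); `data_config`, `input_extractor` and `target_extractor` below are placeholders:

```python
from careamics.dataset_ng.dataset import CareamicsDataset, Mode

# TRAINING draws fresh random patches each epoch; VALIDATING fixes the
# random patches per index; PREDICTING tiles or returns whole samples.
train_dataset = CareamicsDataset(
    data_config=data_config,            # a DataConfig (placeholder)
    mode=Mode.TRAINING,
    input_extractor=input_extractor,    # PatchExtractor (placeholder)
    target_extractor=target_extractor,  # PatchExtractor or None
)

sample = train_dataset[0]
if len(sample) == 2:                    # input and target regions
    input_region, target_region = sample
else:                                   # input-only, e.g. Noise2Void
    (input_region,) = sample
print(input_region.data.shape, input_region.region_spec["coords"])
```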