PyPI - careamics - Versions diffs - 0.0.19__py3-none-any.whl - Mend

careamics 0.0.19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (279) hide show

careamics/__init__.py +24 -0
careamics/careamist.py +961 -0
careamics/cli/__init__.py +5 -0
careamics/cli/conf.py +394 -0
careamics/cli/main.py +234 -0
careamics/cli/utils.py +27 -0
careamics/config/__init__.py +66 -0
careamics/config/algorithms/__init__.py +21 -0
careamics/config/algorithms/care_algorithm_config.py +122 -0
careamics/config/algorithms/hdn_algorithm_config.py +103 -0
careamics/config/algorithms/microsplit_algorithm_config.py +103 -0
careamics/config/algorithms/n2n_algorithm_config.py +115 -0
careamics/config/algorithms/n2v_algorithm_config.py +296 -0
careamics/config/algorithms/pn2v_algorithm_config.py +301 -0
careamics/config/algorithms/unet_algorithm_config.py +91 -0
careamics/config/algorithms/vae_algorithm_config.py +178 -0
careamics/config/architectures/__init__.py +7 -0
careamics/config/architectures/architecture_config.py +37 -0
careamics/config/architectures/lvae_config.py +262 -0
careamics/config/architectures/unet_config.py +125 -0
careamics/config/configuration.py +367 -0
careamics/config/configuration_factories.py +2400 -0
careamics/config/data/__init__.py +27 -0
careamics/config/data/data_config.py +472 -0
careamics/config/data/inference_config.py +237 -0
careamics/config/data/ng_data_config.py +1038 -0
careamics/config/data/patch_filter/__init__.py +15 -0
careamics/config/data/patch_filter/filter_config.py +16 -0
careamics/config/data/patch_filter/mask_filter_config.py +17 -0
careamics/config/data/patch_filter/max_filter_config.py +15 -0
careamics/config/data/patch_filter/meanstd_filter_config.py +18 -0
careamics/config/data/patch_filter/shannon_filter_config.py +15 -0
careamics/config/data/patching_strategies/__init__.py +15 -0
careamics/config/data/patching_strategies/_overlapping_patched_config.py +102 -0
careamics/config/data/patching_strategies/_patched_config.py +56 -0
careamics/config/data/patching_strategies/random_patching_config.py +45 -0
careamics/config/data/patching_strategies/sequential_patching_config.py +25 -0
careamics/config/data/patching_strategies/tiled_patching_config.py +40 -0
careamics/config/data/patching_strategies/whole_patching_config.py +12 -0
careamics/config/data/tile_information.py +65 -0
careamics/config/lightning/__init__.py +15 -0
careamics/config/lightning/callbacks/__init__.py +8 -0
careamics/config/lightning/callbacks/callback_config.py +116 -0
careamics/config/lightning/optimizer_configs.py +186 -0
careamics/config/lightning/training_config.py +70 -0
careamics/config/losses/__init__.py +8 -0
careamics/config/losses/loss_config.py +60 -0
careamics/config/ng_configs/__init__.py +5 -0
careamics/config/ng_configs/n2v_configuration.py +64 -0
careamics/config/ng_configs/ng_configuration.py +256 -0
careamics/config/ng_factories/__init__.py +9 -0
careamics/config/ng_factories/algorithm_factory.py +120 -0
careamics/config/ng_factories/data_factory.py +154 -0
careamics/config/ng_factories/n2v_factory.py +256 -0
careamics/config/ng_factories/training_factory.py +69 -0
careamics/config/noise_model/__init__.py +12 -0
careamics/config/noise_model/likelihood_config.py +60 -0
careamics/config/noise_model/noise_model_config.py +149 -0
careamics/config/support/__init__.py +31 -0
careamics/config/support/supported_activations.py +27 -0
careamics/config/support/supported_algorithms.py +40 -0
careamics/config/support/supported_architectures.py +13 -0
careamics/config/support/supported_data.py +122 -0
careamics/config/support/supported_filters.py +17 -0
careamics/config/support/supported_loggers.py +10 -0
careamics/config/support/supported_losses.py +32 -0
careamics/config/support/supported_optimizers.py +57 -0
careamics/config/support/supported_patching_strategies.py +22 -0
careamics/config/support/supported_pixel_manipulations.py +15 -0
careamics/config/support/supported_struct_axis.py +21 -0
careamics/config/support/supported_transforms.py +12 -0
careamics/config/transformations/__init__.py +22 -0
careamics/config/transformations/n2v_manipulate_config.py +79 -0
careamics/config/transformations/normalize_config.py +59 -0
careamics/config/transformations/transform_config.py +45 -0
careamics/config/transformations/transform_unions.py +29 -0
careamics/config/transformations/xy_flip_config.py +43 -0
careamics/config/transformations/xy_random_rotate90_config.py +35 -0
careamics/config/utils/__init__.py +8 -0
careamics/config/utils/configuration_io.py +85 -0
careamics/config/validators/__init__.py +18 -0
careamics/config/validators/axes_validators.py +90 -0
careamics/config/validators/model_validators.py +84 -0
careamics/config/validators/patch_validators.py +55 -0
careamics/conftest.py +39 -0
careamics/dataset/__init__.py +17 -0
careamics/dataset/dataset_utils/__init__.py +19 -0
careamics/dataset/dataset_utils/dataset_utils.py +118 -0
careamics/dataset/dataset_utils/file_utils.py +141 -0
careamics/dataset/dataset_utils/iterate_over_files.py +84 -0
careamics/dataset/dataset_utils/running_stats.py +189 -0
careamics/dataset/in_memory_dataset.py +303 -0
careamics/dataset/in_memory_pred_dataset.py +88 -0
careamics/dataset/in_memory_tiled_pred_dataset.py +131 -0
careamics/dataset/iterable_dataset.py +294 -0
careamics/dataset/iterable_pred_dataset.py +121 -0
careamics/dataset/iterable_tiled_pred_dataset.py +141 -0
careamics/dataset/patching/__init__.py +1 -0
careamics/dataset/patching/patching.py +300 -0
careamics/dataset/patching/random_patching.py +110 -0
careamics/dataset/patching/sequential_patching.py +212 -0
careamics/dataset/patching/validate_patch_dimension.py +64 -0
careamics/dataset/tiling/__init__.py +10 -0
careamics/dataset/tiling/collate_tiles.py +33 -0
careamics/dataset/tiling/lvae_tiled_patching.py +375 -0
careamics/dataset/tiling/tiled_patching.py +166 -0
careamics/dataset_ng/README.md +212 -0
careamics/dataset_ng/__init__.py +0 -0
careamics/dataset_ng/dataset.py +365 -0
careamics/dataset_ng/demos/bsd68_demo.ipynb +361 -0
careamics/dataset_ng/demos/bsd68_zarr_demo.ipynb +453 -0
careamics/dataset_ng/demos/care_U2OS_demo.ipynb +330 -0
careamics/dataset_ng/demos/demo_custom_image_stack.ipynb +736 -0
careamics/dataset_ng/demos/demo_datamodule.ipynb +447 -0
careamics/dataset_ng/demos/demo_dataset.ipynb +278 -0
careamics/dataset_ng/demos/demo_patch_extractor.py +51 -0
careamics/dataset_ng/demos/mouse_nuclei_demo.ipynb +293 -0
careamics/dataset_ng/factory.py +180 -0
careamics/dataset_ng/grouped_index_sampler.py +73 -0
careamics/dataset_ng/image_stack/__init__.py +14 -0
careamics/dataset_ng/image_stack/czi_image_stack.py +396 -0
careamics/dataset_ng/image_stack/file_image_stack.py +140 -0
careamics/dataset_ng/image_stack/image_stack_protocol.py +93 -0
careamics/dataset_ng/image_stack/image_utils/__init__.py +6 -0
careamics/dataset_ng/image_stack/image_utils/image_stack_utils.py +125 -0
careamics/dataset_ng/image_stack/in_memory_image_stack.py +93 -0
careamics/dataset_ng/image_stack/zarr_image_stack.py +170 -0
careamics/dataset_ng/image_stack_loader/__init__.py +19 -0
careamics/dataset_ng/image_stack_loader/image_stack_loader_protocol.py +70 -0
careamics/dataset_ng/image_stack_loader/image_stack_loaders.py +273 -0
careamics/dataset_ng/image_stack_loader/zarr_utils.py +130 -0
careamics/dataset_ng/legacy_interoperability.py +175 -0
careamics/dataset_ng/microsplit_input_synth.py +377 -0
careamics/dataset_ng/patch_extractor/__init__.py +7 -0
careamics/dataset_ng/patch_extractor/limit_file_extractor.py +50 -0
careamics/dataset_ng/patch_extractor/patch_construction.py +151 -0
careamics/dataset_ng/patch_extractor/patch_extractor.py +117 -0
careamics/dataset_ng/patch_filter/__init__.py +20 -0
careamics/dataset_ng/patch_filter/coordinate_filter_protocol.py +27 -0
careamics/dataset_ng/patch_filter/filter_factory.py +95 -0
careamics/dataset_ng/patch_filter/mask_filter.py +96 -0
careamics/dataset_ng/patch_filter/max_filter.py +188 -0
careamics/dataset_ng/patch_filter/mean_std_filter.py +218 -0
careamics/dataset_ng/patch_filter/patch_filter_protocol.py +50 -0
careamics/dataset_ng/patch_filter/shannon_filter.py +188 -0
careamics/dataset_ng/patching_strategies/__init__.py +26 -0
careamics/dataset_ng/patching_strategies/patching_strategy_factory.py +50 -0
careamics/dataset_ng/patching_strategies/patching_strategy_protocol.py +161 -0
careamics/dataset_ng/patching_strategies/random_patching.py +393 -0
careamics/dataset_ng/patching_strategies/sequential_patching.py +99 -0
careamics/dataset_ng/patching_strategies/tiling_strategy.py +207 -0
careamics/dataset_ng/patching_strategies/whole_sample.py +61 -0
careamics/file_io/__init__.py +15 -0
careamics/file_io/read/__init__.py +11 -0
careamics/file_io/read/get_func.py +57 -0
careamics/file_io/read/tiff.py +58 -0
careamics/file_io/write/__init__.py +15 -0
careamics/file_io/write/get_func.py +63 -0
careamics/file_io/write/tiff.py +40 -0
careamics/lightning/__init__.py +32 -0
careamics/lightning/callbacks/__init__.py +13 -0
careamics/lightning/callbacks/data_stats_callback.py +33 -0
careamics/lightning/callbacks/hyperparameters_callback.py +49 -0
careamics/lightning/callbacks/prediction_writer_callback/__init__.py +20 -0
careamics/lightning/callbacks/prediction_writer_callback/file_path_utils.py +56 -0
careamics/lightning/callbacks/prediction_writer_callback/prediction_writer_callback.py +234 -0
careamics/lightning/callbacks/prediction_writer_callback/write_strategy.py +399 -0
careamics/lightning/callbacks/prediction_writer_callback/write_strategy_factory.py +215 -0
careamics/lightning/callbacks/progress_bar_callback.py +90 -0
careamics/lightning/dataset_ng/__init__.py +1 -0
careamics/lightning/dataset_ng/callbacks/__init__.py +1 -0
careamics/lightning/dataset_ng/callbacks/prediction_writer/__init__.py +29 -0
careamics/lightning/dataset_ng/callbacks/prediction_writer/cached_tiles_strategy.py +164 -0
careamics/lightning/dataset_ng/callbacks/prediction_writer/file_path_utils.py +33 -0
careamics/lightning/dataset_ng/callbacks/prediction_writer/prediction_writer_callback.py +219 -0
careamics/lightning/dataset_ng/callbacks/prediction_writer/write_image_strategy.py +91 -0
careamics/lightning/dataset_ng/callbacks/prediction_writer/write_strategy.py +27 -0
careamics/lightning/dataset_ng/callbacks/prediction_writer/write_strategy_factory.py +214 -0
careamics/lightning/dataset_ng/callbacks/prediction_writer/write_tiles_zarr_strategy.py +375 -0
careamics/lightning/dataset_ng/data_module.py +529 -0
careamics/lightning/dataset_ng/data_module_utils.py +395 -0
careamics/lightning/dataset_ng/lightning_modules/__init__.py +9 -0
careamics/lightning/dataset_ng/lightning_modules/care_module.py +97 -0
careamics/lightning/dataset_ng/lightning_modules/n2v_module.py +106 -0
careamics/lightning/dataset_ng/lightning_modules/unet_module.py +221 -0
careamics/lightning/dataset_ng/prediction/__init__.py +16 -0
careamics/lightning/dataset_ng/prediction/convert_prediction.py +198 -0
careamics/lightning/dataset_ng/prediction/stitch_prediction.py +171 -0
careamics/lightning/lightning_module.py +914 -0
careamics/lightning/microsplit_data_module.py +632 -0
careamics/lightning/predict_data_module.py +341 -0
careamics/lightning/train_data_module.py +666 -0
careamics/losses/__init__.py +21 -0
careamics/losses/fcn/__init__.py +1 -0
careamics/losses/fcn/losses.py +125 -0
careamics/losses/loss_factory.py +80 -0
careamics/losses/lvae/__init__.py +1 -0
careamics/losses/lvae/loss_utils.py +83 -0
careamics/losses/lvae/losses.py +589 -0
careamics/lvae_training/__init__.py +0 -0
careamics/lvae_training/calibration.py +191 -0
careamics/lvae_training/dataset/__init__.py +20 -0
careamics/lvae_training/dataset/config.py +135 -0
careamics/lvae_training/dataset/lc_dataset.py +274 -0
careamics/lvae_training/dataset/ms_dataset_ref.py +1067 -0
careamics/lvae_training/dataset/multich_dataset.py +1121 -0
careamics/lvae_training/dataset/multicrop_dset.py +196 -0
careamics/lvae_training/dataset/multifile_dataset.py +335 -0
careamics/lvae_training/dataset/types.py +32 -0
careamics/lvae_training/dataset/utils/__init__.py +0 -0
careamics/lvae_training/dataset/utils/data_utils.py +114 -0
careamics/lvae_training/dataset/utils/empty_patch_fetcher.py +65 -0
careamics/lvae_training/dataset/utils/index_manager.py +491 -0
careamics/lvae_training/dataset/utils/index_switcher.py +165 -0
careamics/lvae_training/eval_utils.py +987 -0
careamics/lvae_training/get_config.py +84 -0
careamics/lvae_training/lightning_module.py +701 -0
careamics/lvae_training/metrics.py +214 -0
careamics/lvae_training/train_lvae.py +342 -0
careamics/lvae_training/train_utils.py +121 -0
careamics/model_io/__init__.py +7 -0
careamics/model_io/bioimage/__init__.py +11 -0
careamics/model_io/bioimage/_readme_factory.py +113 -0
careamics/model_io/bioimage/bioimage_utils.py +56 -0
careamics/model_io/bioimage/cover_factory.py +171 -0
careamics/model_io/bioimage/model_description.py +341 -0
careamics/model_io/bmz_io.py +251 -0
careamics/model_io/model_io_utils.py +95 -0
careamics/models/__init__.py +5 -0
careamics/models/activation.py +40 -0
careamics/models/layers.py +495 -0
careamics/models/lvae/__init__.py +3 -0
careamics/models/lvae/layers.py +1371 -0
careamics/models/lvae/likelihoods.py +394 -0
careamics/models/lvae/lvae.py +848 -0
careamics/models/lvae/noise_models.py +738 -0
careamics/models/lvae/stochastic.py +394 -0
careamics/models/lvae/utils.py +404 -0
careamics/models/model_factory.py +54 -0
careamics/models/unet.py +449 -0
careamics/nm_training_placeholder.py +203 -0
careamics/prediction_utils/__init__.py +21 -0
careamics/prediction_utils/lvae_prediction.py +158 -0
careamics/prediction_utils/lvae_tiling_manager.py +362 -0
careamics/prediction_utils/prediction_outputs.py +238 -0
careamics/prediction_utils/stitch_prediction.py +193 -0
careamics/py.typed +5 -0
careamics/transforms/__init__.py +22 -0
careamics/transforms/compose.py +173 -0
careamics/transforms/n2v_manipulate.py +150 -0
careamics/transforms/n2v_manipulate_torch.py +149 -0
careamics/transforms/normalize.py +374 -0
careamics/transforms/pixel_manipulation.py +406 -0
careamics/transforms/pixel_manipulation_torch.py +388 -0
careamics/transforms/struct_mask_parameters.py +20 -0
careamics/transforms/transform.py +24 -0
careamics/transforms/tta.py +88 -0
careamics/transforms/xy_flip.py +131 -0
careamics/transforms/xy_random_rotate90.py +108 -0
careamics/utils/__init__.py +19 -0
careamics/utils/autocorrelation.py +40 -0
careamics/utils/base_enum.py +60 -0
careamics/utils/context.py +67 -0
careamics/utils/deprecation.py +63 -0
careamics/utils/lightning_utils.py +71 -0
careamics/utils/logging.py +323 -0
careamics/utils/metrics.py +394 -0
careamics/utils/path_utils.py +26 -0
careamics/utils/plotting.py +76 -0
careamics/utils/ram.py +15 -0
careamics/utils/receptive_field.py +108 -0
careamics/utils/serializers.py +62 -0
careamics/utils/torch_utils.py +150 -0
careamics/utils/version.py +38 -0
careamics-0.0.19.dist-info/METADATA +80 -0
careamics-0.0.19.dist-info/RECORD +279 -0
careamics-0.0.19.dist-info/WHEEL +4 -0
careamics-0.0.19.dist-info/entry_points.txt +2 -0
careamics-0.0.19.dist-info/licenses/LICENSE +28 -0

careamics/dataset_ng/factory.py ADDED Viewed

@@ -0,0 +1,180 @@
+from functools import partial
+from typing import Any
+from typing_extensions import ParamSpec
+from careamics.config.data.ng_data_config import NGDataConfig
+from careamics.config.support import SupportedData
+from careamics.file_io.read import ReadFunc
+from .dataset import CareamicsDataset
+from .image_stack import (
+    GenericImageStack,
+    ImageStack,
+)
+from .image_stack_loader import (
+    ImageStackLoader,
+    load_arrays,
+    load_custom_file,
+    load_czis,
+    load_iter_tiff,
+    load_tiffs,
+    load_zarrs,
+)
+from .patch_extractor import LimitFilesPatchExtractor, PatchExtractor
+P = ParamSpec("P")
+# Convenience function; prefer using the `create_dataloader` function instead.
+# For lazy loading, a custom batch sampler also needs to be set.
+def create_dataset(
+    config: NGDataConfig,
+    inputs: Any,
+    targets: Any,
+    masks: Any = None,
+    read_func: ReadFunc | None = None,
+    read_kwargs: dict[str, Any] | None = None,
+    image_stack_loader: ImageStackLoader | None = None,
+    image_stack_loader_kwargs: dict[str, Any] | None = None,
+) -> CareamicsDataset[ImageStack]:
+    """
+    Convenience function to create the CAREamicsDataset.
+    Parameters
+    ----------
+    config : DataConfig or InferenceConfig
+        The data configuration.
+    inputs : Any
+        The input sources to the dataset.
+    targets : Any, optional
+        The target sources to the dataset.
+    masks : Any, optional
+        The mask sources used to filter patches.
+    read_func : ReadFunc, optional
+        A function that can that can be used to load custom data. This argument is
+        ignored unless the `data_type` in the `config` is "custom".
+    read_kwargs : dict of {str, Any}, optional
+        Additional key-word arguments to pass to the `read_func`.
+    image_stack_loader : ImageStackLoader, optional
+        A function for custom image stack loading. This argument is ignored unless the
+        `data_type` in the `config` is "custom".
+    image_stack_loader_kwargs : {str, Any}, optional
+        Additional key-word arguments to pass to the `image_stack_loader`.
+    """
+    image_stack_loader = select_image_stack_loader(
+        data_type=SupportedData(config.data_type),
+        in_memory=config.in_memory,
+        read_func=read_func,
+        read_kwargs=read_kwargs,
+        image_stack_loader=image_stack_loader,
+        image_stack_loader_kwargs=image_stack_loader_kwargs,
+    )
+    patch_extractor_type = select_patch_extractor_type(
+        data_type=SupportedData(config.data_type), in_memory=config.in_memory
+    )
+    input_extractor = init_patch_extractor(
+        patch_extractor_type, image_stack_loader, inputs, config.axes
+    )
+    if targets is not None:
+        target_extractor = init_patch_extractor(
+            patch_extractor_type, image_stack_loader, targets, config.axes
+        )
+    else:
+        target_extractor = None
+    if masks is not None:
+        mask_extractor = init_patch_extractor(
+            patch_extractor_type, image_stack_loader, masks, config.axes
+        )
+    else:
+        mask_extractor = None
+    return CareamicsDataset(
+        data_config=config,
+        input_extractor=input_extractor,
+        target_extractor=target_extractor,
+        mask_extractor=mask_extractor,
+    )
+def init_patch_extractor(
+    patch_extractor: type[PatchExtractor],
+    image_stack_loader: ImageStackLoader[..., GenericImageStack],
+    source: Any,
+    axes: str,
+) -> PatchExtractor[GenericImageStack]:
+    image_stacks = image_stack_loader(source, axes)
+    return patch_extractor(image_stacks)
+def select_patch_extractor_type(
+    data_type: SupportedData,
+    in_memory: bool,
+) -> type[PatchExtractor]:
+    """Select the appropriate PatchExtractor type based on data type and memory mode.
+    If `in_memory` is True, or `data_type` is ZARR or CZI, the standard
+    `PatchExtractor` is selected, otherwise the `LimitFilesPatchExtractor` will be used.
+    Parameters
+    ----------
+    data_type : SupportedData
+        The type of data being handled.
+    in_memory : bool
+        Indicates whether data is to be loaded into memory.
+    Returns
+    -------
+    type[PatchExtractor]
+        The selected PatchExtractor type.
+    """
+    if not in_memory and data_type in (SupportedData.TIFF, SupportedData.CUSTOM):
+        return LimitFilesPatchExtractor
+    else:
+        return PatchExtractor
+def select_image_stack_loader(
+    data_type: SupportedData,
+    in_memory: bool,
+    read_func: ReadFunc | None = None,
+    read_kwargs: dict[str, Any] | None = None,
+    image_stack_loader: ImageStackLoader | None = None,
+    image_stack_loader_kwargs: dict[str, Any] | None = None,
+) -> ImageStackLoader:
+    match data_type:
+        case SupportedData.ARRAY:
+            return load_arrays
+        case SupportedData.TIFF:
+            if in_memory:
+                return load_tiffs
+            else:
+                return load_iter_tiff
+        case SupportedData.CUSTOM:
+            if (read_func is not None) and (image_stack_loader is None):
+                read_kwargs = {} if read_kwargs is None else read_kwargs
+                return partial(
+                    load_custom_file, read_func=read_func, read_kwargs=read_kwargs
+                )
+            elif (read_func is None) and (image_stack_loader is not None):
+                image_stack_loader_kwargs = (
+                    {}
+                    if image_stack_loader_kwargs is None
+                    else image_stack_loader_kwargs
+                )
+                return partial(image_stack_loader, **image_stack_loader_kwargs)
+            else:
+                raise ValueError(
+                    "Found `data_type='custom'` **one** of `read_func` or "
+                    "`image_stack_loader` must be provided."
+                )
+        case SupportedData.ZARR:
+            # TODO: in_memory or not
+            return load_zarrs
+        case SupportedData.CZI:
+            # TODO: in_memory or not
+            return load_czis
+        case _:
+            raise NotImplementedError(
+                f"Selecting an image stack for data type '{data_type}' has not been "
+                "implemented yet."
+            )

careamics/dataset_ng/grouped_index_sampler.py ADDED Viewed

@@ -0,0 +1,73 @@
+"""Module for the `GroupedIndexSampler`."""
+from collections.abc import Iterator, Sequence
+from typing import Self
+import numpy as np
+from numpy.random import Generator, default_rng
+from torch.utils.data import Sampler
+from careamics.dataset_ng.dataset import CareamicsDataset
+class GroupedIndexSampler(Sampler):
+    """
+    A PyTorch Sampler iterates through groups of indices.
+    The order of the groups will be shuffled and the order of the indices within the
+    groups will be shuffled.
+    This sampler is useful for iterative file loading — one file should be loaded at a
+    time so indices belonging to the same file should be grouped, but the order of the
+    files and the order of the indices should be shuffled.
+    """
+    def __init__(self, grouped_indices: Sequence[Sequence[int]], rng: Generator | None):
+        """
+        Parameters
+        ----------
+        grouped_indices : Sequence of (Sequence of int)
+            The indices that should be iterated through in groups.
+        """
+        super().__init__()
+        if rng is None:
+            self.rng = default_rng()
+        else:
+            self.rng = rng
+        # TODO: validate indices are unique across groups
+        self.grouped_indices = grouped_indices
+    @classmethod
+    def from_dataset(
+        cls, dataset: CareamicsDataset, rng: Generator | None = None
+    ) -> Self:
+        """
+        Create the sampler from a CareamicsDataset.
+        The grouped indices will be retrieved from the dataset's patching strategy.
+        Parameters
+        ----------
+        dataset: CareamicsDataset
+            An instance of the CareamicsDataset to create the sampler for.
+        rng: numpy.random.Generator, optional
+            Numpy random number generator that can be used to seed the sampler.
+        """
+        n_data_samples = len(dataset.input_extractor.shapes)
+        grouped_indices: list[Sequence[int]] = [
+            dataset.patching_strategy.get_patch_indices(i)
+            for i in range(n_data_samples)
+        ]
+        return cls(grouped_indices=grouped_indices, rng=rng)
+    def __iter__(self) -> Iterator[int]:
+        # shuffle the groups and the sub groups but keep indices in a group adjacent
+        group_order = np.arange(len(self.grouped_indices))
+        self.rng.shuffle(group_order)
+        for group_idx in group_order:
+            group = self.grouped_indices[group_idx.item()]
+            index_order = np.arange(len(group))
+            self.rng.shuffle(index_order)
+            for idx in index_order:
+                yield group[idx.item()]

careamics/dataset_ng/image_stack/__init__.py ADDED Viewed

@@ -0,0 +1,14 @@
+__all__ = [
+    "CziImageStack",
+    "FileImageStack",
+    "GenericImageStack",
+    "ImageStack",
+    "InMemoryImageStack",
+    "ZarrImageStack",
+]
+from .czi_image_stack import CziImageStack
+from .file_image_stack import FileImageStack
+from .image_stack_protocol import GenericImageStack, ImageStack
+from .in_memory_image_stack import InMemoryImageStack
+from .zarr_image_stack import ZarrImageStack

careamics/dataset_ng/image_stack/czi_image_stack.py ADDED Viewed

@@ -0,0 +1,396 @@
+from __future__ import annotations
+import re
+from collections.abc import Iterable, Sequence
+from pathlib import Path
+from typing import TYPE_CHECKING, Any, Literal
+import numpy as np
+from numpy.typing import NDArray
+try:
+    from pylibCZIrw.czi import CziReader, Rectangle, open_czi
+    pyczi_available = True
+except ImportError:
+    pyczi_available = False
+if TYPE_CHECKING:
+    try:
+        from pylibCZIrw.czi import CziReader, Rectangle, open_czi
+    except ImportError:
+        CziReader = Rectangle = open_czi = None  # type: ignore
+class CziImageStack:
+    """
+    A class for extracting patches from an image stack that is stored as a CZI file.
+    Parameters
+    ----------
+    data_path : str or Path
+        Path to the CZI file.
+    scene : int, optional
+        Index of the scene to extract.
+        A single CZI file can contain multiple "scenes", which are stored alongside each
+        other at different coordinates in the image plane, often separated by empty
+        space. Specifying this argument will read only the single scene with that index
+        from the file. Think of it as cropping the CZI file to the region where that
+        scene is located.
+        If no scene index is specified, the entire image will be read. In case it
+        contains multiple scenes, they will all be present in the resulting image.
+        This is usually not desirable due to the empty space between them.
+        In general, only omit this argument or set it to `None` if you know that
+        your CZI file does not contain any scenes.
+        The static function :py:meth:`get_bounding_rectangles` can be used to find out
+        how many scenes a given file contains and what their bounding rectangles are.
+        The scene can also be provided as part of `data_path` by appending an `"@"`
+        followed by the scene index to the filename.
+    depth_axis : {"none", "Z", "T"}, default: "none"
+        Which axis to use as depth-axis for providing 3-D patches.
+        - `"none"`: Only provide 2-D patches. If a Z or T dimension is present in the
+          data, they will be combined into the sample dimension `S`.
+        - `"Z"`: Use the Z-axis as depth-axis. If a T axis is present as well, it will
+          be merged into the sample dimensions `S`.
+        - `"T"`: Use the T-axis as depth-axis. If a Z axis is present as well, it will
+          be merged into the sample dimensions `S`.
+    Attributes
+    ----------
+    source : Path
+        Path to the CZI file, including the scene index if specified.
+    data_path : Path
+        Path to the CZI file without scene index.
+    scene : int or None
+        Index of the scene to extract, or None if not specified.
+    data_shape : Sequence[int]
+        The shape of the data in the order `(SC(Z)YX)`.
+    axes : str
+        The axes in the CZI file corresponding to the dimensions in `data_shape`.
+        The following values can occur:
+        - "SCZYX" for 3-D volumes if `depth_axis` is `"Z"`.
+        - "SCTYX" for time-series if `depth_axis` is `"T"`.
+        - "SCYX" if `depth_axis` is `"none"`.
+        The axis `S` (sample) is the only one not mapping one-to-one to an axis in the
+        CZI file but combines all remaining axes present in the file into one.
+    Examples
+    --------
+    Create an image stack for the first scene in a CZI file:
+    >>> stack = CziImageStack("path/to/file.czi", scene=0)  # doctest: +SKIP
+    Alternatively, the scene index can also be provided as part of the filename.
+    This is mainly intended for re-creating an image stack from the `source` property:
+    >>> stack = CziImageStack("path/to/file.czi@0")  # doctest: +SKIP
+    >>> stack2 = CziImageStack(stack.source)  # doctest: +SKIP
+    If the CZI file contains a third dimension (Z or T) and you want to perform 3-D
+    denoising, you need to explicitly set `depth_axis` to `"Z"` or `"T"`:
+    >>> stack_2d = CziImageStack("path/to/file.czi", scene=0)  # doctest: +SKIP
+    >>> stack_2d.axes, stack_2d.data_shape  # doctest: +SKIP
+    ('SCYX', [40, 1, 512, 512])
+    >>> stack_3d = CziImageStack(  # doctest: +SKIP
+    ...     "path/to/file.czi", scene=0, depth_axis="Z"
+    ... )
+    >>> stack_3d.axes, stack_3d.data_shape  # doctest: +SKIP
+    ('SCZYX', [4, 1, 10, 512, 512])
+    """
+    def __init__(
+        self,
+        data_path: str | Path,
+        scene: int | None = None,
+        depth_axis: Literal["none", "Z", "T"] = "none",
+    ) -> None:
+        if not pyczi_available:
+            raise ImportError(
+                "The CZI image stack requires the `pylibCZIrw` package to be installed."
+                " Please install it with `pip install careamics[czi]`."
+            )
+        _data_path = Path(data_path)
+        # Check for scene encoded in filename.
+        # Normally, file path and scene should be provided as separate arguments but
+        # we would also like to support using the `source` property to re-create the
+        # CZI image stack. In this case, the scene index is encoded in the file path.
+        scene_matches = re.match(r"^(.*)@(\d+)$", _data_path.name)
+        if scene_matches:
+            if scene is not None:
+                raise ValueError(
+                    f"Scene index is specified in the filename ({_data_path.name}) and "
+                    f"as an argument ({scene}). Please specify only one."
+                )
+            _data_path = _data_path.parent / scene_matches.group(1)
+            scene = int(scene_matches.group(2))
+        # Set variables
+        self.data_path = _data_path
+        self.scene = scene
+        self._depth_axis = depth_axis
+        # Open CZI file
+        self._czi = CziReader(str(self.data_path))
+        # Determine metadata
+        self.axes, self.data_shape, self._bounding_rectangle, self._sample_axes = (
+            self._get_shape()
+        )
+        self.data_dtype = np.float32
+    def __del__(self):
+        if hasattr(self, "_czi"):
+            # Close CZI file
+            self._czi.close()
+    def __getstate__(self) -> dict[str, Any]:
+        # Remove CziReader object from state to avoid pickling issues
+        state = self.__dict__.copy()
+        del state["_czi"]
+        return state
+    def __setstate__(self, state: dict[str, Any]) -> None:
+        # Reopen CZI file after unpickling
+        self.__dict__.update(state)
+        self._czi = CziReader(str(self.data_path))
+    # TODO: we append the scene index to the file name
+    #       - not sure if this is a good approach
+    @property
+    def source(self) -> Path:
+        filename = self.data_path.name
+        if self.scene is not None:
+            filename = f"{filename}@{self.scene}"
+        return self.data_path.parent / filename
+    def extract_patch(
+        self, sample_idx: int, coords: Sequence[int], patch_size: Sequence[int]
+    ) -> NDArray:
+        return self.extract_channel_patch(sample_idx, None, coords, patch_size)
+    def extract_channel_patch(
+        self,
+        sample_idx: int,
+        channels: Sequence[int] | None,  # `channels = None` to select all channels
+        coords: Sequence[int],
+        patch_size: Sequence[int],
+    ) -> NDArray:
+        # check that channels are within bounds
+        if channels is not None:
+            max_channel = self.data_shape[1] - 1  # channel is second dimension
+            for ch in channels:
+                if ch > max_channel:
+                    raise ValueError(
+                        f"Channel index {ch} is out of bounds for data with "
+                        f"{self.data_shape[1]} channels. Check the provided `channels` "
+                        f"parameter in the configuration for erroneous channel "
+                        f"indices."
+                    )
+        # Determine 3rd dimension (T, Z or none)
+        if len(coords) == 3:
+            if len(self.axes) != 5:
+                raise ValueError(
+                    f"Requested a 3D patch from a 2D image stack with axes {self.axes}."
+                )
+            third_dim = self.axes[2]
+            third_dim_offset, third_dim_size = coords[0], patch_size[0]
+        else:
+            if len(self.axes) != 4:
+                raise ValueError(
+                    f"Requested a 2D patch from a 3D image stack with axes {self.axes}."
+                )
+            third_dim = None
+            third_dim_offset, third_dim_size = 0, 1
+        # Set up ROI to extract from each plane as (x, y, w, h)
+        roi = (
+            self._bounding_rectangle.x + coords[-1],
+            self._bounding_rectangle.y + coords[-2],
+            patch_size[-1],
+            patch_size[-2],
+        )
+        # Create output array of shape (C, Z, Y, X)
+        n_channels = self.data_shape[1] if channels is None else len(channels)
+        patch = np.empty(
+            (n_channels, third_dim_size, *patch_size[-2:]), dtype=np.float32
+        )
+        # Set up plane to index `sample_idx`
+        sample_shape = list(self._sample_axes.values())
+        sample_indices = np.unravel_index(sample_idx, sample_shape)
+        plane = {
+            dimension: int(index)
+            for dimension, index in zip(
+                self._sample_axes.keys(), sample_indices, strict=False
+            )
+        }
+        # Read XY planes sequentially
+        channel_iter: Iterable
+        if channels is None:
+            channel_iter = range(self.data_shape[1])  # iter over number of requested C
+        else:
+            channel_iter = list(channels)
+        # for each channel
+        for patch_channel, data_channel in enumerate(channel_iter):
+            # pull plane with the given channel and 3rd dim index
+            for third_dim_index in range(third_dim_size):
+                plane["C"] = data_channel
+                if third_dim is not None:
+                    plane[third_dim] = third_dim_offset + third_dim_index
+                # read plane
+                extracted_roi = self._czi.read(roi=roi, plane=plane, scene=self.scene)
+                if extracted_roi.ndim == 3:
+                    if extracted_roi.shape[-1] > 1:
+                        raise ValueError(
+                            "CZI files with RGB channels are currently not supported."
+                        )
+                    # remove channel dimension
+                    extracted_roi = extracted_roi.squeeze(-1)
+                # add requested channel into the patch
+                patch[patch_channel, third_dim_index] = extracted_roi
+        # Remove dummy 3rd dimension for 2-D data
+        if third_dim is None:
+            patch = patch.squeeze(1)
+        return patch
+    def _get_shape(self) -> tuple[str, list[int], Rectangle, dict[str, int]]:
+        """Determines the shape of the selected scene.
+        Returns
+        -------
+        axes : str
+            String specifying the axis order. Examples:
+            - "SCZYX" for 3-D volumes if `depth_axis` is `"Z"`.
+            - "SCTYX" for time-series if `depth_axis` is `"T"`.
+            - "SCYX" if `depth_axis` is `"none"`.
+            The axis `S` is the sample dimension and combines all remaining axes
+            present in the data.
+        shape : list[int]
+            The size of each axis, in the order listed in `axes`.
+        bounding_rectangle : Rectangle
+            The bounding rectangle of the scene in pixels. The rectangle is
+            defined by its top-left corner (x, y) and its width and height (w, h).
+        sample_axes : dict[str, int]
+            A dictionary with information about the remaining axes used for the
+            sample dimension.
+            The keys are the axis names (e.g., "T", "Z") and the values are their
+            respective sizes.
+        """
+        # Get CZI dimensions
+        total_bbox = self._czi.total_bounding_box_no_pyramid
+        if self.scene is None:
+            bounding_rectangle = self._czi.total_bounding_rectangle_no_pyramid
+        else:
+            bounding_rectangle = self._czi.scenes_bounding_rectangle_no_pyramid[
+                self.scene
+            ]
+        # Determine if T and Z axis are present
+        # Note: An axis of size 1 is as good as no axis since we cannot use it for 3-D
+        # denoising.
+        has_time = "T" in total_bbox and (total_bbox["T"][1] - total_bbox["T"][0]) > 1
+        has_depth = "Z" in total_bbox and (total_bbox["Z"][1] - total_bbox["Z"][0]) > 1
+        # Determine axis order depending on `depth_axis`
+        if self._depth_axis == "Z":
+            axes = "SCZYX"
+            if not has_depth:
+                raise RuntimeError(
+                    f"The CZI file {self.data_path} does not contain a Z axis to use "
+                    'for 3-D denoising. Consider setting `axes="YX"` or '
+                    '`depth_axis="none"` to perform 2-D denoising instead.'
+                )
+        elif self._depth_axis == "T":
+            axes = "SCTYX"
+            if not has_time:
+                raise RuntimeError(
+                    f"The CZI file {self.data_path} does not contain a T axis to use "
+                    'for 3-D denoising. Consider setting `axes="YX"` or '
+                    '`depth_axis="none"` to perform 2-D denoising instead.'
+                )
+        else:
+            axes = "SCYX"
+        # Calculcate size of sample dimension S, combining all axes not used elsewhere.
+        # This could, for example, be a time axis. If we only perform 2-D denoising, a
+        # potentially present Z axis would also be used as sample dimension. If both,
+        # T and Z, are present, both need to be combined into the sample dimension.
+        # The same needs to be done to any other potentially present axis in the CZI
+        # file which is not a spatial or channel axis.
+        # The following code calculates the size of the combined sample axis.
+        sample_axes = {}
+        sample_size = 1
+        for dimension, (start, end) in total_bbox.items():
+            if dimension not in axes:
+                sample_axes[dimension] = end - start
+                sample_size *= end - start
+        # Determine data shape
+        shape = []
+        for dimension in axes:
+            if dimension == "S":
+                shape.append(sample_size)
+            elif dimension == "Y":
+                shape.append(bounding_rectangle.h)
+            elif dimension == "X":
+                shape.append(bounding_rectangle.w)
+            elif dimension in total_bbox:
+                shape.append(total_bbox[dimension][1] - total_bbox[dimension][0])
+            else:
+                shape.append(1)
+        return axes, shape, bounding_rectangle, sample_axes
+    @classmethod
+    def get_bounding_rectangles(
+        cls, czi: Path | str | CziReader
+    ) -> dict[int | None, Rectangle]:
+        """Gets the bounding rectangles of all scenes in a CZI file.
+        Parameters
+        ----------
+        czi : Path or str or pyczi.CziReader
+            Path to the CZI file or an already opened file as CziReader object.
+        Returns
+        -------
+        dict[int | None, Rectangle]
+            A dictionary mapping scene indices to their bounding rectangles in the
+            format `(x, y, w, h)`.
+            If no scenes are present in the CZI file, the returned dictionary will
+            have only one entry with key `None`, whose bounding rectangle covers the
+            entire image.
+        """
+        if not isinstance(czi, CziReader):
+            with open_czi(str(czi)) as czi_reader:
+                return cls.get_bounding_rectangles(czi_reader)
+        scenes_bounding_rectangle = czi.scenes_bounding_rectangle_no_pyramid
+        if len(scenes_bounding_rectangle) >= 1:
+            # Ensure keys are int | None for type compatibility
+            return {int(k): v for k, v in scenes_bounding_rectangle.items()}
+        else:
+            return {None: czi.total_bounding_rectangle_no_pyramid}