PyPI - careamics - Versions diffs - 0.0.19__py3-none-any.whl - Mend

careamics 0.0.19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (279) hide show

careamics/__init__.py +24 -0
careamics/careamist.py +961 -0
careamics/cli/__init__.py +5 -0
careamics/cli/conf.py +394 -0
careamics/cli/main.py +234 -0
careamics/cli/utils.py +27 -0
careamics/config/__init__.py +66 -0
careamics/config/algorithms/__init__.py +21 -0
careamics/config/algorithms/care_algorithm_config.py +122 -0
careamics/config/algorithms/hdn_algorithm_config.py +103 -0
careamics/config/algorithms/microsplit_algorithm_config.py +103 -0
careamics/config/algorithms/n2n_algorithm_config.py +115 -0
careamics/config/algorithms/n2v_algorithm_config.py +296 -0
careamics/config/algorithms/pn2v_algorithm_config.py +301 -0
careamics/config/algorithms/unet_algorithm_config.py +91 -0
careamics/config/algorithms/vae_algorithm_config.py +178 -0
careamics/config/architectures/__init__.py +7 -0
careamics/config/architectures/architecture_config.py +37 -0
careamics/config/architectures/lvae_config.py +262 -0
careamics/config/architectures/unet_config.py +125 -0
careamics/config/configuration.py +367 -0
careamics/config/configuration_factories.py +2400 -0
careamics/config/data/__init__.py +27 -0
careamics/config/data/data_config.py +472 -0
careamics/config/data/inference_config.py +237 -0
careamics/config/data/ng_data_config.py +1038 -0
careamics/config/data/patch_filter/__init__.py +15 -0
careamics/config/data/patch_filter/filter_config.py +16 -0
careamics/config/data/patch_filter/mask_filter_config.py +17 -0
careamics/config/data/patch_filter/max_filter_config.py +15 -0
careamics/config/data/patch_filter/meanstd_filter_config.py +18 -0
careamics/config/data/patch_filter/shannon_filter_config.py +15 -0
careamics/config/data/patching_strategies/__init__.py +15 -0
careamics/config/data/patching_strategies/_overlapping_patched_config.py +102 -0
careamics/config/data/patching_strategies/_patched_config.py +56 -0
careamics/config/data/patching_strategies/random_patching_config.py +45 -0
careamics/config/data/patching_strategies/sequential_patching_config.py +25 -0
careamics/config/data/patching_strategies/tiled_patching_config.py +40 -0
careamics/config/data/patching_strategies/whole_patching_config.py +12 -0
careamics/config/data/tile_information.py +65 -0
careamics/config/lightning/__init__.py +15 -0
careamics/config/lightning/callbacks/__init__.py +8 -0
careamics/config/lightning/callbacks/callback_config.py +116 -0
careamics/config/lightning/optimizer_configs.py +186 -0
careamics/config/lightning/training_config.py +70 -0
careamics/config/losses/__init__.py +8 -0
careamics/config/losses/loss_config.py +60 -0
careamics/config/ng_configs/__init__.py +5 -0
careamics/config/ng_configs/n2v_configuration.py +64 -0
careamics/config/ng_configs/ng_configuration.py +256 -0
careamics/config/ng_factories/__init__.py +9 -0
careamics/config/ng_factories/algorithm_factory.py +120 -0
careamics/config/ng_factories/data_factory.py +154 -0
careamics/config/ng_factories/n2v_factory.py +256 -0
careamics/config/ng_factories/training_factory.py +69 -0
careamics/config/noise_model/__init__.py +12 -0
careamics/config/noise_model/likelihood_config.py +60 -0
careamics/config/noise_model/noise_model_config.py +149 -0
careamics/config/support/__init__.py +31 -0
careamics/config/support/supported_activations.py +27 -0
careamics/config/support/supported_algorithms.py +40 -0
careamics/config/support/supported_architectures.py +13 -0
careamics/config/support/supported_data.py +122 -0
careamics/config/support/supported_filters.py +17 -0
careamics/config/support/supported_loggers.py +10 -0
careamics/config/support/supported_losses.py +32 -0
careamics/config/support/supported_optimizers.py +57 -0
careamics/config/support/supported_patching_strategies.py +22 -0
careamics/config/support/supported_pixel_manipulations.py +15 -0
careamics/config/support/supported_struct_axis.py +21 -0
careamics/config/support/supported_transforms.py +12 -0
careamics/config/transformations/__init__.py +22 -0
careamics/config/transformations/n2v_manipulate_config.py +79 -0
careamics/config/transformations/normalize_config.py +59 -0
careamics/config/transformations/transform_config.py +45 -0
careamics/config/transformations/transform_unions.py +29 -0
careamics/config/transformations/xy_flip_config.py +43 -0
careamics/config/transformations/xy_random_rotate90_config.py +35 -0
careamics/config/utils/__init__.py +8 -0
careamics/config/utils/configuration_io.py +85 -0
careamics/config/validators/__init__.py +18 -0
careamics/config/validators/axes_validators.py +90 -0
careamics/config/validators/model_validators.py +84 -0
careamics/config/validators/patch_validators.py +55 -0
careamics/conftest.py +39 -0
careamics/dataset/__init__.py +17 -0
careamics/dataset/dataset_utils/__init__.py +19 -0
careamics/dataset/dataset_utils/dataset_utils.py +118 -0
careamics/dataset/dataset_utils/file_utils.py +141 -0
careamics/dataset/dataset_utils/iterate_over_files.py +84 -0
careamics/dataset/dataset_utils/running_stats.py +189 -0
careamics/dataset/in_memory_dataset.py +303 -0
careamics/dataset/in_memory_pred_dataset.py +88 -0
careamics/dataset/in_memory_tiled_pred_dataset.py +131 -0
careamics/dataset/iterable_dataset.py +294 -0
careamics/dataset/iterable_pred_dataset.py +121 -0
careamics/dataset/iterable_tiled_pred_dataset.py +141 -0
careamics/dataset/patching/__init__.py +1 -0
careamics/dataset/patching/patching.py +300 -0
careamics/dataset/patching/random_patching.py +110 -0
careamics/dataset/patching/sequential_patching.py +212 -0
careamics/dataset/patching/validate_patch_dimension.py +64 -0
careamics/dataset/tiling/__init__.py +10 -0
careamics/dataset/tiling/collate_tiles.py +33 -0
careamics/dataset/tiling/lvae_tiled_patching.py +375 -0
careamics/dataset/tiling/tiled_patching.py +166 -0
careamics/dataset_ng/README.md +212 -0
careamics/dataset_ng/__init__.py +0 -0
careamics/dataset_ng/dataset.py +365 -0
careamics/dataset_ng/demos/bsd68_demo.ipynb +361 -0
careamics/dataset_ng/demos/bsd68_zarr_demo.ipynb +453 -0
careamics/dataset_ng/demos/care_U2OS_demo.ipynb +330 -0
careamics/dataset_ng/demos/demo_custom_image_stack.ipynb +736 -0
careamics/dataset_ng/demos/demo_datamodule.ipynb +447 -0
careamics/dataset_ng/demos/demo_dataset.ipynb +278 -0
careamics/dataset_ng/demos/demo_patch_extractor.py +51 -0
careamics/dataset_ng/demos/mouse_nuclei_demo.ipynb +293 -0
careamics/dataset_ng/factory.py +180 -0
careamics/dataset_ng/grouped_index_sampler.py +73 -0
careamics/dataset_ng/image_stack/__init__.py +14 -0
careamics/dataset_ng/image_stack/czi_image_stack.py +396 -0
careamics/dataset_ng/image_stack/file_image_stack.py +140 -0
careamics/dataset_ng/image_stack/image_stack_protocol.py +93 -0
careamics/dataset_ng/image_stack/image_utils/__init__.py +6 -0
careamics/dataset_ng/image_stack/image_utils/image_stack_utils.py +125 -0
careamics/dataset_ng/image_stack/in_memory_image_stack.py +93 -0
careamics/dataset_ng/image_stack/zarr_image_stack.py +170 -0
careamics/dataset_ng/image_stack_loader/__init__.py +19 -0
careamics/dataset_ng/image_stack_loader/image_stack_loader_protocol.py +70 -0
careamics/dataset_ng/image_stack_loader/image_stack_loaders.py +273 -0
careamics/dataset_ng/image_stack_loader/zarr_utils.py +130 -0
careamics/dataset_ng/legacy_interoperability.py +175 -0
careamics/dataset_ng/microsplit_input_synth.py +377 -0
careamics/dataset_ng/patch_extractor/__init__.py +7 -0
careamics/dataset_ng/patch_extractor/limit_file_extractor.py +50 -0
careamics/dataset_ng/patch_extractor/patch_construction.py +151 -0
careamics/dataset_ng/patch_extractor/patch_extractor.py +117 -0
careamics/dataset_ng/patch_filter/__init__.py +20 -0
careamics/dataset_ng/patch_filter/coordinate_filter_protocol.py +27 -0
careamics/dataset_ng/patch_filter/filter_factory.py +95 -0
careamics/dataset_ng/patch_filter/mask_filter.py +96 -0
careamics/dataset_ng/patch_filter/max_filter.py +188 -0
careamics/dataset_ng/patch_filter/mean_std_filter.py +218 -0
careamics/dataset_ng/patch_filter/patch_filter_protocol.py +50 -0
careamics/dataset_ng/patch_filter/shannon_filter.py +188 -0
careamics/dataset_ng/patching_strategies/__init__.py +26 -0
careamics/dataset_ng/patching_strategies/patching_strategy_factory.py +50 -0
careamics/dataset_ng/patching_strategies/patching_strategy_protocol.py +161 -0
careamics/dataset_ng/patching_strategies/random_patching.py +393 -0
careamics/dataset_ng/patching_strategies/sequential_patching.py +99 -0
careamics/dataset_ng/patching_strategies/tiling_strategy.py +207 -0
careamics/dataset_ng/patching_strategies/whole_sample.py +61 -0
careamics/file_io/__init__.py +15 -0
careamics/file_io/read/__init__.py +11 -0
careamics/file_io/read/get_func.py +57 -0
careamics/file_io/read/tiff.py +58 -0
careamics/file_io/write/__init__.py +15 -0
careamics/file_io/write/get_func.py +63 -0
careamics/file_io/write/tiff.py +40 -0
careamics/lightning/__init__.py +32 -0
careamics/lightning/callbacks/__init__.py +13 -0
careamics/lightning/callbacks/data_stats_callback.py +33 -0
careamics/lightning/callbacks/hyperparameters_callback.py +49 -0
careamics/lightning/callbacks/prediction_writer_callback/__init__.py +20 -0
careamics/lightning/callbacks/prediction_writer_callback/file_path_utils.py +56 -0
careamics/lightning/callbacks/prediction_writer_callback/prediction_writer_callback.py +234 -0
careamics/lightning/callbacks/prediction_writer_callback/write_strategy.py +399 -0
careamics/lightning/callbacks/prediction_writer_callback/write_strategy_factory.py +215 -0
careamics/lightning/callbacks/progress_bar_callback.py +90 -0
careamics/lightning/dataset_ng/__init__.py +1 -0
careamics/lightning/dataset_ng/callbacks/__init__.py +1 -0
careamics/lightning/dataset_ng/callbacks/prediction_writer/__init__.py +29 -0
careamics/lightning/dataset_ng/callbacks/prediction_writer/cached_tiles_strategy.py +164 -0
careamics/lightning/dataset_ng/callbacks/prediction_writer/file_path_utils.py +33 -0
careamics/lightning/dataset_ng/callbacks/prediction_writer/prediction_writer_callback.py +219 -0
careamics/lightning/dataset_ng/callbacks/prediction_writer/write_image_strategy.py +91 -0
careamics/lightning/dataset_ng/callbacks/prediction_writer/write_strategy.py +27 -0
careamics/lightning/dataset_ng/callbacks/prediction_writer/write_strategy_factory.py +214 -0
careamics/lightning/dataset_ng/callbacks/prediction_writer/write_tiles_zarr_strategy.py +375 -0
careamics/lightning/dataset_ng/data_module.py +529 -0
careamics/lightning/dataset_ng/data_module_utils.py +395 -0
careamics/lightning/dataset_ng/lightning_modules/__init__.py +9 -0
careamics/lightning/dataset_ng/lightning_modules/care_module.py +97 -0
careamics/lightning/dataset_ng/lightning_modules/n2v_module.py +106 -0
careamics/lightning/dataset_ng/lightning_modules/unet_module.py +221 -0
careamics/lightning/dataset_ng/prediction/__init__.py +16 -0
careamics/lightning/dataset_ng/prediction/convert_prediction.py +198 -0
careamics/lightning/dataset_ng/prediction/stitch_prediction.py +171 -0
careamics/lightning/lightning_module.py +914 -0
careamics/lightning/microsplit_data_module.py +632 -0
careamics/lightning/predict_data_module.py +341 -0
careamics/lightning/train_data_module.py +666 -0
careamics/losses/__init__.py +21 -0
careamics/losses/fcn/__init__.py +1 -0
careamics/losses/fcn/losses.py +125 -0
careamics/losses/loss_factory.py +80 -0
careamics/losses/lvae/__init__.py +1 -0
careamics/losses/lvae/loss_utils.py +83 -0
careamics/losses/lvae/losses.py +589 -0
careamics/lvae_training/__init__.py +0 -0
careamics/lvae_training/calibration.py +191 -0
careamics/lvae_training/dataset/__init__.py +20 -0
careamics/lvae_training/dataset/config.py +135 -0
careamics/lvae_training/dataset/lc_dataset.py +274 -0
careamics/lvae_training/dataset/ms_dataset_ref.py +1067 -0
careamics/lvae_training/dataset/multich_dataset.py +1121 -0
careamics/lvae_training/dataset/multicrop_dset.py +196 -0
careamics/lvae_training/dataset/multifile_dataset.py +335 -0
careamics/lvae_training/dataset/types.py +32 -0
careamics/lvae_training/dataset/utils/__init__.py +0 -0
careamics/lvae_training/dataset/utils/data_utils.py +114 -0
careamics/lvae_training/dataset/utils/empty_patch_fetcher.py +65 -0
careamics/lvae_training/dataset/utils/index_manager.py +491 -0
careamics/lvae_training/dataset/utils/index_switcher.py +165 -0
careamics/lvae_training/eval_utils.py +987 -0
careamics/lvae_training/get_config.py +84 -0
careamics/lvae_training/lightning_module.py +701 -0
careamics/lvae_training/metrics.py +214 -0
careamics/lvae_training/train_lvae.py +342 -0
careamics/lvae_training/train_utils.py +121 -0
careamics/model_io/__init__.py +7 -0
careamics/model_io/bioimage/__init__.py +11 -0
careamics/model_io/bioimage/_readme_factory.py +113 -0
careamics/model_io/bioimage/bioimage_utils.py +56 -0
careamics/model_io/bioimage/cover_factory.py +171 -0
careamics/model_io/bioimage/model_description.py +341 -0
careamics/model_io/bmz_io.py +251 -0
careamics/model_io/model_io_utils.py +95 -0
careamics/models/__init__.py +5 -0
careamics/models/activation.py +40 -0
careamics/models/layers.py +495 -0
careamics/models/lvae/__init__.py +3 -0
careamics/models/lvae/layers.py +1371 -0
careamics/models/lvae/likelihoods.py +394 -0
careamics/models/lvae/lvae.py +848 -0
careamics/models/lvae/noise_models.py +738 -0
careamics/models/lvae/stochastic.py +394 -0
careamics/models/lvae/utils.py +404 -0
careamics/models/model_factory.py +54 -0
careamics/models/unet.py +449 -0
careamics/nm_training_placeholder.py +203 -0
careamics/prediction_utils/__init__.py +21 -0
careamics/prediction_utils/lvae_prediction.py +158 -0
careamics/prediction_utils/lvae_tiling_manager.py +362 -0
careamics/prediction_utils/prediction_outputs.py +238 -0
careamics/prediction_utils/stitch_prediction.py +193 -0
careamics/py.typed +5 -0
careamics/transforms/__init__.py +22 -0
careamics/transforms/compose.py +173 -0
careamics/transforms/n2v_manipulate.py +150 -0
careamics/transforms/n2v_manipulate_torch.py +149 -0
careamics/transforms/normalize.py +374 -0
careamics/transforms/pixel_manipulation.py +406 -0
careamics/transforms/pixel_manipulation_torch.py +388 -0
careamics/transforms/struct_mask_parameters.py +20 -0
careamics/transforms/transform.py +24 -0
careamics/transforms/tta.py +88 -0
careamics/transforms/xy_flip.py +131 -0
careamics/transforms/xy_random_rotate90.py +108 -0
careamics/utils/__init__.py +19 -0
careamics/utils/autocorrelation.py +40 -0
careamics/utils/base_enum.py +60 -0
careamics/utils/context.py +67 -0
careamics/utils/deprecation.py +63 -0
careamics/utils/lightning_utils.py +71 -0
careamics/utils/logging.py +323 -0
careamics/utils/metrics.py +394 -0
careamics/utils/path_utils.py +26 -0
careamics/utils/plotting.py +76 -0
careamics/utils/ram.py +15 -0
careamics/utils/receptive_field.py +108 -0
careamics/utils/serializers.py +62 -0
careamics/utils/torch_utils.py +150 -0
careamics/utils/version.py +38 -0
careamics-0.0.19.dist-info/METADATA +80 -0
careamics-0.0.19.dist-info/RECORD +279 -0
careamics-0.0.19.dist-info/WHEEL +4 -0
careamics-0.0.19.dist-info/entry_points.txt +2 -0
careamics-0.0.19.dist-info/licenses/LICENSE +28 -0

careamics/dataset_ng/README.md ADDED Viewed

@@ -0,0 +1,212 @@
+# The CAREamics Dataset
+Welcome to the CAREamics dataset!
+A PyTorch based dataset, designed to be used with microscopy data. It is universal for the training, validation and prediction stages of a machine learning pipeline.
+The key ethos is to create a modular and maintainable dataset comprised of swappable components that interact through interfaces. This should facilitate a smooth development process when extending the dataset's function to new features, and also enable advanced users to easily customize the dataset to their needs, by writing custom components. This is achieved by following a few key software engineering principles, detailed at the end of this README file.
+## Dataset Component overview
+```mermaid
+---
+title: CAREamicsDataset
+---
+classDiagram
+    class CAREamicsDataset{
+        +PatchExtractor input_extractor
+        +Optional[PatchExtractor] target_extractor
+        +PatchingStrategy patching_strategy
+        +list~Transform~ transforms
+        +\_\_getitem\_\_(int index) NDArray
+    }
+    class PatchingStrategy{
+        <<interface>>
+        +n_patches int
+        +get_patch_spec(index: int) PatchSpecs
+    }
+    class RandomPatchingStrategy{
+    }
+    class FixedRandomPatchingStrategy{
+    }
+    class SequentialPatchingStrategy{
+    }
+    class TilingStrategy{
+        +get_patch_spec(index: int) TileSpecs
+    }
+    class PatchExtractor{
+        +list~ImageStack~ image_stacks
+        +extract_patch(PatchSpecs) NDArray
+    }
+    class PatchSpecs {
+        <<TypedDict>>
+        +int data_idx
+        +int sample_idx
+        +Sequence~int~ coords
+        +Sequence~int~ patch_size
+    }
+        class TileSpecs {
+        <<TypedDict>>
+        +Sequence~int~ crop_coords
+        +Sequence~int~ crop_size
+        +Sequence~int~ stitch_coords
+    }
+    class ImageStack{
+        <<interface>>
+        +Union[Path, Literal["array"]] source
+        +Sequence~int~ data_shape
+        +DTypeLike data_type
+        +extract_patch(sample_idx, coords, patch_size) NDArray
+    }
+    class InMemoryImageStack {
+    }
+    class ZarrImageStack {
+        +Path source
+    }
+    CAREamicsDataset --* PatchExtractor: Is composed of
+    CAREamicsDataset --* PatchingStrategy: Is composed of
+    PatchExtractor --o ImageStack: Aggregates
+    ImageStack <|-- InMemoryImageStack: Implements
+    ImageStack <|-- ZarrImageStack: Implements
+    PatchingStrategy <|-- RandomPatchingStrategy: Implements
+    PatchingStrategy <|-- FixedRandomPatchingStrategy: Implements
+    PatchingStrategy <|-- SequentialPatchingStrategy: Implements
+    PatchingStrategy <|-- TilingStrategy: Implements
+    PatchSpecs <|-- TileSpecs: Inherits from
+```
+### `ImageStack` and implementations
+This interface represents a set of image data, which can be saved with any subset of the
+axes STCZYX, in any order, see below for a description of the dimensions. The `ImageStack`
+interface's job is to act as an adapter for different data storage types, so that higher
+level classes can access the image data without having to know the implementation details of
+how to load or read data from each storage type. This means we can decide to support new storage
+types by implementing a new concrete `ImageStack` class without having to change anything
+in the `CAREamicsDataset` class. Advanced users can also choose to create their own
+`ImageStack` class if they want to work with their own data storage type.
+The interface provides an `extract_patch` method which will produce a patch from the image,
+as a NumPy array, with the dimensions C(Z)YX. This method should be thought of as simply
+a wrapper for the equivalent to NumPy slicing for each of the storage types.
+#### Concrete implementations
+- `InMemoryImageStack`: The underlying data is stored as a NumPy array in memory. It has some
+additional constructor methods to load the data from known file formats such as TIFF files.
+- `ZarrImageStack`: The underlying data is stored as a ZARR file on disk.
+#### Axes description
+- S is a generic sample dimension,
+- T is a time dimension,
+- C is a channel dimension,
+- Z is a spatial dimension,
+- Y is a spatial dimension,
+- X is a spatial dimension.
+### `PatchExtractor`
+The `PatchExtractor` class aggregates many `ImageStack` instances. This allows multiple
+images with different dimensions, and possibly different storage types, to be treated as a single entity.
+The class has an `extract_patch` method to extract a patch from any one of its `ImageStack`
+objects. It can also possibly be extended when extra logic to extract patches is needed,
+for example when constructing lateral-context inputs for the MicroSplit LVAE models.
+### `PatchingStrategy`
+The `PatchingStrategy` class is an interface to generate patch specifications, where each of the
+concrete implementations produce a set of patch specifications using a different strategy.
+It has a `n_patches` attribute that can be accessed to find out how many patches the
+strategy will produce, given the shapes of the image stacks it has been initialized with.
+This is needed by the `CAREamicsDataset` to return its length.
+Most importantly it has a `get_patch_spec` method, that takes an index and returns a
+patch specification. For deterministic patching strategies, this method will always
+return the same patch specification given the same index, but there are also random strategies
+where the returned patch specification will change every time. The given index must
+always be smaller than `n_patches`.
+#### Concrete implementations
+- `RandomPatchingStrategy`: this strategy will produce random patches that will change
+even if the `get_patch_spec` method is called with the same index.
+- `FixedRandomPatchingStrategy`: this strategy will produce random patches, but the patch
+will be the same if the `get_patch_spec` method is called with the same index. This is
+useful for making sure validation is comparable epoch to epoch.
+- `SequentialPatchingStrategy`: this strategy is deterministic and the patches will be
+sequential with some specified overlap.
+- `TilingStrategy`: this strategy is deterministic and the patches will be
+sequential with some specified overlap. Rather than a `PatchSpecs` dictionary it will
+produce a `TileSpecs` dictionary which includes some extra fields that are used for
+stitching the tiles back together.
+#### PatchSpecs
+The `get_patch_spec` method returns a dictionary containing the keys `data_idx`, `sample_idx`, `coords` and `patch_size`.
+These are the exact arguments that the `PatchExtractor.extract_patch` method takes. The patch specification
+produced by the patching strategy is received by the `PatchExtractor`, which in turn produces an image patch.
+For type hinting, `PatchSpecs` is defined as a `TypedDict`.
+## Key Principles
+The aim of all these principles is to create a system of interacting classes that have
+low coupling. This allows for one section to be changed or extended without breaking functionality
+elsewhere in the codebase.
+### Composition over inheritance
+The principle of composition over inheritance is: rather than using inheritance to
+extend or change the behavior of a class, instead, a class can be composed of modules
+that can be swapped to extend or change behavior.
+The reason to use composition is that it promotes the easy reuse of the underlying
+components, it can prevent a subclass explosion, and it leads to a maintainable and
+easily extendable design. A software architecture based on composition is normally
+maintainable and extendable because if a component needs to change then the whole class
+shouldn't have to be refactored and if a new feature needs to be added, usually an additional
+component can be added to the class.
+The `CAREamicsDataset` is composed of `PatchExtractor`, `PatchingStrategy` and `Transform` components.
+The `PatchingStrategy` classes implement an interface so the dataset can switch between
+different strategies. The `PatchExtractor` is composed of many `ImageStack` instances,
+new image stacks can be added to extend the type of data that the dataset can read from.
+### Dependency Inversion
+The dependency inversion principle states:
+1. High-level modules should not depend on low-level modules. Both high-level and
+low-level modules should depend on abstractions (e.g. interfaces).
+2. Abstractions should not depend on details (concrete implementations). Details should
+depend on abstractions.
+In other words high level modules that provide complex logic should be easily reusable
+and not depend on implementation details of low-level modules that provide utility functionality.
+This can be achieved by introducing abstractions that decouple high and low level modules.
+An example of the dependency inversion principle in use is how the `PatchExtractor` only
+depends on the `ImageStack` interface, and does not have to have any knowledge of the
+concrete implementations. The concrete `ImageStack` implementations also do not have
+any knowledge of the `PatchExtractor` or any other higher-level functionality that the
+dataset needs.
+### Single Responsibility Principle
+Each component should have a small scope of responsibility that is easily defined. This
+should make the code easier to maintain and hopefully reduce the number of places in the
+code that have to change when introducing a new feature.
+- `ImageStack` responsibility: to act as an adapter for loading and reading image data
+from different underlying storage.
+- `PatchExtractor` responsibility: to extract patches from a set of image stacks.
+- `PatchingStrategy` responsibility: to produce patch specifications given an index, through
+an interface that hides the underlying implementation.
+- `CAREamicsDataset` responsibility: to orchestrate the interactions of its underlying
+components to produce an input patch (and target patch when required) given an index.

careamics/dataset_ng/__init__.py ADDED Viewed

File without changes

careamics/dataset_ng/dataset.py ADDED Viewed

@@ -0,0 +1,365 @@
+from collections.abc import Sequence
+from pathlib import Path
+from typing import Any, Generic, Literal, NamedTuple, Union
+import numpy as np
+from numpy.typing import NDArray
+from torch.utils.data import Dataset
+from tqdm.auto import tqdm
+from careamics.config.data.ng_data_config import Mode, NGDataConfig, WholePatchingConfig
+from careamics.config.transformations import NormalizeConfig
+from careamics.dataset.dataset_utils.running_stats import WelfordStatistics
+from careamics.dataset.patching.patching import Stats
+from careamics.transforms import Compose
+from .image_stack import GenericImageStack, ZarrImageStack
+from .patch_extractor import PatchExtractor
+from .patch_filter import create_coord_filter, create_patch_filter
+from .patching_strategies import (
+    PatchSpecs,
+    RegionSpecs,
+    create_patching_strategy,
+)
+class ImageRegionData(NamedTuple, Generic[RegionSpecs]):
+    """
+    Data structure for arrays produced by the dataset and propagated through models.
+    An ImageRegionData may be a patch during training/validation, a tile during
+    prediction with tiling, or a whole image during prediction without tiling.
+    `data_shape` may not correspond to the shape of the original data if a subset
+    of the channels has been requested, in which case the channel dimension may
+    be smaller than that of the original data and only correspond to the requested
+    number of channels.
+    ImageRegionData may be collated in batches during training by the DataLoader. In
+    that case:
+    - data: arrays are collated into NDArray of shape (B,C,Z,Y,X)
+    - source: list of str, length B
+    - data_shape: list of tuples of int, each tuple being of length B and representing
+        the shape of the original images in the corresponding dimension
+    - dtype: list of str, length B
+    - axes: list of str, length B
+    - region_spec: dict of {str: sequence}, each sequence being of length B
+    - additional_metadata: list of dict
+    Description of the fields is given for the uncollated case (non-batched).
+    Notes
+    -----
+    Listing `Generic[...]` next to `NamedTuple` in the bases (a generic named tuple)
+    is only supported from Python 3.11 onwards.
+    """
+    data: NDArray
+    """Patch, tile or image in C(Z)YX format."""
+    # "array" is itself a str, so the Literal does not widen the type; it only
+    # documents the sentinel value used for in-memory arrays.
+    source: Union[str, Literal["array"]]
+    """Source of the data, e.g. file path, zarr URI, or "array" for in-memory arrays."""
+    data_shape: Sequence[int]
+    """Shape of the original image in (SCZ)YX format and order. If channels are
+    subsetted, the channel dimension corresponds to the number of requested channels."""
+    dtype: str  # dtype should be str for collate
+    """Data type of the original image as a string."""
+    axes: str
+    """Axes of the original data array, in format SCZYX."""
+    region_spec: RegionSpecs  # PatchSpecs or subclasses, e.g. TileSpecs
+    """Specifications of the region within the original image from where `data` is
+    extracted. Of type PatchSpecs during training/validation and prediction without
+    tiling, and TileSpecs during prediction with tiling.
+    """
+    additional_metadata: dict[str, Any]
+    """Additional metadata to be stored with the image region. Currently used to store
+    chunk and shard information for zarr image stacks."""
+# Type alias: either a sequence of arrays or a sequence of filesystem paths.
+InputType = Union[Sequence[NDArray[Any]], Sequence[Path]]
+def _adjust_shape_for_channels(
+    shape: Sequence[int],
+    channels: Sequence[int] | None,
+    value: int | Literal["channels"] = "channels",
+) -> tuple[int, ...]:
+    """Adjust shape to account for channel subsetting.
+    Parameters
+    ----------
+    shape : Sequence[int]
+        The original data shape in SC(Z)YX format.
+    channels : Sequence[int] | None
+        The list of channels to select. If None, no adjustment is made.
+    value : int | Literal["channels"], default="channels"
+        The value to replace the channel dimension with. If "channels", the length
+        of the channels list is used, by default "channels".
+    Returns
+    -------
+    tuple[int, ...]
+        The adjusted data shape in SC(Z)YX format.
+    """
+    if channels is not None:
+        adjusted_shape = list(shape)
+        adjusted_shape[1] = len(channels) if value == "channels" else value
+        return tuple(adjusted_shape)
+    return tuple(shape)
+def _patch_size_within_data_shapes(
+    data_shapes: Sequence[Sequence[int]], patch_size: Sequence[int]
+) -> bool:
+    """Determine whether all the data_shapes are greater than the patch size.
+    Parameters
+    ----------
+    data_shapes : Sequence[Sequence[int]]
+        A sequence of data shapes. They must be in the format SC(Z)YX.
+    patch_size : Sequence[int]
+        A patch size that must specify the size of the patch in all the spatial
+        dimensions, in the format (Z)YX.
+    Returns
+    -------
+    bool
+        If all the data shapes are greater than the patch size.
+    """
+    smaller_than_shapes = [
+        # skip sample and channel dimension in data_shape
+        (np.array(patch_size) < np.array(data_shape[2:])).all()
+        for data_shape in data_shapes
+    ]
+    return all(smaller_than_shapes)
+class CareamicsDataset(Dataset, Generic[GenericImageStack]):
+    def __init__(
+        self,
+        data_config: NGDataConfig,
+        input_extractor: PatchExtractor[GenericImageStack],
+        target_extractor: PatchExtractor[GenericImageStack] | None = None,
+        mask_extractor: PatchExtractor[GenericImageStack] | None = None,
+    ) -> None:
+        # Make sure all the image sizes are greater than the patch size for training
+        data_shapes = [
+            image_stack.data_shape for image_stack in input_extractor.image_stacks
+        ]
+        if data_config.mode != Mode.PREDICTING:
+            if not isinstance(
+                data_config.patching, WholePatchingConfig
+            ) and not _patch_size_within_data_shapes(
+                data_shapes, data_config.patching.patch_size
+            ):
+                raise ValueError(
+                    "Not all images sizes are greater than the patch size for training "
+                    "and validation."
+                )
+        self.config = data_config
+        self.input_extractor = input_extractor
+        self.target_extractor = target_extractor
+        self.patch_filter = (
+            create_patch_filter(self.config.patch_filter)
+            if self.config.patch_filter is not None
+            else None
+        )
+        self.coord_filter = (
+            create_coord_filter(self.config.coord_filter, mask=mask_extractor)
+            if self.config.coord_filter is not None and mask_extractor is not None
+            else None
+        )
+        self.patch_filter_patience = self.config.patch_filter_patience
+        self.patching_strategy = create_patching_strategy(
+            data_shapes=self.input_extractor.shapes,
+            patching_config=self.config.patching,
+        )
+        self.input_stats, self.target_stats = self._initialize_statistics()
+        self.transforms = self._initialize_transforms()
+    def _initialize_transforms(self) -> Compose | None:
+        normalize = NormalizeConfig(
+            image_means=self.input_stats.means,
+            image_stds=self.input_stats.stds,
+            target_means=self.target_stats.means,
+            target_stds=self.target_stats.stds,
+        )
+        if self.config.mode == Mode.TRAINING:
+            # TODO: initialize normalization separately depending on configuration
+            return Compose(transform_list=[normalize] + list(self.config.transforms))
+        # TODO: add TTA
+        return Compose(transform_list=[normalize])
+    def _calculate_stats(
+        self, data_extractor: PatchExtractor[GenericImageStack]
+    ) -> Stats:
+        image_stats = WelfordStatistics()
+        n_patches = self.patching_strategy.n_patches
+        for idx in tqdm(range(n_patches), desc="Computing statistics"):
+            patch_spec = self.patching_strategy.get_patch_spec(idx)
+            patch = data_extractor.extract_channel_patch(
+                data_idx=patch_spec["data_idx"],
+                sample_idx=patch_spec["sample_idx"],
+                channels=self.config.channels,
+                coords=patch_spec["coords"],
+                patch_size=patch_spec["patch_size"],
+            )
+            # TODO: statistics accept SCYX format, while patch is CYX
+            image_stats.update(patch[None, ...], sample_idx=idx)
+        image_means, image_stds = image_stats.finalize()
+        return Stats(image_means, image_stds)
+    # TODO: add running stats
+    def _initialize_statistics(self) -> tuple[Stats, Stats]:
+        if self.config.image_means is not None and self.config.image_stds is not None:
+            input_stats = Stats(self.config.image_means, self.config.image_stds)
+        else:
+            input_stats = self._calculate_stats(self.input_extractor)
+        target_stats = Stats((), ())
+        if self.config.target_means is not None and self.config.target_stds is not None:
+            target_stats = Stats(self.config.target_means, self.config.target_stds)
+        elif self.target_extractor is not None:
+            target_stats = self._calculate_stats(self.target_extractor)
+        return input_stats, target_stats
+    def __len__(self):
+        return self.patching_strategy.n_patches
+    def _create_image_region(
+        self, patch: np.ndarray, patch_spec: PatchSpecs, extractor: PatchExtractor
+    ) -> ImageRegionData:
+        data_idx = patch_spec["data_idx"]
+        image_stack: GenericImageStack = extractor.image_stacks[data_idx]
+        # adjust the number of channels in data_shape if needed
+        data_shape = _adjust_shape_for_channels(
+            shape=image_stack.data_shape,
+            channels=self.config.channels,
+        )
+        # additional metadata for zarr image stacks
+        if isinstance(image_stack, ZarrImageStack):
+            additional_metadata = {
+                "chunks": image_stack.chunks,
+            }
+            if image_stack.shards is not None:
+                additional_metadata["shards"] = image_stack.shards
+        else:
+            additional_metadata = {}
+        return ImageRegionData(
+            data=patch,
+            source=str(image_stack.source),
+            dtype=str(image_stack.data_dtype),
+            data_shape=data_shape,
+            # TODO: should it be axes of the original image instead?
+            axes=self.config.axes,
+            region_spec=patch_spec,
+            additional_metadata=additional_metadata,
+        )
+    def _extract_patches(
+        self, patch_spec: PatchSpecs
+    ) -> tuple[NDArray, NDArray | None]:
+        """Extract input and target patches based on patch specifications."""
+        input_patch = self.input_extractor.extract_channel_patch(
+            data_idx=patch_spec["data_idx"],
+            sample_idx=patch_spec["sample_idx"],
+            channels=self.config.channels,
+            coords=patch_spec["coords"],
+            patch_size=patch_spec["patch_size"],
+        )
+        target_patch = (
+            self.target_extractor.extract_channel_patch(
+                data_idx=patch_spec["data_idx"],
+                sample_idx=patch_spec["sample_idx"],
+                # TODO does not allow selecting different channels for target
+                channels=self.config.channels,
+                coords=patch_spec["coords"],
+                patch_size=patch_spec["patch_size"],
+            )
+            if self.target_extractor is not None
+            else None
+        )
+        return input_patch, target_patch
+    def _get_filtered_patch(
+        self, index: int
+    ) -> tuple[NDArray[Any], NDArray[Any] | None, PatchSpecs]:
+        """
+        Extract a patch that passes the filtering criteria, retrying if needed.
+
+        Filtering is only applied in training mode and when a patch filter or a
+        coordinate filter is set. The patience is decreased on every
+        coordinate-filter rejection and on every patch-filter evaluation; once
+        it is exhausted, the last queried patch is returned even if it was
+        rejected.
+
+        Parameters
+        ----------
+        index : int
+            Index passed to the patching strategy to query a patch specification.
+
+        Returns
+        -------
+        tuple[NDArray[Any], NDArray[Any] | None, PatchSpecs]
+            Input patch, target patch as returned by `_extract_patches`, and the
+            specification of the returned patch.
+        """
+        should_filter = self.config.mode == Mode.TRAINING and (
+            self.patch_filter is not None or self.coord_filter is not None
+        )
+        empty_patch = True
+        patch_filter_patience = self.patch_filter_patience  # reset patience
+        # NOTE(review): if the patience is not positive the loop body never runs
+        # and the return statement fails on unbound locals; presumably the
+        # configuration enforces a positive patience - confirm.
+        while empty_patch and patch_filter_patience > 0:
+            # query patches
+            patch_spec = self.patching_strategy.get_patch_spec(index)
+            # filter patch based on coordinates if needed
+            if should_filter and self.coord_filter is not None:
+                if self.coord_filter.filter_out(patch_spec):
+                    patch_filter_patience -= 1
+                    # TODO should we raise an error rather than silently accept patches?
+                    # if patience runs out without ever finding coordinates
+                    # then we need to guard against an exit before defining
+                    # input_patch and target_patch: on the last attempt we fall
+                    # through and extract the rejected patch instead of retrying
+                    if patch_filter_patience != 0:
+                        continue
+            input_patch, target_patch = self._extract_patches(patch_spec)
+            # filter patch based on values if needed
+            if should_filter and self.patch_filter is not None:
+                empty_patch = self.patch_filter.filter_out(input_patch)
+                patch_filter_patience -= 1  # decrease patience
+            else:
+                # no value-based filtering: accept the patch and leave the loop
+                empty_patch = False
+        return input_patch, target_patch, patch_spec
+    def __getitem__(
+        self, index: int
+    ) -> Union[tuple[ImageRegionData], tuple[ImageRegionData, ImageRegionData]]:
+        input_patch, target_patch, patch_spec = self._get_filtered_patch(index)
+        # apply transforms
+        if self.transforms is not None:
+            if self.target_extractor is not None:
+                input_patch, target_patch = self.transforms(input_patch, target_patch)
+            else:
+                # TODO: compose doesn't return None for target patch anymore
+                #   so have to do this annoying if else
+                (input_patch,) = self.transforms(input_patch, target_patch)
+                target_patch = None
+        input_data = self._create_image_region(
+            patch=input_patch, patch_spec=patch_spec, extractor=self.input_extractor
+        )
+        if target_patch is not None and self.target_extractor is not None:
+            target_data = self._create_image_region(
+                patch=target_patch,
+                patch_spec=patch_spec,
+                extractor=self.target_extractor,
+            )
+            return input_data, target_data
+        else:
+            return (input_data,)