PyPI - careamics - Versions diffs - 0.0.10__py3-none-any.whl → 0.0.12__py3-none-any.whl - Mend

careamics 0.0.10__py3-none-any.whl → 0.0.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of careamics might be problematic. Click here for more details.

Files changed (54)

careamics/careamist.py +20 -4
careamics/config/configuration.py +10 -5
careamics/config/data/data_model.py +38 -1
careamics/config/optimizer_models.py +1 -3
careamics/config/training_model.py +0 -2
careamics/dataset/dataset_utils/running_stats.py +7 -3
careamics/dataset_ng/README.md +212 -0
careamics/dataset_ng/dataset.py +233 -0
careamics/dataset_ng/demos/bsd68_demo.ipynb +356 -0
careamics/dataset_ng/demos/care_U2OS_demo.ipynb +330 -0
careamics/dataset_ng/demos/demo_custom_image_stack.ipynb +734 -0
careamics/dataset_ng/demos/demo_datamodule.ipynb +443 -0
careamics/dataset_ng/{demo_dataset.ipynb → demos/demo_dataset.ipynb} +39 -15
careamics/dataset_ng/{demo_patch_extractor.py → demos/demo_patch_extractor.py} +7 -9
careamics/dataset_ng/demos/mouse_nuclei_demo.ipynb +292 -0
careamics/dataset_ng/factory.py +408 -0
careamics/dataset_ng/legacy_interoperability.py +168 -0
careamics/dataset_ng/patch_extractor/__init__.py +3 -8
careamics/dataset_ng/patch_extractor/demo_custom_image_stack_loader.py +6 -4
careamics/dataset_ng/patch_extractor/image_stack/__init__.py +2 -1
careamics/dataset_ng/patch_extractor/image_stack/image_stack_protocol.py +5 -1
careamics/dataset_ng/patch_extractor/image_stack_loader.py +5 -75
careamics/dataset_ng/patch_extractor/patch_extractor.py +5 -4
careamics/dataset_ng/patch_extractor/patch_extractor_factory.py +73 -106
careamics/dataset_ng/patching_strategies/__init__.py +6 -1
careamics/dataset_ng/patching_strategies/patching_strategy_protocol.py +31 -0
careamics/dataset_ng/patching_strategies/random_patching.py +3 -1
careamics/dataset_ng/patching_strategies/tiling_strategy.py +171 -0
careamics/dataset_ng/patching_strategies/whole_sample.py +36 -0
careamics/lightning/dataset_ng/data_module.py +488 -0
careamics/lightning/dataset_ng/lightning_modules/__init__.py +9 -0
careamics/lightning/dataset_ng/lightning_modules/care_module.py +58 -0
careamics/lightning/dataset_ng/lightning_modules/n2v_module.py +67 -0
careamics/lightning/dataset_ng/lightning_modules/unet_module.py +143 -0
careamics/lightning/lightning_module.py +3 -0
careamics/lvae_training/dataset/__init__.py +8 -3
careamics/lvae_training/dataset/config.py +3 -3
careamics/lvae_training/dataset/ms_dataset_ref.py +1067 -0
careamics/lvae_training/dataset/multich_dataset.py +46 -17
careamics/lvae_training/dataset/multicrop_dset.py +196 -0
careamics/lvae_training/dataset/types.py +3 -3
careamics/lvae_training/dataset/utils/index_manager.py +259 -0
careamics/lvae_training/eval_utils.py +93 -3
careamics/transforms/compose.py +1 -0
careamics/transforms/normalize.py +18 -7
careamics/utils/lightning_utils.py +25 -11
{careamics-0.0.10.dist-info → careamics-0.0.12.dist-info}/METADATA +3 -3
{careamics-0.0.10.dist-info → careamics-0.0.12.dist-info}/RECORD +51 -36
careamics/dataset_ng/dataset/__init__.py +0 -3
careamics/dataset_ng/dataset/dataset.py +0 -184
careamics/dataset_ng/demo_patch_extractor_factory.py +0 -37
{careamics-0.0.10.dist-info → careamics-0.0.12.dist-info}/WHEEL +0 -0
{careamics-0.0.10.dist-info → careamics-0.0.12.dist-info}/entry_points.txt +0 -0
{careamics-0.0.10.dist-info → careamics-0.0.12.dist-info}/licenses/LICENSE +0 -0

careamics/lvae_training/dataset/multich_dataset.py CHANGED Viewed

@@ -340,25 +340,54 @@ class MultiChDloader:
         return self._data.shape[0]
     def reduce_data(
-        self, t_list=None, h_start=None, h_end=None, w_start=None, w_end=None
+        self,
+        t_list=None,
+        z_start=None,
+        z_end=None,
+        h_start=None,
+        h_end=None,
+        w_start=None,
+        w_end=None,
     ):
-        assert not self._5Ddata, "This function is not supported for 3D data."
-        if t_list is None:
-            t_list = list(range(self._data.shape[0]))
-        if h_start is None:
-            h_start = 0
-        if h_end is None:
-            h_end = self._data.shape[1]
-        if w_start is None:
-            w_start = 0
-        if w_end is None:
-            w_end = self._data.shape[2]
-        self._data = self._data[t_list, h_start:h_end, w_start:w_end, :].copy()
-        if self._noise_data is not None:
-            self._noise_data = self._noise_data[
-                t_list, h_start:h_end, w_start:w_end, :
+        if self._5Ddata:
+            if t_list is None:
+                t_list = list(range(self._data.shape[0]))
+            if z_start is None:
+                z_start = 0
+            if z_end is None:
+                z_end = self._data.shape[1]
+            if h_start is None:
+                h_start = 0
+            if h_end is None:
+                h_end = self._data.shape[2]
+            if w_start is None:
+                w_start = 0
+            if w_end is None:
+                w_end = self._data.shape[3]
+            self._data = self._data[
+                t_list, z_start:z_end, h_start:h_end, w_start:w_end, :
             ].copy()
+            if self._noise_data is not None:
+                self._noise_data = self._noise_data[
+                    t_list, z_start:z_end, h_start:h_end, w_start:w_end, :
+                ].copy()
+        else:
+            if t_list is None:
+                t_list = list(range(self._data.shape[0]))
+            if h_start is None:
+                h_start = 0
+            if h_end is None:
+                h_end = self._data.shape[1]
+            if w_start is None:
+                w_start = 0
+            if w_end is None:
+                w_end = self._data.shape[2]
+            self._data = self._data[t_list, h_start:h_end, w_start:w_end, :].copy()
+            if self._noise_data is not None:
+                self._noise_data = self._noise_data[
+                    t_list, h_start:h_end, w_start:w_end, :
+                ].copy()
         # TODO where tf is self._img_sz defined?
         self.set_img_sz([self._img_sz, self._img_sz], self._grid_sz)
         print(

careamics/lvae_training/dataset/multicrop_dset.py ADDED Viewed

@@ -0,0 +1,196 @@
+"""
+Here, we have multiple folders, each containing images of a single channel.
+"""
+from collections import defaultdict
+from functools import cache
+import numpy as np
+from .types import DataSplitType
+def l2(x):
+    return np.sqrt(np.mean(np.array(x) ** 2))
+class MultiCropDset:
+    def __init__(
+        self,
+        data_config,
+        fpath: str,
+        load_data_fn=None,
+        val_fraction=None,
+        test_fraction=None,
+    ):
+        assert (
+            data_config.input_is_sum == True
+        ), "This dataset is designed for sum of images"
+        self._img_sz = data_config.image_size
+        self._enable_rotation = data_config.enable_rotation_aug
+        self._background_values = data_config.background_values
+        self._data = load_data_fn(
+            data_config, fpath, data_config.datasplit_type, val_fraction, test_fraction
+        )
+        # remove upper quantiles, crucial for removing puncta
+        self.max_val = data_config.max_val
+        if self.max_val is not None:
+            for ch_idx, data in enumerate(self._data):
+                if self.max_val[ch_idx] is not None:
+                    for idx in range(len(data)):
+                        data[idx][data[idx] > self.max_val[ch_idx]] = self.max_val[
+                            ch_idx
+                        ]
+        # remove background values
+        if self._background_values is not None:
+            final_data_arr = []
+            for ch_idx, data in enumerate(self._data):
+                data_float = [x.astype(np.float32) for x in data]
+                final_data_arr.append(
+                    [x - self._background_values[ch_idx] for x in data_float]
+                )
+            self._data = final_data_arr
+        print(
+            f"{self.__class__.__name__} N:{len(self)} Rot:{self._enable_rotation} Ch:{len(self._data)} MaxVal:{self.max_val} Bg:{self._background_values}"
+        )
+    def get_max_val(self):
+        return self.max_val
+    def compute_mean_std(self):
+        mean_tar_dict = defaultdict(list)
+        std_tar_dict = defaultdict(list)
+        mean_inp = []
+        std_inp = []
+        for _ in range(30000):
+            crops = []
+            for ch_idx in range(len(self._data)):
+                crop = self.sample_crop(ch_idx)
+                mean_tar_dict[ch_idx].append(np.mean(crop))
+                std_tar_dict[ch_idx].append(np.std(crop))
+                crops.append(crop)
+            inp = 0
+            for img in crops:
+                inp += img
+            mean_inp.append(np.mean(inp))
+            std_inp.append(np.std(inp))
+        output_mean = defaultdict(list)
+        output_std = defaultdict(list)
+        NC = len(self._data)
+        for ch_idx in range(NC):
+            output_mean["target"].append(np.mean(mean_tar_dict[ch_idx]))
+            output_std["target"].append(l2(std_tar_dict[ch_idx]))
+        output_mean["target"] = np.array(output_mean["target"]).reshape(NC, 1, 1)
+        output_std["target"] = np.array(output_std["target"]).reshape(NC, 1, 1)
+        output_mean["input"] = np.array([np.mean(mean_inp)]).reshape(1, 1, 1)
+        output_std["input"] = np.array([l2(std_inp)]).reshape(1, 1, 1)
+        return dict(output_mean), dict(output_std)
+    def set_mean_std(self, mean_dict, std_dict):
+        self._data_mean = mean_dict
+        self._data_std = std_dict
+    def get_mean_std(self):
+        return self._data_mean, self._data_std
+    def get_num_frames(self):
+        return len(self._data)
+    @cache
+    def crop_probablities(self, ch_idx):
+        sizes = np.array([np.prod(x.shape) for x in self._data[ch_idx]])
+        return sizes / sizes.sum()
+    def sample_crop(self, ch_idx):
+        idx = None
+        count = 0
+        while idx is None:
+            count += 1
+            idx = np.random.choice(
+                len(self._data[ch_idx]), p=self.crop_probablities(ch_idx)
+            )
+            data = self._data[ch_idx][idx]
+            if data.shape[0] >= self._img_sz[0] and data.shape[1] >= self._img_sz[1]:
+                h = np.random.randint(0, data.shape[0] - self._img_sz[0])
+                w = np.random.randint(0, data.shape[1] - self._img_sz[1])
+                return data[h : h + self._img_sz[0], w : w + self._img_sz[1]]
+            elif count > 100:
+                raise ValueError("Cannot find a valid crop")
+            else:
+                idx = None
+        return None
+    def len_per_channel(self, ch_idx):
+        return np.sum([np.prod(x.shape) for x in self._data[ch_idx]]) / np.prod(
+            self._img_sz
+        )
+    def imgs_for_patch(self):
+        return [self.sample_crop(ch_idx) for ch_idx in range(len(self._data))]
+    def __len__(self):
+        len_per_channel = [
+            self.len_per_channel(ch_idx) for ch_idx in range(len(self._data))
+        ]
+        return int(np.max(len_per_channel))
+    def _rotate(self, img_tuples):
+        return self._rotate2D(img_tuples)
+    def _rotate2D(self, img_tuples):
+        img_kwargs = {}
+        for i, img in enumerate(img_tuples):
+            for k in range(len(img)):
+                img_kwargs[f"img{i}_{k}"] = img[k]
+        keys = list(img_kwargs.keys())
+        self._rotation_transform.add_targets({k: "image" for k in keys})
+        rot_dic = self._rotation_transform(image=img_tuples[0][0], **img_kwargs)
+        rotated_img_tuples = []
+        for i, img in enumerate(img_tuples):
+            if len(img) == 1:
+                rotated_img_tuples.append(rot_dic[f"img{i}_0"][None])
+            else:
+                rotated_img_tuples.append(
+                    np.concatenate(
+                        [rot_dic[f"img{i}_{k}"][None] for k in range(len(img))], axis=0
+                    )
+                )
+        return rotated_img_tuples
+    def _compute_input(self, imgs):
+        inp = 0
+        for img in imgs:
+            inp += img
+        inp = inp[None]
+        inp = (inp - self._data_mean["input"]) / (self._data_std["input"])
+        return inp
+    def _compute_target(self, imgs):
+        imgs = np.stack(imgs)
+        target = (imgs - self._data_mean["target"]) / (self._data_std["target"])
+        return target
+    def __getitem__(self, idx):
+        imgs = self.imgs_for_patch()
+        if self._enable_rotation:
+            imgs = self._rotate(imgs)
+        inp = self._compute_input(imgs)
+        target = self._compute_target(imgs)
+        return inp, target

careamics/lvae_training/dataset/types.py CHANGED Viewed

@@ -2,9 +2,9 @@ from enum import Enum
 class DataType(Enum):
-    Elisa3DData = 0
+    HTH24Data = 0
     HTLIF24Data = 1
-    Pavia3SeqData = 2
+    PaviaP24Data = 2
     TavernaSox2GolgiV2 = 3
     Dao3ChannelWithInput = 4
     ExpMicroscopyV1 = 5
@@ -15,7 +15,7 @@ class DataType(Enum):
     OptiMEM100_014 = 10
     SeparateTiffData = 11
     BioSR_MRC = 12
-    PunctaRemoval = 13  # for the case when we have a set of differently sized crops for each channel.
+    HTH23BData = 13  # puncta, in case we have differently sized crops for each channel.
     Care3D = 14

careamics/lvae_training/dataset/utils/index_manager.py CHANGED Viewed

@@ -230,3 +230,262 @@ class GridIndexManager:
         new_idx = dataset_idx - self.grid_count(dim)
         if new_idx < 0:
             return None
+@dataclass
+class GridIndexManagerRef:
+    data_shapes: tuple
+    grid_shape: tuple
+    patch_shape: tuple
+    tiling_mode: TilingMode
+    # This class is used to calculate and store information about patches, and calculate
+    # the total length of the dataset in patches.
+    # It introduces a concept of a grid, to which input images are split.
+    # The grid is defined by the grid_shape and patch_shape, with former controlling the
+    # overlap.
+    # In this reimplementation it can accept multiple channels with different lengths,
+    # and every image can have different shape.
+    def __post_init__(self):
+        if len(self.data_shapes) > 1:
+            assert {len(ds) for ds in self.data_shapes[0]}.pop() == {
+                len(ds) for ds in self.data_shapes[1]
+            }.pop(), "Data shape for all channels must be the same"  # TODO better way to assert this
+        assert {len(ds) for ds in self.data_shapes[0]}.pop() == len(
+            self.grid_shape
+        ), "Data shape and grid size must have the same dimension"
+        assert {len(ds) for ds in self.data_shapes[0]}.pop() == len(
+            self.patch_shape
+        ), "Data shape and patch shape must have the same dimension"
+        innerpad = np.array(self.patch_shape) - np.array(self.grid_shape)
+        for dim, pad in enumerate(innerpad):
+            if pad < 0:
+                raise ValueError(
+                    f"Patch shape must be greater than or equal to grid shape in dimension {dim}"
+                )
+            if pad % 2 != 0:
+                raise ValueError(
+                    f"Patch shape must have even padding in dimension {dim}"
+                )
+        self.num_patches_per_channel = self.total_grid_count()[1]
+    def patch_offset(self):
+        return (np.array(self.patch_shape) - np.array(self.grid_shape)) // 2
+    def get_individual_dim_grid_count(self, shape: tuple, dim: int):
+        """
+        Returns the number of the grid in the specified dimension, ignoring all other dimensions.
+        """
+        # assert that dim is less than the number of dimensions in data shape
+        # if dim > len()
+        if self.grid_shape[dim] == 1 and self.patch_shape[dim] == 1:
+            return shape[dim]
+        elif self.tiling_mode == TilingMode.PadBoundary:
+            return int(np.ceil(shape[dim] / self.grid_shape[dim]))
+        elif self.tiling_mode == TilingMode.ShiftBoundary:
+            excess_size = self.patch_shape[dim] - self.grid_shape[dim]
+            return int(np.ceil((shape[dim] - excess_size) / self.grid_shape[dim]))
+            # if dim_index < self.get_individual_dim_grid_count(dim) - 1:
+            #         return dim_index * self.grid_shape[dim] + excess_size
+            # on boundary. grid should be placed such that the patch covers the entire data.
+            # return self.data_shape[dim] - self.grid_shape[dim] - excess_size
+        else:
+            excess_size = self.patch_shape[dim] - self.grid_shape[dim]
+            return int(np.floor((shape[dim] - excess_size) / self.grid_shape[dim]))
+    def total_grid_count(self):
+        """Returns the total number of patches in the dataset."""
+        len_per_channel = []
+        num_patches_per_sample = []
+        for channel_data in self.data_shapes:
+            num_patches = []
+            for file_shape in channel_data:
+                num_patches.append(np.prod(self.grid_count_per_sample(file_shape)))
+            len_per_channel.append(np.sum(num_patches))
+            num_patches_per_sample.append(num_patches)
+        return len_per_channel, num_patches_per_sample
+    def grid_count_per_sample(self, shape: tuple):
+        """Returns the total number of patches for one dimension."""
+        grid_count = []
+        for dim in range(len(shape)):
+            grid_count.append(self.get_individual_dim_grid_count(shape, dim))
+        return grid_count
+    def get_grid_index(self, shape, dim: int, coordinate: int):
+        """Returns the index of the patch in the specified dimension."""
+        assert dim < len(
+            shape
+        ), f"Dimension {dim} is out of bounds for data shape {shape}"
+        assert dim >= 0, "Dimension must be greater than or equal to 0"
+        assert (
+            coordinate < shape[dim]
+        ), f"Coordinate {coordinate} is out of bounds for data shape {shape}"
+        if self.grid_shape[dim] == 1 and self.patch_shape[dim] == 1:
+            return coordinate
+        elif self.tiling_mode == TilingMode.PadBoundary:  # self.trim_boundary is False:
+            return np.floor(coordinate / self.grid_shape[dim])
+        elif self.tiling_mode == TilingMode.TrimBoundary:
+            excess_size = (self.patch_shape[dim] - self.grid_shape[dim]) // 2
+            # can be <0 if coordinate is in [0,grid_shape[dim]]
+            return max(0, np.floor((coordinate - excess_size) / self.grid_shape[dim]))
+        elif self.tiling_mode == TilingMode.ShiftBoundary:
+            excess_size = (self.patch_shape[dim] - self.grid_shape[dim]) // 2
+            if coordinate + self.grid_shape[dim] + excess_size == self.data_shapes[dim]:
+                return self.get_individual_dim_grid_count(shape, dim) - 1
+            else:
+                # can be <0 if coordinate is in [0,grid_shape[dim]]
+                return max(
+                    0, np.floor((coordinate - excess_size) / self.grid_shape[dim])
+                )
+        else:
+            raise ValueError(f"Unsupported tiling mode {self.tiling_mode}")
+    def patch_idx_from_grid_idx(self, shape: tuple, grid_idx: tuple):
+        """Returns the index of the patch in the dataset."""
+        assert len(grid_idx) == len(
+            shape
+        ), f"Dimension indices {grid_idx} must have the same dimension as data shape {shape}"
+        index = 0
+        for dim in range(len(grid_idx)):
+            index += grid_idx[dim] * self.grid_count(shape, dim)
+        return index
+    def get_patch_location_from_patch_idx(self, ch_idx: int, patch_idx: int):
+        """Returns the patch location of the grid in the dataset."""
+        grid_location = self.get_location_from_patch_idx(ch_idx, patch_idx)
+        offset = self.patch_offset()
+        return tuple(np.array(grid_location) - np.concatenate((np.array((0,)), offset)))
+    def get_patch_idx_from_grid_location(self, shape, location: tuple):
+        assert len(location) == len(
+            shape
+        ), f"Location {location} must have the same dimension as data shape {shape}"
+        grid_idx = [
+            self.get_grid_index(dim, location[dim]) for dim in range(len(location))
+        ]
+        return self.patch_idx_from_grid_idx(tuple(grid_idx))
+    def get_gridstart_location_from_dim_index(
+        self, shape: tuple, dim_idx: int, dim: int
+    ):
+        """Returns the grid-start coordinate of the grid in the specified dimension.
+        dim_idx: int
+            Index of the dimension in the data shape.
+        dim: int
+            Value of the dimension in the grid (relative to num patches in dimension).
+        """
+        if self.grid_shape[dim_idx] == 1 and self.patch_shape[dim_idx] == 1:
+            return dim_idx
+        elif self.tiling_mode == TilingMode.ShiftBoundary:
+            excess_size = (self.patch_shape[dim_idx] - self.grid_shape[dim_idx]) // 2
+            if dim < self.get_individual_dim_grid_count(shape, dim_idx) - 1:
+                return dim * self.grid_shape[dim_idx] + excess_size
+            else:
+                # on boundary. grid should be placed such that the patch covers the entire data.
+                return shape[dim_idx] - self.grid_shape[dim_idx] - excess_size
+        else:
+            raise ValueError(f"Unsupported tiling mode {self.tiling_mode}")
+    def get_location_from_patch_idx(self, channel_idx: int, patch_idx: int):
+        """
+        Returns the start location of the grid in the dataset. Per channel!.
+        Parameters
+        ----------
+        patch_idx : int
+            The index of the patch in a list of samples within a channel. Channels can
+            be different in length.
+        """
+        # TODO assert patch_idx <= num of patches in the channel
+        # create cumulative sum of the grid counts for each channel
+        cumulative_indices = np.cumsum(self.total_grid_count()[1][channel_idx])
+        # find the channel index
+        sample_idx = np.searchsorted(cumulative_indices, patch_idx, side="right")
+        sample_shape = self.data_shapes[channel_idx][sample_idx]
+        # TODO duplicated runs, revisit
+        # ingoring the channel dimension because we index it explicitly
+        grid_count = self.grid_count_per_sample(sample_shape)[1:]
+        grid_idx = []
+        for i in range(len(grid_count) - 1, -1, -1):
+            stride = np.prod(grid_count[:i]) if i > 0 else 1
+            grid_idx.insert(0, patch_idx // stride)
+            patch_idx %= stride
+        # TODO check for 3D !
+        # adding channel index
+        grid_idx = [channel_idx] + grid_idx
+        location = [
+            sample_idx,
+        ] + [
+            self.get_gridstart_location_from_dim_index(
+                shape=sample_shape, dim_idx=dim_idx, dim=grid_idx[dim_idx]
+            )
+            for dim_idx in range(len(grid_idx))
+        ]
+        return tuple(location)
+    def get_location_from_patch_idx_o(self, dataset_idx: int):
+        """
+        Returns the start location of the grid in the dataset.
+        """
+        grid_idx = []
+        for dim in range(len(self.data_shape)):
+            grid_idx.append(dataset_idx // self.grid_count(dim))
+            dataset_idx = dataset_idx % self.grid_count(dim)
+        location = [
+            self.get_gridstart_location_from_dim_index(dim, grid_idx[dim])
+            for dim in range(len(self.data_shape))
+        ]
+        return tuple(location)
+    def on_boundary(self, dataset_idx: int, dim: int, only_end: bool = False):
+        """
+        Returns True if the grid is on the boundary in the specified dimension.
+        """
+        assert dim < len(
+            self.data_shapes
+        ), f"Dimension {dim} is out of bounds for data shape {self.data_shapes}"
+        assert dim >= 0, "Dimension must be greater than or equal to 0"
+        if dim > 0:
+            dataset_idx = dataset_idx % self.grid_count(dim - 1)
+        dim_index = dataset_idx // self.grid_count(dim)
+        if only_end:
+            return dim_index == self.get_individual_dim_grid_count(dim) - 1
+        return (
+            dim_index == 0 or dim_index == self.get_individual_dim_grid_count(dim) - 1
+        )
+    def next_grid_along_dim(self, dataset_idx: int, dim: int):
+        """
+        Returns the index of the grid in the specified dimension in the specified direction.
+        """
+        assert dim < len(
+            self.data_shapes
+        ), f"Dimension {dim} is out of bounds for data shape {self.data_shapes}"
+        assert dim >= 0, "Dimension must be greater than or equal to 0"
+        new_idx = dataset_idx + self.grid_count(dim)
+        if new_idx >= self.total_grid_count():
+            return None
+        return new_idx
+    def prev_grid_along_dim(self, dataset_idx: int, dim: int):
+        """
+        Returns the index of the grid in the specified dimension in the specified direction.
+        """
+        assert dim < len(
+            self.data_shapes
+        ), f"Dimension {dim} is out of bounds for data shape {self.data_shapes}"
+        assert dim >= 0, "Dimension must be greater than or equal to 0"
+        new_idx = dataset_idx - self.grid_count(dim)
+        if new_idx < 0:
+            return None

careamics/lvae_training/eval_utils.py CHANGED Viewed

@@ -14,10 +14,11 @@ import matplotlib.pyplot as plt
 import numpy as np
 import torch
 from matplotlib.gridspec import GridSpec
-from torch.utils.data import DataLoader, Dataset, Subset
+from torch.utils.data import DataLoader, Dataset
 from tqdm import tqdm
 from careamics.lightning import VAEModule
+from careamics.lvae_training.dataset import MultiChDloaderRef
 from careamics.utils.metrics import scale_invariant_psnr
@@ -542,7 +543,9 @@ def get_predictions(
             mmse_count=mmse_count,
             num_workers=num_workers,
         )
+        # TODO stitching still not working properly for weirdly shaped images
         # get filename without extension and path
+        # TODO in the ref ds this is the name of a folder not file :(
         filename = dset._fpath.name
         return (
             {filename: stitched_predictions},
@@ -656,8 +659,14 @@ def get_single_file_mmse(
     tiles_arr = np.concatenate(tile_mmse, axis=0)
     tile_stds = np.concatenate(tile_stds, axis=0)
-    stitched_predictions = stitch_predictions_new(tiles_arr, dset)
-    stitched_stds = stitch_predictions_new(tile_stds, dset)
+    # TODO temporary hack, because of the stupid jupyter!
+    # If a user reruns a cell with class definition, isinstance will return False
+    if str(MultiChDloaderRef).split(".")[-1] == str(dset.__class__).split(".")[-1]:
+        stitch_func = stitch_predictions_general
+    else:
+        stitch_func = stitch_predictions_new
+    stitched_predictions = stitch_func(tiles_arr, dset)
+    stitched_stds = stitch_func(tile_stds, dset)
     return stitched_predictions, stitched_stds
@@ -873,3 +882,84 @@ def stitch_predictions_new(predictions, dset):
                 raise ValueError(f"Unsupported shape {output.shape}")
     return output
+def stitch_predictions_general(predictions, dset):
+    """Stitching for the dataset with multiple files of different shape."""
+    mng = dset.idx_manager
+    # TODO assert all shapes are equal len
+    # adjust number of channels to match with prediction shape #TODO ugly, refac!
+    shapes = []
+    for shape in dset.get_data_shapes()[0]:
+        shapes.append((predictions.shape[1],) + shape[1:])
+    output = [np.zeros(shape, dtype=predictions.dtype) for shape in shapes]
+    # frame_shape = dset.get_data_shape()[:-1]
+    for patch_idx in range(predictions.shape[0]):
+        # grid start, grid end
+        # channel_idx is 0 because during prediction we're only use one channel. # TODO revisit this
+        # 0th dimension is sample index in the output list
+        grid_coords = np.array(
+            mng.get_location_from_patch_idx(channel_idx=0, patch_idx=patch_idx),
+            dtype=int,
+        )
+        sample_idx = grid_coords[0]
+        grid_start = grid_coords[1:]
+        # from here on, coordinates are relative to the sample(file in the list of inputs)
+        grid_end = grid_start + mng.grid_shape
+        # patch start, patch end
+        patch_start = grid_start - mng.patch_offset()
+        patch_end = patch_start + mng.patch_shape
+        # valid grid start, valid grid end
+        valid_grid_start = np.array([max(0, x) for x in grid_start], dtype=int)
+        valid_grid_end = np.array(
+            [min(x, y) for x, y in zip(grid_end, shapes[sample_idx])], dtype=int
+        )
+        if mng.tiling_mode == TilingMode.ShiftBoundary:
+            for dim in range(len(valid_grid_start)):
+                if patch_start[dim] == 0:
+                    valid_grid_start[dim] = 0
+                if patch_end[dim] == mng.data_shape[dim]:
+                    valid_grid_end[dim] = mng.data_shape[dim]
+        # relative start, relative end. This will be used on pred_tiled
+        relative_start = valid_grid_start - patch_start
+        relative_end = relative_start + (valid_grid_end - valid_grid_start)
+        for ch_idx in range(predictions.shape[1]):
+            if len(output[sample_idx].shape) == 3:
+                # starting from 1 because 0th dimension is channel relative to input
+                # channel dimension for stitched output is relative to model output
+                output[sample_idx][
+                    ch_idx,
+                    valid_grid_start[1] : valid_grid_end[1],
+                    valid_grid_start[2] : valid_grid_end[2],
+                ] = predictions[patch_idx][
+                    ch_idx,
+                    relative_start[1] : relative_end[1],
+                    relative_start[2] : relative_end[2],
+                ]
+            elif len(output[sample_idx].shape) == 4:
+                assert (
+                    valid_grid_end[0] - valid_grid_start[0] == 1
+                ), "Only one frame is supported"
+                output[
+                    ch_idx,
+                    valid_grid_start[0],
+                    valid_grid_end[1] : valid_grid_end[1],
+                    valid_grid_start[2] : valid_grid_end[2],
+                    valid_grid_start[3] : valid_grid_end[3],
+                ] = predictions[patch_idx][
+                    ch_idx,
+                    relative_start[1] : relative_end[1],
+                    relative_start[2] : relative_end[2],
+                    relative_start[3] : relative_end[3],
+                ]
+            else:
+                raise ValueError(f"Unsupported shape {output.shape}")
+    return output

careamics/transforms/compose.py CHANGED Viewed

@@ -86,6 +86,7 @@ class Compose:
             *params, _ = t(*params)  # ignore additional_arrays dict
         # avoid None values that create problems for collating
+        # TODO: removing None should be handled in dataset, not here
         return tuple(p for p in params if p is not None)
     def _chain_transforms_additional_arrays(

careamics 0.0.10__py3-none-any.whl → 0.0.12__py3-none-any.whl

Potentially problematic release.

careamics 0.0.10__py3-none-any.whl → 0.0.12__py3-none-any.whl