PyPI - kaiko-eva - Versions diffs - 0.2.0__py3-none-any.whl → 0.2.2__py3-none-any.whl - Mend

kaiko-eva 0.2.0__py3-none-any.whl → 0.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of kaiko-eva might be problematic. Click here for more details.

Files changed (85) hide show

eva/vision/data/datasets/segmentation/__init__.py CHANGED Viewed

@@ -1,7 +1,7 @@
 """Segmentation datasets API."""
-from eva.vision.data.datasets.segmentation.base import ImageSegmentation
 from eva.vision.data.datasets.segmentation.bcss import BCSS
+from eva.vision.data.datasets.segmentation.btcv import BTCV
 from eva.vision.data.datasets.segmentation.consep import CoNSeP
 from eva.vision.data.datasets.segmentation.embeddings import EmbeddingsSegmentationDataset
 from eva.vision.data.datasets.segmentation.lits import LiTS
@@ -10,8 +10,8 @@ from eva.vision.data.datasets.segmentation.monusac import MoNuSAC
 from eva.vision.data.datasets.segmentation.total_segmentator_2d import TotalSegmentator2D
 __all__ = [
-    "ImageSegmentation",
     "BCSS",
+    "BTCV",
     "CoNSeP",
     "EmbeddingsSegmentationDataset",
     "LiTS",

eva/vision/data/datasets/segmentation/_utils.py CHANGED Viewed

@@ -1,8 +1,12 @@
 from typing import Any, Tuple
 import numpy.typing as npt
+import torch
+from torchvision import tv_tensors
+from eva.vision.data import tv_tensors as eva_tv_tensors
 from eva.vision.data.datasets import wsi
+from eva.vision.utils import io
 def get_coords_at_index(
@@ -36,3 +40,46 @@ def extract_mask_patch(
     """
     (x, y), width, height = get_coords_at_index(dataset, index)
     return mask[y : y + height, x : x + width]
+def load_volume_tensor(file: str, orientation: str = "PLS") -> eva_tv_tensors.Volume:
+    """Read a NIfTI file into a :class:`eva.vision.data.tv_tensors.Volume`.
+    Args:
+        file: Location of the NIfTI file on disk.
+        orientation: Orientation code passed to the NIfTI reader to reorient the image.
+    Returns:
+        The volume as a float32 tensor of shape `[T, C, H, W]`, carrying its affine matrix.
+    Raises:
+        ValueError: If the loaded NIfTI image has no affine matrix.
+    """
+    nifti_image = io.read_nifti(file, orientation=orientation)
+    voxels = io.nifti_to_array(nifti_image)
+    # Prepend a singleton axis, then move the last axis to the front.
+    voxels_tchw = voxels[None, :, :, :].transpose(3, 0, 1, 2)
+    if nifti_image.affine is None:
+        raise ValueError(f"Affine matrix is missing for {file}.")
+    # Permute the affine columns the same way the spatial axes were permuted above.
+    permuted_affine = nifti_image.affine[:, [2, 0, 1, 3]]
+    affine = torch.tensor(permuted_affine, dtype=torch.float32)
+    volume = eva_tv_tensors.Volume(voxels_tchw, affine=affine, dtype=torch.float32)  # type: ignore
+    return volume
+def load_mask_tensor(
+    file: str, volume_file: str | None = None, orientation: str = "PLS"
+) -> tv_tensors.Mask:
+    """Load a segmentation mask from NIfTI file as :class:`torchvision.tv_tensors.Mask`.
+    Args:
+        file: The path to the NIfTI file containing the mask.
+        volume_file: The path to the volume file used as orientation reference in case
+            the mask file is missing the pixdim array in the NIfTI header.
+        orientation: The orientation code to reorient the nifti image.
+    Returns:
+        Mask tensor of shape `[T, C, H, W]` with dtype `torch.long`.
+    """
+    # Forward the caller's orientation code. It used to be hard-coded to "PLS",
+    # which silently ignored the `orientation` argument (the default is unchanged).
+    nii = io.read_nifti(file, orientation=orientation, orientation_reference=volume_file)
+    array = io.nifti_to_array(nii)
+    # Prepend a singleton axis, then move the last axis to the front.
+    array_reshaped_tchw = array[None, :, :, :].transpose(3, 0, 1, 2)
+    return tv_tensors.Mask(array_reshaped_tchw, dtype=torch.long)  # type: ignore

eva/vision/data/datasets/segmentation/bcss.py CHANGED Viewed

@@ -12,13 +12,13 @@ from torchvision import tv_tensors
 from torchvision.transforms.v2 import functional
 from typing_extensions import override
-from eva.vision.data.datasets import _validators, wsi
-from eva.vision.data.datasets.segmentation import _utils, base
+from eva.vision.data.datasets import _validators, vision, wsi
+from eva.vision.data.datasets.segmentation import _utils
 from eva.vision.data.wsi.patching import samplers
 from eva.vision.utils import io
-class BCSS(wsi.MultiWsiDataset, base.ImageSegmentation):
+class BCSS(wsi.MultiWsiDataset, vision.VisionDataset[tv_tensors.Image, tv_tensors.Mask]):
     """Dataset class for BCSS semantic segmentation task.
     Source: https://github.com/PathologyDataScience/BCSS
@@ -71,7 +71,6 @@ class BCSS(wsi.MultiWsiDataset, base.ImageSegmentation):
             width: Width of the patches to be extracted, in pixels.
             height: Height of the patches to be extracted, in pixels.
             target_mpp: Target microns per pixel (mpp) for the patches.
-            backend: The backend to use for reading the whole-slide images.
             transforms: Transforms to apply to the extracted image & mask patches.
         """
         self._split = split
@@ -90,7 +89,7 @@ class BCSS(wsi.MultiWsiDataset, base.ImageSegmentation):
             overwrite_mpp=0.25,
             backend="pil",
         )
-        base.ImageSegmentation.__init__(self, transforms=transforms)
+        vision.VisionDataset.__init__(self, transforms=transforms)
     @property
     @override
@@ -129,15 +128,15 @@ class BCSS(wsi.MultiWsiDataset, base.ImageSegmentation):
     @override
     def __getitem__(self, index: int) -> Tuple[tv_tensors.Image, tv_tensors.Mask, Dict[str, Any]]:
-        return base.ImageSegmentation.__getitem__(self, index)
+        return vision.VisionDataset.__getitem__(self, index)
     @override
-    def load_image(self, index: int) -> tv_tensors.Image:
+    def load_data(self, index: int) -> tv_tensors.Image:
         image_array = wsi.MultiWsiDataset.__getitem__(self, index)
         return functional.to_image(image_array)
     @override
-    def load_mask(self, index: int) -> tv_tensors.Mask:
+    def load_target(self, index: int) -> tv_tensors.Mask:
         path = self._get_mask_path(index)
         mask = io.read_image_as_array(path)
         mask_patch = _utils.extract_mask_patch(mask, self, index)

eva/vision/data/datasets/segmentation/btcv.py ADDED Viewed

@@ -0,0 +1,236 @@
+"""BTCV dataset."""
+import glob
+import os
+import re
+from typing import Any, Callable, Dict, List, Literal, Tuple
+import huggingface_hub
+from torchvision import tv_tensors
+from torchvision.datasets import utils as data_utils
+from typing_extensions import override
+from eva.vision.data import tv_tensors as eva_tv_tensors
+from eva.vision.data.datasets import _utils as _data_utils
+from eva.vision.data.datasets.segmentation import _utils
+from eva.vision.data.datasets.vision import VisionDataset
+class BTCV(VisionDataset[eva_tv_tensors.Volume, tv_tensors.Mask]):
+    """Beyond the Cranial Vault (BTCV) Abdomen dataset.
+    The BTCV dataset comprises abdominal CT acquired at the Vanderbilt
+    University Medical Center from metastatic liver cancer patients or
+    post-operative ventral hernia patients. The dataset contains one
+    background class and thirteen organ classes.
+    More info:
+      - Multi-organ Abdominal CT Reference Standard Segmentations
+        https://zenodo.org/records/1169361
+      - Dataset Split
+        https://github.com/Luffy03/Large-Scale-Medical/blob/main/Downstream/monai/BTCV/dataset/dataset_0.json
+    """
+    _split_index_ranges = {
+        "train": [(0, 24)],
+        "val": [(24, 30)],
+        None: [(0, 30)],
+    }
+    """Sample indices for the dataset splits."""
+    def __init__(
+        self,
+        root: str,
+        split: Literal["train", "val"] | None = None,
+        download: bool = False,
+        transforms: Callable | None = None,
+    ) -> None:
+        """Initializes the dataset.
+        Args:
+            root: Path to the dataset root directory.
+            split: Dataset split to use ('train' or 'val').
+                If None, it uses the full dataset.
+            download: Whether to download the dataset.
+            transforms: A callable object for applying data transformations.
+                If None, no transformations are applied.
+        """
+        super().__init__(transforms=transforms)
+        self._root = root
+        self._split = split
+        self._download = download
+        # Both attributes are only declared here; `configure` populates them.
+        self._samples: List[Tuple[str, str]]
+        self._indices: List[int]
+    @property
+    @override
+    def classes(self) -> List[str]:
+        return [
+            "background",
+            "spleen",
+            "right_kidney",
+            "left_kidney",
+            "gallbladder",
+            "esophagus",
+            "liver",
+            "stomach",
+            "aorta",
+            "inferior_vena_cava",
+            "portal_and_splenic_vein",
+            "pancreas",
+            "right_adrenal_gland",
+            "left_adrenal_gland",
+        ]
+    @property
+    @override
+    def class_to_idx(self) -> Dict[str, int]:
+        return {label: index for index, label in enumerate(self.classes)}
+    @override
+    def filename(self, index: int) -> str:
+        """Returns the base name of the volume file of the `index`'th sample."""
+        return os.path.basename(self._samples[self._indices[index]][0])
+    @override
+    def prepare_data(self) -> None:
+        """Downloads the dataset when `download` was requested."""
+        if self._download:
+            self._download_dataset()
+    @override
+    def configure(self) -> None:
+        """Discovers the sample files and builds the indices of the split."""
+        self._samples = self._find_samples()
+        self._indices = self._make_indices()
+    @override
+    def validate(self) -> None:
+        """Checks that every sample of the split exists on disk.
+        Raises:
+            OSError: If fewer samples were found than the split requires, or
+                if a volume or segmentation file of the split is missing.
+        """
+        # The split indices address `self._samples` by absolute position, so at
+        # least `max(index) + 1` samples are required. Comparing plain lengths
+        # would let an incomplete dataset pass for the "val" split, whose
+        # indices do not start at zero, and fail later with an IndexError.
+        required = max(self._indices, default=-1) + 1
+        if len(self._samples) < required:
+            raise OSError(f"Dataset is missing {required - len(self._samples)} files.")
+        # Resolve each position through `self._indices` so the reported samples
+        # are exactly the ones whose files were checked.
+        invalid_samples = [
+            sample
+            for sample in (self._samples[i] for i in self._indices)
+            if not all(os.path.isfile(path) for path in sample)
+        ]
+        if invalid_samples:
+            raise OSError(
+                f"Dataset '{self.__class__.__qualname__}' contains missing or "
+                f"corrupted samples  ({len(invalid_samples)} in total). "
+                f"Examples of missing folders: {str(invalid_samples[:10])[:-1]}, ...]. "
+            )
+    @override
+    def __getitem__(
+        self, index: int
+    ) -> tuple[eva_tv_tensors.Volume, tv_tensors.Mask, dict[str, Any]]:
+        volume = self.load_data(index)
+        mask = self.load_target(index)
+        metadata = self.load_metadata(index) or {}
+        volume_tensor, mask_tensor = self._apply_transforms(volume, mask)
+        return volume_tensor, mask_tensor, metadata
+    @override
+    def __len__(self) -> int:
+        return len(self._indices)
+    @override
+    def load_data(self, index: int) -> eva_tv_tensors.Volume:
+        """Loads the CT volume for a given sample.
+        Args:
+            index: The index of the desired sample.
+        Returns:
+            Tensor representing the CT volume of shape `[T, C, H, W]`.
+        """
+        ct_scan_file, _ = self._samples[self._indices[index]]
+        return _utils.load_volume_tensor(ct_scan_file)
+    @override
+    def load_target(self, index: int) -> tv_tensors.Mask:
+        """Loads the segmentation mask for a given sample.
+        Args:
+            index: The index of the desired sample.
+        Returns:
+            Tensor representing the segmentation mask of shape `[T, C, H, W]`.
+        """
+        ct_scan_file, mask_file = self._samples[self._indices[index]]
+        # The CT file is passed as orientation reference for the mask.
+        return _utils.load_mask_tensor(mask_file, ct_scan_file)
+    def _apply_transforms(
+        self, ct_scan: eva_tv_tensors.Volume, mask: tv_tensors.Mask
+    ) -> tuple[eva_tv_tensors.Volume, tv_tensors.Mask]:
+        """Applies transformations to the provided data.
+        Args:
+            ct_scan: The CT volume tensor.
+            mask: The segmentation mask tensor.
+        Returns:
+            A tuple containing the transformed CT and mask tensors.
+        """
+        return self._transforms(ct_scan, mask) if self._transforms else (ct_scan, mask)
+    def _find_samples(self) -> list[tuple[str, str]]:
+        """Retrieves the file paths for the CT volumes and segmentations.
+        Returns:
+            A list of (volume, segmentation) file path pairs, sorted by the
+            numeric id in the file names. Only ids that have both a volume
+            and a segmentation file are included.
+        """
+        def filename_id(filename: str) -> int:
+            # The greedy prefix makes the capture the last run of digits.
+            matches = re.match(r".*(?:\D|^)(\d+)", filename)
+            if matches is None:
+                raise ValueError(f"Filename '{filename}' is not valid.")
+            return int(matches.group(1))
+        # Prefer a "BTCV" sub-directory of the root when it exists.
+        subdir = os.path.join(self._root, "BTCV")
+        root = subdir if os.path.isdir(subdir) else self._root
+        volume_files_pattern = os.path.join(root, "imagesTr", "*.nii.gz")
+        volume_filenames = glob.glob(volume_files_pattern)
+        volume_ids = {filename_id(filename): filename for filename in volume_filenames}
+        segmentation_files_pattern = os.path.join(root, "labelsTr", "*.nii.gz")
+        segmentation_filenames = glob.glob(segmentation_files_pattern)
+        segmentation_ids = {filename_id(filename): filename for filename in segmentation_filenames}
+        return [
+            (volume_ids[file_id], segmentation_ids[file_id])
+            for file_id in sorted(volume_ids.keys() & segmentation_ids.keys())
+        ]
+    def _make_indices(self) -> list[int]:
+        """Builds the dataset indices for the specified split."""
+        index_ranges = self._split_index_ranges.get(self._split)
+        if index_ranges is None:
+            raise ValueError("Invalid data split. Use 'train', 'val' or `None`.")
+        return _data_utils.ranges_to_indices(index_ranges)
+    def _download_dataset(self) -> None:
+        """Downloads `BTCV.zip` from the Hugging Face hub and extracts it into the root.
+        Raises:
+            ValueError: If the `HF_TOKEN` environment variable is not set.
+            FileNotFoundError: If the archive is not present after the download.
+        """
+        hf_token = os.getenv("HF_TOKEN")
+        if not hf_token:
+            raise ValueError("Huggingface token required, please set the HF_TOKEN env variable.")
+        huggingface_hub.snapshot_download(
+            "Luffy503/VoCo_Downstream",
+            repo_type="dataset",
+            token=hf_token,
+            local_dir=self._root,
+            ignore_patterns=[".git*"],
+            allow_patterns=["BTCV.zip"],
+        )
+        zip_path = os.path.join(self._root, "BTCV.zip")
+        if not os.path.exists(zip_path):
+            raise FileNotFoundError(
+                f"BTCV.zip not found in {self._root}, something with the download went wrong."
+            )
+        data_utils.extract_archive(zip_path, self._root, remove_finished=True)

eva/vision/data/datasets/segmentation/consep.py CHANGED Viewed

@@ -11,13 +11,13 @@ from torchvision import tv_tensors
 from torchvision.transforms.v2 import functional
 from typing_extensions import override
-from eva.vision.data.datasets import _validators, wsi
-from eva.vision.data.datasets.segmentation import _utils, base
+from eva.vision.data.datasets import _validators, vision, wsi
+from eva.vision.data.datasets.segmentation import _utils
 from eva.vision.data.wsi.patching import samplers
 from eva.vision.utils import io
-class CoNSeP(wsi.MultiWsiDataset, base.ImageSegmentation):
+class CoNSeP(wsi.MultiWsiDataset, vision.VisionDataset[tv_tensors.Image, tv_tensors.Mask]):
     """Dataset class for CoNSeP semantic segmentation task.
     As in [1], we combine classes 3 (healthy epithelial) & 4 (dysplastic/malignant epithelial)
@@ -55,7 +55,6 @@ class CoNSeP(wsi.MultiWsiDataset, base.ImageSegmentation):
             width: Width of the patches to be extracted, in pixels.
             height: Height of the patches to be extracted, in pixels.
             target_mpp: Target microns per pixel (mpp) for the patches.
-            backend: The backend to use for reading the whole-slide images.
             transforms: Transforms to apply to the extracted image & mask patches.
         """
         self._split = split
@@ -112,15 +111,15 @@ class CoNSeP(wsi.MultiWsiDataset, base.ImageSegmentation):
     @override
     def __getitem__(self, index: int) -> Tuple[tv_tensors.Image, tv_tensors.Mask, Dict[str, Any]]:
-        return base.ImageSegmentation.__getitem__(self, index)
+        return vision.VisionDataset.__getitem__(self, index)
     @override
-    def load_image(self, index: int) -> tv_tensors.Image:
+    def load_data(self, index: int) -> tv_tensors.Image:
         image_array = wsi.MultiWsiDataset.__getitem__(self, index)
         return functional.to_image(image_array)
     @override
-    def load_mask(self, index: int) -> tv_tensors.Mask:
+    def load_target(self, index: int) -> tv_tensors.Mask:
         path = self._get_mask_path(index)
         mask = np.array(io.read_mat(path)["type_map"])
         mask_patch = _utils.extract_mask_patch(mask, self, index)

eva/vision/data/datasets/segmentation/lits.py CHANGED Viewed

@@ -13,12 +13,11 @@ from typing_extensions import override
 from eva.core import utils
 from eva.core.data import splitting
-from eva.vision.data.datasets import _validators
-from eva.vision.data.datasets.segmentation import base
+from eva.vision.data.datasets import _validators, vision
 from eva.vision.utils import io
-class LiTS(base.ImageSegmentation):
+class LiTS(vision.VisionDataset[tv_tensors.Image, tv_tensors.Mask]):
     """LiTS - Liver Tumor Segmentation Challenge.
     Webpage: https://competitions.codalab.org/competitions/17094
@@ -110,21 +109,23 @@ class LiTS(base.ImageSegmentation):
         )
     @override
-    def load_image(self, index: int) -> tv_tensors.Image:
+    def load_data(self, index: int) -> tv_tensors.Image:
         sample_index, slice_index = self._indices[index]
         volume_path = self._volume_files[sample_index]
-        image_array = io.read_nifti(volume_path, slice_index)
+        image_nii = io.read_nifti(volume_path, slice_index)
+        image_array = io.nifti_to_array(image_nii)
         if self._fix_orientation:
             image_array = self._orientation(image_array, sample_index)
         return tv_tensors.Image(image_array.transpose(2, 0, 1))
     @override
-    def load_mask(self, index: int) -> tv_tensors.Mask:
+    def load_target(self, index: int) -> tv_tensors.Mask:
+        """Loads the segmentation mask slice of the `index`'th sample.
+        Args:
+            index: The index of the data sample to load.
+        Returns:
+            The squeezed semantic label mask as an int64 mask tensor.
+        """
         sample_index, slice_index = self._indices[index]
         segmentation_path = self._segmentation_file(sample_index)
-        semantic_labels = io.read_nifti(segmentation_path, slice_index)
+        mask_nii = io.read_nifti(segmentation_path, slice_index)
+        # Bind the array to `semantic_labels` before the orientation branch so
+        # the name is defined on the return line when `_fix_orientation` is falsy.
+        semantic_labels = io.nifti_to_array(mask_nii)
         if self._fix_orientation:
             semantic_labels = self._orientation(semantic_labels, sample_index)
         return tv_tensors.Mask(semantic_labels.squeeze(), dtype=torch.int64)  # type: ignore[reportCallIssue]
     def _orientation(self, array: npt.NDArray, sample_index: int) -> npt.NDArray:

eva/vision/data/datasets/segmentation/lits_balanced.py CHANGED Viewed

@@ -64,7 +64,8 @@ class LiTSBalanced(lits.LiTS):
             if sample_idx not in split_indices:
                 continue
-            segmentation = io.read_nifti(self._segmentation_file(sample_idx))
+            segmentation_nii = io.read_nifti(self._segmentation_file(sample_idx))
+            segmentation = io.nifti_to_array(segmentation_nii)
             tumor_filter = segmentation == 2
             tumor_slice_filter = tumor_filter.sum(axis=(0, 1)) > 0

eva/vision/data/datasets/segmentation/monusac.py CHANGED Viewed

@@ -16,12 +16,11 @@ from torchvision.datasets import utils
 from typing_extensions import override
 from eva.core.utils.progress_bar import tqdm
-from eva.vision.data.datasets import _validators, structs
-from eva.vision.data.datasets.segmentation import base
+from eva.vision.data.datasets import _validators, structs, vision
 from eva.vision.utils import io
-class MoNuSAC(base.ImageSegmentation):
+class MoNuSAC(vision.VisionDataset[tv_tensors.Image, tv_tensors.Mask]):
     """MoNuSAC2020: A Multi-organ Nuclei Segmentation and Classification Challenge.
     Webpage: https://monusac-2020.grand-challenge.org/
@@ -112,13 +111,13 @@ class MoNuSAC(base.ImageSegmentation):
         )
     @override
-    def load_image(self, index: int) -> tv_tensors.Image:
+    def load_data(self, index: int) -> tv_tensors.Image:
         image_path = self._image_files[index]
         image_rgb_array = io.read_image(image_path)
         return tv_tensors.Image(image_rgb_array.transpose(2, 0, 1))
     @override
-    def load_mask(self, index: int) -> tv_tensors.Mask:
+    def load_target(self, index: int) -> tv_tensors.Mask:
         semantic_labels = (
             self._load_semantic_mask_file(index)
             if self._export_masks

eva/vision/data/datasets/segmentation/total_segmentator_2d.py CHANGED Viewed

@@ -17,12 +17,12 @@ from typing_extensions import override
 from eva.core.utils import io as core_io
 from eva.core.utils import multiprocessing
-from eva.vision.data.datasets import _validators, structs
-from eva.vision.data.datasets.segmentation import _total_segmentator, base
+from eva.vision.data.datasets import _validators, structs, vision
+from eva.vision.data.datasets.segmentation import _total_segmentator
 from eva.vision.utils import io
-class TotalSegmentator2D(base.ImageSegmentation):
+class TotalSegmentator2D(vision.VisionDataset[tv_tensors.Image, tv_tensors.Mask]):
     """TotalSegmentator 2D segmentation dataset."""
     _expected_dataset_lengths: Dict[str, int] = {
@@ -206,19 +206,20 @@ class TotalSegmentator2D(base.ImageSegmentation):
         return len(self._indices)
     @override
-    def load_image(self, index: int) -> tv_tensors.Image:
+    def load_data(self, index: int) -> tv_tensors.Image:
         sample_index, slice_index = self._indices[index]
         image_path = self._get_image_path(sample_index)
-        image_array = io.read_nifti(image_path, slice_index)
+        image_nii = io.read_nifti(image_path, slice_index)
+        image_array = io.nifti_to_array(image_nii)
         image_array = self._fix_orientation(image_array)
         return tv_tensors.Image(image_array.copy().transpose(2, 0, 1))
     @override
-    def load_mask(self, index: int) -> tv_tensors.Mask:
+    def load_target(self, index: int) -> tv_tensors.Mask:
         if self._optimize_mask_loading:
             mask = self._load_semantic_label_mask(index)
         else:
-            mask = self._load_mask(index)
+            mask = self._load_target(index)
         mask = self._fix_orientation(mask)
         return tv_tensors.Mask(mask.copy().squeeze(), dtype=torch.int64)  # type: ignore
@@ -227,14 +228,15 @@ class TotalSegmentator2D(base.ImageSegmentation):
         _, slice_index = self._indices[index]
         return {"slice_index": slice_index}
-    def _load_mask(self, index: int) -> npt.NDArray[Any]:
+    def _load_target(self, index: int) -> npt.NDArray[Any]:
         sample_index, slice_index = self._indices[index]
         return self._load_masks_as_semantic_label(sample_index, slice_index)
     def _load_semantic_label_mask(self, index: int) -> npt.NDArray[Any]:
         """Loads the segmentation mask from a semantic label NifTi file."""
         sample_index, slice_index = self._indices[index]
-        return io.read_nifti(self._get_optimized_masks_file(sample_index), slice_index)
+        nii = io.read_nifti(self._get_optimized_masks_file(sample_index), slice_index)
+        return io.nifti_to_array(nii)
     def _load_masks_as_semantic_label(
         self, sample_index: int, slice_index: int | None = None
@@ -248,7 +250,7 @@ class TotalSegmentator2D(base.ImageSegmentation):
         masks_dir = self._get_masks_dir(sample_index)
         classes = self._class_mappings.keys() if self._class_mappings else self.classes[1:]
         mask_paths = [os.path.join(masks_dir, f"{label}.nii.gz") for label in classes]
-        binary_masks = [io.read_nifti(path, slice_index) for path in mask_paths]
+        binary_masks = [io.nifti_to_array(io.read_nifti(path, slice_index)) for path in mask_paths]
         if self._class_mappings:
             mapped_binary_masks = [np.zeros_like(binary_masks[0], dtype=np.bool_)] * len(

eva/vision/data/datasets/vision.py CHANGED Viewed

@@ -1,17 +1,92 @@
 """Vision Dataset base class."""
 import abc
-from typing import Generic, TypeVar
+from typing import Any, Callable, Dict, Generic, List, Tuple, TypeVar
 from eva.core.data.datasets import base
-DataSample = TypeVar("DataSample")
-"""The data sample type."""
+InputType = TypeVar("InputType")
+"""The input data type."""
+TargetType = TypeVar("TargetType")
+"""The target data type."""
-class VisionDataset(base.MapDataset, abc.ABC, Generic[DataSample]):
+class VisionDataset(
+    base.MapDataset[Tuple[InputType, TargetType, Dict[str, Any]]],
+    abc.ABC,
+    Generic[InputType, TargetType],
+):
     """Base dataset class for vision tasks."""
+    def __init__(
+        self,
+        transforms: Callable | None = None,
+    ) -> None:
+        """Initializes the dataset.
+        Args:
+            transforms: A function/transform which returns a transformed
+                version of the raw data samples. It is called with the
+                loaded data and target and must return both of them.
+                If `None`, samples are returned untransformed.
+        """
+        super().__init__()
+        # Stored for `_apply_transforms`, which applies it to each sample.
+        self._transforms = transforms
+    @property
+    def classes(self) -> List[str] | None:
+        """Returns the list with the names of the dataset classes.
+        This base implementation has no body and therefore returns `None`.
+        """
+    @property
+    def class_to_idx(self) -> Dict[str, int] | None:
+        """Returns a mapping of the class name to its target index.
+        This base implementation has no body and therefore returns `None`.
+        """
+    def __getitem__(self, index: int) -> Tuple[InputType, TargetType, Dict[str, Any]]:
+        """Fetches the `index`'th sample of the dataset.
+        Args:
+            index: Position of the sample to fetch.
+        Returns:
+            The transformed data and target, followed by the sample metadata
+            (an empty dictionary when no metadata is available).
+        """
+        # Arguments are evaluated left to right: data first, then target.
+        data, target = self._apply_transforms(self.load_data(index), self.load_target(index))
+        # Metadata is loaded last, after the transforms have run.
+        metadata = self.load_metadata(index)
+        return data, target, metadata or {}
+    def load_metadata(self, index: int) -> Dict[str, Any] | None:
+        """Returns the dataset metadata.
+        This base implementation has no body and therefore returns `None`;
+        `__getitem__` substitutes an empty dictionary in that case.
+        Args:
+            index: The index of the data sample to return the metadata of.
+        Returns:
+            The sample metadata.
+        """
+    @abc.abstractmethod
+    def load_data(self, index: int) -> InputType:
+        """Returns the `index`'th data sample.
+        Abstract: subclasses must implement it. `__getitem__` calls it
+        before the transforms are applied.
+        Args:
+            index: The index of the data sample to load.
+        Returns:
+            The sample data.
+        """
+    @abc.abstractmethod
+    def load_target(self, index: int) -> TargetType:
+        """Returns the `index`'th target sample.
+        Abstract: subclasses must implement it. `__getitem__` calls it
+        before the transforms are applied.
+        Args:
+            index: The index of the data sample to load.
+        Returns:
+            The sample target.
+        """
     @abc.abstractmethod
     def filename(self, index: int) -> str:
         """Returns the filename of the `index`'th data sample.
@@ -24,3 +99,19 @@ class VisionDataset(base.MapDataset, abc.ABC, Generic[DataSample]):
         Returns:
             The filename of the `index`'th data sample.
         """
+    def _apply_transforms(
+        self, image: InputType, target: TargetType
+    ) -> Tuple[InputType, TargetType]:
+        """Runs the configured transforms, if any, on a data/target pair.
+        Args:
+            image: The input data of the sample.
+            target: The target that belongs to the input data.
+        Returns:
+            The transformed pair, or the pair as given when no transforms are set.
+        """
+        if self._transforms is None:
+            return image, target
+        # Unpack into exactly two values so the result is always a 2-tuple.
+        transformed_image, transformed_target = self._transforms(image, target)
+        return transformed_image, transformed_target

kaiko-eva 0.2.0__py3-none-any.whl → 0.2.2__py3-none-any.whl

Potentially problematic release.

kaiko-eva 0.2.0__py3-none-any.whl → 0.2.2__py3-none-any.whl