kaiko-eva 0.0.0.dev6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of kaiko-eva might be problematic.

Files changed (111)
  1. eva/.DS_Store +0 -0
  2. eva/__init__.py +33 -0
  3. eva/__main__.py +18 -0
  4. eva/__version__.py +25 -0
  5. eva/core/__init__.py +19 -0
  6. eva/core/callbacks/__init__.py +5 -0
  7. eva/core/callbacks/writers/__init__.py +5 -0
  8. eva/core/callbacks/writers/embeddings.py +169 -0
  9. eva/core/callbacks/writers/typings.py +23 -0
  10. eva/core/cli/__init__.py +5 -0
  11. eva/core/cli/cli.py +19 -0
  12. eva/core/cli/logo.py +38 -0
  13. eva/core/cli/setup.py +89 -0
  14. eva/core/data/__init__.py +14 -0
  15. eva/core/data/dataloaders/__init__.py +5 -0
  16. eva/core/data/dataloaders/dataloader.py +80 -0
  17. eva/core/data/datamodules/__init__.py +6 -0
  18. eva/core/data/datamodules/call.py +33 -0
  19. eva/core/data/datamodules/datamodule.py +108 -0
  20. eva/core/data/datamodules/schemas.py +62 -0
  21. eva/core/data/datasets/__init__.py +7 -0
  22. eva/core/data/datasets/base.py +53 -0
  23. eva/core/data/datasets/classification/__init__.py +5 -0
  24. eva/core/data/datasets/classification/embeddings.py +154 -0
  25. eva/core/data/datasets/dataset.py +6 -0
  26. eva/core/data/samplers/__init__.py +5 -0
  27. eva/core/data/samplers/sampler.py +6 -0
  28. eva/core/data/transforms/__init__.py +5 -0
  29. eva/core/data/transforms/dtype/__init__.py +5 -0
  30. eva/core/data/transforms/dtype/array.py +28 -0
  31. eva/core/interface/__init__.py +5 -0
  32. eva/core/interface/interface.py +79 -0
  33. eva/core/metrics/__init__.py +17 -0
  34. eva/core/metrics/average_loss.py +47 -0
  35. eva/core/metrics/binary_balanced_accuracy.py +22 -0
  36. eva/core/metrics/defaults/__init__.py +6 -0
  37. eva/core/metrics/defaults/classification/__init__.py +6 -0
  38. eva/core/metrics/defaults/classification/binary.py +76 -0
  39. eva/core/metrics/defaults/classification/multiclass.py +80 -0
  40. eva/core/metrics/structs/__init__.py +9 -0
  41. eva/core/metrics/structs/collection.py +6 -0
  42. eva/core/metrics/structs/metric.py +6 -0
  43. eva/core/metrics/structs/module.py +115 -0
  44. eva/core/metrics/structs/schemas.py +47 -0
  45. eva/core/metrics/structs/typings.py +15 -0
  46. eva/core/models/__init__.py +13 -0
  47. eva/core/models/modules/__init__.py +7 -0
  48. eva/core/models/modules/head.py +113 -0
  49. eva/core/models/modules/inference.py +37 -0
  50. eva/core/models/modules/module.py +190 -0
  51. eva/core/models/modules/typings.py +23 -0
  52. eva/core/models/modules/utils/__init__.py +6 -0
  53. eva/core/models/modules/utils/batch_postprocess.py +57 -0
  54. eva/core/models/modules/utils/grad.py +23 -0
  55. eva/core/models/networks/__init__.py +6 -0
  56. eva/core/models/networks/_utils.py +25 -0
  57. eva/core/models/networks/mlp.py +69 -0
  58. eva/core/models/networks/transforms/__init__.py +5 -0
  59. eva/core/models/networks/transforms/extract_cls_features.py +25 -0
  60. eva/core/models/networks/wrappers/__init__.py +8 -0
  61. eva/core/models/networks/wrappers/base.py +47 -0
  62. eva/core/models/networks/wrappers/from_function.py +58 -0
  63. eva/core/models/networks/wrappers/huggingface.py +37 -0
  64. eva/core/models/networks/wrappers/onnx.py +47 -0
  65. eva/core/trainers/__init__.py +6 -0
  66. eva/core/trainers/_logging.py +81 -0
  67. eva/core/trainers/_recorder.py +149 -0
  68. eva/core/trainers/_utils.py +12 -0
  69. eva/core/trainers/functional.py +113 -0
  70. eva/core/trainers/trainer.py +97 -0
  71. eva/core/utils/__init__.py +1 -0
  72. eva/core/utils/io/__init__.py +5 -0
  73. eva/core/utils/io/dataframe.py +21 -0
  74. eva/core/utils/multiprocessing.py +44 -0
  75. eva/core/utils/workers.py +21 -0
  76. eva/vision/__init__.py +14 -0
  77. eva/vision/data/__init__.py +5 -0
  78. eva/vision/data/datasets/__init__.py +22 -0
  79. eva/vision/data/datasets/_utils.py +50 -0
  80. eva/vision/data/datasets/_validators.py +44 -0
  81. eva/vision/data/datasets/classification/__init__.py +15 -0
  82. eva/vision/data/datasets/classification/bach.py +174 -0
  83. eva/vision/data/datasets/classification/base.py +103 -0
  84. eva/vision/data/datasets/classification/crc.py +176 -0
  85. eva/vision/data/datasets/classification/mhist.py +106 -0
  86. eva/vision/data/datasets/classification/patch_camelyon.py +203 -0
  87. eva/vision/data/datasets/classification/total_segmentator.py +212 -0
  88. eva/vision/data/datasets/segmentation/__init__.py +6 -0
  89. eva/vision/data/datasets/segmentation/base.py +112 -0
  90. eva/vision/data/datasets/segmentation/total_segmentator.py +212 -0
  91. eva/vision/data/datasets/structs.py +17 -0
  92. eva/vision/data/datasets/vision.py +43 -0
  93. eva/vision/data/transforms/__init__.py +5 -0
  94. eva/vision/data/transforms/common/__init__.py +5 -0
  95. eva/vision/data/transforms/common/resize_and_crop.py +44 -0
  96. eva/vision/models/__init__.py +5 -0
  97. eva/vision/models/networks/__init__.py +6 -0
  98. eva/vision/models/networks/abmil.py +176 -0
  99. eva/vision/models/networks/postprocesses/__init__.py +5 -0
  100. eva/vision/models/networks/postprocesses/cls.py +25 -0
  101. eva/vision/utils/__init__.py +5 -0
  102. eva/vision/utils/io/__init__.py +12 -0
  103. eva/vision/utils/io/_utils.py +29 -0
  104. eva/vision/utils/io/image.py +54 -0
  105. eva/vision/utils/io/nifti.py +50 -0
  106. eva/vision/utils/io/text.py +18 -0
  107. kaiko_eva-0.0.0.dev6.dist-info/METADATA +393 -0
  108. kaiko_eva-0.0.0.dev6.dist-info/RECORD +111 -0
  109. kaiko_eva-0.0.0.dev6.dist-info/WHEEL +4 -0
  110. kaiko_eva-0.0.0.dev6.dist-info/entry_points.txt +4 -0
  111. kaiko_eva-0.0.0.dev6.dist-info/licenses/LICENSE +201 -0
@@ -0,0 +1,212 @@
+ """TotalSegmentator 2D segmentation dataset class."""
+
+ import functools
+ import os
+ from glob import glob
+ from typing import Callable, Dict, List, Literal, Tuple
+
+ import numpy as np
+ from torchvision.datasets import utils
+ from typing_extensions import override
+
+ from eva.vision.data.datasets import _utils, _validators, structs
+ from eva.vision.data.datasets.segmentation import base
+ from eva.vision.utils import io
+
+
+ class TotalSegmentator2D(base.ImageSegmentation):
+     """TotalSegmentator 2D segmentation dataset."""
+
+     _train_index_ranges: List[Tuple[int, int]] = [(0, 83)]
+     """Train range indices."""
+
+     _val_index_ranges: List[Tuple[int, int]] = [(83, 103)]
+     """Validation range indices."""
+
+     _n_slices_per_image: int = 20
+     """The number of slices to sample per 3D CT scan image."""
+
+     _resources_full: List[structs.DownloadResource] = [
+         structs.DownloadResource(
+             filename="Totalsegmentator_dataset_v201.zip",
+             url="https://zenodo.org/records/10047292/files/Totalsegmentator_dataset_v201.zip",
+             md5="fe250e5718e0a3b5df4c4ea9d58a62fe",
+         ),
+     ]
+     """Resources for the full dataset version."""
+
+     _resources_small: List[structs.DownloadResource] = [
+         structs.DownloadResource(
+             filename="Totalsegmentator_dataset_small_v201.zip",
+             url="https://zenodo.org/records/10047263/files/Totalsegmentator_dataset_small_v201.zip",
+             md5="6b5524af4b15e6ba06ef2d700c0c73e0",
+         ),
+     ]
+     """Resources for the small dataset version."""
+
+     def __init__(
+         self,
+         root: str,
+         split: Literal["train", "val"] | None,
+         version: Literal["small", "full"] = "small",
+         download: bool = False,
+         image_transforms: Callable | None = None,
+         target_transforms: Callable | None = None,
+         image_target_transforms: Callable | None = None,
+     ) -> None:
+         """Initialize dataset.
+
+         Args:
+             root: Path to the root directory of the dataset. The dataset will
+                 be downloaded and extracted here, if it does not already exist.
+             split: Dataset split to use. If `None`, the entire dataset is used.
+             version: The version of the dataset to initialize.
+             download: Whether to download the data for the specified split.
+                 Note that the download will be executed only by additionally
+                 calling the :meth:`prepare_data` method and if the data does not
+                 exist yet on disk.
+             image_transforms: A function/transform that takes in an image
+                 and returns a transformed version.
+             target_transforms: A function/transform that takes in the target
+                 and transforms it.
+             image_target_transforms: A function/transform that takes in an
+                 image and a label and returns the transformed versions of both.
+                 This transform happens after the `image_transforms` and
+                 `target_transforms`.
+         """
+         super().__init__(
+             image_transforms=image_transforms,
+             target_transforms=target_transforms,
+             image_target_transforms=image_target_transforms,
+         )
+
+         self._root = root
+         self._split = split
+         self._version = version
+         self._download = download
+
+         self._samples_dirs: List[str] = []
+         self._indices: List[int] = []
+
+     @functools.cached_property
+     @override
+     def classes(self) -> List[str]:
+         def get_filename(path: str) -> str:
+             """Returns the filename from the full path."""
+             return os.path.basename(path).split(".")[0]
+
+         first_sample_labels = os.path.join(
+             self._root, self._samples_dirs[0], "segmentations", "*.nii.gz"
+         )
+         return sorted(map(get_filename, glob(first_sample_labels)))
+
+     @property
+     @override
+     def class_to_idx(self) -> Dict[str, int]:
+         return {label: index for index, label in enumerate(self.classes)}
+
+     @override
+     def filename(self, index: int) -> str:
+         sample_dir = self._samples_dirs[self._indices[index]]
+         return os.path.join(sample_dir, "ct.nii.gz")
+
+     @override
+     def prepare_data(self) -> None:
+         if self._download:
+             self._download_dataset()
+
+     @override
+     def configure(self) -> None:
+         self._samples_dirs = self._fetch_samples_dirs()
+         self._indices = self._create_indices()
+
+     @override
+     def validate(self) -> None:
+         _validators.check_dataset_integrity(
+             self,
+             length=1660 if self._split == "train" else 400,
+             n_classes=117,
+             first_and_last_labels=("adrenal_gland_left", "vertebrae_T9"),
+         )
+
+     @override
+     def __len__(self) -> int:
+         return len(self._indices) * self._n_slices_per_image
+
+     @override
+     def load_image(self, index: int) -> np.ndarray:
+         image_path = self._get_image_path(index)
+         slice_index = self._get_sample_slice_index(index)
+         image_array = io.read_nifti_slice(image_path, slice_index)
+         return image_array.repeat(3, axis=2)
+
+     @override
+     def load_mask(self, index: int) -> np.ndarray:
+         masks_dir = self._get_masks_dir(index)
+         slice_index = self._get_sample_slice_index(index)
+         mask_paths = (os.path.join(masks_dir, label + ".nii.gz") for label in self.classes)
+         masks = [io.read_nifti_slice(path, slice_index) for path in mask_paths]
+         return np.concatenate(masks, axis=-1)
+
+     def _get_masks_dir(self, index: int) -> str:
+         """Returns the directory of the corresponding masks."""
+         sample_dir = self._get_sample_dir(index)
+         return os.path.join(self._root, sample_dir, "segmentations")
+
+     def _get_image_path(self, index: int) -> str:
+         """Returns the corresponding image path."""
+         sample_dir = self._get_sample_dir(index)
+         return os.path.join(self._root, sample_dir, "ct.nii.gz")
+
+     def _get_sample_dir(self, index: int) -> str:
+         """Returns the corresponding sample directory."""
+         sample_index = self._indices[index // self._n_slices_per_image]
+         return self._samples_dirs[sample_index]
+
+     def _get_sample_slice_index(self, index: int) -> int:
+         """Returns the corresponding slice index."""
+         image_path = self._get_image_path(index)
+         total_slices = io.fetch_total_nifti_slices(image_path)
+         slice_indices = np.linspace(0, total_slices - 1, num=self._n_slices_per_image, dtype=int)
+         return slice_indices[index % self._n_slices_per_image]
+
+     def _fetch_samples_dirs(self) -> List[str]:
+         """Returns the names of all the samples across all splits of the dataset."""
+         sample_filenames = [
+             filename
+             for filename in os.listdir(self._root)
+             if os.path.isdir(os.path.join(self._root, filename))
+         ]
+         return sorted(sample_filenames)
+
+     def _create_indices(self) -> List[int]:
+         """Builds the dataset indices for the specified split."""
+         split_index_ranges = {
+             "train": self._train_index_ranges,
+             "val": self._val_index_ranges,
+             None: [(0, 103)],
+         }
+         index_ranges = split_index_ranges.get(self._split)
+         if index_ranges is None:
+             raise ValueError("Invalid data split. Use 'train', 'val' or `None`.")
+
+         return _utils.ranges_to_indices(index_ranges)
+
+     def _download_dataset(self) -> None:
+         """Downloads the dataset."""
+         dataset_resources = {
+             "small": self._resources_small,
+             "full": self._resources_full,
+             None: (0, 103),
+         }
+         resources = dataset_resources.get(self._version)
+         if resources is None:
+             raise ValueError("Invalid data version. Use 'small' or 'full'.")
+
+         for resource in resources:
+             utils.download_and_extract_archive(
+                 resource.url,
+                 download_root=self._root,
+                 filename=resource.filename,
+                 remove_finished=True,
+             )
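
For orientation, a minimal usage sketch of the class above. It is not taken from the package docs: the local root path is a placeholder, and the setup sequence is an assumption based only on the methods visible in this diff (the `ImageSegmentation` base class may expect additional lifecycle calls):

    from eva.vision.data.datasets.segmentation.total_segmentator import TotalSegmentator2D

    # Hypothetical local root; the archive is downloaded here when download=True.
    dataset = TotalSegmentator2D(
        root="./data/total_segmentator", split="train", version="small", download=True
    )
    dataset.prepare_data()   # triggers the download, if the data is not on disk yet
    dataset.configure()      # scans the sample directories and builds the split indices
    image = dataset.load_image(0)  # one CT slice, repeated to 3 channels: (H, W, 3)
    mask = dataset.load_mask(0)    # per-class segmentation masks stacked on the last axis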
@@ -0,0 +1,17 @@
+ """Helper dataclasses and data structures for vision datasets."""
+
+ import dataclasses
+
+
+ @dataclasses.dataclass(frozen=True)
+ class DownloadResource:
+     """Contains download information for a specific resource."""
+
+     filename: str
+     """The filename of the resource."""
+
+     url: str
+     """The URL of the resource."""
+
+     md5: str | None = None
+     """The MD5 hash of the resource."""
@@ -0,0 +1,43 @@
+ """Vision Dataset base class."""
+
+ import abc
+ from typing import Generic, TypeVar
+
+ from eva.core.data.datasets import base
+
+ DataSample = TypeVar("DataSample")
+ """The data sample type."""
+
+
+ class VisionDataset(base.Dataset, abc.ABC, Generic[DataSample]):
+     """Base dataset class for vision tasks."""
+
+     @abc.abstractmethod
+     def filename(self, index: int) -> str:
+         """Returns the filename of the `index`'th data sample.
+
+         Note that this is the relative file path to the root.
+
+         Args:
+             index: The index of the data sample to select.
+
+         Returns:
+             The filename of the `index`'th data sample.
+         """
+
+     @abc.abstractmethod
+     def __getitem__(self, index: int) -> DataSample:
+         """Returns the `index`'th data sample.
+
+         Args:
+             index: The index of the data sample to select.
+
+         Returns:
+             A data sample and its target.
+         """
+         raise NotImplementedError
+
+     @abc.abstractmethod
+     def __len__(self) -> int:
+         """Returns the total length of the data."""
+         raise NotImplementedError
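
A minimal sketch of a custom subclass, assuming `base.Dataset` adds no further required methods beyond the three abstract ones shown here; the class name, filename scheme, and data are hypothetical:

    import numpy as np
    from eva.vision.data.datasets.vision import VisionDataset

    class RandomImages(VisionDataset[np.ndarray]):
        """Toy dataset that fabricates images; it only illustrates the abstract interface."""

        def filename(self, index: int) -> str:
            return f"random_{index}.png"  # hypothetical path, relative to a root directory

        def __getitem__(self, index: int) -> np.ndarray:
            return np.random.randint(0, 255, size=(224, 224, 3), dtype=np.uint8)

        def __len__(self) -> int:
            return 16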
@@ -0,0 +1,5 @@
+ """Vision data transforms."""
+
+ from eva.vision.data.transforms.common import ResizeAndCrop
+
+ __all__ = ["ResizeAndCrop"]
@@ -0,0 +1,5 @@
+ """Common vision transforms."""
+
+ from eva.vision.data.transforms.common.resize_and_crop import ResizeAndCrop
+
+ __all__ = ["ResizeAndCrop"]
@@ -0,0 +1,44 @@
+ """Resizes and normalizes the input image."""
+
+ from typing import Callable, Sequence
+
+ import torch
+ import torchvision.transforms.v2 as torch_transforms
+
+
+ class ResizeAndCrop(torch_transforms.Compose):
+     """Resizes, crops and normalizes an input image while preserving its aspect ratio."""
+
+     def __init__(
+         self,
+         size: int | Sequence[int] = 224,
+         mean: Sequence[float] = (0.5, 0.5, 0.5),
+         std: Sequence[float] = (0.5, 0.5, 0.5),
+     ) -> None:
+         """Initializes the transform object.
+
+         Args:
+             size: Desired output size of the crop. If size is an `int` instead
+                 of a sequence like (h, w), a square crop (size, size) is made.
+             mean: Sequence of means for each image channel.
+             std: Sequence of standard deviations for each image channel.
+         """
+         self._size = size
+         self._mean = mean
+         self._std = std
+
+         super().__init__(transforms=self._build_transforms())
+
+     def _build_transforms(self) -> Sequence[Callable]:
+         """Builds and returns the list of transforms."""
+         transforms = [
+             torch_transforms.ToImage(),
+             torch_transforms.Resize(size=self._size),
+             torch_transforms.CenterCrop(size=self._size),
+             torch_transforms.ToDtype(torch.float32, scale=True),
+             torch_transforms.Normalize(
+                 mean=self._mean,
+                 std=self._std,
+             ),
+         ]
+         return transforms
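
A short usage sketch, assuming the transform is called directly on an HWC uint8 array (torchvision v2 transforms accept numpy arrays via `ToImage`); the input here is random dummy data:

    import numpy as np
    from eva.vision.data.transforms import ResizeAndCrop

    transform = ResizeAndCrop(size=224, mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
    image = np.random.randint(0, 255, size=(512, 384, 3), dtype=np.uint8)  # dummy HWC image
    tensor = transform(image)  # float32 tensor of shape (3, 224, 224), normalized per channel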
@@ -0,0 +1,5 @@
+ """Vision Models API."""
+
+ from eva.vision.models import networks
+
+ __all__ = ["networks"]
@@ -0,0 +1,6 @@
+ """Vision Networks API."""
+
+ from eva.vision.models.networks import postprocesses
+ from eva.vision.models.networks.abmil import ABMIL
+
+ __all__ = ["postprocesses", "ABMIL"]
@@ -0,0 +1,176 @@
+ """ABMIL Network."""
+
+ from typing import Type
+
+ import torch
+ import torch.nn as nn
+
+ from eva.core.models.networks import MLP
+
+
+ class ABMIL(torch.nn.Module):
+     """ABMIL network for multiple instance learning classification tasks.
+
+     Takes an array of patch level embeddings per slide as input. This implementation supports
+     batched inputs of shape (`batch_size`, `n_instances`, `input_size`). For slides with fewer
+     than `n_instances` patches, you can apply padding and provide a mask tensor to the forward
+     pass.
+
+     The original implementation from [1] was used as a reference:
+     https://github.com/AMLab-Amsterdam/AttentionDeepMIL/blob/master/model.py
+
+     Notes:
+         - use_bias: The paper didn't use bias in their formalism, but their published
+           example code inadvertently does.
+         - To prevent near-equal dot-product similarities caused by concentration of measure
+           in large input embedding dimensionalities (>128), we added the option to project
+           the input embeddings to a lower dimensionality.
+
+     [1] Maximilian Ilse, Jakub M. Tomczak, Max Welling, "Attention-based Deep Multiple
+     Instance Learning", 2018
+     https://arxiv.org/abs/1802.04712
+     """
+
+     def __init__(
+         self,
+         input_size: int,
+         output_size: int,
+         projected_input_size: int | None,
+         hidden_size_attention: int = 128,
+         hidden_sizes_mlp: tuple = (128, 64),
+         use_bias: bool = True,
+         dropout_input_embeddings: float = 0.0,
+         dropout_attention: float = 0.0,
+         dropout_mlp: float = 0.0,
+         pad_value: int | float | None = float("-inf"),
+     ) -> None:
+         """Initializes the ABMIL network.
+
+         Args:
+             input_size: input embedding dimension
+             output_size: number of classes
+             projected_input_size: size of the projected input. If `None`, no projection is
+                 performed.
+             hidden_size_attention: hidden dimension in attention network
+             hidden_sizes_mlp: dimensions for hidden layers in last mlp
+             use_bias: whether to use bias in the attention network
+             dropout_input_embeddings: dropout rate for the input embeddings
+             dropout_attention: dropout rate for the attention network and classifier
+             dropout_mlp: dropout rate for the final MLP network
+             pad_value: Value indicating padding in the input tensor. If specified, entries
+                 with this value will be masked. If set to `None`, no masking is applied.
+         """
+         super().__init__()
+
+         self._pad_value = pad_value
+
+         if projected_input_size:
+             self.projector = nn.Sequential(
+                 nn.Linear(input_size, projected_input_size, bias=True),
+                 nn.Dropout(p=dropout_input_embeddings),
+             )
+             input_size = projected_input_size
+         else:
+             self.projector = nn.Dropout(p=dropout_input_embeddings)
+
+         self.gated_attention = GatedAttention(
+             input_dim=input_size,
+             hidden_dim=hidden_size_attention,
+             dropout=dropout_attention,
+             n_classes=1,
+             use_bias=use_bias,
+         )
+
+         self.classifier = MLP(
+             input_size=input_size,
+             output_size=output_size,
+             hidden_layer_sizes=hidden_sizes_mlp,
+             dropout=dropout_mlp,
+             hidden_activation_fn=nn.ReLU,
+         )
+
+     def forward(self, input_tensor: torch.Tensor) -> torch.Tensor:
+         """Forward pass.
+
+         Args:
+             input_tensor: Tensor with expected shape of (batch_size, n_instances, input_size).
+         """
+         input_tensor, mask = self._mask_values(input_tensor, self._pad_value)
+
+         # (batch_size, n_instances, input_size) -> (batch_size, n_instances, projected_input_size)
+         input_tensor = self.projector(input_tensor)
+
+         attention_logits = self.gated_attention(input_tensor)  # (batch_size, n_instances, 1)
+         if mask is not None:
+             # fill masked values with -inf, which will yield 0s after softmax
+             attention_logits = attention_logits.masked_fill(mask, float("-inf"))
+
+         attention_weights = nn.functional.softmax(attention_logits, dim=1)
+         # (batch_size, n_instances, 1)
+
+         attention_result = torch.matmul(torch.transpose(attention_weights, 1, 2), input_tensor)
+         # (batch_size, 1, projected_input_size)
+
+         attention_result = torch.squeeze(attention_result, 1)  # (batch_size, projected_input_size)
+
+         return self.classifier(attention_result)  # (batch_size, output_size)
+
+     def _mask_values(self, input_tensor: torch.Tensor, pad_value: float | None):
+         """Masks the padded values in the input tensor."""
+         if pad_value is None:
+             return input_tensor, None
+         else:
+             # (batch_size, n_instances, input_size)
+             mask = input_tensor == pad_value
+
+             # (batch_size, n_instances, input_size) -> (batch_size, n_instances, 1)
+             mask = mask.all(dim=-1, keepdim=True)
+
+             # Fill masked values with 0, so that they don't contribute to dense layers
+             input_tensor = input_tensor.masked_fill(mask, 0)
+
+             return input_tensor, mask
+
+
+ class GatedAttention(nn.Module):
+     """Attention mechanism with Sigmoid Gating using 3 linear layers."""
+
+     def __init__(
+         self,
+         input_dim: int,
+         hidden_dim: int,
+         dropout: float = 0.25,
+         n_classes: int = 1,
+         use_bias: bool = True,
+         activation_a: Type[nn.Module] = nn.Tanh,
+         activation_b: Type[nn.Module] = nn.Sigmoid,
+     ):
+         """Initializes the GatedAttention network.
+
+         Args:
+             input_dim: input feature dimension
+             hidden_dim: hidden layer dimension
+             dropout: dropout rate
+             n_classes: number of classes
+             use_bias: whether to use bias in the linear layers
+             activation_a: activation function for attention_a.
+             activation_b: activation function for attention_b.
+         """
+         super().__init__()
+
+         def make_attention(activation: nn.Module):
+             return nn.Sequential(
+                 nn.Linear(input_dim, hidden_dim, bias=use_bias), nn.Dropout(p=dropout), activation
+             )
+
+         self.attention_a = make_attention(activation_a())
+         self.attention_b = make_attention(activation_b())
+         self.attention_c = nn.Linear(hidden_dim, n_classes)
+
+     def forward(self, x: torch.Tensor) -> torch.Tensor:
+         """Forward pass."""
+         a = self.attention_a(x)  # [..., hidden_dim]
+         b = self.attention_b(x)  # [..., hidden_dim]
+         att = a.mul(b)  # [..., hidden_dim]
+         att = self.attention_c(att)  # [..., n_classes]
+         return att
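
A rough usage sketch for the network above; the sizes are arbitrary, and padding the first slide with the default pad value of -inf is only meant to exercise the masking path:

    import torch
    from eva.vision.models.networks import ABMIL

    model = ABMIL(input_size=384, output_size=2, projected_input_size=128)
    batch = torch.randn(4, 1000, 384)  # 4 slides, 1000 patch embeddings of size 384 each
    batch[0, 500:] = float("-inf")     # pad a slide that only has 500 patches
    logits = model(batch)              # shape: (4, 2)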
@@ -0,0 +1,5 @@
+ """Model post-process transforms."""
+
+ from eva.vision.models.networks.postprocesses.cls import ExtractCLSFeatures
+
+ __all__ = ["ExtractCLSFeatures"]
@@ -0,0 +1,25 @@
+ """Transforms for extracting the CLS output from a model output."""
+
+ import torch
+ from transformers import modeling_outputs
+
+
+ class ExtractCLSFeatures:
+     """Extracts the CLS token from a ViT model output."""
+
+     def __call__(
+         self, tensor: torch.Tensor | modeling_outputs.BaseModelOutputWithPooling
+     ) -> torch.Tensor:
+         """Call method for the transformation.
+
+         Args:
+             tensor: The tensor representing the model output.
+         """
+         if isinstance(tensor, torch.Tensor):
+             transformed_tensor = tensor[:, 0, :]
+         elif isinstance(tensor, modeling_outputs.BaseModelOutputWithPooling):
+             transformed_tensor = tensor.last_hidden_state[:, 0, :]
+         else:
+             raise ValueError(f"Unsupported type {type(tensor)}")
+
+         return transformed_tensor
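
A small sketch with a dummy tensor shaped like a ViT output, to show what the transform returns (the sizes are arbitrary):

    import torch
    from eva.vision.models.networks.postprocesses import ExtractCLSFeatures

    extract_cls = ExtractCLSFeatures()
    hidden_states = torch.randn(8, 197, 768)   # (batch, tokens, hidden_dim), dummy data
    cls_features = extract_cls(hidden_states)  # CLS token features, shape (8, 768)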
@@ -0,0 +1,5 @@
+ """Vision utilities and helper functions."""
+
+ from eva.vision.utils import io
+
+ __all__ = ["io"]
@@ -0,0 +1,12 @@
+ """Vision I/O utilities."""
+
+ from eva.vision.utils.io.image import read_image
+ from eva.vision.utils.io.nifti import fetch_total_nifti_slices, read_nifti_slice
+ from eva.vision.utils.io.text import read_csv
+
+ __all__ = [
+     "read_image",
+     "fetch_total_nifti_slices",
+     "read_nifti_slice",
+     "read_csv",
+ ]
@@ -0,0 +1,29 @@
+ """File IO utilities."""
+
+ import os
+
+
+ def is_file(path: str) -> bool:
+     """Checks if the input path is a valid file.
+
+     Args:
+         path: The file path to be checked.
+
+     Returns:
+         A boolean value indicating whether the file exists and is non-empty.
+     """
+     return os.path.exists(path) and os.stat(path).st_size != 0 and os.path.isfile(path)
+
+
+ def check_file(path: str) -> None:
+     """Checks whether the input path is a valid file and raises an error otherwise.
+
+     Args:
+         path: The file path to be checked.
+     """
+     if not is_file(path):
+         raise FileExistsError(
+             f"Input '{path if isinstance(path, str) else type(path)}' "
+             "could not be recognized as a valid file. Please verify "
+             "that the file exists and is reachable."
+         )
@@ -0,0 +1,54 @@
+ """Image I/O related functions."""
+
+ import cv2
+ import numpy as np
+ import numpy.typing as npt
+
+ from eva.vision.utils.io import _utils
+
+
+ def read_image(path: str) -> npt.NDArray[np.uint8]:
+     """Reads and loads the image from a file path as an RGB image.
+
+     Args:
+         path: The path of the image file.
+
+     Returns:
+         The RGB image as a numpy array.
+
+     Raises:
+         FileExistsError: If the path does not exist or it is unreachable.
+         IOError: If the image could not be loaded.
+     """
+     return read_image_as_array(path, cv2.IMREAD_COLOR)
+
+
+ def read_image_as_array(path: str, flags: int = cv2.IMREAD_UNCHANGED) -> npt.NDArray[np.uint8]:
+     """Reads and loads an image file as a numpy array.
+
+     Args:
+         path: The path to the image file.
+         flags: Specifies the way in which the image should be read.
+
+     Returns:
+         The image as a numpy array.
+
+     Raises:
+         FileExistsError: If the path does not exist or it is unreachable.
+         IOError: If the image could not be loaded.
+     """
+     _utils.check_file(path)
+     image = cv2.imread(path, flags=flags)
+     if image is None:
+         raise IOError(
+             f"Input '{path}' could not be loaded. "
+             "Please verify that the path is a valid image file."
+         )
+
+     if image.ndim == 3:
+         image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+
+     if image.ndim == 2 and flags == cv2.IMREAD_COLOR:
+         image = image[:, :, np.newaxis]
+
+     return np.asarray(image).astype(np.uint8)
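
A one-line usage sketch of the helper above; the file path is a placeholder:

    from eva.vision.utils import io

    image = io.read_image("path/to/patch.png")  # returns an (H, W, 3) uint8 RGB array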