kaiko-eva 0.1.1__py3-none-any.whl → 0.1.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of kaiko-eva might be problematic; see the registry advisory for details.
- eva/core/callbacks/writers/embeddings/base.py +3 -4
- eva/core/data/dataloaders/dataloader.py +2 -2
- eva/core/data/splitting/random.py +6 -5
- eva/core/data/splitting/stratified.py +12 -6
- eva/core/losses/__init__.py +5 -0
- eva/core/losses/cross_entropy.py +27 -0
- eva/core/metrics/__init__.py +0 -4
- eva/core/metrics/defaults/__init__.py +0 -2
- eva/core/models/modules/module.py +9 -9
- eva/core/models/transforms/extract_cls_features.py +17 -9
- eva/core/models/transforms/extract_patch_features.py +23 -11
- eva/core/utils/progress_bar.py +15 -0
- eva/vision/data/datasets/__init__.py +4 -0
- eva/vision/data/datasets/classification/__init__.py +2 -1
- eva/vision/data/datasets/classification/camelyon16.py +4 -1
- eva/vision/data/datasets/classification/panda.py +17 -1
- eva/vision/data/datasets/classification/wsi.py +4 -1
- eva/vision/data/datasets/segmentation/__init__.py +2 -0
- eva/vision/data/datasets/segmentation/consep.py +2 -2
- eva/vision/data/datasets/segmentation/lits.py +49 -29
- eva/vision/data/datasets/segmentation/lits_balanced.py +93 -0
- eva/vision/data/datasets/segmentation/monusac.py +7 -7
- eva/vision/data/datasets/segmentation/total_segmentator_2d.py +2 -2
- eva/vision/data/datasets/wsi.py +37 -1
- eva/vision/data/wsi/patching/coordinates.py +9 -1
- eva/vision/data/wsi/patching/samplers/_utils.py +2 -8
- eva/vision/data/wsi/patching/samplers/random.py +4 -2
- eva/vision/losses/__init__.py +2 -2
- eva/vision/losses/dice.py +75 -8
- eva/vision/metrics/__init__.py +11 -0
- eva/vision/metrics/defaults/__init__.py +7 -0
- eva/{core → vision}/metrics/defaults/segmentation/__init__.py +1 -1
- eva/{core → vision}/metrics/defaults/segmentation/multiclass.py +2 -1
- eva/vision/metrics/segmentation/BUILD +1 -0
- eva/vision/metrics/segmentation/__init__.py +9 -0
- eva/vision/metrics/segmentation/_utils.py +69 -0
- eva/{core/metrics → vision/metrics/segmentation}/generalized_dice.py +12 -10
- eva/vision/metrics/segmentation/mean_iou.py +57 -0
- eva/vision/models/modules/semantic_segmentation.py +4 -3
- eva/vision/models/networks/backbones/_utils.py +12 -0
- eva/vision/models/networks/backbones/pathology/__init__.py +4 -1
- eva/vision/models/networks/backbones/pathology/histai.py +8 -2
- eva/vision/models/networks/backbones/pathology/mahmood.py +2 -9
- eva/vision/models/networks/backbones/pathology/owkin.py +14 -0
- eva/vision/models/networks/backbones/pathology/paige.py +51 -0
- eva/vision/models/networks/decoders/__init__.py +1 -1
- eva/vision/models/networks/decoders/segmentation/__init__.py +12 -4
- eva/vision/models/networks/decoders/segmentation/base.py +16 -0
- eva/vision/models/networks/decoders/segmentation/{conv2d.py → decoder2d.py} +26 -22
- eva/vision/models/networks/decoders/segmentation/linear.py +2 -2
- eva/vision/models/networks/decoders/segmentation/semantic/__init__.py +12 -0
- eva/vision/models/networks/decoders/segmentation/{common.py → semantic/common.py} +3 -3
- eva/vision/models/networks/decoders/segmentation/semantic/with_image.py +94 -0
- eva/vision/models/networks/decoders/segmentation/typings.py +18 -0
- eva/vision/utils/io/__init__.py +7 -1
- eva/vision/utils/io/nifti.py +19 -4
- {kaiko_eva-0.1.1.dist-info → kaiko_eva-0.1.3.dist-info}/METADATA +3 -34
- {kaiko_eva-0.1.1.dist-info → kaiko_eva-0.1.3.dist-info}/RECORD +61 -48
- {kaiko_eva-0.1.1.dist-info → kaiko_eva-0.1.3.dist-info}/WHEEL +1 -1
- eva/core/metrics/mean_iou.py +0 -120
- eva/vision/models/networks/decoders/decoder.py +0 -7
- {kaiko_eva-0.1.1.dist-info → kaiko_eva-0.1.3.dist-info}/entry_points.txt +0 -0
- {kaiko_eva-0.1.1.dist-info → kaiko_eva-0.1.3.dist-info}/licenses/LICENSE +0 -0

eva/core/callbacks/writers/embeddings/base.py
CHANGED

@@ -172,15 +172,14 @@ class EmbeddingsWriter(callbacks.BasePredictionWriter, abc.ABC):

     def _check_if_exists(self) -> None:
         """Checks if the output directory already exists and if it should be overwritten."""
-        try:
-            …
-        except FileExistsError as e:
+        os.makedirs(self._output_dir, exist_ok=True)
+        if os.path.exists(os.path.join(self._output_dir, "manifest.csv")) and not self._overwrite:
             raise FileExistsError(
                 f"The embeddings output directory already exists: {self._output_dir}. This "
                 "either means that they have been computed before or that a wrong output "
                 "directory is being used. Consider using `eva fit` instead, selecting a "
                 "different output directory or setting overwrite=True."
-            ) from e
+            )
         os.makedirs(self._output_dir, exist_ok=True)
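The rewritten check treats an existing `manifest.csv` (rather than the directory itself) as the signal that embeddings were already computed. A minimal standalone sketch of the new logic, with illustrative names:

    import os

    def check_if_exists(output_dir: str, overwrite: bool) -> None:
        # The directory is now created up front; only a pre-existing manifest
        # from a previous run triggers the error when overwrite is disabled.
        os.makedirs(output_dir, exist_ok=True)
        if os.path.exists(os.path.join(output_dir, "manifest.csv")) and not overwrite:
            raise FileExistsError(f"The embeddings output directory already exists: {output_dir}.")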

eva/core/data/dataloaders/dataloader.py
CHANGED

@@ -38,7 +38,7 @@ class DataLoader:
         Mutually exclusive with `batch_size`, `shuffle`, `sampler` and `drop_last`.
     """

-    num_workers: int = multiprocessing.cpu_count()
+    num_workers: int | None = None
     """How many workers to use for loading the data.

     By default, it will use the number of CPUs available.
@@ -71,7 +71,7 @@ class DataLoader:
             shuffle=self.shuffle,
             sampler=self.sampler,
             batch_sampler=self.batch_sampler,
-            num_workers=self.num_workers,
+            num_workers=self.num_workers or multiprocessing.cpu_count(),
             collate_fn=self.collate_fn,
             pin_memory=self.pin_memory,
             drop_last=self.drop_last,
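Note that the new fallback relies on truthiness, so an explicit `num_workers=0` also resolves to the CPU count; a small sketch of the resolution rule:

    import multiprocessing

    def resolve_num_workers(num_workers: int | None) -> int:
        # `or` treats 0 like None, so zero workers cannot be requested explicitly.
        return num_workers or multiprocessing.cpu_count()

    assert resolve_num_workers(None) == multiprocessing.cpu_count()
    assert resolve_num_workers(0) == multiprocessing.cpu_count()
    assert resolve_num_workers(4) == 4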

eva/core/data/splitting/random.py
CHANGED

@@ -24,12 +24,13 @@ def random_split(
     Returns:
         The indices of the train, validation, and test sets as lists.
     """
-    if train_ratio + val_ratio + test_ratio != 1.0:
-        raise ValueError("The sum of the ratios must be equal to 1.")
+    total_ratio = train_ratio + val_ratio + test_ratio
+    if total_ratio > 1.0:
+        raise ValueError("The sum of the ratios must be lower or equal to 1.")

-    np.random.seed(seed)
-    n_samples = len(samples)
-    indices = np.random.permutation(n_samples)
+    random_generator = np.random.default_rng(seed)
+    n_samples = int(total_ratio * len(samples))
+    indices = random_generator.permutation(len(samples))[:n_samples]

     n_train = int(np.floor(train_ratio * n_samples))
     n_val = n_samples - n_train if test_ratio == 0.0 else int(np.floor(val_ratio * n_samples)) or 1
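Ratios may now sum to less than 1, in which case only the first `total_ratio * len(samples)` shuffled indices are kept; note that the per-split counts are then taken from this subsampled total, not the full dataset. A self-contained sketch of the arithmetic:

    import numpy as np

    samples, seed = list(range(100)), 42
    train_ratio, val_ratio, test_ratio = 0.4, 0.1, 0.1  # sums to 0.6 -> subsample

    total_ratio = train_ratio + val_ratio + test_ratio
    random_generator = np.random.default_rng(seed)
    n_samples = int(total_ratio * len(samples))               # 60 of 100 indices kept
    indices = random_generator.permutation(len(samples))[:n_samples]
    n_train = int(np.floor(train_ratio * n_samples))          # 24 training samples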

eva/core/data/splitting/stratified.py
CHANGED

@@ -28,10 +28,11 @@ def stratified_split(
     """
     if len(samples) != len(targets):
         raise ValueError("The number of samples and targets must be equal.")
-    if train_ratio + val_ratio + (test_ratio or 0) != 1.0:
-        raise ValueError("The sum of the ratios must be equal to 1.")
+    if train_ratio + val_ratio + (test_ratio or 0) > 1.0:
+        raise ValueError("The sum of the ratios must be lower or equal to 1.")

-    np.random.seed(seed)
+    use_all_samples = train_ratio + val_ratio + test_ratio == 1
+    random_generator = np.random.default_rng(seed)
     unique_classes, y_indices = np.unique(targets, return_inverse=True)
     n_classes = unique_classes.shape[0]
@@ -39,18 +40,23 @@ def stratified_split(

     for c in range(n_classes):
         class_indices = np.where(y_indices == c)[0]
-        np.random.shuffle(class_indices)
+        random_generator.shuffle(class_indices)

         n_train = int(np.floor(train_ratio * len(class_indices))) or 1
         n_val = (
             len(class_indices) - n_train
-            if test_ratio == 0.0
+            if test_ratio == 0.0 and use_all_samples
             else int(np.floor(val_ratio * len(class_indices))) or 1
         )

         train_indices.extend(class_indices[:n_train])
         val_indices.extend(class_indices[n_train : n_train + n_val])
         if test_ratio > 0.0:
-            test_indices.extend(class_indices[n_train + n_val :])
+            n_test = (
+                len(class_indices) - n_train - n_val
+                if use_all_samples
+                else int(np.floor(test_ratio * len(class_indices))) or 1
+            )
+            test_indices.extend(class_indices[n_train + n_val : n_train + n_val + n_test])

     return train_indices, val_indices, test_indices or None
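The `use_all_samples` flag distinguishes ratios that sum to 1 (assign every leftover sample) from genuine subsampling (size each split independently). A worked per-class example of the new test-split sizing:

    import numpy as np

    class_indices = np.arange(60)                   # one class with 60 samples
    train_ratio, val_ratio, test_ratio = 0.7, 0.15, 0.15
    use_all_samples = train_ratio + val_ratio + test_ratio == 1  # True here

    n_train = int(np.floor(train_ratio * len(class_indices))) or 1  # 42
    n_val = int(np.floor(val_ratio * len(class_indices))) or 1      # 9
    n_test = (
        len(class_indices) - n_train - n_val        # 9: the remainder is used
        if use_all_samples
        else int(np.floor(test_ratio * len(class_indices))) or 1
    )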

eva/core/losses/cross_entropy.py
ADDED

@@ -0,0 +1,27 @@
+"""Cross-entropy based loss function."""
+
+from typing import Sequence
+
+import torch
+from torch import nn
+
+
+class CrossEntropyLoss(nn.CrossEntropyLoss):
+    """A wrapper around torch.nn.CrossEntropyLoss that accepts weights in list format.
+
+    Needed for .yaml file loading & class instantiation with jsonargparse.
+    """
+
+    def __init__(
+        self, *args, weight: Sequence[float] | torch.Tensor | None = None, **kwargs
+    ) -> None:
+        """Initialize the loss function.
+
+        Args:
+            args: Positional arguments from the base class.
+            weight: A list of weights to assign to each class.
+            kwargs: Key-word arguments from the base class.
+        """
+        if weight is not None and not isinstance(weight, torch.Tensor):
+            weight = torch.tensor(weight)
+        super().__init__(*args, **kwargs, weight=weight)
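A quick usage sketch of the wrapper (assuming the new `eva.core.losses` package re-exports it): since jsonargparse cannot build a `torch.Tensor` from a YAML list, class weights can now be passed as plain floats.

    import torch
    from eva.core.losses import CrossEntropyLoss  # re-export assumed

    criterion = CrossEntropyLoss(weight=[0.25, 0.75])  # list converted to a tensor
    logits, targets = torch.randn(8, 2), torch.randint(0, 2, (8,))
    loss = criterion(logits, targets)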
eva/core/metrics/__init__.py
CHANGED

@@ -3,8 +3,6 @@
 from eva.core.metrics.average_loss import AverageLoss
 from eva.core.metrics.binary_balanced_accuracy import BinaryBalancedAccuracy
 from eva.core.metrics.defaults import BinaryClassificationMetrics, MulticlassClassificationMetrics
-from eva.core.metrics.generalized_dice import GeneralizedDiceScore
-from eva.core.metrics.mean_iou import MeanIoU
 from eva.core.metrics.structs import Metric, MetricCollection, MetricModule, MetricsSchema

 __all__ = [
@@ -12,8 +10,6 @@ __all__ = [
     "BinaryBalancedAccuracy",
     "BinaryClassificationMetrics",
     "MulticlassClassificationMetrics",
-    "GeneralizedDiceScore",
-    "MeanIoU",
     "Metric",
     "MetricCollection",
     "MetricModule",

eva/core/metrics/defaults/__init__.py
CHANGED

@@ -4,10 +4,8 @@ from eva.core.metrics.defaults.classification import (
     BinaryClassificationMetrics,
     MulticlassClassificationMetrics,
 )
-from eva.core.metrics.defaults.segmentation import MulticlassSegmentationMetrics

 __all__ = [
     "MulticlassClassificationMetrics",
     "BinaryClassificationMetrics",
-    "MulticlassSegmentationMetrics",
 ]

eva/core/models/modules/module.py
CHANGED

@@ -1,10 +1,10 @@
 """Base model module."""

+import os
 from typing import Any, Mapping

 import lightning.pytorch as pl
 import torch
-from lightning.pytorch.strategies.single_device import SingleDeviceStrategy
 from lightning.pytorch.utilities import memory
 from lightning.pytorch.utilities.types import STEP_OUTPUT
 from typing_extensions import override
@@ -49,14 +49,14 @@ class ModelModule(pl.LightningModule):

     @property
     def metrics_device(self) -> torch.device:
-        """Returns the device by which the metrics should be calculated.
-        …
-        return …
+        """Returns the device by which the metrics should be calculated."""
+        device = os.getenv("METRICS_DEVICE", None)
+        if device is not None:
+            return torch.device(device)
+        elif self.device.type == "mps":
+            # mps seems to have compatibility issues with segmentation metrics
+            return torch.device("cpu")
+        return self.device

     @override
     def on_fit_start(self) -> None:
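The metrics device is now overridable through an environment variable, with an automatic CPU fallback on Apple's `mps` backend; for example:

    import os

    # Force metric computation onto the CPU even when the model runs elsewhere;
    # without this, the module falls back to self.device (or CPU on mps).
    os.environ["METRICS_DEVICE"] = "cpu"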

eva/core/models/transforms/extract_cls_features.py
CHANGED

@@ -7,13 +7,20 @@ from transformers import modeling_outputs
 class ExtractCLSFeatures:
     """Extracts the CLS token from a ViT model output."""

-    def __init__(self, cls_index: int = 0) -> None:
+    def __init__(
+        self, cls_index: int = 0, num_register_tokens: int = 0, include_patch_tokens: bool = False
+    ) -> None:
         """Initializes the transformation.

         Args:
             cls_index: The index of the CLS token in the output tensor.
+            num_register_tokens: The number of register tokens in the model output.
+            include_patch_tokens: Whether to concat the mean aggregated patch tokens with
+                the cls token.
         """
         self._cls_index = cls_index
+        self._num_register_tokens = num_register_tokens
+        self._include_patch_tokens = include_patch_tokens

     def __call__(
         self, tensor: torch.Tensor | modeling_outputs.BaseModelOutputWithPooling
@@ -23,11 +30,12 @@ class ExtractCLSFeatures:
         Args:
             tensor: The tensor representing the model output.
         """
-        if isinstance(tensor, …):
-            …
+        if isinstance(tensor, modeling_outputs.BaseModelOutputWithPooling):
+            tensor = tensor.last_hidden_state
+
+        cls_token = tensor[:, self._cls_index, :]
+        if self._include_patch_tokens:
+            patch_tokens = tensor[:, 1 + self._num_register_tokens :, :]
+            return torch.cat([cls_token, patch_tokens.mean(1)], dim=-1)
+
+        return cls_token
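With `include_patch_tokens=True` the transform concatenates the CLS token with the mean of the patch tokens, doubling the feature dimension. A shape check against a dummy ViT output (import path and dimensions are illustrative):

    import torch
    from eva.core.models.transforms import ExtractCLSFeatures  # re-export assumed

    hidden = torch.randn(2, 1 + 4 + 196, 768)  # [CLS] + 4 registers + 14x14 patches
    transform = ExtractCLSFeatures(num_register_tokens=4, include_patch_tokens=True)
    features = transform(hidden)
    assert features.shape == (2, 2 * 768)  # CLS concatenated with mean patch token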

eva/core/models/transforms/extract_patch_features.py
CHANGED

@@ -10,13 +10,23 @@ from transformers import modeling_outputs
 class ExtractPatchFeatures:
     """Extracts the patch features from a ViT model output."""

-    def __init__(self, ignore_remaining_dims: bool = False) -> None:
+    def __init__(
+        self,
+        has_cls_token: bool = True,
+        num_register_tokens: int = 0,
+        ignore_remaining_dims: bool = False,
+    ) -> None:
         """Initializes the transformation.

         Args:
+            has_cls_token: If set to `True`, the model output is expected to have
+                a classification token.
+            num_register_tokens: The number of register tokens in the model output.
             ignore_remaining_dims: If set to `True`, ignore the remaining dimensions
                 of the patch grid if it is not a square number.
         """
+        self._has_cls_token = has_cls_token
+        self._num_register_tokens = num_register_tokens
         self._ignore_remaining_dims = ignore_remaining_dims

     def __call__(
@@ -31,17 +41,19 @@ class ExtractPatchFeatures:
         A tensor (batch_size, hidden_size, n_patches_height, n_patches_width)
         representing the model output.
         """
+        num_skip = int(self._has_cls_token) + self._num_register_tokens
         if isinstance(tensor, modeling_outputs.BaseModelOutputWithPooling):
-            features = tensor.last_hidden_state[:, 1:, :].permute(0, 2, 1)
-            batch_size, hidden_size, patch_grid = features.shape
-            height = width = int(math.sqrt(patch_grid))
-            if height * width != patch_grid:
-                if self._ignore_remaining_dims:
-                    features = features[:, :, : height * width]
-                else:
-                    raise ValueError(f"Patch grid size must be a square number {patch_grid}.")
-            patch_embeddings = features.view(batch_size, hidden_size, height, width)
+            features = tensor.last_hidden_state[:, num_skip:, :].permute(0, 2, 1)
         else:
-            …
+            features = tensor[:, num_skip:, :].permute(0, 2, 1)
+
+        batch_size, hidden_size, patch_grid = features.shape
+        height = width = int(math.sqrt(patch_grid))
+        if height * width != patch_grid:
+            if self._ignore_remaining_dims:
+                features = features[:, :, -height * width :]
+            else:
+                raise ValueError(f"Patch grid size must be a square number {patch_grid}.")
+        patch_embeddings = features.view(batch_size, hidden_size, height, width)

         return [patch_embeddings]
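The extractor now skips `num_skip` leading tokens (CLS plus registers) from either a raw tensor or a Hugging Face output before reshaping into a square patch grid; a shape sketch under the same illustrative dimensions:

    import torch
    from eva.core.models.transforms import ExtractPatchFeatures  # re-export assumed

    hidden = torch.randn(2, 1 + 4 + 196, 768)  # [CLS] + 4 registers + 14x14 patches
    transform = ExtractPatchFeatures(has_cls_token=True, num_register_tokens=4)
    (patch_embeddings,) = transform(hidden)    # the transform returns a list
    assert patch_embeddings.shape == (2, 768, 14, 14)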

eva/core/utils/progress_bar.py
ADDED

@@ -0,0 +1,15 @@
+"""Progress bar utility functions."""
+
+import os
+
+from tqdm import tqdm as _tqdm
+
+
+def tqdm(*args, **kwargs) -> _tqdm:
+    """Wrapper function for `tqdm.tqdm`."""
+    refresh_rate = os.environ.get("TQDM_REFRESH_RATE")
+    refresh_rate = int(refresh_rate) if refresh_rate is not None else None
+    disable = bool(int(os.environ.get("TQDM_DISABLE", 0))) or (refresh_rate == 0)
+    kwargs.setdefault("disable", disable)
+    kwargs.setdefault("miniters", refresh_rate)
+    return _tqdm(*args, **kwargs)
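Both knobs are driven by environment variables: `TQDM_DISABLE=1` or `TQDM_REFRESH_RATE=0` silences the bars, while a positive refresh rate is forwarded as `miniters`. For example:

    import os

    os.environ["TQDM_REFRESH_RATE"] = "50"  # redraw every 50 iterations
    # os.environ["TQDM_DISABLE"] = "1"      # or disable progress bars entirely

    from eva.core.utils.progress_bar import tqdm

    for _ in tqdm(range(1000)):
        pass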

eva/vision/data/datasets/__init__.py
CHANGED

@@ -6,6 +6,7 @@ from eva.vision.data.datasets.classification import (
     MHIST,
     PANDA,
     Camelyon16,
+    PANDASmall,
     PatchCamelyon,
     WsiClassificationDataset,
 )
@@ -15,6 +16,7 @@ from eva.vision.data.datasets.segmentation import (
     EmbeddingsSegmentationDataset,
     ImageSegmentation,
     LiTS,
+    LiTSBalanced,
     MoNuSAC,
     TotalSegmentator2D,
 )
@@ -27,6 +29,7 @@ __all__ = [
     "CRC",
     "MHIST",
     "PANDA",
+    "PANDASmall",
     "Camelyon16",
     "PatchCamelyon",
     "WsiClassificationDataset",
@@ -34,6 +37,7 @@ __all__ = [
     "EmbeddingsSegmentationDataset",
     "ImageSegmentation",
     "LiTS",
+    "LiTSBalanced",
     "MoNuSAC",
     "TotalSegmentator2D",
     "VisionDataset",

eva/vision/data/datasets/classification/__init__.py
CHANGED

@@ -4,7 +4,7 @@ from eva.vision.data.datasets.classification.bach import BACH
 from eva.vision.data.datasets.classification.camelyon16 import Camelyon16
 from eva.vision.data.datasets.classification.crc import CRC
 from eva.vision.data.datasets.classification.mhist import MHIST
-from eva.vision.data.datasets.classification.panda import PANDA
+from eva.vision.data.datasets.classification.panda import PANDA, PANDASmall
 from eva.vision.data.datasets.classification.patch_camelyon import PatchCamelyon
 from eva.vision.data.datasets.classification.wsi import WsiClassificationDataset

@@ -15,5 +15,6 @@ __all__ = [
     "PatchCamelyon",
     "WsiClassificationDataset",
     "PANDA",
+    "PANDASmall",
     "Camelyon16",
 ]

eva/vision/data/datasets/classification/camelyon16.py
CHANGED

@@ -87,6 +87,7 @@ class Camelyon16(wsi.MultiWsiDataset, base.ImageClassification):
         target_mpp: float = 0.5,
         backend: str = "openslide",
         image_transforms: Callable | None = None,
+        coords_path: str | None = None,
         seed: int = 42,
     ) -> None:
         """Initializes the dataset.
@@ -100,6 +101,7 @@ class Camelyon16(wsi.MultiWsiDataset, base.ImageClassification):
             target_mpp: Target microns per pixel (mpp) for the patches.
             backend: The backend to use for reading the whole-slide images.
             image_transforms: Transforms to apply to the extracted image patches.
+            coords_path: File path to save the patch coordinates as .csv.
             seed: Random seed for reproducibility.
         """
         self._split = split
@@ -119,6 +121,7 @@ class Camelyon16(wsi.MultiWsiDataset, base.ImageClassification):
             target_mpp=target_mpp,
             backend=backend,
             image_transforms=image_transforms,
+            coords_path=coords_path,
         )

     @property
@@ -207,7 +210,7 @@ class Camelyon16(wsi.MultiWsiDataset, base.ImageClassification):

     @override
     def load_metadata(self, index: int) -> Dict[str, Any]:
-        return …
+        return wsi.MultiWsiDataset.load_metadata(self, index)

     def _load_file_paths(self, split: Literal["train", "val", "test"] | None = None) -> List[str]:
         """Loads the file paths of the corresponding dataset split."""
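The new `coords_path` argument, threaded through `Camelyon16` above and `PANDA`/`WsiClassificationDataset` below, dumps the sampled patch coordinates to a CSV for inspection or reuse. A hedged instantiation sketch, assuming the usual `root`/`split` arguments:

    from eva.vision.data.datasets import Camelyon16

    dataset = Camelyon16(
        root="data/camelyon16",                       # illustrative dataset root
        split="train",
        coords_path="outputs/camelyon16_coords.csv",  # new in 0.1.3
    )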

eva/vision/data/datasets/classification/panda.py
CHANGED

@@ -49,6 +49,7 @@ class PANDA(wsi.MultiWsiDataset, base.ImageClassification):
         target_mpp: float = 0.5,
         backend: str = "openslide",
         image_transforms: Callable | None = None,
+        coords_path: str | None = None,
         seed: int = 42,
     ) -> None:
         """Initializes the dataset.
@@ -62,6 +63,7 @@ class PANDA(wsi.MultiWsiDataset, base.ImageClassification):
             target_mpp: Target microns per pixel (mpp) for the patches.
             backend: The backend to use for reading the whole-slide images.
             image_transforms: Transforms to apply to the extracted image patches.
+            coords_path: File path to save the patch coordinates as .csv.
             seed: Random seed for reproducibility.
         """
         self._split = split
@@ -80,6 +82,7 @@ class PANDA(wsi.MultiWsiDataset, base.ImageClassification):
             target_mpp=target_mpp,
             backend=backend,
             image_transforms=image_transforms,
+            coords_path=coords_path,
         )

     @property
@@ -132,7 +135,7 @@ class PANDA(wsi.MultiWsiDataset, base.ImageClassification):

     @override
     def load_metadata(self, index: int) -> Dict[str, Any]:
-        return …
+        return wsi.MultiWsiDataset.load_metadata(self, index)

     def _load_file_paths(self, split: Literal["train", "val", "test"] | None = None) -> List[str]:
         """Loads the file paths of the corresponding dataset split."""
@@ -182,3 +185,16 @@ class PANDA(wsi.MultiWsiDataset, base.ImageClassification):

     def _get_id_from_path(self, file_path: str) -> str:
         return os.path.basename(file_path).replace(".tiff", "")
+
+
+class PANDASmall(PANDA):
+    """Small version of the PANDA dataset for quicker benchmarking."""
+
+    _train_split_ratio: float = 0.1
+    """Train split ratio."""
+
+    _val_split_ratio: float = 0.05
+    """Validation split ratio."""
+
+    _test_split_ratio: float = 0.05
+    """Test split ratio."""
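`PANDASmall` only overrides the split ratios, so it samples roughly 10%/5%/5% of the PANDA slides for train/val/test (about a fifth of the full dataset), which relies on the relaxed ratio checks in the splitting functions above:

    from eva.vision.data.datasets import PANDASmall

    # Same interface as PANDA, just smaller splits (root path is illustrative).
    dataset = PANDASmall(root="data/panda", split="train")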

eva/vision/data/datasets/classification/wsi.py
CHANGED

@@ -35,6 +35,7 @@ class WsiClassificationDataset(wsi.MultiWsiDataset, base.ImageClassification):
         split: Literal["train", "val", "test"] | None = None,
         image_transforms: Callable | None = None,
         column_mapping: Dict[str, str] = default_column_mapping,
+        coords_path: str | None = None,
     ):
         """Initializes the dataset.

@@ -51,6 +52,7 @@ class WsiClassificationDataset(wsi.MultiWsiDataset, base.ImageClassification):
             split: The split of the dataset to load.
             image_transforms: Transforms to apply to the extracted image patches.
             column_mapping: Mapping of the columns in the manifest file.
+            coords_path: File path to save the patch coordinates as .csv.
         """
         self._split = split
         self._column_mapping = self.default_column_mapping | column_mapping
@@ -66,6 +68,7 @@ class WsiClassificationDataset(wsi.MultiWsiDataset, base.ImageClassification):
             target_mpp=target_mpp,
             backend=backend,
             image_transforms=image_transforms,
+            coords_path=coords_path,
         )

     @override
@@ -88,7 +91,7 @@ class WsiClassificationDataset(wsi.MultiWsiDataset, base.ImageClassification):

     @override
     def load_metadata(self, index: int) -> Dict[str, Any]:
-        return …
+        return wsi.MultiWsiDataset.load_metadata(self, index)

     def _load_manifest(self, manifest_path: str) -> pd.DataFrame:
         df = pd.read_csv(manifest_path)

eva/vision/data/datasets/segmentation/__init__.py
CHANGED

@@ -5,6 +5,7 @@ from eva.vision.data.datasets.segmentation.bcss import BCSS
 from eva.vision.data.datasets.segmentation.consep import CoNSeP
 from eva.vision.data.datasets.segmentation.embeddings import EmbeddingsSegmentationDataset
 from eva.vision.data.datasets.segmentation.lits import LiTS
+from eva.vision.data.datasets.segmentation.lits_balanced import LiTSBalanced
 from eva.vision.data.datasets.segmentation.monusac import MoNuSAC
 from eva.vision.data.datasets.segmentation.total_segmentator_2d import TotalSegmentator2D

@@ -14,6 +15,7 @@ __all__ = [
     "CoNSeP",
     "EmbeddingsSegmentationDataset",
     "LiTS",
+    "LiTSBalanced",
     "MoNuSAC",
     "TotalSegmentator2D",
 ]

eva/vision/data/datasets/segmentation/consep.py
CHANGED

@@ -37,8 +37,8 @@ class CoNSeP(wsi.MultiWsiDataset, base.ImageSegmentation):
         root: str,
         sampler: samplers.Sampler | None = None,
         split: Literal["train", "val"] | None = None,
-        width: int = …,
-        height: int = …,
+        width: int = 250,
+        height: int = 250,
         target_mpp: float = 0.25,
         transforms: Callable | None = None,
     ) -> None: