kaiko-eva 0.1.1__py3-none-any.whl → 0.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


Files changed (63)
  1. eva/core/callbacks/writers/embeddings/base.py +3 -4
  2. eva/core/data/dataloaders/dataloader.py +2 -2
  3. eva/core/data/splitting/random.py +6 -5
  4. eva/core/data/splitting/stratified.py +12 -6
  5. eva/core/losses/__init__.py +5 -0
  6. eva/core/losses/cross_entropy.py +27 -0
  7. eva/core/metrics/__init__.py +0 -4
  8. eva/core/metrics/defaults/__init__.py +0 -2
  9. eva/core/models/modules/module.py +9 -9
  10. eva/core/models/transforms/extract_cls_features.py +17 -9
  11. eva/core/models/transforms/extract_patch_features.py +23 -11
  12. eva/core/utils/progress_bar.py +15 -0
  13. eva/vision/data/datasets/__init__.py +4 -0
  14. eva/vision/data/datasets/classification/__init__.py +2 -1
  15. eva/vision/data/datasets/classification/camelyon16.py +4 -1
  16. eva/vision/data/datasets/classification/panda.py +17 -1
  17. eva/vision/data/datasets/classification/wsi.py +4 -1
  18. eva/vision/data/datasets/segmentation/__init__.py +2 -0
  19. eva/vision/data/datasets/segmentation/consep.py +2 -2
  20. eva/vision/data/datasets/segmentation/lits.py +49 -29
  21. eva/vision/data/datasets/segmentation/lits_balanced.py +93 -0
  22. eva/vision/data/datasets/segmentation/monusac.py +7 -7
  23. eva/vision/data/datasets/segmentation/total_segmentator_2d.py +2 -2
  24. eva/vision/data/datasets/wsi.py +37 -1
  25. eva/vision/data/wsi/patching/coordinates.py +9 -1
  26. eva/vision/data/wsi/patching/samplers/_utils.py +2 -8
  27. eva/vision/data/wsi/patching/samplers/random.py +4 -2
  28. eva/vision/losses/__init__.py +2 -2
  29. eva/vision/losses/dice.py +75 -8
  30. eva/vision/metrics/__init__.py +11 -0
  31. eva/vision/metrics/defaults/__init__.py +7 -0
  32. eva/{core → vision}/metrics/defaults/segmentation/__init__.py +1 -1
  33. eva/{core → vision}/metrics/defaults/segmentation/multiclass.py +2 -1
  34. eva/vision/metrics/segmentation/BUILD +1 -0
  35. eva/vision/metrics/segmentation/__init__.py +9 -0
  36. eva/vision/metrics/segmentation/_utils.py +69 -0
  37. eva/{core/metrics → vision/metrics/segmentation}/generalized_dice.py +12 -10
  38. eva/vision/metrics/segmentation/mean_iou.py +57 -0
  39. eva/vision/models/modules/semantic_segmentation.py +4 -3
  40. eva/vision/models/networks/backbones/_utils.py +12 -0
  41. eva/vision/models/networks/backbones/pathology/__init__.py +4 -1
  42. eva/vision/models/networks/backbones/pathology/histai.py +8 -2
  43. eva/vision/models/networks/backbones/pathology/mahmood.py +2 -9
  44. eva/vision/models/networks/backbones/pathology/owkin.py +14 -0
  45. eva/vision/models/networks/backbones/pathology/paige.py +51 -0
  46. eva/vision/models/networks/decoders/__init__.py +1 -1
  47. eva/vision/models/networks/decoders/segmentation/__init__.py +12 -4
  48. eva/vision/models/networks/decoders/segmentation/base.py +16 -0
  49. eva/vision/models/networks/decoders/segmentation/{conv2d.py → decoder2d.py} +26 -22
  50. eva/vision/models/networks/decoders/segmentation/linear.py +2 -2
  51. eva/vision/models/networks/decoders/segmentation/semantic/__init__.py +12 -0
  52. eva/vision/models/networks/decoders/segmentation/{common.py → semantic/common.py} +3 -3
  53. eva/vision/models/networks/decoders/segmentation/semantic/with_image.py +94 -0
  54. eva/vision/models/networks/decoders/segmentation/typings.py +18 -0
  55. eva/vision/utils/io/__init__.py +7 -1
  56. eva/vision/utils/io/nifti.py +19 -4
  57. {kaiko_eva-0.1.1.dist-info → kaiko_eva-0.1.3.dist-info}/METADATA +3 -34
  58. {kaiko_eva-0.1.1.dist-info → kaiko_eva-0.1.3.dist-info}/RECORD +61 -48
  59. {kaiko_eva-0.1.1.dist-info → kaiko_eva-0.1.3.dist-info}/WHEEL +1 -1
  60. eva/core/metrics/mean_iou.py +0 -120
  61. eva/vision/models/networks/decoders/decoder.py +0 -7
  62. {kaiko_eva-0.1.1.dist-info → kaiko_eva-0.1.3.dist-info}/entry_points.txt +0 -0
  63. {kaiko_eva-0.1.1.dist-info → kaiko_eva-0.1.3.dist-info}/licenses/LICENSE +0 -0

eva/vision/metrics/segmentation/_utils.py
@@ -0,0 +1,69 @@
+"""Utils for segmentation metric collections."""
+
+from typing import Tuple
+
+import torch
+
+
+def apply_ignore_index(
+    preds: torch.Tensor, target: torch.Tensor, ignore_index: int, num_classes: int
+) -> Tuple[torch.Tensor, torch.Tensor]:
+    """Applies the ignore index to the predictions and target tensors.
+
+    1. Masks the values in the target tensor that correspond to the ignored index.
+    2. Remove the channel corresponding to the ignored index from both tensors.
+
+    Args:
+        preds: The predictions tensor. Expected to be of shape `(N,C,...)`.
+        target: The target tensor. Expected to be of shape `(N,C,...)`.
+        ignore_index: The index to ignore.
+        num_classes: The number of classes.
+
+    Returns:
+        The modified predictions and target tensors of shape `(N,C-1,...)`.
+    """
+    if ignore_index < 0:
+        raise ValueError("ignore_index must be a non-negative integer")
+
+    ignore_mask = preds[:, ignore_index] == 1
+    target = target * (~ignore_mask.unsqueeze(1))
+
+    preds = _ignore_tensor_channel(preds, ignore_index)
+    target = _ignore_tensor_channel(target, ignore_index)
+
+    return preds, target
+
+
+def index_to_one_hot(tensor: torch.Tensor, num_classes: int) -> torch.Tensor:
+    """Converts an index tensor to a one-hot tensor.
+
+    Args:
+        tensor: The index tensor to convert. Expected to be of shape `(N,...)`.
+        num_classes: The number of classes to one-hot encode.
+
+    Returns:
+        A one-hot tensor of shape `(N,C,...)`.
+    """
+    if not _is_one_hot(tensor):
+        tensor = torch.nn.functional.one_hot(tensor.long(), num_classes=num_classes).movedim(-1, 1)
+    return tensor
+
+
+def _ignore_tensor_channel(tensor: torch.Tensor, ignore_index: int) -> torch.Tensor:
+    """Removes the channel corresponding to the specified ignore index.
+
+    Args:
+        tensor: The tensor to remove the channel from. Expected to be of shape `(N,C,...)`.
+        ignore_index: The index of the channel dimension (C) to remove.
+
+    Returns:
+        A tensor without the specified channel `(N,C-1,...)`.
+    """
+    if ignore_index < 0:
+        raise ValueError("ignore_index must be a non-negative integer")
+    return torch.cat([tensor[:, :ignore_index], tensor[:, ignore_index + 1 :]], dim=1)
+
+
+def _is_one_hot(tensor: torch.Tensor, expected_dim: int = 4) -> bool:
+    """Checks if the tensor is a one-hot tensor."""
+    return bool((tensor.bool() == tensor).all()) and tensor.ndim == expected_dim
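
The helpers above are small enough to sanity-check by hand. A minimal sketch of their behaviour on toy tensors (shapes and values chosen purely for illustration, assuming the wheel is installed as `eva`):

```python
import torch

from eva.vision.metrics.segmentation import _utils

# Two classes, one sample, 2x2 spatial grid; class 1 will be ignored.
preds = torch.tensor([[[0, 1], [1, 0]]])   # (N=1, H=2, W=2) index tensor
target = torch.tensor([[[0, 1], [1, 1]]])

preds_oh = _utils.index_to_one_hot(preds, num_classes=2)    # -> (1, 2, 2, 2), channels first
target_oh = _utils.index_to_one_hot(target, num_classes=2)

# Zeroes target pixels where class 1 was predicted, then drops channel 1 from both tensors.
preds_oh, target_oh = _utils.apply_ignore_index(preds_oh, target_oh, ignore_index=1, num_classes=2)
print(preds_oh.shape, target_oh.shape)  # torch.Size([1, 1, 2, 2]) twice
```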

eva/{core/metrics → vision/metrics/segmentation}/generalized_dice.py
@@ -6,6 +6,8 @@ import torch
 from torchmetrics import segmentation
 from typing_extensions import override
 
+from eva.vision.metrics.segmentation import _utils
+
 
 class GeneralizedDiceScore(segmentation.GeneralizedDiceScore):
     """Defines the Generalized Dice Score.
@@ -30,8 +32,6 @@ class GeneralizedDiceScore(segmentation.GeneralizedDiceScore):
             include_background: Whether to include the background class in the computation
             weight_type: The type of weight to apply to each class. Can be one of `"square"`,
                 `"simple"`, or `"linear"`.
-            input_format: What kind of input the function receives. Choose between ``"one-hot"``
-                for one-hot encoded tensors or ``"index"`` for index tensors.
             ignore_index: Integer specifying a target class to ignore. If given, this class
                 index does not contribute to the returned score, regardless of reduction method.
             per_class: Whether to compute the IoU for each class separately. If set to ``False``,
@@ -39,21 +39,23 @@ class GeneralizedDiceScore(segmentation.GeneralizedDiceScore):
             kwargs: Additional keyword arguments, see :ref:`Metric kwargs` for more info.
         """
         super().__init__(
-            num_classes=num_classes,
+            num_classes=num_classes
+            - (ignore_index is not None)
+            + (ignore_index == 0 and not include_background),
             include_background=include_background,
             weight_type=weight_type,
             per_class=per_class,
             **kwargs,
         )
-
+        self.orig_num_classes = num_classes
         self.ignore_index = ignore_index
 
     @override
     def update(self, preds: torch.Tensor, target: torch.Tensor) -> None:
+        preds = _utils.index_to_one_hot(preds, num_classes=self.orig_num_classes)
+        target = _utils.index_to_one_hot(target, num_classes=self.orig_num_classes)
         if self.ignore_index is not None:
-            mask = target != self.ignore_index
-            mask = mask.all(dim=-1, keepdim=True)
-            preds = preds * mask
-            target = target * mask
-
-        super().update(preds=preds, target=target)
+            preds, target = _utils.apply_ignore_index(
+                preds, target, self.ignore_index, self.num_classes
+            )
+        super().update(preds=preds.long(), target=target.long())
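
With the one-hot conversion handled internally, the metric can now be fed plain index masks. A minimal usage sketch (class count and shapes are illustrative; the constructor is assumed to mirror the MeanIoU signature shown in the next file):

```python
import torch

from eva.vision.metrics.segmentation.generalized_dice import GeneralizedDiceScore

# Three classes, with class 0 excluded from the score via ignore_index.
metric = GeneralizedDiceScore(num_classes=3, ignore_index=0)

preds = torch.randint(0, 3, (4, 16, 16))   # (N, H, W) predicted class indices
target = torch.randint(0, 3, (4, 16, 16))  # (N, H, W) ground-truth class indices

metric.update(preds, target)
print(metric.compute())
```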

eva/vision/metrics/segmentation/mean_iou.py
@@ -0,0 +1,57 @@
+"""MeanIoU metric for semantic segmentation."""
+
+from typing import Any
+
+import torch
+from torchmetrics import segmentation
+from typing_extensions import override
+
+from eva.vision.metrics.segmentation import _utils
+
+
+class MeanIoU(segmentation.MeanIoU):
+    """MeanIoU (mIOU) metric for semantic segmentation.
+
+    It expands the `torchmetrics` class by including an `ignore_index`
+    functionality.
+    """
+
+    def __init__(
+        self,
+        num_classes: int,
+        include_background: bool = True,
+        ignore_index: int | None = None,
+        per_class: bool = False,
+        **kwargs: Any,
+    ) -> None:
+        """Initializes the metric.
+
+        Args:
+            num_classes: The number of classes in the segmentation problem.
+            include_background: Whether to include the background class in the computation
+            ignore_index: Integer specifying a target class to ignore. If given, this class
+                index does not contribute to the returned score, regardless of reduction method.
+            per_class: Whether to compute the IoU for each class separately. If set to ``False``,
+                the metric will compute the mean IoU over all classes.
+            kwargs: Additional keyword arguments, see :ref:`Metric kwargs` for more info.
+        """
+        super().__init__(
+            num_classes=num_classes
+            - (ignore_index is not None)
+            + (ignore_index == 0 and not include_background),
+            include_background=include_background,
+            per_class=per_class,
+            **kwargs,
+        )
+        self.orig_num_classes = num_classes
+        self.ignore_index = ignore_index
+
+    @override
+    def update(self, preds: torch.Tensor, target: torch.Tensor) -> None:
+        preds = _utils.index_to_one_hot(preds, num_classes=self.orig_num_classes)
+        target = _utils.index_to_one_hot(target, num_classes=self.orig_num_classes)
+        if self.ignore_index is not None:
+            preds, target = _utils.apply_ignore_index(
+                preds, target, self.ignore_index, self.num_classes
+            )
+        super().update(preds=preds.long(), target=target.long())
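
A corresponding sketch for the patched MeanIoU, here with per-class scores (values illustrative):

```python
import torch

from eva.vision.metrics.segmentation.mean_iou import MeanIoU

# Four classes; class 3 is ignored, so three per-class IoU values are returned.
metric = MeanIoU(num_classes=4, ignore_index=3, per_class=True)

preds = torch.randint(0, 4, (2, 32, 32))
target = torch.randint(0, 4, (2, 32, 32))

metric.update(preds, target)
print(metric.compute())
```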

eva/vision/models/modules/semantic_segmentation.py
@@ -15,6 +15,7 @@ from eva.core.models.modules.typings import INPUT_BATCH, INPUT_TENSOR_BATCH
 from eva.core.models.modules.utils import batch_postprocess, grad
 from eva.core.utils import parser
 from eva.vision.models.networks import decoders
+from eva.vision.models.networks.decoders.segmentation.typings import DecoderInputs
 
 
 class SemanticSegmentationModule(module.ModelModule):
@@ -101,9 +102,9 @@
                 "Please provide the expected `to_size` that the "
                 "decoder should map the embeddings (`inputs`) to."
             )
-
-        patch_embeddings = self.encoder(inputs) if self.encoder else inputs
-        return self.decoder(patch_embeddings, to_size or inputs.shape[-2:])
+        features = self.encoder(inputs) if self.encoder else inputs
+        decoder_inputs = DecoderInputs(features, inputs.shape[-2:], inputs)  # type: ignore
+        return self.decoder(decoder_inputs)
 
     @override
     def training_step(self, batch: INPUT_TENSOR_BATCH, *args: Any, **kwargs: Any) -> STEP_OUTPUT:
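
The decoder call contract changes from positional tensors to a single `DecoderInputs` object. A rough sketch of what the module now hands to the decoder, assuming `DecoderInputs` is a named tuple of `(features, image_size, images)` as the construction above suggests:

```python
import torch

from eva.vision.models.networks.decoders.segmentation.typings import DecoderInputs

features = torch.rand(2, 768, 14, 14)   # encoder patch embeddings (N, hidden, h, w)
images = torch.rand(2, 3, 224, 224)     # original input batch

decoder_inputs = DecoderInputs(features, images.shape[-2:], images)
# masks = decoder(decoder_inputs)       # hypothetical decoder instance -> (2, n_classes, 224, 224)
```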

eva/vision/models/networks/backbones/_utils.py
@@ -1,7 +1,9 @@
 """Utilis for backbone networks."""
 
+import os
 from typing import Any, Dict, Tuple
 
+import huggingface_hub
 from torch import nn
 
 from eva import models
@@ -37,3 +39,13 @@ def load_hugingface_model(
         tensor_transforms=tensor_transforms,
         model_kwargs=model_kwargs,
     )
+
+
+def huggingface_login(hf_token: str | None = None):
+    token = hf_token or os.environ.get("HF_TOKEN")
+    if not token:
+        raise ValueError(
+            "Please provide a HuggingFace token to download the model. "
+            "You can either pass it as an argument or set the env variable HF_TOKEN."
+        )
+    huggingface_hub.login(token=token)
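
The new helper centralizes the token handling that `mahmood_uni` previously did inline (see the hunks below). A usage sketch:

```python
from eva.vision.models.networks.backbones import _utils

# Pass the token explicitly, or set the HF_TOKEN environment variable instead;
# a ValueError is raised when neither is provided.
_utils.huggingface_login(hf_token="hf_xxx")  # placeholder token, illustration only
```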

eva/vision/models/networks/backbones/pathology/__init__.py
@@ -12,7 +12,8 @@ from eva.vision.models.networks.backbones.pathology.kaiko import (
 )
 from eva.vision.models.networks.backbones.pathology.lunit import lunit_vits8, lunit_vits16
 from eva.vision.models.networks.backbones.pathology.mahmood import mahmood_uni
-from eva.vision.models.networks.backbones.pathology.owkin import owkin_phikon
+from eva.vision.models.networks.backbones.pathology.owkin import owkin_phikon, owkin_phikon_v2
+from eva.vision.models.networks.backbones.pathology.paige import paige_virchow2
 
 __all__ = [
     "kaiko_vitb16",
@@ -21,6 +22,7 @@ __all__ = [
     "kaiko_vits16",
     "kaiko_vits8",
     "owkin_phikon",
+    "owkin_phikon_v2",
     "lunit_vits16",
     "lunit_vits8",
     "mahmood_uni",
@@ -28,4 +30,5 @@ __all__ = [
     "prov_gigapath",
     "histai_hibou_b",
     "histai_hibou_l",
+    "paige_virchow2",
 ]

eva/vision/models/networks/backbones/pathology/histai.py
@@ -12,6 +12,9 @@ from eva.vision.models.networks.backbones.registry import register_model
 def histai_hibou_b(out_indices: int | Tuple[int, ...] | None = None) -> nn.Module:
     """Initializes the hibou-B pathology FM by hist.ai (https://huggingface.co/histai/hibou-B).
 
+    Uses a customized implementation of the DINOv2 architecture from the transformers
+    library to add support for registers, which requires the trust_remote_code=True flag.
+
     Args:
         out_indices: Whether and which multi-level patch embeddings to return.
             Currently only out_indices=1 is supported.
@@ -23,7 +26,7 @@ def histai_hibou_b(out_indices: int | Tuple[int, ...] | None = None) -> nn.Modul
         model_name="histai/hibou-B",
         out_indices=out_indices,
         model_kwargs={"trust_remote_code": True},
-        transform_args={"ignore_remaining_dims": True} if out_indices is not None else None,
+        transform_args={"num_register_tokens": 4} if out_indices is not None else None,
     )
 
 
@@ -31,6 +34,9 @@ def histai_hibou_b(out_indices: int | Tuple[int, ...] | None = None) -> nn.Modul
 def histai_hibou_l(out_indices: int | Tuple[int, ...] | None = None) -> nn.Module:
     """Initializes the hibou-L pathology FM by hist.ai (https://huggingface.co/histai/hibou-L).
 
+    Uses a customized implementation of the DINOv2 architecture from the transformers
+    library to add support for registers, which requires the trust_remote_code=True flag.
+
     Args:
         out_indices: Whether and which multi-level patch embeddings to return.
             Currently only out_indices=1 is supported.
@@ -42,5 +48,5 @@ def histai_hibou_l(out_indices: int | Tuple[int, ...] | None = None) -> nn.Modul
         model_name="histai/hibou-L",
         out_indices=out_indices,
         model_kwargs={"trust_remote_code": True},
-        transform_args={"ignore_remaining_dims": True} if out_indices is not None else None,
+        transform_args={"num_register_tokens": 4} if out_indices is not None else None,
     )

eva/vision/models/networks/backbones/pathology/mahmood.py
@@ -9,6 +9,7 @@ from loguru import logger
 from torch import nn
 
 from eva.vision.models import wrappers
+from eva.vision.models.networks.backbones import _utils
 from eva.vision.models.networks.backbones.registry import register_model
 
 
@@ -31,19 +32,11 @@ def mahmood_uni(
     Returns:
         The model instance.
     """
-    token = hf_token or os.environ.get("HF_TOKEN")
-    if not token:
-        raise ValueError(
-            "Please provide a HuggingFace token to download the model. "
-            "You can either pass it as an argument or set the env variable HF_TOKEN."
-        )
-
     checkpoint_path = os.path.join(download_dir, "pytorch_model.bin")
-
     if not os.path.exists(checkpoint_path):
         logger.info(f"Downloading the model checkpoint to {download_dir} ...")
         os.makedirs(download_dir, exist_ok=True)
-        huggingface_hub.login(token=token)
+        _utils.huggingface_login(hf_token)
         huggingface_hub.hf_hub_download(
             "MahmoodLab/UNI",
             filename="pytorch_model.bin",

eva/vision/models/networks/backbones/pathology/owkin.py
@@ -20,3 +20,17 @@ def owkin_phikon(out_indices: int | Tuple[int, ...] | None = None) -> nn.Module:
         The model instance.
     """
     return _utils.load_hugingface_model(model_name="owkin/phikon", out_indices=out_indices)
+
+
+@register_model("pathology/owkin_phikon_v2")
+def owkin_phikon_v2(out_indices: int | Tuple[int, ...] | None = None) -> nn.Module:
+    """Initializes the phikon-v2 pathology FM by owkin (https://huggingface.co/owkin/phikon-v2).
+
+    Args:
+        out_indices: Whether and which multi-level patch embeddings to return.
+            Currently only out_indices=1 is supported.
+
+    Returns:
+        The model instance.
+    """
+    return _utils.load_hugingface_model(model_name="owkin/phikon-v2", out_indices=out_indices)
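
As with the other registered backbones, the new entry can be instantiated directly from the pathology package (a hedged sketch; weights are fetched from the HuggingFace Hub):

```python
from eva.vision.models.networks.backbones.pathology import owkin_phikon_v2

backbone = owkin_phikon_v2()                   # CLS-token features by default
backbone_ms = owkin_phikon_v2(out_indices=1)   # last-level patch embeddings instead
```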

eva/vision/models/networks/backbones/pathology/paige.py
@@ -0,0 +1,51 @@
+"""Pathology FMs from paige.ai.
+
+Source: https://huggingface.co/paige-ai/
+"""
+
+from typing import Tuple
+
+import timm
+import torch.nn as nn
+
+from eva.core.models import transforms
+from eva.vision.models import wrappers
+from eva.vision.models.networks.backbones import _utils
+from eva.vision.models.networks.backbones.registry import register_model
+
+
+@register_model("pathology/paige_virchow2")
+def paige_virchow2(
+    dynamic_img_size: bool = True,
+    out_indices: int | Tuple[int, ...] | None = None,
+    hf_token: str | None = None,
+    include_patch_tokens: bool = False,
+) -> nn.Module:
+    """Initializes the Virchow2 pathology FM by paige.ai.
+
+    Args:
+        dynamic_img_size: Support different input image sizes by allowing to change
+            the grid size (interpolate abs and/or ROPE pos) in the forward pass.
+        out_indices: Whether and which multi-level patch embeddings to return.
+        include_patch_tokens: Whether to combine the mean aggregated patch tokens with cls token.
+        hf_token: HuggingFace token to download the model.
+
+    Returns:
+        The model instance.
+    """
+    _utils.huggingface_login(hf_token)
+    return wrappers.TimmModel(
+        model_name="hf-hub:paige-ai/Virchow2",
+        out_indices=out_indices,
+        pretrained=True,
+        model_kwargs={
+            "dynamic_img_size": dynamic_img_size,
+            "mlp_layer": timm.layers.SwiGLUPacked,
+            "act_layer": nn.SiLU,
+        },
+        tensor_transforms=(
+            transforms.ExtractCLSFeatures(include_patch_tokens=include_patch_tokens)
+            if out_indices is None
+            else None
+        ),
+    )
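
A usage sketch for the new Virchow2 entry point; the weights are gated, so a HuggingFace token (argument or HF_TOKEN environment variable) is required:

```python
from eva.vision.models.networks.backbones.pathology import paige_virchow2

# Placeholder token, illustration only; combines mean patch tokens with the CLS token.
backbone = paige_virchow2(hf_token="hf_xxx", include_patch_tokens=True)
```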

eva/vision/models/networks/decoders/__init__.py
@@ -1,6 +1,6 @@
 """Decoder heads API."""
 
 from eva.vision.models.networks.decoders import segmentation
-from eva.vision.models.networks.decoders.decoder import Decoder
+from eva.vision.models.networks.decoders.segmentation.base import Decoder
 
 __all__ = ["segmentation", "Decoder"]

eva/vision/models/networks/decoders/segmentation/__init__.py
@@ -1,11 +1,19 @@
 """Segmentation decoder heads API."""
 
-from eva.vision.models.networks.decoders.segmentation.common import (
+from eva.vision.models.networks.decoders.segmentation.decoder2d import Decoder2D
+from eva.vision.models.networks.decoders.segmentation.linear import LinearDecoder
+from eva.vision.models.networks.decoders.segmentation.semantic import (
     ConvDecoder1x1,
     ConvDecoderMS,
+    ConvDecoderWithImage,
     SingleLinearDecoder,
 )
-from eva.vision.models.networks.decoders.segmentation.conv2d import ConvDecoder
-from eva.vision.models.networks.decoders.segmentation.linear import LinearDecoder
 
-__all__ = ["ConvDecoder1x1", "ConvDecoderMS", "SingleLinearDecoder", "ConvDecoder", "LinearDecoder"]
+__all__ = [
+    "ConvDecoder1x1",
+    "ConvDecoderMS",
+    "SingleLinearDecoder",
+    "ConvDecoderWithImage",
+    "Decoder2D",
+    "LinearDecoder",
+]

eva/vision/models/networks/decoders/segmentation/base.py
@@ -0,0 +1,16 @@
+"""Semantic segmentation decoder base class."""
+
+import abc
+
+import torch
+from torch import nn
+
+from eva.vision.models.networks.decoders.segmentation.typings import DecoderInputs
+
+
+class Decoder(nn.Module, abc.ABC):
+    """Abstract base class for segmentation decoders."""
+
+    @abc.abstractmethod
+    def forward(self, decoder_inputs: DecoderInputs) -> torch.Tensor:
+        """Forward pass of the decoder."""

eva/vision/models/networks/decoders/segmentation/{conv2d.py → decoder2d.py}
@@ -1,19 +1,20 @@
 """Convolutional based semantic segmentation decoder."""
 
-from typing import List, Tuple
+from typing import List, Sequence, Tuple
 
 import torch
 from torch import nn
 from torch.nn import functional
 
-from eva.vision.models.networks.decoders import decoder
+from eva.vision.models.networks.decoders.segmentation import base
+from eva.vision.models.networks.decoders.segmentation.typings import DecoderInputs
 
 
-class ConvDecoder(decoder.Decoder):
-    """Convolutional segmentation decoder."""
+class Decoder2D(base.Decoder):
+    """Segmentation decoder for 2D applications."""
 
-    def __init__(self, layers: nn.Module) -> None:
-        """Initializes the convolutional based decoder head.
+    def __init__(self, layers: nn.Module, combine_features: bool = True) -> None:
+        """Initializes the based decoder head.
 
         Here the input nn layers will be directly applied to the
         features of shape (batch_size, hidden_size, n_patches_height,
@@ -21,13 +22,16 @@
         Note the n_patches is also known as grid_size.
 
         Args:
-            layers: The convolutional layers to be used as the decoder head.
+            layers: The layers to be used as the decoder head.
+            combine_features: Whether to combine the features from different
+                feature levels into one tensor before applying the decoder head.
         """
         super().__init__()
 
         self._layers = layers
+        self._combine_features = combine_features
 
-    def _forward_features(self, features: List[torch.Tensor]) -> torch.Tensor:
+    def _forward_features(self, features: torch.Tensor | List[torch.Tensor]) -> torch.Tensor:
         """Forward function for multi-level feature maps to a single one.
 
         It will interpolate the features and concat them into a single tensor
@@ -46,6 +50,8 @@
             A tensor of shape (batch_size, hidden_size, n_patches_height,
             n_patches_width) which is feature map of the decoder head.
         """
+        if isinstance(features, torch.Tensor):
+            features = [features]
         if not isinstance(features, list) or features[0].ndim != 4:
             raise ValueError(
                 "Input features should be a list of four (4) dimensional inputs of "
@@ -63,7 +69,9 @@
         ]
         return torch.cat(upsampled_features, dim=1)
 
-    def _forward_head(self, patch_embeddings: torch.Tensor) -> torch.Tensor:
+    def _forward_head(
+        self, patch_embeddings: torch.Tensor | Sequence[torch.Tensor]
+    ) -> torch.Tensor:
         """Forward of the decoder head.
 
         Args:
@@ -75,12 +83,12 @@
         """
         return self._layers(patch_embeddings)
 
-    def _cls_seg(
+    def _upscale(
         self,
         logits: torch.Tensor,
         image_size: Tuple[int, int],
     ) -> torch.Tensor:
-        """Classify each pixel of the image.
+        """Upscales the calculated logits to the target image size.
 
         Args:
             logits: The decoder outputs of shape (batch_size, n_classes,
@@ -93,22 +101,18 @@
         """
         return functional.interpolate(logits, image_size, mode="bilinear")
 
-    def forward(
-        self,
-        features: List[torch.Tensor],
-        image_size: Tuple[int, int],
-    ) -> torch.Tensor:
+    def forward(self, decoder_inputs: DecoderInputs) -> torch.Tensor:
        """Maps the patch embeddings to a segmentation mask of the image size.
 
         Args:
-            features: List of multi-level image features of shape (batch_size,
-                hidden_size, n_patches_height, n_patches_width).
-            image_size: The target image size (height, width).
+            decoder_inputs: Inputs required by the decoder.
 
         Returns:
             Tensor containing scores for all of the classes with shape
             (batch_size, n_classes, image_height, image_width).
         """
-        patch_embeddings = self._forward_features(features)
-        logits = self._forward_head(patch_embeddings)
-        return self._cls_seg(logits, image_size)
+        features, image_size, _ = DecoderInputs(*decoder_inputs)
+        if self._combine_features:
+            features = self._forward_features(features)
+        logits = self._forward_head(features)
+        return self._upscale(logits, image_size)
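
Putting the renamed decoder together with `DecoderInputs`, a minimal end-to-end sketch (hidden size and class count arbitrary; the `DecoderInputs` field order is assumed from the hunks above):

```python
import torch
from torch import nn

from eva.vision.models.networks.decoders.segmentation import Decoder2D
from eva.vision.models.networks.decoders.segmentation.typings import DecoderInputs

decoder = Decoder2D(layers=nn.Conv2d(768, 5, kernel_size=1))

features = [torch.rand(2, 768, 14, 14)]   # single feature level from the encoder
images = torch.rand(2, 3, 224, 224)       # original input batch
masks = decoder(DecoderInputs(features, images.shape[-2:], images))
print(masks.shape)  # torch.Size([2, 5, 224, 224])
```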

eva/vision/models/networks/decoders/segmentation/linear.py
@@ -6,10 +6,10 @@ import torch
 from torch import nn
 from torch.nn import functional
 
-from eva.vision.models.networks.decoders import decoder
+from eva.vision.models.networks.decoders.segmentation import base
 
 
-class LinearDecoder(decoder.Decoder):
+class LinearDecoder(base.Decoder):
     """Linear decoder."""
 
     def __init__(self, layers: nn.Module) -> None:

eva/vision/models/networks/decoders/segmentation/semantic/__init__.py
@@ -0,0 +1,12 @@
+"""Semantic Segmentation decoder heads API."""
+
+from eva.vision.models.networks.decoders.segmentation.semantic.common import (
+    ConvDecoder1x1,
+    ConvDecoderMS,
+    SingleLinearDecoder,
+)
+from eva.vision.models.networks.decoders.segmentation.semantic.with_image import (
+    ConvDecoderWithImage,
+)
+
+__all__ = ["ConvDecoder1x1", "ConvDecoderMS", "SingleLinearDecoder", "ConvDecoderWithImage"]

eva/vision/models/networks/decoders/segmentation/{common.py → semantic/common.py}
@@ -7,10 +7,10 @@ output by an encoder into pixel-wise predictions for segmentation tasks.
 
 from torch import nn
 
-from eva.vision.models.networks.decoders.segmentation import conv2d, linear
+from eva.vision.models.networks.decoders.segmentation import decoder2d, linear
 
 
-class ConvDecoder1x1(conv2d.ConvDecoder):
+class ConvDecoder1x1(decoder2d.Decoder2D):
     """A convolutional decoder with a single 1x1 convolutional layer."""
 
     def __init__(self, in_features: int, num_classes: int) -> None:
@@ -29,7 +29,7 @@ class ConvDecoder1x1(conv2d.ConvDecoder):
     )
 
 
-class ConvDecoderMS(conv2d.ConvDecoder):
+class ConvDecoderMS(decoder2d.Decoder2D):
     """A multi-stage convolutional decoder with upsampling and convolutional layers.
 
     This decoder applies a series of upsampling and convolutional layers to transform