PyPI - slide2vec - Versions diffs - 4.1.1__tar.gz → 4.2.0__tar.gz - Mend

slide2vec 4.1.1__tar.gz → 4.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (74) hide show

{slide2vec-4.1.1 → slide2vec-4.2.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: slide2vec
-Version: 4.1.1
+Version: 4.2.0
 Summary: Embedding of whole slide images with Foundation Models
 Author-email: Clément Grisi <clement.grisi@radboudumc.nl>
 License-Expression: Apache-2.0
@@ -15,7 +15,7 @@ Classifier: Programming Language :: Python :: 3.13
 Requires-Python: >=3.10
 Description-Content-Type: text/markdown
 License-File: LICENSE
-Requires-Dist: hs2p[asap,cucim,openslide,vips]>=3.2.0
+Requires-Dist: hs2p[asap,cucim,openslide,vips]>=3.2.1
 Requires-Dist: omegaconf
 Requires-Dist: matplotlib
 Requires-Dist: numpy<2
@@ -50,6 +50,8 @@ Requires-Dist: xformers==0.0.31; extra == "prism"
 Provides-Extra: hibou
 Requires-Dist: scipy~=1.8.1; extra == "hibou"
 Requires-Dist: scikit-image~=0.19.3; extra == "hibou"
+Provides-Extra: moozy
+Requires-Dist: huggingface_hub<1.0,>=0.30.0; extra == "moozy"
 Provides-Extra: titan
 Requires-Dist: torch==2.0.1; extra == "titan"
 Requires-Dist: timm==1.0.3; extra == "titan"
@@ -63,7 +65,7 @@ Requires-Dist: numpy<2; extra == "fm"
 Requires-Dist: pandas; extra == "fm"
 Requires-Dist: pillow; extra == "fm"
 Requires-Dist: rich; extra == "fm"
-Requires-Dist: hs2p[asap,cucim,openslide,vips]>=3.2.0; extra == "fm"
+Requires-Dist: hs2p[asap,cucim,openslide,vips]>=3.2.1; extra == "fm"
 Requires-Dist: wandb; extra == "fm"
 Requires-Dist: torch<2.8,>=2.3; extra == "fm"
 Requires-Dist: torchvision>=0.18.0; extra == "fm"
@@ -210,7 +212,7 @@ The CLI is a thin wrapper over the package API.
 Bundled configs live under `slide2vec/configs/preprocessing/` and `slide2vec/configs/models/`.
 ```shell
-python -m slide2vec --config-file /path/to/config.yaml
+slide2vec /path/to/config.yaml
 ```
 By default, manifest-driven CLI runs use all available GPUs. Set `speed.num_gpus=4` when you want to cap the sharding explicitly.

{slide2vec-4.1.1 → slide2vec-4.2.0}/README.md RENAMED Viewed

@@ -112,7 +112,7 @@ The CLI is a thin wrapper over the package API.
 Bundled configs live under `slide2vec/configs/preprocessing/` and `slide2vec/configs/models/`.
 ```shell
-python -m slide2vec --config-file /path/to/config.yaml
+slide2vec /path/to/config.yaml
 ```
 By default, manifest-driven CLI runs use all available GPUs. Set `speed.num_gpus=4` when you want to cap the sharding explicitly.

{slide2vec-4.1.1 → slide2vec-4.2.0}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "slide2vec"
-version = "4.1.1"
+version = "4.2.0"
 description = "Embedding of whole slide images with Foundation Models"
 readme = "README.md"
 requires-python = ">=3.10"
@@ -21,7 +21,7 @@ classifiers = [
     "Programming Language :: Python :: 3.13",
 ]
 dependencies = [
-    "hs2p[asap,cucim,openslide,vips]>=3.2.0",
+    "hs2p[asap,cucim,openslide,vips]>=3.2.1",
     "omegaconf",
     "matplotlib",
     "numpy<2",
@@ -42,7 +42,7 @@ Homepage = "https://github.com/clemsgrs/slide2vec"
 "Bug Tracker" = "https://github.com/clemsgrs/slide2vec/issues"
 [project.scripts]
-slide2vec = "slide2vec.cli:main"
+slide2vec = "slide2vec.cli:entrypoint"
 [project.optional-dependencies]
 hoptimus = [
@@ -71,6 +71,9 @@ hibou = [
     "scipy~=1.8.1",
     "scikit-image~=0.19.3",
 ]
+moozy = [
+    "huggingface_hub>=0.30.0,<1.0",
+]
 titan = [
     "torch==2.0.1",
     "timm==1.0.3",
@@ -85,7 +88,7 @@ fm = [
     "pandas",
     "pillow",
     "rich",
-    "hs2p[asap,cucim,openslide,vips]>=3.2.0",
+    "hs2p[asap,cucim,openslide,vips]>=3.2.1",
     "wandb",
     "torch>=2.3,<2.8",
     "torchvision>=0.18.0",
@@ -154,7 +157,7 @@ no_implicit_reexport = true
 max-line-length = 160
 [tool.bumpver]
-current_version = "4.1.1"
+current_version = "4.2.0"
 version_pattern = "MAJOR.MINOR.PATCH"
 commit = false       # We do version bumping in CI, not as a commit
 tag = false          # Git tag already exists — we don't auto-tag

{slide2vec-4.1.1 → slide2vec-4.2.0}/slide2vec/__init__.py RENAMED Viewed

@@ -2,7 +2,7 @@ from slide2vec.api import EmbeddedSlide, ExecutionOptions, Model, Pipeline, Prep
 from slide2vec.artifacts import HierarchicalEmbeddingArtifact, SlideEmbeddingArtifact, TileEmbeddingArtifact
-__version__ = "4.1.1"
+__version__ = "4.2.0"
 __all__ = [
     "Model",

{slide2vec-4.1.1 → slide2vec-4.2.0}/slide2vec/api.py RENAMED Viewed

@@ -11,6 +11,7 @@ from hs2p import SlideSpec
 from slide2vec.artifacts import (
     HierarchicalEmbeddingArtifact,
+    PatientEmbeddingArtifact,
     SlideEmbeddingArtifact,
     TileEmbeddingArtifact,
 )
@@ -127,6 +128,7 @@ class ExecutionOptions:
     prefetch_factor: int = 4
     persistent_workers: bool = True
     save_tile_embeddings: bool = False
+    save_slide_embeddings: bool = False
     save_latents: bool = False
     @classmethod
@@ -151,6 +153,7 @@ class ExecutionOptions:
             prefetch_factor=prefetch_factor,
             persistent_workers=persistent_workers,
             save_tile_embeddings=bool(cfg.model.save_tile_embeddings),
+            save_slide_embeddings=bool(cfg.model.save_slide_embeddings),
             save_latents=bool(cfg.model.save_latents),
         )
@@ -200,9 +203,17 @@ class RunResult:
     tile_artifacts: list[TileEmbeddingArtifact]
     hierarchical_artifacts: list[HierarchicalEmbeddingArtifact]
     slide_artifacts: list[SlideEmbeddingArtifact]
+    patient_artifacts: list[PatientEmbeddingArtifact] = field(default_factory=list)
     process_list_path: Path | None = None
+@dataclass(frozen=True, kw_only=True)
+class EmbeddedPatient:
+    patient_id: str
+    patient_embedding: Any  # torch.Tensor [D]
+    slide_embeddings: dict[str, Any]  # {sample_id: torch.Tensor [D]}
 @dataclass(frozen=True, kw_only=True)
 class EmbeddedSlide:
     sample_id: str
@@ -343,6 +354,82 @@ class Model:
                 execution=resolved,
             )
+    def embed_patient(
+        self,
+        slides: SlideSequence,
+        patient_id: str | None = None,
+        *,
+        preprocessing: PreprocessingConfig | None = None,
+        execution: ExecutionOptions | None = None,
+    ) -> "EmbeddedPatient":
+        """Embed a single patient's slides and return one ``EmbeddedPatient``.
+        Convenience wrapper around :meth:`embed_patients` for the common case
+        where all *slides* belong to the same patient.
+        Args:
+            slides: All slides for this patient.
+            patient_id: Optional patient identifier applied to every slide.
+                When omitted, ``patient_id`` is read from slide dict keys or
+                object attributes; slides that carry no ``patient_id`` fall
+                back to ``sample_id``.
+        """
+        patient_id_map: dict | None = None
+        if patient_id is not None:
+            patient_id_map = {}
+            for s in slides:
+                if isinstance(s, (str, Path)):
+                    patient_id_map[Path(s).stem] = patient_id
+                elif isinstance(s, dict):
+                    patient_id_map[str(s["sample_id"])] = patient_id
+                else:
+                    patient_id_map[str(s.sample_id)] = patient_id
+        return self.embed_patients(
+            slides,
+            patient_id_map=patient_id_map,
+            preprocessing=preprocessing,
+            execution=execution,
+        )[0]
+    def embed_patients(
+        self,
+        slides: SlideSequence,
+        patient_id_map: dict | None = None,
+        *,
+        preprocessing: PreprocessingConfig | None = None,
+        execution: ExecutionOptions | None = None,
+    ) -> "list[EmbeddedPatient]":
+        """Embed slides and aggregate them into patient-level embeddings.
+        Requires a patient-level model (e.g. ``moozy``).  For each patient
+        all contributing slide embeddings are aggregated by the model's
+        ``encode_patient`` method.
+        Args:
+            slides: Slides to process.  Each entry may be a path, a
+                ``SlideSpec``, or a dict with ``sample_id`` / ``image_path``
+                keys.  When *patient_id_map* is ``None`` a ``patient_id``
+                key in each dict is used to group slides.
+            patient_id_map: Optional explicit ``{sample_id: patient_id}``
+                mapping.  When provided it takes precedence over any
+                ``patient_id`` key embedded in the slide dicts.  When
+                omitted and the slide dicts carry no ``patient_id``, each
+                slide is treated as its own patient.
+        """
+        from slide2vec.inference import embed_patients
+        resolved = _coerce_execution_options(execution, model=self)
+        resolved_preprocessing = _resolve_direct_api_preprocessing(self, preprocessing)
+        with _auto_progress_reporting(output_dir=resolved.output_dir):
+            _validate_model_config(self, resolved_preprocessing, resolved)
+            return embed_patients(
+                self,
+                slides,
+                patient_id_map=patient_id_map,
+                preprocessing=resolved_preprocessing,
+                execution=resolved,
+            )
     def _load_backend(self) -> LoadedModel:
         if self._backend is None:
             from slide2vec.inference import load_model

{slide2vec-4.1.1 → slide2vec-4.2.0}/slide2vec/artifacts.py RENAMED Viewed

@@ -35,6 +35,20 @@ class SlideEmbeddingArtifact:
         return load_metadata(self.metadata_path)
+@dataclass(frozen=True, kw_only=True)
+class PatientEmbeddingArtifact:
+    patient_id: str
+    path: Path
+    metadata_path: Path
+    format: str
+    feature_dim: int
+    num_slides: int
+    @property
+    def metadata(self) -> dict[str, Any]:
+        return load_metadata(self.metadata_path)
 @dataclass(frozen=True, kw_only=True)
 class HierarchicalEmbeddingArtifact:
     sample_id: str
@@ -223,6 +237,45 @@ def write_slide_embeddings(
     )
+def write_patient_embeddings(
+    patient_id: str,
+    embedding,
+    *,
+    output_dir: str | Path,
+    output_format: str = "pt",
+    metadata: dict[str, Any] | None = None,
+    num_slides: int = 0,
+) -> PatientEmbeddingArtifact:
+    output_format = _validate_output_format(output_format)
+    artifact_path, metadata_path = _setup_artifact_paths(
+        output_dir, "patient_embeddings", patient_id, output_format
+    )
+    embedding_array = _ensure_array(embedding)
+    if output_format == "pt":
+        torch.save(_ensure_tensor(embedding), artifact_path)
+    else:
+        np.savez_compressed(artifact_path, features=embedding_array)
+    patient_metadata = {
+        "patient_id": patient_id,
+        "artifact_type": "patient_embeddings",
+        "format": output_format,
+        "feature_dim": int(embedding_array.shape[-1]) if embedding_array.ndim else 1,
+        "num_slides": num_slides,
+    }
+    if metadata:
+        patient_metadata.update(metadata)
+    _write_metadata(metadata_path, patient_metadata)
+    return PatientEmbeddingArtifact(
+        patient_id=patient_id,
+        path=artifact_path,
+        metadata_path=metadata_path,
+        format=output_format,
+        feature_dim=patient_metadata["feature_dim"],
+        num_slides=num_slides,
+    )
 def write_hierarchical_embeddings(
     sample_id: str,
     features,

{slide2vec-4.1.1 → slide2vec-4.2.0}/slide2vec/cli.py RENAMED Viewed

@@ -7,20 +7,21 @@ import slide2vec.progress as progress
 def get_args_parser(add_help: bool = True):
     parser = argparse.ArgumentParser("slide2vec", add_help=add_help)
-    parser.add_argument("--config-file", default="", metavar="FILE", help="path to config file")
+    parser.add_argument("config_file", metavar="CONFIG", help="path to config file")
     parser.add_argument("--skip-datetime", action="store_true", help="skip run id datetime prefix")
     parser.add_argument("--tiling-only", action="store_true", help="only run slide tiling")
     parser.add_argument("--run-on-cpu", action="store_true", help="run inference on cpu")
     parser.add_argument("--output-dir", type=str, default=None, help="output directory to save artifacts")
-    parser.add_argument(
-        "opts",
-        help='Modify config options at the end of the command using "path.key=value".',
-        default=None,
-        nargs=argparse.REMAINDER,
-    )
     return parser
+def parse_args(argv=None):
+    parser = get_args_parser(add_help=True)
+    args, opts = parser.parse_known_args(argv)
+    args.opts = opts
+    return args
 def build_model_and_pipeline(args):
     cfg, _cfg_path = setup(args)
     hf_login()
@@ -39,8 +40,7 @@ def build_model_and_pipeline(args):
 def main(argv=None):
-    parser = get_args_parser(add_help=True)
-    args = parser.parse_args(argv)
+    args = parse_args(argv)
     pipeline, cfg = build_model_and_pipeline(args)
     reporter = progress.create_cli_progress_reporter(output_dir=getattr(cfg, "output_dir", None))
     with progress.activate_progress_reporter(reporter):
@@ -50,3 +50,6 @@ def main(argv=None):
         )
+def entrypoint(argv=None):
+    main(argv)
+    return 0

{slide2vec-4.1.1 → slide2vec-4.2.0}/slide2vec/configs/default.yaml RENAMED Viewed

@@ -13,6 +13,7 @@ model:
   output_variant: # requested output variant for presets that expose multiple outputs
   batch_size: 32
   save_tile_embeddings: false # whether to save tile embeddings alongside the pooled slide embedding when level is "slide"
+  save_slide_embeddings: false # whether to save per-slide embeddings when level is "patient" (e.g. moozy); requires a 'patient_id' column in the input CSV
   save_latents: false # whether to save the latent representations from the model alongside the slide embedding (only supported for 'prism')
   allow_non_recommended_settings: false # when true, non-recommended spacing / tile size / precision combinations warn instead of erroring

{slide2vec-4.1.1 → slide2vec-4.2.0}/slide2vec/encoders/__init__.py RENAMED Viewed

@@ -6,6 +6,7 @@ the ``models`` subpackage.
 from slide2vec.encoders.base import (
     Encoder,
+    PatientEncoder,
     SlideEncoder,
     TileEncoder,
     TimmTileEncoder,
@@ -24,6 +25,7 @@ from slide2vec.encoders import models as _models_pkg  # noqa: F401
 __all__ = [
     "Encoder",
+    "PatientEncoder",
     "TileEncoder",
     "SlideEncoder",
     "TimmTileEncoder",

{slide2vec-4.1.1 → slide2vec-4.2.0}/slide2vec/encoders/base.py RENAMED Viewed

@@ -96,6 +96,33 @@ class SlideEncoder(Encoder):
         return coordinates
+class PatientEncoder(Encoder):
+    """Base class for encoders that aggregate slide embeddings into patient embeddings."""
+    tile_encoder: TileEncoder | None = None
+    def encode_tiles(self, batch: Tensor) -> Tensor:
+        if self.tile_encoder is None:
+            raise AttributeError("patient encoders must attach a tile_encoder before encoding tiles")
+        return self.tile_encoder.encode_tiles(batch)
+    @abstractmethod
+    def encode_slide(
+        self,
+        tile_features: Tensor,
+        coordinates: Tensor | None = None,
+        *,
+        tile_size_lv0: int | None = None,
+    ) -> Tensor:
+        """Pool tile-level features into a single slide-level embedding."""
+        ...
+    @abstractmethod
+    def encode_patient(self, slide_embeddings: Tensor) -> Tensor:
+        """Aggregate slide embeddings [S, D] into a single patient-level embedding [D]."""
+        ...
 class TimmTileEncoder(TileEncoder):
     """Convenience base for timm-backed tile encoders."""

{slide2vec-4.1.1 → slide2vec-4.2.0}/slide2vec/encoders/models/__init__.py RENAMED Viewed

@@ -8,7 +8,9 @@ from . import (
     gigapath,
     hibou,
     hoptimus,
+    lunit,
     midnight,
+    moozy,
     musk,
     phikon,
     prost40m,
@@ -23,7 +25,9 @@ __all__ = [
     "gigapath",
     "hibou",
     "hoptimus",
+    "lunit",
     "midnight",
+    "moozy",
     "musk",
     "phikon",
     "prost40m",

slide2vec-4.2.0/slide2vec/encoders/models/lunit.py ADDED Viewed

@@ -0,0 +1,21 @@
+"""Lunit ViT-S/8 tile encoder implementation."""
+from slide2vec.encoders.base import TimmTileEncoder
+from slide2vec.encoders.registry import register_encoder
+@register_encoder(
+    "lunit",
+    output_variants={"default": {"encode_dim": 384}},
+    default_output_variant="default",
+    input_size=224,
+    supported_spacing_um=0.5,
+    precision="fp32",
+    source="1aurent/vit_small_patch8_224.lunit_dino",
+)
+class LunitTileEncoder(TimmTileEncoder):
+    def __init__(self, *, output_variant: str | None = None):
+        super().__init__(
+            "hf_hub:1aurent/vit_small_patch8_224.lunit_dino",
+            output_variant=output_variant,
+        )

slide2vec-4.2.0/slide2vec/encoders/models/moozy/__init__.py ADDED Viewed

@@ -0,0 +1,114 @@
+"""MOOZY slide and patient encoder implementations."""
+import torch
+from slide2vec.encoders.base import PatientEncoder, SlideEncoder, preferred_default_device, resolve_requested_output_variant
+from .loading import load_moozy_inference_components
+from slide2vec.encoders.registry import register_encoder
+__all__ = [
+    "MOOZYSlideEncoder",
+    "MOOZYPatientEncoder",
+]
+@register_encoder(
+    "moozy-slide",
+    level="slide",
+    tile_encoder="lunit",
+    tile_encoder_output_variant="default",
+    output_variants={"default": {"encode_dim": 768}},
+    default_output_variant="default",
+    supported_spacing_um=0.5,
+    precision="fp32",
+    source="AtlasAnalyticsLab/MOOZY",
+)
+class MOOZYSlideEncoder(SlideEncoder):
+    def __init__(self, *, output_variant: str | None = None):
+        components = load_moozy_inference_components(device=torch.device("cpu"))
+        self._model = components.slide_encoder.eval()
+        self._device = preferred_default_device()
+        self._output_variant = resolve_requested_output_variant(output_variant)
+    @property
+    def encode_dim(self) -> int:
+        return 768
+    @property
+    def device(self) -> torch.device:
+        return self._device
+    def to(self, device: torch.device | str) -> "MOOZYSlideEncoder":
+        self._device = torch.device(device)
+        self._model = self._model.to(self._device)
+        return self
+    def encode_slide(
+        self,
+        tile_features: torch.Tensor,
+        coordinates: torch.Tensor | None = None,
+        *,
+        tile_size_lv0: int | None = None,
+    ) -> torch.Tensor:
+        if coordinates is None or tile_size_lv0 is None:
+            raise ValueError("MOOZY slide encoding requires coordinates and tile_size_lv0")
+        # MOOZYSlideEncoder expects [B, crop_h, crop_w, feat_dim]; use [1, 1, N, D]
+        x = tile_features.unsqueeze(0).unsqueeze(0)
+        coords = coordinates.unsqueeze(0).to(torch.float32)
+        patch_sizes = torch.tensor([tile_size_lv0], dtype=torch.float32, device=tile_features.device)
+        cls, _, _ = self._model(x, coords_xy=coords, patch_sizes=patch_sizes)
+        return cls.squeeze(0)
+@register_encoder(
+    "moozy",
+    level="patient",
+    tile_encoder="lunit",
+    tile_encoder_output_variant="default",
+    output_variants={"default": {"encode_dim": 768}},
+    default_output_variant="default",
+    supported_spacing_um=0.5,
+    precision="fp32",
+    source="AtlasAnalyticsLab/MOOZY",
+)
+class MOOZYPatientEncoder(PatientEncoder):
+    def __init__(self, *, output_variant: str | None = None):
+        components = load_moozy_inference_components(device=torch.device("cpu"))
+        self._slide_model = components.slide_encoder.eval()
+        self._case_transformer = components.case_transformer.eval()
+        self._device = preferred_default_device()
+        self._output_variant = resolve_requested_output_variant(output_variant)
+    @property
+    def encode_dim(self) -> int:
+        return 768
+    @property
+    def device(self) -> torch.device:
+        return self._device
+    def to(self, device: torch.device | str) -> "MOOZYPatientEncoder":
+        self._device = torch.device(device)
+        self._slide_model = self._slide_model.to(self._device)
+        self._case_transformer = self._case_transformer.to(self._device)
+        return self
+    def encode_slide(
+        self,
+        tile_features: torch.Tensor,
+        coordinates: torch.Tensor | None = None,
+        *,
+        tile_size_lv0: int | None = None,
+    ) -> torch.Tensor:
+        if coordinates is None or tile_size_lv0 is None:
+            raise ValueError("MOOZY patient encoding requires coordinates and tile_size_lv0")
+        x = tile_features.unsqueeze(0).unsqueeze(0)
+        coords = coordinates.unsqueeze(0).to(torch.float32)
+        patch_sizes = torch.tensor([tile_size_lv0], dtype=torch.float32, device=tile_features.device)
+        cls, _, _ = self._slide_model(x, coords_xy=coords, patch_sizes=patch_sizes)
+        return cls.squeeze(0)
+    def encode_patient(self, slide_embeddings: torch.Tensor) -> torch.Tensor:
+        return self._case_transformer(slide_embeddings)

slide2vec 4.1.1__tar.gz → 4.2.0__tar.gz

slide2vec 4.1.1__tar.gz → 4.2.0__tar.gz