PyPI - slide2vec - Versions diffs - 4.8.0__tar.gz → 5.0.1__tar.gz - Mend

slide2vec 4.8.0.tar.gz → 5.0.1.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (99)

{slide2vec-4.8.0 → slide2vec-5.0.1}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: slide2vec
-Version: 4.8.0
+Version: 5.0.1
 Summary: Embedding of whole slide images with Foundation Models
 Author-email: Clément Grisi <clement.grisi@radboudumc.nl>
 License-Expression: Apache-2.0
@@ -15,7 +15,7 @@ Classifier: Programming Language :: Python :: 3.13
 Requires-Python: >=3.10
 Description-Content-Type: text/markdown
 License-File: LICENSE
-Requires-Dist: hs2p[asap,cucim,openslide,sam2,vips]>=4.1.1
+Requires-Dist: hs2p[asap,cucim,openslide,sam2,vips]>=4.2.0
 Requires-Dist: omegaconf
 Requires-Dist: matplotlib
 Requires-Dist: numpy<2
@@ -65,7 +65,7 @@ Requires-Dist: numpy<2; extra == "fm"
 Requires-Dist: pandas; extra == "fm"
 Requires-Dist: pillow; extra == "fm"
 Requires-Dist: rich; extra == "fm"
-Requires-Dist: hs2p[asap,cucim,openslide,sam2,vips]>=4.1.1; extra == "fm"
+Requires-Dist: hs2p[asap,cucim,openslide,sam2,vips]>=4.2.0; extra == "fm"
 Requires-Dist: wandb; extra == "fm"
 Requires-Dist: torch<2.8,>=2.3; extra == "fm"
 Requires-Dist: torchvision>=0.18.0; extra == "fm"
@@ -169,7 +169,7 @@ pipeline = Pipeline(
     preprocessing=PreprocessingConfig(
         requested_spacing_um=0.5,
         requested_tile_size_px=224,
-        tissue_threshold=0.1,
+        masks={"min_coverage": {"tissue": 0.1}},
     ),
     execution=ExecutionOptions(output_dir="outputs/demo"),
 )

{slide2vec-4.8.0 → slide2vec-5.0.1}/README.md RENAMED Viewed

@@ -63,7 +63,7 @@ pipeline = Pipeline(
     preprocessing=PreprocessingConfig(
         requested_spacing_um=0.5,
         requested_tile_size_px=224,
-        tissue_threshold=0.1,
+        masks={"min_coverage": {"tissue": 0.1}},
     ),
     execution=ExecutionOptions(output_dir="outputs/demo"),
 )

{slide2vec-4.8.0 → slide2vec-5.0.1}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "slide2vec"
-version = "4.8.0"
+version = "5.0.1"
 description = "Embedding of whole slide images with Foundation Models"
 readme = "README.md"
 requires-python = ">=3.10"
@@ -21,7 +21,7 @@ classifiers = [
     "Programming Language :: Python :: 3.13",
 ]
 dependencies = [
-    "hs2p[asap,cucim,openslide,sam2,vips]>=4.1.1",
+    "hs2p[asap,cucim,openslide,sam2,vips]>=4.2.0",
     "omegaconf",
     "matplotlib",
     "numpy<2",
@@ -88,7 +88,7 @@ fm = [
     "pandas",
     "pillow",
     "rich",
-    "hs2p[asap,cucim,openslide,sam2,vips]>=4.1.1",
+    "hs2p[asap,cucim,openslide,sam2,vips]>=4.2.0",
     "wandb",
     "torch>=2.3,<2.8",
     "torchvision>=0.18.0",
@@ -145,6 +145,9 @@ addopts = "--cov=slide2vec"
 testpaths = [
     "tests",
 ]
+markers = [
+    "heavy: real-weight foundation-model inference on CPU; minutes per test. Excluded from the PR suite via `-m 'not heavy'`; run on the scheduled/manual heavy workflow (.github/workflows/nightly-heavy.yaml).",
+]
 [tool.mypy]
 mypy_path = "."
@@ -164,7 +167,7 @@ no_implicit_reexport = true
 max-line-length = 160
 [tool.bumpver]
-current_version = "4.8.0"
+current_version = "5.0.1"
 version_pattern = "MAJOR.MINOR.PATCH"
 commit = false       # We do version bumping in CI, not as a commit
 tag = false          # Git tag already exists — we don't auto-tag

{slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/__init__.py RENAMED Viewed

@@ -11,7 +11,7 @@ from slide2vec.api import (
 from slide2vec.artifacts import HierarchicalEmbeddingArtifact, SlideEmbeddingArtifact, TileEmbeddingArtifact
-__version__ = "4.8.0"
+__version__ = "5.0.1"
 __all__ = [
     "Model",

{slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/api.py RENAMED Viewed

@@ -351,6 +351,11 @@ class EmbeddedSlide:
     image_path: Path
     #: Path to the tissue mask used for tiling, if any.
     mask_path: Path | None = None
+    #: Annotation class this bag of tiles was sampled for. ``"tissue"`` for the
+    #: default tissue-only path, ``"merged"`` for the union output mode, or the
+    #: class name (e.g. ``"tumor"``) when annotation-aware sampling fans a slide
+    #: out into one bag per class. See the annotation-aware sampling documentation.
+    annotation: str | None = None
     #: Number of tiles extracted from the slide.
     num_tiles: int | None = None
     #: Path to the mask preview image, if generated.
@@ -442,12 +447,13 @@ class Model:
         self,
         slide: SlideInput,
         *,
+        annotation: str | list[str] | None = None,
         preprocessing: PreprocessingConfig | None = None,
         execution: ExecutionOptions | None = None,
         sample_id: str | None = None,
         mask_path: PathLike | None = None,
         spacing_at_level_0: float | None = None,
-    ) -> EmbeddedSlide:
+    ) -> EmbeddedSlide | list[EmbeddedSlide]:
         if isinstance(slide, (str, Path)):
             slide = {
                 "sample_id": sample_id or Path(slide).stem,
@@ -459,31 +465,42 @@ class Model:
             raise ValueError(
                 "sample_id, mask_path, and spacing_at_level_0 overrides are only supported when slide is a path-like input"
             )
-        return self.embed_slides(
+        requested = None if isinstance(annotation, str) else annotation
+        grouped = self.embed_slides(
             [slide],
+            annotations=requested,
             preprocessing=preprocessing,
             execution=execution,
-        )[0]
+        )
+        # Single slide in → at most one outer key out. Flatten to the inner
+        # {label: EmbeddedSlide} mapping (empty when the run produced nothing).
+        bags: dict[str, EmbeddedSlide] = {}
+        for inner in grouped.values():
+            bags = inner
+            break
+        return _select_embedded_bag(bags, annotation)
     def embed_slides(
         self,
         slides: SlideSequence,
         *,
+        annotations: list[str] | None = None,
         preprocessing: PreprocessingConfig | None = None,
         execution: ExecutionOptions | None = None,
-    ) -> list[EmbeddedSlide]:
+    ) -> dict[str, dict[str, EmbeddedSlide]]:
         from slide2vec.inference import embed_slides
         resolved = _coerce_execution_options(execution, model=self)
         resolved_preprocessing = _resolve_direct_api_preprocessing(self, preprocessing)
         with _auto_progress_reporting(output_dir=resolved.output_dir):
             _validate_model_config(self, resolved_preprocessing, resolved)
-            return embed_slides(
+            embedded = embed_slides(
                 self,
                 slides,
                 preprocessing=resolved_preprocessing,
                 execution=resolved,
             )
+        return _group_embedded_slides(embedded, annotations=annotations)
     def embed_patient(
         self,
@@ -650,6 +667,77 @@ class Pipeline:
             )
+def _select_embedded_bag(
+    bags: Mapping[str, EmbeddedSlide],
+    annotation: str | list[str] | None,
+) -> EmbeddedSlide | list[EmbeddedSlide]:
+    """Select per-class bag(s) from a single slide's ``{label: EmbeddedSlide}`` map.
+    numpy-style shape-in/shape-out:
+    - a single class string returns one :class:`EmbeddedSlide`;
+    - a list of class strings returns a list in the requested order;
+    - ``None`` returns the single bag when the run produced exactly one,
+      otherwise raises naming the available bags and directing to
+      :meth:`Model.embed_slides`.
+    Requesting a class the run did not produce raises naming what is available.
+    """
+    available = sorted(bags)
+    if isinstance(annotation, str):
+        if annotation not in bags:
+            raise ValueError(
+                f"embed_slide() found no '{annotation}' annotation bag for this "
+                f"slide; available bags: {available}."
+            )
+        return bags[annotation]
+    if annotation is not None:
+        selected: list[EmbeddedSlide] = []
+        for label in annotation:
+            if label not in bags:
+                raise ValueError(
+                    f"embed_slide() found no '{label}' annotation bag for this "
+                    f"slide; available bags: {available}."
+                )
+            selected.append(bags[label])
+        return selected
+    if len(bags) == 1:
+        return next(iter(bags.values()))
+    raise ValueError(
+        f"embed_slide() received {len(bags)} annotation bags for this slide "
+        f"({available}); annotation-aware sampling produces one bag per class. "
+        "Pass annotation=... to select a class, or use Model.embed_slides(...) "
+        "to receive every per-class EmbeddedSlide (each carries its .annotation)."
+    )
+def _group_embedded_slides(
+    embedded: Sequence[EmbeddedSlide],
+    *,
+    annotations: list[str] | None = None,
+) -> dict[str, dict[str, EmbeddedSlide]]:
+    """Group flat per-row :class:`EmbeddedSlide` results into a nested mapping.
+    The outer key is ``sample_id``; the inner key is the bag's informative
+    annotation label (``"tissue"``/``"merged"``/class name), never ``None``.
+    A bag whose ``.annotation`` is ``None`` (defensive — post-#173 real runs
+    always carry a label) does not produce a ``None`` key.
+    When *annotations* is given, the inner keys are restricted to the named
+    classes (in encounter order).
+    """
+    requested = None if annotations is None else set(annotations)
+    grouped: dict[str, dict[str, EmbeddedSlide]] = {}
+    for bag in embedded:
+        label = bag.annotation
+        if label is None:
+            continue
+        if requested is not None and label not in requested:
+            continue
+        grouped.setdefault(bag.sample_id, {})[label] = bag
+    return grouped
 def _coerce_execution_options(
     options: ExecutionOptions | None,
     *,

{slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/encoders/models/hibou.py RENAMED Viewed

@@ -54,6 +54,13 @@ class _HibouBase(TileEncoder):
             v2.Normalize(mean=_HIBOU_MEAN, std=_HIBOU_STD),
         ])
+    @property
+    def _num_prefix_tokens(self) -> int:
+        # CLS + register tokens. Dinov2-with-registers carries the register tokens
+        # between the CLS and patch tokens, so both the dense and attention paths
+        # must strip them; deriving the count from config keeps the two in sync.
+        return 1 + int(getattr(self._model.config, "num_register_tokens", 0))
     def encode_tiles(self, batch: Tensor) -> Tensor:
         output = self._model(pixel_values=batch)
         return output.pooler_output
@@ -77,7 +84,7 @@ class _HibouBase(TileEncoder):
             output.last_hidden_state,
             grid_h=height // patch,
             grid_w=width // patch,
-            num_prefix_tokens=1 + int(getattr(self._model.config, "num_register_tokens", 0)),
+            num_prefix_tokens=self._num_prefix_tokens,
             encoder_name=type(self).__name__,
         )
@@ -111,7 +118,7 @@ class _HibouBase(TileEncoder):
             output = self._model(pixel_values=batch, output_attentions=True)
         return attentions_tuple_to_grids(
             output.attentions,
-            num_prefix_tokens=1 + int(getattr(self._model.config, "num_register_tokens", 0)),
+            num_prefix_tokens=self._num_prefix_tokens,
             blocks=blocks,
             include_registers=include_registers,
             grid_h=height // patch,

{slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/encoders/models/midnight.py RENAMED Viewed

@@ -36,6 +36,18 @@ class Midnight(TileEncoder):
         self._model = AutoModel.from_pretrained("kaiko-ai/midnight").eval()
         self._device = preferred_default_device()
         self._output_variant = resolve_requested_output_variant(output_variant)
+        # The pooled, dense, and attention paths all assume a single CLS prefix
+        # token (kaiko's reference recipe pools over output[:, 1:]). If a future
+        # checkpoint adds register tokens, that assumption silently folds them into
+        # the patch mean and mislabels the dense/attention grids — fail loudly here.
+        num_register_tokens = int(getattr(self._model.config, "num_register_tokens", 0))
+        if num_register_tokens:
+            raise ValueError(
+                "Midnight encoder assumes a single CLS prefix token, but the loaded "
+                f"checkpoint reports num_register_tokens={num_register_tokens}. Update "
+                "the pooled/dense/attention paths to strip the register tokens before "
+                "using this checkpoint."
+            )
     def get_transform(self) -> Callable:
         return v2.Compose([

{slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/encoders/models/virchow.py RENAMED Viewed

@@ -16,8 +16,6 @@ _VIRCHOW_OUTPUT_DIMS = {
 class _VirchowBase(TimmTileEncoder):
     """Base for Virchow models that concat CLS + mean-pooled patch tokens."""
-    _num_prefix_tokens: int = 1  # Override in subclass if needed
     def __init__(self, model_name: str, *, output_variant: str | None = None):
         self._output_variant = resolve_requested_output_variant(
             output_variant,
@@ -36,7 +34,7 @@ class _VirchowBase(TimmTileEncoder):
         cls_token = output[:, 0]
         if self._output_variant == "cls":
             return cls_token
-        patch_tokens = output[:, self._num_prefix_tokens:]
+        patch_tokens = output[:, self._model.num_prefix_tokens:]
         return torch.cat([cls_token, patch_tokens.mean(dim=1)], dim=-1)
     @property
@@ -57,8 +55,6 @@ class _VirchowBase(TimmTileEncoder):
     source="paige-ai/Virchow",
 )
 class Virchow(_VirchowBase):
-    _num_prefix_tokens = 1
     def __init__(self, *, output_variant: str | None = None):
         super().__init__("hf-hub:paige-ai/Virchow", output_variant=output_variant)
@@ -71,12 +67,10 @@ class Virchow(_VirchowBase):
     },
     default_output_variant="cls_patch_mean",
     input_size=224,
-    supported_spacing_um=[0.5, 1.0, 2.0],
+    supported_spacing_um=[0.25, 0.5, 1.0, 2.0],
     precision="fp16",
     source="paige-ai/Virchow2",
 )
 class Virchow2(_VirchowBase):
-    _num_prefix_tokens = 5  # 1 CLS + 4 register tokens
     def __init__(self, *, output_variant: str | None = None):
         super().__init__("hf-hub:paige-ai/Virchow2", output_variant=output_variant)

{slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/runtime/artifacts_collect.py RENAMED Viewed

@@ -101,15 +101,14 @@ def _normalized_row_annotation(annotation) -> str | None:
     """Collapse a process-list ``annotation`` cell to the per-class key (``None`` for the flat path).
     Mirrors the in-memory single-GPU path: ``None``/NaN and hs2p's flat-layout sentinels
-    (:func:`hs2p.fileops.is_flattened_annotation`, e.g. ``"tissue"``) land flat, and the merged
-    output-mode label ``"merged"`` is collapsed to ``None`` exactly as
-    :func:`slide2vec.utils.tiling_io.load_tiling_result_from_row` does — so the distributed reconcile
-    keys those rows to the flat embedding path with no per-class subdir.
+    (:func:`hs2p.fileops.is_flattened_annotation` — the single source of truth, which flattens
+    ``None``/``"tissue"``/``"merged"``) land flat — so the distributed reconcile keys those rows
+    to the flat embedding path with no per-class subdir.
     """
     if annotation is None or (isinstance(annotation, float) and pd.isna(annotation)):
         return None
     annotation = str(annotation)
-    if annotation == "merged" or is_flattened_annotation(annotation):
+    if is_flattened_annotation(annotation):
         return None
     return annotation

{slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/runtime/distributed.py RENAMED Viewed

@@ -32,15 +32,15 @@ def normalize_work_unit_annotation(annotation: str | None) -> str | None:
     """Collapse flat-layout annotations to ``None`` so flat units key by bare ``sample_id``.
     Mirrors the in-memory single-GPU path and the distributed reconcile
-    (:func:`slide2vec.runtime.artifacts_collect._normalized_row_annotation`): ``None``, hs2p's
-    flat-layout sentinels (:func:`hs2p.fileops.is_flattened_annotation`, e.g. ``"tissue"``), and the
-    merged output-mode label ``"merged"`` all collapse to ``None``. Only genuine per-class
+    (:func:`slide2vec.runtime.artifacts_collect._normalized_row_annotation`): hs2p's flat-layout
+    sentinels (:func:`hs2p.fileops.is_flattened_annotation`, the single source of truth — it
+    flattens ``None``/``"tissue"``/``"merged"``) all collapse to ``None``. Only genuine per-class
     annotations survive as a composite key.
     """
     if annotation is None:
         return None
     annotation = str(annotation)
-    if annotation == "merged" or is_flattened_annotation(annotation):
+    if is_flattened_annotation(annotation):
         return None
     return annotation

{slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/runtime/embedding_persist.py RENAMED Viewed

@@ -56,6 +56,7 @@ def make_embedded_slide(
         tile_size_lv0=int(tiling_result.tile_size_lv0),
         image_path=slide.image_path,
         mask_path=slide.mask_path,
+        annotation=tiling_result_annotation(tiling_result),
         num_tiles=int(n_tiles) if n_tiles is not None else len(x_values),
         mask_preview_path=Path(mask_preview_path) if mask_preview_path is not None else None,
         tiling_preview_path=Path(tiling_preview_path) if tiling_preview_path is not None else None,

{slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/runtime/persistence.py RENAMED Viewed

@@ -265,14 +265,15 @@ def _normalized_annotation(annotation: Any) -> str | None:
     Keying the per-class feature-path map on this normalized value lets the flat tissue-only
     path and a real class share one matching rule without the sentinel leaking into lookups.
-    ``"merged"`` (hs2p's merged output-mode label) carries no class and is collapsed to ``None``
-    here, matching :func:`slide2vec.utils.tiling_io.load_tiling_result_from_row`, so its
-    process-list row resolves to the flat embedding path rather than being left unmatched.
+    Flattening is decided solely by :func:`hs2p.fileops.is_flattened_annotation` (the single
+    source of truth), which flattens ``None``/``"tissue"``/``"merged"`` to the flat root, so
+    ``"merged"`` (hs2p's merged output-mode label, which carries no class) resolves to the flat
+    embedding path rather than being left unmatched.
     """
     if annotation is None or (isinstance(annotation, float) and pd.isna(annotation)):
         return None
     annotation = str(annotation)
-    if annotation == "merged" or is_flattened_annotation(annotation):
+    if is_flattened_annotation(annotation):
         return None
     return annotation

{slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/runtime/tiling.py RENAMED Viewed

@@ -46,9 +46,10 @@ def build_hs2p_configs(
         if is_hierarchical_preprocessing(preprocessing)
         else preprocessing.requested_tile_size_px
     )
-    # Reuse hs2p's tiling-config resolver so the derived tissue_threshold comes from
-    # masks.min_coverage.tissue (the single source of truth) and independent_sampling
-    # is threaded consistently. The resolver reads attributes, so wrap the masks dict.
+    # Reuse hs2p's tiling-config resolver so the resolved min_coverage map comes from
+    # masks.min_coverage (the single source of truth; min_coverage["tissue"] is the tissue
+    # threshold) and independent_sampling is threaded consistently. The resolver reads
+    # attributes, so wrap the masks dict.
     tiling_adapter = SimpleNamespace(
         tiling=SimpleNamespace(
             masks=SimpleNamespace(**dict(preprocessing.masks)),

{slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/utils/tiling_io.py RENAMED Viewed

@@ -244,11 +244,10 @@ def load_tiling_result_from_row(row):
     annotation = annotation if annotation is None else str(annotation)
     # The merged output mode (hs2p's CoordinateOutputMode.MERGED) emits a single per-slide
     # coordinate set over the union of tiles passing any active class threshold. hs2p labels
-    # that process-list row "merged" so it is not mistaken for plain tissue, but it carries no
-    # class — collapse it to None here so the flatten rule (is_flattened_annotation) lands its
-    # artifacts at the flat output root, with no per-class subdir.
-    if annotation == "merged":
-        annotation = None
+    # that process-list row "merged" so it is not mistaken for plain tissue. The informative
+    # label is preserved verbatim here — artifact placement is decided downstream solely by
+    # hs2p.fileops.is_flattened_annotation (which flattens None/"tissue"/"merged" to the flat
+    # output root), so "merged" still lands flat without erasing its self-describing label.
     setattr(tiling_result, "annotation", annotation)
     setattr(tiling_result, "tiles_tar_path", _optional_path(row.get("tiles_tar_path")))
     setattr(tiling_result, "mask_preview_path", _optional_path(row.get("mask_preview_path")))

{slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: slide2vec
-Version: 4.8.0
+Version: 5.0.1
 Summary: Embedding of whole slide images with Foundation Models
 Author-email: Clément Grisi <clement.grisi@radboudumc.nl>
 License-Expression: Apache-2.0
@@ -15,7 +15,7 @@ Classifier: Programming Language :: Python :: 3.13
 Requires-Python: >=3.10
 Description-Content-Type: text/markdown
 License-File: LICENSE
-Requires-Dist: hs2p[asap,cucim,openslide,sam2,vips]>=4.1.1
+Requires-Dist: hs2p[asap,cucim,openslide,sam2,vips]>=4.2.0
 Requires-Dist: omegaconf
 Requires-Dist: matplotlib
 Requires-Dist: numpy<2
@@ -65,7 +65,7 @@ Requires-Dist: numpy<2; extra == "fm"
 Requires-Dist: pandas; extra == "fm"
 Requires-Dist: pillow; extra == "fm"
 Requires-Dist: rich; extra == "fm"
-Requires-Dist: hs2p[asap,cucim,openslide,sam2,vips]>=4.1.1; extra == "fm"
+Requires-Dist: hs2p[asap,cucim,openslide,sam2,vips]>=4.2.0; extra == "fm"
 Requires-Dist: wandb; extra == "fm"
 Requires-Dist: torch<2.8,>=2.3; extra == "fm"
 Requires-Dist: torchvision>=0.18.0; extra == "fm"
@@ -169,7 +169,7 @@ pipeline = Pipeline(
     preprocessing=PreprocessingConfig(
         requested_spacing_um=0.5,
         requested_tile_size_px=224,
-        tissue_threshold=0.1,
+        masks={"min_coverage": {"tissue": 0.1}},
     ),
     execution=ExecutionOptions(output_dir="outputs/demo"),
 )

{slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec.egg-info/SOURCES.txt RENAMED Viewed

@@ -83,7 +83,6 @@ slide2vec/utils/utils.py
 tests/test_architecture_runtime_split.py
 tests/test_attention_extraction.py
 tests/test_dense_extraction.py
-tests/test_dense_locality_gated.py
 tests/test_dense_regions.py
 tests/test_encoder_registry.py
 tests/test_hs2p_package_cutover.py

{slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec.egg-info/requires.txt RENAMED Viewed

@@ -1,4 +1,4 @@
-hs2p[asap,cucim,openslide,sam2,vips]>=4.1.1
+hs2p[asap,cucim,openslide,sam2,vips]>=4.2.0
 omegaconf
 matplotlib
 numpy<2
@@ -27,7 +27,7 @@ numpy<2
 pandas
 pillow
 rich
-hs2p[asap,cucim,openslide,sam2,vips]>=4.1.1
+hs2p[asap,cucim,openslide,sam2,vips]>=4.2.0
 wandb
 torch<2.8,>=2.3
 torchvision>=0.18.0

{slide2vec-4.8.0 → slide2vec-5.0.1}/tests/test_output_consistency.py RENAMED Viewed

@@ -101,6 +101,7 @@ def mask_path() -> Path:
     return p
+@pytest.mark.heavy
 @pytest.mark.skipif(
     not os.environ.get("HF_TOKEN"),
     reason="HF_TOKEN required for model weight download",

{slide2vec-4.8.0 → slide2vec-5.0.1}/tests/test_regression_core.py RENAMED Viewed

@@ -700,7 +700,7 @@ def test_masks_min_coverage_tissue_drives_derived_tiling_threshold():
     tiling_cfg = build_hs2p_configs(preprocessing)[0]
-    assert tiling_cfg.tissue_threshold == pytest.approx(0.37)
+    assert tiling_cfg.min_coverage["tissue"] == pytest.approx(0.37)
     assert tiling_cfg.independent_sampling is False
@@ -842,9 +842,10 @@ def test_independent_sampling_toggle_selects_selection_strategy():
     assert joint[-2] == "joint_sampling"
-def test_merged_annotation_label_collapses_to_flat_root(tmp_path: Path):
-    """A merged tiling row is labelled ``merged`` by hs2p, but carries no class — it must
-    collapse to the flat output root (no per-class subdir), exactly like tissue/None."""
+def test_merged_annotation_label_survives_round_trip_to_flat_root(tmp_path: Path):
+    """A merged tiling row is labelled ``merged`` by hs2p. The informative label must
+    survive the round-trip (no collapse to ``None``), yet artifacts still land at the flat
+    output root because hs2p's ``is_flattened_annotation`` flattens ``"merged"``."""
     from slide2vec.utils.tiling_io import load_tiling_result_from_row
     coordinates_meta_path = tmp_path / "slide-a.coordinates.meta.json"
@@ -868,8 +869,8 @@ def test_merged_annotation_label_collapses_to_flat_root(tmp_path: Path):
     finally:
         tiling_io.load_tiling_result = original
-    # Merged carries no class label; the flatten rule sends it to the flat root.
-    assert result.annotation is None
+    # The informative label survives the round-trip; it is not blanked to None.
+    assert result.annotation == "merged"
     artifact = write_tile_embeddings(
         "slide-a",
         np.arange(8, dtype=np.float32).reshape(2, 4),
@@ -877,9 +878,84 @@ def test_merged_annotation_label_collapses_to_flat_root(tmp_path: Path):
         output_format="npz",
         annotation=result.annotation,
     )
+    # ...but placement is decided by is_flattened_annotation, so it still lands flat.
     assert artifact.path == tmp_path / "tile_embeddings" / "slide-a.npz"
+def test_tissue_annotation_survives_round_trip_to_flat_root(tmp_path: Path):
+    """A ``"tissue"`` row keeps its informative label through the round-trip while still
+    resolving to flat-root placement via ``is_flattened_annotation``."""
+    from slide2vec.utils.tiling_io import load_tiling_result_from_row
+    coordinates_meta_path = tmp_path / "slide-t.coordinates.meta.json"
+    coordinates_meta_path.write_text("{}", encoding="utf-8")
+    def fake_load_tiling_result(**kwargs):
+        return SimpleNamespace()
+    import slide2vec.utils.tiling_io as tiling_io
+    original = tiling_io.load_tiling_result
+    tiling_io.load_tiling_result = fake_load_tiling_result
+    try:
+        result = load_tiling_result_from_row(
+            {
+                "annotation": "tissue",
+                "coordinates_npz_path": str(tmp_path / "slide-t.coordinates.npz"),
+                "coordinates_meta_path": str(coordinates_meta_path),
+            }
+        )
+    finally:
+        tiling_io.load_tiling_result = original
+    assert result.annotation == "tissue"
+    artifact = write_tile_embeddings(
+        "slide-t",
+        np.arange(8, dtype=np.float32).reshape(2, 4),
+        output_dir=tmp_path,
+        output_format="npz",
+        annotation=result.annotation,
+    )
+    assert artifact.path == tmp_path / "tile_embeddings" / "slide-t.npz"
+def test_real_class_annotation_survives_round_trip_to_per_class_subdir(tmp_path: Path):
+    """A genuine class label (e.g. ``"tumor"``) survives the round-trip and routes to its
+    own per-class subdir, since ``is_flattened_annotation`` does not flatten it."""
+    from slide2vec.utils.tiling_io import load_tiling_result_from_row
+    coordinates_meta_path = tmp_path / "slide-u.coordinates.meta.json"
+    coordinates_meta_path.write_text("{}", encoding="utf-8")
+    def fake_load_tiling_result(**kwargs):
+        return SimpleNamespace()
+    import slide2vec.utils.tiling_io as tiling_io
+    original = tiling_io.load_tiling_result
+    tiling_io.load_tiling_result = fake_load_tiling_result
+    try:
+        result = load_tiling_result_from_row(
+            {
+                "annotation": "tumor",
+                "coordinates_npz_path": str(tmp_path / "slide-u.coordinates.npz"),
+                "coordinates_meta_path": str(coordinates_meta_path),
+            }
+        )
+    finally:
+        tiling_io.load_tiling_result = original
+    assert result.annotation == "tumor"
+    artifact = write_tile_embeddings(
+        "slide-u",
+        np.arange(8, dtype=np.float32).reshape(2, 4),
+        output_dir=tmp_path,
+        output_format="npz",
+        annotation=result.annotation,
+    )
+    assert artifact.path == tmp_path / "tile_embeddings" / "tumor" / "slide-u.npz"
 def test_invalid_masks_block_with_duplicate_pixel_values_fails_fast():
     from slide2vec.runtime.tiling import build_hs2p_configs

slide2vec 4.8.0__tar.gz → 5.0.1__tar.gz

slide2vec 4.8.0.tar.gz → 5.0.1.tar.gz