slide2vec 4.8.0__tar.gz → 5.0.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {slide2vec-4.8.0 → slide2vec-5.0.1}/PKG-INFO +4 -4
- {slide2vec-4.8.0 → slide2vec-5.0.1}/README.md +1 -1
- {slide2vec-4.8.0 → slide2vec-5.0.1}/pyproject.toml +7 -4
- {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/__init__.py +1 -1
- {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/api.py +93 -5
- {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/encoders/models/hibou.py +9 -2
- {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/encoders/models/midnight.py +12 -0
- {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/encoders/models/virchow.py +2 -8
- {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/runtime/artifacts_collect.py +4 -5
- {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/runtime/distributed.py +4 -4
- {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/runtime/embedding_persist.py +1 -0
- {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/runtime/persistence.py +5 -4
- {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/runtime/tiling.py +4 -3
- {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/utils/tiling_io.py +4 -5
- {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec.egg-info/PKG-INFO +4 -4
- {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec.egg-info/SOURCES.txt +0 -1
- {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec.egg-info/requires.txt +2 -2
- {slide2vec-4.8.0 → slide2vec-5.0.1}/tests/test_output_consistency.py +1 -0
- {slide2vec-4.8.0 → slide2vec-5.0.1}/tests/test_regression_core.py +82 -6
- {slide2vec-4.8.0 → slide2vec-5.0.1}/tests/test_regression_inference.py +31 -5
- {slide2vec-4.8.0 → slide2vec-5.0.1}/tests/test_regression_models.py +182 -114
- slide2vec-4.8.0/tests/test_dense_locality_gated.py +0 -162
- {slide2vec-4.8.0 → slide2vec-5.0.1}/LICENSE +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.1}/setup.cfg +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/__main__.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/artifacts.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/cli.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/configs/__init__.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/configs/default.yaml +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/configs/resources.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/data/__init__.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/data/dataset.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/data/tile_reader.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/data/tile_store.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/distributed/__init__.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/distributed/direct_embed_worker.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/distributed/pipeline_worker.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/encoders/__init__.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/encoders/base.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/encoders/models/__init__.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/encoders/models/conch.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/encoders/models/gigapath.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/encoders/models/hoptimus.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/encoders/models/lunit.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/encoders/models/moozy/__init__.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/encoders/models/moozy/blocks.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/encoders/models/moozy/case.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/encoders/models/moozy/loading.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/encoders/models/moozy/slide.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/encoders/models/moozy/types.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/encoders/models/musk.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/encoders/models/phikon.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/encoders/models/prism.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/encoders/models/prost40m.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/encoders/models/titan.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/encoders/models/uni.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/encoders/registry.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/encoders/validation.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/inference.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/progress.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/runtime/__init__.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/runtime/batching.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/runtime/cpu_budget.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/runtime/dense_regions.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/runtime/distributed_stage.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/runtime/embedding.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/runtime/embedding_pipeline.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/runtime/hierarchical.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/runtime/manifest.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/runtime/model_settings.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/runtime/patient_pipeline.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/runtime/persist_callbacks.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/runtime/process_list.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/runtime/progress_bridge.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/runtime/registry.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/runtime/serialization.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/runtime/slide_encode.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/runtime/tiling_pipeline.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/runtime/types.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/runtime/worker_io.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/utils/__init__.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/utils/config.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/utils/coordinates.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/utils/log_utils.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/utils/utils.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec.egg-info/dependency_links.txt +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec.egg-info/entry_points.txt +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec.egg-info/not-zip-safe +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec.egg-info/top_level.txt +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.1}/tests/test_architecture_runtime_split.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.1}/tests/test_attention_extraction.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.1}/tests/test_dense_extraction.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.1}/tests/test_dense_regions.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.1}/tests/test_encoder_registry.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.1}/tests/test_hs2p_package_cutover.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.1}/tests/test_progress.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.1}/tests/test_runtime_batching.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.1}/tests/test_tile_store.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.1}/tests/test_tiling_pipeline.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: slide2vec
|
|
3
|
-
Version: 4.8.0
|
|
3
|
+
Version: 5.0.1
|
|
4
4
|
Summary: Embedding of whole slide images with Foundation Models
|
|
5
5
|
Author-email: Clément Grisi <clement.grisi@radboudumc.nl>
|
|
6
6
|
License-Expression: Apache-2.0
|
|
@@ -15,7 +15,7 @@ Classifier: Programming Language :: Python :: 3.13
|
|
|
15
15
|
Requires-Python: >=3.10
|
|
16
16
|
Description-Content-Type: text/markdown
|
|
17
17
|
License-File: LICENSE
|
|
18
|
-
Requires-Dist: hs2p[asap,cucim,openslide,sam2,vips]>=4.
|
|
18
|
+
Requires-Dist: hs2p[asap,cucim,openslide,sam2,vips]>=4.2.0
|
|
19
19
|
Requires-Dist: omegaconf
|
|
20
20
|
Requires-Dist: matplotlib
|
|
21
21
|
Requires-Dist: numpy<2
|
|
@@ -65,7 +65,7 @@ Requires-Dist: numpy<2; extra == "fm"
|
|
|
65
65
|
Requires-Dist: pandas; extra == "fm"
|
|
66
66
|
Requires-Dist: pillow; extra == "fm"
|
|
67
67
|
Requires-Dist: rich; extra == "fm"
|
|
68
|
-
Requires-Dist: hs2p[asap,cucim,openslide,sam2,vips]>=4.
|
|
68
|
+
Requires-Dist: hs2p[asap,cucim,openslide,sam2,vips]>=4.2.0; extra == "fm"
|
|
69
69
|
Requires-Dist: wandb; extra == "fm"
|
|
70
70
|
Requires-Dist: torch<2.8,>=2.3; extra == "fm"
|
|
71
71
|
Requires-Dist: torchvision>=0.18.0; extra == "fm"
|
|
@@ -169,7 +169,7 @@ pipeline = Pipeline(
|
|
|
169
169
|
preprocessing=PreprocessingConfig(
|
|
170
170
|
requested_spacing_um=0.5,
|
|
171
171
|
requested_tile_size_px=224,
|
|
172
|
-
|
|
172
|
+
masks={"min_coverage": {"tissue": 0.1}},
|
|
173
173
|
),
|
|
174
174
|
execution=ExecutionOptions(output_dir="outputs/demo"),
|
|
175
175
|
)
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "slide2vec"
|
|
7
|
-
version = "4.8.0"
|
|
7
|
+
version = "5.0.1"
|
|
8
8
|
description = "Embedding of whole slide images with Foundation Models"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
requires-python = ">=3.10"
|
|
@@ -21,7 +21,7 @@ classifiers = [
|
|
|
21
21
|
"Programming Language :: Python :: 3.13",
|
|
22
22
|
]
|
|
23
23
|
dependencies = [
|
|
24
|
-
"hs2p[asap,cucim,openslide,sam2,vips]>=4.
|
|
24
|
+
"hs2p[asap,cucim,openslide,sam2,vips]>=4.2.0",
|
|
25
25
|
"omegaconf",
|
|
26
26
|
"matplotlib",
|
|
27
27
|
"numpy<2",
|
|
@@ -88,7 +88,7 @@ fm = [
|
|
|
88
88
|
"pandas",
|
|
89
89
|
"pillow",
|
|
90
90
|
"rich",
|
|
91
|
-
"hs2p[asap,cucim,openslide,sam2,vips]>=4.
|
|
91
|
+
"hs2p[asap,cucim,openslide,sam2,vips]>=4.2.0",
|
|
92
92
|
"wandb",
|
|
93
93
|
"torch>=2.3,<2.8",
|
|
94
94
|
"torchvision>=0.18.0",
|
|
@@ -145,6 +145,9 @@ addopts = "--cov=slide2vec"
|
|
|
145
145
|
testpaths = [
|
|
146
146
|
"tests",
|
|
147
147
|
]
|
|
148
|
+
markers = [
|
|
149
|
+
"heavy: real-weight foundation-model inference on CPU; minutes per test. Excluded from the PR suite via `-m 'not heavy'`; run on the scheduled/manual heavy workflow (.github/workflows/nightly-heavy.yaml).",
|
|
150
|
+
]
|
|
148
151
|
|
|
149
152
|
[tool.mypy]
|
|
150
153
|
mypy_path = "."
|
|
@@ -164,7 +167,7 @@ no_implicit_reexport = true
|
|
|
164
167
|
max-line-length = 160
|
|
165
168
|
|
|
166
169
|
[tool.bumpver]
|
|
167
|
-
current_version = "4.8.0"
|
|
170
|
+
current_version = "5.0.1"
|
|
168
171
|
version_pattern = "MAJOR.MINOR.PATCH"
|
|
169
172
|
commit = false # We do version bumping in CI, not as a commit
|
|
170
173
|
tag = false # Git tag already exists — we don't auto-tag
|
|
@@ -351,6 +351,11 @@ class EmbeddedSlide:
|
|
|
351
351
|
image_path: Path
|
|
352
352
|
#: Path to the tissue mask used for tiling, if any.
|
|
353
353
|
mask_path: Path | None = None
|
|
354
|
+
#: Annotation class this bag of tiles was sampled for. ``"tissue"`` for the
|
|
355
|
+
#: default tissue-only path, ``"merged"`` for the union output mode, or the
|
|
356
|
+
#: class name (e.g. ``"tumor"``) when annotation-aware sampling fans a slide
|
|
357
|
+
#: out into one bag per class. See the annotation-aware sampling documentation.
|
|
358
|
+
annotation: str | None = None
|
|
354
359
|
#: Number of tiles extracted from the slide.
|
|
355
360
|
num_tiles: int | None = None
|
|
356
361
|
#: Path to the mask preview image, if generated.
|
|
@@ -442,12 +447,13 @@ class Model:
|
|
|
442
447
|
self,
|
|
443
448
|
slide: SlideInput,
|
|
444
449
|
*,
|
|
450
|
+
annotation: str | list[str] | None = None,
|
|
445
451
|
preprocessing: PreprocessingConfig | None = None,
|
|
446
452
|
execution: ExecutionOptions | None = None,
|
|
447
453
|
sample_id: str | None = None,
|
|
448
454
|
mask_path: PathLike | None = None,
|
|
449
455
|
spacing_at_level_0: float | None = None,
|
|
450
|
-
) -> EmbeddedSlide:
|
|
456
|
+
) -> EmbeddedSlide | list[EmbeddedSlide]:
|
|
451
457
|
if isinstance(slide, (str, Path)):
|
|
452
458
|
slide = {
|
|
453
459
|
"sample_id": sample_id or Path(slide).stem,
|
|
@@ -459,31 +465,42 @@ class Model:
|
|
|
459
465
|
raise ValueError(
|
|
460
466
|
"sample_id, mask_path, and spacing_at_level_0 overrides are only supported when slide is a path-like input"
|
|
461
467
|
)
|
|
462
|
-
|
|
468
|
+
requested = None if isinstance(annotation, str) else annotation
|
|
469
|
+
grouped = self.embed_slides(
|
|
463
470
|
[slide],
|
|
471
|
+
annotations=requested,
|
|
464
472
|
preprocessing=preprocessing,
|
|
465
473
|
execution=execution,
|
|
466
|
-
)
|
|
474
|
+
)
|
|
475
|
+
# Single slide in → at most one outer key out. Flatten to the inner
|
|
476
|
+
# {label: EmbeddedSlide} mapping (empty when the run produced nothing).
|
|
477
|
+
bags: dict[str, EmbeddedSlide] = {}
|
|
478
|
+
for inner in grouped.values():
|
|
479
|
+
bags = inner
|
|
480
|
+
break
|
|
481
|
+
return _select_embedded_bag(bags, annotation)
|
|
467
482
|
|
|
468
483
|
def embed_slides(
|
|
469
484
|
self,
|
|
470
485
|
slides: SlideSequence,
|
|
471
486
|
*,
|
|
487
|
+
annotations: list[str] | None = None,
|
|
472
488
|
preprocessing: PreprocessingConfig | None = None,
|
|
473
489
|
execution: ExecutionOptions | None = None,
|
|
474
|
-
) ->
|
|
490
|
+
) -> dict[str, dict[str, EmbeddedSlide]]:
|
|
475
491
|
from slide2vec.inference import embed_slides
|
|
476
492
|
|
|
477
493
|
resolved = _coerce_execution_options(execution, model=self)
|
|
478
494
|
resolved_preprocessing = _resolve_direct_api_preprocessing(self, preprocessing)
|
|
479
495
|
with _auto_progress_reporting(output_dir=resolved.output_dir):
|
|
480
496
|
_validate_model_config(self, resolved_preprocessing, resolved)
|
|
481
|
-
|
|
497
|
+
embedded = embed_slides(
|
|
482
498
|
self,
|
|
483
499
|
slides,
|
|
484
500
|
preprocessing=resolved_preprocessing,
|
|
485
501
|
execution=resolved,
|
|
486
502
|
)
|
|
503
|
+
return _group_embedded_slides(embedded, annotations=annotations)
|
|
487
504
|
|
|
488
505
|
def embed_patient(
|
|
489
506
|
self,
|
|
@@ -650,6 +667,77 @@ class Pipeline:
|
|
|
650
667
|
)
|
|
651
668
|
|
|
652
669
|
|
|
670
|
+
def _select_embedded_bag(
|
|
671
|
+
bags: Mapping[str, EmbeddedSlide],
|
|
672
|
+
annotation: str | list[str] | None,
|
|
673
|
+
) -> EmbeddedSlide | list[EmbeddedSlide]:
|
|
674
|
+
"""Select per-class bag(s) from a single slide's ``{label: EmbeddedSlide}`` map.
|
|
675
|
+
|
|
676
|
+
numpy-style shape-in/shape-out:
|
|
677
|
+
|
|
678
|
+
- a single class string returns one :class:`EmbeddedSlide`;
|
|
679
|
+
- a list of class strings returns a list in the requested order;
|
|
680
|
+
- ``None`` returns the single bag when the run produced exactly one,
|
|
681
|
+
otherwise raises naming the available bags and directing to
|
|
682
|
+
:meth:`Model.embed_slides`.
|
|
683
|
+
|
|
684
|
+
Requesting a class the run did not produce raises naming what is available.
|
|
685
|
+
"""
|
|
686
|
+
available = sorted(bags)
|
|
687
|
+
if isinstance(annotation, str):
|
|
688
|
+
if annotation not in bags:
|
|
689
|
+
raise ValueError(
|
|
690
|
+
f"embed_slide() found no '{annotation}' annotation bag for this "
|
|
691
|
+
f"slide; available bags: {available}."
|
|
692
|
+
)
|
|
693
|
+
return bags[annotation]
|
|
694
|
+
if annotation is not None:
|
|
695
|
+
selected: list[EmbeddedSlide] = []
|
|
696
|
+
for label in annotation:
|
|
697
|
+
if label not in bags:
|
|
698
|
+
raise ValueError(
|
|
699
|
+
f"embed_slide() found no '{label}' annotation bag for this "
|
|
700
|
+
f"slide; available bags: {available}."
|
|
701
|
+
)
|
|
702
|
+
selected.append(bags[label])
|
|
703
|
+
return selected
|
|
704
|
+
if len(bags) == 1:
|
|
705
|
+
return next(iter(bags.values()))
|
|
706
|
+
raise ValueError(
|
|
707
|
+
f"embed_slide() received {len(bags)} annotation bags for this slide "
|
|
708
|
+
f"({available}); annotation-aware sampling produces one bag per class. "
|
|
709
|
+
"Pass annotation=... to select a class, or use Model.embed_slides(...) "
|
|
710
|
+
"to receive every per-class EmbeddedSlide (each carries its .annotation)."
|
|
711
|
+
)
|
|
712
|
+
|
|
713
|
+
|
|
714
|
+
def _group_embedded_slides(
|
|
715
|
+
embedded: Sequence[EmbeddedSlide],
|
|
716
|
+
*,
|
|
717
|
+
annotations: list[str] | None = None,
|
|
718
|
+
) -> dict[str, dict[str, EmbeddedSlide]]:
|
|
719
|
+
"""Group flat per-row :class:`EmbeddedSlide` results into a nested mapping.
|
|
720
|
+
|
|
721
|
+
The outer key is ``sample_id``; the inner key is the bag's informative
|
|
722
|
+
annotation label (``"tissue"``/``"merged"``/class name), never ``None``.
|
|
723
|
+
A bag whose ``.annotation`` is ``None`` (defensive — post-#173 real runs
|
|
724
|
+
always carry a label) does not produce a ``None`` key.
|
|
725
|
+
|
|
726
|
+
When *annotations* is given, the inner keys are restricted to the named
|
|
727
|
+
classes (in encounter order).
|
|
728
|
+
"""
|
|
729
|
+
requested = None if annotations is None else set(annotations)
|
|
730
|
+
grouped: dict[str, dict[str, EmbeddedSlide]] = {}
|
|
731
|
+
for bag in embedded:
|
|
732
|
+
label = bag.annotation
|
|
733
|
+
if label is None:
|
|
734
|
+
continue
|
|
735
|
+
if requested is not None and label not in requested:
|
|
736
|
+
continue
|
|
737
|
+
grouped.setdefault(bag.sample_id, {})[label] = bag
|
|
738
|
+
return grouped
|
|
739
|
+
|
|
740
|
+
|
|
653
741
|
def _coerce_execution_options(
|
|
654
742
|
options: ExecutionOptions | None,
|
|
655
743
|
*,
|
|
@@ -54,6 +54,13 @@ class _HibouBase(TileEncoder):
|
|
|
54
54
|
v2.Normalize(mean=_HIBOU_MEAN, std=_HIBOU_STD),
|
|
55
55
|
])
|
|
56
56
|
|
|
57
|
+
@property
|
|
58
|
+
def _num_prefix_tokens(self) -> int:
|
|
59
|
+
# CLS + register tokens. Dinov2-with-registers carries the register tokens
|
|
60
|
+
# between the CLS and patch tokens, so both the dense and attention paths
|
|
61
|
+
# must strip them; deriving the count from config keeps the two in sync.
|
|
62
|
+
return 1 + int(getattr(self._model.config, "num_register_tokens", 0))
|
|
63
|
+
|
|
57
64
|
def encode_tiles(self, batch: Tensor) -> Tensor:
|
|
58
65
|
output = self._model(pixel_values=batch)
|
|
59
66
|
return output.pooler_output
|
|
@@ -77,7 +84,7 @@ class _HibouBase(TileEncoder):
|
|
|
77
84
|
output.last_hidden_state,
|
|
78
85
|
grid_h=height // patch,
|
|
79
86
|
grid_w=width // patch,
|
|
80
|
-
num_prefix_tokens=
|
|
87
|
+
num_prefix_tokens=self._num_prefix_tokens,
|
|
81
88
|
encoder_name=type(self).__name__,
|
|
82
89
|
)
|
|
83
90
|
|
|
@@ -111,7 +118,7 @@ class _HibouBase(TileEncoder):
|
|
|
111
118
|
output = self._model(pixel_values=batch, output_attentions=True)
|
|
112
119
|
return attentions_tuple_to_grids(
|
|
113
120
|
output.attentions,
|
|
114
|
-
num_prefix_tokens=
|
|
121
|
+
num_prefix_tokens=self._num_prefix_tokens,
|
|
115
122
|
blocks=blocks,
|
|
116
123
|
include_registers=include_registers,
|
|
117
124
|
grid_h=height // patch,
|
|
@@ -36,6 +36,18 @@ class Midnight(TileEncoder):
|
|
|
36
36
|
self._model = AutoModel.from_pretrained("kaiko-ai/midnight").eval()
|
|
37
37
|
self._device = preferred_default_device()
|
|
38
38
|
self._output_variant = resolve_requested_output_variant(output_variant)
|
|
39
|
+
# The pooled, dense, and attention paths all assume a single CLS prefix
|
|
40
|
+
# token (kaiko's reference recipe pools over output[:, 1:]). If a future
|
|
41
|
+
# checkpoint adds register tokens, that assumption silently folds them into
|
|
42
|
+
# the patch mean and mislabels the dense/attention grids — fail loudly here.
|
|
43
|
+
num_register_tokens = int(getattr(self._model.config, "num_register_tokens", 0))
|
|
44
|
+
if num_register_tokens:
|
|
45
|
+
raise ValueError(
|
|
46
|
+
"Midnight encoder assumes a single CLS prefix token, but the loaded "
|
|
47
|
+
f"checkpoint reports num_register_tokens={num_register_tokens}. Update "
|
|
48
|
+
"the pooled/dense/attention paths to strip the register tokens before "
|
|
49
|
+
"using this checkpoint."
|
|
50
|
+
)
|
|
39
51
|
|
|
40
52
|
def get_transform(self) -> Callable:
|
|
41
53
|
return v2.Compose([
|
|
@@ -16,8 +16,6 @@ _VIRCHOW_OUTPUT_DIMS = {
|
|
|
16
16
|
class _VirchowBase(TimmTileEncoder):
|
|
17
17
|
"""Base for Virchow models that concat CLS + mean-pooled patch tokens."""
|
|
18
18
|
|
|
19
|
-
_num_prefix_tokens: int = 1 # Override in subclass if needed
|
|
20
|
-
|
|
21
19
|
def __init__(self, model_name: str, *, output_variant: str | None = None):
|
|
22
20
|
self._output_variant = resolve_requested_output_variant(
|
|
23
21
|
output_variant,
|
|
@@ -36,7 +34,7 @@ class _VirchowBase(TimmTileEncoder):
|
|
|
36
34
|
cls_token = output[:, 0]
|
|
37
35
|
if self._output_variant == "cls":
|
|
38
36
|
return cls_token
|
|
39
|
-
patch_tokens = output[:, self._num_prefix_tokens:]
|
|
37
|
+
patch_tokens = output[:, self._model.num_prefix_tokens:]
|
|
40
38
|
return torch.cat([cls_token, patch_tokens.mean(dim=1)], dim=-1)
|
|
41
39
|
|
|
42
40
|
@property
|
|
@@ -57,8 +55,6 @@ class _VirchowBase(TimmTileEncoder):
|
|
|
57
55
|
source="paige-ai/Virchow",
|
|
58
56
|
)
|
|
59
57
|
class Virchow(_VirchowBase):
|
|
60
|
-
_num_prefix_tokens = 1
|
|
61
|
-
|
|
62
58
|
def __init__(self, *, output_variant: str | None = None):
|
|
63
59
|
super().__init__("hf-hub:paige-ai/Virchow", output_variant=output_variant)
|
|
64
60
|
|
|
@@ -71,12 +67,10 @@ class Virchow(_VirchowBase):
|
|
|
71
67
|
},
|
|
72
68
|
default_output_variant="cls_patch_mean",
|
|
73
69
|
input_size=224,
|
|
74
|
-
supported_spacing_um=[0.5, 1.0, 2.0],
|
|
70
|
+
supported_spacing_um=[0.25, 0.5, 1.0, 2.0],
|
|
75
71
|
precision="fp16",
|
|
76
72
|
source="paige-ai/Virchow2",
|
|
77
73
|
)
|
|
78
74
|
class Virchow2(_VirchowBase):
|
|
79
|
-
_num_prefix_tokens = 5 # 1 CLS + 4 register tokens
|
|
80
|
-
|
|
81
75
|
def __init__(self, *, output_variant: str | None = None):
|
|
82
76
|
super().__init__("hf-hub:paige-ai/Virchow2", output_variant=output_variant)
|
|
@@ -101,15 +101,14 @@ def _normalized_row_annotation(annotation) -> str | None:
|
|
|
101
101
|
"""Collapse a process-list ``annotation`` cell to the per-class key (``None`` for the flat path).
|
|
102
102
|
|
|
103
103
|
Mirrors the in-memory single-GPU path: ``None``/NaN and hs2p's flat-layout sentinels
|
|
104
|
-
(:func:`hs2p.fileops.is_flattened_annotation
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
keys those rows to the flat embedding path with no per-class subdir.
|
|
104
|
+
(:func:`hs2p.fileops.is_flattened_annotation` — the single source of truth, which flattens
|
|
105
|
+
``None``/``"tissue"``/``"merged"``) land flat — so the distributed reconcile keys those rows
|
|
106
|
+
to the flat embedding path with no per-class subdir.
|
|
108
107
|
"""
|
|
109
108
|
if annotation is None or (isinstance(annotation, float) and pd.isna(annotation)):
|
|
110
109
|
return None
|
|
111
110
|
annotation = str(annotation)
|
|
112
|
-
if
|
|
111
|
+
if is_flattened_annotation(annotation):
|
|
113
112
|
return None
|
|
114
113
|
return annotation
|
|
115
114
|
|
|
@@ -32,15 +32,15 @@ def normalize_work_unit_annotation(annotation: str | None) -> str | None:
|
|
|
32
32
|
"""Collapse flat-layout annotations to ``None`` so flat units key by bare ``sample_id``.
|
|
33
33
|
|
|
34
34
|
Mirrors the in-memory single-GPU path and the distributed reconcile
|
|
35
|
-
(:func:`slide2vec.runtime.artifacts_collect._normalized_row_annotation`):
|
|
36
|
-
|
|
37
|
-
|
|
35
|
+
(:func:`slide2vec.runtime.artifacts_collect._normalized_row_annotation`): hs2p's flat-layout
|
|
36
|
+
sentinels (:func:`hs2p.fileops.is_flattened_annotation`, the single source of truth — it
|
|
37
|
+
flattens ``None``/``"tissue"``/``"merged"``) all collapse to ``None``. Only genuine per-class
|
|
38
38
|
annotations survive as a composite key.
|
|
39
39
|
"""
|
|
40
40
|
if annotation is None:
|
|
41
41
|
return None
|
|
42
42
|
annotation = str(annotation)
|
|
43
|
-
if
|
|
43
|
+
if is_flattened_annotation(annotation):
|
|
44
44
|
return None
|
|
45
45
|
return annotation
|
|
46
46
|
|
|
@@ -56,6 +56,7 @@ def make_embedded_slide(
|
|
|
56
56
|
tile_size_lv0=int(tiling_result.tile_size_lv0),
|
|
57
57
|
image_path=slide.image_path,
|
|
58
58
|
mask_path=slide.mask_path,
|
|
59
|
+
annotation=tiling_result_annotation(tiling_result),
|
|
59
60
|
num_tiles=int(n_tiles) if n_tiles is not None else len(x_values),
|
|
60
61
|
mask_preview_path=Path(mask_preview_path) if mask_preview_path is not None else None,
|
|
61
62
|
tiling_preview_path=Path(tiling_preview_path) if tiling_preview_path is not None else None,
|
|
@@ -265,14 +265,15 @@ def _normalized_annotation(annotation: Any) -> str | None:
|
|
|
265
265
|
|
|
266
266
|
Keying the per-class feature-path map on this normalized value lets the flat tissue-only
|
|
267
267
|
path and a real class share one matching rule without the sentinel leaking into lookups.
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
268
|
+
Flattening is decided solely by :func:`hs2p.fileops.is_flattened_annotation` (the single
|
|
269
|
+
source of truth), which flattens ``None``/``"tissue"``/``"merged"`` to the flat root, so
|
|
270
|
+
``"merged"`` (hs2p's merged output-mode label, which carries no class) resolves to the flat
|
|
271
|
+
embedding path rather than being left unmatched.
|
|
271
272
|
"""
|
|
272
273
|
if annotation is None or (isinstance(annotation, float) and pd.isna(annotation)):
|
|
273
274
|
return None
|
|
274
275
|
annotation = str(annotation)
|
|
275
|
-
if
|
|
276
|
+
if is_flattened_annotation(annotation):
|
|
276
277
|
return None
|
|
277
278
|
return annotation
|
|
278
279
|
|
|
@@ -46,9 +46,10 @@ def build_hs2p_configs(
|
|
|
46
46
|
if is_hierarchical_preprocessing(preprocessing)
|
|
47
47
|
else preprocessing.requested_tile_size_px
|
|
48
48
|
)
|
|
49
|
-
# Reuse hs2p's tiling-config resolver so the
|
|
50
|
-
# masks.min_coverage
|
|
51
|
-
# is threaded consistently. The resolver reads
|
|
49
|
+
# Reuse hs2p's tiling-config resolver so the resolved min_coverage map comes from
|
|
50
|
+
# masks.min_coverage (the single source of truth; min_coverage["tissue"] is the tissue
|
|
51
|
+
# threshold) and independent_sampling is threaded consistently. The resolver reads
|
|
52
|
+
# attributes, so wrap the masks dict.
|
|
52
53
|
tiling_adapter = SimpleNamespace(
|
|
53
54
|
tiling=SimpleNamespace(
|
|
54
55
|
masks=SimpleNamespace(**dict(preprocessing.masks)),
|
|
@@ -244,11 +244,10 @@ def load_tiling_result_from_row(row):
|
|
|
244
244
|
annotation = annotation if annotation is None else str(annotation)
|
|
245
245
|
# The merged output mode (hs2p's CoordinateOutputMode.MERGED) emits a single per-slide
|
|
246
246
|
# coordinate set over the union of tiles passing any active class threshold. hs2p labels
|
|
247
|
-
# that process-list row "merged" so it is not mistaken for plain tissue
|
|
248
|
-
#
|
|
249
|
-
#
|
|
250
|
-
|
|
251
|
-
annotation = None
|
|
247
|
+
# that process-list row "merged" so it is not mistaken for plain tissue. The informative
|
|
248
|
+
# label is preserved verbatim here — artifact placement is decided downstream solely by
|
|
249
|
+
# hs2p.fileops.is_flattened_annotation (which flattens None/"tissue"/"merged" to the flat
|
|
250
|
+
# output root), so "merged" still lands flat without erasing its self-describing label.
|
|
252
251
|
setattr(tiling_result, "annotation", annotation)
|
|
253
252
|
setattr(tiling_result, "tiles_tar_path", _optional_path(row.get("tiles_tar_path")))
|
|
254
253
|
setattr(tiling_result, "mask_preview_path", _optional_path(row.get("mask_preview_path")))
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: slide2vec
|
|
3
|
-
Version: 4.8.0
|
|
3
|
+
Version: 5.0.1
|
|
4
4
|
Summary: Embedding of whole slide images with Foundation Models
|
|
5
5
|
Author-email: Clément Grisi <clement.grisi@radboudumc.nl>
|
|
6
6
|
License-Expression: Apache-2.0
|
|
@@ -15,7 +15,7 @@ Classifier: Programming Language :: Python :: 3.13
|
|
|
15
15
|
Requires-Python: >=3.10
|
|
16
16
|
Description-Content-Type: text/markdown
|
|
17
17
|
License-File: LICENSE
|
|
18
|
-
Requires-Dist: hs2p[asap,cucim,openslide,sam2,vips]>=4.
|
|
18
|
+
Requires-Dist: hs2p[asap,cucim,openslide,sam2,vips]>=4.2.0
|
|
19
19
|
Requires-Dist: omegaconf
|
|
20
20
|
Requires-Dist: matplotlib
|
|
21
21
|
Requires-Dist: numpy<2
|
|
@@ -65,7 +65,7 @@ Requires-Dist: numpy<2; extra == "fm"
|
|
|
65
65
|
Requires-Dist: pandas; extra == "fm"
|
|
66
66
|
Requires-Dist: pillow; extra == "fm"
|
|
67
67
|
Requires-Dist: rich; extra == "fm"
|
|
68
|
-
Requires-Dist: hs2p[asap,cucim,openslide,sam2,vips]>=4.
|
|
68
|
+
Requires-Dist: hs2p[asap,cucim,openslide,sam2,vips]>=4.2.0; extra == "fm"
|
|
69
69
|
Requires-Dist: wandb; extra == "fm"
|
|
70
70
|
Requires-Dist: torch<2.8,>=2.3; extra == "fm"
|
|
71
71
|
Requires-Dist: torchvision>=0.18.0; extra == "fm"
|
|
@@ -169,7 +169,7 @@ pipeline = Pipeline(
|
|
|
169
169
|
preprocessing=PreprocessingConfig(
|
|
170
170
|
requested_spacing_um=0.5,
|
|
171
171
|
requested_tile_size_px=224,
|
|
172
|
-
|
|
172
|
+
masks={"min_coverage": {"tissue": 0.1}},
|
|
173
173
|
),
|
|
174
174
|
execution=ExecutionOptions(output_dir="outputs/demo"),
|
|
175
175
|
)
|
|
@@ -83,7 +83,6 @@ slide2vec/utils/utils.py
|
|
|
83
83
|
tests/test_architecture_runtime_split.py
|
|
84
84
|
tests/test_attention_extraction.py
|
|
85
85
|
tests/test_dense_extraction.py
|
|
86
|
-
tests/test_dense_locality_gated.py
|
|
87
86
|
tests/test_dense_regions.py
|
|
88
87
|
tests/test_encoder_registry.py
|
|
89
88
|
tests/test_hs2p_package_cutover.py
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
hs2p[asap,cucim,openslide,sam2,vips]>=4.
|
|
1
|
+
hs2p[asap,cucim,openslide,sam2,vips]>=4.2.0
|
|
2
2
|
omegaconf
|
|
3
3
|
matplotlib
|
|
4
4
|
numpy<2
|
|
@@ -27,7 +27,7 @@ numpy<2
|
|
|
27
27
|
pandas
|
|
28
28
|
pillow
|
|
29
29
|
rich
|
|
30
|
-
hs2p[asap,cucim,openslide,sam2,vips]>=4.
|
|
30
|
+
hs2p[asap,cucim,openslide,sam2,vips]>=4.2.0
|
|
31
31
|
wandb
|
|
32
32
|
torch<2.8,>=2.3
|
|
33
33
|
torchvision>=0.18.0
|
|
@@ -700,7 +700,7 @@ def test_masks_min_coverage_tissue_drives_derived_tiling_threshold():
|
|
|
700
700
|
|
|
701
701
|
tiling_cfg = build_hs2p_configs(preprocessing)[0]
|
|
702
702
|
|
|
703
|
-
assert tiling_cfg.
|
|
703
|
+
assert tiling_cfg.min_coverage["tissue"] == pytest.approx(0.37)
|
|
704
704
|
assert tiling_cfg.independent_sampling is False
|
|
705
705
|
|
|
706
706
|
|
|
@@ -842,9 +842,10 @@ def test_independent_sampling_toggle_selects_selection_strategy():
|
|
|
842
842
|
assert joint[-2] == "joint_sampling"
|
|
843
843
|
|
|
844
844
|
|
|
845
|
-
def test_merged_annotation_label_collapses_to_flat_root(tmp_path: Path):
|
|
846
|
-
"""A merged tiling row is labelled ``merged`` by hs2p
|
|
847
|
-
|
|
845
|
+
def test_merged_annotation_label_survives_round_trip_to_flat_root(tmp_path: Path):
|
|
846
|
+
"""A merged tiling row is labelled ``merged`` by hs2p. The informative label must
|
|
847
|
+
survive the round-trip (no collapse to ``None``), yet artifacts still land at the flat
|
|
848
|
+
output root because hs2p's ``is_flattened_annotation`` flattens ``"merged"``."""
|
|
848
849
|
from slide2vec.utils.tiling_io import load_tiling_result_from_row
|
|
849
850
|
|
|
850
851
|
coordinates_meta_path = tmp_path / "slide-a.coordinates.meta.json"
|
|
@@ -868,8 +869,8 @@ def test_merged_annotation_label_collapses_to_flat_root(tmp_path: Path):
|
|
|
868
869
|
finally:
|
|
869
870
|
tiling_io.load_tiling_result = original
|
|
870
871
|
|
|
871
|
-
#
|
|
872
|
-
assert result.annotation
|
|
872
|
+
# The informative label survives the round-trip; it is not blanked to None.
|
|
873
|
+
assert result.annotation == "merged"
|
|
873
874
|
artifact = write_tile_embeddings(
|
|
874
875
|
"slide-a",
|
|
875
876
|
np.arange(8, dtype=np.float32).reshape(2, 4),
|
|
@@ -877,9 +878,84 @@ def test_merged_annotation_label_collapses_to_flat_root(tmp_path: Path):
|
|
|
877
878
|
output_format="npz",
|
|
878
879
|
annotation=result.annotation,
|
|
879
880
|
)
|
|
881
|
+
# ...but placement is decided by is_flattened_annotation, so it still lands flat.
|
|
880
882
|
assert artifact.path == tmp_path / "tile_embeddings" / "slide-a.npz"
|
|
881
883
|
|
|
882
884
|
|
|
885
|
+
def test_tissue_annotation_survives_round_trip_to_flat_root(tmp_path: Path):
|
|
886
|
+
"""A ``"tissue"`` row keeps its informative label through the round-trip while still
|
|
887
|
+
resolving to flat-root placement via ``is_flattened_annotation``."""
|
|
888
|
+
from slide2vec.utils.tiling_io import load_tiling_result_from_row
|
|
889
|
+
|
|
890
|
+
coordinates_meta_path = tmp_path / "slide-t.coordinates.meta.json"
|
|
891
|
+
coordinates_meta_path.write_text("{}", encoding="utf-8")
|
|
892
|
+
|
|
893
|
+
def fake_load_tiling_result(**kwargs):
|
|
894
|
+
return SimpleNamespace()
|
|
895
|
+
|
|
896
|
+
import slide2vec.utils.tiling_io as tiling_io
|
|
897
|
+
|
|
898
|
+
original = tiling_io.load_tiling_result
|
|
899
|
+
tiling_io.load_tiling_result = fake_load_tiling_result
|
|
900
|
+
try:
|
|
901
|
+
result = load_tiling_result_from_row(
|
|
902
|
+
{
|
|
903
|
+
"annotation": "tissue",
|
|
904
|
+
"coordinates_npz_path": str(tmp_path / "slide-t.coordinates.npz"),
|
|
905
|
+
"coordinates_meta_path": str(coordinates_meta_path),
|
|
906
|
+
}
|
|
907
|
+
)
|
|
908
|
+
finally:
|
|
909
|
+
tiling_io.load_tiling_result = original
|
|
910
|
+
|
|
911
|
+
assert result.annotation == "tissue"
|
|
912
|
+
artifact = write_tile_embeddings(
|
|
913
|
+
"slide-t",
|
|
914
|
+
np.arange(8, dtype=np.float32).reshape(2, 4),
|
|
915
|
+
output_dir=tmp_path,
|
|
916
|
+
output_format="npz",
|
|
917
|
+
annotation=result.annotation,
|
|
918
|
+
)
|
|
919
|
+
assert artifact.path == tmp_path / "tile_embeddings" / "slide-t.npz"
|
|
920
|
+
|
|
921
|
+
|
|
922
|
+
def test_real_class_annotation_survives_round_trip_to_per_class_subdir(tmp_path: Path):
|
|
923
|
+
"""A genuine class label (e.g. ``"tumor"``) survives the round-trip and routes to its
|
|
924
|
+
own per-class subdir, since ``is_flattened_annotation`` does not flatten it."""
|
|
925
|
+
from slide2vec.utils.tiling_io import load_tiling_result_from_row
|
|
926
|
+
|
|
927
|
+
coordinates_meta_path = tmp_path / "slide-u.coordinates.meta.json"
|
|
928
|
+
coordinates_meta_path.write_text("{}", encoding="utf-8")
|
|
929
|
+
|
|
930
|
+
def fake_load_tiling_result(**kwargs):
|
|
931
|
+
return SimpleNamespace()
|
|
932
|
+
|
|
933
|
+
import slide2vec.utils.tiling_io as tiling_io
|
|
934
|
+
|
|
935
|
+
original = tiling_io.load_tiling_result
|
|
936
|
+
tiling_io.load_tiling_result = fake_load_tiling_result
|
|
937
|
+
try:
|
|
938
|
+
result = load_tiling_result_from_row(
|
|
939
|
+
{
|
|
940
|
+
"annotation": "tumor",
|
|
941
|
+
"coordinates_npz_path": str(tmp_path / "slide-u.coordinates.npz"),
|
|
942
|
+
"coordinates_meta_path": str(coordinates_meta_path),
|
|
943
|
+
}
|
|
944
|
+
)
|
|
945
|
+
finally:
|
|
946
|
+
tiling_io.load_tiling_result = original
|
|
947
|
+
|
|
948
|
+
assert result.annotation == "tumor"
|
|
949
|
+
artifact = write_tile_embeddings(
|
|
950
|
+
"slide-u",
|
|
951
|
+
np.arange(8, dtype=np.float32).reshape(2, 4),
|
|
952
|
+
output_dir=tmp_path,
|
|
953
|
+
output_format="npz",
|
|
954
|
+
annotation=result.annotation,
|
|
955
|
+
)
|
|
956
|
+
assert artifact.path == tmp_path / "tile_embeddings" / "tumor" / "slide-u.npz"
|
|
957
|
+
|
|
958
|
+
|
|
883
959
|
def test_invalid_masks_block_with_duplicate_pixel_values_fails_fast():
|
|
884
960
|
from slide2vec.runtime.tiling import build_hs2p_configs
|
|
885
961
|
|