slide2vec 4.1.1__tar.gz → 4.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {slide2vec-4.1.1 → slide2vec-4.2.0}/PKG-INFO +6 -4
- {slide2vec-4.1.1 → slide2vec-4.2.0}/README.md +1 -1
- {slide2vec-4.1.1 → slide2vec-4.2.0}/pyproject.toml +8 -5
- {slide2vec-4.1.1 → slide2vec-4.2.0}/slide2vec/__init__.py +1 -1
- {slide2vec-4.1.1 → slide2vec-4.2.0}/slide2vec/api.py +87 -0
- {slide2vec-4.1.1 → slide2vec-4.2.0}/slide2vec/artifacts.py +53 -0
- {slide2vec-4.1.1 → slide2vec-4.2.0}/slide2vec/cli.py +12 -9
- {slide2vec-4.1.1 → slide2vec-4.2.0}/slide2vec/configs/default.yaml +1 -0
- {slide2vec-4.1.1 → slide2vec-4.2.0}/slide2vec/encoders/__init__.py +2 -0
- {slide2vec-4.1.1 → slide2vec-4.2.0}/slide2vec/encoders/base.py +27 -0
- {slide2vec-4.1.1 → slide2vec-4.2.0}/slide2vec/encoders/models/__init__.py +4 -0
- slide2vec-4.2.0/slide2vec/encoders/models/lunit.py +21 -0
- slide2vec-4.2.0/slide2vec/encoders/models/moozy/__init__.py +114 -0
- slide2vec-4.2.0/slide2vec/encoders/models/moozy/blocks.py +272 -0
- slide2vec-4.2.0/slide2vec/encoders/models/moozy/case.py +91 -0
- slide2vec-4.2.0/slide2vec/encoders/models/moozy/loading.py +103 -0
- slide2vec-4.2.0/slide2vec/encoders/models/moozy/slide.py +152 -0
- slide2vec-4.2.0/slide2vec/encoders/models/moozy/types.py +13 -0
- {slide2vec-4.1.1 → slide2vec-4.2.0}/slide2vec/encoders/registry.py +6 -5
- {slide2vec-4.1.1 → slide2vec-4.2.0}/slide2vec/inference.py +396 -17
- {slide2vec-4.1.1 → slide2vec-4.2.0}/slide2vec/progress.py +12 -0
- {slide2vec-4.1.1 → slide2vec-4.2.0}/slide2vec/utils/config.py +10 -5
- {slide2vec-4.1.1 → slide2vec-4.2.0}/slide2vec/utils/tiling_io.py +16 -0
- {slide2vec-4.1.1 → slide2vec-4.2.0}/slide2vec.egg-info/PKG-INFO +6 -4
- {slide2vec-4.1.1 → slide2vec-4.2.0}/slide2vec.egg-info/SOURCES.txt +7 -0
- slide2vec-4.2.0/slide2vec.egg-info/entry_points.txt +2 -0
- {slide2vec-4.1.1 → slide2vec-4.2.0}/slide2vec.egg-info/requires.txt +5 -2
- {slide2vec-4.1.1 → slide2vec-4.2.0}/tests/test_output_consistency.py +3 -2
- {slide2vec-4.1.1 → slide2vec-4.2.0}/tests/test_progress.py +40 -0
- {slide2vec-4.1.1 → slide2vec-4.2.0}/tests/test_regression_core.py +36 -2
- {slide2vec-4.1.1 → slide2vec-4.2.0}/tests/test_regression_inference.py +87 -11
- slide2vec-4.1.1/slide2vec.egg-info/entry_points.txt +0 -2
- {slide2vec-4.1.1 → slide2vec-4.2.0}/LICENSE +0 -0
- {slide2vec-4.1.1 → slide2vec-4.2.0}/setup.cfg +0 -0
- {slide2vec-4.1.1 → slide2vec-4.2.0}/slide2vec/__main__.py +0 -0
- {slide2vec-4.1.1 → slide2vec-4.2.0}/slide2vec/configs/__init__.py +0 -0
- {slide2vec-4.1.1 → slide2vec-4.2.0}/slide2vec/data/__init__.py +0 -0
- {slide2vec-4.1.1 → slide2vec-4.2.0}/slide2vec/data/dataset.py +0 -0
- {slide2vec-4.1.1 → slide2vec-4.2.0}/slide2vec/data/tile_reader.py +0 -0
- {slide2vec-4.1.1 → slide2vec-4.2.0}/slide2vec/data/tile_store.py +0 -0
- {slide2vec-4.1.1 → slide2vec-4.2.0}/slide2vec/distributed/__init__.py +0 -0
- {slide2vec-4.1.1 → slide2vec-4.2.0}/slide2vec/distributed/direct_embed_worker.py +0 -0
- {slide2vec-4.1.1 → slide2vec-4.2.0}/slide2vec/distributed/pipeline_worker.py +0 -0
- {slide2vec-4.1.1 → slide2vec-4.2.0}/slide2vec/encoders/models/conch.py +0 -0
- {slide2vec-4.1.1 → slide2vec-4.2.0}/slide2vec/encoders/models/gigapath.py +0 -0
- {slide2vec-4.1.1 → slide2vec-4.2.0}/slide2vec/encoders/models/hibou.py +0 -0
- {slide2vec-4.1.1 → slide2vec-4.2.0}/slide2vec/encoders/models/hoptimus.py +0 -0
- {slide2vec-4.1.1 → slide2vec-4.2.0}/slide2vec/encoders/models/midnight.py +0 -0
- {slide2vec-4.1.1 → slide2vec-4.2.0}/slide2vec/encoders/models/musk.py +0 -0
- {slide2vec-4.1.1 → slide2vec-4.2.0}/slide2vec/encoders/models/phikon.py +0 -0
- {slide2vec-4.1.1 → slide2vec-4.2.0}/slide2vec/encoders/models/prism.py +0 -0
- {slide2vec-4.1.1 → slide2vec-4.2.0}/slide2vec/encoders/models/prost40m.py +0 -0
- {slide2vec-4.1.1 → slide2vec-4.2.0}/slide2vec/encoders/models/titan.py +0 -0
- {slide2vec-4.1.1 → slide2vec-4.2.0}/slide2vec/encoders/models/uni.py +0 -0
- {slide2vec-4.1.1 → slide2vec-4.2.0}/slide2vec/encoders/models/virchow.py +0 -0
- {slide2vec-4.1.1 → slide2vec-4.2.0}/slide2vec/encoders/validation.py +0 -0
- {slide2vec-4.1.1 → slide2vec-4.2.0}/slide2vec/main.py +0 -0
- {slide2vec-4.1.1 → slide2vec-4.2.0}/slide2vec/model_settings.py +0 -0
- {slide2vec-4.1.1 → slide2vec-4.2.0}/slide2vec/registry.py +0 -0
- {slide2vec-4.1.1 → slide2vec-4.2.0}/slide2vec/resources.py +0 -0
- {slide2vec-4.1.1 → slide2vec-4.2.0}/slide2vec/runtime_types.py +0 -0
- {slide2vec-4.1.1 → slide2vec-4.2.0}/slide2vec/utils/__init__.py +0 -0
- {slide2vec-4.1.1 → slide2vec-4.2.0}/slide2vec/utils/coordinates.py +0 -0
- {slide2vec-4.1.1 → slide2vec-4.2.0}/slide2vec/utils/log_utils.py +0 -0
- {slide2vec-4.1.1 → slide2vec-4.2.0}/slide2vec/utils/utils.py +0 -0
- {slide2vec-4.1.1 → slide2vec-4.2.0}/slide2vec.egg-info/dependency_links.txt +0 -0
- {slide2vec-4.1.1 → slide2vec-4.2.0}/slide2vec.egg-info/not-zip-safe +0 -0
- {slide2vec-4.1.1 → slide2vec-4.2.0}/slide2vec.egg-info/top_level.txt +0 -0
- {slide2vec-4.1.1 → slide2vec-4.2.0}/tests/test_batch_collator_timing.py +0 -0
- {slide2vec-4.1.1 → slide2vec-4.2.0}/tests/test_encoder_registry.py +0 -0
- {slide2vec-4.1.1 → slide2vec-4.2.0}/tests/test_hs2p_package_cutover.py +0 -0
- {slide2vec-4.1.1 → slide2vec-4.2.0}/tests/test_packaging_metadata.py +0 -0
- {slide2vec-4.1.1 → slide2vec-4.2.0}/tests/test_regression_models.py +0 -0
- {slide2vec-4.1.1 → slide2vec-4.2.0}/tests/test_tile_store.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: slide2vec
|
|
3
|
-
Version: 4.1.1
|
|
3
|
+
Version: 4.2.0
|
|
4
4
|
Summary: Embedding of whole slide images with Foundation Models
|
|
5
5
|
Author-email: Clément Grisi <clement.grisi@radboudumc.nl>
|
|
6
6
|
License-Expression: Apache-2.0
|
|
@@ -15,7 +15,7 @@ Classifier: Programming Language :: Python :: 3.13
|
|
|
15
15
|
Requires-Python: >=3.10
|
|
16
16
|
Description-Content-Type: text/markdown
|
|
17
17
|
License-File: LICENSE
|
|
18
|
-
Requires-Dist: hs2p[asap,cucim,openslide,vips]>=3.2.
|
|
18
|
+
Requires-Dist: hs2p[asap,cucim,openslide,vips]>=3.2.1
|
|
19
19
|
Requires-Dist: omegaconf
|
|
20
20
|
Requires-Dist: matplotlib
|
|
21
21
|
Requires-Dist: numpy<2
|
|
@@ -50,6 +50,8 @@ Requires-Dist: xformers==0.0.31; extra == "prism"
|
|
|
50
50
|
Provides-Extra: hibou
|
|
51
51
|
Requires-Dist: scipy~=1.8.1; extra == "hibou"
|
|
52
52
|
Requires-Dist: scikit-image~=0.19.3; extra == "hibou"
|
|
53
|
+
Provides-Extra: moozy
|
|
54
|
+
Requires-Dist: huggingface_hub<1.0,>=0.30.0; extra == "moozy"
|
|
53
55
|
Provides-Extra: titan
|
|
54
56
|
Requires-Dist: torch==2.0.1; extra == "titan"
|
|
55
57
|
Requires-Dist: timm==1.0.3; extra == "titan"
|
|
@@ -63,7 +65,7 @@ Requires-Dist: numpy<2; extra == "fm"
|
|
|
63
65
|
Requires-Dist: pandas; extra == "fm"
|
|
64
66
|
Requires-Dist: pillow; extra == "fm"
|
|
65
67
|
Requires-Dist: rich; extra == "fm"
|
|
66
|
-
Requires-Dist: hs2p[asap,cucim,openslide,vips]>=3.2.
|
|
68
|
+
Requires-Dist: hs2p[asap,cucim,openslide,vips]>=3.2.1; extra == "fm"
|
|
67
69
|
Requires-Dist: wandb; extra == "fm"
|
|
68
70
|
Requires-Dist: torch<2.8,>=2.3; extra == "fm"
|
|
69
71
|
Requires-Dist: torchvision>=0.18.0; extra == "fm"
|
|
@@ -210,7 +212,7 @@ The CLI is a thin wrapper over the package API.
|
|
|
210
212
|
Bundled configs live under `slide2vec/configs/preprocessing/` and `slide2vec/configs/models/`.
|
|
211
213
|
|
|
212
214
|
```shell
|
|
213
|
-
|
|
215
|
+
slide2vec /path/to/config.yaml
|
|
214
216
|
```
|
|
215
217
|
|
|
216
218
|
By default, manifest-driven CLI runs use all available GPUs. Set `speed.num_gpus=4` when you want to cap the sharding explicitly.
|
|
@@ -112,7 +112,7 @@ The CLI is a thin wrapper over the package API.
|
|
|
112
112
|
Bundled configs live under `slide2vec/configs/preprocessing/` and `slide2vec/configs/models/`.
|
|
113
113
|
|
|
114
114
|
```shell
|
|
115
|
-
|
|
115
|
+
slide2vec /path/to/config.yaml
|
|
116
116
|
```
|
|
117
117
|
|
|
118
118
|
By default, manifest-driven CLI runs use all available GPUs. Set `speed.num_gpus=4` when you want to cap the sharding explicitly.
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "slide2vec"
|
|
7
|
-
version = "4.1.1"
|
|
7
|
+
version = "4.2.0"
|
|
8
8
|
description = "Embedding of whole slide images with Foundation Models"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
requires-python = ">=3.10"
|
|
@@ -21,7 +21,7 @@ classifiers = [
|
|
|
21
21
|
"Programming Language :: Python :: 3.13",
|
|
22
22
|
]
|
|
23
23
|
dependencies = [
|
|
24
|
-
"hs2p[asap,cucim,openslide,vips]>=3.2.
|
|
24
|
+
"hs2p[asap,cucim,openslide,vips]>=3.2.1",
|
|
25
25
|
"omegaconf",
|
|
26
26
|
"matplotlib",
|
|
27
27
|
"numpy<2",
|
|
@@ -42,7 +42,7 @@ Homepage = "https://github.com/clemsgrs/slide2vec"
|
|
|
42
42
|
"Bug Tracker" = "https://github.com/clemsgrs/slide2vec/issues"
|
|
43
43
|
|
|
44
44
|
[project.scripts]
|
|
45
|
-
slide2vec = "slide2vec.cli:
|
|
45
|
+
slide2vec = "slide2vec.cli:entrypoint"
|
|
46
46
|
|
|
47
47
|
[project.optional-dependencies]
|
|
48
48
|
hoptimus = [
|
|
@@ -71,6 +71,9 @@ hibou = [
|
|
|
71
71
|
"scipy~=1.8.1",
|
|
72
72
|
"scikit-image~=0.19.3",
|
|
73
73
|
]
|
|
74
|
+
moozy = [
|
|
75
|
+
"huggingface_hub>=0.30.0,<1.0",
|
|
76
|
+
]
|
|
74
77
|
titan = [
|
|
75
78
|
"torch==2.0.1",
|
|
76
79
|
"timm==1.0.3",
|
|
@@ -85,7 +88,7 @@ fm = [
|
|
|
85
88
|
"pandas",
|
|
86
89
|
"pillow",
|
|
87
90
|
"rich",
|
|
88
|
-
"hs2p[asap,cucim,openslide,vips]>=3.2.
|
|
91
|
+
"hs2p[asap,cucim,openslide,vips]>=3.2.1",
|
|
89
92
|
"wandb",
|
|
90
93
|
"torch>=2.3,<2.8",
|
|
91
94
|
"torchvision>=0.18.0",
|
|
@@ -154,7 +157,7 @@ no_implicit_reexport = true
|
|
|
154
157
|
max-line-length = 160
|
|
155
158
|
|
|
156
159
|
[tool.bumpver]
|
|
157
|
-
current_version = "4.1.1"
|
|
160
|
+
current_version = "4.2.0"
|
|
158
161
|
version_pattern = "MAJOR.MINOR.PATCH"
|
|
159
162
|
commit = false # We do version bumping in CI, not as a commit
|
|
160
163
|
tag = false # Git tag already exists — we don't auto-tag
|
|
@@ -2,7 +2,7 @@ from slide2vec.api import EmbeddedSlide, ExecutionOptions, Model, Pipeline, Prep
|
|
|
2
2
|
from slide2vec.artifacts import HierarchicalEmbeddingArtifact, SlideEmbeddingArtifact, TileEmbeddingArtifact
|
|
3
3
|
|
|
4
4
|
|
|
5
|
-
__version__ = "4.1.1"
|
|
5
|
+
__version__ = "4.2.0"
|
|
6
6
|
|
|
7
7
|
__all__ = [
|
|
8
8
|
"Model",
|
|
@@ -11,6 +11,7 @@ from hs2p import SlideSpec
|
|
|
11
11
|
|
|
12
12
|
from slide2vec.artifacts import (
|
|
13
13
|
HierarchicalEmbeddingArtifact,
|
|
14
|
+
PatientEmbeddingArtifact,
|
|
14
15
|
SlideEmbeddingArtifact,
|
|
15
16
|
TileEmbeddingArtifact,
|
|
16
17
|
)
|
|
@@ -127,6 +128,7 @@ class ExecutionOptions:
|
|
|
127
128
|
prefetch_factor: int = 4
|
|
128
129
|
persistent_workers: bool = True
|
|
129
130
|
save_tile_embeddings: bool = False
|
|
131
|
+
save_slide_embeddings: bool = False
|
|
130
132
|
save_latents: bool = False
|
|
131
133
|
|
|
132
134
|
@classmethod
|
|
@@ -151,6 +153,7 @@ class ExecutionOptions:
|
|
|
151
153
|
prefetch_factor=prefetch_factor,
|
|
152
154
|
persistent_workers=persistent_workers,
|
|
153
155
|
save_tile_embeddings=bool(cfg.model.save_tile_embeddings),
|
|
156
|
+
save_slide_embeddings=bool(cfg.model.save_slide_embeddings),
|
|
154
157
|
save_latents=bool(cfg.model.save_latents),
|
|
155
158
|
)
|
|
156
159
|
|
|
@@ -200,9 +203,17 @@ class RunResult:
|
|
|
200
203
|
tile_artifacts: list[TileEmbeddingArtifact]
|
|
201
204
|
hierarchical_artifacts: list[HierarchicalEmbeddingArtifact]
|
|
202
205
|
slide_artifacts: list[SlideEmbeddingArtifact]
|
|
206
|
+
patient_artifacts: list[PatientEmbeddingArtifact] = field(default_factory=list)
|
|
203
207
|
process_list_path: Path | None = None
|
|
204
208
|
|
|
205
209
|
|
|
210
|
+
@dataclass(frozen=True, kw_only=True)
|
|
211
|
+
class EmbeddedPatient:
|
|
212
|
+
patient_id: str
|
|
213
|
+
patient_embedding: Any # torch.Tensor [D]
|
|
214
|
+
slide_embeddings: dict[str, Any] # {sample_id: torch.Tensor [D]}
|
|
215
|
+
|
|
216
|
+
|
|
206
217
|
@dataclass(frozen=True, kw_only=True)
|
|
207
218
|
class EmbeddedSlide:
|
|
208
219
|
sample_id: str
|
|
@@ -343,6 +354,82 @@ class Model:
|
|
|
343
354
|
execution=resolved,
|
|
344
355
|
)
|
|
345
356
|
|
|
357
|
+
def embed_patient(
|
|
358
|
+
self,
|
|
359
|
+
slides: SlideSequence,
|
|
360
|
+
patient_id: str | None = None,
|
|
361
|
+
*,
|
|
362
|
+
preprocessing: PreprocessingConfig | None = None,
|
|
363
|
+
execution: ExecutionOptions | None = None,
|
|
364
|
+
) -> "EmbeddedPatient":
|
|
365
|
+
"""Embed a single patient's slides and return one ``EmbeddedPatient``.
|
|
366
|
+
|
|
367
|
+
Convenience wrapper around :meth:`embed_patients` for the common case
|
|
368
|
+
where all *slides* belong to the same patient.
|
|
369
|
+
|
|
370
|
+
Args:
|
|
371
|
+
slides: All slides for this patient.
|
|
372
|
+
patient_id: Optional patient identifier applied to every slide.
|
|
373
|
+
When omitted, ``patient_id`` is read from slide dict keys or
|
|
374
|
+
object attributes; slides that carry no ``patient_id`` fall
|
|
375
|
+
back to ``sample_id``.
|
|
376
|
+
"""
|
|
377
|
+
patient_id_map: dict | None = None
|
|
378
|
+
if patient_id is not None:
|
|
379
|
+
patient_id_map = {}
|
|
380
|
+
for s in slides:
|
|
381
|
+
if isinstance(s, (str, Path)):
|
|
382
|
+
patient_id_map[Path(s).stem] = patient_id
|
|
383
|
+
elif isinstance(s, dict):
|
|
384
|
+
patient_id_map[str(s["sample_id"])] = patient_id
|
|
385
|
+
else:
|
|
386
|
+
patient_id_map[str(s.sample_id)] = patient_id
|
|
387
|
+
return self.embed_patients(
|
|
388
|
+
slides,
|
|
389
|
+
patient_id_map=patient_id_map,
|
|
390
|
+
preprocessing=preprocessing,
|
|
391
|
+
execution=execution,
|
|
392
|
+
)[0]
|
|
393
|
+
|
|
394
|
+
def embed_patients(
|
|
395
|
+
self,
|
|
396
|
+
slides: SlideSequence,
|
|
397
|
+
patient_id_map: dict | None = None,
|
|
398
|
+
*,
|
|
399
|
+
preprocessing: PreprocessingConfig | None = None,
|
|
400
|
+
execution: ExecutionOptions | None = None,
|
|
401
|
+
) -> "list[EmbeddedPatient]":
|
|
402
|
+
"""Embed slides and aggregate them into patient-level embeddings.
|
|
403
|
+
|
|
404
|
+
Requires a patient-level model (e.g. ``moozy``). For each patient
|
|
405
|
+
all contributing slide embeddings are aggregated by the model's
|
|
406
|
+
``encode_patient`` method.
|
|
407
|
+
|
|
408
|
+
Args:
|
|
409
|
+
slides: Slides to process. Each entry may be a path, a
|
|
410
|
+
``SlideSpec``, or a dict with ``sample_id`` / ``image_path``
|
|
411
|
+
keys. When *patient_id_map* is ``None`` a ``patient_id``
|
|
412
|
+
key in each dict is used to group slides.
|
|
413
|
+
patient_id_map: Optional explicit ``{sample_id: patient_id}``
|
|
414
|
+
mapping. When provided it takes precedence over any
|
|
415
|
+
``patient_id`` key embedded in the slide dicts. When
|
|
416
|
+
omitted and the slide dicts carry no ``patient_id``, each
|
|
417
|
+
slide is treated as its own patient.
|
|
418
|
+
"""
|
|
419
|
+
from slide2vec.inference import embed_patients
|
|
420
|
+
|
|
421
|
+
resolved = _coerce_execution_options(execution, model=self)
|
|
422
|
+
resolved_preprocessing = _resolve_direct_api_preprocessing(self, preprocessing)
|
|
423
|
+
with _auto_progress_reporting(output_dir=resolved.output_dir):
|
|
424
|
+
_validate_model_config(self, resolved_preprocessing, resolved)
|
|
425
|
+
return embed_patients(
|
|
426
|
+
self,
|
|
427
|
+
slides,
|
|
428
|
+
patient_id_map=patient_id_map,
|
|
429
|
+
preprocessing=resolved_preprocessing,
|
|
430
|
+
execution=resolved,
|
|
431
|
+
)
|
|
432
|
+
|
|
346
433
|
def _load_backend(self) -> LoadedModel:
|
|
347
434
|
if self._backend is None:
|
|
348
435
|
from slide2vec.inference import load_model
|
|
@@ -35,6 +35,20 @@ class SlideEmbeddingArtifact:
|
|
|
35
35
|
return load_metadata(self.metadata_path)
|
|
36
36
|
|
|
37
37
|
|
|
38
|
+
@dataclass(frozen=True, kw_only=True)
|
|
39
|
+
class PatientEmbeddingArtifact:
|
|
40
|
+
patient_id: str
|
|
41
|
+
path: Path
|
|
42
|
+
metadata_path: Path
|
|
43
|
+
format: str
|
|
44
|
+
feature_dim: int
|
|
45
|
+
num_slides: int
|
|
46
|
+
|
|
47
|
+
@property
|
|
48
|
+
def metadata(self) -> dict[str, Any]:
|
|
49
|
+
return load_metadata(self.metadata_path)
|
|
50
|
+
|
|
51
|
+
|
|
38
52
|
@dataclass(frozen=True, kw_only=True)
|
|
39
53
|
class HierarchicalEmbeddingArtifact:
|
|
40
54
|
sample_id: str
|
|
@@ -223,6 +237,45 @@ def write_slide_embeddings(
|
|
|
223
237
|
)
|
|
224
238
|
|
|
225
239
|
|
|
240
|
+
def write_patient_embeddings(
|
|
241
|
+
patient_id: str,
|
|
242
|
+
embedding,
|
|
243
|
+
*,
|
|
244
|
+
output_dir: str | Path,
|
|
245
|
+
output_format: str = "pt",
|
|
246
|
+
metadata: dict[str, Any] | None = None,
|
|
247
|
+
num_slides: int = 0,
|
|
248
|
+
) -> PatientEmbeddingArtifact:
|
|
249
|
+
output_format = _validate_output_format(output_format)
|
|
250
|
+
artifact_path, metadata_path = _setup_artifact_paths(
|
|
251
|
+
output_dir, "patient_embeddings", patient_id, output_format
|
|
252
|
+
)
|
|
253
|
+
embedding_array = _ensure_array(embedding)
|
|
254
|
+
if output_format == "pt":
|
|
255
|
+
torch.save(_ensure_tensor(embedding), artifact_path)
|
|
256
|
+
else:
|
|
257
|
+
np.savez_compressed(artifact_path, features=embedding_array)
|
|
258
|
+
|
|
259
|
+
patient_metadata = {
|
|
260
|
+
"patient_id": patient_id,
|
|
261
|
+
"artifact_type": "patient_embeddings",
|
|
262
|
+
"format": output_format,
|
|
263
|
+
"feature_dim": int(embedding_array.shape[-1]) if embedding_array.ndim else 1,
|
|
264
|
+
"num_slides": num_slides,
|
|
265
|
+
}
|
|
266
|
+
if metadata:
|
|
267
|
+
patient_metadata.update(metadata)
|
|
268
|
+
_write_metadata(metadata_path, patient_metadata)
|
|
269
|
+
return PatientEmbeddingArtifact(
|
|
270
|
+
patient_id=patient_id,
|
|
271
|
+
path=artifact_path,
|
|
272
|
+
metadata_path=metadata_path,
|
|
273
|
+
format=output_format,
|
|
274
|
+
feature_dim=patient_metadata["feature_dim"],
|
|
275
|
+
num_slides=num_slides,
|
|
276
|
+
)
|
|
277
|
+
|
|
278
|
+
|
|
226
279
|
def write_hierarchical_embeddings(
|
|
227
280
|
sample_id: str,
|
|
228
281
|
features,
|
|
@@ -7,20 +7,21 @@ import slide2vec.progress as progress
|
|
|
7
7
|
|
|
8
8
|
def get_args_parser(add_help: bool = True):
|
|
9
9
|
parser = argparse.ArgumentParser("slide2vec", add_help=add_help)
|
|
10
|
-
parser.add_argument("
|
|
10
|
+
parser.add_argument("config_file", metavar="CONFIG", help="path to config file")
|
|
11
11
|
parser.add_argument("--skip-datetime", action="store_true", help="skip run id datetime prefix")
|
|
12
12
|
parser.add_argument("--tiling-only", action="store_true", help="only run slide tiling")
|
|
13
13
|
parser.add_argument("--run-on-cpu", action="store_true", help="run inference on cpu")
|
|
14
14
|
parser.add_argument("--output-dir", type=str, default=None, help="output directory to save artifacts")
|
|
15
|
-
parser.add_argument(
|
|
16
|
-
"opts",
|
|
17
|
-
help='Modify config options at the end of the command using "path.key=value".',
|
|
18
|
-
default=None,
|
|
19
|
-
nargs=argparse.REMAINDER,
|
|
20
|
-
)
|
|
21
15
|
return parser
|
|
22
16
|
|
|
23
17
|
|
|
18
|
+
def parse_args(argv=None):
|
|
19
|
+
parser = get_args_parser(add_help=True)
|
|
20
|
+
args, opts = parser.parse_known_args(argv)
|
|
21
|
+
args.opts = opts
|
|
22
|
+
return args
|
|
23
|
+
|
|
24
|
+
|
|
24
25
|
def build_model_and_pipeline(args):
|
|
25
26
|
cfg, _cfg_path = setup(args)
|
|
26
27
|
hf_login()
|
|
@@ -39,8 +40,7 @@ def build_model_and_pipeline(args):
|
|
|
39
40
|
|
|
40
41
|
|
|
41
42
|
def main(argv=None):
|
|
42
|
-
|
|
43
|
-
args = parser.parse_args(argv)
|
|
43
|
+
args = parse_args(argv)
|
|
44
44
|
pipeline, cfg = build_model_and_pipeline(args)
|
|
45
45
|
reporter = progress.create_cli_progress_reporter(output_dir=getattr(cfg, "output_dir", None))
|
|
46
46
|
with progress.activate_progress_reporter(reporter):
|
|
@@ -50,3 +50,6 @@ def main(argv=None):
|
|
|
50
50
|
)
|
|
51
51
|
|
|
52
52
|
|
|
53
|
+
def entrypoint(argv=None):
|
|
54
|
+
main(argv)
|
|
55
|
+
return 0
|
|
@@ -13,6 +13,7 @@ model:
|
|
|
13
13
|
output_variant: # requested output variant for presets that expose multiple outputs
|
|
14
14
|
batch_size: 32
|
|
15
15
|
save_tile_embeddings: false # whether to save tile embeddings alongside the pooled slide embedding when level is "slide"
|
|
16
|
+
save_slide_embeddings: false # whether to save per-slide embeddings when level is "patient" (e.g. moozy); requires a 'patient_id' column in the input CSV
|
|
16
17
|
save_latents: false # whether to save the latent representations from the model alongside the slide embedding (only supported for 'prism')
|
|
17
18
|
allow_non_recommended_settings: false # when true, non-recommended spacing / tile size / precision combinations warn instead of erroring
|
|
18
19
|
|
|
@@ -6,6 +6,7 @@ the ``models`` subpackage.
|
|
|
6
6
|
|
|
7
7
|
from slide2vec.encoders.base import (
|
|
8
8
|
Encoder,
|
|
9
|
+
PatientEncoder,
|
|
9
10
|
SlideEncoder,
|
|
10
11
|
TileEncoder,
|
|
11
12
|
TimmTileEncoder,
|
|
@@ -24,6 +25,7 @@ from slide2vec.encoders import models as _models_pkg # noqa: F401
|
|
|
24
25
|
|
|
25
26
|
__all__ = [
|
|
26
27
|
"Encoder",
|
|
28
|
+
"PatientEncoder",
|
|
27
29
|
"TileEncoder",
|
|
28
30
|
"SlideEncoder",
|
|
29
31
|
"TimmTileEncoder",
|
|
@@ -96,6 +96,33 @@ class SlideEncoder(Encoder):
|
|
|
96
96
|
return coordinates
|
|
97
97
|
|
|
98
98
|
|
|
99
|
+
class PatientEncoder(Encoder):
|
|
100
|
+
"""Base class for encoders that aggregate slide embeddings into patient embeddings."""
|
|
101
|
+
|
|
102
|
+
tile_encoder: TileEncoder | None = None
|
|
103
|
+
|
|
104
|
+
def encode_tiles(self, batch: Tensor) -> Tensor:
|
|
105
|
+
if self.tile_encoder is None:
|
|
106
|
+
raise AttributeError("patient encoders must attach a tile_encoder before encoding tiles")
|
|
107
|
+
return self.tile_encoder.encode_tiles(batch)
|
|
108
|
+
|
|
109
|
+
@abstractmethod
|
|
110
|
+
def encode_slide(
|
|
111
|
+
self,
|
|
112
|
+
tile_features: Tensor,
|
|
113
|
+
coordinates: Tensor | None = None,
|
|
114
|
+
*,
|
|
115
|
+
tile_size_lv0: int | None = None,
|
|
116
|
+
) -> Tensor:
|
|
117
|
+
"""Pool tile-level features into a single slide-level embedding."""
|
|
118
|
+
...
|
|
119
|
+
|
|
120
|
+
@abstractmethod
|
|
121
|
+
def encode_patient(self, slide_embeddings: Tensor) -> Tensor:
|
|
122
|
+
"""Aggregate slide embeddings [S, D] into a single patient-level embedding [D]."""
|
|
123
|
+
...
|
|
124
|
+
|
|
125
|
+
|
|
99
126
|
class TimmTileEncoder(TileEncoder):
|
|
100
127
|
"""Convenience base for timm-backed tile encoders."""
|
|
101
128
|
|
|
@@ -8,7 +8,9 @@ from . import (
|
|
|
8
8
|
gigapath,
|
|
9
9
|
hibou,
|
|
10
10
|
hoptimus,
|
|
11
|
+
lunit,
|
|
11
12
|
midnight,
|
|
13
|
+
moozy,
|
|
12
14
|
musk,
|
|
13
15
|
phikon,
|
|
14
16
|
prost40m,
|
|
@@ -23,7 +25,9 @@ __all__ = [
|
|
|
23
25
|
"gigapath",
|
|
24
26
|
"hibou",
|
|
25
27
|
"hoptimus",
|
|
28
|
+
"lunit",
|
|
26
29
|
"midnight",
|
|
30
|
+
"moozy",
|
|
27
31
|
"musk",
|
|
28
32
|
"phikon",
|
|
29
33
|
"prost40m",
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
"""Lunit ViT-S/8 tile encoder implementation."""
|
|
2
|
+
|
|
3
|
+
from slide2vec.encoders.base import TimmTileEncoder
|
|
4
|
+
from slide2vec.encoders.registry import register_encoder
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
@register_encoder(
|
|
8
|
+
"lunit",
|
|
9
|
+
output_variants={"default": {"encode_dim": 384}},
|
|
10
|
+
default_output_variant="default",
|
|
11
|
+
input_size=224,
|
|
12
|
+
supported_spacing_um=0.5,
|
|
13
|
+
precision="fp32",
|
|
14
|
+
source="1aurent/vit_small_patch8_224.lunit_dino",
|
|
15
|
+
)
|
|
16
|
+
class LunitTileEncoder(TimmTileEncoder):
|
|
17
|
+
def __init__(self, *, output_variant: str | None = None):
|
|
18
|
+
super().__init__(
|
|
19
|
+
"hf_hub:1aurent/vit_small_patch8_224.lunit_dino",
|
|
20
|
+
output_variant=output_variant,
|
|
21
|
+
)
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
"""MOOZY slide and patient encoder implementations."""
|
|
2
|
+
|
|
3
|
+
import torch
|
|
4
|
+
|
|
5
|
+
from slide2vec.encoders.base import PatientEncoder, SlideEncoder, preferred_default_device, resolve_requested_output_variant
|
|
6
|
+
from .loading import load_moozy_inference_components
|
|
7
|
+
from slide2vec.encoders.registry import register_encoder
|
|
8
|
+
|
|
9
|
+
__all__ = [
|
|
10
|
+
"MOOZYSlideEncoder",
|
|
11
|
+
"MOOZYPatientEncoder",
|
|
12
|
+
]
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@register_encoder(
|
|
16
|
+
"moozy-slide",
|
|
17
|
+
level="slide",
|
|
18
|
+
tile_encoder="lunit",
|
|
19
|
+
tile_encoder_output_variant="default",
|
|
20
|
+
output_variants={"default": {"encode_dim": 768}},
|
|
21
|
+
default_output_variant="default",
|
|
22
|
+
supported_spacing_um=0.5,
|
|
23
|
+
precision="fp32",
|
|
24
|
+
source="AtlasAnalyticsLab/MOOZY",
|
|
25
|
+
)
|
|
26
|
+
class MOOZYSlideEncoder(SlideEncoder):
|
|
27
|
+
def __init__(self, *, output_variant: str | None = None):
|
|
28
|
+
components = load_moozy_inference_components(device=torch.device("cpu"))
|
|
29
|
+
self._model = components.slide_encoder.eval()
|
|
30
|
+
self._device = preferred_default_device()
|
|
31
|
+
self._output_variant = resolve_requested_output_variant(output_variant)
|
|
32
|
+
|
|
33
|
+
@property
|
|
34
|
+
def encode_dim(self) -> int:
|
|
35
|
+
return 768
|
|
36
|
+
|
|
37
|
+
@property
|
|
38
|
+
def device(self) -> torch.device:
|
|
39
|
+
return self._device
|
|
40
|
+
|
|
41
|
+
def to(self, device: torch.device | str) -> "MOOZYSlideEncoder":
|
|
42
|
+
self._device = torch.device(device)
|
|
43
|
+
self._model = self._model.to(self._device)
|
|
44
|
+
return self
|
|
45
|
+
|
|
46
|
+
def encode_slide(
|
|
47
|
+
self,
|
|
48
|
+
tile_features: torch.Tensor,
|
|
49
|
+
coordinates: torch.Tensor | None = None,
|
|
50
|
+
*,
|
|
51
|
+
tile_size_lv0: int | None = None,
|
|
52
|
+
) -> torch.Tensor:
|
|
53
|
+
if coordinates is None or tile_size_lv0 is None:
|
|
54
|
+
raise ValueError("MOOZY slide encoding requires coordinates and tile_size_lv0")
|
|
55
|
+
# MOOZYSlideEncoder expects [B, crop_h, crop_w, feat_dim]; use [1, 1, N, D]
|
|
56
|
+
x = tile_features.unsqueeze(0).unsqueeze(0)
|
|
57
|
+
coords = coordinates.unsqueeze(0).to(torch.float32)
|
|
58
|
+
patch_sizes = torch.tensor([tile_size_lv0], dtype=torch.float32, device=tile_features.device)
|
|
59
|
+
cls, _, _ = self._model(x, coords_xy=coords, patch_sizes=patch_sizes)
|
|
60
|
+
return cls.squeeze(0)
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
@register_encoder(
|
|
64
|
+
"moozy",
|
|
65
|
+
level="patient",
|
|
66
|
+
tile_encoder="lunit",
|
|
67
|
+
tile_encoder_output_variant="default",
|
|
68
|
+
output_variants={"default": {"encode_dim": 768}},
|
|
69
|
+
default_output_variant="default",
|
|
70
|
+
supported_spacing_um=0.5,
|
|
71
|
+
precision="fp32",
|
|
72
|
+
source="AtlasAnalyticsLab/MOOZY",
|
|
73
|
+
)
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
class MOOZYPatientEncoder(PatientEncoder):
|
|
77
|
+
def __init__(self, *, output_variant: str | None = None):
|
|
78
|
+
components = load_moozy_inference_components(device=torch.device("cpu"))
|
|
79
|
+
self._slide_model = components.slide_encoder.eval()
|
|
80
|
+
self._case_transformer = components.case_transformer.eval()
|
|
81
|
+
self._device = preferred_default_device()
|
|
82
|
+
self._output_variant = resolve_requested_output_variant(output_variant)
|
|
83
|
+
|
|
84
|
+
@property
|
|
85
|
+
def encode_dim(self) -> int:
|
|
86
|
+
return 768
|
|
87
|
+
|
|
88
|
+
@property
|
|
89
|
+
def device(self) -> torch.device:
|
|
90
|
+
return self._device
|
|
91
|
+
|
|
92
|
+
def to(self, device: torch.device | str) -> "MOOZYPatientEncoder":
|
|
93
|
+
self._device = torch.device(device)
|
|
94
|
+
self._slide_model = self._slide_model.to(self._device)
|
|
95
|
+
self._case_transformer = self._case_transformer.to(self._device)
|
|
96
|
+
return self
|
|
97
|
+
|
|
98
|
+
def encode_slide(
|
|
99
|
+
self,
|
|
100
|
+
tile_features: torch.Tensor,
|
|
101
|
+
coordinates: torch.Tensor | None = None,
|
|
102
|
+
*,
|
|
103
|
+
tile_size_lv0: int | None = None,
|
|
104
|
+
) -> torch.Tensor:
|
|
105
|
+
if coordinates is None or tile_size_lv0 is None:
|
|
106
|
+
raise ValueError("MOOZY patient encoding requires coordinates and tile_size_lv0")
|
|
107
|
+
x = tile_features.unsqueeze(0).unsqueeze(0)
|
|
108
|
+
coords = coordinates.unsqueeze(0).to(torch.float32)
|
|
109
|
+
patch_sizes = torch.tensor([tile_size_lv0], dtype=torch.float32, device=tile_features.device)
|
|
110
|
+
cls, _, _ = self._slide_model(x, coords_xy=coords, patch_sizes=patch_sizes)
|
|
111
|
+
return cls.squeeze(0)
|
|
112
|
+
|
|
113
|
+
def encode_patient(self, slide_embeddings: torch.Tensor) -> torch.Tensor:
|
|
114
|
+
return self._case_transformer(slide_embeddings)
|