slide2vec 4.1.0__tar.gz → 4.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {slide2vec-4.1.0 → slide2vec-4.2.0}/PKG-INFO +10 -8
- {slide2vec-4.1.0 → slide2vec-4.2.0}/README.md +5 -5
- {slide2vec-4.1.0 → slide2vec-4.2.0}/pyproject.toml +8 -5
- {slide2vec-4.1.0 → slide2vec-4.2.0}/slide2vec/__init__.py +1 -1
- {slide2vec-4.1.0 → slide2vec-4.2.0}/slide2vec/api.py +122 -35
- {slide2vec-4.1.0 → slide2vec-4.2.0}/slide2vec/artifacts.py +53 -0
- {slide2vec-4.1.0 → slide2vec-4.2.0}/slide2vec/cli.py +12 -9
- {slide2vec-4.1.0 → slide2vec-4.2.0}/slide2vec/configs/default.yaml +7 -6
- {slide2vec-4.1.0 → slide2vec-4.2.0}/slide2vec/data/tile_reader.py +6 -6
- {slide2vec-4.1.0 → slide2vec-4.2.0}/slide2vec/encoders/__init__.py +2 -0
- {slide2vec-4.1.0 → slide2vec-4.2.0}/slide2vec/encoders/base.py +28 -1
- {slide2vec-4.1.0 → slide2vec-4.2.0}/slide2vec/encoders/models/__init__.py +4 -0
- {slide2vec-4.1.0 → slide2vec-4.2.0}/slide2vec/encoders/models/gigapath.py +2 -2
- slide2vec-4.2.0/slide2vec/encoders/models/lunit.py +21 -0
- slide2vec-4.2.0/slide2vec/encoders/models/moozy/__init__.py +114 -0
- slide2vec-4.2.0/slide2vec/encoders/models/moozy/blocks.py +272 -0
- slide2vec-4.2.0/slide2vec/encoders/models/moozy/case.py +91 -0
- slide2vec-4.2.0/slide2vec/encoders/models/moozy/loading.py +103 -0
- slide2vec-4.2.0/slide2vec/encoders/models/moozy/slide.py +152 -0
- slide2vec-4.2.0/slide2vec/encoders/models/moozy/types.py +13 -0
- {slide2vec-4.1.0 → slide2vec-4.2.0}/slide2vec/encoders/registry.py +8 -7
- {slide2vec-4.1.0 → slide2vec-4.2.0}/slide2vec/encoders/validation.py +8 -8
- {slide2vec-4.1.0 → slide2vec-4.2.0}/slide2vec/inference.py +479 -88
- {slide2vec-4.1.0 → slide2vec-4.2.0}/slide2vec/progress.py +12 -0
- {slide2vec-4.1.0 → slide2vec-4.2.0}/slide2vec/utils/config.py +18 -13
- {slide2vec-4.1.0 → slide2vec-4.2.0}/slide2vec/utils/log_utils.py +7 -1
- {slide2vec-4.1.0 → slide2vec-4.2.0}/slide2vec/utils/tiling_io.py +16 -0
- {slide2vec-4.1.0 → slide2vec-4.2.0}/slide2vec.egg-info/PKG-INFO +10 -8
- {slide2vec-4.1.0 → slide2vec-4.2.0}/slide2vec.egg-info/SOURCES.txt +7 -0
- slide2vec-4.2.0/slide2vec.egg-info/entry_points.txt +2 -0
- {slide2vec-4.1.0 → slide2vec-4.2.0}/slide2vec.egg-info/requires.txt +5 -2
- {slide2vec-4.1.0 → slide2vec-4.2.0}/tests/test_batch_collator_timing.py +5 -5
- {slide2vec-4.1.0 → slide2vec-4.2.0}/tests/test_output_consistency.py +5 -4
- {slide2vec-4.1.0 → slide2vec-4.2.0}/tests/test_progress.py +42 -2
- {slide2vec-4.1.0 → slide2vec-4.2.0}/tests/test_regression_core.py +82 -48
- {slide2vec-4.1.0 → slide2vec-4.2.0}/tests/test_regression_inference.py +149 -52
- {slide2vec-4.1.0 → slide2vec-4.2.0}/tests/test_regression_models.py +7 -7
- slide2vec-4.1.0/slide2vec.egg-info/entry_points.txt +0 -2
- {slide2vec-4.1.0 → slide2vec-4.2.0}/LICENSE +0 -0
- {slide2vec-4.1.0 → slide2vec-4.2.0}/setup.cfg +0 -0
- {slide2vec-4.1.0 → slide2vec-4.2.0}/slide2vec/__main__.py +0 -0
- {slide2vec-4.1.0 → slide2vec-4.2.0}/slide2vec/configs/__init__.py +0 -0
- {slide2vec-4.1.0 → slide2vec-4.2.0}/slide2vec/data/__init__.py +0 -0
- {slide2vec-4.1.0 → slide2vec-4.2.0}/slide2vec/data/dataset.py +0 -0
- {slide2vec-4.1.0 → slide2vec-4.2.0}/slide2vec/data/tile_store.py +0 -0
- {slide2vec-4.1.0 → slide2vec-4.2.0}/slide2vec/distributed/__init__.py +0 -0
- {slide2vec-4.1.0 → slide2vec-4.2.0}/slide2vec/distributed/direct_embed_worker.py +0 -0
- {slide2vec-4.1.0 → slide2vec-4.2.0}/slide2vec/distributed/pipeline_worker.py +0 -0
- {slide2vec-4.1.0 → slide2vec-4.2.0}/slide2vec/encoders/models/conch.py +0 -0
- {slide2vec-4.1.0 → slide2vec-4.2.0}/slide2vec/encoders/models/hibou.py +0 -0
- {slide2vec-4.1.0 → slide2vec-4.2.0}/slide2vec/encoders/models/hoptimus.py +0 -0
- {slide2vec-4.1.0 → slide2vec-4.2.0}/slide2vec/encoders/models/midnight.py +0 -0
- {slide2vec-4.1.0 → slide2vec-4.2.0}/slide2vec/encoders/models/musk.py +0 -0
- {slide2vec-4.1.0 → slide2vec-4.2.0}/slide2vec/encoders/models/phikon.py +0 -0
- {slide2vec-4.1.0 → slide2vec-4.2.0}/slide2vec/encoders/models/prism.py +0 -0
- {slide2vec-4.1.0 → slide2vec-4.2.0}/slide2vec/encoders/models/prost40m.py +0 -0
- {slide2vec-4.1.0 → slide2vec-4.2.0}/slide2vec/encoders/models/titan.py +0 -0
- {slide2vec-4.1.0 → slide2vec-4.2.0}/slide2vec/encoders/models/uni.py +0 -0
- {slide2vec-4.1.0 → slide2vec-4.2.0}/slide2vec/encoders/models/virchow.py +0 -0
- {slide2vec-4.1.0 → slide2vec-4.2.0}/slide2vec/main.py +0 -0
- {slide2vec-4.1.0 → slide2vec-4.2.0}/slide2vec/model_settings.py +0 -0
- {slide2vec-4.1.0 → slide2vec-4.2.0}/slide2vec/registry.py +0 -0
- {slide2vec-4.1.0 → slide2vec-4.2.0}/slide2vec/resources.py +0 -0
- {slide2vec-4.1.0 → slide2vec-4.2.0}/slide2vec/runtime_types.py +0 -0
- {slide2vec-4.1.0 → slide2vec-4.2.0}/slide2vec/utils/__init__.py +0 -0
- {slide2vec-4.1.0 → slide2vec-4.2.0}/slide2vec/utils/coordinates.py +0 -0
- {slide2vec-4.1.0 → slide2vec-4.2.0}/slide2vec/utils/utils.py +0 -0
- {slide2vec-4.1.0 → slide2vec-4.2.0}/slide2vec.egg-info/dependency_links.txt +0 -0
- {slide2vec-4.1.0 → slide2vec-4.2.0}/slide2vec.egg-info/not-zip-safe +0 -0
- {slide2vec-4.1.0 → slide2vec-4.2.0}/slide2vec.egg-info/top_level.txt +0 -0
- {slide2vec-4.1.0 → slide2vec-4.2.0}/tests/test_encoder_registry.py +0 -0
- {slide2vec-4.1.0 → slide2vec-4.2.0}/tests/test_hs2p_package_cutover.py +0 -0
- {slide2vec-4.1.0 → slide2vec-4.2.0}/tests/test_packaging_metadata.py +0 -0
- {slide2vec-4.1.0 → slide2vec-4.2.0}/tests/test_tile_store.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: slide2vec
|
|
3
|
-
Version: 4.1.0
|
|
3
|
+
Version: 4.2.0
|
|
4
4
|
Summary: Embedding of whole slide images with Foundation Models
|
|
5
5
|
Author-email: Clément Grisi <clement.grisi@radboudumc.nl>
|
|
6
6
|
License-Expression: Apache-2.0
|
|
@@ -15,7 +15,7 @@ Classifier: Programming Language :: Python :: 3.13
|
|
|
15
15
|
Requires-Python: >=3.10
|
|
16
16
|
Description-Content-Type: text/markdown
|
|
17
17
|
License-File: LICENSE
|
|
18
|
-
Requires-Dist: hs2p[asap,cucim,openslide,vips]>=3.1
|
|
18
|
+
Requires-Dist: hs2p[asap,cucim,openslide,vips]>=3.2.1
|
|
19
19
|
Requires-Dist: omegaconf
|
|
20
20
|
Requires-Dist: matplotlib
|
|
21
21
|
Requires-Dist: numpy<2
|
|
@@ -50,6 +50,8 @@ Requires-Dist: xformers==0.0.31; extra == "prism"
|
|
|
50
50
|
Provides-Extra: hibou
|
|
51
51
|
Requires-Dist: scipy~=1.8.1; extra == "hibou"
|
|
52
52
|
Requires-Dist: scikit-image~=0.19.3; extra == "hibou"
|
|
53
|
+
Provides-Extra: moozy
|
|
54
|
+
Requires-Dist: huggingface_hub<1.0,>=0.30.0; extra == "moozy"
|
|
53
55
|
Provides-Extra: titan
|
|
54
56
|
Requires-Dist: torch==2.0.1; extra == "titan"
|
|
55
57
|
Requires-Dist: timm==1.0.3; extra == "titan"
|
|
@@ -63,7 +65,7 @@ Requires-Dist: numpy<2; extra == "fm"
|
|
|
63
65
|
Requires-Dist: pandas; extra == "fm"
|
|
64
66
|
Requires-Dist: pillow; extra == "fm"
|
|
65
67
|
Requires-Dist: rich; extra == "fm"
|
|
66
|
-
Requires-Dist: hs2p[asap,cucim,openslide,vips]>=3.1
|
|
68
|
+
Requires-Dist: hs2p[asap,cucim,openslide,vips]>=3.2.1; extra == "fm"
|
|
67
69
|
Requires-Dist: wandb; extra == "fm"
|
|
68
70
|
Requires-Dist: torch<2.8,>=2.3; extra == "fm"
|
|
69
71
|
Requires-Dist: torchvision>=0.18.0; extra == "fm"
|
|
@@ -143,8 +145,8 @@ from slide2vec import ExecutionOptions, Pipeline, PreprocessingConfig
|
|
|
143
145
|
pipeline = Pipeline(
|
|
144
146
|
model=model,
|
|
145
147
|
preprocessing=PreprocessingConfig(
|
|
146
|
-
|
|
147
|
-
|
|
148
|
+
requested_spacing_um=0.5,
|
|
149
|
+
requested_tile_size_px=224,
|
|
148
150
|
tissue_threshold=0.1,
|
|
149
151
|
),
|
|
150
152
|
execution=ExecutionOptions(output_dir="outputs/demo"),
|
|
@@ -160,8 +162,8 @@ Tile embeddings can be spatially grouped into regions for downstream models that
|
|
|
160
162
|
|
|
161
163
|
```python
|
|
162
164
|
preprocessing = PreprocessingConfig(
|
|
163
|
-
|
|
164
|
-
|
|
165
|
+
requested_spacing_um=0.5,
|
|
166
|
+
requested_tile_size_px=224,
|
|
165
167
|
region_tile_multiple=6, # 6x6 tiles per region
|
|
166
168
|
)
|
|
167
169
|
embedded = model.embed_slide("/path/to/slide.svs", preprocessing=preprocessing)
|
|
@@ -210,7 +212,7 @@ The CLI is a thin wrapper over the package API.
|
|
|
210
212
|
Bundled configs live under `slide2vec/configs/preprocessing/` and `slide2vec/configs/models/`.
|
|
211
213
|
|
|
212
214
|
```shell
|
|
213
|
-
|
|
215
|
+
slide2vec /path/to/config.yaml
|
|
214
216
|
```
|
|
215
217
|
|
|
216
218
|
By default, manifest-driven CLI runs use all available GPUs. Set `speed.num_gpus=4` when you want to cap the sharding explicitly.
|
|
@@ -45,8 +45,8 @@ from slide2vec import ExecutionOptions, Pipeline, PreprocessingConfig
|
|
|
45
45
|
pipeline = Pipeline(
|
|
46
46
|
model=model,
|
|
47
47
|
preprocessing=PreprocessingConfig(
|
|
48
|
-
|
|
49
|
-
|
|
48
|
+
requested_spacing_um=0.5,
|
|
49
|
+
requested_tile_size_px=224,
|
|
50
50
|
tissue_threshold=0.1,
|
|
51
51
|
),
|
|
52
52
|
execution=ExecutionOptions(output_dir="outputs/demo"),
|
|
@@ -62,8 +62,8 @@ Tile embeddings can be spatially grouped into regions for downstream models that
|
|
|
62
62
|
|
|
63
63
|
```python
|
|
64
64
|
preprocessing = PreprocessingConfig(
|
|
65
|
-
|
|
66
|
-
|
|
65
|
+
requested_spacing_um=0.5,
|
|
66
|
+
requested_tile_size_px=224,
|
|
67
67
|
region_tile_multiple=6, # 6x6 tiles per region
|
|
68
68
|
)
|
|
69
69
|
embedded = model.embed_slide("/path/to/slide.svs", preprocessing=preprocessing)
|
|
@@ -112,7 +112,7 @@ The CLI is a thin wrapper over the package API.
|
|
|
112
112
|
Bundled configs live under `slide2vec/configs/preprocessing/` and `slide2vec/configs/models/`.
|
|
113
113
|
|
|
114
114
|
```shell
|
|
115
|
-
|
|
115
|
+
slide2vec /path/to/config.yaml
|
|
116
116
|
```
|
|
117
117
|
|
|
118
118
|
By default, manifest-driven CLI runs use all available GPUs. Set `speed.num_gpus=4` when you want to cap the sharding explicitly.
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "slide2vec"
|
|
7
|
-
version = "4.1.0"
|
|
7
|
+
version = "4.2.0"
|
|
8
8
|
description = "Embedding of whole slide images with Foundation Models"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
requires-python = ">=3.10"
|
|
@@ -21,7 +21,7 @@ classifiers = [
|
|
|
21
21
|
"Programming Language :: Python :: 3.13",
|
|
22
22
|
]
|
|
23
23
|
dependencies = [
|
|
24
|
-
"hs2p[asap,cucim,openslide,vips]>=3.1
|
|
24
|
+
"hs2p[asap,cucim,openslide,vips]>=3.2.1",
|
|
25
25
|
"omegaconf",
|
|
26
26
|
"matplotlib",
|
|
27
27
|
"numpy<2",
|
|
@@ -42,7 +42,7 @@ Homepage = "https://github.com/clemsgrs/slide2vec"
|
|
|
42
42
|
"Bug Tracker" = "https://github.com/clemsgrs/slide2vec/issues"
|
|
43
43
|
|
|
44
44
|
[project.scripts]
|
|
45
|
-
slide2vec = "slide2vec.cli:
|
|
45
|
+
slide2vec = "slide2vec.cli:entrypoint"
|
|
46
46
|
|
|
47
47
|
[project.optional-dependencies]
|
|
48
48
|
hoptimus = [
|
|
@@ -71,6 +71,9 @@ hibou = [
|
|
|
71
71
|
"scipy~=1.8.1",
|
|
72
72
|
"scikit-image~=0.19.3",
|
|
73
73
|
]
|
|
74
|
+
moozy = [
|
|
75
|
+
"huggingface_hub>=0.30.0,<1.0",
|
|
76
|
+
]
|
|
74
77
|
titan = [
|
|
75
78
|
"torch==2.0.1",
|
|
76
79
|
"timm==1.0.3",
|
|
@@ -85,7 +88,7 @@ fm = [
|
|
|
85
88
|
"pandas",
|
|
86
89
|
"pillow",
|
|
87
90
|
"rich",
|
|
88
|
-
"hs2p[asap,cucim,openslide,vips]>=3.1
|
|
91
|
+
"hs2p[asap,cucim,openslide,vips]>=3.2.1",
|
|
89
92
|
"wandb",
|
|
90
93
|
"torch>=2.3,<2.8",
|
|
91
94
|
"torchvision>=0.18.0",
|
|
@@ -154,7 +157,7 @@ no_implicit_reexport = true
|
|
|
154
157
|
max-line-length = 160
|
|
155
158
|
|
|
156
159
|
[tool.bumpver]
|
|
157
|
-
current_version = "4.1.0"
|
|
160
|
+
current_version = "4.2.0"
|
|
158
161
|
version_pattern = "MAJOR.MINOR.PATCH"
|
|
159
162
|
commit = false # We do version bumping in CI, not as a commit
|
|
160
163
|
tag = false # Git tag already exists — we don't auto-tag
|
|
@@ -2,7 +2,7 @@ from slide2vec.api import EmbeddedSlide, ExecutionOptions, Model, Pipeline, Prep
|
|
|
2
2
|
from slide2vec.artifacts import HierarchicalEmbeddingArtifact, SlideEmbeddingArtifact, TileEmbeddingArtifact
|
|
3
3
|
|
|
4
4
|
|
|
5
|
-
__version__ = "4.1.0"
|
|
5
|
+
__version__ = "4.2.0"
|
|
6
6
|
|
|
7
7
|
__all__ = [
|
|
8
8
|
"Model",
|
|
@@ -11,6 +11,7 @@ from hs2p import SlideSpec
|
|
|
11
11
|
|
|
12
12
|
from slide2vec.artifacts import (
|
|
13
13
|
HierarchicalEmbeddingArtifact,
|
|
14
|
+
PatientEmbeddingArtifact,
|
|
14
15
|
SlideEmbeddingArtifact,
|
|
15
16
|
TileEmbeddingArtifact,
|
|
16
17
|
)
|
|
@@ -42,9 +43,9 @@ TilingResultsInput = Sequence[Any] | Mapping[str, Any]
|
|
|
42
43
|
@dataclass(frozen=True, kw_only=True)
|
|
43
44
|
class PreprocessingConfig:
|
|
44
45
|
backend: str = "auto"
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
46
|
+
requested_spacing_um: float | None = None
|
|
47
|
+
requested_tile_size_px: int | None = None
|
|
48
|
+
requested_region_size_px: int | None = None
|
|
48
49
|
region_tile_multiple: int | None = None
|
|
49
50
|
tolerance: float = 0.05
|
|
50
51
|
overlap: float = 0.0
|
|
@@ -75,11 +76,11 @@ class PreprocessingConfig:
|
|
|
75
76
|
preview_downsample = int(preview_cfg.downsample)
|
|
76
77
|
return cls(
|
|
77
78
|
backend=tiling.backend,
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
79
|
+
requested_spacing_um=float(tiling.params.requested_spacing_um),
|
|
80
|
+
requested_tile_size_px=int(tiling.params.requested_tile_size_px),
|
|
81
|
+
requested_region_size_px=(
|
|
81
82
|
int(v)
|
|
82
|
-
if (v := getattr(tiling.params, "
|
|
83
|
+
if (v := getattr(tiling.params, "requested_region_size_px", None)) is not None
|
|
83
84
|
else None
|
|
84
85
|
),
|
|
85
86
|
region_tile_multiple=(
|
|
@@ -127,6 +128,7 @@ class ExecutionOptions:
|
|
|
127
128
|
prefetch_factor: int = 4
|
|
128
129
|
persistent_workers: bool = True
|
|
129
130
|
save_tile_embeddings: bool = False
|
|
131
|
+
save_slide_embeddings: bool = False
|
|
130
132
|
save_latents: bool = False
|
|
131
133
|
|
|
132
134
|
@classmethod
|
|
@@ -151,6 +153,7 @@ class ExecutionOptions:
|
|
|
151
153
|
prefetch_factor=prefetch_factor,
|
|
152
154
|
persistent_workers=persistent_workers,
|
|
153
155
|
save_tile_embeddings=bool(cfg.model.save_tile_embeddings),
|
|
156
|
+
save_slide_embeddings=bool(cfg.model.save_slide_embeddings),
|
|
154
157
|
save_latents=bool(cfg.model.save_latents),
|
|
155
158
|
)
|
|
156
159
|
|
|
@@ -200,9 +203,17 @@ class RunResult:
|
|
|
200
203
|
tile_artifacts: list[TileEmbeddingArtifact]
|
|
201
204
|
hierarchical_artifacts: list[HierarchicalEmbeddingArtifact]
|
|
202
205
|
slide_artifacts: list[SlideEmbeddingArtifact]
|
|
206
|
+
patient_artifacts: list[PatientEmbeddingArtifact] = field(default_factory=list)
|
|
203
207
|
process_list_path: Path | None = None
|
|
204
208
|
|
|
205
209
|
|
|
210
|
+
@dataclass(frozen=True, kw_only=True)
|
|
211
|
+
class EmbeddedPatient:
|
|
212
|
+
patient_id: str
|
|
213
|
+
patient_embedding: Any # torch.Tensor [D]
|
|
214
|
+
slide_embeddings: dict[str, Any] # {sample_id: torch.Tensor [D]}
|
|
215
|
+
|
|
216
|
+
|
|
206
217
|
@dataclass(frozen=True, kw_only=True)
|
|
207
218
|
class EmbeddedSlide:
|
|
208
219
|
sample_id: str
|
|
@@ -343,6 +354,82 @@ class Model:
|
|
|
343
354
|
execution=resolved,
|
|
344
355
|
)
|
|
345
356
|
|
|
357
|
+
def embed_patient(
|
|
358
|
+
self,
|
|
359
|
+
slides: SlideSequence,
|
|
360
|
+
patient_id: str | None = None,
|
|
361
|
+
*,
|
|
362
|
+
preprocessing: PreprocessingConfig | None = None,
|
|
363
|
+
execution: ExecutionOptions | None = None,
|
|
364
|
+
) -> "EmbeddedPatient":
|
|
365
|
+
"""Embed a single patient's slides and return one ``EmbeddedPatient``.
|
|
366
|
+
|
|
367
|
+
Convenience wrapper around :meth:`embed_patients` for the common case
|
|
368
|
+
where all *slides* belong to the same patient.
|
|
369
|
+
|
|
370
|
+
Args:
|
|
371
|
+
slides: All slides for this patient.
|
|
372
|
+
patient_id: Optional patient identifier applied to every slide.
|
|
373
|
+
When omitted, ``patient_id`` is read from slide dict keys or
|
|
374
|
+
object attributes; slides that carry no ``patient_id`` fall
|
|
375
|
+
back to ``sample_id``.
|
|
376
|
+
"""
|
|
377
|
+
patient_id_map: dict | None = None
|
|
378
|
+
if patient_id is not None:
|
|
379
|
+
patient_id_map = {}
|
|
380
|
+
for s in slides:
|
|
381
|
+
if isinstance(s, (str, Path)):
|
|
382
|
+
patient_id_map[Path(s).stem] = patient_id
|
|
383
|
+
elif isinstance(s, dict):
|
|
384
|
+
patient_id_map[str(s["sample_id"])] = patient_id
|
|
385
|
+
else:
|
|
386
|
+
patient_id_map[str(s.sample_id)] = patient_id
|
|
387
|
+
return self.embed_patients(
|
|
388
|
+
slides,
|
|
389
|
+
patient_id_map=patient_id_map,
|
|
390
|
+
preprocessing=preprocessing,
|
|
391
|
+
execution=execution,
|
|
392
|
+
)[0]
|
|
393
|
+
|
|
394
|
+
def embed_patients(
|
|
395
|
+
self,
|
|
396
|
+
slides: SlideSequence,
|
|
397
|
+
patient_id_map: dict | None = None,
|
|
398
|
+
*,
|
|
399
|
+
preprocessing: PreprocessingConfig | None = None,
|
|
400
|
+
execution: ExecutionOptions | None = None,
|
|
401
|
+
) -> "list[EmbeddedPatient]":
|
|
402
|
+
"""Embed slides and aggregate them into patient-level embeddings.
|
|
403
|
+
|
|
404
|
+
Requires a patient-level model (e.g. ``moozy``). For each patient
|
|
405
|
+
all contributing slide embeddings are aggregated by the model's
|
|
406
|
+
``encode_patient`` method.
|
|
407
|
+
|
|
408
|
+
Args:
|
|
409
|
+
slides: Slides to process. Each entry may be a path, a
|
|
410
|
+
``SlideSpec``, or a dict with ``sample_id`` / ``image_path``
|
|
411
|
+
keys. When *patient_id_map* is ``None`` a ``patient_id``
|
|
412
|
+
key in each dict is used to group slides.
|
|
413
|
+
patient_id_map: Optional explicit ``{sample_id: patient_id}``
|
|
414
|
+
mapping. When provided it takes precedence over any
|
|
415
|
+
``patient_id`` key embedded in the slide dicts. When
|
|
416
|
+
omitted and the slide dicts carry no ``patient_id``, each
|
|
417
|
+
slide is treated as its own patient.
|
|
418
|
+
"""
|
|
419
|
+
from slide2vec.inference import embed_patients
|
|
420
|
+
|
|
421
|
+
resolved = _coerce_execution_options(execution, model=self)
|
|
422
|
+
resolved_preprocessing = _resolve_direct_api_preprocessing(self, preprocessing)
|
|
423
|
+
with _auto_progress_reporting(output_dir=resolved.output_dir):
|
|
424
|
+
_validate_model_config(self, resolved_preprocessing, resolved)
|
|
425
|
+
return embed_patients(
|
|
426
|
+
self,
|
|
427
|
+
slides,
|
|
428
|
+
patient_id_map=patient_id_map,
|
|
429
|
+
preprocessing=resolved_preprocessing,
|
|
430
|
+
execution=resolved,
|
|
431
|
+
)
|
|
432
|
+
|
|
346
433
|
def _load_backend(self) -> LoadedModel:
|
|
347
434
|
if self._backend is None:
|
|
348
435
|
from slide2vec.inference import load_model
|
|
@@ -454,28 +541,28 @@ def _resolve_direct_api_preprocessing(
|
|
|
454
541
|
return defaults
|
|
455
542
|
|
|
456
543
|
if preprocessing is None:
|
|
457
|
-
|
|
544
|
+
requested_tile_size_px, requested_spacing_um = ensure_defaults()
|
|
458
545
|
return _resolve_hierarchical_preprocessing(
|
|
459
546
|
PreprocessingConfig(
|
|
460
547
|
backend="auto",
|
|
461
|
-
|
|
462
|
-
|
|
548
|
+
requested_spacing_um=requested_spacing_um,
|
|
549
|
+
requested_tile_size_px=requested_tile_size_px,
|
|
463
550
|
)
|
|
464
551
|
)
|
|
465
552
|
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
if
|
|
553
|
+
requested_spacing_um = preprocessing.requested_spacing_um
|
|
554
|
+
requested_tile_size_px = preprocessing.requested_tile_size_px
|
|
555
|
+
if requested_spacing_um is None or requested_tile_size_px is None:
|
|
469
556
|
default_tile_size_px, default_spacing_um = ensure_defaults()
|
|
470
|
-
if
|
|
471
|
-
|
|
472
|
-
if
|
|
473
|
-
|
|
557
|
+
if requested_spacing_um is None:
|
|
558
|
+
requested_spacing_um = default_spacing_um
|
|
559
|
+
if requested_tile_size_px is None:
|
|
560
|
+
requested_tile_size_px = default_tile_size_px
|
|
474
561
|
return _resolve_hierarchical_preprocessing(
|
|
475
562
|
replace(
|
|
476
563
|
preprocessing,
|
|
477
|
-
|
|
478
|
-
|
|
564
|
+
requested_spacing_um=requested_spacing_um,
|
|
565
|
+
requested_tile_size_px=requested_tile_size_px,
|
|
479
566
|
)
|
|
480
567
|
)
|
|
481
568
|
|
|
@@ -484,7 +571,7 @@ def _default_preprocessing_from_registry(name: str | None) -> tuple[int, float]:
|
|
|
484
571
|
if not name or name not in encoder_registry:
|
|
485
572
|
raise ValueError(
|
|
486
573
|
"Cannot infer preprocessing defaults without a registered model. "
|
|
487
|
-
"Pass preprocessing.
|
|
574
|
+
"Pass preprocessing.requested_spacing_um and preprocessing.requested_tile_size_px explicitly."
|
|
488
575
|
)
|
|
489
576
|
|
|
490
577
|
defaults = resolve_preprocessing_defaults(name)
|
|
@@ -499,7 +586,7 @@ def _validate_model_config(
|
|
|
499
586
|
name = model.name
|
|
500
587
|
if name not in encoder_registry:
|
|
501
588
|
return
|
|
502
|
-
if preprocessing.region_tile_multiple is not None or preprocessing.
|
|
589
|
+
if preprocessing.region_tile_multiple is not None or preprocessing.requested_region_size_px is not None:
|
|
503
590
|
info = encoder_registry.info(name)
|
|
504
591
|
if info["level"] != "tile":
|
|
505
592
|
raise ValueError("Hierarchical preprocessing is only supported for tile encoders")
|
|
@@ -508,8 +595,8 @@ def _validate_model_config(
|
|
|
508
595
|
precision = None if on_cpu or execution is None else execution.precision
|
|
509
596
|
validate_encoder_config(
|
|
510
597
|
name,
|
|
511
|
-
|
|
512
|
-
|
|
598
|
+
requested_tile_size_px=preprocessing.requested_tile_size_px,
|
|
599
|
+
requested_spacing_um=preprocessing.requested_spacing_um,
|
|
513
600
|
precision=precision,
|
|
514
601
|
output_variant=model._output_variant,
|
|
515
602
|
allow_non_recommended=bool(model.allow_non_recommended_settings),
|
|
@@ -518,32 +605,32 @@ def _validate_model_config(
|
|
|
518
605
|
|
|
519
606
|
def _resolve_hierarchical_preprocessing(preprocessing: PreprocessingConfig) -> PreprocessingConfig:
|
|
520
607
|
multiple = preprocessing.region_tile_multiple
|
|
521
|
-
|
|
608
|
+
requested_region_size_px = preprocessing.requested_region_size_px
|
|
522
609
|
if multiple is not None:
|
|
523
610
|
multiple = int(multiple)
|
|
524
611
|
if multiple < 2:
|
|
525
612
|
raise ValueError("region_tile_multiple must be at least 2")
|
|
526
|
-
if multiple is None and
|
|
613
|
+
if multiple is None and requested_region_size_px is None:
|
|
527
614
|
return preprocessing
|
|
528
|
-
if preprocessing.
|
|
615
|
+
if preprocessing.requested_tile_size_px is None:
|
|
529
616
|
raise ValueError(
|
|
530
|
-
"
|
|
617
|
+
"requested_tile_size_px must be resolved before deriving hierarchical region geometry"
|
|
531
618
|
)
|
|
532
|
-
if
|
|
533
|
-
|
|
619
|
+
if requested_region_size_px is None:
|
|
620
|
+
requested_region_size_px = int(preprocessing.requested_tile_size_px) * int(multiple)
|
|
534
621
|
elif multiple is None:
|
|
535
|
-
if int(
|
|
622
|
+
if int(requested_region_size_px) % int(preprocessing.requested_tile_size_px) != 0:
|
|
536
623
|
raise ValueError(
|
|
537
|
-
"
|
|
624
|
+
"requested_region_size_px must be an exact multiple of requested_tile_size_px"
|
|
538
625
|
)
|
|
539
|
-
multiple = int(
|
|
540
|
-
elif int(
|
|
626
|
+
multiple = int(requested_region_size_px) // int(preprocessing.requested_tile_size_px)
|
|
627
|
+
elif int(requested_region_size_px) != int(preprocessing.requested_tile_size_px) * int(multiple):
|
|
541
628
|
raise ValueError(
|
|
542
|
-
"
|
|
629
|
+
"requested_region_size_px must match requested_tile_size_px * region_tile_multiple"
|
|
543
630
|
)
|
|
544
631
|
return replace(
|
|
545
632
|
preprocessing,
|
|
546
|
-
|
|
633
|
+
requested_region_size_px=int(requested_region_size_px),
|
|
547
634
|
region_tile_multiple=int(multiple),
|
|
548
635
|
)
|
|
549
636
|
|
|
@@ -35,6 +35,20 @@ class SlideEmbeddingArtifact:
|
|
|
35
35
|
return load_metadata(self.metadata_path)
|
|
36
36
|
|
|
37
37
|
|
|
38
|
+
@dataclass(frozen=True, kw_only=True)
|
|
39
|
+
class PatientEmbeddingArtifact:
|
|
40
|
+
patient_id: str
|
|
41
|
+
path: Path
|
|
42
|
+
metadata_path: Path
|
|
43
|
+
format: str
|
|
44
|
+
feature_dim: int
|
|
45
|
+
num_slides: int
|
|
46
|
+
|
|
47
|
+
@property
|
|
48
|
+
def metadata(self) -> dict[str, Any]:
|
|
49
|
+
return load_metadata(self.metadata_path)
|
|
50
|
+
|
|
51
|
+
|
|
38
52
|
@dataclass(frozen=True, kw_only=True)
|
|
39
53
|
class HierarchicalEmbeddingArtifact:
|
|
40
54
|
sample_id: str
|
|
@@ -223,6 +237,45 @@ def write_slide_embeddings(
|
|
|
223
237
|
)
|
|
224
238
|
|
|
225
239
|
|
|
240
|
+
def write_patient_embeddings(
|
|
241
|
+
patient_id: str,
|
|
242
|
+
embedding,
|
|
243
|
+
*,
|
|
244
|
+
output_dir: str | Path,
|
|
245
|
+
output_format: str = "pt",
|
|
246
|
+
metadata: dict[str, Any] | None = None,
|
|
247
|
+
num_slides: int = 0,
|
|
248
|
+
) -> PatientEmbeddingArtifact:
|
|
249
|
+
output_format = _validate_output_format(output_format)
|
|
250
|
+
artifact_path, metadata_path = _setup_artifact_paths(
|
|
251
|
+
output_dir, "patient_embeddings", patient_id, output_format
|
|
252
|
+
)
|
|
253
|
+
embedding_array = _ensure_array(embedding)
|
|
254
|
+
if output_format == "pt":
|
|
255
|
+
torch.save(_ensure_tensor(embedding), artifact_path)
|
|
256
|
+
else:
|
|
257
|
+
np.savez_compressed(artifact_path, features=embedding_array)
|
|
258
|
+
|
|
259
|
+
patient_metadata = {
|
|
260
|
+
"patient_id": patient_id,
|
|
261
|
+
"artifact_type": "patient_embeddings",
|
|
262
|
+
"format": output_format,
|
|
263
|
+
"feature_dim": int(embedding_array.shape[-1]) if embedding_array.ndim else 1,
|
|
264
|
+
"num_slides": num_slides,
|
|
265
|
+
}
|
|
266
|
+
if metadata:
|
|
267
|
+
patient_metadata.update(metadata)
|
|
268
|
+
_write_metadata(metadata_path, patient_metadata)
|
|
269
|
+
return PatientEmbeddingArtifact(
|
|
270
|
+
patient_id=patient_id,
|
|
271
|
+
path=artifact_path,
|
|
272
|
+
metadata_path=metadata_path,
|
|
273
|
+
format=output_format,
|
|
274
|
+
feature_dim=patient_metadata["feature_dim"],
|
|
275
|
+
num_slides=num_slides,
|
|
276
|
+
)
|
|
277
|
+
|
|
278
|
+
|
|
226
279
|
def write_hierarchical_embeddings(
|
|
227
280
|
sample_id: str,
|
|
228
281
|
features,
|
|
@@ -7,20 +7,21 @@ import slide2vec.progress as progress
|
|
|
7
7
|
|
|
8
8
|
def get_args_parser(add_help: bool = True):
|
|
9
9
|
parser = argparse.ArgumentParser("slide2vec", add_help=add_help)
|
|
10
|
-
parser.add_argument("
|
|
10
|
+
parser.add_argument("config_file", metavar="CONFIG", help="path to config file")
|
|
11
11
|
parser.add_argument("--skip-datetime", action="store_true", help="skip run id datetime prefix")
|
|
12
12
|
parser.add_argument("--tiling-only", action="store_true", help="only run slide tiling")
|
|
13
13
|
parser.add_argument("--run-on-cpu", action="store_true", help="run inference on cpu")
|
|
14
14
|
parser.add_argument("--output-dir", type=str, default=None, help="output directory to save artifacts")
|
|
15
|
-
parser.add_argument(
|
|
16
|
-
"opts",
|
|
17
|
-
help='Modify config options at the end of the command using "path.key=value".',
|
|
18
|
-
default=None,
|
|
19
|
-
nargs=argparse.REMAINDER,
|
|
20
|
-
)
|
|
21
15
|
return parser
|
|
22
16
|
|
|
23
17
|
|
|
18
|
+
def parse_args(argv=None):
|
|
19
|
+
parser = get_args_parser(add_help=True)
|
|
20
|
+
args, opts = parser.parse_known_args(argv)
|
|
21
|
+
args.opts = opts
|
|
22
|
+
return args
|
|
23
|
+
|
|
24
|
+
|
|
24
25
|
def build_model_and_pipeline(args):
|
|
25
26
|
cfg, _cfg_path = setup(args)
|
|
26
27
|
hf_login()
|
|
@@ -39,8 +40,7 @@ def build_model_and_pipeline(args):
|
|
|
39
40
|
|
|
40
41
|
|
|
41
42
|
def main(argv=None):
|
|
42
|
-
|
|
43
|
-
args = parser.parse_args(argv)
|
|
43
|
+
args = parse_args(argv)
|
|
44
44
|
pipeline, cfg = build_model_and_pipeline(args)
|
|
45
45
|
reporter = progress.create_cli_progress_reporter(output_dir=getattr(cfg, "output_dir", None))
|
|
46
46
|
with progress.activate_progress_reporter(reporter):
|
|
@@ -50,3 +50,6 @@ def main(argv=None):
|
|
|
50
50
|
)
|
|
51
51
|
|
|
52
52
|
|
|
53
|
+
def entrypoint(argv=None):
|
|
54
|
+
main(argv)
|
|
55
|
+
return 0
|
|
@@ -13,6 +13,7 @@ model:
|
|
|
13
13
|
output_variant: # requested output variant for presets that expose multiple outputs
|
|
14
14
|
batch_size: 32
|
|
15
15
|
save_tile_embeddings: false # whether to save tile embeddings alongside the pooled slide embedding when level is "slide"
|
|
16
|
+
save_slide_embeddings: false # whether to save per-slide embeddings when level is "patient" (e.g. moozy); requires a 'patient_id' column in the input CSV
|
|
16
17
|
save_latents: false # whether to save the latent representations from the model alongside the slide embedding (only supported for 'prism')
|
|
17
18
|
allow_non_recommended_settings: false # when true, non-recommended spacing / tile size / precision combinations warn instead of erroring
|
|
18
19
|
|
|
@@ -26,10 +27,10 @@ tiling:
|
|
|
26
27
|
read_tiles_from: # path to an existing directory containing pre-extracted `.tiles.tar` tile stores to reuse instead of starting tiling from scratch
|
|
27
28
|
backend: "auto" # backend to use for slide reading; "auto" lets hs2p resolve the best backend per slide, preferring cuCIM when available
|
|
28
29
|
params:
|
|
29
|
-
|
|
30
|
+
requested_spacing_um: # spacing at which to tile the slide, in microns per pixel; filled from a preset model when available
|
|
30
31
|
tolerance: 0.05 # tolerance for matching the spacing (float between 0 and 1, deciding how much the spacing can deviate from the one specified in the slide metadata)
|
|
31
|
-
|
|
32
|
-
|
|
32
|
+
requested_tile_size_px: # size of the tiles to extract, in pixels; filled from a preset model when available
|
|
33
|
+
requested_region_size_px: # size of hierarchical parent regions in pixels; when unset and region_tile_multiple is set, derived from requested_tile_size_px * region_tile_multiple
|
|
33
34
|
region_tile_multiple: # hierarchical region grid width/height in tiles; e.g. 6 means 6x6 tiles per region
|
|
34
35
|
overlap: 0.0 # percentage of overlap between two consecutive tiles (float between 0 and 1)
|
|
35
36
|
tissue_threshold: 0.1 # minimum fraction of pixels that must be tissue to keep a tile (float between 0 and 1)
|
|
@@ -44,8 +45,8 @@ tiling:
|
|
|
44
45
|
use_otsu: false # use otsu's method instead of simple binary thresholding
|
|
45
46
|
use_hsv: true # use HSV thresholding instead of simple binary thresholding
|
|
46
47
|
filter_params:
|
|
47
|
-
ref_tile_size: ${tiling.params.
|
|
48
|
-
a_t: 4 # area filter threshold for tissue (positive integer, the minimum size of detected foreground contours to consider, relative to the reference tile size ref_tile_size, e.g. a value 10 means only detected foreground contours of size greater than 10 [ref_tile_size, ref_tile_size] tiles at spacing tiling.params.
|
|
48
|
+
ref_tile_size: ${tiling.params.requested_tile_size_px} # reference tile size at the target spacing
|
|
49
|
+
a_t: 4 # area filter threshold for tissue (positive integer, the minimum size of detected foreground contours to consider, relative to the reference tile size ref_tile_size, e.g. a value 10 means only detected foreground contours of size greater than 10 [ref_tile_size, ref_tile_size] tiles at spacing tiling.params.requested_spacing_um will be kept)
|
|
49
50
|
a_h: 2 # area filter threshold for holes (positive integer, the minimum size of detected holes/cavities in foreground contours to avoid, once again relative to the reference tile size ref_tile_size)
|
|
50
51
|
filter_white: false # whether to filter out mostly white tiles
|
|
51
52
|
filter_black: false # whether to filter out mostly black tiles
|
|
@@ -78,7 +79,7 @@ wandb:
|
|
|
78
79
|
project: "" # wandb project name
|
|
79
80
|
username: "" # wandb username
|
|
80
81
|
exp_name: "" # wandb experiment name
|
|
81
|
-
tags: ["features", "${model.name}", "${tiling.params.
|
|
82
|
+
tags: ["features", "${model.name}", "${tiling.params.requested_tile_size_px}"] # wandb tags
|
|
82
83
|
dir: "/home/user/"
|
|
83
84
|
group:
|
|
84
85
|
resume_id: "${resume_dirname}"
|
|
@@ -89,7 +89,7 @@ class WSITileReader:
|
|
|
89
89
|
self._num_cucim_workers = num_cucim_workers
|
|
90
90
|
self._gpu_decode = gpu_decode
|
|
91
91
|
self._read_level = int(tiling_result.read_level)
|
|
92
|
-
self._tile_size_px = int(tiling_result.
|
|
92
|
+
self._tile_size_px = int(tiling_result.read_tile_size_px)
|
|
93
93
|
self._x = tiling_result.x
|
|
94
94
|
self._y = tiling_result.y
|
|
95
95
|
self._reader = None
|
|
@@ -215,7 +215,7 @@ class OnTheFlyBatchTileCollator:
|
|
|
215
215
|
gpu_decode: bool = False,
|
|
216
216
|
use_supertiles: bool = True,
|
|
217
217
|
):
|
|
218
|
-
self.tile_size = int(tiling_result.
|
|
218
|
+
self.tile_size = int(tiling_result.read_tile_size_px)
|
|
219
219
|
self._reader = WSITileReader(
|
|
220
220
|
image_path,
|
|
221
221
|
tiling_result,
|
|
@@ -354,8 +354,8 @@ class OnTheFlyHierarchicalBatchCollator:
|
|
|
354
354
|
tiling_result: TilingResult,
|
|
355
355
|
region_index: np.ndarray,
|
|
356
356
|
subtile_index_within_region: np.ndarray,
|
|
357
|
-
|
|
358
|
-
|
|
357
|
+
read_region_size_px: int,
|
|
358
|
+
read_tile_size_px: int,
|
|
359
359
|
backend: str = "cucim",
|
|
360
360
|
num_cucim_workers: int = 4,
|
|
361
361
|
gpu_decode: bool = False,
|
|
@@ -363,11 +363,11 @@ class OnTheFlyHierarchicalBatchCollator:
|
|
|
363
363
|
self._region_index = np.asarray(region_index, dtype=np.int32)
|
|
364
364
|
self._subtile_index_within_region = np.asarray(subtile_index_within_region, dtype=np.int32)
|
|
365
365
|
self._tiles_per_region = int(self._subtile_index_within_region.max()) + 1 if len(self._subtile_index_within_region) else 0
|
|
366
|
-
self._tile_size = int(
|
|
366
|
+
self._tile_size = int(read_tile_size_px)
|
|
367
367
|
self._reader = WSIRegionReader(
|
|
368
368
|
image_path,
|
|
369
369
|
read_level=int(tiling_result.read_level),
|
|
370
|
-
region_size_px=int(
|
|
370
|
+
region_size_px=int(read_region_size_px),
|
|
371
371
|
backend=backend,
|
|
372
372
|
num_cucim_workers=num_cucim_workers,
|
|
373
373
|
gpu_decode=gpu_decode,
|
|
@@ -6,6 +6,7 @@ the ``models`` subpackage.
|
|
|
6
6
|
|
|
7
7
|
from slide2vec.encoders.base import (
|
|
8
8
|
Encoder,
|
|
9
|
+
PatientEncoder,
|
|
9
10
|
SlideEncoder,
|
|
10
11
|
TileEncoder,
|
|
11
12
|
TimmTileEncoder,
|
|
@@ -24,6 +25,7 @@ from slide2vec.encoders import models as _models_pkg # noqa: F401
|
|
|
24
25
|
|
|
25
26
|
__all__ = [
|
|
26
27
|
"Encoder",
|
|
28
|
+
"PatientEncoder",
|
|
27
29
|
"TileEncoder",
|
|
28
30
|
"SlideEncoder",
|
|
29
31
|
"TimmTileEncoder",
|