slide2vec 4.3.0__tar.gz → 4.4.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {slide2vec-4.3.0 → slide2vec-4.4.0}/PKG-INFO +3 -3
- {slide2vec-4.3.0 → slide2vec-4.4.0}/pyproject.toml +4 -4
- {slide2vec-4.3.0 → slide2vec-4.4.0}/slide2vec/__init__.py +3 -1
- {slide2vec-4.3.0 → slide2vec-4.4.0}/slide2vec/api.py +85 -16
- {slide2vec-4.3.0 → slide2vec-4.4.0}/slide2vec/configs/default.yaml +3 -3
- {slide2vec-4.3.0 → slide2vec-4.4.0}/slide2vec/configs/resources.py +2 -6
- {slide2vec-4.3.0 → slide2vec-4.4.0}/slide2vec/distributed/direct_embed_worker.py +12 -8
- {slide2vec-4.3.0 → slide2vec-4.4.0}/slide2vec/distributed/pipeline_worker.py +12 -11
- {slide2vec-4.3.0 → slide2vec-4.4.0}/slide2vec/encoders/validation.py +13 -9
- {slide2vec-4.3.0 → slide2vec-4.4.0}/slide2vec/inference.py +113 -59
- {slide2vec-4.3.0 → slide2vec-4.4.0}/slide2vec/runtime/batching.py +58 -27
- {slide2vec-4.3.0 → slide2vec-4.4.0}/slide2vec/runtime/distributed.py +1 -0
- {slide2vec-4.3.0 → slide2vec-4.4.0}/slide2vec/runtime/serialization.py +23 -20
- {slide2vec-4.3.0 → slide2vec-4.4.0}/slide2vec.egg-info/PKG-INFO +3 -3
- {slide2vec-4.3.0 → slide2vec-4.4.0}/slide2vec.egg-info/SOURCES.txt +2 -0
- {slide2vec-4.3.0 → slide2vec-4.4.0}/slide2vec.egg-info/requires.txt +2 -2
- slide2vec-4.4.0/tests/test_output_consistency.py +192 -0
- {slide2vec-4.3.0 → slide2vec-4.4.0}/tests/test_progress.py +68 -68
- {slide2vec-4.3.0 → slide2vec-4.4.0}/tests/test_regression_core.py +24 -16
- {slide2vec-4.3.0 → slide2vec-4.4.0}/tests/test_regression_inference.py +618 -56
- slide2vec-4.4.0/tests/test_runtime_batching.py +33 -0
- {slide2vec-4.3.0 → slide2vec-4.4.0}/LICENSE +0 -0
- {slide2vec-4.3.0 → slide2vec-4.4.0}/README.md +0 -0
- {slide2vec-4.3.0 → slide2vec-4.4.0}/setup.cfg +0 -0
- {slide2vec-4.3.0 → slide2vec-4.4.0}/slide2vec/__main__.py +0 -0
- {slide2vec-4.3.0 → slide2vec-4.4.0}/slide2vec/artifacts.py +0 -0
- {slide2vec-4.3.0 → slide2vec-4.4.0}/slide2vec/cli.py +0 -0
- {slide2vec-4.3.0 → slide2vec-4.4.0}/slide2vec/configs/__init__.py +0 -0
- {slide2vec-4.3.0 → slide2vec-4.4.0}/slide2vec/data/__init__.py +0 -0
- {slide2vec-4.3.0 → slide2vec-4.4.0}/slide2vec/data/dataset.py +0 -0
- {slide2vec-4.3.0 → slide2vec-4.4.0}/slide2vec/data/tile_reader.py +0 -0
- {slide2vec-4.3.0 → slide2vec-4.4.0}/slide2vec/data/tile_store.py +0 -0
- {slide2vec-4.3.0 → slide2vec-4.4.0}/slide2vec/distributed/__init__.py +0 -0
- {slide2vec-4.3.0 → slide2vec-4.4.0}/slide2vec/encoders/__init__.py +0 -0
- {slide2vec-4.3.0 → slide2vec-4.4.0}/slide2vec/encoders/base.py +0 -0
- {slide2vec-4.3.0 → slide2vec-4.4.0}/slide2vec/encoders/models/__init__.py +0 -0
- {slide2vec-4.3.0 → slide2vec-4.4.0}/slide2vec/encoders/models/conch.py +0 -0
- {slide2vec-4.3.0 → slide2vec-4.4.0}/slide2vec/encoders/models/gigapath.py +0 -0
- {slide2vec-4.3.0 → slide2vec-4.4.0}/slide2vec/encoders/models/hibou.py +0 -0
- {slide2vec-4.3.0 → slide2vec-4.4.0}/slide2vec/encoders/models/hoptimus.py +0 -0
- {slide2vec-4.3.0 → slide2vec-4.4.0}/slide2vec/encoders/models/lunit.py +0 -0
- {slide2vec-4.3.0 → slide2vec-4.4.0}/slide2vec/encoders/models/midnight.py +0 -0
- {slide2vec-4.3.0 → slide2vec-4.4.0}/slide2vec/encoders/models/moozy/__init__.py +0 -0
- {slide2vec-4.3.0 → slide2vec-4.4.0}/slide2vec/encoders/models/moozy/blocks.py +0 -0
- {slide2vec-4.3.0 → slide2vec-4.4.0}/slide2vec/encoders/models/moozy/case.py +0 -0
- {slide2vec-4.3.0 → slide2vec-4.4.0}/slide2vec/encoders/models/moozy/loading.py +0 -0
- {slide2vec-4.3.0 → slide2vec-4.4.0}/slide2vec/encoders/models/moozy/slide.py +0 -0
- {slide2vec-4.3.0 → slide2vec-4.4.0}/slide2vec/encoders/models/moozy/types.py +0 -0
- {slide2vec-4.3.0 → slide2vec-4.4.0}/slide2vec/encoders/models/musk.py +0 -0
- {slide2vec-4.3.0 → slide2vec-4.4.0}/slide2vec/encoders/models/phikon.py +0 -0
- {slide2vec-4.3.0 → slide2vec-4.4.0}/slide2vec/encoders/models/prism.py +0 -0
- {slide2vec-4.3.0 → slide2vec-4.4.0}/slide2vec/encoders/models/prost40m.py +0 -0
- {slide2vec-4.3.0 → slide2vec-4.4.0}/slide2vec/encoders/models/titan.py +0 -0
- {slide2vec-4.3.0 → slide2vec-4.4.0}/slide2vec/encoders/models/uni.py +0 -0
- {slide2vec-4.3.0 → slide2vec-4.4.0}/slide2vec/encoders/models/virchow.py +0 -0
- {slide2vec-4.3.0 → slide2vec-4.4.0}/slide2vec/encoders/registry.py +0 -0
- {slide2vec-4.3.0 → slide2vec-4.4.0}/slide2vec/main.py +0 -0
- {slide2vec-4.3.0 → slide2vec-4.4.0}/slide2vec/progress.py +0 -0
- {slide2vec-4.3.0 → slide2vec-4.4.0}/slide2vec/runtime/__init__.py +0 -0
- {slide2vec-4.3.0 → slide2vec-4.4.0}/slide2vec/runtime/embedding.py +0 -0
- {slide2vec-4.3.0 → slide2vec-4.4.0}/slide2vec/runtime/hierarchical.py +0 -0
- {slide2vec-4.3.0 → slide2vec-4.4.0}/slide2vec/runtime/model_settings.py +0 -0
- {slide2vec-4.3.0 → slide2vec-4.4.0}/slide2vec/runtime/persistence.py +0 -0
- {slide2vec-4.3.0 → slide2vec-4.4.0}/slide2vec/runtime/progress_bridge.py +0 -0
- {slide2vec-4.3.0 → slide2vec-4.4.0}/slide2vec/runtime/registry.py +0 -0
- {slide2vec-4.3.0 → slide2vec-4.4.0}/slide2vec/runtime/tiling.py +1 -1
- {slide2vec-4.3.0 → slide2vec-4.4.0}/slide2vec/runtime/types.py +0 -0
- {slide2vec-4.3.0 → slide2vec-4.4.0}/slide2vec/utils/__init__.py +0 -0
- {slide2vec-4.3.0 → slide2vec-4.4.0}/slide2vec/utils/config.py +0 -0
- {slide2vec-4.3.0 → slide2vec-4.4.0}/slide2vec/utils/coordinates.py +0 -0
- {slide2vec-4.3.0 → slide2vec-4.4.0}/slide2vec/utils/log_utils.py +0 -0
- {slide2vec-4.3.0 → slide2vec-4.4.0}/slide2vec/utils/tiling_io.py +0 -0
- {slide2vec-4.3.0 → slide2vec-4.4.0}/slide2vec/utils/utils.py +0 -0
- {slide2vec-4.3.0 → slide2vec-4.4.0}/slide2vec.egg-info/dependency_links.txt +0 -0
- {slide2vec-4.3.0 → slide2vec-4.4.0}/slide2vec.egg-info/entry_points.txt +0 -0
- {slide2vec-4.3.0 → slide2vec-4.4.0}/slide2vec.egg-info/not-zip-safe +0 -0
- {slide2vec-4.3.0 → slide2vec-4.4.0}/slide2vec.egg-info/top_level.txt +0 -0
- {slide2vec-4.3.0 → slide2vec-4.4.0}/tests/test_architecture_runtime_split.py +0 -0
- {slide2vec-4.3.0 → slide2vec-4.4.0}/tests/test_encoder_registry.py +0 -0
- {slide2vec-4.3.0 → slide2vec-4.4.0}/tests/test_hs2p_package_cutover.py +0 -0
- {slide2vec-4.3.0 → slide2vec-4.4.0}/tests/test_regression_models.py +0 -0
- {slide2vec-4.3.0 → slide2vec-4.4.0}/tests/test_tile_store.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: slide2vec
|
|
3
|
-
Version: 4.3.0
|
|
3
|
+
Version: 4.4.0
|
|
4
4
|
Summary: Embedding of whole slide images with Foundation Models
|
|
5
5
|
Author-email: Clément Grisi <clement.grisi@radboudumc.nl>
|
|
6
6
|
License-Expression: Apache-2.0
|
|
@@ -15,7 +15,7 @@ Classifier: Programming Language :: Python :: 3.13
|
|
|
15
15
|
Requires-Python: >=3.10
|
|
16
16
|
Description-Content-Type: text/markdown
|
|
17
17
|
License-File: LICENSE
|
|
18
|
-
Requires-Dist: hs2p[asap,cucim,openslide,sam2,vips]>=4.0.
|
|
18
|
+
Requires-Dist: hs2p[asap,cucim,openslide,sam2,vips]>=4.0.1
|
|
19
19
|
Requires-Dist: omegaconf
|
|
20
20
|
Requires-Dist: matplotlib
|
|
21
21
|
Requires-Dist: numpy<2
|
|
@@ -65,7 +65,7 @@ Requires-Dist: numpy<2; extra == "fm"
|
|
|
65
65
|
Requires-Dist: pandas; extra == "fm"
|
|
66
66
|
Requires-Dist: pillow; extra == "fm"
|
|
67
67
|
Requires-Dist: rich; extra == "fm"
|
|
68
|
-
Requires-Dist: hs2p[asap,cucim,openslide,sam2,vips]>=4.0.
|
|
68
|
+
Requires-Dist: hs2p[asap,cucim,openslide,sam2,vips]>=4.0.1; extra == "fm"
|
|
69
69
|
Requires-Dist: wandb; extra == "fm"
|
|
70
70
|
Requires-Dist: torch<2.8,>=2.3; extra == "fm"
|
|
71
71
|
Requires-Dist: torchvision>=0.18.0; extra == "fm"
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "slide2vec"
|
|
7
|
-
version = "4.3.0"
|
|
7
|
+
version = "4.4.0"
|
|
8
8
|
description = "Embedding of whole slide images with Foundation Models"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
requires-python = ">=3.10"
|
|
@@ -21,7 +21,7 @@ classifiers = [
|
|
|
21
21
|
"Programming Language :: Python :: 3.13",
|
|
22
22
|
]
|
|
23
23
|
dependencies = [
|
|
24
|
-
"hs2p[asap,cucim,openslide,sam2,vips]>=4.0.
|
|
24
|
+
"hs2p[asap,cucim,openslide,sam2,vips]>=4.0.1",
|
|
25
25
|
"omegaconf",
|
|
26
26
|
"matplotlib",
|
|
27
27
|
"numpy<2",
|
|
@@ -88,7 +88,7 @@ fm = [
|
|
|
88
88
|
"pandas",
|
|
89
89
|
"pillow",
|
|
90
90
|
"rich",
|
|
91
|
-
"hs2p[asap,cucim,openslide,sam2,vips]>=4.0.
|
|
91
|
+
"hs2p[asap,cucim,openslide,sam2,vips]>=4.0.1",
|
|
92
92
|
"wandb",
|
|
93
93
|
"torch>=2.3,<2.8",
|
|
94
94
|
"torchvision>=0.18.0",
|
|
@@ -164,7 +164,7 @@ no_implicit_reexport = true
|
|
|
164
164
|
max-line-length = 160
|
|
165
165
|
|
|
166
166
|
[tool.bumpver]
|
|
167
|
-
current_version = "4.3.0"
|
|
167
|
+
current_version = "4.4.0"
|
|
168
168
|
version_pattern = "MAJOR.MINOR.PATCH"
|
|
169
169
|
commit = false # We do version bumping in CI, not as a commit
|
|
170
170
|
tag = false # Git tag already exists — we don't auto-tag
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
from slide2vec.api import (
|
|
2
|
+
EmbeddedPatient,
|
|
2
3
|
EmbeddedSlide,
|
|
3
4
|
ExecutionOptions,
|
|
4
5
|
Model,
|
|
@@ -10,7 +11,7 @@ from slide2vec.api import (
|
|
|
10
11
|
from slide2vec.artifacts import HierarchicalEmbeddingArtifact, SlideEmbeddingArtifact, TileEmbeddingArtifact
|
|
11
12
|
|
|
12
13
|
|
|
13
|
-
__version__ = "4.3.0"
|
|
14
|
+
__version__ = "4.4.0"
|
|
14
15
|
|
|
15
16
|
__all__ = [
|
|
16
17
|
"Model",
|
|
@@ -19,6 +20,7 @@ __all__ = [
|
|
|
19
20
|
"PreprocessingConfig",
|
|
20
21
|
"ExecutionOptions",
|
|
21
22
|
"RunResult",
|
|
23
|
+
"EmbeddedPatient",
|
|
22
24
|
"EmbeddedSlide",
|
|
23
25
|
"SlideEmbeddingArtifact",
|
|
24
26
|
"HierarchicalEmbeddingArtifact",
|
|
@@ -42,25 +42,53 @@ TilingResultsInput = Sequence[Any] | Mapping[str, Any]
|
|
|
42
42
|
|
|
43
43
|
@dataclass(frozen=True, kw_only=True)
|
|
44
44
|
class PreprocessingConfig:
|
|
45
|
+
"""Configuration for slide tiling and preprocessing."""
|
|
46
|
+
|
|
47
|
+
#: Slide reading backend. ``"auto"`` tries cucim → openslide → vips in order.
|
|
48
|
+
#: Explicit choices: ``"cucim"``, ``"openslide"``, ``"vips"``, ``"asap"``.
|
|
45
49
|
backend: str = "auto"
|
|
50
|
+
#: Target spacing in µm/px. Resolved from the model preset when ``None``.
|
|
46
51
|
requested_spacing_um: float | None = None
|
|
52
|
+
#: Tile side length in pixels at *requested_spacing_um*.
|
|
53
|
+
#: Resolved from the model preset when ``None``.
|
|
47
54
|
requested_tile_size_px: int | None = None
|
|
55
|
+
#: Parent region side length in pixels (hierarchical mode).
|
|
56
|
+
#: Auto-derived as ``requested_tile_size_px × region_tile_multiple`` when ``None``.
|
|
48
57
|
requested_region_size_px: int | None = None
|
|
58
|
+
#: Region grid width/height in tiles (e.g. ``6`` → 6×6 = 36 tiles per region).
|
|
59
|
+
#: Enables hierarchical extraction when set; must be ≥ 2.
|
|
49
60
|
region_tile_multiple: int | None = None
|
|
61
|
+
#: Relative spacing tolerance for pyramid level selection (default ``0.05``).
|
|
50
62
|
tolerance: float = 0.05
|
|
63
|
+
#: Fractional tile overlap (``0.0`` = no overlap).
|
|
51
64
|
overlap: float = 0.0
|
|
65
|
+
#: Minimum tissue fraction required to keep a tile (default ``0.01``).
|
|
52
66
|
tissue_threshold: float = 0.01
|
|
67
|
+
#: Directory containing pre-extracted tile coordinates to reuse, skipping tiling.
|
|
53
68
|
read_coordinates_from: Path | None = None
|
|
69
|
+
#: Directory containing pre-extracted tile images to skip the tiling step entirely.
|
|
54
70
|
read_tiles_from: Path | None = None
|
|
71
|
+
#: Read and decode tiles on demand rather than pre-loading into memory.
|
|
55
72
|
on_the_fly: bool = True
|
|
73
|
+
#: Decode tiles on the GPU via CuCIM / nvImageCodec when ``True``.
|
|
56
74
|
gpu_decode: bool = False
|
|
75
|
+
#: Dynamically adjust batch size based on tile count.
|
|
57
76
|
adaptive_batching: bool = False
|
|
77
|
+
#: Group adjacent tiles into supertile batches for faster I/O.
|
|
58
78
|
use_supertiles: bool = True
|
|
79
|
+
#: JPEG decode library — ``"turbojpeg"`` (default) or ``"pillow"``.
|
|
59
80
|
jpeg_backend: str = "turbojpeg"
|
|
81
|
+
#: Number of CuCIM reader threads.
|
|
60
82
|
num_cucim_workers: int = 4
|
|
83
|
+
#: Skip slides already present in the output directory when ``True``.
|
|
61
84
|
resume: bool = False
|
|
85
|
+
#: Forwarded to hs2p segmentation config. Supported keys: ``method``,
|
|
86
|
+
#: ``downsample``, ``sam2_device``. See :doc:`preprocessing` for details.
|
|
62
87
|
segmentation: dict[str, Any] = field(default_factory=dict)
|
|
88
|
+
#: Forwarded to hs2p tile-filtering config.
|
|
63
89
|
filtering: dict[str, Any] = field(default_factory=dict)
|
|
90
|
+
#: Controls whether hs2p writes mask and tiling preview images.
|
|
91
|
+
#: Keys: ``save_mask_preview``, ``save_tiling_preview``, ``downsample``.
|
|
64
92
|
preview: dict[str, Any] = field(default_factory=dict)
|
|
65
93
|
|
|
66
94
|
@classmethod
|
|
@@ -123,31 +151,44 @@ class PreprocessingConfig:
|
|
|
123
151
|
|
|
124
152
|
@dataclass(frozen=True, kw_only=True)
|
|
125
153
|
class ExecutionOptions:
|
|
154
|
+
"""Runtime execution and output settings."""
|
|
155
|
+
|
|
156
|
+
#: Directory where artifacts are written. Required for :class:`Pipeline` runs.
|
|
126
157
|
output_dir: Path | None = None
|
|
158
|
+
#: Tensor serialization format — ``"pt"`` (PyTorch, default) or ``"npz"`` (NumPy).
|
|
127
159
|
output_format: str = "pt"
|
|
128
|
-
|
|
129
|
-
|
|
160
|
+
#: Number of tiles per forward pass.
|
|
161
|
+
batch_size: int = 32
|
|
162
|
+
#: DataLoader worker count per GPU rank. ``None`` means auto
|
|
163
|
+
#: (capped by CPU / SLURM limit, then split across the resolved GPU count).
|
|
164
|
+
num_workers_per_gpu: int | None = None
|
|
165
|
+
#: Tiling worker count. ``None`` means auto (capped by CPU / SLURM limit).
|
|
130
166
|
num_preprocessing_workers: int | None = None
|
|
167
|
+
#: Number of GPUs to use. ``None`` defaults to all available GPUs.
|
|
131
168
|
num_gpus: int | None = None
|
|
169
|
+
#: Forward-pass dtype — ``"fp16"``, ``"bf16"``, ``"fp32"``,
|
|
170
|
+
#: or ``None`` (auto-determined from the model preset).
|
|
132
171
|
precision: str | None = None
|
|
172
|
+
#: DataLoader prefetch queue depth per worker (default ``4``).
|
|
133
173
|
prefetch_factor: int = 4
|
|
134
|
-
|
|
174
|
+
#: Persist tile embeddings to disk when running a slide-level model.
|
|
135
175
|
save_tile_embeddings: bool = False
|
|
176
|
+
#: Persist slide embeddings to disk when running a patient-level model.
|
|
136
177
|
save_slide_embeddings: bool = False
|
|
178
|
+
#: Persist encoder latent representations when available.
|
|
137
179
|
save_latents: bool = False
|
|
138
180
|
|
|
139
181
|
@classmethod
|
|
140
182
|
def from_config(cls, cfg: Any, *, run_on_cpu: bool = False) -> "ExecutionOptions":
|
|
141
183
|
configured_num_gpus = cfg.speed.num_gpus
|
|
142
184
|
requested_precision = normalize_precision_name(cfg.speed.precision)
|
|
143
|
-
|
|
185
|
+
num_workers_per_gpu = cfg.speed.num_dataloader_workers
|
|
144
186
|
prefetch_factor = int(cfg.speed.prefetch_factor_embedding)
|
|
145
|
-
persistent_workers = bool(cfg.speed.persistent_workers_embedding)
|
|
146
187
|
return cls(
|
|
147
188
|
output_dir=Path(cfg.output_dir),
|
|
148
189
|
output_format="pt",
|
|
149
190
|
batch_size=int(cfg.model.batch_size),
|
|
150
|
-
|
|
191
|
+
num_workers_per_gpu=int(num_workers_per_gpu) if num_workers_per_gpu is not None else None,
|
|
151
192
|
num_preprocessing_workers=(
|
|
152
193
|
int(cfg.speed.num_preprocessing_workers)
|
|
153
194
|
if cfg.speed.num_preprocessing_workers is not None
|
|
@@ -156,7 +197,6 @@ class ExecutionOptions:
|
|
|
156
197
|
num_gpus=1 if run_on_cpu else (int(configured_num_gpus) if configured_num_gpus is not None else None),
|
|
157
198
|
precision="fp32" if run_on_cpu else requested_precision,
|
|
158
199
|
prefetch_factor=prefetch_factor,
|
|
159
|
-
persistent_workers=persistent_workers,
|
|
160
200
|
save_tile_embeddings=bool(cfg.model.save_tile_embeddings),
|
|
161
201
|
save_slide_embeddings=bool(cfg.model.save_slide_embeddings),
|
|
162
202
|
save_latents=bool(cfg.model.save_latents),
|
|
@@ -179,23 +219,25 @@ class ExecutionOptions:
|
|
|
179
219
|
object.__setattr__(self, "num_preprocessing_workers", capped_num_preprocessing_workers)
|
|
180
220
|
logger = logging.getLogger(__name__)
|
|
181
221
|
cap_source = f"slurm_cpu_limit={slurm_limit}" if slurm_limit is not None else f"cpu_count={cpu_count}"
|
|
182
|
-
resolved_num_workers = self.
|
|
183
|
-
|
|
222
|
+
resolved_num_workers = self.resolved_num_workers_per_gpu()
|
|
223
|
+
num_workers_per_gpu_label = (
|
|
184
224
|
f"{resolved_num_workers} (requested=auto)"
|
|
185
|
-
if self.
|
|
225
|
+
if self.num_workers_per_gpu is None
|
|
186
226
|
else str(resolved_num_workers)
|
|
187
227
|
)
|
|
188
228
|
logger.info(
|
|
189
|
-
"ExecutionOptions:
|
|
229
|
+
"ExecutionOptions: num_workers_per_gpu=%s, num_preprocessing_workers=%d "
|
|
190
230
|
"(preprocessing cap=%d via %s)",
|
|
191
|
-
|
|
231
|
+
num_workers_per_gpu_label,
|
|
192
232
|
capped_num_preprocessing_workers,
|
|
193
233
|
cap,
|
|
194
234
|
cap_source,
|
|
195
235
|
)
|
|
196
236
|
|
|
197
|
-
def
|
|
198
|
-
|
|
237
|
+
def resolved_num_workers_per_gpu(self) -> int:
|
|
238
|
+
if self.num_workers_per_gpu is not None:
|
|
239
|
+
return self.num_workers_per_gpu
|
|
240
|
+
return max(1, cpu_worker_limit() // self.num_gpus)
|
|
199
241
|
|
|
200
242
|
def with_output_dir(self, output_dir: PathLike | None) -> "ExecutionOptions":
|
|
201
243
|
if output_dir is None:
|
|
@@ -205,33 +247,60 @@ class ExecutionOptions:
|
|
|
205
247
|
|
|
206
248
|
@dataclass(frozen=True, kw_only=True)
|
|
207
249
|
class RunResult:
|
|
250
|
+
"""Return value of :meth:`Pipeline.run`."""
|
|
251
|
+
|
|
252
|
+
#: Tile embedding artifacts written to disk.
|
|
208
253
|
tile_artifacts: list[TileEmbeddingArtifact]
|
|
254
|
+
#: Hierarchical embedding artifacts; empty when hierarchical mode is disabled.
|
|
209
255
|
hierarchical_artifacts: list[HierarchicalEmbeddingArtifact]
|
|
256
|
+
#: Slide embedding artifacts written to disk.
|
|
210
257
|
slide_artifacts: list[SlideEmbeddingArtifact]
|
|
258
|
+
#: Patient embedding artifacts; empty when no patient-level model is used.
|
|
211
259
|
patient_artifacts: list[PatientEmbeddingArtifact] = field(default_factory=list)
|
|
260
|
+
#: Path to ``process_list.csv``, which tracks processing status per sample.
|
|
212
261
|
process_list_path: Path | None = None
|
|
213
262
|
|
|
214
263
|
|
|
215
264
|
@dataclass(frozen=True, kw_only=True)
|
|
216
265
|
class EmbeddedPatient:
|
|
266
|
+
"""In-memory result of embedding a single patient."""
|
|
267
|
+
|
|
268
|
+
#: Unique patient identifier.
|
|
217
269
|
patient_id: str
|
|
218
|
-
|
|
219
|
-
|
|
270
|
+
#: Aggregated patient embedding — :class:`torch.Tensor` of shape ``(D,)``.
|
|
271
|
+
patient_embedding: Any
|
|
272
|
+
#: Slide-level embeddings keyed by ``sample_id`` — each a :class:`torch.Tensor` of shape ``(D,)``.
|
|
273
|
+
slide_embeddings: dict[str, Any]
|
|
220
274
|
|
|
221
275
|
|
|
222
276
|
@dataclass(frozen=True, kw_only=True)
|
|
223
277
|
class EmbeddedSlide:
|
|
278
|
+
"""In-memory result of embedding a single slide."""
|
|
279
|
+
|
|
280
|
+
#: Unique slide identifier.
|
|
224
281
|
sample_id: str
|
|
282
|
+
#: Tile embeddings — :class:`torch.Tensor` of shape ``(N, D)``.
|
|
225
283
|
tile_embeddings: Any
|
|
284
|
+
#: Slide-level embedding — :class:`torch.Tensor` of shape ``(D,)`` for
|
|
285
|
+
#: slide-level encoders; ``None`` for tile-only encoders.
|
|
226
286
|
slide_embedding: Any | None
|
|
287
|
+
#: x coordinate (pixels at level 0) of each tile's top-left corner — array of shape ``(N,)``.
|
|
227
288
|
x: Any
|
|
289
|
+
#: y coordinate (pixels at level 0) of each tile's top-left corner — array of shape ``(N,)``.
|
|
228
290
|
y: Any
|
|
291
|
+
#: Tile side length in pixels at level 0.
|
|
229
292
|
tile_size_lv0: int
|
|
293
|
+
#: Path to the source slide file.
|
|
230
294
|
image_path: Path
|
|
295
|
+
#: Path to the tissue mask used for tiling, if any.
|
|
231
296
|
mask_path: Path | None = None
|
|
297
|
+
#: Number of tiles extracted from the slide.
|
|
232
298
|
num_tiles: int | None = None
|
|
299
|
+
#: Path to the mask preview image, if generated.
|
|
233
300
|
mask_preview_path: Path | None = None
|
|
301
|
+
#: Path to the tiling preview image, if generated.
|
|
234
302
|
tiling_preview_path: Path | None = None
|
|
303
|
+
#: Encoder latent representations when available; ``None`` otherwise.
|
|
235
304
|
latents: Any | None = None
|
|
236
305
|
|
|
237
306
|
|
|
@@ -42,10 +42,11 @@ tiling:
|
|
|
42
42
|
sthresh_up: 255 # upper threshold value for scaling the binary mask
|
|
43
43
|
mthresh: 7 # median filter size (positive, odd integer)
|
|
44
44
|
close: 4 # additional morphological closing to apply following initial thresholding (positive integer)
|
|
45
|
-
method:
|
|
45
|
+
method: # tissue segmentation method: "hsv", "otsu", "threshold", or "sam2"; ignored when precomputed tissue masks are provided
|
|
46
46
|
sam2_checkpoint_path: # optional when method="sam2"; if empty, hs2p downloads the default AtlasPatch checkpoint from Hugging Face
|
|
47
47
|
sam2_config_path: # optional local override for the SAM2 model config; if empty, hs2p downloads the default AtlasPatch config from Hugging Face
|
|
48
48
|
sam2_device: "cpu" # device for SAM2 inference, e.g. "cpu", "cuda", or "cuda:0"
|
|
49
|
+
sam2_num_workers: # optional cap on concurrent SAM2 mask-resolution workers; set to 1 to serialize GPU inference and avoid CUDA OOMs
|
|
49
50
|
filter_params:
|
|
50
51
|
ref_tile_size: ${tiling.params.requested_tile_size_px} # reference tile size at the target spacing
|
|
51
52
|
a_t: 4 # area filter threshold for tissue (positive integer, the minimum size of detected foreground contours to consider, relative to the reference tile size ref_tile_size, e.g. a value 10 means only detected foreground contours of size greater than 10 [ref_tile_size, ref_tile_size] tiles at spacing tiling.params.requested_spacing_um will be kept)
|
|
@@ -70,12 +71,11 @@ tiling:
|
|
|
70
71
|
|
|
71
72
|
speed:
|
|
72
73
|
precision: # model inference precision ["fp32", "fp16", "bf16"]; if not set, determined automatically based on model recommendations
|
|
73
|
-
num_dataloader_workers: # number of DataLoader worker processes for reading tiles during embedding; defaults to auto (job CPU budget, except cuCIM on-the-fly uses
|
|
74
|
+
num_dataloader_workers: # number of DataLoader worker processes per GPU rank for reading tiles during embedding; defaults to auto (job CPU budget split across GPUs, except cuCIM on-the-fly uses per-GPU budget // speed.num_cucim_workers)
|
|
74
75
|
num_gpus: # number of GPUs to use for feature extraction; defaults to all available GPUs
|
|
75
76
|
num_preprocessing_workers: # number of workers for hs2p tiling (WSI reading, JPEG encoding, tar writing); defaults to the runtime CPU budget capped at 64
|
|
76
77
|
num_cucim_workers: 4 # number of internal cucim threads per read_region call (embedding path, on-the-fly only); DataLoader workers are auto-set to cpu_count // num_cucim_workers
|
|
77
78
|
prefetch_factor_embedding: 4 # prefetch factor for tile embedding dataloaders
|
|
78
|
-
persistent_workers_embedding: true # keep DataLoader workers alive across epochs/batches
|
|
79
79
|
|
|
80
80
|
wandb:
|
|
81
81
|
enable: false
|
|
@@ -1,11 +1,10 @@
|
|
|
1
1
|
from contextlib import contextmanager
|
|
2
|
-
from importlib.resources import as_file, files
|
|
3
2
|
from pathlib import Path
|
|
4
3
|
from typing import Iterator
|
|
5
4
|
|
|
6
5
|
|
|
7
6
|
def config_resource(*parts: str):
|
|
8
|
-
path =
|
|
7
|
+
path = Path(__file__).resolve().parent
|
|
9
8
|
for part in parts:
|
|
10
9
|
path = path.joinpath(part)
|
|
11
10
|
return path.with_suffix(".yaml")
|
|
@@ -21,7 +20,4 @@ def load_config(*parts: str):
|
|
|
21
20
|
|
|
22
21
|
@contextmanager
|
|
23
22
|
def config_path(*parts: str) -> Iterator[Path]:
|
|
24
|
-
|
|
25
|
-
with as_file(resource) as resolved:
|
|
26
|
-
yield resolved
|
|
27
|
-
|
|
23
|
+
yield config_resource(*parts)
|
|
@@ -119,20 +119,24 @@ def main(argv=None) -> int:
|
|
|
119
119
|
return 0
|
|
120
120
|
assigned_slides = [paired_by_sample[sample_id][0] for sample_id in assigned_ids]
|
|
121
121
|
assigned_tiling_results = [paired_by_sample[sample_id][1] for sample_id in assigned_ids]
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
assigned_slides,
|
|
125
|
-
assigned_tiling_results,
|
|
126
|
-
preprocessing=preprocessing,
|
|
127
|
-
execution=execution,
|
|
128
|
-
)
|
|
129
|
-
for embedded_slide in embedded_slides:
|
|
122
|
+
|
|
123
|
+
def _persist_embedded_slide(slide, tiling_result, embedded_slide) -> None:
|
|
130
124
|
payload = {
|
|
131
125
|
"tile_embeddings": _to_cpu_payload(embedded_slide.tile_embeddings),
|
|
132
126
|
"slide_embedding": _to_cpu_payload(embedded_slide.slide_embedding),
|
|
133
127
|
"latents": _to_cpu_payload(embedded_slide.latents),
|
|
134
128
|
}
|
|
135
129
|
torch.save(payload, coordination_dir / f"{embedded_slide.sample_id}.embedded.pt")
|
|
130
|
+
|
|
131
|
+
_compute_embedded_slides(
|
|
132
|
+
model,
|
|
133
|
+
assigned_slides,
|
|
134
|
+
assigned_tiling_results,
|
|
135
|
+
preprocessing=preprocessing,
|
|
136
|
+
execution=execution,
|
|
137
|
+
on_embedded_slide=_persist_embedded_slide,
|
|
138
|
+
collect_results=False,
|
|
139
|
+
)
|
|
136
140
|
return 0
|
|
137
141
|
finally:
|
|
138
142
|
if dist.is_available() and dist.is_initialized():
|
|
@@ -19,8 +19,8 @@ def main(argv=None) -> int:
|
|
|
19
19
|
import slide2vec.distributed as distributed
|
|
20
20
|
from slide2vec.api import Model
|
|
21
21
|
from slide2vec.inference import (
|
|
22
|
+
_build_incremental_persist_callback,
|
|
22
23
|
_compute_embedded_slides,
|
|
23
|
-
_persist_embedded_slide,
|
|
24
24
|
load_successful_tiled_slides,
|
|
25
25
|
)
|
|
26
26
|
from slide2vec.progress import JsonlProgressReporter, activate_progress_reporter
|
|
@@ -46,7 +46,8 @@ def main(argv=None) -> int:
|
|
|
46
46
|
)
|
|
47
47
|
preprocessing = deserialize_preprocessing(request["preprocessing"])
|
|
48
48
|
execution = deserialize_execution(request["execution"])
|
|
49
|
-
|
|
49
|
+
tiling_input_dir = Path(request.get("tiling_input_dir", str(output_dir)))
|
|
50
|
+
slide_records, tiling_results = load_successful_tiled_slides(tiling_input_dir)
|
|
50
51
|
assignments = assign_slides_to_ranks(slide_records, tiling_results, num_gpus=world_size)
|
|
51
52
|
assigned_ids = assignments.get(global_rank, [])
|
|
52
53
|
if not assigned_ids:
|
|
@@ -69,21 +70,21 @@ def main(argv=None) -> int:
|
|
|
69
70
|
)
|
|
70
71
|
context = activate_progress_reporter(reporter) if reporter is not None else nullcontext()
|
|
71
72
|
with context:
|
|
72
|
-
|
|
73
|
+
persist_callback, _, _ = _build_incremental_persist_callback(
|
|
74
|
+
model=model,
|
|
75
|
+
preprocessing=preprocessing,
|
|
76
|
+
execution=execution,
|
|
77
|
+
process_list_path=None,
|
|
78
|
+
)
|
|
79
|
+
_compute_embedded_slides(
|
|
73
80
|
model,
|
|
74
81
|
assigned_slides,
|
|
75
82
|
assigned_tiling_results,
|
|
76
83
|
preprocessing=preprocessing,
|
|
77
84
|
execution=execution,
|
|
85
|
+
on_embedded_slide=persist_callback,
|
|
86
|
+
collect_results=False,
|
|
78
87
|
)
|
|
79
|
-
for embedded_slide, tiling_result in zip(embedded_slides, assigned_tiling_results):
|
|
80
|
-
_persist_embedded_slide(
|
|
81
|
-
model,
|
|
82
|
-
embedded_slide,
|
|
83
|
-
tiling_result,
|
|
84
|
-
preprocessing=preprocessing,
|
|
85
|
-
execution=execution,
|
|
86
|
-
)
|
|
87
88
|
return 0
|
|
88
89
|
finally:
|
|
89
90
|
if dist.is_available() and dist.is_initialized():
|
|
@@ -63,14 +63,18 @@ def validate_encoder_config(
|
|
|
63
63
|
if not mismatches:
|
|
64
64
|
return
|
|
65
65
|
|
|
66
|
-
message = (
|
|
67
|
-
f"Model '{encoder_name}' is configured with "
|
|
68
|
-
f"{'; '.join(mismatches)}. "
|
|
69
|
-
"Set `model.allow_non_recommended_settings=true` in YAML/CLI or "
|
|
70
|
-
"`allow_non_recommended_settings=True` in `Model.from_preset(...)` "
|
|
71
|
-
"to continue with a warning."
|
|
72
|
-
)
|
|
73
66
|
if allow_non_recommended:
|
|
74
|
-
logger.warning(message)
|
|
67
|
+
logger.warning(
|
|
68
|
+
f"Model '{encoder_name}' is configured with "
|
|
69
|
+
f"{'; '.join(mismatches)}. "
|
|
70
|
+
"Warning-only mode is enabled because "
|
|
71
|
+
"`allow_non_recommended_settings=True`."
|
|
72
|
+
)
|
|
75
73
|
else:
|
|
76
|
-
raise ValueError(message)
|
|
74
|
+
raise ValueError(
|
|
75
|
+
f"Model '{encoder_name}' is configured with "
|
|
76
|
+
f"{'; '.join(mismatches)}. "
|
|
77
|
+
"Set `model.allow_non_recommended_settings=true` in YAML/CLI or "
|
|
78
|
+
"`allow_non_recommended_settings=True` in `Model.from_preset(...)` "
|
|
79
|
+
"to continue."
|
|
80
|
+
)
|