slide2vec 4.7.0__tar.gz → 5.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {slide2vec-4.7.0 → slide2vec-5.0.0}/PKG-INFO +4 -4
- {slide2vec-4.7.0 → slide2vec-5.0.0}/README.md +1 -1
- {slide2vec-4.7.0 → slide2vec-5.0.0}/pyproject.toml +4 -4
- {slide2vec-4.7.0 → slide2vec-5.0.0}/slide2vec/__init__.py +1 -1
- {slide2vec-4.7.0 → slide2vec-5.0.0}/slide2vec/api.py +159 -8
- {slide2vec-4.7.0 → slide2vec-5.0.0}/slide2vec/artifacts.py +71 -5
- {slide2vec-4.7.0 → slide2vec-5.0.0}/slide2vec/configs/default.yaml +18 -1
- {slide2vec-4.7.0 → slide2vec-5.0.0}/slide2vec/distributed/direct_embed_worker.py +24 -9
- {slide2vec-4.7.0 → slide2vec-5.0.0}/slide2vec/distributed/pipeline_worker.py +21 -17
- {slide2vec-4.7.0 → slide2vec-5.0.0}/slide2vec/inference.py +17 -0
- slide2vec-5.0.0/slide2vec/runtime/artifacts_collect.py +299 -0
- {slide2vec-4.7.0 → slide2vec-5.0.0}/slide2vec/runtime/distributed.py +85 -9
- {slide2vec-4.7.0 → slide2vec-5.0.0}/slide2vec/runtime/distributed_stage.py +30 -12
- {slide2vec-4.7.0 → slide2vec-5.0.0}/slide2vec/runtime/embedding.py +16 -0
- {slide2vec-4.7.0 → slide2vec-5.0.0}/slide2vec/runtime/embedding_persist.py +7 -0
- {slide2vec-4.7.0 → slide2vec-5.0.0}/slide2vec/runtime/embedding_pipeline.py +3 -0
- {slide2vec-4.7.0 → slide2vec-5.0.0}/slide2vec/runtime/persist_callbacks.py +63 -11
- slide2vec-5.0.0/slide2vec/runtime/persistence.py +299 -0
- {slide2vec-4.7.0 → slide2vec-5.0.0}/slide2vec/runtime/process_list.py +59 -13
- {slide2vec-4.7.0 → slide2vec-5.0.0}/slide2vec/runtime/serialization.py +5 -2
- {slide2vec-4.7.0 → slide2vec-5.0.0}/slide2vec/runtime/tiling.py +32 -8
- {slide2vec-4.7.0 → slide2vec-5.0.0}/slide2vec/runtime/tiling_pipeline.py +31 -15
- {slide2vec-4.7.0 → slide2vec-5.0.0}/slide2vec/utils/tiling_io.py +11 -0
- {slide2vec-4.7.0 → slide2vec-5.0.0}/slide2vec.egg-info/PKG-INFO +4 -4
- {slide2vec-4.7.0 → slide2vec-5.0.0}/slide2vec.egg-info/requires.txt +2 -2
- {slide2vec-4.7.0 → slide2vec-5.0.0}/tests/test_output_consistency.py +10 -1
- {slide2vec-4.7.0 → slide2vec-5.0.0}/tests/test_progress.py +1 -1
- {slide2vec-4.7.0 → slide2vec-5.0.0}/tests/test_regression_core.py +349 -5
- {slide2vec-4.7.0 → slide2vec-5.0.0}/tests/test_regression_inference.py +1901 -43
- {slide2vec-4.7.0 → slide2vec-5.0.0}/tests/test_regression_models.py +182 -114
- slide2vec-4.7.0/slide2vec/runtime/artifacts_collect.py +0 -155
- slide2vec-4.7.0/slide2vec/runtime/persistence.py +0 -188
- {slide2vec-4.7.0 → slide2vec-5.0.0}/LICENSE +0 -0
- {slide2vec-4.7.0 → slide2vec-5.0.0}/setup.cfg +0 -0
- {slide2vec-4.7.0 → slide2vec-5.0.0}/slide2vec/__main__.py +0 -0
- {slide2vec-4.7.0 → slide2vec-5.0.0}/slide2vec/cli.py +0 -0
- {slide2vec-4.7.0 → slide2vec-5.0.0}/slide2vec/configs/__init__.py +0 -0
- {slide2vec-4.7.0 → slide2vec-5.0.0}/slide2vec/configs/resources.py +0 -0
- {slide2vec-4.7.0 → slide2vec-5.0.0}/slide2vec/data/__init__.py +0 -0
- {slide2vec-4.7.0 → slide2vec-5.0.0}/slide2vec/data/dataset.py +0 -0
- {slide2vec-4.7.0 → slide2vec-5.0.0}/slide2vec/data/tile_reader.py +0 -0
- {slide2vec-4.7.0 → slide2vec-5.0.0}/slide2vec/data/tile_store.py +0 -0
- {slide2vec-4.7.0 → slide2vec-5.0.0}/slide2vec/distributed/__init__.py +0 -0
- {slide2vec-4.7.0 → slide2vec-5.0.0}/slide2vec/encoders/__init__.py +0 -0
- {slide2vec-4.7.0 → slide2vec-5.0.0}/slide2vec/encoders/base.py +0 -0
- {slide2vec-4.7.0 → slide2vec-5.0.0}/slide2vec/encoders/models/__init__.py +0 -0
- {slide2vec-4.7.0 → slide2vec-5.0.0}/slide2vec/encoders/models/conch.py +0 -0
- {slide2vec-4.7.0 → slide2vec-5.0.0}/slide2vec/encoders/models/gigapath.py +0 -0
- {slide2vec-4.7.0 → slide2vec-5.0.0}/slide2vec/encoders/models/hibou.py +0 -0
- {slide2vec-4.7.0 → slide2vec-5.0.0}/slide2vec/encoders/models/hoptimus.py +0 -0
- {slide2vec-4.7.0 → slide2vec-5.0.0}/slide2vec/encoders/models/lunit.py +0 -0
- {slide2vec-4.7.0 → slide2vec-5.0.0}/slide2vec/encoders/models/midnight.py +0 -0
- {slide2vec-4.7.0 → slide2vec-5.0.0}/slide2vec/encoders/models/moozy/__init__.py +0 -0
- {slide2vec-4.7.0 → slide2vec-5.0.0}/slide2vec/encoders/models/moozy/blocks.py +0 -0
- {slide2vec-4.7.0 → slide2vec-5.0.0}/slide2vec/encoders/models/moozy/case.py +0 -0
- {slide2vec-4.7.0 → slide2vec-5.0.0}/slide2vec/encoders/models/moozy/loading.py +0 -0
- {slide2vec-4.7.0 → slide2vec-5.0.0}/slide2vec/encoders/models/moozy/slide.py +0 -0
- {slide2vec-4.7.0 → slide2vec-5.0.0}/slide2vec/encoders/models/moozy/types.py +0 -0
- {slide2vec-4.7.0 → slide2vec-5.0.0}/slide2vec/encoders/models/musk.py +0 -0
- {slide2vec-4.7.0 → slide2vec-5.0.0}/slide2vec/encoders/models/phikon.py +0 -0
- {slide2vec-4.7.0 → slide2vec-5.0.0}/slide2vec/encoders/models/prism.py +0 -0
- {slide2vec-4.7.0 → slide2vec-5.0.0}/slide2vec/encoders/models/prost40m.py +0 -0
- {slide2vec-4.7.0 → slide2vec-5.0.0}/slide2vec/encoders/models/titan.py +0 -0
- {slide2vec-4.7.0 → slide2vec-5.0.0}/slide2vec/encoders/models/uni.py +0 -0
- {slide2vec-4.7.0 → slide2vec-5.0.0}/slide2vec/encoders/models/virchow.py +0 -0
- {slide2vec-4.7.0 → slide2vec-5.0.0}/slide2vec/encoders/registry.py +0 -0
- {slide2vec-4.7.0 → slide2vec-5.0.0}/slide2vec/encoders/validation.py +0 -0
- {slide2vec-4.7.0 → slide2vec-5.0.0}/slide2vec/progress.py +0 -0
- {slide2vec-4.7.0 → slide2vec-5.0.0}/slide2vec/runtime/__init__.py +0 -0
- {slide2vec-4.7.0 → slide2vec-5.0.0}/slide2vec/runtime/batching.py +0 -0
- {slide2vec-4.7.0 → slide2vec-5.0.0}/slide2vec/runtime/cpu_budget.py +0 -0
- {slide2vec-4.7.0 → slide2vec-5.0.0}/slide2vec/runtime/dense_regions.py +0 -0
- {slide2vec-4.7.0 → slide2vec-5.0.0}/slide2vec/runtime/hierarchical.py +0 -0
- {slide2vec-4.7.0 → slide2vec-5.0.0}/slide2vec/runtime/manifest.py +0 -0
- {slide2vec-4.7.0 → slide2vec-5.0.0}/slide2vec/runtime/model_settings.py +0 -0
- {slide2vec-4.7.0 → slide2vec-5.0.0}/slide2vec/runtime/patient_pipeline.py +0 -0
- {slide2vec-4.7.0 → slide2vec-5.0.0}/slide2vec/runtime/progress_bridge.py +0 -0
- {slide2vec-4.7.0 → slide2vec-5.0.0}/slide2vec/runtime/registry.py +0 -0
- {slide2vec-4.7.0 → slide2vec-5.0.0}/slide2vec/runtime/slide_encode.py +0 -0
- {slide2vec-4.7.0 → slide2vec-5.0.0}/slide2vec/runtime/types.py +0 -0
- {slide2vec-4.7.0 → slide2vec-5.0.0}/slide2vec/runtime/worker_io.py +0 -0
- {slide2vec-4.7.0 → slide2vec-5.0.0}/slide2vec/utils/__init__.py +0 -0
- {slide2vec-4.7.0 → slide2vec-5.0.0}/slide2vec/utils/config.py +0 -0
- {slide2vec-4.7.0 → slide2vec-5.0.0}/slide2vec/utils/coordinates.py +0 -0
- {slide2vec-4.7.0 → slide2vec-5.0.0}/slide2vec/utils/log_utils.py +0 -0
- {slide2vec-4.7.0 → slide2vec-5.0.0}/slide2vec/utils/utils.py +0 -0
- {slide2vec-4.7.0 → slide2vec-5.0.0}/slide2vec.egg-info/SOURCES.txt +0 -0
- {slide2vec-4.7.0 → slide2vec-5.0.0}/slide2vec.egg-info/dependency_links.txt +0 -0
- {slide2vec-4.7.0 → slide2vec-5.0.0}/slide2vec.egg-info/entry_points.txt +0 -0
- {slide2vec-4.7.0 → slide2vec-5.0.0}/slide2vec.egg-info/not-zip-safe +0 -0
- {slide2vec-4.7.0 → slide2vec-5.0.0}/slide2vec.egg-info/top_level.txt +0 -0
- {slide2vec-4.7.0 → slide2vec-5.0.0}/tests/test_architecture_runtime_split.py +0 -0
- {slide2vec-4.7.0 → slide2vec-5.0.0}/tests/test_attention_extraction.py +0 -0
- {slide2vec-4.7.0 → slide2vec-5.0.0}/tests/test_dense_extraction.py +0 -0
- {slide2vec-4.7.0 → slide2vec-5.0.0}/tests/test_dense_locality_gated.py +0 -0
- {slide2vec-4.7.0 → slide2vec-5.0.0}/tests/test_dense_regions.py +0 -0
- {slide2vec-4.7.0 → slide2vec-5.0.0}/tests/test_encoder_registry.py +0 -0
- {slide2vec-4.7.0 → slide2vec-5.0.0}/tests/test_hs2p_package_cutover.py +0 -0
- {slide2vec-4.7.0 → slide2vec-5.0.0}/tests/test_runtime_batching.py +0 -0
- {slide2vec-4.7.0 → slide2vec-5.0.0}/tests/test_tile_store.py +0 -0
- {slide2vec-4.7.0 → slide2vec-5.0.0}/tests/test_tiling_pipeline.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: slide2vec
|
|
3
|
-
Version: 4.7.0
|
|
3
|
+
Version: 5.0.0
|
|
4
4
|
Summary: Embedding of whole slide images with Foundation Models
|
|
5
5
|
Author-email: Clément Grisi <clement.grisi@radboudumc.nl>
|
|
6
6
|
License-Expression: Apache-2.0
|
|
@@ -15,7 +15,7 @@ Classifier: Programming Language :: Python :: 3.13
|
|
|
15
15
|
Requires-Python: >=3.10
|
|
16
16
|
Description-Content-Type: text/markdown
|
|
17
17
|
License-File: LICENSE
|
|
18
|
-
Requires-Dist: hs2p[asap,cucim,openslide,sam2,vips]>=4.0
|
|
18
|
+
Requires-Dist: hs2p[asap,cucim,openslide,sam2,vips]>=4.2.0
|
|
19
19
|
Requires-Dist: omegaconf
|
|
20
20
|
Requires-Dist: matplotlib
|
|
21
21
|
Requires-Dist: numpy<2
|
|
@@ -65,7 +65,7 @@ Requires-Dist: numpy<2; extra == "fm"
|
|
|
65
65
|
Requires-Dist: pandas; extra == "fm"
|
|
66
66
|
Requires-Dist: pillow; extra == "fm"
|
|
67
67
|
Requires-Dist: rich; extra == "fm"
|
|
68
|
-
Requires-Dist: hs2p[asap,cucim,openslide,sam2,vips]>=4.0
|
|
68
|
+
Requires-Dist: hs2p[asap,cucim,openslide,sam2,vips]>=4.2.0; extra == "fm"
|
|
69
69
|
Requires-Dist: wandb; extra == "fm"
|
|
70
70
|
Requires-Dist: torch<2.8,>=2.3; extra == "fm"
|
|
71
71
|
Requires-Dist: torchvision>=0.18.0; extra == "fm"
|
|
@@ -169,7 +169,7 @@ pipeline = Pipeline(
|
|
|
169
169
|
preprocessing=PreprocessingConfig(
|
|
170
170
|
requested_spacing_um=0.5,
|
|
171
171
|
requested_tile_size_px=224,
|
|
172
|
-
|
|
172
|
+
masks={"min_coverage": {"tissue": 0.1}},
|
|
173
173
|
),
|
|
174
174
|
execution=ExecutionOptions(output_dir="outputs/demo"),
|
|
175
175
|
)
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "slide2vec"
|
|
7
|
-
version = "4.7.0"
|
|
7
|
+
version = "5.0.0"
|
|
8
8
|
description = "Embedding of whole slide images with Foundation Models"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
requires-python = ">=3.10"
|
|
@@ -21,7 +21,7 @@ classifiers = [
|
|
|
21
21
|
"Programming Language :: Python :: 3.13",
|
|
22
22
|
]
|
|
23
23
|
dependencies = [
|
|
24
|
-
"hs2p[asap,cucim,openslide,sam2,vips]>=4.0
|
|
24
|
+
"hs2p[asap,cucim,openslide,sam2,vips]>=4.2.0",
|
|
25
25
|
"omegaconf",
|
|
26
26
|
"matplotlib",
|
|
27
27
|
"numpy<2",
|
|
@@ -88,7 +88,7 @@ fm = [
|
|
|
88
88
|
"pandas",
|
|
89
89
|
"pillow",
|
|
90
90
|
"rich",
|
|
91
|
-
"hs2p[asap,cucim,openslide,sam2,vips]>=4.0
|
|
91
|
+
"hs2p[asap,cucim,openslide,sam2,vips]>=4.2.0",
|
|
92
92
|
"wandb",
|
|
93
93
|
"torch>=2.3,<2.8",
|
|
94
94
|
"torchvision>=0.18.0",
|
|
@@ -164,7 +164,7 @@ no_implicit_reexport = true
|
|
|
164
164
|
max-line-length = 160
|
|
165
165
|
|
|
166
166
|
[tool.bumpver]
|
|
167
|
-
current_version = "4.7.0"
|
|
167
|
+
current_version = "5.0.0"
|
|
168
168
|
version_pattern = "MAJOR.MINOR.PATCH"
|
|
169
169
|
commit = false # We do version bumping in CI, not as a commit
|
|
170
170
|
tag = false # Git tag already exists — we don't auto-tag
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
|
|
2
|
+
import copy
|
|
2
3
|
import logging
|
|
3
4
|
import os
|
|
4
5
|
from dataclasses import dataclass, field, replace
|
|
@@ -40,6 +41,55 @@ SlideSequence = Sequence[SlideInput]
|
|
|
40
41
|
TilingResultsInput = Sequence[Any] | Mapping[str, Any]
|
|
41
42
|
|
|
42
43
|
|
|
44
|
+
#: Default annotation-mask vocabulary — plain binary tissue tiling. Mirrors hs2p's
|
|
45
|
+
#: shipped default ``{background: 0, tissue: 1}``; leaving it untouched keeps a run
|
|
46
|
+
#: behaving exactly as a tissue-only run. ``min_coverage.tissue`` is the single source
|
|
47
|
+
#: of truth for the tissue threshold (the standalone ``tissue_threshold`` knob is gone).
|
|
48
|
+
#: A :class:`PreprocessingConfig` ``masks`` value is deep-merged over this default, so
|
|
49
|
+
#: callers only state what they override (e.g. ``{"min_coverage": {"tissue": 0.1}}``).
|
|
50
|
+
DEFAULT_MASKS: dict[str, Any] = {
|
|
51
|
+
"output_mode": "per_annotation",
|
|
52
|
+
"pixel_mapping": {"background": 0, "tissue": 1},
|
|
53
|
+
"colors": {"background": None, "tissue": [157, 219, 129]},
|
|
54
|
+
"min_coverage": {"background": None, "tissue": 0.01},
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def _deep_merge_masks(base: Mapping[str, Any], override: Mapping[str, Any]) -> dict[str, Any]:
|
|
59
|
+
"""Deep-merge *override* onto a copy of *base* (nested dicts merge key-by-key)."""
|
|
60
|
+
merged = copy.deepcopy(dict(base))
|
|
61
|
+
for key, value in override.items():
|
|
62
|
+
existing = merged.get(key)
|
|
63
|
+
if isinstance(value, Mapping) and isinstance(existing, dict):
|
|
64
|
+
merged[key] = _deep_merge_masks(existing, value)
|
|
65
|
+
else:
|
|
66
|
+
merged[key] = copy.deepcopy(value)
|
|
67
|
+
return merged
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def resolve_masks(masks: Mapping[str, Any] | None) -> dict[str, Any]:
|
|
71
|
+
"""Complete a (possibly partial) ``masks`` mapping by merging it over :data:`DEFAULT_MASKS`."""
|
|
72
|
+
if not masks:
|
|
73
|
+
return copy.deepcopy(DEFAULT_MASKS)
|
|
74
|
+
return _deep_merge_masks(DEFAULT_MASKS, masks)
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def _masks_to_plain_dict(node: Any) -> dict[str, Any]:
|
|
78
|
+
"""Normalize a masks config node (OmegaConf, mapping, or namespace) to a plain dict."""
|
|
79
|
+
if node is None:
|
|
80
|
+
return {}
|
|
81
|
+
try:
|
|
82
|
+
from omegaconf import OmegaConf
|
|
83
|
+
|
|
84
|
+
if OmegaConf.is_config(node):
|
|
85
|
+
return copy.deepcopy(OmegaConf.to_container(node, resolve=True)) # type: ignore[return-value]
|
|
86
|
+
except ImportError:
|
|
87
|
+
pass
|
|
88
|
+
if isinstance(node, Mapping):
|
|
89
|
+
return copy.deepcopy(dict(node))
|
|
90
|
+
return copy.deepcopy(dict(vars(node)))
|
|
91
|
+
|
|
92
|
+
|
|
43
93
|
@dataclass(frozen=True, kw_only=True)
|
|
44
94
|
class PreprocessingConfig:
|
|
45
95
|
"""Configuration for slide tiling and preprocessing."""
|
|
@@ -62,8 +112,6 @@ class PreprocessingConfig:
|
|
|
62
112
|
tolerance: float = 0.05
|
|
63
113
|
#: Fractional tile overlap (``0.0`` = no overlap).
|
|
64
114
|
overlap: float = 0.0
|
|
65
|
-
#: Minimum tissue fraction required to keep a tile (default ``0.01``).
|
|
66
|
-
tissue_threshold: float = 0.01
|
|
67
115
|
#: Directory containing pre-extracted tile coordinates to reuse, skipping tiling.
|
|
68
116
|
read_coordinates_from: Path | None = None
|
|
69
117
|
#: Directory containing pre-extracted tile images to skip the tiling step entirely.
|
|
@@ -90,6 +138,20 @@ class PreprocessingConfig:
|
|
|
90
138
|
#: Controls whether hs2p writes mask and tiling preview images.
|
|
91
139
|
#: Keys: ``save_mask_preview``, ``save_tiling_preview``, ``downsample``.
|
|
92
140
|
preview: dict[str, Any] = field(default_factory=dict)
|
|
141
|
+
#: Annotation-mask vocabulary forwarded to hs2p's sampling resolver. Keys:
|
|
142
|
+
#: ``output_mode``, ``pixel_mapping``, ``colors``, ``min_coverage``. A partial
|
|
143
|
+
#: mapping is deep-merged over :data:`DEFAULT_MASKS`, so callers only state what
|
|
144
|
+
#: they override (e.g. ``{"min_coverage": {"tissue": 0.1}}``). The default
|
|
145
|
+
#: ``{background, tissue}`` block is plain tissue tiling; ``min_coverage.tissue``
|
|
146
|
+
#: is the single source of truth for the tissue threshold.
|
|
147
|
+
masks: dict[str, Any] = field(default_factory=dict)
|
|
148
|
+
#: When annotation sampling is active, tile each class independently (``True``)
|
|
149
|
+
#: vs jointly across classes (``False``).
|
|
150
|
+
independent_sampling: bool = True
|
|
151
|
+
|
|
152
|
+
def __post_init__(self) -> None:
|
|
153
|
+
# Complete a (possibly partial) masks mapping against the shipped default.
|
|
154
|
+
object.__setattr__(self, "masks", resolve_masks(self.masks))
|
|
93
155
|
|
|
94
156
|
@classmethod
|
|
95
157
|
def from_config(cls, cfg: Any) -> "PreprocessingConfig":
|
|
@@ -121,7 +183,8 @@ class PreprocessingConfig:
|
|
|
121
183
|
region_tile_multiple=int(region_tile_multiple) if region_tile_multiple is not None else None,
|
|
122
184
|
tolerance=float(tiling.params.tolerance),
|
|
123
185
|
overlap=float(tiling.params.overlap),
|
|
124
|
-
|
|
186
|
+
masks=_masks_to_plain_dict(getattr(tiling, "masks", None)),
|
|
187
|
+
independent_sampling=bool(getattr(tiling, "independent_sampling", True)),
|
|
125
188
|
read_coordinates_from=Path(read_coordinates_from) if read_coordinates_from else None,
|
|
126
189
|
read_tiles_from=(
|
|
127
190
|
Path(read_tiles_from) if read_tiles_from else None
|
|
@@ -288,6 +351,11 @@ class EmbeddedSlide:
|
|
|
288
351
|
image_path: Path
|
|
289
352
|
#: Path to the tissue mask used for tiling, if any.
|
|
290
353
|
mask_path: Path | None = None
|
|
354
|
+
#: Annotation class this bag of tiles was sampled for. ``"tissue"`` for the
|
|
355
|
+
#: default tissue-only path, ``"merged"`` for the union output mode, or the
|
|
356
|
+
#: class name (e.g. ``"tumor"``) when annotation-aware sampling fans a slide
|
|
357
|
+
#: out into one bag per class. See the annotation-aware sampling documentation.
|
|
358
|
+
annotation: str | None = None
|
|
291
359
|
#: Number of tiles extracted from the slide.
|
|
292
360
|
num_tiles: int | None = None
|
|
293
361
|
#: Path to the mask preview image, if generated.
|
|
@@ -379,12 +447,13 @@ class Model:
|
|
|
379
447
|
self,
|
|
380
448
|
slide: SlideInput,
|
|
381
449
|
*,
|
|
450
|
+
annotation: str | list[str] | None = None,
|
|
382
451
|
preprocessing: PreprocessingConfig | None = None,
|
|
383
452
|
execution: ExecutionOptions | None = None,
|
|
384
453
|
sample_id: str | None = None,
|
|
385
454
|
mask_path: PathLike | None = None,
|
|
386
455
|
spacing_at_level_0: float | None = None,
|
|
387
|
-
) -> EmbeddedSlide:
|
|
456
|
+
) -> EmbeddedSlide | list[EmbeddedSlide]:
|
|
388
457
|
if isinstance(slide, (str, Path)):
|
|
389
458
|
slide = {
|
|
390
459
|
"sample_id": sample_id or Path(slide).stem,
|
|
@@ -396,31 +465,42 @@ class Model:
|
|
|
396
465
|
raise ValueError(
|
|
397
466
|
"sample_id, mask_path, and spacing_at_level_0 overrides are only supported when slide is a path-like input"
|
|
398
467
|
)
|
|
399
|
-
|
|
468
|
+
requested = None if isinstance(annotation, str) else annotation
|
|
469
|
+
grouped = self.embed_slides(
|
|
400
470
|
[slide],
|
|
471
|
+
annotations=requested,
|
|
401
472
|
preprocessing=preprocessing,
|
|
402
473
|
execution=execution,
|
|
403
|
-
)
|
|
474
|
+
)
|
|
475
|
+
# Single slide in → at most one outer key out. Flatten to the inner
|
|
476
|
+
# {label: EmbeddedSlide} mapping (empty when the run produced nothing).
|
|
477
|
+
bags: dict[str, EmbeddedSlide] = {}
|
|
478
|
+
for inner in grouped.values():
|
|
479
|
+
bags = inner
|
|
480
|
+
break
|
|
481
|
+
return _select_embedded_bag(bags, annotation)
|
|
404
482
|
|
|
405
483
|
def embed_slides(
|
|
406
484
|
self,
|
|
407
485
|
slides: SlideSequence,
|
|
408
486
|
*,
|
|
487
|
+
annotations: list[str] | None = None,
|
|
409
488
|
preprocessing: PreprocessingConfig | None = None,
|
|
410
489
|
execution: ExecutionOptions | None = None,
|
|
411
|
-
) ->
|
|
490
|
+
) -> dict[str, dict[str, EmbeddedSlide]]:
|
|
412
491
|
from slide2vec.inference import embed_slides
|
|
413
492
|
|
|
414
493
|
resolved = _coerce_execution_options(execution, model=self)
|
|
415
494
|
resolved_preprocessing = _resolve_direct_api_preprocessing(self, preprocessing)
|
|
416
495
|
with _auto_progress_reporting(output_dir=resolved.output_dir):
|
|
417
496
|
_validate_model_config(self, resolved_preprocessing, resolved)
|
|
418
|
-
|
|
497
|
+
embedded = embed_slides(
|
|
419
498
|
self,
|
|
420
499
|
slides,
|
|
421
500
|
preprocessing=resolved_preprocessing,
|
|
422
501
|
execution=resolved,
|
|
423
502
|
)
|
|
503
|
+
return _group_embedded_slides(embedded, annotations=annotations)
|
|
424
504
|
|
|
425
505
|
def embed_patient(
|
|
426
506
|
self,
|
|
@@ -587,6 +667,77 @@ class Pipeline:
|
|
|
587
667
|
)
|
|
588
668
|
|
|
589
669
|
|
|
670
|
+
def _select_embedded_bag(
|
|
671
|
+
bags: Mapping[str, EmbeddedSlide],
|
|
672
|
+
annotation: str | list[str] | None,
|
|
673
|
+
) -> EmbeddedSlide | list[EmbeddedSlide]:
|
|
674
|
+
"""Select per-class bag(s) from a single slide's ``{label: EmbeddedSlide}`` map.
|
|
675
|
+
|
|
676
|
+
numpy-style shape-in/shape-out:
|
|
677
|
+
|
|
678
|
+
- a single class string returns one :class:`EmbeddedSlide`;
|
|
679
|
+
- a list of class strings returns a list in the requested order;
|
|
680
|
+
- ``None`` returns the single bag when the run produced exactly one,
|
|
681
|
+
otherwise raises naming the available bags and directing to
|
|
682
|
+
:meth:`Model.embed_slides`.
|
|
683
|
+
|
|
684
|
+
Requesting a class the run did not produce raises naming what is available.
|
|
685
|
+
"""
|
|
686
|
+
available = sorted(bags)
|
|
687
|
+
if isinstance(annotation, str):
|
|
688
|
+
if annotation not in bags:
|
|
689
|
+
raise ValueError(
|
|
690
|
+
f"embed_slide() found no '{annotation}' annotation bag for this "
|
|
691
|
+
f"slide; available bags: {available}."
|
|
692
|
+
)
|
|
693
|
+
return bags[annotation]
|
|
694
|
+
if annotation is not None:
|
|
695
|
+
selected: list[EmbeddedSlide] = []
|
|
696
|
+
for label in annotation:
|
|
697
|
+
if label not in bags:
|
|
698
|
+
raise ValueError(
|
|
699
|
+
f"embed_slide() found no '{label}' annotation bag for this "
|
|
700
|
+
f"slide; available bags: {available}."
|
|
701
|
+
)
|
|
702
|
+
selected.append(bags[label])
|
|
703
|
+
return selected
|
|
704
|
+
if len(bags) == 1:
|
|
705
|
+
return next(iter(bags.values()))
|
|
706
|
+
raise ValueError(
|
|
707
|
+
f"embed_slide() received {len(bags)} annotation bags for this slide "
|
|
708
|
+
f"({available}); annotation-aware sampling produces one bag per class. "
|
|
709
|
+
"Pass annotation=... to select a class, or use Model.embed_slides(...) "
|
|
710
|
+
"to receive every per-class EmbeddedSlide (each carries its .annotation)."
|
|
711
|
+
)
|
|
712
|
+
|
|
713
|
+
|
|
714
|
+
def _group_embedded_slides(
|
|
715
|
+
embedded: Sequence[EmbeddedSlide],
|
|
716
|
+
*,
|
|
717
|
+
annotations: list[str] | None = None,
|
|
718
|
+
) -> dict[str, dict[str, EmbeddedSlide]]:
|
|
719
|
+
"""Group flat per-row :class:`EmbeddedSlide` results into a nested mapping.
|
|
720
|
+
|
|
721
|
+
The outer key is ``sample_id``; the inner key is the bag's informative
|
|
722
|
+
annotation label (``"tissue"``/``"merged"``/class name), never ``None``.
|
|
723
|
+
A bag whose ``.annotation`` is ``None`` (defensive — post-#173 real runs
|
|
724
|
+
always carry a label) does not produce a ``None`` key.
|
|
725
|
+
|
|
726
|
+
When *annotations* is given, the inner keys are restricted to the named
|
|
727
|
+
classes (in encounter order).
|
|
728
|
+
"""
|
|
729
|
+
requested = None if annotations is None else set(annotations)
|
|
730
|
+
grouped: dict[str, dict[str, EmbeddedSlide]] = {}
|
|
731
|
+
for bag in embedded:
|
|
732
|
+
label = bag.annotation
|
|
733
|
+
if label is None:
|
|
734
|
+
continue
|
|
735
|
+
if requested is not None and label not in requested:
|
|
736
|
+
continue
|
|
737
|
+
grouped.setdefault(bag.sample_id, {})[label] = bag
|
|
738
|
+
return grouped
|
|
739
|
+
|
|
740
|
+
|
|
590
741
|
def _coerce_execution_options(
|
|
591
742
|
options: ExecutionOptions | None,
|
|
592
743
|
*,
|
|
@@ -5,6 +5,7 @@ from typing import Any
|
|
|
5
5
|
|
|
6
6
|
import numpy as np
|
|
7
7
|
import torch
|
|
8
|
+
from hs2p.fileops import is_flattened_annotation
|
|
8
9
|
|
|
9
10
|
|
|
10
11
|
@dataclass(frozen=True, kw_only=True)
|
|
@@ -29,6 +30,7 @@ class SlideEmbeddingArtifact:
|
|
|
29
30
|
format: str
|
|
30
31
|
feature_dim: int
|
|
31
32
|
latent_path: Path | None = None
|
|
33
|
+
annotation: str | None = None
|
|
32
34
|
|
|
33
35
|
@property
|
|
34
36
|
def metadata(self) -> dict[str, Any]:
|
|
@@ -58,6 +60,7 @@ class HierarchicalEmbeddingArtifact:
|
|
|
58
60
|
feature_dim: int
|
|
59
61
|
num_regions: int
|
|
60
62
|
tiles_per_region: int
|
|
63
|
+
annotation: str | None = None
|
|
61
64
|
|
|
62
65
|
@property
|
|
63
66
|
def metadata(self) -> dict[str, Any]:
|
|
@@ -90,6 +93,53 @@ def _write_metadata(path: Path, metadata: dict[str, Any]) -> None:
|
|
|
90
93
|
path.write_text(json.dumps(metadata, indent=2, sort_keys=True), encoding="utf-8")
|
|
91
94
|
|
|
92
95
|
|
|
96
|
+
def tile_embeddings_subdir(annotation: str | None) -> str:
|
|
97
|
+
"""Namespace the ``tile_embeddings`` output dir per annotation class.
|
|
98
|
+
|
|
99
|
+
Reuses hs2p's flatten rule (the single source of truth): ``None`` and the sentinel
|
|
100
|
+
``"tissue"`` collapse to the flat ``tile_embeddings`` root, so the default tissue-only
|
|
101
|
+
path is byte-for-byte unchanged; any real class label gets its own
|
|
102
|
+
``tile_embeddings/<class>`` subdirectory.
|
|
103
|
+
"""
|
|
104
|
+
if is_flattened_annotation(annotation):
|
|
105
|
+
return "tile_embeddings"
|
|
106
|
+
return f"tile_embeddings/{annotation}"
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def slide_embeddings_subdir(annotation: str | None) -> str:
|
|
110
|
+
"""Namespace the ``slide_embeddings`` output dir per annotation class.
|
|
111
|
+
|
|
112
|
+
Reuses hs2p's flatten rule (the single source of truth, shared with
|
|
113
|
+
:func:`tile_embeddings_subdir`): ``None`` and the sentinel ``"tissue"`` collapse to the
|
|
114
|
+
flat ``slide_embeddings`` root, so the default tissue-only path is byte-for-byte
|
|
115
|
+
unchanged; any real class label gets its own ``slide_embeddings/<class>`` subdirectory.
|
|
116
|
+
"""
|
|
117
|
+
if is_flattened_annotation(annotation):
|
|
118
|
+
return "slide_embeddings"
|
|
119
|
+
return f"slide_embeddings/{annotation}"
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
def slide_latents_subdir(annotation: str | None) -> str:
|
|
123
|
+
"""Namespace the ``slide_latents`` output dir per annotation class (mirrors slide embeddings)."""
|
|
124
|
+
if is_flattened_annotation(annotation):
|
|
125
|
+
return "slide_latents"
|
|
126
|
+
return f"slide_latents/{annotation}"
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def hierarchical_embeddings_subdir(annotation: str | None) -> str:
|
|
130
|
+
"""Namespace the ``hierarchical_embeddings`` output dir per annotation class.
|
|
131
|
+
|
|
132
|
+
Reuses hs2p's flatten rule (the single source of truth, shared with
|
|
133
|
+
:func:`tile_embeddings_subdir` and :func:`slide_embeddings_subdir`): ``None`` and the
|
|
134
|
+
sentinel ``"tissue"`` collapse to the flat ``hierarchical_embeddings`` root, so the
|
|
135
|
+
default tissue-only path is byte-for-byte unchanged; any real class label gets its own
|
|
136
|
+
``hierarchical_embeddings/<class>`` subdirectory.
|
|
137
|
+
"""
|
|
138
|
+
if is_flattened_annotation(annotation):
|
|
139
|
+
return "hierarchical_embeddings"
|
|
140
|
+
return f"hierarchical_embeddings/{annotation}"
|
|
141
|
+
|
|
142
|
+
|
|
93
143
|
def _setup_artifact_paths(
|
|
94
144
|
output_dir: str | Path, subdir: str, sample_id: str, output_format: str
|
|
95
145
|
) -> tuple[Path, Path]:
|
|
@@ -142,9 +192,12 @@ def write_tile_embeddings(
|
|
|
142
192
|
output_format: str = "pt",
|
|
143
193
|
metadata: dict[str, Any] | None = None,
|
|
144
194
|
tile_index: Any | None = None,
|
|
195
|
+
annotation: str | None = None,
|
|
145
196
|
) -> TileEmbeddingArtifact:
|
|
146
197
|
output_format = _validate_output_format(output_format)
|
|
147
|
-
artifact_path, metadata_path = _setup_artifact_paths(
|
|
198
|
+
artifact_path, metadata_path = _setup_artifact_paths(
|
|
199
|
+
output_dir, tile_embeddings_subdir(annotation), sample_id, output_format
|
|
200
|
+
)
|
|
148
201
|
feature_array = _ensure_array(features)
|
|
149
202
|
if output_format == "pt":
|
|
150
203
|
torch.save(_ensure_tensor(features), artifact_path)
|
|
@@ -180,9 +233,12 @@ def write_tile_embedding_metadata(
|
|
|
180
233
|
feature_dim: int | None = None,
|
|
181
234
|
num_tiles: int = 0,
|
|
182
235
|
metadata: dict[str, Any] | None = None,
|
|
236
|
+
annotation: str | None = None,
|
|
183
237
|
) -> Path:
|
|
184
238
|
output_format = _validate_output_format(output_format)
|
|
185
|
-
_, metadata_path = _setup_artifact_paths(
|
|
239
|
+
_, metadata_path = _setup_artifact_paths(
|
|
240
|
+
output_dir, tile_embeddings_subdir(annotation), sample_id, output_format
|
|
241
|
+
)
|
|
186
242
|
tile_metadata = _build_tile_embedding_metadata(
|
|
187
243
|
sample_id,
|
|
188
244
|
output_format=output_format,
|
|
@@ -202,9 +258,12 @@ def write_slide_embeddings(
|
|
|
202
258
|
output_format: str = "pt",
|
|
203
259
|
metadata: dict[str, Any] | None = None,
|
|
204
260
|
latents: Any | None = None,
|
|
261
|
+
annotation: str | None = None,
|
|
205
262
|
) -> SlideEmbeddingArtifact:
|
|
206
263
|
output_format = _validate_output_format(output_format)
|
|
207
|
-
artifact_path, metadata_path = _setup_artifact_paths(
|
|
264
|
+
artifact_path, metadata_path = _setup_artifact_paths(
|
|
265
|
+
output_dir, slide_embeddings_subdir(annotation), sample_id, output_format
|
|
266
|
+
)
|
|
208
267
|
embedding_array = _ensure_array(embedding)
|
|
209
268
|
latent_path = None
|
|
210
269
|
if output_format == "pt":
|
|
@@ -212,7 +271,9 @@ def write_slide_embeddings(
|
|
|
212
271
|
else:
|
|
213
272
|
np.savez_compressed(artifact_path, features=embedding_array)
|
|
214
273
|
if latents is not None:
|
|
215
|
-
latent_path, _ = _setup_artifact_paths(
|
|
274
|
+
latent_path, _ = _setup_artifact_paths(
|
|
275
|
+
output_dir, slide_latents_subdir(annotation), sample_id, output_format
|
|
276
|
+
)
|
|
216
277
|
if output_format == "pt":
|
|
217
278
|
torch.save(_ensure_tensor(latents), latent_path)
|
|
218
279
|
else:
|
|
@@ -234,6 +295,7 @@ def write_slide_embeddings(
|
|
|
234
295
|
format=output_format,
|
|
235
296
|
feature_dim=slide_metadata["feature_dim"],
|
|
236
297
|
latent_path=latent_path,
|
|
298
|
+
annotation=annotation,
|
|
237
299
|
)
|
|
238
300
|
|
|
239
301
|
|
|
@@ -283,9 +345,12 @@ def write_hierarchical_embeddings(
|
|
|
283
345
|
output_dir: str | Path,
|
|
284
346
|
output_format: str = "pt",
|
|
285
347
|
metadata: dict[str, Any] | None = None,
|
|
348
|
+
annotation: str | None = None,
|
|
286
349
|
) -> HierarchicalEmbeddingArtifact:
|
|
287
350
|
output_format = _validate_output_format(output_format)
|
|
288
|
-
artifact_path, metadata_path = _setup_artifact_paths(
|
|
351
|
+
artifact_path, metadata_path = _setup_artifact_paths(
|
|
352
|
+
output_dir, hierarchical_embeddings_subdir(annotation), sample_id, output_format
|
|
353
|
+
)
|
|
289
354
|
feature_array = _ensure_array(features)
|
|
290
355
|
if feature_array.ndim != 3:
|
|
291
356
|
raise ValueError(
|
|
@@ -315,4 +380,5 @@ def write_hierarchical_embeddings(
|
|
|
315
380
|
feature_dim=int(hierarchical_metadata["feature_dim"]),
|
|
316
381
|
num_regions=int(hierarchical_metadata["num_regions"]),
|
|
317
382
|
tiles_per_region=int(hierarchical_metadata["tiles_per_region"]),
|
|
383
|
+
annotation=annotation,
|
|
318
384
|
)
|
|
@@ -26,6 +26,24 @@ tiling:
|
|
|
26
26
|
read_coordinates_from: # path to an existing directory containing pre-extracted `.coordinates.npz` / `.coordinates.meta.json` artifacts to reuse instead of starting tiling from scratch
|
|
27
27
|
read_tiles_from: # path to an existing directory containing pre-extracted `.tiles.tar` tile stores to reuse instead of starting tiling from scratch
|
|
28
28
|
backend: "auto" # backend to use for slide reading; "auto" lets hs2p resolve the best backend per slide, preferring cuCIM when available
|
|
29
|
+
independent_sampling: true # selection strategy when annotation sampling is active. true: sample each class independently against its own binary mask (independent selection); false: sample once over the union of active classes, then post-filter per class by coverage (joint selection). Ignored when the masks vocabulary is left at the tissue-only default.
|
|
30
|
+
masks:
|
|
31
|
+
# Annotation-mask vocabulary forwarded to hs2p's sampling resolver. The shipped default
|
|
32
|
+
# ({background:0, tissue:1}) is plain binary tissue tiling — leave it untouched and the run
|
|
33
|
+
# behaves exactly as a tissue-only run. Customising the vocabulary (e.g. adding a `tumor`
|
|
34
|
+
# class with its own pixel value + min_coverage) opts the run into annotation-aware sampling,
|
|
35
|
+
# where `mask_path` is read as a multi-label raster. The `tissue` min_coverage entry below is
|
|
36
|
+
# the single source of truth for the tissue threshold.
|
|
37
|
+
output_mode: per_annotation # how sampled tiles are grouped into artifacts. per_annotation: one flat artifact set per sampled class, namespaced under a `<class>/` subdir (the `tissue` class collapses to the flat root). merged: a single flat artifact set per slide over the union of tiles passing any active class threshold — it carries no class label, so it lands at the flat output root (no `<class>/` subdir).
|
|
38
|
+
pixel_mapping: # {class_name: integer pixel value in the mask raster}
|
|
39
|
+
background: 0
|
|
40
|
+
tissue: 1
|
|
41
|
+
colors: # {class_name: [r, g, b] | null} used when rendering previews
|
|
42
|
+
background:
|
|
43
|
+
tissue: [157, 219, 129]
|
|
44
|
+
min_coverage: # {class_name: float | null}; minimum fraction of a tile that must be covered to keep it; null = don't sample that class
|
|
45
|
+
background:
|
|
46
|
+
tissue: 0.1
|
|
29
47
|
params:
|
|
30
48
|
requested_spacing_um: # spacing at which to tile the slide, in microns per pixel; filled from a preset model when available
|
|
31
49
|
tolerance: 0.05 # tolerance for matching the spacing (float between 0 and 1, deciding how much the spacing can deviate from the one specified in the slide metadata)
|
|
@@ -33,7 +51,6 @@ tiling:
|
|
|
33
51
|
requested_region_size_px: # size of hierarchical parent regions in pixels; when unset and region_tile_multiple is set, derived from requested_tile_size_px * region_tile_multiple
|
|
34
52
|
region_tile_multiple: # hierarchical region grid width/height in tiles; e.g. 6 means 6x6 tiles per region
|
|
35
53
|
overlap: 0.0 # fraction of overlap between two consecutive tiles (float between 0 and 1)
|
|
36
|
-
tissue_threshold: 0.1 # minimum fraction of pixels that must be tissue to keep a tile (float between 0 and 1)
|
|
37
54
|
seg_params:
|
|
38
55
|
# downsample controls which pyramid level is read for tissue segmentation.
|
|
39
56
|
# Larger values are faster and use less memory; smaller values can improve mask precision.
|
|
@@ -49,12 +49,21 @@ def main(argv=None) -> int:
|
|
|
49
49
|
)
|
|
50
50
|
preprocessing = deserialize_preprocessing(request["preprocessing"])
|
|
51
51
|
execution = deserialize_execution(request["execution"])
|
|
52
|
+
from slide2vec.runtime.distributed import (
|
|
53
|
+
decode_work_unit,
|
|
54
|
+
encode_work_unit,
|
|
55
|
+
work_unit_shard_stem,
|
|
56
|
+
)
|
|
57
|
+
from slide2vec.runtime.embedding import tiling_result_annotation
|
|
58
|
+
|
|
52
59
|
load_successful_tiled_slides_fn = getattr(inference, "load_successful_tiled_slides", None)
|
|
53
60
|
if not callable(load_successful_tiled_slides_fn):
|
|
54
61
|
from slide2vec.runtime.manifest import load_successful_tiled_slides as load_successful_tiled_slides_fn
|
|
55
62
|
slide_records, tiling_results = load_successful_tiled_slides_fn(output_dir)
|
|
56
|
-
|
|
57
|
-
|
|
63
|
+
# Key by the composite (sample_id, annotation) work unit so a multi-class slide's sibling
|
|
64
|
+
# classes never overwrite each other; flat units collapse to the bare sample_id key.
|
|
65
|
+
paired_by_unit = {
|
|
66
|
+
encode_work_unit(slide.sample_id, tiling_result_annotation(tiling_result)): (slide, tiling_result)
|
|
58
67
|
for slide, tiling_result in zip(slide_records, tiling_results)
|
|
59
68
|
}
|
|
60
69
|
progress_events_path = request.get("progress_events_path")
|
|
@@ -71,8 +80,9 @@ def main(argv=None) -> int:
|
|
|
71
80
|
|
|
72
81
|
with context:
|
|
73
82
|
if request["strategy"] == "tile_shard":
|
|
74
|
-
|
|
75
|
-
|
|
83
|
+
work_unit = request["work_unit"]
|
|
84
|
+
shard_stem = work_unit_shard_stem(*decode_work_unit(work_unit))
|
|
85
|
+
slide, tiling_result = paired_by_unit[work_unit]
|
|
76
86
|
loaded = model._load_backend()
|
|
77
87
|
if is_hierarchical_preprocessing(preprocessing):
|
|
78
88
|
geometry = resolve_hierarchical_geometry(preprocessing, tiling_result)
|
|
@@ -103,7 +113,7 @@ def main(argv=None) -> int:
|
|
|
103
113
|
"flat_index": torch.as_tensor(shard_indices, dtype=torch.long),
|
|
104
114
|
"tile_embeddings": tile_embeddings.detach().cpu() if torch.is_tensor(tile_embeddings) else torch.as_tensor(tile_embeddings),
|
|
105
115
|
}
|
|
106
|
-
torch.save(payload, coordination_dir / f"{
|
|
116
|
+
torch.save(payload, coordination_dir / f"{shard_stem}.hier.rank{global_rank}.pt")
|
|
107
117
|
else:
|
|
108
118
|
num_tiles = len(tiling_result.x)
|
|
109
119
|
tile_indices = np.array_split(np.arange(num_tiles, dtype=np.int64), world_size)[global_rank]
|
|
@@ -129,14 +139,14 @@ def main(argv=None) -> int:
|
|
|
129
139
|
"tile_index": torch.as_tensor(tile_indices, dtype=torch.long),
|
|
130
140
|
"tile_embeddings": tile_embeddings.detach().cpu() if torch.is_tensor(tile_embeddings) else torch.as_tensor(tile_embeddings),
|
|
131
141
|
}
|
|
132
|
-
torch.save(payload, coordination_dir / f"{
|
|
142
|
+
torch.save(payload, coordination_dir / f"{shard_stem}.tiles.rank{global_rank}.pt")
|
|
133
143
|
return 0
|
|
134
144
|
|
|
135
145
|
assigned_ids = list(request.get("assignments", {}).get(str(global_rank), []))
|
|
136
146
|
if not assigned_ids:
|
|
137
147
|
return 0
|
|
138
|
-
assigned_slides = [
|
|
139
|
-
assigned_tiling_results = [
|
|
148
|
+
assigned_slides = [paired_by_unit[unit_key][0] for unit_key in assigned_ids]
|
|
149
|
+
assigned_tiling_results = [paired_by_unit[unit_key][1] for unit_key in assigned_ids]
|
|
140
150
|
|
|
141
151
|
def _persist_embedded_slide(slide, tiling_result, embedded_slide) -> None:
|
|
142
152
|
payload = {
|
|
@@ -144,7 +154,12 @@ def main(argv=None) -> int:
|
|
|
144
154
|
"slide_embedding": _to_cpu_payload(embedded_slide.slide_embedding),
|
|
145
155
|
"latents": _to_cpu_payload(embedded_slide.latents),
|
|
146
156
|
}
|
|
147
|
-
|
|
157
|
+
# Stem by (sample_id, annotation) so two classes of one slide never overwrite each
|
|
158
|
+
# other; flat units keep the bare-sample_id filename for backward compatibility.
|
|
159
|
+
stem = work_unit_shard_stem(
|
|
160
|
+
embedded_slide.sample_id, tiling_result_annotation(tiling_result)
|
|
161
|
+
)
|
|
162
|
+
torch.save(payload, coordination_dir / f"{stem}.embedded.pt")
|
|
148
163
|
|
|
149
164
|
compute_embedded_slides_fn = getattr(inference, "_compute_embedded_slides", None)
|
|
150
165
|
if not callable(compute_embedded_slides_fn):
|
|
@@ -3,7 +3,8 @@ from contextlib import nullcontext
|
|
|
3
3
|
import json
|
|
4
4
|
from pathlib import Path
|
|
5
5
|
|
|
6
|
-
from slide2vec.runtime.distributed import assign_slides_to_ranks
|
|
6
|
+
from slide2vec.runtime.distributed import assign_slides_to_ranks, encode_work_unit
|
|
7
|
+
from slide2vec.runtime.embedding import tiling_result_annotation
|
|
7
8
|
|
|
8
9
|
|
|
9
10
|
def get_args_parser(add_help: bool = True) -> argparse.ArgumentParser:
|
|
@@ -48,26 +49,29 @@ def main(argv=None) -> int:
|
|
|
48
49
|
if not callable(load_successful_tiled_slides_fn):
|
|
49
50
|
from slide2vec.runtime.manifest import load_successful_tiled_slides as load_successful_tiled_slides_fn
|
|
50
51
|
slide_records, tiling_results = load_successful_tiled_slides_fn(tiling_input_dir)
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
52
|
+
# Each (sample_id, annotation) row is an independent work unit; key by the composite so a
|
|
53
|
+
# multi-class slide's sibling classes never overwrite each other. Flat units (None / tissue /
|
|
54
|
+
# merged) encode to the bare sample_id, byte-identical to pre-#168 single-class runs.
|
|
55
|
+
paired_by_unit = {
|
|
56
|
+
encode_work_unit(slide.sample_id, tiling_result_annotation(tiling_result)): (slide, tiling_result)
|
|
57
|
+
for slide, tiling_result in zip(slide_records, tiling_results)
|
|
58
|
+
}
|
|
59
|
+
requested_work_units = request.get("work_units")
|
|
60
|
+
if requested_work_units is not None:
|
|
61
|
+
requested_unit_set = {str(unit) for unit in requested_work_units}
|
|
62
|
+
paired_by_unit = {
|
|
63
|
+
unit_key: pair
|
|
64
|
+
for unit_key, pair in paired_by_unit.items()
|
|
65
|
+
if unit_key in requested_unit_set
|
|
66
|
+
}
|
|
67
|
+
slide_records = [slide for slide, _ in paired_by_unit.values()]
|
|
68
|
+
tiling_results = [tiling_result for _, tiling_result in paired_by_unit.values()]
|
|
61
69
|
assignments = assign_slides_to_ranks(slide_records, tiling_results, num_gpus=world_size)
|
|
62
70
|
assigned_ids = assignments.get(global_rank, [])
|
|
63
71
|
if not assigned_ids:
|
|
64
72
|
return 0
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
for slide, tiling_result in zip(slide_records, tiling_results)
|
|
68
|
-
}
|
|
69
|
-
assigned_slides = [paired_by_sample[sample_id][0] for sample_id in assigned_ids]
|
|
70
|
-
assigned_tiling_results = [paired_by_sample[sample_id][1] for sample_id in assigned_ids]
|
|
73
|
+
assigned_slides = [paired_by_unit[unit_key][0] for unit_key in assigned_ids]
|
|
74
|
+
assigned_tiling_results = [paired_by_unit[unit_key][1] for unit_key in assigned_ids]
|
|
71
75
|
progress_events_path = request.get("progress_events_path")
|
|
72
76
|
reporter = (
|
|
73
77
|
JsonlProgressReporter(
|