slide2vec 4.6.3__tar.gz → 4.7.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {slide2vec-4.6.3 → slide2vec-4.7.0}/PKG-INFO +1 -1
- {slide2vec-4.6.3 → slide2vec-4.7.0}/pyproject.toml +2 -2
- {slide2vec-4.6.3 → slide2vec-4.7.0}/slide2vec/__init__.py +1 -1
- {slide2vec-4.6.3 → slide2vec-4.7.0}/slide2vec/encoders/models/conch.py +12 -0
- {slide2vec-4.6.3 → slide2vec-4.7.0}/slide2vec/encoders/models/hibou.py +6 -0
- {slide2vec-4.6.3 → slide2vec-4.7.0}/slide2vec/encoders/models/midnight.py +6 -0
- {slide2vec-4.6.3 → slide2vec-4.7.0}/slide2vec/encoders/models/musk.py +5 -0
- {slide2vec-4.6.3 → slide2vec-4.7.0}/slide2vec/inference.py +74 -28
- slide2vec-4.7.0/slide2vec/runtime/dense_regions.py +229 -0
- {slide2vec-4.6.3 → slide2vec-4.7.0}/slide2vec/runtime/persist_callbacks.py +60 -13
- {slide2vec-4.6.3 → slide2vec-4.7.0}/slide2vec.egg-info/PKG-INFO +1 -1
- {slide2vec-4.6.3 → slide2vec-4.7.0}/slide2vec.egg-info/SOURCES.txt +2 -0
- slide2vec-4.7.0/tests/test_dense_regions.py +117 -0
- {slide2vec-4.6.3 → slide2vec-4.7.0}/tests/test_regression_inference.py +127 -0
- {slide2vec-4.6.3 → slide2vec-4.7.0}/LICENSE +0 -0
- {slide2vec-4.6.3 → slide2vec-4.7.0}/README.md +0 -0
- {slide2vec-4.6.3 → slide2vec-4.7.0}/setup.cfg +0 -0
- {slide2vec-4.6.3 → slide2vec-4.7.0}/slide2vec/__main__.py +0 -0
- {slide2vec-4.6.3 → slide2vec-4.7.0}/slide2vec/api.py +0 -0
- {slide2vec-4.6.3 → slide2vec-4.7.0}/slide2vec/artifacts.py +0 -0
- {slide2vec-4.6.3 → slide2vec-4.7.0}/slide2vec/cli.py +0 -0
- {slide2vec-4.6.3 → slide2vec-4.7.0}/slide2vec/configs/__init__.py +0 -0
- {slide2vec-4.6.3 → slide2vec-4.7.0}/slide2vec/configs/default.yaml +0 -0
- {slide2vec-4.6.3 → slide2vec-4.7.0}/slide2vec/configs/resources.py +0 -0
- {slide2vec-4.6.3 → slide2vec-4.7.0}/slide2vec/data/__init__.py +0 -0
- {slide2vec-4.6.3 → slide2vec-4.7.0}/slide2vec/data/dataset.py +0 -0
- {slide2vec-4.6.3 → slide2vec-4.7.0}/slide2vec/data/tile_reader.py +0 -0
- {slide2vec-4.6.3 → slide2vec-4.7.0}/slide2vec/data/tile_store.py +0 -0
- {slide2vec-4.6.3 → slide2vec-4.7.0}/slide2vec/distributed/__init__.py +0 -0
- {slide2vec-4.6.3 → slide2vec-4.7.0}/slide2vec/distributed/direct_embed_worker.py +0 -0
- {slide2vec-4.6.3 → slide2vec-4.7.0}/slide2vec/distributed/pipeline_worker.py +0 -0
- {slide2vec-4.6.3 → slide2vec-4.7.0}/slide2vec/encoders/__init__.py +0 -0
- {slide2vec-4.6.3 → slide2vec-4.7.0}/slide2vec/encoders/base.py +0 -0
- {slide2vec-4.6.3 → slide2vec-4.7.0}/slide2vec/encoders/models/__init__.py +0 -0
- {slide2vec-4.6.3 → slide2vec-4.7.0}/slide2vec/encoders/models/gigapath.py +0 -0
- {slide2vec-4.6.3 → slide2vec-4.7.0}/slide2vec/encoders/models/hoptimus.py +0 -0
- {slide2vec-4.6.3 → slide2vec-4.7.0}/slide2vec/encoders/models/lunit.py +0 -0
- {slide2vec-4.6.3 → slide2vec-4.7.0}/slide2vec/encoders/models/moozy/__init__.py +0 -0
- {slide2vec-4.6.3 → slide2vec-4.7.0}/slide2vec/encoders/models/moozy/blocks.py +0 -0
- {slide2vec-4.6.3 → slide2vec-4.7.0}/slide2vec/encoders/models/moozy/case.py +0 -0
- {slide2vec-4.6.3 → slide2vec-4.7.0}/slide2vec/encoders/models/moozy/loading.py +0 -0
- {slide2vec-4.6.3 → slide2vec-4.7.0}/slide2vec/encoders/models/moozy/slide.py +0 -0
- {slide2vec-4.6.3 → slide2vec-4.7.0}/slide2vec/encoders/models/moozy/types.py +0 -0
- {slide2vec-4.6.3 → slide2vec-4.7.0}/slide2vec/encoders/models/phikon.py +0 -0
- {slide2vec-4.6.3 → slide2vec-4.7.0}/slide2vec/encoders/models/prism.py +0 -0
- {slide2vec-4.6.3 → slide2vec-4.7.0}/slide2vec/encoders/models/prost40m.py +0 -0
- {slide2vec-4.6.3 → slide2vec-4.7.0}/slide2vec/encoders/models/titan.py +0 -0
- {slide2vec-4.6.3 → slide2vec-4.7.0}/slide2vec/encoders/models/uni.py +0 -0
- {slide2vec-4.6.3 → slide2vec-4.7.0}/slide2vec/encoders/models/virchow.py +0 -0
- {slide2vec-4.6.3 → slide2vec-4.7.0}/slide2vec/encoders/registry.py +0 -0
- {slide2vec-4.6.3 → slide2vec-4.7.0}/slide2vec/encoders/validation.py +0 -0
- {slide2vec-4.6.3 → slide2vec-4.7.0}/slide2vec/progress.py +0 -0
- {slide2vec-4.6.3 → slide2vec-4.7.0}/slide2vec/runtime/__init__.py +0 -0
- {slide2vec-4.6.3 → slide2vec-4.7.0}/slide2vec/runtime/artifacts_collect.py +0 -0
- {slide2vec-4.6.3 → slide2vec-4.7.0}/slide2vec/runtime/batching.py +0 -0
- {slide2vec-4.6.3 → slide2vec-4.7.0}/slide2vec/runtime/cpu_budget.py +0 -0
- {slide2vec-4.6.3 → slide2vec-4.7.0}/slide2vec/runtime/distributed.py +0 -0
- {slide2vec-4.6.3 → slide2vec-4.7.0}/slide2vec/runtime/distributed_stage.py +0 -0
- {slide2vec-4.6.3 → slide2vec-4.7.0}/slide2vec/runtime/embedding.py +0 -0
- {slide2vec-4.6.3 → slide2vec-4.7.0}/slide2vec/runtime/embedding_persist.py +0 -0
- {slide2vec-4.6.3 → slide2vec-4.7.0}/slide2vec/runtime/embedding_pipeline.py +0 -0
- {slide2vec-4.6.3 → slide2vec-4.7.0}/slide2vec/runtime/hierarchical.py +0 -0
- {slide2vec-4.6.3 → slide2vec-4.7.0}/slide2vec/runtime/manifest.py +0 -0
- {slide2vec-4.6.3 → slide2vec-4.7.0}/slide2vec/runtime/model_settings.py +0 -0
- {slide2vec-4.6.3 → slide2vec-4.7.0}/slide2vec/runtime/patient_pipeline.py +0 -0
- {slide2vec-4.6.3 → slide2vec-4.7.0}/slide2vec/runtime/persistence.py +0 -0
- {slide2vec-4.6.3 → slide2vec-4.7.0}/slide2vec/runtime/process_list.py +0 -0
- {slide2vec-4.6.3 → slide2vec-4.7.0}/slide2vec/runtime/progress_bridge.py +0 -0
- {slide2vec-4.6.3 → slide2vec-4.7.0}/slide2vec/runtime/registry.py +0 -0
- {slide2vec-4.6.3 → slide2vec-4.7.0}/slide2vec/runtime/serialization.py +0 -0
- {slide2vec-4.6.3 → slide2vec-4.7.0}/slide2vec/runtime/slide_encode.py +0 -0
- {slide2vec-4.6.3 → slide2vec-4.7.0}/slide2vec/runtime/tiling.py +0 -0
- {slide2vec-4.6.3 → slide2vec-4.7.0}/slide2vec/runtime/tiling_pipeline.py +0 -0
- {slide2vec-4.6.3 → slide2vec-4.7.0}/slide2vec/runtime/types.py +0 -0
- {slide2vec-4.6.3 → slide2vec-4.7.0}/slide2vec/runtime/worker_io.py +0 -0
- {slide2vec-4.6.3 → slide2vec-4.7.0}/slide2vec/utils/__init__.py +0 -0
- {slide2vec-4.6.3 → slide2vec-4.7.0}/slide2vec/utils/config.py +0 -0
- {slide2vec-4.6.3 → slide2vec-4.7.0}/slide2vec/utils/coordinates.py +0 -0
- {slide2vec-4.6.3 → slide2vec-4.7.0}/slide2vec/utils/log_utils.py +0 -0
- {slide2vec-4.6.3 → slide2vec-4.7.0}/slide2vec/utils/tiling_io.py +0 -0
- {slide2vec-4.6.3 → slide2vec-4.7.0}/slide2vec/utils/utils.py +0 -0
- {slide2vec-4.6.3 → slide2vec-4.7.0}/slide2vec.egg-info/dependency_links.txt +0 -0
- {slide2vec-4.6.3 → slide2vec-4.7.0}/slide2vec.egg-info/entry_points.txt +0 -0
- {slide2vec-4.6.3 → slide2vec-4.7.0}/slide2vec.egg-info/not-zip-safe +0 -0
- {slide2vec-4.6.3 → slide2vec-4.7.0}/slide2vec.egg-info/requires.txt +0 -0
- {slide2vec-4.6.3 → slide2vec-4.7.0}/slide2vec.egg-info/top_level.txt +0 -0
- {slide2vec-4.6.3 → slide2vec-4.7.0}/tests/test_architecture_runtime_split.py +0 -0
- {slide2vec-4.6.3 → slide2vec-4.7.0}/tests/test_attention_extraction.py +0 -0
- {slide2vec-4.6.3 → slide2vec-4.7.0}/tests/test_dense_extraction.py +0 -0
- {slide2vec-4.6.3 → slide2vec-4.7.0}/tests/test_dense_locality_gated.py +0 -0
- {slide2vec-4.6.3 → slide2vec-4.7.0}/tests/test_encoder_registry.py +0 -0
- {slide2vec-4.6.3 → slide2vec-4.7.0}/tests/test_hs2p_package_cutover.py +0 -0
- {slide2vec-4.6.3 → slide2vec-4.7.0}/tests/test_output_consistency.py +0 -0
- {slide2vec-4.6.3 → slide2vec-4.7.0}/tests/test_progress.py +0 -0
- {slide2vec-4.6.3 → slide2vec-4.7.0}/tests/test_regression_core.py +0 -0
- {slide2vec-4.6.3 → slide2vec-4.7.0}/tests/test_regression_models.py +0 -0
- {slide2vec-4.6.3 → slide2vec-4.7.0}/tests/test_runtime_batching.py +0 -0
- {slide2vec-4.6.3 → slide2vec-4.7.0}/tests/test_tile_store.py +0 -0
- {slide2vec-4.6.3 → slide2vec-4.7.0}/tests/test_tiling_pipeline.py +0 -0
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "slide2vec"
|
|
7
|
-
version = "4.6.3"
|
|
7
|
+
version = "4.7.0"
|
|
8
8
|
description = "Embedding of whole slide images with Foundation Models"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
requires-python = ">=3.10"
|
|
@@ -164,7 +164,7 @@ no_implicit_reexport = true
|
|
|
164
164
|
max-line-length = 160
|
|
165
165
|
|
|
166
166
|
[tool.bumpver]
|
|
167
|
-
current_version = "4.6.3"
|
|
167
|
+
current_version = "4.7.0"
|
|
168
168
|
version_pattern = "MAJOR.MINOR.PATCH"
|
|
169
169
|
commit = false # We do version bumping in CI, not as a commit
|
|
170
170
|
tag = false # Git tag already exists — we don't auto-tag
|
|
@@ -141,6 +141,13 @@ class CONCH(TileEncoder):
|
|
|
141
141
|
def encode_dim(self) -> int:
|
|
142
142
|
return 512
|
|
143
143
|
|
|
144
|
+
@property
def patch_size(self) -> tuple[int, int]:
    """Patch size of the CONCH vision trunk, as resolved by ``_patch_size_from_trunk``."""
    # The CONCH vision trunk is a timm ViT-B/16; its patch size is exposed so the
    # dense path can resolve the token grid. open_clip builds the trunk without
    # dynamic_img_size, so dense extraction must use the native 448 window.
    vision_trunk = self._model.visual.trunk
    return _patch_size_from_trunk(vision_trunk)
|
|
150
|
+
|
|
144
151
|
@property
|
|
145
152
|
def device(self) -> torch.device:
|
|
146
153
|
return self._device
|
|
@@ -204,6 +211,11 @@ class CONCHv15(TileEncoder):
|
|
|
204
211
|
def encode_dim(self) -> int:
|
|
205
212
|
return 768
|
|
206
213
|
|
|
214
|
+
@property
def patch_size(self) -> tuple[int, int]:
    """Patch size of the CONCHv15 trunk, as resolved by ``_patch_size_from_trunk``."""
    # CONCHv15's TITAN trunk is a timm ViT; its patch size is exposed for the dense path.
    titan_trunk = self._model.trunk
    return _patch_size_from_trunk(titan_trunk)
|
|
218
|
+
|
|
207
219
|
@property
|
|
208
220
|
def device(self) -> torch.device:
|
|
209
221
|
return self._device
|
|
@@ -123,6 +123,12 @@ class _HibouBase(TileEncoder):
|
|
|
123
123
|
def encode_dim(self) -> int:
|
|
124
124
|
return self._encode_dim
|
|
125
125
|
|
|
126
|
+
@property
def patch_size(self) -> tuple[int, int]:
    """Square patch size read from the wrapped model's config, as a pair."""
    # The HF Dinov2-style config stores a single patch size; the dense path
    # wants a pair, so the same value is returned for both axes.
    side = int(self._model.config.patch_size)
    return (side, side)
|
|
131
|
+
|
|
126
132
|
@property
|
|
127
133
|
def device(self) -> torch.device:
|
|
128
134
|
return self._device
|
|
@@ -124,6 +124,12 @@ class Midnight(TileEncoder):
|
|
|
124
124
|
def encode_dim(self) -> int:
|
|
125
125
|
return 3072
|
|
126
126
|
|
|
127
|
+
@property
def patch_size(self) -> tuple[int, int]:
    """Square patch size read from the wrapped model's config, as a pair."""
    # The HF Dinov2-style config stores a single patch size; the dense path
    # wants a pair, so the same value is returned for both axes.
    side = int(self._model.config.patch_size)
    return (side, side)
|
|
132
|
+
|
|
127
133
|
@property
|
|
128
134
|
def device(self) -> torch.device:
|
|
129
135
|
return self._device
|
|
@@ -114,6 +114,11 @@ class MUSK(TileEncoder):
|
|
|
114
114
|
def encode_dim(self) -> int:
|
|
115
115
|
return 2048 if self._output_variant == "ms_aug" else 1024 # cls
|
|
116
116
|
|
|
117
|
+
@property
def patch_size(self) -> tuple[int, int]:
    """Patch size of the BEiT3 vision embedding, normalized by ``_as_hw``."""
    # The BEiT3 vision embedding carries the patch size; it is exposed for the dense path.
    raw_patch_size = self._model.beit3.vision_embed.patch_size
    return _as_hw(raw_patch_size)
|
|
121
|
+
|
|
117
122
|
@property
|
|
118
123
|
def device(self) -> torch.device:
|
|
119
124
|
return self._device
|
|
@@ -148,6 +148,52 @@ def load_model(
|
|
|
148
148
|
)
|
|
149
149
|
|
|
150
150
|
|
|
151
|
+
def _reconcile_embedding_process_list(
    *,
    model,
    preprocessing: PreprocessingConfig,
    execution: ExecutionOptions,
    process_list_path,
    embeddable_slides,
    output_dir,
):
    """Run the single end-of-run reconciliation of the process_list against disk.

    The incremental persist callback batches its process_list writes on the tile
    path, so the trailing partial batch only reaches the CSV through this final
    full rewrite. Every single-GPU embedding entry point must call this once its
    embed loop has finished. Artifacts are collected from disk rather than from
    the callback's in-memory list, which also covers resume-skipped slides.

    The process_list is rewritten only when ``process_list_path`` is not ``None``
    and points at an existing file; the artifacts are collected either way.

    Returns:
        The collected ``(tile, hierarchical, slide)`` artifact lists.
    """
    wants_tiles = embedding.should_persist_tile_embeddings(model, execution)
    wants_hierarchical = hierarchical.is_hierarchical_preprocessing(preprocessing)
    wants_slides = model.level == "slide"

    collected = artifacts_collect.collect_pipeline_artifacts(
        embeddable_slides,
        output_dir=output_dir,
        output_format=execution.output_format,
        # Tile artifacts are collected only when the run is not hierarchical.
        include_tile_embeddings=wants_tiles and not wants_hierarchical,
        include_hierarchical_embeddings=wants_hierarchical,
        include_slide_embeddings=wants_slides,
    )
    tile_artifacts, hierarchical_artifacts, slide_artifacts = collected

    has_process_list = process_list_path is not None and Path(process_list_path).is_file()
    if not has_process_list:
        return tile_artifacts, hierarchical_artifacts, slide_artifacts

    persistence.update_process_list_after_embedding(
        process_list_path,
        successful_slides=embeddable_slides,
        persist_tile_embeddings=wants_tiles,
        persist_hierarchical_embeddings=wants_hierarchical,
        include_slide_embeddings=wants_slides,
        encoder_name=model.name,
        output_variant=process_list.resolved_process_list_output_variant(model),
        tile_artifacts=tile_artifacts,
        hierarchical_artifacts=hierarchical_artifacts,
        slide_artifacts=slide_artifacts,
    )
    return tile_artifacts, hierarchical_artifacts, slide_artifacts
|
|
195
|
+
|
|
196
|
+
|
|
151
197
|
def embed_slides(
|
|
152
198
|
model,
|
|
153
199
|
slides,
|
|
@@ -266,6 +312,18 @@ def embed_slides(
|
|
|
266
312
|
hierarchical_artifacts=hierarchical_artifacts,
|
|
267
313
|
slide_artifacts=slide_artifacts,
|
|
268
314
|
)
|
|
315
|
+
elif execution.output_dir is not None:
|
|
316
|
+
# Single-GPU: the incremental callback persisted the embeddings but
|
|
317
|
+
# batches its process_list writes, so reconcile the full CSV once at
|
|
318
|
+
# the end (covers the trailing partial batch on a clean run).
|
|
319
|
+
_reconcile_embedding_process_list(
|
|
320
|
+
model=model,
|
|
321
|
+
preprocessing=preprocessing,
|
|
322
|
+
execution=execution,
|
|
323
|
+
process_list_path=process_list_path,
|
|
324
|
+
embeddable_slides=embeddable_slides,
|
|
325
|
+
output_dir=Path(execution.output_dir),
|
|
326
|
+
)
|
|
269
327
|
emit_progress(
|
|
270
328
|
"embedding.finished",
|
|
271
329
|
slide_count=len(embeddable_slides),
|
|
@@ -752,7 +810,6 @@ def run_pipeline(
|
|
|
752
810
|
persist_tile_embeddings = embedding.should_persist_tile_embeddings(model, execution)
|
|
753
811
|
persist_hierarchical_embeddings = hierarchical.is_hierarchical_preprocessing(resolved_preprocessing)
|
|
754
812
|
include_slide_embeddings = model.level == "slide"
|
|
755
|
-
include_tile_embeddings = persist_tile_embeddings and not persist_hierarchical_embeddings
|
|
756
813
|
pending_slides, pending_tiling_results = persist_callbacks.pending_local_embedding_records(
|
|
757
814
|
embeddable_slides,
|
|
758
815
|
embeddable_tiling_results,
|
|
@@ -790,25 +847,13 @@ def run_pipeline(
|
|
|
790
847
|
on_embedded_slide=local_persist_callback,
|
|
791
848
|
collect_results=False,
|
|
792
849
|
)
|
|
793
|
-
tile_artifacts, hierarchical_artifacts, slide_artifacts = artifacts_collect.collect_pipeline_artifacts(
|
|
794
|
-
embeddable_slides,
|
850
|
+
tile_artifacts, hierarchical_artifacts, slide_artifacts = _reconcile_embedding_process_list(
|
|
851
|
+
model=model,
|
|
852
|
+
preprocessing=resolved_preprocessing,
|
|
853
|
+
execution=execution,
|
|
854
|
+
process_list_path=process_list_path,
|
|
855
|
+
embeddable_slides=embeddable_slides,
|
|
795
856
|
output_dir=output_dir,
|
|
796
|
-
output_format=execution.output_format,
|
|
797
|
-
include_tile_embeddings=include_tile_embeddings,
|
|
798
|
-
include_hierarchical_embeddings=persist_hierarchical_embeddings,
|
|
799
|
-
include_slide_embeddings=include_slide_embeddings,
|
|
800
|
-
)
|
|
801
|
-
persistence.update_process_list_after_embedding(
|
|
802
|
-
process_list_path,
|
|
803
|
-
successful_slides=embeddable_slides,
|
|
804
|
-
persist_tile_embeddings=persist_tile_embeddings,
|
|
805
|
-
persist_hierarchical_embeddings=persist_hierarchical_embeddings,
|
|
806
|
-
include_slide_embeddings=include_slide_embeddings,
|
|
807
|
-
encoder_name=model.name,
|
|
808
|
-
output_variant=process_list.resolved_process_list_output_variant(model),
|
|
809
|
-
tile_artifacts=tile_artifacts,
|
|
810
|
-
hierarchical_artifacts=hierarchical_artifacts,
|
|
811
|
-
slide_artifacts=slide_artifacts,
|
|
812
857
|
)
|
|
813
858
|
emit_progress(
|
|
814
859
|
"embedding.finished",
|
|
@@ -907,7 +952,7 @@ def run_pipeline_with_coordinates(
|
|
|
907
952
|
slide_artifacts=slide_artifacts,
|
|
908
953
|
process_list_path=process_list_path,
|
|
909
954
|
)
|
|
910
|
-
local_persist_callback,
|
|
955
|
+
local_persist_callback, _, _ = persist_callbacks.build_incremental_persist_callback(
|
|
911
956
|
model=model,
|
|
912
957
|
preprocessing=resolved_preprocessing,
|
|
913
958
|
execution=execution,
|
|
@@ -922,17 +967,18 @@ def run_pipeline_with_coordinates(
|
|
|
922
967
|
on_embedded_slide=local_persist_callback,
|
|
923
968
|
collect_results=False,
|
|
924
969
|
)
|
|
925
|
-
tile_artifacts
|
|
926
|
-
|
|
927
|
-
|
|
928
|
-
|
|
929
|
-
|
|
930
|
-
|
|
931
|
-
|
|
970
|
+
tile_artifacts, hierarchical_artifacts, slide_artifacts = _reconcile_embedding_process_list(
|
|
971
|
+
model=model,
|
|
972
|
+
preprocessing=resolved_preprocessing,
|
|
973
|
+
execution=execution,
|
|
974
|
+
process_list_path=process_list_path,
|
|
975
|
+
embeddable_slides=embeddable_slides,
|
|
976
|
+
output_dir=output_dir,
|
|
977
|
+
)
|
|
932
978
|
return RunResult(
|
|
933
979
|
tile_artifacts=tile_artifacts,
|
|
934
980
|
hierarchical_artifacts=hierarchical_artifacts,
|
|
935
|
-
slide_artifacts=
|
|
981
|
+
slide_artifacts=slide_artifacts,
|
|
936
982
|
process_list_path=process_list_path,
|
|
937
983
|
)
|
|
938
984
|
except Exception as exc:
|
|
@@ -0,0 +1,229 @@
|
|
|
1
|
+
"""Dense ``(d, h, w)`` grid extraction over **slide regions at coordinates**.
|
|
2
|
+
|
|
3
|
+
The dense counterpart of the pooled coordinate path (``compute_tile_embeddings_for_slide``
|
|
4
|
+
→ ``run_forward_pass`` → ``encode_tiles``): instead of pooling each region to one vector,
|
|
5
|
+
each sampled ROI is read **spacing-aware** from the slide, run through the encoder's
|
|
6
|
+
normalization-only dense transform (``get_dense_transform`` — NOT the pooled transform,
|
|
7
|
+
which crops), padded up to the encoder's patch multiple, and encoded via
|
|
8
|
+
``encode_tiles_dense`` into a ``(d, grid_h, grid_w)`` token grid.
|
|
9
|
+
|
|
10
|
+
This is the extraction half of soma's slide-manifest segmentation path: slide2vec reads
|
|
11
|
+
regions + encodes (it already owns the region reader and the dense encode); soma sources
|
|
12
|
+
the ROI coordinates (hs2p annotation sampling) and persists/caches the grids. It mirrors
|
|
13
|
+
the pooled split exactly — extraction here, caching in soma.
|
|
14
|
+
|
|
15
|
+
Region reads are spacing-aware via hs2p (:meth:`hs2p.wsi.wsi.WSI.read_region_at_spacing`):
|
|
16
|
+
the finest pyramid level ``<=`` the requested µm/px is read and downscaled to the exact
|
|
17
|
+
``target_size`` (``area`` for images), so the token grid registers against a mask read at
|
|
18
|
+
the same spacing. The ``wsi`` is injected (any object exposing ``read_region_at_spacing``),
|
|
19
|
+
so the loop is unit-testable offline with a fake reader + a random-weight encoder.
|
|
20
|
+
|
|
21
|
+
Whole-tile only (one padded forward per region). Sliding-window dense extraction over
|
|
22
|
+
coordinates (``window_size`` < input) is a deferred follow-up — large ROIs that exceed the
|
|
23
|
+
encoder's comfortable field are out of scope for the first increment.
|
|
24
|
+
"""
|
|
25
|
+
|
|
26
|
+
from __future__ import annotations
|
|
27
|
+
|
|
28
|
+
from dataclasses import dataclass
|
|
29
|
+
from typing import Callable, Sequence
|
|
30
|
+
|
|
31
|
+
import numpy as np
|
|
32
|
+
import torch
|
|
33
|
+
import torch.nn.functional as F
|
|
34
|
+
from PIL import Image
|
|
35
|
+
|
|
36
|
+
from slide2vec.runtime.slide_encode import slide_encode_autocast_ctx
|
|
37
|
+
|
|
38
|
+
_PAD_MODES = {"reflect", "constant", "zero", "replicate"}
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def _normalize_hw(value: int | tuple[int, int], *, name: str) -> tuple[int, int]:
    """Normalize a size given as an int or an ``(h, w)`` pair to a positive ``(h, w)``.

    Args:
        value: A single int (used for both axes) or a two-element iterable.
        name: Argument name used in error messages.

    Returns:
        ``(h, w)`` as plain ints.

    Raises:
        ValueError: If ``value`` is neither an int nor a two-element iterable of
            int-convertible items, or if any extent is not strictly positive.
    """
    if isinstance(value, int):
        if value <= 0:
            raise ValueError(f"{name} must be positive, got {value}")
        return value, value
    try:
        h, w = value
        # Convert inside the ``try`` so a malformed element such as ``None``
        # surfaces as the documented ValueError rather than a bare TypeError.
        h, w = int(h), int(w)
    except (TypeError, ValueError) as exc:
        raise ValueError(f"{name} must be an int or an (h, w) pair, got {value!r}") from exc
    if h <= 0 or w <= 0:
        raise ValueError(f"{name} must be positive, got {(h, w)}")
    return h, w
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def _round_up(value: int, multiple: int) -> int:
    """Return the smallest multiple of ``multiple`` that is >= ``value`` (positive ``multiple``)."""
    # Bias up by (multiple - 1), then drop the remainder to land on the multiple.
    biased = value + multiple - 1
    return biased - biased % multiple
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
@dataclass(frozen=True)
class DenseGridGeometry:
    """Immutable spatial layout resolved for one dense extraction (slide2vec-owned).

    Mirrors soma's ``DenseGridGeometry``: dense-grid geometry is extraction
    geometry, so it lives in the extraction engine, and soma reads it back from
    the persisted sidecar.
    """

    # Supervision tile size as (h, w).
    target_size: tuple[int, int]
    # Encoder patch size as (h, w).
    patch_size: tuple[int, int]
    # ``target_size`` rounded up to the patch multiple (padding goes bottom/right).
    encoded_size: tuple[int, int]
    # Resulting token grid as (grid_h, grid_w).
    grid_shape: tuple[int, int]
    # Padding added to reach ``encoded_size``, as (pad_bottom, pad_right).
    pad: tuple[int, int]
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def compute_dense_geometry(
    *, target_size: int | tuple[int, int], patch_size: int | tuple[int, int]
) -> DenseGridGeometry:
    """Resolve the encoded size, token grid, and bottom/right padding for a tile.

    Args:
        target_size: Tile size, as an int or an ``(h, w)`` pair.
        patch_size: Encoder patch size, as an int or an ``(h, w)`` pair.

    Returns:
        A ``DenseGridGeometry`` in which each target extent is rounded up to the
        next multiple of the matching patch extent.

    Raises:
        ValueError: Propagated from ``_normalize_hw`` for malformed or
            non-positive sizes.
    """
    target = _normalize_hw(target_size, name="target_size")
    patch = _normalize_hw(patch_size, name="patch_size")
    # Work per axis: index 0 is height, index 1 is width.
    encoded = tuple(_round_up(extent, step) for extent, step in zip(target, patch))
    token_grid = tuple(extent // step for extent, step in zip(encoded, patch))
    padding = tuple(full - extent for full, extent in zip(encoded, target))
    return DenseGridGeometry(
        target_size=target,
        patch_size=patch,
        encoded_size=encoded,
        grid_shape=token_grid,
        pad=padding,
    )
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def pad_image_to_encoded(
    tensor: torch.Tensor,
    geometry: DenseGridGeometry,
    *,
    pad_mode: str,
    image_pad_value: float | None,
) -> torch.Tensor:
    """Pad a ``(C, H, W)`` tile on the bottom/right by ``geometry.pad``.

    Args:
        tensor: Tile to pad.
        geometry: Supplies ``pad`` as ``(pad_bottom, pad_right)``.
        pad_mode: ``"constant"`` and ``"zero"`` both pad with a constant value;
            any other value is forwarded to ``F.pad`` as its ``mode``.
        image_pad_value: Fill value for the constant modes; ``None`` (or any
            falsy value) means ``0.0``.

    Returns:
        The input tensor itself when no padding is required, otherwise a padded
        tensor.
    """
    bottom, right = geometry.pad
    if bottom == 0 and right == 0:
        return tensor

    # F.pad orders the last two dims as (left, right, top, bottom).
    amounts = (0, right, 0, bottom)
    # F.pad's 2-D modes need a batch dim, so add one and strip it afterwards.
    batched = tensor.unsqueeze(0)
    if pad_mode == "constant" or pad_mode == "zero":
        fill = float(image_pad_value or 0.0)
        padded = F.pad(batched, amounts, mode="constant", value=fill)
    else:
        padded = F.pad(batched, amounts, mode=pad_mode)
    return padded.squeeze(0)
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def _resolve_encode_fn(
    model,
    *,
    feature_kind: str,
    attention_blocks: tuple[int, ...],
    attention_include_registers: bool,
) -> Callable[[torch.Tensor], torch.Tensor]:
    """Pick the encoder callable that matches ``feature_kind``.

    Args:
        model: Encoder exposing ``encode_tiles_dense`` and/or
            ``encode_tiles_attention``.
        feature_kind: ``"patch_features"`` or ``"cls_attention"``.
        attention_blocks: Block indices forwarded to ``encode_tiles_attention``.
        attention_include_registers: Forwarded to ``encode_tiles_attention``.

    Returns:
        ``model.encode_tiles_dense`` for patch features, or a closure over
        ``model.encode_tiles_attention`` with the attention options bound.

    Raises:
        ValueError: If ``feature_kind`` is not one of the two supported kinds.
    """
    if feature_kind == "cls_attention":
        # Coerce the options once, at resolve time, instead of on every call.
        block_ids = tuple(int(index) for index in attention_blocks)
        with_registers = bool(attention_include_registers)

        def _attention_encode(window: torch.Tensor) -> torch.Tensor:
            return model.encode_tiles_attention(
                window, blocks=block_ids, include_registers=with_registers
            )

        return _attention_encode
    if feature_kind != "patch_features":
        raise ValueError(
            f"unsupported feature_kind {feature_kind!r}; expected 'patch_features' or 'cls_attention'"
        )
    return model.encode_tiles_dense
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def encode_regions_dense(
    *,
    model,
    device: torch.device | str,
    wsi,
    coordinates: Sequence[tuple[int, int]],
    requested_spacing_um: float,
    target_size: int | tuple[int, int],
    tolerance: float = 0.05,
    pad_mode: str = "reflect",
    image_pad_value: float | None = None,
    feature_kind: str = "patch_features",
    attention_blocks: tuple[int, ...] = (-1,),
    attention_include_registers: bool = False,
    batch_size: int = 1,
    precision: str = "fp32",
    dense_transform: Callable | None = None,
) -> np.ndarray:
    """Encode slide regions at ``coordinates`` into dense grids; return ``(N, d, gh, gw)``.

    Injectable core: takes a constructed dense-capable ``model`` (with
    ``encode_tiles_dense`` / ``encode_tiles_attention`` / ``patch_size`` /
    ``get_dense_transform``) and a ``wsi`` exposing
    ``read_region_at_spacing(location, requested_spacing_um, size, *, tolerance,
    interpolation)``, so it runs offline in tests with random weights + a fake reader.

    Args:
        model: Dense-capable encoder (see above).
        device: Device the batches are moved to before encoding.
        wsi: Region reader (see above).
        coordinates: ``(x, y)`` top-left locations in **level-0** pixel space (the hs2p
            tiling convention; passed straight to ``read_region_at_spacing``).
        requested_spacing_um: µm/px to read each region at.
        target_size: supervision tile size (int or ``(h, w)``); the region is read at this
            size at ``requested_spacing_um`` and the token grid registers to it.
        tolerance: Forwarded to ``read_region_at_spacing``.
        pad_mode: One of ``_PAD_MODES``; how tiles are padded up to the patch multiple.
        image_pad_value: Fill value for the constant pad modes (``None`` means 0).
        feature_kind: ``"patch_features"`` selects ``encode_tiles_dense`` (patch grid);
            ``"cls_attention"`` selects ``encode_tiles_attention`` (CLS-attention grid).
        attention_blocks: Block indices for the attention path.
        attention_include_registers: Forwarded to the attention path.
        batch_size: Regions per forward pass; values below 1 are treated as 1.
        precision: Forwarded to ``slide_encode_autocast_ctx``.
        dense_transform: Optional override for ``model.get_dense_transform()``; must
            not resize or crop.

    Returns:
        A ``float32`` array of dense grids in coordinate order. With no coordinates the
        result has shape ``(0, 0, grid_h, grid_w)``, since the channel count is only
        known after a forward pass.

    Raises:
        ValueError: For an unsupported ``pad_mode`` or ``feature_kind``, a reflect
            padding larger than the tile allows, a transform output that is not
            ``(C, target_h, target_w)``, or an encoder output that is not 4-D.
    """
    if pad_mode not in _PAD_MODES:
        raise ValueError(f"unsupported pad_mode {pad_mode!r}; expected one of {sorted(_PAD_MODES)}")
    geometry = compute_dense_geometry(target_size=target_size, patch_size=model.patch_size)
    if dense_transform is None:
        dense_transform = model.get_dense_transform()
    encode_fn = _resolve_encode_fn(
        model,
        feature_kind=feature_kind,
        attention_blocks=attention_blocks,
        attention_include_registers=attention_include_registers,
    )
    target_h, target_w = geometry.target_size

    coords = [(int(x), int(y)) for x, y in coordinates]
    grid_h, grid_w = geometry.grid_shape
    if not coords:
        return np.empty((0, 0, grid_h, grid_w), dtype=np.float32)

    # Reflection padding must be strictly smaller than the padded dimension.
    # Fail fast with a clear message instead of an opaque torch error raised
    # only after the first region has already been read.
    pad_bottom, pad_right = geometry.pad
    if pad_mode == "reflect" and (pad_bottom >= target_h or pad_right >= target_w):
        raise ValueError(
            f"pad_mode 'reflect' cannot pad target_size {(target_h, target_w)} by "
            f"{(pad_bottom, pad_right)} (bottom, right): reflection padding must be smaller "
            "than the tile; use 'replicate' or 'constant' instead."
        )

    def _read_padded(location: tuple[int, int]) -> torch.Tensor:
        """Read one region, apply the dense transform, and pad it to the encoded size."""
        region = wsi.read_region_at_spacing(
            location,
            float(requested_spacing_um),
            (target_w, target_h),  # hs2p size is (width, height)
            tolerance=float(tolerance),
            interpolation="area",
        )
        # Keep only the first three channels (drops a trailing alpha channel, if any).
        region = np.ascontiguousarray(np.asarray(region)[..., :3])
        tensor = torch.as_tensor(dense_transform(Image.fromarray(region))).as_subclass(torch.Tensor)
        if tensor.ndim != 3:
            raise ValueError(
                f"dense transform at {location} produced a {tensor.ndim}-D tensor; expected (C, H, W)."
            )
        actual_hw = tuple(int(s) for s in tensor.shape[-2:])
        if actual_hw != (target_h, target_w):
            raise ValueError(
                f"region at {location} is {actual_hw} after the dense "
                f"transform, but target_size is {(target_h, target_w)}. The dense transform must be "
                "normalization-only (no resize/crop)."
            )
        return pad_image_to_encoded(
            tensor, geometry, pad_mode=pad_mode, image_pad_value=image_pad_value
        )

    # Loop-invariant: clamp the batch size once rather than on every iteration.
    step = max(1, int(batch_size))
    grids: list[np.ndarray] = []
    with torch.inference_mode(), slide_encode_autocast_ctx(device, precision):
        for start in range(0, len(coords), step):
            chunk = coords[start : start + step]
            batch = torch.stack([_read_padded(loc) for loc in chunk]).to(device, non_blocking=True)
            out = encode_fn(batch)
            if out.ndim != 4:
                raise ValueError(
                    f"{feature_kind} encode returned a {out.ndim}-D tensor; expected (B, d, gh, gw)."
                )
            grids.append(out.detach().float().cpu().numpy())
    return np.concatenate(grids, axis=0)
|
|
@@ -18,6 +18,16 @@ from slide2vec.runtime.persistence import update_process_list_after_embedding
|
|
|
18
18
|
from slide2vec.runtime.process_list import resolved_process_list_output_variant
|
|
19
19
|
from slide2vec.utils.tiling_io import load_embedding_process_df
|
|
20
20
|
|
|
21
|
+
# Number of completed tile-level samples to buffer before rewriting the
|
|
22
|
+
# process_list CSV. Each rewrite re-reads and re-writes the *entire* CSV, so
|
|
23
|
+
# doing it once per sample is O(N^2) in I/O when every tile is its own sample
|
|
24
|
+
# (e.g. patch-level benchmarks with hundreds of thousands of tiles). Batching
|
|
25
|
+
# makes it O(N) while only risking the re-embedding of at most this many cheap
|
|
26
|
+
# tile samples after a crash (a clean run reconciles the full CSV at the end).
|
|
27
|
+
# Slide- and hierarchical-level runs (sample == slide: few, expensive samples)
|
|
28
|
+
# keep a flush interval of 1 so every completed slide is checkpointed.
|
|
29
|
+
TILE_EMBEDDING_FLUSH_INTERVAL = 1000
|
|
30
|
+
|
|
21
31
|
|
|
22
32
|
def has_complete_local_embedding_outputs(
|
|
23
33
|
sample_id: str,
|
|
@@ -141,6 +151,45 @@ def build_incremental_persist_callback(
|
|
|
141
151
|
persist_hierarchical_embeddings = is_hierarchical_preprocessing(preprocessing)
|
|
142
152
|
include_slide_embeddings = model.level == "slide"
|
|
143
153
|
|
|
154
|
+
# Only the pure tile-level path produces the many-cheap-samples workload that
|
|
155
|
+
# makes per-sample CSV rewrites O(N^2). When the model aggregates to slide
|
|
156
|
+
# level (or runs hierarchically) the sample is a slide/region — few and
|
|
157
|
+
# expensive — so checkpoint every one (interval 1). save_tile_embeddings on a
|
|
158
|
+
# slide-level model still iterates per slide, hence the include_slide check.
|
|
159
|
+
is_tile_level = (
|
|
160
|
+
persist_tile_embeddings
|
|
161
|
+
and not persist_hierarchical_embeddings
|
|
162
|
+
and not include_slide_embeddings
|
|
163
|
+
)
|
|
164
|
+
flush_interval = TILE_EMBEDDING_FLUSH_INTERVAL if is_tile_level else 1
|
|
165
|
+
|
|
166
|
+
# Buffered completions awaiting the next batched process_list rewrite.
|
|
167
|
+
pending_slides: list[SlideSpec] = []
|
|
168
|
+
pending_tile_artifacts: list[TileEmbeddingArtifact] = []
|
|
169
|
+
pending_hierarchical_artifacts: list[HierarchicalEmbeddingArtifact] = []
|
|
170
|
+
pending_slide_artifacts: list[SlideEmbeddingArtifact] = []
|
|
171
|
+
|
|
172
|
+
def _flush_process_list() -> None:
|
|
173
|
+
if not pending_slides:
|
|
174
|
+
return
|
|
175
|
+
if process_list_path is not None and process_list_path.is_file():
|
|
176
|
+
update_process_list_after_embedding(
|
|
177
|
+
process_list_path,
|
|
178
|
+
successful_slides=list(pending_slides),
|
|
179
|
+
persist_tile_embeddings=persist_tile_embeddings,
|
|
180
|
+
persist_hierarchical_embeddings=persist_hierarchical_embeddings,
|
|
181
|
+
include_slide_embeddings=include_slide_embeddings,
|
|
182
|
+
encoder_name=model.name,
|
|
183
|
+
output_variant=resolved_process_list_output_variant(model),
|
|
184
|
+
tile_artifacts=list(pending_tile_artifacts),
|
|
185
|
+
hierarchical_artifacts=list(pending_hierarchical_artifacts),
|
|
186
|
+
slide_artifacts=list(pending_slide_artifacts),
|
|
187
|
+
)
|
|
188
|
+
pending_slides.clear()
|
|
189
|
+
pending_tile_artifacts.clear()
|
|
190
|
+
pending_hierarchical_artifacts.clear()
|
|
191
|
+
pending_slide_artifacts.clear()
|
|
192
|
+
|
|
144
193
|
def _persist_completed_slide(slide: SlideSpec, tiling_result, embedded_slide: EmbeddedSlide) -> None:
|
|
145
194
|
tile_artifact, slide_artifact = persist_embedded_slide(
|
|
146
195
|
model,
|
|
@@ -153,18 +202,16 @@ def build_incremental_persist_callback(
|
|
|
153
202
|
tile_artifacts.append(tile_artifact)
|
|
154
203
|
if slide_artifact is not None:
|
|
155
204
|
slide_artifacts.append(slide_artifact)
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
slide_artifacts=[slide_artifact] if slide_artifact is not None else [],
|
|
168
|
-
)
|
|
205
|
+
# Buffer this completion; a slide with no successful artifact is still
|
|
206
|
+
# recorded so the batched rewrite can mark its feature_status="error".
|
|
207
|
+
pending_slides.append(slide)
|
|
208
|
+
if isinstance(tile_artifact, TileEmbeddingArtifact):
|
|
209
|
+
pending_tile_artifacts.append(tile_artifact)
|
|
210
|
+
elif isinstance(tile_artifact, HierarchicalEmbeddingArtifact):
|
|
211
|
+
pending_hierarchical_artifacts.append(tile_artifact)
|
|
212
|
+
if slide_artifact is not None:
|
|
213
|
+
pending_slide_artifacts.append(slide_artifact)
|
|
214
|
+
if len(pending_slides) >= flush_interval:
|
|
215
|
+
_flush_process_list()
|
|
169
216
|
|
|
170
217
|
return _persist_completed_slide, tile_artifacts, slide_artifacts
|
|
@@ -53,6 +53,7 @@ slide2vec/runtime/__init__.py
|
|
|
53
53
|
slide2vec/runtime/artifacts_collect.py
|
|
54
54
|
slide2vec/runtime/batching.py
|
|
55
55
|
slide2vec/runtime/cpu_budget.py
|
|
56
|
+
slide2vec/runtime/dense_regions.py
|
|
56
57
|
slide2vec/runtime/distributed.py
|
|
57
58
|
slide2vec/runtime/distributed_stage.py
|
|
58
59
|
slide2vec/runtime/embedding.py
|
|
@@ -83,6 +84,7 @@ tests/test_architecture_runtime_split.py
|
|
|
83
84
|
tests/test_attention_extraction.py
|
|
84
85
|
tests/test_dense_extraction.py
|
|
85
86
|
tests/test_dense_locality_gated.py
|
|
87
|
+
tests/test_dense_regions.py
|
|
86
88
|
tests/test_encoder_registry.py
|
|
87
89
|
tests/test_hs2p_package_cutover.py
|
|
88
90
|
tests/test_output_consistency.py
|
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
"""Tests for dense grid extraction over slide regions: ``encode_regions_dense``.
|
|
2
|
+
|
|
3
|
+
Fully offline (``pretrained=False`` random weights) + an injected fake reader, so no
|
|
4
|
+
weights, no real WSI. Checks (1) grid shapes over a batch of coordinates and (2) that the
|
|
5
|
+
orchestration is a faithful wrapper — its per-region grid is byte-identical to a direct
|
|
6
|
+
``encode_tiles_dense(transform → pad)`` of the same region.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import numpy as np
|
|
12
|
+
import pytest
|
|
13
|
+
|
|
14
|
+
torch = pytest.importorskip("torch")
|
|
15
|
+
timm = pytest.importorskip("timm")
|
|
16
|
+
|
|
17
|
+
from slide2vec.encoders.base import TimmTileEncoder # noqa: E402
|
|
18
|
+
from slide2vec.runtime.dense_regions import ( # noqa: E402
|
|
19
|
+
compute_dense_geometry,
|
|
20
|
+
encode_regions_dense,
|
|
21
|
+
pad_image_to_encoded,
|
|
22
|
+
)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def _encoder(**kwargs) -> TimmTileEncoder:
|
|
26
|
+
return TimmTileEncoder("vit_tiny_patch16_224", pretrained=False, num_classes=0,
|
|
27
|
+
dynamic_img_size=True, **kwargs)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class _FakeWSI:
|
|
31
|
+
"""Returns a deterministic RGB region per location (so reads are reproducible)."""
|
|
32
|
+
|
|
33
|
+
def __init__(self, *, target_h: int, target_w: int):
|
|
34
|
+
self._target_h = target_h
|
|
35
|
+
self._target_w = target_w
|
|
36
|
+
self.calls: list[tuple] = []
|
|
37
|
+
|
|
38
|
+
def read_region_at_spacing(self, location, requested_spacing_um, size, *, tolerance, interpolation):
|
|
39
|
+
self.calls.append((tuple(location), requested_spacing_um, tuple(size), tolerance, interpolation))
|
|
40
|
+
width, height = size
|
|
41
|
+
x, y = location
|
|
42
|
+
rng = np.random.default_rng(abs(hash((int(x), int(y)))) % (2**32))
|
|
43
|
+
return rng.integers(0, 256, size=(height, width, 3), dtype=np.uint8)
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def test_encode_regions_dense_shapes_over_coordinates():
|
|
47
|
+
enc = _encoder()
|
|
48
|
+
target_size = 64 # patch 16 -> grid 4x4, no padding
|
|
49
|
+
wsi = _FakeWSI(target_h=target_size, target_w=target_size)
|
|
50
|
+
coords = [(0, 0), (64, 0), (0, 64)]
|
|
51
|
+
|
|
52
|
+
grids = encode_regions_dense(
|
|
53
|
+
model=enc,
|
|
54
|
+
device="cpu",
|
|
55
|
+
wsi=wsi,
|
|
56
|
+
coordinates=coords,
|
|
57
|
+
requested_spacing_um=0.5,
|
|
58
|
+
target_size=target_size,
|
|
59
|
+
batch_size=2,
|
|
60
|
+
)
|
|
61
|
+
|
|
62
|
+
assert grids.shape == (3, enc.encode_dim, 4, 4)
|
|
63
|
+
assert grids.dtype == np.float32
|
|
64
|
+
# Reads went through read_region_at_spacing at (target_w, target_h), area interp, level-0 coords.
|
|
65
|
+
assert [c[0] for c in wsi.calls] == [(0, 0), (64, 0), (0, 64)]
|
|
66
|
+
assert all(c[2] == (target_size, target_size) and c[4] == "area" for c in wsi.calls)
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def test_encode_regions_dense_pads_non_multiple_target():
|
|
70
|
+
enc = _encoder()
|
|
71
|
+
target_size = 60 # padded up to 64 -> grid 4x4
|
|
72
|
+
wsi = _FakeWSI(target_h=target_size, target_w=target_size)
|
|
73
|
+
grids = encode_regions_dense(
|
|
74
|
+
model=enc, device="cpu", wsi=wsi, coordinates=[(0, 0)],
|
|
75
|
+
requested_spacing_um=0.5, target_size=target_size,
|
|
76
|
+
)
|
|
77
|
+
assert grids.shape == (1, enc.encode_dim, 4, 4)
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def test_encode_regions_dense_matches_direct_encode():
|
|
81
|
+
"""The primitive is a faithful wrapper: parity vs a hand-rolled transform+pad+encode."""
|
|
82
|
+
enc = _encoder()
|
|
83
|
+
target_size = 64
|
|
84
|
+
wsi = _FakeWSI(target_h=target_size, target_w=target_size)
|
|
85
|
+
coords = [(0, 0), (128, 256)]
|
|
86
|
+
|
|
87
|
+
grids = encode_regions_dense(
|
|
88
|
+
model=enc, device="cpu", wsi=wsi, coordinates=coords,
|
|
89
|
+
requested_spacing_um=0.5, target_size=target_size,
|
|
90
|
+
)
|
|
91
|
+
|
|
92
|
+
# Re-read the same regions (deterministic) and encode them directly.
|
|
93
|
+
from PIL import Image
|
|
94
|
+
|
|
95
|
+
geometry = compute_dense_geometry(target_size=target_size, patch_size=enc.patch_size)
|
|
96
|
+
transform = enc.get_dense_transform()
|
|
97
|
+
ref_wsi = _FakeWSI(target_h=target_size, target_w=target_size)
|
|
98
|
+
with torch.inference_mode():
|
|
99
|
+
for i, loc in enumerate(coords):
|
|
100
|
+
region = ref_wsi.read_region_at_spacing(
|
|
101
|
+
loc, 0.5, (target_size, target_size), tolerance=0.05, interpolation="area"
|
|
102
|
+
)
|
|
103
|
+
tensor = torch.as_tensor(transform(Image.fromarray(region))).as_subclass(torch.Tensor)
|
|
104
|
+
padded = pad_image_to_encoded(tensor, geometry, pad_mode="reflect", image_pad_value=None)
|
|
105
|
+
ref = enc.encode_tiles_dense(padded.unsqueeze(0)).detach().float().cpu().numpy()[0]
|
|
106
|
+
np.testing.assert_allclose(grids[i], ref, rtol=0, atol=1e-6)
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def test_encode_regions_dense_empty_coordinates():
|
|
110
|
+
enc = _encoder()
|
|
111
|
+
wsi = _FakeWSI(target_h=64, target_w=64)
|
|
112
|
+
grids = encode_regions_dense(
|
|
113
|
+
model=enc, device="cpu", wsi=wsi, coordinates=[],
|
|
114
|
+
requested_spacing_um=0.5, target_size=64,
|
|
115
|
+
)
|
|
116
|
+
assert grids.shape == (0, 0, 4, 4)
|
|
117
|
+
assert wsi.calls == []
|
|
@@ -1578,6 +1578,68 @@ def test_run_pipeline_local_persists_completed_embeddings_before_later_slide_fai
|
|
|
1578
1578
|
assert recorded.loc["slide-b", "aggregation_status"] == "tbp"
|
|
1579
1579
|
|
|
1580
1580
|
|
|
1581
|
+
def _drive_persist_callback(monkeypatch, tmp_path, *, model_level, num_samples):
|
|
1582
|
+
"""Build an incremental persist callback and feed it `num_samples` completions.
|
|
1583
|
+
|
|
1584
|
+
Returns the list of per-write batch sizes recorded by a stubbed
|
|
1585
|
+
``update_process_list_after_embedding`` (i.e. how the process_list rewrites
|
|
1586
|
+
were grouped).
|
|
1587
|
+
"""
|
|
1588
|
+
process_list_path = tmp_path / "process_list.csv"
|
|
1589
|
+
process_list_path.write_text("sample_id\n", encoding="utf-8")
|
|
1590
|
+
|
|
1591
|
+
monkeypatch.setattr(persist_callbacks, "should_persist_tile_embeddings", lambda *a, **k: True)
|
|
1592
|
+
monkeypatch.setattr(persist_callbacks, "is_hierarchical_preprocessing", lambda *a, **k: False)
|
|
1593
|
+
monkeypatch.setattr(persist_callbacks, "resolved_process_list_output_variant", lambda *a, **k: None)
|
|
1594
|
+
monkeypatch.setattr(persist_callbacks, "TILE_EMBEDDING_FLUSH_INTERVAL", 3)
|
|
1595
|
+
|
|
1596
|
+
def fake_persist_embedded_slide(model, embedded_slide, tiling_result, *, preprocessing, execution):
|
|
1597
|
+
artifact = persist_callbacks.TileEmbeddingArtifact(
|
|
1598
|
+
sample_id=embedded_slide.sample_id,
|
|
1599
|
+
path=tmp_path / f"{embedded_slide.sample_id}.pt",
|
|
1600
|
+
metadata_path=tmp_path / f"{embedded_slide.sample_id}.meta.json",
|
|
1601
|
+
format="pt",
|
|
1602
|
+
feature_dim=4,
|
|
1603
|
+
num_tiles=2,
|
|
1604
|
+
)
|
|
1605
|
+
return artifact, None
|
|
1606
|
+
|
|
1607
|
+
monkeypatch.setattr(persist_callbacks, "persist_embedded_slide", fake_persist_embedded_slide)
|
|
1608
|
+
|
|
1609
|
+
batch_sizes: list[int] = []
|
|
1610
|
+
monkeypatch.setattr(
|
|
1611
|
+
persist_callbacks,
|
|
1612
|
+
"update_process_list_after_embedding",
|
|
1613
|
+
lambda *a, **k: batch_sizes.append(len(k["successful_slides"])),
|
|
1614
|
+
)
|
|
1615
|
+
|
|
1616
|
+
model = SimpleNamespace(name="enc", level=model_level)
|
|
1617
|
+
callback, _, _ = persist_callbacks.build_incremental_persist_callback(
|
|
1618
|
+
model=model,
|
|
1619
|
+
preprocessing=SimpleNamespace(),
|
|
1620
|
+
execution=SimpleNamespace(output_dir=tmp_path),
|
|
1621
|
+
process_list_path=process_list_path,
|
|
1622
|
+
)
|
|
1623
|
+
for i in range(num_samples):
|
|
1624
|
+
callback(make_slide(f"s-{i}"), SimpleNamespace(), SimpleNamespace(sample_id=f"s-{i}"))
|
|
1625
|
+
return batch_sizes
|
|
1626
|
+
|
|
1627
|
+
|
|
1628
|
+
def test_incremental_persist_callback_batches_tile_level_process_list_writes(monkeypatch, tmp_path: Path):
|
|
1629
|
+
# Tile-level (many cheap samples): writes are batched at the flush interval,
|
|
1630
|
+
# and the trailing partial batch is left for the caller's final reconciliation
|
|
1631
|
+
# (so 7 completions at interval 3 -> two writes of 3, one sample still buffered).
|
|
1632
|
+
batch_sizes = _drive_persist_callback(monkeypatch, tmp_path, model_level="tile", num_samples=7)
|
|
1633
|
+
assert batch_sizes == [3, 3]
|
|
1634
|
+
|
|
1635
|
+
|
|
1636
|
+
def test_incremental_persist_callback_checkpoints_slide_level_every_sample(monkeypatch, tmp_path: Path):
|
|
1637
|
+
# Slide-level (few expensive samples): every completion is checkpointed
|
|
1638
|
+
# immediately so a crash never loses an expensive slide embedding.
|
|
1639
|
+
batch_sizes = _drive_persist_callback(monkeypatch, tmp_path, model_level="slide", num_samples=4)
|
|
1640
|
+
assert batch_sizes == [1, 1, 1, 1]
|
|
1641
|
+
|
|
1642
|
+
|
|
1581
1643
|
def test_tile_slides_forwards_spacing_at_level_0_to_hs2p(monkeypatch, tmp_path: Path):
|
|
1582
1644
|
import slide2vec.inference as inference
|
|
1583
1645
|
|
|
@@ -2532,6 +2594,71 @@ def test_direct_embed_slides_persists_completed_embeddings_before_later_slide_fa
|
|
|
2532
2594
|
assert recorded.loc["slide-b", "feature_status"] == "tbp"
|
|
2533
2595
|
assert recorded.loc["slide-b", "aggregation_status"] == "tbp"
|
|
2534
2596
|
|
|
2597
|
+
|
|
2598
|
+
def test_direct_embed_slides_single_gpu_reconciles_batched_tile_status_on_clean_run(monkeypatch, tmp_path: Path):
|
|
2599
|
+
# Tile-level single-GPU run with fewer slides than TILE_EMBEDDING_FLUSH_INTERVAL:
|
|
2600
|
+
# the incremental callback buffers every completion and never flushes mid-run,
|
|
2601
|
+
# so feature_status is written only by the end-of-run reconciliation. Without
|
|
2602
|
+
# it, a clean run would leave these rows as "tbp" despite the .pt files existing.
|
|
2603
|
+
pytest.importorskip("torch")
|
|
2604
|
+
import slide2vec.inference as inference
|
|
2605
|
+
|
|
2606
|
+
slides = [make_slide("slide-a"), make_slide("slide-b")]
|
|
2607
|
+
tiling_results = [
|
|
2608
|
+
SimpleNamespace(
|
|
2609
|
+
x=np.array([0, 1]),
|
|
2610
|
+
y=np.array([2, 3]),
|
|
2611
|
+
tile_size_lv0=224,
|
|
2612
|
+
coordinates_npz_path=Path("/tmp/slide-a.coordinates.npz"),
|
|
2613
|
+
coordinates_meta_path=Path("/tmp/slide-a.coordinates.meta.json"),
|
|
2614
|
+
),
|
|
2615
|
+
SimpleNamespace(
|
|
2616
|
+
x=np.array([4, 5]),
|
|
2617
|
+
y=np.array([6, 7]),
|
|
2618
|
+
tile_size_lv0=224,
|
|
2619
|
+
coordinates_npz_path=Path("/tmp/slide-b.coordinates.npz"),
|
|
2620
|
+
coordinates_meta_path=Path("/tmp/slide-b.coordinates.meta.json"),
|
|
2621
|
+
),
|
|
2622
|
+
]
|
|
2623
|
+
process_list_path = tmp_path / "process_list.csv"
|
|
2624
|
+
process_list_path.write_text(
|
|
2625
|
+
"sample_id,annotation,image_path,mask_path,spacing_at_level_0,tiling_status,num_tiles,coordinates_npz_path,coordinates_meta_path,error,traceback\n"
|
|
2626
|
+
"slide-a,tissue,/tmp/slide-a.svs,,,"
|
|
2627
|
+
"success,2,/tmp/slide-a.coordinates.npz,/tmp/slide-a.coordinates.meta.json,,\n"
|
|
2628
|
+
"slide-b,tissue,/tmp/slide-b.svs,,,"
|
|
2629
|
+
"success,2,/tmp/slide-b.coordinates.npz,/tmp/slide-b.coordinates.meta.json,,\n",
|
|
2630
|
+
encoding="utf-8",
|
|
2631
|
+
)
|
|
2632
|
+
|
|
2633
|
+
monkeypatch.setattr(tiling_pipeline, "prepare_tiled_slides",
|
|
2634
|
+
lambda slide_records, preprocessing, output_dir, num_workers: (slides, tiling_results, process_list_path),
|
|
2635
|
+
)
|
|
2636
|
+
# Clean run: every slide embeds successfully.
|
|
2637
|
+
monkeypatch.setattr(embedding_pipeline, "compute_tile_embeddings_for_slide",
|
|
2638
|
+
lambda *args, **kwargs: np.zeros((2, 4), dtype=np.float32),
|
|
2639
|
+
)
|
|
2640
|
+
|
|
2641
|
+
model = SimpleNamespace(
|
|
2642
|
+
name="uni2",
|
|
2643
|
+
level="tile",
|
|
2644
|
+
_requested_device="cpu",
|
|
2645
|
+
_load_backend=lambda: SimpleNamespace(),
|
|
2646
|
+
)
|
|
2647
|
+
|
|
2648
|
+
inference.embed_slides(
|
|
2649
|
+
model,
|
|
2650
|
+
slides,
|
|
2651
|
+
preprocessing=DEFAULT_PREPROCESSING,
|
|
2652
|
+
execution=ExecutionOptions(output_dir=tmp_path, save_tile_embeddings=True, num_gpus=1),
|
|
2653
|
+
)
|
|
2654
|
+
|
|
2655
|
+
assert (tmp_path / "tile_embeddings" / "slide-a.pt").is_file()
|
|
2656
|
+
assert (tmp_path / "tile_embeddings" / "slide-b.pt").is_file()
|
|
2657
|
+
recorded = pd.read_csv(process_list_path).set_index("sample_id")
|
|
2658
|
+
assert recorded.loc["slide-a", "feature_status"] == "success"
|
|
2659
|
+
assert recorded.loc["slide-b", "feature_status"] == "success"
|
|
2660
|
+
|
|
2661
|
+
|
|
2535
2662
|
def test_slide_level_pipeline_skips_tile_artifacts_when_save_tile_embeddings_is_false(monkeypatch, tmp_path: Path):
|
|
2536
2663
|
import slide2vec.inference as inference
|
|
2537
2664
|
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|