slide2vec 5.0.1__tar.gz → 5.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {slide2vec-5.0.1 → slide2vec-5.1.0}/PKG-INFO +1 -1
- {slide2vec-5.0.1 → slide2vec-5.1.0}/pyproject.toml +2 -2
- {slide2vec-5.0.1 → slide2vec-5.1.0}/slide2vec/__init__.py +1 -1
- {slide2vec-5.0.1 → slide2vec-5.1.0}/slide2vec/runtime/dense_regions.py +70 -27
- slide2vec-5.1.0/slide2vec/runtime/dense_sliding.py +185 -0
- {slide2vec-5.0.1 → slide2vec-5.1.0}/slide2vec.egg-info/PKG-INFO +1 -1
- {slide2vec-5.0.1 → slide2vec-5.1.0}/slide2vec.egg-info/SOURCES.txt +2 -0
- slide2vec-5.1.0/tests/test_dense_regions.py +221 -0
- slide2vec-5.1.0/tests/test_dense_sliding.py +121 -0
- slide2vec-5.0.1/tests/test_dense_regions.py +0 -117
- {slide2vec-5.0.1 → slide2vec-5.1.0}/LICENSE +0 -0
- {slide2vec-5.0.1 → slide2vec-5.1.0}/README.md +0 -0
- {slide2vec-5.0.1 → slide2vec-5.1.0}/setup.cfg +0 -0
- {slide2vec-5.0.1 → slide2vec-5.1.0}/slide2vec/__main__.py +0 -0
- {slide2vec-5.0.1 → slide2vec-5.1.0}/slide2vec/api.py +0 -0
- {slide2vec-5.0.1 → slide2vec-5.1.0}/slide2vec/artifacts.py +0 -0
- {slide2vec-5.0.1 → slide2vec-5.1.0}/slide2vec/cli.py +0 -0
- {slide2vec-5.0.1 → slide2vec-5.1.0}/slide2vec/configs/__init__.py +0 -0
- {slide2vec-5.0.1 → slide2vec-5.1.0}/slide2vec/configs/default.yaml +0 -0
- {slide2vec-5.0.1 → slide2vec-5.1.0}/slide2vec/configs/resources.py +0 -0
- {slide2vec-5.0.1 → slide2vec-5.1.0}/slide2vec/data/__init__.py +0 -0
- {slide2vec-5.0.1 → slide2vec-5.1.0}/slide2vec/data/dataset.py +0 -0
- {slide2vec-5.0.1 → slide2vec-5.1.0}/slide2vec/data/tile_reader.py +0 -0
- {slide2vec-5.0.1 → slide2vec-5.1.0}/slide2vec/data/tile_store.py +0 -0
- {slide2vec-5.0.1 → slide2vec-5.1.0}/slide2vec/distributed/__init__.py +0 -0
- {slide2vec-5.0.1 → slide2vec-5.1.0}/slide2vec/distributed/direct_embed_worker.py +0 -0
- {slide2vec-5.0.1 → slide2vec-5.1.0}/slide2vec/distributed/pipeline_worker.py +0 -0
- {slide2vec-5.0.1 → slide2vec-5.1.0}/slide2vec/encoders/__init__.py +0 -0
- {slide2vec-5.0.1 → slide2vec-5.1.0}/slide2vec/encoders/base.py +0 -0
- {slide2vec-5.0.1 → slide2vec-5.1.0}/slide2vec/encoders/models/__init__.py +0 -0
- {slide2vec-5.0.1 → slide2vec-5.1.0}/slide2vec/encoders/models/conch.py +0 -0
- {slide2vec-5.0.1 → slide2vec-5.1.0}/slide2vec/encoders/models/gigapath.py +0 -0
- {slide2vec-5.0.1 → slide2vec-5.1.0}/slide2vec/encoders/models/hibou.py +0 -0
- {slide2vec-5.0.1 → slide2vec-5.1.0}/slide2vec/encoders/models/hoptimus.py +0 -0
- {slide2vec-5.0.1 → slide2vec-5.1.0}/slide2vec/encoders/models/lunit.py +0 -0
- {slide2vec-5.0.1 → slide2vec-5.1.0}/slide2vec/encoders/models/midnight.py +0 -0
- {slide2vec-5.0.1 → slide2vec-5.1.0}/slide2vec/encoders/models/moozy/__init__.py +0 -0
- {slide2vec-5.0.1 → slide2vec-5.1.0}/slide2vec/encoders/models/moozy/blocks.py +0 -0
- {slide2vec-5.0.1 → slide2vec-5.1.0}/slide2vec/encoders/models/moozy/case.py +0 -0
- {slide2vec-5.0.1 → slide2vec-5.1.0}/slide2vec/encoders/models/moozy/loading.py +0 -0
- {slide2vec-5.0.1 → slide2vec-5.1.0}/slide2vec/encoders/models/moozy/slide.py +0 -0
- {slide2vec-5.0.1 → slide2vec-5.1.0}/slide2vec/encoders/models/moozy/types.py +0 -0
- {slide2vec-5.0.1 → slide2vec-5.1.0}/slide2vec/encoders/models/musk.py +0 -0
- {slide2vec-5.0.1 → slide2vec-5.1.0}/slide2vec/encoders/models/phikon.py +0 -0
- {slide2vec-5.0.1 → slide2vec-5.1.0}/slide2vec/encoders/models/prism.py +0 -0
- {slide2vec-5.0.1 → slide2vec-5.1.0}/slide2vec/encoders/models/prost40m.py +0 -0
- {slide2vec-5.0.1 → slide2vec-5.1.0}/slide2vec/encoders/models/titan.py +0 -0
- {slide2vec-5.0.1 → slide2vec-5.1.0}/slide2vec/encoders/models/uni.py +0 -0
- {slide2vec-5.0.1 → slide2vec-5.1.0}/slide2vec/encoders/models/virchow.py +0 -0
- {slide2vec-5.0.1 → slide2vec-5.1.0}/slide2vec/encoders/registry.py +0 -0
- {slide2vec-5.0.1 → slide2vec-5.1.0}/slide2vec/encoders/validation.py +0 -0
- {slide2vec-5.0.1 → slide2vec-5.1.0}/slide2vec/inference.py +0 -0
- {slide2vec-5.0.1 → slide2vec-5.1.0}/slide2vec/progress.py +0 -0
- {slide2vec-5.0.1 → slide2vec-5.1.0}/slide2vec/runtime/__init__.py +0 -0
- {slide2vec-5.0.1 → slide2vec-5.1.0}/slide2vec/runtime/artifacts_collect.py +0 -0
- {slide2vec-5.0.1 → slide2vec-5.1.0}/slide2vec/runtime/batching.py +0 -0
- {slide2vec-5.0.1 → slide2vec-5.1.0}/slide2vec/runtime/cpu_budget.py +0 -0
- {slide2vec-5.0.1 → slide2vec-5.1.0}/slide2vec/runtime/distributed.py +0 -0
- {slide2vec-5.0.1 → slide2vec-5.1.0}/slide2vec/runtime/distributed_stage.py +0 -0
- {slide2vec-5.0.1 → slide2vec-5.1.0}/slide2vec/runtime/embedding.py +0 -0
- {slide2vec-5.0.1 → slide2vec-5.1.0}/slide2vec/runtime/embedding_persist.py +0 -0
- {slide2vec-5.0.1 → slide2vec-5.1.0}/slide2vec/runtime/embedding_pipeline.py +0 -0
- {slide2vec-5.0.1 → slide2vec-5.1.0}/slide2vec/runtime/hierarchical.py +0 -0
- {slide2vec-5.0.1 → slide2vec-5.1.0}/slide2vec/runtime/manifest.py +0 -0
- {slide2vec-5.0.1 → slide2vec-5.1.0}/slide2vec/runtime/model_settings.py +0 -0
- {slide2vec-5.0.1 → slide2vec-5.1.0}/slide2vec/runtime/patient_pipeline.py +0 -0
- {slide2vec-5.0.1 → slide2vec-5.1.0}/slide2vec/runtime/persist_callbacks.py +0 -0
- {slide2vec-5.0.1 → slide2vec-5.1.0}/slide2vec/runtime/persistence.py +0 -0
- {slide2vec-5.0.1 → slide2vec-5.1.0}/slide2vec/runtime/process_list.py +0 -0
- {slide2vec-5.0.1 → slide2vec-5.1.0}/slide2vec/runtime/progress_bridge.py +0 -0
- {slide2vec-5.0.1 → slide2vec-5.1.0}/slide2vec/runtime/registry.py +0 -0
- {slide2vec-5.0.1 → slide2vec-5.1.0}/slide2vec/runtime/serialization.py +0 -0
- {slide2vec-5.0.1 → slide2vec-5.1.0}/slide2vec/runtime/slide_encode.py +0 -0
- {slide2vec-5.0.1 → slide2vec-5.1.0}/slide2vec/runtime/tiling.py +0 -0
- {slide2vec-5.0.1 → slide2vec-5.1.0}/slide2vec/runtime/tiling_pipeline.py +0 -0
- {slide2vec-5.0.1 → slide2vec-5.1.0}/slide2vec/runtime/types.py +0 -0
- {slide2vec-5.0.1 → slide2vec-5.1.0}/slide2vec/runtime/worker_io.py +0 -0
- {slide2vec-5.0.1 → slide2vec-5.1.0}/slide2vec/utils/__init__.py +0 -0
- {slide2vec-5.0.1 → slide2vec-5.1.0}/slide2vec/utils/config.py +0 -0
- {slide2vec-5.0.1 → slide2vec-5.1.0}/slide2vec/utils/coordinates.py +0 -0
- {slide2vec-5.0.1 → slide2vec-5.1.0}/slide2vec/utils/log_utils.py +0 -0
- {slide2vec-5.0.1 → slide2vec-5.1.0}/slide2vec/utils/tiling_io.py +0 -0
- {slide2vec-5.0.1 → slide2vec-5.1.0}/slide2vec/utils/utils.py +0 -0
- {slide2vec-5.0.1 → slide2vec-5.1.0}/slide2vec.egg-info/dependency_links.txt +0 -0
- {slide2vec-5.0.1 → slide2vec-5.1.0}/slide2vec.egg-info/entry_points.txt +0 -0
- {slide2vec-5.0.1 → slide2vec-5.1.0}/slide2vec.egg-info/not-zip-safe +0 -0
- {slide2vec-5.0.1 → slide2vec-5.1.0}/slide2vec.egg-info/requires.txt +0 -0
- {slide2vec-5.0.1 → slide2vec-5.1.0}/slide2vec.egg-info/top_level.txt +0 -0
- {slide2vec-5.0.1 → slide2vec-5.1.0}/tests/test_architecture_runtime_split.py +0 -0
- {slide2vec-5.0.1 → slide2vec-5.1.0}/tests/test_attention_extraction.py +0 -0
- {slide2vec-5.0.1 → slide2vec-5.1.0}/tests/test_dense_extraction.py +0 -0
- {slide2vec-5.0.1 → slide2vec-5.1.0}/tests/test_encoder_registry.py +0 -0
- {slide2vec-5.0.1 → slide2vec-5.1.0}/tests/test_hs2p_package_cutover.py +0 -0
- {slide2vec-5.0.1 → slide2vec-5.1.0}/tests/test_output_consistency.py +0 -0
- {slide2vec-5.0.1 → slide2vec-5.1.0}/tests/test_progress.py +0 -0
- {slide2vec-5.0.1 → slide2vec-5.1.0}/tests/test_regression_core.py +0 -0
- {slide2vec-5.0.1 → slide2vec-5.1.0}/tests/test_regression_inference.py +0 -0
- {slide2vec-5.0.1 → slide2vec-5.1.0}/tests/test_regression_models.py +0 -0
- {slide2vec-5.0.1 → slide2vec-5.1.0}/tests/test_runtime_batching.py +0 -0
- {slide2vec-5.0.1 → slide2vec-5.1.0}/tests/test_tile_store.py +0 -0
- {slide2vec-5.0.1 → slide2vec-5.1.0}/tests/test_tiling_pipeline.py +0 -0
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "slide2vec"
|
|
7
|
-
version = "5.0.1"
|
|
7
|
+
version = "5.1.0"
|
|
8
8
|
description = "Embedding of whole slide images with Foundation Models"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
requires-python = ">=3.10"
|
|
@@ -167,7 +167,7 @@ no_implicit_reexport = true
|
|
|
167
167
|
max-line-length = 160
|
|
168
168
|
|
|
169
169
|
[tool.bumpver]
|
|
170
|
-
current_version = "5.0.1"
|
|
170
|
+
current_version = "5.1.0"
|
|
171
171
|
version_pattern = "MAJOR.MINOR.PATCH"
|
|
172
172
|
commit = false # We do version bumping in CI, not as a commit
|
|
173
173
|
tag = false # Git tag already exists — we don't auto-tag
|
|
@@ -5,7 +5,10 @@ The dense counterpart of the pooled coordinate path (``compute_tile_embeddings_f
|
|
|
5
5
|
each sampled ROI is read **spacing-aware** from the slide, run through the encoder's
|
|
6
6
|
normalization-only dense transform (``get_dense_transform`` — NOT the pooled transform,
|
|
7
7
|
which crops), padded up to the encoder's patch multiple, and encoded via
|
|
8
|
-
``encode_tiles_dense`` into a ``(d, grid_h, grid_w)`` token grid.
|
|
8
|
+
``encode_tiles_dense`` into a ``(d, grid_h, grid_w)`` token grid. ``iter_regions_dense``
|
|
9
|
+
**streams** these grids — yielding one per coordinate, in coordinate order, holding at most
|
|
10
|
+
one ``batch_size`` chunk resident — so host memory is bounded by ``batch_size`` rather than
|
|
11
|
+
by a slide's ROI count.
|
|
9
12
|
|
|
10
13
|
This is the extraction half of soma's slide-manifest segmentation path: slide2vec reads
|
|
11
14
|
regions + encodes (it already owns the region reader and the dense encode); soma sources
|
|
@@ -18,21 +21,25 @@ the finest pyramid level ``<=`` the requested µm/px is read and downscaled to t
|
|
|
18
21
|
the same spacing. The ``wsi`` is injected (any object exposing ``read_region_at_spacing``),
|
|
19
22
|
so the loop is unit-testable offline with a fake reader + a random-weight encoder.
|
|
20
23
|
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
+
Both dense modes run through one primitive (:func:`~slide2vec.runtime.dense_sliding.encode_dense_sliding`):
|
|
25
|
+
``window_size=None`` is a single whole-tile forward (byte-identical to the legacy
|
|
26
|
+
whole-region encode), and a ``window_size`` smaller than the encoded tile slides the
|
|
27
|
+
encoder's native field over the padded tile and blends the per-window token grids with a
|
|
28
|
+
separable raised-cosine map — letting a native-field encoder (e.g. 224-px Virchow2/phikon)
|
|
29
|
+
serve a larger ROI without interpolating its position embeddings.
|
|
24
30
|
"""
|
|
25
31
|
|
|
26
32
|
from __future__ import annotations
|
|
27
33
|
|
|
28
34
|
from dataclasses import dataclass
|
|
29
|
-
from typing import Callable, Sequence
|
|
35
|
+
from typing import Callable, Iterator, Sequence
|
|
30
36
|
|
|
31
37
|
import numpy as np
|
|
32
38
|
import torch
|
|
33
39
|
import torch.nn.functional as F
|
|
34
40
|
from PIL import Image
|
|
35
41
|
|
|
42
|
+
from slide2vec.runtime.dense_sliding import encode_dense_sliding
|
|
36
43
|
from slide2vec.runtime.slide_encode import slide_encode_autocast_ctx
|
|
37
44
|
|
|
38
45
|
_PAD_MODES = {"reflect", "constant", "zero", "replicate"}
|
|
@@ -136,7 +143,7 @@ def _resolve_encode_fn(
|
|
|
136
143
|
)
|
|
137
144
|
|
|
138
145
|
|
|
139
|
-
def encode_regions_dense(
|
|
146
|
+
def iter_regions_dense(
|
|
140
147
|
*,
|
|
141
148
|
model,
|
|
142
149
|
device: torch.device | str,
|
|
@@ -147,14 +154,21 @@ def encode_regions_dense(
|
|
|
147
154
|
tolerance: float = 0.05,
|
|
148
155
|
pad_mode: str = "reflect",
|
|
149
156
|
image_pad_value: float | None = None,
|
|
157
|
+
window_size: int | None = None,
|
|
158
|
+
overlap: float = 0.0,
|
|
150
159
|
feature_kind: str = "patch_features",
|
|
151
160
|
attention_blocks: tuple[int, ...] = (-1,),
|
|
152
161
|
attention_include_registers: bool = False,
|
|
153
162
|
batch_size: int = 1,
|
|
154
163
|
precision: str = "fp32",
|
|
155
164
|
dense_transform: Callable | None = None,
|
|
156
|
-
) -> np.ndarray:
|
|
157
|
-
"""
|
|
165
|
+
) -> Iterator[np.ndarray]:
|
|
166
|
+
"""Stream slide regions at ``coordinates`` into dense grids, one per coordinate.
|
|
167
|
+
|
|
168
|
+
Yields one ``(d, grid_h, grid_w)`` ``float32`` grid per coordinate, in coordinate
|
|
169
|
+
order. Regions are read and encoded one ``batch_size`` chunk at a time, so resident
|
|
170
|
+
host memory is bounded by ``batch_size`` rather than by a slide's ROI count (the loop
|
|
171
|
+
holds at most one batch of grids resident — no per-slide accumulation).
|
|
158
172
|
|
|
159
173
|
Injectable core: takes a constructed dense-capable ``model`` (with
|
|
160
174
|
``encode_tiles_dense`` / ``encode_tiles_attention`` / ``patch_size`` /
|
|
@@ -162,16 +176,30 @@ def encode_regions_dense(
|
|
|
162
176
|
``read_region_at_spacing(location, requested_spacing_um, size, *, tolerance,
|
|
163
177
|
interpolation)``, so it runs offline in tests with random weights + a fake reader.
|
|
164
178
|
|
|
179
|
+
Arguments are validated and geometry is resolved **eagerly** (before any region is
|
|
180
|
+
read): an invalid ``pad_mode`` or ``feature_kind`` raises at the call site, not on the
|
|
181
|
+
first ``next()``. Iteration itself is lazy — reads advance one batch at a time.
|
|
182
|
+
|
|
165
183
|
Args:
|
|
166
184
|
coordinates: ``(x, y)`` top-left locations in **level-0** pixel space (the hs2p
|
|
167
185
|
tiling convention; passed straight to ``read_region_at_spacing``).
|
|
168
186
|
requested_spacing_um: µm/px to read each region at.
|
|
169
187
|
target_size: supervision tile size (int or ``(h, w)``); the region is read at this
|
|
170
188
|
size at ``requested_spacing_um`` and the token grid registers to it.
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
189
|
+
window_size: encoder field-of-view chunk fed through the backbone per forward.
|
|
190
|
+
``None`` (default) is one whole-tile forward, byte-identical to the
|
|
191
|
+
whole-region encode; a value smaller than the encoded tile slides the encoder
|
|
192
|
+
over patch-aligned windows and blends the token grids (raised-cosine map). The
|
|
193
|
+
output grid is always the whole geometry's ``(grid_h, grid_w)`` either way —
|
|
194
|
+
sliding is internal to extraction.
|
|
195
|
+
overlap: fractional window overlap in ``[0, 1)`` for the sliding path (ignored when
|
|
196
|
+
``window_size is None``); the stride is ``window * (1 - overlap)``.
|
|
197
|
+
|
|
198
|
+
Yields ``float32`` grids in coordinate order; empty ``coordinates`` yields nothing.
|
|
199
|
+
``feature_kind`` selects ``encode_tiles_dense`` (patch grid) vs
|
|
200
|
+
``encode_tiles_attention`` (CLS-attention grid); both produce a ``(C, gh, gw)`` grid and
|
|
201
|
+
share this path. Each yielded grid is a standalone contiguous copy, so it does not pin
|
|
202
|
+
the rest of its batch's memory alive.
|
|
175
203
|
"""
|
|
176
204
|
if pad_mode not in _PAD_MODES:
|
|
177
205
|
raise ValueError(f"unsupported pad_mode {pad_mode!r}; expected one of {sorted(_PAD_MODES)}")
|
|
@@ -185,11 +213,8 @@ def encode_regions_dense(
|
|
|
185
213
|
attention_include_registers=attention_include_registers,
|
|
186
214
|
)
|
|
187
215
|
target_h, target_w = geometry.target_size
|
|
188
|
-
|
|
189
216
|
coords = [(int(x), int(y)) for x, y in coordinates]
|
|
190
|
-
|
|
191
|
-
if not coords:
|
|
192
|
-
return np.empty((0, 0, grid_h, grid_w), dtype=np.float32)
|
|
217
|
+
step = max(1, int(batch_size))
|
|
193
218
|
|
|
194
219
|
def _read_padded(location: tuple[int, int]) -> torch.Tensor:
|
|
195
220
|
region = wsi.read_region_at_spacing(
|
|
@@ -215,15 +240,33 @@ def encode_regions_dense(
|
|
|
215
240
|
tensor, geometry, pad_mode=pad_mode, image_pad_value=image_pad_value
|
|
216
241
|
)
|
|
217
242
|
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
243
|
+
def _stream() -> Iterator[np.ndarray]:
|
|
244
|
+
with torch.inference_mode(), slide_encode_autocast_ctx(device, precision):
|
|
245
|
+
for start in range(0, len(coords), step):
|
|
246
|
+
chunk = coords[start : start + step]
|
|
247
|
+
batch = torch.stack([_read_padded(loc) for loc in chunk]).to(
|
|
248
|
+
device, non_blocking=True
|
|
249
|
+
)
|
|
250
|
+
# Every batch goes through the one windowed primitive: window_size=None
|
|
251
|
+
# short-circuits to a single whole-tile forward (byte-identical to the
|
|
252
|
+
# whole-region encode), so there is no separate whole-region branch.
|
|
253
|
+
out = encode_dense_sliding(
|
|
254
|
+
model,
|
|
255
|
+
batch,
|
|
256
|
+
geometry=geometry,
|
|
257
|
+
window_size=window_size,
|
|
258
|
+
overlap=overlap,
|
|
259
|
+
encode_fn=encode_fn,
|
|
227
260
|
)
|
|
228
|
-
|
|
229
|
-
|
|
261
|
+
if out.ndim != 4:
|
|
262
|
+
raise ValueError(
|
|
263
|
+
f"{feature_kind} encode returned a {out.ndim}-D tensor; expected (B, d, gh, gw)."
|
|
264
|
+
)
|
|
265
|
+
batch_np = out.detach().float().cpu().numpy()
|
|
266
|
+
for i in range(batch_np.shape[0]):
|
|
267
|
+
# Standalone C-contiguous copy: a per-row view would pin the whole
|
|
268
|
+
# batch alive (the blended sliding output is contiguous, so a view of
|
|
269
|
+
# it would not copy). ``.copy()`` always copies, in C order.
|
|
270
|
+
yield batch_np[i].copy()
|
|
271
|
+
|
|
272
|
+
return _stream()
|
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
"""Sliding-window dense encoding — ``window_size`` + ``overlap`` as a free knob.
|
|
2
|
+
|
|
3
|
+
The ``whole`` path feeds the full padded tile through the encoder in one forward,
|
|
4
|
+
interpolating the positional embeddings to the larger grid. That is one end of a
|
|
5
|
+
single mechanism; the other end is running the encoder over smaller **windows** and
|
|
6
|
+
stitching the per-window token grids. Three sizes that are usually conflated —
|
|
7
|
+
|
|
8
|
+
* **native size** (e.g. 224) — sets the pos-embed table; not a hard input limit
|
|
9
|
+
(``dynamic_img_size`` lets a ViT process a larger field at the correct mpp);
|
|
10
|
+
* **window size** ``W`` — how big a chunk goes through the ViT in one forward;
|
|
11
|
+
* **input size** — the padded ``encoded_size`` we want dense features for.
|
|
12
|
+
|
|
13
|
+
``whole`` is ``W >= input`` (one window, zero stitching); native sliding is ``W = 224``;
|
|
14
|
+
the useful middle is ``W = 512`` slid over a larger input. So this is **one**
|
|
15
|
+
parametrized path, not a separate mode: :func:`encode_dense_sliding` takes
|
|
16
|
+
``window_size`` (``None`` ⇒ ``whole``) and ``overlap``, and the ``whole`` case falls out
|
|
17
|
+
as the degenerate single window — which we short-circuit to the exact same
|
|
18
|
+
``encode_tiles_dense(batch)`` call, so it stays **byte-identical** to the whole-region
|
|
19
|
+
path (the parity anchor).
|
|
20
|
+
|
|
21
|
+
Stitching happens in **token space** (the grid the decoder/head consume), so the output
|
|
22
|
+
is always ``(B, d, grid_h, grid_w)`` for ``geometry.grid_shape`` regardless of
|
|
23
|
+
``window_size`` — sliding is purely internal to extraction. Windows and strides are kept
|
|
24
|
+
patch-aligned, so each window maps cleanly onto a block of tokens; overlapping windows
|
|
25
|
+
are blended with a separable raised-cosine importance map (the standard frozen-backbone
|
|
26
|
+
dense-inference recipe, cf. MONAI ``sliding_window_inference``) to remove the
|
|
27
|
+
block-boundary seams naive non-overlapping tiling would introduce.
|
|
28
|
+
|
|
29
|
+
Ported from soma's ``soma/dense/sliding.py`` (the window/blend math is encoder
|
|
30
|
+
featurization that belongs in slide2vec); adapted to slide2vec's own
|
|
31
|
+
:class:`~slide2vec.runtime.dense_regions.DenseGridGeometry`.
|
|
32
|
+
"""
|
|
33
|
+
|
|
34
|
+
from __future__ import annotations
|
|
35
|
+
|
|
36
|
+
import math
|
|
37
|
+
from typing import TYPE_CHECKING, Callable
|
|
38
|
+
|
|
39
|
+
import torch
|
|
40
|
+
|
|
41
|
+
if TYPE_CHECKING:
|
|
42
|
+
from slide2vec.runtime.dense_regions import DenseGridGeometry
|
|
43
|
+
|
|
44
|
+
__all__ = [
|
|
45
|
+
"cover_origins",
|
|
46
|
+
"encode_dense_sliding",
|
|
47
|
+
"resolve_window_geometry",
|
|
48
|
+
]
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def _round_up(value: int, multiple: int) -> int:
    """Return ``value`` rounded up to a multiple of ``multiple``.

    For a positive ``multiple`` this is the smallest multiple that is ``>= value``;
    a value that is already a multiple is returned unchanged.
    """
    # Overshoot by ``multiple - 1`` and drop the remainder: flooring the overshoot
    # is the same as taking the ceiling of ``value``.
    bumped = value + multiple - 1
    return bumped - bumped % multiple
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def _round_to(value: float, multiple: int) -> int:
    """Snap ``value`` to the nearest multiple of ``multiple``, but never below one multiple.

    Rounding goes through Python's built-in ``round`` (exact halves go to the even
    quotient). The result is floored at ``multiple`` so a quantity derived from it
    (e.g. a window stride) cannot collapse to zero.
    """
    snapped = int(round(value / multiple)) * multiple
    if snapped < multiple:
        return multiple
    return snapped
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def cover_origins(extent: int, size: int, stride: int) -> list[int]:
    """Start offsets of ``size``-wide windows that fully cover ``[0, extent)``.

    Walks ``[0, extent - size]`` in ``stride`` steps and, if the last step leaves a gap,
    appends one final start flush to the far edge (``extent - size``) so coverage is
    complete with no partial tail. When ``extent``/``size``/``stride`` are all multiples
    of a common patch size, every start is too — the edge-flush ``extent - size`` is a
    difference of patch multiples.

    Args:
        extent: length of the axis to cover.
        size: window length along that axis.
        stride: step between consecutive window starts; only consulted when more than
            one window is needed.

    Returns:
        The ascending list of window start offsets. A window at least as large as the
        extent yields the single start ``[0]``.

    Raises:
        ValueError: if more than one window is needed and ``stride`` is not positive.
    """
    if size >= extent:
        # One window already spans the whole axis; the stride is irrelevant here.
        return [0]
    if stride <= 0:
        # Without this guard a zero stride surfaces as an opaque ``range()`` error and a
        # negative stride produces an empty range followed by an IndexError below.
        raise ValueError(
            f"stride must be positive to cover extent {extent} with windows of size {size}; got {stride}"
        )
    last_start = extent - size
    starts = list(range(0, last_start + 1, stride))
    if starts[-1] < last_start:
        starts.append(last_start)  # shift the last window flush to the edge
    return starts
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def _window_starts(extent: int, win: int, stride: int) -> list[int]:
    """Encoder-window start offsets along one axis.

    Thin alias over :func:`cover_origins` with ``win`` as the window size; the starts are
    patch-aligned whenever the three arguments are patch multiples.
    """
    origins = cover_origins(extent, win, stride)
    return origins
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def resolve_window_geometry(
    geometry: DenseGridGeometry, *, window_size: int | None, overlap: float
) -> tuple[tuple[int, int], tuple[int, int], list[int], list[int]]:
    """Resolve per-dim window size, stride, and start offsets (all patch-aligned).

    ``window_size`` is rounded up to the patch multiple and clamped to the encoded
    extent, so any ``window_size`` at least as large as the encoded extent collapses to
    a single window (the ``whole`` path). ``stride`` is ``window * (1 - overlap)``
    rounded to the patch multiple and clamped to ``[patch, window]``; an ``overlap``
    outside ``[0, 1)`` is therefore clamped rather than rejected.

    Args:
        geometry: object exposing ``encoded_size`` and ``patch_size`` as ``(h, w)`` pairs.
        window_size: requested window edge in pixels, or ``None`` for one whole-tile
            window.
        overlap: fractional overlap between neighbouring windows.

    Returns:
        ``((win_h, win_w), (stride_h, stride_w), starts_h, starts_w)``. With
        ``window_size=None`` the window and stride both equal the encoded size and each
        start list is ``[0]``.

    Raises:
        ValueError: if ``window_size`` is given but is not a positive number of pixels.
    """
    enc_h, enc_w = geometry.encoded_size
    ph, pw = geometry.patch_size
    if window_size is None:
        return (enc_h, enc_w), (enc_h, enc_w), [0], [0]

    window = int(window_size)
    if window <= 0:
        # A non-positive window rounds to a zero/negative window and stride, which would
        # otherwise fail deep inside ``range()`` (or as an IndexError) with no hint about
        # the offending argument.
        raise ValueError(
            f"window_size must be a positive number of pixels or None; got {window_size!r}"
        )

    win_h = min(_round_up(window, ph), enc_h)
    win_w = min(_round_up(window, pw), enc_w)
    keep = 1.0 - float(overlap)  # fraction of a window advanced per step
    stride_h = min(win_h, _round_to(win_h * keep, ph))
    stride_w = min(win_w, _round_to(win_w * keep, pw))
    starts_h = _window_starts(enc_h, win_h, stride_h)
    starts_w = _window_starts(enc_w, win_w, stride_w)
    return (win_h, win_w), (stride_h, stride_w), starts_h, starts_w
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def _hann_1d(n: int, device: torch.device, dtype: torch.dtype) -> torch.Tensor:
    """Raised-cosine weights of length ``n`` with no zeros at the ends.

    Entry ``i`` (0-based) is ``0.5 - 0.5*cos(2*pi*(i+1)/(n+1))``. Sampling at ``i+1`` out
    of ``n+1`` keeps both end points off the zeros of the cosine window, so every weight
    is strictly positive. For ``n <= 1`` the result is ``torch.ones(n)`` (uniform).
    """
    if n > 1:
        steps = torch.arange(1, n + 1, device=device, dtype=dtype)
        return 0.5 - 0.5 * torch.cos(2.0 * math.pi * steps / (n + 1))
    return torch.ones(n, device=device, dtype=dtype)
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def encode_dense_sliding(
    encoder,
    batch: torch.Tensor,
    *,
    geometry: DenseGridGeometry,
    window_size: int | None,
    overlap: float = 0.0,
    encode_fn: Callable[[torch.Tensor], torch.Tensor] | None = None,
) -> torch.Tensor:
    """Encode a padded ``(B, C, enc_h, enc_w)`` batch into ``(B, d, grid_h, grid_w)``.

    When the resolved geometry has a single window on both axes (``window_size is None``
    or a window covering the whole encoded input) the batch is passed straight to the
    encode callable and its result is returned untouched. Otherwise the callable runs on
    each window in row-major order and the per-window token grids are accumulated with a
    separable raised-cosine weight map, then normalised by the accumulated weights. The
    accumulation happens in ``float32`` (each window's output is upcast first).

    Args:
        encoder: object providing ``encode_tiles_dense``; only consulted when
            ``encode_fn`` is ``None``.
        batch: padded image batch; windows are sliced from its last two dimensions.
        geometry: supplies ``patch_size`` and ``grid_shape`` here, and is forwarded to
            :func:`resolve_window_geometry`.
        window_size: window edge in pixels, or ``None`` for one whole-tile forward.
        overlap: fractional window overlap, forwarded to :func:`resolve_window_geometry`.
        encode_fn: per-window encode callable ``(B, C, wh, ww) -> (B, d, th, tw)``;
            defaults to ``encoder.encode_tiles_dense``.

    Returns:
        The token grid for the whole encoded input. On the sliding path it is a
        ``float32`` tensor; on the single-window path it is whatever ``encode_fn`` returns.
    """
    forward = encoder.encode_tiles_dense if encode_fn is None else encode_fn
    (win_h, win_w), _stride, rows, cols = resolve_window_geometry(
        geometry, window_size=window_size, overlap=overlap
    )
    if len(rows) == 1 and len(cols) == 1:
        # Exactly one window: the same forward as the whole-region path, no stitching.
        return forward(batch)

    patch_h, patch_w = geometry.patch_size
    grid_h, grid_w = geometry.grid_shape
    tok_h, tok_w = win_h // patch_h, win_w // patch_w  # window size in tokens
    blend_dtype = torch.float32
    where = batch.device

    def _axis_weights(length: int, tiled: bool) -> torch.Tensor:
        # Raised-cosine only along an axis that is actually tiled; an axis with a single
        # window keeps uniform weights so its edges are not attenuated for nothing.
        if tiled:
            return _hann_1d(length, where, blend_dtype)
        return torch.ones(length, device=where, dtype=blend_dtype)

    weight = torch.outer(
        _axis_weights(tok_h, len(rows) > 1), _axis_weights(tok_w, len(cols) > 1)
    )  # (tok_h, tok_w)

    blended: torch.Tensor | None = None
    coverage = torch.zeros(1, 1, grid_h, grid_w, device=where, dtype=blend_dtype)
    for top_px, left_px in ((r, c) for r in rows for c in cols):
        top, left = top_px // patch_h, left_px // patch_w
        tile = batch[:, :, top_px : top_px + win_h, left_px : left_px + win_w]
        tokens = forward(tile).to(blend_dtype)  # (B, d, tok_h, tok_w)
        if blended is None:
            # Allocated lazily: the channel count is only known after the first forward.
            blended = torch.zeros(
                tokens.shape[0], tokens.shape[1], grid_h, grid_w, device=where, dtype=blend_dtype
            )
        span_h, span_w = slice(top, top + tok_h), slice(left, left + tok_w)
        blended[:, :, span_h, span_w] += tokens * weight
        coverage[:, :, span_h, span_w] += weight
    assert blended is not None  # at least one window always runs
    return blended / coverage
|
|
@@ -54,6 +54,7 @@ slide2vec/runtime/artifacts_collect.py
|
|
|
54
54
|
slide2vec/runtime/batching.py
|
|
55
55
|
slide2vec/runtime/cpu_budget.py
|
|
56
56
|
slide2vec/runtime/dense_regions.py
|
|
57
|
+
slide2vec/runtime/dense_sliding.py
|
|
57
58
|
slide2vec/runtime/distributed.py
|
|
58
59
|
slide2vec/runtime/distributed_stage.py
|
|
59
60
|
slide2vec/runtime/embedding.py
|
|
@@ -84,6 +85,7 @@ tests/test_architecture_runtime_split.py
|
|
|
84
85
|
tests/test_attention_extraction.py
|
|
85
86
|
tests/test_dense_extraction.py
|
|
86
87
|
tests/test_dense_regions.py
|
|
88
|
+
tests/test_dense_sliding.py
|
|
87
89
|
tests/test_encoder_registry.py
|
|
88
90
|
tests/test_hs2p_package_cutover.py
|
|
89
91
|
tests/test_output_consistency.py
|
|
@@ -0,0 +1,221 @@
|
|
|
1
|
+
"""Tests for dense grid extraction over slide regions: ``iter_regions_dense``.
|
|
2
|
+
|
|
3
|
+
Fully offline (``pretrained=False`` random weights) + an injected fake reader, so no
|
|
4
|
+
weights, no real WSI. ``iter_regions_dense`` is a streaming generator: it yields one
|
|
5
|
+
``(d, grid_h, grid_w)`` grid per coordinate in coordinate order, holding at most one batch
|
|
6
|
+
resident. Checks (1) grid shapes over a batch of coordinates, (2) that each yielded grid is
|
|
7
|
+
byte-identical to a direct ``transform → pad → encode`` of the same region (both feature
|
|
8
|
+
kinds), (3) streaming/laziness via a call-counting reader, and (4) eager validation.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
import numpy as np
|
|
14
|
+
import pytest
|
|
15
|
+
|
|
16
|
+
torch = pytest.importorskip("torch")
|
|
17
|
+
timm = pytest.importorskip("timm")
|
|
18
|
+
|
|
19
|
+
from slide2vec.encoders.base import TimmTileEncoder # noqa: E402
|
|
20
|
+
from slide2vec.runtime.dense_regions import ( # noqa: E402
|
|
21
|
+
compute_dense_geometry,
|
|
22
|
+
iter_regions_dense,
|
|
23
|
+
pad_image_to_encoded,
|
|
24
|
+
)
|
|
25
|
+
from slide2vec.runtime.dense_sliding import encode_dense_sliding # noqa: E402
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def _encoder(**kwargs) -> TimmTileEncoder:
    """Build the tiny ViT tile encoder used by these tests, without pretrained weights.

    Extra keyword arguments are forwarded to ``TimmTileEncoder`` unchanged.
    """
    encoder = TimmTileEncoder(
        "vit_tiny_patch16_224",
        pretrained=False,
        num_classes=0,
        dynamic_img_size=True,
        **kwargs,
    )
    return encoder
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class _FakeWSI:
    """Fake slide reader: a deterministic random RGB region per location.

    Every call to ``read_region_at_spacing`` is appended to ``calls`` so tests can inspect
    what was read and in which order.
    """

    def __init__(self, *, target_h: int, target_w: int):
        # Log of (location, spacing, size, tolerance, interpolation), one tuple per read.
        self.calls: list[tuple] = []
        self._target_h = target_h
        self._target_w = target_w

    def read_region_at_spacing(self, location, requested_spacing_um, size, *, tolerance, interpolation):
        """Record the call and return a ``(height, width, 3)`` ``uint8`` array.

        The pixel values come from a generator seeded only by the integer location, so the
        same location always yields the same region.
        """
        record = (tuple(location), requested_spacing_um, tuple(size), tolerance, interpolation)
        self.calls.append(record)
        width, height = size
        x, y = location
        seed = abs(hash((int(x), int(y)))) % (2**32)
        generator = np.random.default_rng(seed)
        return generator.integers(0, 256, size=(height, width, 3), dtype=np.uint8)
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
@pytest.mark.parametrize("feature_kind", ["patch_features", "cls_attention"])
@pytest.mark.parametrize("window_size", [None, 32], ids=["whole", "window32"])
def test_iter_regions_dense_yields_grid_per_coordinate_in_order(window_size, feature_kind):
    """One standalone float32 grid per coordinate, in order, for every mode/feature combo."""
    encoder = _encoder()
    side = 64  # patch 16 -> grid 4x4, no padding
    reader = _FakeWSI(target_h=side, target_w=side)
    locations = [(0, 0), (64, 0), (0, 64)]

    stream = iter_regions_dense(
        model=encoder,
        device="cpu",
        wsi=reader,
        coordinates=locations,
        requested_spacing_um=0.5,
        target_size=side,
        window_size=window_size,
        feature_kind=feature_kind,
        batch_size=2,
    )
    grids = list(stream)

    # The output grid is expected to be the whole geometry's 4x4 token grid on both the
    # whole-tile and the sliding-window path, since stitching happens inside extraction.
    assert len(grids) == 3
    for grid in grids:
        assert grid.shape[1:] == (4, 4)
        assert grid.dtype == np.float32
        assert grid.flags["C_CONTIGUOUS"]
        assert grid.base is None  # standalone copy, not a view pinning a batch

    # Reads must arrive in coordinate order, at the target size, with area interpolation.
    read_locations = [call[0] for call in reader.calls]
    assert read_locations == [(0, 0), (64, 0), (0, 64)]
    assert all(call[2] == (side, side) and call[4] == "area" for call in reader.calls)
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def test_iter_regions_dense_pads_non_multiple_target():
    """A target size that is not a patch multiple (60) still yields a 4x4 grid."""
    side = 60  # padded up to 64 -> grid 4x4
    encoder = _encoder()
    reader = _FakeWSI(target_h=side, target_w=side)

    stream = iter_regions_dense(
        model=encoder,
        device="cpu",
        wsi=reader,
        coordinates=[(0, 0)],
        requested_spacing_um=0.5,
        target_size=side,
    )
    produced = list(stream)

    assert len(produced) == 1
    only_grid = produced[0]
    assert only_grid.shape == (encoder.encode_dim, 4, 4)
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def _reference_grid(enc, loc, *, target_size, feature_kind, window_size=None, overlap=0.0):
    """Build the expected grid for one region by hand: read, transform, pad, encode.

    With ``window_size=None`` the padded tile is pushed through a single direct
    forward of the selected encode function. With a ``window_size`` the padded tile
    goes through ``encode_dense_sliding`` instead, using the same encode function.

    Returns the first (only) batch element as a float NumPy array on the CPU.
    """
    from PIL import Image

    geom = compute_dense_geometry(target_size=target_size, patch_size=enc.patch_size)
    to_tensor = enc.get_dense_transform()

    # A fresh fake reader keeps the reference read independent of the reader under test.
    reader = _FakeWSI(target_h=target_size, target_w=target_size)
    pixels = reader.read_region_at_spacing(
        loc,
        0.5,
        (target_size, target_size),
        tolerance=0.05,
        interpolation="area",
    )

    image = torch.as_tensor(to_tensor(Image.fromarray(pixels))).as_subclass(torch.Tensor)
    batch = pad_image_to_encoded(
        image, geom, pad_mode="reflect", image_pad_value=None
    ).unsqueeze(0)

    # "patch_features" selects the dense encoder; any other kind selects attention.
    encode_fn = (
        enc.encode_tiles_dense
        if feature_kind == "patch_features"
        else enc.encode_tiles_attention
    )

    with torch.inference_mode():
        if window_size is not None:
            out = encode_dense_sliding(
                enc,
                batch,
                geometry=geom,
                window_size=window_size,
                overlap=overlap,
                encode_fn=encode_fn,
            )
        else:
            out = encode_fn(batch)

    return out.detach().float().cpu().numpy()[0]
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
@pytest.mark.parametrize("feature_kind", ["patch_features", "cls_attention"])
@pytest.mark.parametrize("window_size", [None, 32], ids=["whole", "window32"])
def test_iter_regions_dense_matches_direct_encode(window_size, feature_kind):
    """Every yielded grid equals the hand-rolled reference grid exactly.

    ``window_size=None`` compares the whole-region path against a direct encode; a
    smaller ``window_size`` compares the streamed grid against the same windowed
    primitive that ``_reference_grid`` calls.
    """
    side = 64
    encoder = _encoder()
    reader = _FakeWSI(target_h=side, target_w=side)
    locations = [(0, 0), (128, 256)]

    produced = list(
        iter_regions_dense(
            model=encoder,
            device="cpu",
            wsi=reader,
            coordinates=locations,
            requested_spacing_um=0.5,
            target_size=side,
            window_size=window_size,
            feature_kind=feature_kind,
        )
    )

    assert len(produced) == len(locations)
    for actual, location in zip(produced, locations):
        expected = _reference_grid(
            encoder,
            location,
            target_size=side,
            feature_kind=feature_kind,
            window_size=window_size,
        )
        assert actual.shape == expected.shape
        # Exact equality, not a tolerance comparison.
        np.testing.assert_array_equal(actual, expected)
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
def test_iter_regions_dense_empty_coordinates_yields_nothing():
    """An empty coordinate list produces no grids and performs no reads."""
    encoder = _encoder()
    reader = _FakeWSI(target_h=64, target_w=64)

    stream = iter_regions_dense(
        model=encoder,
        device="cpu",
        wsi=reader,
        coordinates=[],
        requested_spacing_um=0.5,
        target_size=64,
    )
    produced = list(stream)

    assert produced == []
    assert reader.calls == []
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
@pytest.mark.parametrize("feature_kind", ["patch_features", "cls_attention"])
@pytest.mark.parametrize("window_size", [None, 32], ids=["whole", "window32"])
def test_iter_regions_dense_streams_one_batch_at_a_time(window_size, feature_kind):
    """Regions are read batch by batch, and grids appear before all reads are done.

    The same laziness is asserted for the whole-tile and sliding-window paths and
    for both feature kinds.
    """
    side = 64
    encoder = _encoder()
    reader = _FakeWSI(target_h=side, target_w=side)
    # 5 coords with batch_size=2 -> batches of [2, 2, 1]
    locations = [(0, 0), (64, 0), (0, 64), (64, 64), (128, 0)]

    stream = iter_regions_dense(
        model=encoder,
        device="cpu",
        wsi=reader,
        coordinates=locations,
        requested_spacing_um=0.5,
        target_size=side,
        window_size=window_size,
        feature_kind=feature_kind,
        batch_size=2,
    )

    # Iteration is lazy: building the generator reads nothing.
    assert reader.calls == []

    # Cumulative read counts expected right after each of the first three grids:
    # grid 1 needs the first batch (2 reads), grid 2 reuses that batch, and grid 3
    # forces the second batch (4 reads in total).
    expected_reads = [2, 2, 4]
    for position, reads_so_far in enumerate(expected_reads):
        grid = next(stream)
        if position == 0:
            assert grid.shape[1:] == (4, 4)
        assert len(reader.calls) == reads_so_far

    remaining = list(stream)
    assert len(remaining) == 2
    # Total reads never exceed the coordinate count.
    assert len(reader.calls) == len(locations)
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
@pytest.mark.parametrize(
    "kwargs", [{"pad_mode": "bogus"}, {"feature_kind": "bogus"}], ids=["pad_mode", "feature_kind"]
)
def test_iter_regions_dense_validates_eagerly_before_any_read(kwargs):
    """A bad pad mode or feature kind raises ValueError at call time, with zero reads."""
    side = 64
    encoder = _encoder()
    reader = _FakeWSI(target_h=side, target_w=side)

    call_args = {
        "model": encoder,
        "device": "cpu",
        "wsi": reader,
        "coordinates": [(0, 0)],
        "requested_spacing_um": 0.5,
        "target_size": side,
        **kwargs,
    }

    # The generator is deliberately never iterated: the error has to surface from
    # the call itself. A plain generator function would postpone validation until
    # the first ``next()`` and make this block fail.
    with pytest.raises(ValueError):
        iter_regions_dense(**call_args)

    assert reader.calls == []
|
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
"""Tests for the window-as-knob dense sliding path (relocated from soma).
|
|
2
|
+
|
|
3
|
+
``window_size=None`` is ``whole`` (one padded forward); a smaller ``window_size``
|
|
4
|
+
(+ ``overlap``) slides the encoder over patch-aligned windows and blends the token
|
|
5
|
+
grids. The anchor invariant: any window that covers the whole encoded input — most
|
|
6
|
+
importantly ``window_size=None`` — is **byte-identical** to the legacy
|
|
7
|
+
``encode_tiles_dense(batch)`` forward, so the ``whole`` path is untouched.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
import pytest
|
|
13
|
+
|
|
14
|
+
torch = pytest.importorskip("torch")
|
|
15
|
+
pytest.importorskip("timm")
|
|
16
|
+
|
|
17
|
+
from slide2vec.encoders.base import TimmTileEncoder # noqa: E402
|
|
18
|
+
from slide2vec.runtime.dense_regions import compute_dense_geometry # noqa: E402
|
|
19
|
+
from slide2vec.runtime.dense_sliding import ( # noqa: E402
|
|
20
|
+
_window_starts,
|
|
21
|
+
encode_dense_sliding,
|
|
22
|
+
resolve_window_geometry,
|
|
23
|
+
)
|
|
24
|
+
|
|
25
|
+
PATCH = 16
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def _encoder() -> TimmTileEncoder:
    """Return a randomly initialised ``vit_tiny_patch16_224`` tile encoder.

    ``pretrained=False`` means no weights are loaded; ``dynamic_img_size=True`` and
    ``num_classes=0`` are forwarded to ``TimmTileEncoder`` unchanged.
    """
    options = {
        "pretrained": False,
        "num_classes": 0,
        "dynamic_img_size": True,
    }
    return TimmTileEncoder("vit_tiny_patch16_224", **options)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
# --- pure geometry (no encoder) ------------------------------------------------
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def test_window_starts_cover_and_patch_aligned():
    """Stride-16 windows of 32 over an extent of 64 start at 0, 16 and 32."""
    window = 32
    extent = 64
    offsets = _window_starts(extent=extent, win=window, stride=16)

    assert offsets == [0, 16, 32]
    for offset in offsets:
        assert offset % PATCH == 0
    # The last window sits flush against the far edge.
    assert offsets[-1] + 32 == 64
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def test_window_starts_appends_edge_window_when_stride_misses():
    """When the stride does not land on the edge, a final edge-flush window is added."""
    # Stride 24 from 0 gives [0, 24]; the last of those ends at 24+32=56 < 64, so an
    # extra window starting at 32 is expected to cover the remaining edge.
    offsets = _window_starts(extent=64, win=32, stride=24)

    first, last = offsets[0], offsets[-1]
    assert first == 0 and last == 32 and last + 32 == 64
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def test_resolve_window_geometry_whole_is_single_window():
    """``window_size=None`` resolves to one window spanning the full encoded size."""
    geometry = compute_dense_geometry(target_size=64, patch_size=PATCH)

    resolved = resolve_window_geometry(geometry, window_size=None, overlap=0.0)
    window, step, starts_h, starts_w = resolved

    assert window == geometry.encoded_size and step == geometry.encoded_size
    assert starts_h == [0] and starts_w == [0]
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def test_resolve_window_geometry_large_window_clamps_to_whole():
    """A window larger than the target (128 vs 64) collapses to a single window."""
    geometry = compute_dense_geometry(target_size=64, patch_size=PATCH)

    # window >= target -> rounds/clamps to the full encoded extent -> one window.
    resolved = resolve_window_geometry(geometry, window_size=128, overlap=0.5)
    starts_h, starts_w = resolved[2], resolved[3]

    assert starts_h == [0] and starts_w == [0]
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def test_resolve_window_geometry_rounds_window_up_to_patch():
    """A window size of 30 is rounded up to 32, the next multiple of the patch size."""
    geometry = compute_dense_geometry(target_size=64, patch_size=PATCH)

    resolved = resolve_window_geometry(geometry, window_size=30, overlap=0.0)
    window = resolved[0]
    starts_h = resolved[2]

    assert window == (32, 32)  # 30 -> round up to patch multiple
    assert len(starts_h) == 2  # 32 over 64 at stride 32 -> two windows
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
# --- parity: whole-covering windows == encode_tiles_dense ----------------------
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def test_sliding_window_none_is_byte_identical_to_encode_tiles_dense():
    """With ``window_size=None`` the sliding entry point equals ``encode_tiles_dense``."""
    encoder = _encoder()
    geometry = compute_dense_geometry(target_size=64, patch_size=encoder.patch_size)
    batch = torch.randn(2, 3, *geometry.encoded_size)

    with torch.no_grad():
        expected = encoder.encode_tiles_dense(batch)
        actual = encode_dense_sliding(
            encoder, batch, geometry=geometry, window_size=None, overlap=0.0
        )

    # torch.equal demands identical shape and identical element values.
    assert torch.equal(expected, actual)
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def test_sliding_window_covering_whole_is_byte_identical():
    """A window bigger than the input (256 vs 64) gives the plain dense forward result."""
    encoder = _encoder()
    geometry = compute_dense_geometry(target_size=64, patch_size=encoder.patch_size)
    batch = torch.randn(1, 3, *geometry.encoded_size)

    with torch.no_grad():
        expected = encoder.encode_tiles_dense(batch)
        # window >= input -> degenerate single window, must short-circuit to the
        # reference forward even though a non-zero overlap is requested.
        actual = encode_dense_sliding(
            encoder, batch, geometry=geometry, window_size=256, overlap=0.5
        )

    assert torch.equal(expected, actual)
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
# --- genuine sliding -----------------------------------------------------------
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def test_sliding_outputs_full_grid_shape():
    """Overlapping 32-pixel windows still produce a finite grid of the full grid shape."""
    encoder = _encoder()
    geometry = compute_dense_geometry(target_size=64, patch_size=encoder.patch_size)
    batch = torch.randn(2, 3, *geometry.encoded_size)

    with torch.no_grad():
        blended = encode_dense_sliding(
            encoder, batch, geometry=geometry, window_size=32, overlap=0.5
        )

    # Sliding is internal to extraction: output grid == the whole geometry's grid.
    expected_shape = (2, encoder.encode_dim, *geometry.grid_shape)
    assert blended.shape == expected_shape
    assert torch.isfinite(blended).all()
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def test_sliding_non_overlap_matches_block_encoding_on_interior():
    """With ``overlap=0`` the top-left window's tokens match encoding that block alone.

    Each token is then covered by exactly one window, so the blended value is
    expected to equal the single window's own encoding (within 1e-5).
    """
    encoder = _encoder()
    geometry = compute_dense_geometry(target_size=64, patch_size=encoder.patch_size)
    batch = torch.randn(1, 3, *geometry.encoded_size)

    # ``patch_size`` may be a tuple or a scalar; take the height component either way.
    if isinstance(encoder.patch_size, tuple):
        patch_h = encoder.patch_size[0]
    else:
        patch_h = encoder.patch_size

    with torch.no_grad():
        blended = encode_dense_sliding(
            encoder, batch, geometry=geometry, window_size=32, overlap=0.0
        )
        # top-left 32x32 block encoded on its own -> its 2x2 token sub-grid.
        top_left = encoder.encode_tiles_dense(batch[:, :, :32, :32]).float()

    tokens_per_window = 32 // patch_h
    corner = blended[:, :, :tokens_per_window, :tokens_per_window]
    torch.testing.assert_close(corner, top_left, rtol=1e-5, atol=1e-5)
|
|
@@ -1,117 +0,0 @@
|
|
|
1
|
-
"""Tests for dense grid extraction over slide regions: ``encode_regions_dense``.
|
|
2
|
-
|
|
3
|
-
Fully offline (``pretrained=False`` random weights) + an injected fake reader, so no
|
|
4
|
-
weights, no real WSI. Checks (1) grid shapes over a batch of coordinates and (2) that the
|
|
5
|
-
orchestration is a faithful wrapper — its per-region grid is byte-identical to a direct
|
|
6
|
-
``encode_tiles_dense(transform → pad)`` of the same region.
|
|
7
|
-
"""
|
|
8
|
-
|
|
9
|
-
from __future__ import annotations
|
|
10
|
-
|
|
11
|
-
import numpy as np
|
|
12
|
-
import pytest
|
|
13
|
-
|
|
14
|
-
torch = pytest.importorskip("torch")
|
|
15
|
-
timm = pytest.importorskip("timm")
|
|
16
|
-
|
|
17
|
-
from slide2vec.encoders.base import TimmTileEncoder # noqa: E402
|
|
18
|
-
from slide2vec.runtime.dense_regions import ( # noqa: E402
|
|
19
|
-
compute_dense_geometry,
|
|
20
|
-
encode_regions_dense,
|
|
21
|
-
pad_image_to_encoded,
|
|
22
|
-
)
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
def _encoder(**kwargs) -> TimmTileEncoder:
    """Return a randomly initialised ``vit_tiny_patch16_224`` tile encoder.

    Any extra keyword arguments are forwarded to ``TimmTileEncoder`` after the
    fixed ones, so passing one of the fixed names again raises ``TypeError``.
    """
    return TimmTileEncoder(
        "vit_tiny_patch16_224",
        pretrained=False,
        num_classes=0,
        dynamic_img_size=True,
        **kwargs,
    )
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
class _FakeWSI:
    """Fake slide reader that logs each read and returns location-seeded RGB noise."""

    def __init__(self, *, target_h: int, target_w: int):
        # The target dimensions are stored but not consulted when reading.
        self._target_h = target_h
        self._target_w = target_w
        # One tuple per read: (location, spacing, size, tolerance, interpolation).
        self.calls: list[tuple] = []

    def read_region_at_spacing(self, location, requested_spacing_um, size, *, tolerance, interpolation):
        """Record the call and return a ``(height, width, 3)`` uint8 array.

        ``size`` is unpacked as ``(width, height)`` and ``location`` as ``(x, y)``.
        The generator is seeded from the integer location only, so repeated reads
        of the same location return identical pixels.
        """
        record = (tuple(location), requested_spacing_um, tuple(size), tolerance, interpolation)
        self.calls.append(record)

        width, height = size
        x, y = location
        seed = abs(hash((int(x), int(y)))) % (2**32)
        generator = np.random.default_rng(seed)
        return generator.integers(0, 256, size=(height, width, 3), dtype=np.uint8)
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
def test_encode_regions_dense_shapes_over_coordinates():
    """Three coordinates give a float32 ``(3, encode_dim, 4, 4)`` array, read in order."""
    side = 64  # patch 16 -> grid 4x4, no padding
    encoder = _encoder()
    reader = _FakeWSI(target_h=side, target_w=side)
    locations = [(0, 0), (64, 0), (0, 64)]

    stacked = encode_regions_dense(
        model=encoder,
        device="cpu",
        wsi=reader,
        coordinates=locations,
        requested_spacing_um=0.5,
        target_size=side,
        batch_size=2,
    )

    assert stacked.shape == (3, encoder.encode_dim, 4, 4)
    assert stacked.dtype == np.float32

    # Every read went through read_region_at_spacing with level-0 coordinates,
    # a (target_w, target_h) size and "area" interpolation.
    read_locations = [call[0] for call in reader.calls]
    assert read_locations == [(0, 0), (64, 0), (0, 64)]
    for call in reader.calls:
        assert call[2] == (side, side) and call[4] == "area"
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
def test_encode_regions_dense_pads_non_multiple_target():
    """A target size that is not a patch multiple (60) still yields a 4x4 grid."""
    side = 60  # padded up to 64 -> grid 4x4
    encoder = _encoder()
    reader = _FakeWSI(target_h=side, target_w=side)

    stacked = encode_regions_dense(
        model=encoder,
        device="cpu",
        wsi=reader,
        coordinates=[(0, 0)],
        requested_spacing_um=0.5,
        target_size=side,
    )

    assert stacked.shape == (1, encoder.encode_dim, 4, 4)
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
def test_encode_regions_dense_matches_direct_encode():
    """Each returned grid matches a hand-rolled transform + pad + encode within 1e-6."""
    from PIL import Image

    side = 64
    encoder = _encoder()
    reader = _FakeWSI(target_h=side, target_w=side)
    locations = [(0, 0), (128, 256)]

    stacked = encode_regions_dense(
        model=encoder,
        device="cpu",
        wsi=reader,
        coordinates=locations,
        requested_spacing_um=0.5,
        target_size=side,
    )

    # Re-read the same regions through a second fake reader (reads are deterministic
    # per location) and encode them directly.
    geom = compute_dense_geometry(target_size=side, patch_size=encoder.patch_size)
    to_tensor = encoder.get_dense_transform()
    reference_reader = _FakeWSI(target_h=side, target_w=side)

    with torch.inference_mode():
        for index, location in enumerate(locations):
            pixels = reference_reader.read_region_at_spacing(
                location, 0.5, (side, side), tolerance=0.05, interpolation="area"
            )
            image = torch.as_tensor(to_tensor(Image.fromarray(pixels))).as_subclass(torch.Tensor)
            padded = pad_image_to_encoded(
                image, geom, pad_mode="reflect", image_pad_value=None
            )
            encoded = encoder.encode_tiles_dense(padded.unsqueeze(0))
            expected = encoded.detach().float().cpu().numpy()[0]
            np.testing.assert_allclose(stacked[index], expected, rtol=0, atol=1e-6)
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
def test_encode_regions_dense_empty_coordinates():
    """No coordinates gives an empty ``(0, 0, 4, 4)`` array and performs no reads."""
    encoder = _encoder()
    reader = _FakeWSI(target_h=64, target_w=64)

    stacked = encode_regions_dense(
        model=encoder,
        device="cpu",
        wsi=reader,
        coordinates=[],
        requested_spacing_um=0.5,
        target_size=64,
    )

    assert stacked.shape == (0, 0, 4, 4)
    assert reader.calls == []
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|