slide2vec 5.0.0__tar.gz → 5.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {slide2vec-5.0.0 → slide2vec-5.1.0}/PKG-INFO +1 -1
- {slide2vec-5.0.0 → slide2vec-5.1.0}/pyproject.toml +5 -2
- {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/__init__.py +1 -1
- {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/encoders/models/hibou.py +9 -2
- {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/encoders/models/midnight.py +12 -0
- {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/encoders/models/virchow.py +2 -8
- {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/runtime/dense_regions.py +70 -27
- slide2vec-5.1.0/slide2vec/runtime/dense_sliding.py +185 -0
- {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec.egg-info/PKG-INFO +1 -1
- {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec.egg-info/SOURCES.txt +2 -1
- slide2vec-5.1.0/tests/test_dense_regions.py +221 -0
- slide2vec-5.1.0/tests/test_dense_sliding.py +121 -0
- {slide2vec-5.0.0 → slide2vec-5.1.0}/tests/test_output_consistency.py +1 -0
- slide2vec-5.0.0/tests/test_dense_locality_gated.py +0 -162
- slide2vec-5.0.0/tests/test_dense_regions.py +0 -117
- {slide2vec-5.0.0 → slide2vec-5.1.0}/LICENSE +0 -0
- {slide2vec-5.0.0 → slide2vec-5.1.0}/README.md +0 -0
- {slide2vec-5.0.0 → slide2vec-5.1.0}/setup.cfg +0 -0
- {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/__main__.py +0 -0
- {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/api.py +0 -0
- {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/artifacts.py +0 -0
- {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/cli.py +0 -0
- {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/configs/__init__.py +0 -0
- {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/configs/default.yaml +0 -0
- {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/configs/resources.py +0 -0
- {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/data/__init__.py +0 -0
- {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/data/dataset.py +0 -0
- {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/data/tile_reader.py +0 -0
- {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/data/tile_store.py +0 -0
- {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/distributed/__init__.py +0 -0
- {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/distributed/direct_embed_worker.py +0 -0
- {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/distributed/pipeline_worker.py +0 -0
- {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/encoders/__init__.py +0 -0
- {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/encoders/base.py +0 -0
- {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/encoders/models/__init__.py +0 -0
- {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/encoders/models/conch.py +0 -0
- {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/encoders/models/gigapath.py +0 -0
- {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/encoders/models/hoptimus.py +0 -0
- {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/encoders/models/lunit.py +0 -0
- {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/encoders/models/moozy/__init__.py +0 -0
- {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/encoders/models/moozy/blocks.py +0 -0
- {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/encoders/models/moozy/case.py +0 -0
- {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/encoders/models/moozy/loading.py +0 -0
- {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/encoders/models/moozy/slide.py +0 -0
- {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/encoders/models/moozy/types.py +0 -0
- {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/encoders/models/musk.py +0 -0
- {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/encoders/models/phikon.py +0 -0
- {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/encoders/models/prism.py +0 -0
- {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/encoders/models/prost40m.py +0 -0
- {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/encoders/models/titan.py +0 -0
- {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/encoders/models/uni.py +0 -0
- {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/encoders/registry.py +0 -0
- {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/encoders/validation.py +0 -0
- {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/inference.py +0 -0
- {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/progress.py +0 -0
- {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/runtime/__init__.py +0 -0
- {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/runtime/artifacts_collect.py +0 -0
- {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/runtime/batching.py +0 -0
- {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/runtime/cpu_budget.py +0 -0
- {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/runtime/distributed.py +0 -0
- {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/runtime/distributed_stage.py +0 -0
- {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/runtime/embedding.py +0 -0
- {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/runtime/embedding_persist.py +0 -0
- {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/runtime/embedding_pipeline.py +0 -0
- {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/runtime/hierarchical.py +0 -0
- {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/runtime/manifest.py +0 -0
- {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/runtime/model_settings.py +0 -0
- {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/runtime/patient_pipeline.py +0 -0
- {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/runtime/persist_callbacks.py +0 -0
- {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/runtime/persistence.py +0 -0
- {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/runtime/process_list.py +0 -0
- {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/runtime/progress_bridge.py +0 -0
- {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/runtime/registry.py +0 -0
- {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/runtime/serialization.py +0 -0
- {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/runtime/slide_encode.py +0 -0
- {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/runtime/tiling.py +0 -0
- {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/runtime/tiling_pipeline.py +0 -0
- {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/runtime/types.py +0 -0
- {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/runtime/worker_io.py +0 -0
- {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/utils/__init__.py +0 -0
- {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/utils/config.py +0 -0
- {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/utils/coordinates.py +0 -0
- {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/utils/log_utils.py +0 -0
- {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/utils/tiling_io.py +0 -0
- {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/utils/utils.py +0 -0
- {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec.egg-info/dependency_links.txt +0 -0
- {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec.egg-info/entry_points.txt +0 -0
- {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec.egg-info/not-zip-safe +0 -0
- {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec.egg-info/requires.txt +0 -0
- {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec.egg-info/top_level.txt +0 -0
- {slide2vec-5.0.0 → slide2vec-5.1.0}/tests/test_architecture_runtime_split.py +0 -0
- {slide2vec-5.0.0 → slide2vec-5.1.0}/tests/test_attention_extraction.py +0 -0
- {slide2vec-5.0.0 → slide2vec-5.1.0}/tests/test_dense_extraction.py +0 -0
- {slide2vec-5.0.0 → slide2vec-5.1.0}/tests/test_encoder_registry.py +0 -0
- {slide2vec-5.0.0 → slide2vec-5.1.0}/tests/test_hs2p_package_cutover.py +0 -0
- {slide2vec-5.0.0 → slide2vec-5.1.0}/tests/test_progress.py +0 -0
- {slide2vec-5.0.0 → slide2vec-5.1.0}/tests/test_regression_core.py +0 -0
- {slide2vec-5.0.0 → slide2vec-5.1.0}/tests/test_regression_inference.py +0 -0
- {slide2vec-5.0.0 → slide2vec-5.1.0}/tests/test_regression_models.py +0 -0
- {slide2vec-5.0.0 → slide2vec-5.1.0}/tests/test_runtime_batching.py +0 -0
- {slide2vec-5.0.0 → slide2vec-5.1.0}/tests/test_tile_store.py +0 -0
- {slide2vec-5.0.0 → slide2vec-5.1.0}/tests/test_tiling_pipeline.py +0 -0
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "slide2vec"
|
|
7
|
-
version = "5.0.0"
|
|
7
|
+
version = "5.1.0"
|
|
8
8
|
description = "Embedding of whole slide images with Foundation Models"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
requires-python = ">=3.10"
|
|
@@ -145,6 +145,9 @@ addopts = "--cov=slide2vec"
|
|
|
145
145
|
testpaths = [
|
|
146
146
|
"tests",
|
|
147
147
|
]
|
|
148
|
+
markers = [
|
|
149
|
+
"heavy: real-weight foundation-model inference on CPU; minutes per test. Excluded from the PR suite via `-m 'not heavy'`; run on the scheduled/manual heavy workflow (.github/workflows/nightly-heavy.yaml).",
|
|
150
|
+
]
|
|
148
151
|
|
|
149
152
|
[tool.mypy]
|
|
150
153
|
mypy_path = "."
|
|
@@ -164,7 +167,7 @@ no_implicit_reexport = true
|
|
|
164
167
|
max-line-length = 160
|
|
165
168
|
|
|
166
169
|
[tool.bumpver]
|
|
167
|
-
current_version = "5.0.0"
|
|
170
|
+
current_version = "5.1.0"
|
|
168
171
|
version_pattern = "MAJOR.MINOR.PATCH"
|
|
169
172
|
commit = false # We do version bumping in CI, not as a commit
|
|
170
173
|
tag = false # Git tag already exists — we don't auto-tag
|
|
@@ -54,6 +54,13 @@ class _HibouBase(TileEncoder):
|
|
|
54
54
|
v2.Normalize(mean=_HIBOU_MEAN, std=_HIBOU_STD),
|
|
55
55
|
])
|
|
56
56
|
|
|
57
|
+
@property
|
|
58
|
+
def _num_prefix_tokens(self) -> int:
|
|
59
|
+
# CLS + register tokens. Dinov2-with-registers carries the register tokens
|
|
60
|
+
# between the CLS and patch tokens, so both the dense and attention paths
|
|
61
|
+
# must strip them; deriving the count from config keeps the two in sync.
|
|
62
|
+
return 1 + int(getattr(self._model.config, "num_register_tokens", 0))
|
|
63
|
+
|
|
57
64
|
def encode_tiles(self, batch: Tensor) -> Tensor:
|
|
58
65
|
output = self._model(pixel_values=batch)
|
|
59
66
|
return output.pooler_output
|
|
@@ -77,7 +84,7 @@ class _HibouBase(TileEncoder):
|
|
|
77
84
|
output.last_hidden_state,
|
|
78
85
|
grid_h=height // patch,
|
|
79
86
|
grid_w=width // patch,
|
|
80
|
-
num_prefix_tokens=
|
|
87
|
+
num_prefix_tokens=self._num_prefix_tokens,
|
|
81
88
|
encoder_name=type(self).__name__,
|
|
82
89
|
)
|
|
83
90
|
|
|
@@ -111,7 +118,7 @@ class _HibouBase(TileEncoder):
|
|
|
111
118
|
output = self._model(pixel_values=batch, output_attentions=True)
|
|
112
119
|
return attentions_tuple_to_grids(
|
|
113
120
|
output.attentions,
|
|
114
|
-
num_prefix_tokens=
|
|
121
|
+
num_prefix_tokens=self._num_prefix_tokens,
|
|
115
122
|
blocks=blocks,
|
|
116
123
|
include_registers=include_registers,
|
|
117
124
|
grid_h=height // patch,
|
|
@@ -36,6 +36,18 @@ class Midnight(TileEncoder):
|
|
|
36
36
|
self._model = AutoModel.from_pretrained("kaiko-ai/midnight").eval()
|
|
37
37
|
self._device = preferred_default_device()
|
|
38
38
|
self._output_variant = resolve_requested_output_variant(output_variant)
|
|
39
|
+
# The pooled, dense, and attention paths all assume a single CLS prefix
|
|
40
|
+
# token (kaiko's reference recipe pools over output[:, 1:]). If a future
|
|
41
|
+
# checkpoint adds register tokens, that assumption silently folds them into
|
|
42
|
+
# the patch mean and mislabels the dense/attention grids — fail loudly here.
|
|
43
|
+
num_register_tokens = int(getattr(self._model.config, "num_register_tokens", 0))
|
|
44
|
+
if num_register_tokens:
|
|
45
|
+
raise ValueError(
|
|
46
|
+
"Midnight encoder assumes a single CLS prefix token, but the loaded "
|
|
47
|
+
f"checkpoint reports num_register_tokens={num_register_tokens}. Update "
|
|
48
|
+
"the pooled/dense/attention paths to strip the register tokens before "
|
|
49
|
+
"using this checkpoint."
|
|
50
|
+
)
|
|
39
51
|
|
|
40
52
|
def get_transform(self) -> Callable:
|
|
41
53
|
return v2.Compose([
|
|
@@ -16,8 +16,6 @@ _VIRCHOW_OUTPUT_DIMS = {
|
|
|
16
16
|
class _VirchowBase(TimmTileEncoder):
|
|
17
17
|
"""Base for Virchow models that concat CLS + mean-pooled patch tokens."""
|
|
18
18
|
|
|
19
|
-
_num_prefix_tokens: int = 1 # Override in subclass if needed
|
|
20
|
-
|
|
21
19
|
def __init__(self, model_name: str, *, output_variant: str | None = None):
|
|
22
20
|
self._output_variant = resolve_requested_output_variant(
|
|
23
21
|
output_variant,
|
|
@@ -36,7 +34,7 @@ class _VirchowBase(TimmTileEncoder):
|
|
|
36
34
|
cls_token = output[:, 0]
|
|
37
35
|
if self._output_variant == "cls":
|
|
38
36
|
return cls_token
|
|
39
|
-
patch_tokens = output[:, self._num_prefix_tokens:]
|
|
37
|
+
patch_tokens = output[:, self._model.num_prefix_tokens:]
|
|
40
38
|
return torch.cat([cls_token, patch_tokens.mean(dim=1)], dim=-1)
|
|
41
39
|
|
|
42
40
|
@property
|
|
@@ -57,8 +55,6 @@ class _VirchowBase(TimmTileEncoder):
|
|
|
57
55
|
source="paige-ai/Virchow",
|
|
58
56
|
)
|
|
59
57
|
class Virchow(_VirchowBase):
|
|
60
|
-
_num_prefix_tokens = 1
|
|
61
|
-
|
|
62
58
|
def __init__(self, *, output_variant: str | None = None):
|
|
63
59
|
super().__init__("hf-hub:paige-ai/Virchow", output_variant=output_variant)
|
|
64
60
|
|
|
@@ -71,12 +67,10 @@ class Virchow(_VirchowBase):
|
|
|
71
67
|
},
|
|
72
68
|
default_output_variant="cls_patch_mean",
|
|
73
69
|
input_size=224,
|
|
74
|
-
supported_spacing_um=[0.5, 1.0, 2.0],
|
|
70
|
+
supported_spacing_um=[0.25, 0.5, 1.0, 2.0],
|
|
75
71
|
precision="fp16",
|
|
76
72
|
source="paige-ai/Virchow2",
|
|
77
73
|
)
|
|
78
74
|
class Virchow2(_VirchowBase):
|
|
79
|
-
_num_prefix_tokens = 5 # 1 CLS + 4 register tokens
|
|
80
|
-
|
|
81
75
|
def __init__(self, *, output_variant: str | None = None):
|
|
82
76
|
super().__init__("hf-hub:paige-ai/Virchow2", output_variant=output_variant)
|
|
@@ -5,7 +5,10 @@ The dense counterpart of the pooled coordinate path (``compute_tile_embeddings_f
|
|
|
5
5
|
each sampled ROI is read **spacing-aware** from the slide, run through the encoder's
|
|
6
6
|
normalization-only dense transform (``get_dense_transform`` — NOT the pooled transform,
|
|
7
7
|
which crops), padded up to the encoder's patch multiple, and encoded via
|
|
8
|
-
``encode_tiles_dense`` into a ``(d, grid_h, grid_w)`` token grid.
|
|
8
|
+
``encode_tiles_dense`` into a ``(d, grid_h, grid_w)`` token grid. ``iter_regions_dense``
|
|
9
|
+
**streams** these grids — yielding one per coordinate, in coordinate order, holding at most
|
|
10
|
+
one ``batch_size`` chunk resident — so host memory is bounded by ``batch_size`` rather than
|
|
11
|
+
by a slide's ROI count.
|
|
9
12
|
|
|
10
13
|
This is the extraction half of soma's slide-manifest segmentation path: slide2vec reads
|
|
11
14
|
regions + encodes (it already owns the region reader and the dense encode); soma sources
|
|
@@ -18,21 +21,25 @@ the finest pyramid level ``<=`` the requested µm/px is read and downscaled to t
|
|
|
18
21
|
the same spacing. The ``wsi`` is injected (any object exposing ``read_region_at_spacing``),
|
|
19
22
|
so the loop is unit-testable offline with a fake reader + a random-weight encoder.
|
|
20
23
|
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
+
Both dense modes run through one primitive (:func:`~slide2vec.runtime.dense_sliding.encode_dense_sliding`):
|
|
25
|
+
``window_size=None`` is a single whole-tile forward (byte-identical to the legacy
|
|
26
|
+
whole-region encode), and a ``window_size`` smaller than the encoded tile slides the
|
|
27
|
+
encoder's native field over the padded tile and blends the per-window token grids with a
|
|
28
|
+
separable raised-cosine map — letting a native-field encoder (e.g. 224-px Virchow2/phikon)
|
|
29
|
+
serve a larger ROI without interpolating its position embeddings.
|
|
24
30
|
"""
|
|
25
31
|
|
|
26
32
|
from __future__ import annotations
|
|
27
33
|
|
|
28
34
|
from dataclasses import dataclass
|
|
29
|
-
from typing import Callable, Sequence
|
|
35
|
+
from typing import Callable, Iterator, Sequence
|
|
30
36
|
|
|
31
37
|
import numpy as np
|
|
32
38
|
import torch
|
|
33
39
|
import torch.nn.functional as F
|
|
34
40
|
from PIL import Image
|
|
35
41
|
|
|
42
|
+
from slide2vec.runtime.dense_sliding import encode_dense_sliding
|
|
36
43
|
from slide2vec.runtime.slide_encode import slide_encode_autocast_ctx
|
|
37
44
|
|
|
38
45
|
_PAD_MODES = {"reflect", "constant", "zero", "replicate"}
|
|
@@ -136,7 +143,7 @@ def _resolve_encode_fn(
|
|
|
136
143
|
)
|
|
137
144
|
|
|
138
145
|
|
|
139
|
-
def encode_regions_dense(
|
|
146
|
+
def iter_regions_dense(
|
|
140
147
|
*,
|
|
141
148
|
model,
|
|
142
149
|
device: torch.device | str,
|
|
@@ -147,14 +154,21 @@ def encode_regions_dense(
|
|
|
147
154
|
tolerance: float = 0.05,
|
|
148
155
|
pad_mode: str = "reflect",
|
|
149
156
|
image_pad_value: float | None = None,
|
|
157
|
+
window_size: int | None = None,
|
|
158
|
+
overlap: float = 0.0,
|
|
150
159
|
feature_kind: str = "patch_features",
|
|
151
160
|
attention_blocks: tuple[int, ...] = (-1,),
|
|
152
161
|
attention_include_registers: bool = False,
|
|
153
162
|
batch_size: int = 1,
|
|
154
163
|
precision: str = "fp32",
|
|
155
164
|
dense_transform: Callable | None = None,
|
|
156
|
-
) -> np.ndarray:
|
|
157
|
-
"""
|
|
165
|
+
) -> Iterator[np.ndarray]:
|
|
166
|
+
"""Stream slide regions at ``coordinates`` into dense grids, one per coordinate.
|
|
167
|
+
|
|
168
|
+
Yields one ``(d, grid_h, grid_w)`` ``float32`` grid per coordinate, in coordinate
|
|
169
|
+
order. Regions are read and encoded one ``batch_size`` chunk at a time, so resident
|
|
170
|
+
host memory is bounded by ``batch_size`` rather than by a slide's ROI count (the loop
|
|
171
|
+
holds at most one batch of grids resident — no per-slide accumulation).
|
|
158
172
|
|
|
159
173
|
Injectable core: takes a constructed dense-capable ``model`` (with
|
|
160
174
|
``encode_tiles_dense`` / ``encode_tiles_attention`` / ``patch_size`` /
|
|
@@ -162,16 +176,30 @@ def encode_regions_dense(
|
|
|
162
176
|
``read_region_at_spacing(location, requested_spacing_um, size, *, tolerance,
|
|
163
177
|
interpolation)``, so it runs offline in tests with random weights + a fake reader.
|
|
164
178
|
|
|
179
|
+
Arguments are validated and geometry is resolved **eagerly** (before any region is
|
|
180
|
+
read): an invalid ``pad_mode`` or ``feature_kind`` raises at the call site, not on the
|
|
181
|
+
first ``next()``. Iteration itself is lazy — reads advance one batch at a time.
|
|
182
|
+
|
|
165
183
|
Args:
|
|
166
184
|
coordinates: ``(x, y)`` top-left locations in **level-0** pixel space (the hs2p
|
|
167
185
|
tiling convention; passed straight to ``read_region_at_spacing``).
|
|
168
186
|
requested_spacing_um: µm/px to read each region at.
|
|
169
187
|
target_size: supervision tile size (int or ``(h, w)``); the region is read at this
|
|
170
188
|
size at ``requested_spacing_um`` and the token grid registers to it.
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
189
|
+
window_size: encoder field-of-view chunk fed through the backbone per forward.
|
|
190
|
+
``None`` (default) is one whole-tile forward, byte-identical to the
|
|
191
|
+
whole-region encode; a value smaller than the encoded tile slides the encoder
|
|
192
|
+
over patch-aligned windows and blends the token grids (raised-cosine map). The
|
|
193
|
+
output grid is always the whole geometry's ``(grid_h, grid_w)`` either way —
|
|
194
|
+
sliding is internal to extraction.
|
|
195
|
+
overlap: fractional window overlap in ``[0, 1)`` for the sliding path (ignored when
|
|
196
|
+
``window_size is None``); the stride is ``window * (1 - overlap)``.
|
|
197
|
+
|
|
198
|
+
Yields ``float32`` grids in coordinate order; empty ``coordinates`` yields nothing.
|
|
199
|
+
``feature_kind`` selects ``encode_tiles_dense`` (patch grid) vs
|
|
200
|
+
``encode_tiles_attention`` (CLS-attention grid); both produce a ``(C, gh, gw)`` grid and
|
|
201
|
+
share this path. Each yielded grid is a standalone contiguous copy, so it does not pin
|
|
202
|
+
the rest of its batch's memory alive.
|
|
175
203
|
"""
|
|
176
204
|
if pad_mode not in _PAD_MODES:
|
|
177
205
|
raise ValueError(f"unsupported pad_mode {pad_mode!r}; expected one of {sorted(_PAD_MODES)}")
|
|
@@ -185,11 +213,8 @@ def encode_regions_dense(
|
|
|
185
213
|
attention_include_registers=attention_include_registers,
|
|
186
214
|
)
|
|
187
215
|
target_h, target_w = geometry.target_size
|
|
188
|
-
|
|
189
216
|
coords = [(int(x), int(y)) for x, y in coordinates]
|
|
190
|
-
|
|
191
|
-
if not coords:
|
|
192
|
-
return np.empty((0, 0, grid_h, grid_w), dtype=np.float32)
|
|
217
|
+
step = max(1, int(batch_size))
|
|
193
218
|
|
|
194
219
|
def _read_padded(location: tuple[int, int]) -> torch.Tensor:
|
|
195
220
|
region = wsi.read_region_at_spacing(
|
|
@@ -215,15 +240,33 @@ def encode_regions_dense(
|
|
|
215
240
|
tensor, geometry, pad_mode=pad_mode, image_pad_value=image_pad_value
|
|
216
241
|
)
|
|
217
242
|
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
243
|
+
def _stream() -> Iterator[np.ndarray]:
|
|
244
|
+
with torch.inference_mode(), slide_encode_autocast_ctx(device, precision):
|
|
245
|
+
for start in range(0, len(coords), step):
|
|
246
|
+
chunk = coords[start : start + step]
|
|
247
|
+
batch = torch.stack([_read_padded(loc) for loc in chunk]).to(
|
|
248
|
+
device, non_blocking=True
|
|
249
|
+
)
|
|
250
|
+
# Every batch goes through the one windowed primitive: window_size=None
|
|
251
|
+
# short-circuits to a single whole-tile forward (byte-identical to the
|
|
252
|
+
# whole-region encode), so there is no separate whole-region branch.
|
|
253
|
+
out = encode_dense_sliding(
|
|
254
|
+
model,
|
|
255
|
+
batch,
|
|
256
|
+
geometry=geometry,
|
|
257
|
+
window_size=window_size,
|
|
258
|
+
overlap=overlap,
|
|
259
|
+
encode_fn=encode_fn,
|
|
227
260
|
)
|
|
228
|
-
|
|
229
|
-
|
|
261
|
+
if out.ndim != 4:
|
|
262
|
+
raise ValueError(
|
|
263
|
+
f"{feature_kind} encode returned a {out.ndim}-D tensor; expected (B, d, gh, gw)."
|
|
264
|
+
)
|
|
265
|
+
batch_np = out.detach().float().cpu().numpy()
|
|
266
|
+
for i in range(batch_np.shape[0]):
|
|
267
|
+
# Standalone C-contiguous copy: a per-row view would pin the whole
|
|
268
|
+
# batch alive (the blended sliding output is contiguous, so a view of
|
|
269
|
+
# it would not copy). ``.copy()`` always copies, in C order.
|
|
270
|
+
yield batch_np[i].copy()
|
|
271
|
+
|
|
272
|
+
return _stream()
|
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
"""Sliding-window dense encoding — ``window_size`` + ``overlap`` as a free knob.
|
|
2
|
+
|
|
3
|
+
The ``whole`` path feeds the full padded tile through the encoder in one forward,
|
|
4
|
+
interpolating the positional embeddings to the larger grid. That is one end of a
|
|
5
|
+
single mechanism; the other end is running the encoder over smaller **windows** and
|
|
6
|
+
stitching the per-window token grids. Three sizes that are usually conflated —
|
|
7
|
+
|
|
8
|
+
* **native size** (e.g. 224) — sets the pos-embed table; not a hard input limit
|
|
9
|
+
(``dynamic_img_size`` lets a ViT process a larger field at the correct mpp);
|
|
10
|
+
* **window size** ``W`` — how big a chunk goes through the ViT in one forward;
|
|
11
|
+
* **input size** — the padded ``encoded_size`` we want dense features for.
|
|
12
|
+
|
|
13
|
+
``whole`` is ``W >= input`` (one window, zero stitching); native sliding is ``W = 224``;
|
|
14
|
+
the useful middle is ``W = 512`` slid over a larger input. So this is **one**
|
|
15
|
+
parametrized path, not a separate mode: :func:`encode_dense_sliding` takes
|
|
16
|
+
``window_size`` (``None`` ⇒ ``whole``) and ``overlap``, and the ``whole`` case falls out
|
|
17
|
+
as the degenerate single window — which we short-circuit to the exact same
|
|
18
|
+
``encode_tiles_dense(batch)`` call, so it stays **byte-identical** to the whole-region
|
|
19
|
+
path (the parity anchor).
|
|
20
|
+
|
|
21
|
+
Stitching happens in **token space** (the grid the decoder/head consume), so the output
|
|
22
|
+
is always ``(B, d, grid_h, grid_w)`` for ``geometry.grid_shape`` regardless of
|
|
23
|
+
``window_size`` — sliding is purely internal to extraction. Windows and strides are kept
|
|
24
|
+
patch-aligned, so each window maps cleanly onto a block of tokens; overlapping windows
|
|
25
|
+
are blended with a separable raised-cosine importance map (the standard frozen-backbone
|
|
26
|
+
dense-inference recipe, cf. MONAI ``sliding_window_inference``) to remove the
|
|
27
|
+
block-boundary seams naive non-overlapping tiling would introduce.
|
|
28
|
+
|
|
29
|
+
Ported from soma's ``soma/dense/sliding.py`` (the window/blend math is encoder
|
|
30
|
+
featurization that belongs in slide2vec); adapted to slide2vec's own
|
|
31
|
+
:class:`~slide2vec.runtime.dense_regions.DenseGridGeometry`.
|
|
32
|
+
"""
|
|
33
|
+
|
|
34
|
+
from __future__ import annotations
|
|
35
|
+
|
|
36
|
+
import math
|
|
37
|
+
from typing import TYPE_CHECKING, Callable
|
|
38
|
+
|
|
39
|
+
import torch
|
|
40
|
+
|
|
41
|
+
if TYPE_CHECKING:
|
|
42
|
+
from slide2vec.runtime.dense_regions import DenseGridGeometry
|
|
43
|
+
|
|
44
|
+
__all__ = [
|
|
45
|
+
"cover_origins",
|
|
46
|
+
"encode_dense_sliding",
|
|
47
|
+
"resolve_window_geometry",
|
|
48
|
+
]
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def _round_up(value: int, multiple: int) -> int:
|
|
52
|
+
return ((value + multiple - 1) // multiple) * multiple
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def _round_to(value: float, multiple: int) -> int:
|
|
56
|
+
return max(multiple, int(round(value / multiple)) * multiple)
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def cover_origins(extent: int, size: int, stride: int) -> list[int]:
|
|
60
|
+
"""Start offsets of ``size``-wide windows that fully cover ``[0, extent)``.
|
|
61
|
+
|
|
62
|
+
Walks ``[0, extent - size]`` in ``stride`` steps and, if the last step leaves a gap,
|
|
63
|
+
appends one final start flush to the far edge (``extent - size``) so coverage is
|
|
64
|
+
complete with no partial tail. ``extent``/``size``/``stride`` are patch multiples,
|
|
65
|
+
so every start is too — the edge-flush ``extent - size`` is a difference of patch
|
|
66
|
+
multiples.
|
|
67
|
+
"""
|
|
68
|
+
if size >= extent:
|
|
69
|
+
return [0]
|
|
70
|
+
starts = list(range(0, extent - size + 1, stride))
|
|
71
|
+
if starts[-1] + size < extent:
|
|
72
|
+
starts.append(extent - size) # shift the last window flush to the edge
|
|
73
|
+
return starts
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def _window_starts(extent: int, win: int, stride: int) -> list[int]:
|
|
77
|
+
"""Patch-aligned encoder-window starts — the token-space use of :func:`cover_origins`."""
|
|
78
|
+
return cover_origins(extent, win, stride)
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def resolve_window_geometry(
|
|
82
|
+
geometry: DenseGridGeometry, *, window_size: int | None, overlap: float
|
|
83
|
+
) -> tuple[tuple[int, int], tuple[int, int], list[int], list[int]]:
|
|
84
|
+
"""Resolve per-dim window size, stride, and start offsets (all patch-aligned).
|
|
85
|
+
|
|
86
|
+
``window_size`` is rounded up to the patch multiple and clamped to the encoded
|
|
87
|
+
extent; because ``round_up`` is monotonic, ``window_size >= target_size`` always
|
|
88
|
+
clamps to the full extent ⇒ a single window ⇒ the ``whole`` path. ``stride`` is
|
|
89
|
+
``window * (1 - overlap)`` rounded to the patch multiple and clamped to
|
|
90
|
+
``[patch, window]``.
|
|
91
|
+
"""
|
|
92
|
+
enc_h, enc_w = geometry.encoded_size
|
|
93
|
+
ph, pw = geometry.patch_size
|
|
94
|
+
if window_size is None:
|
|
95
|
+
return (enc_h, enc_w), (enc_h, enc_w), [0], [0]
|
|
96
|
+
|
|
97
|
+
win_h = min(_round_up(int(window_size), ph), enc_h)
|
|
98
|
+
win_w = min(_round_up(int(window_size), pw), enc_w)
|
|
99
|
+
keep = 1.0 - float(overlap)
|
|
100
|
+
stride_h = min(win_h, _round_to(win_h * keep, ph))
|
|
101
|
+
stride_w = min(win_w, _round_to(win_w * keep, pw))
|
|
102
|
+
starts_h = _window_starts(enc_h, win_h, stride_h)
|
|
103
|
+
starts_w = _window_starts(enc_w, win_w, stride_w)
|
|
104
|
+
return (win_h, win_w), (stride_h, stride_w), starts_h, starts_w
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def _hann_1d(n: int, device: torch.device, dtype: torch.dtype) -> torch.Tensor:
|
|
108
|
+
"""Strictly-positive raised-cosine weights of length ``n`` (uniform if ``n == 1``).
|
|
109
|
+
|
|
110
|
+
``0.5 - 0.5*cos(2*pi*(i+1)/(n+1))`` is > 0 for every ``i in [0, n)`` (no zeros at
|
|
111
|
+
the edges), so the accumulated weight map never hits zero where a window covers.
|
|
112
|
+
"""
|
|
113
|
+
if n <= 1:
|
|
114
|
+
return torch.ones(n, device=device, dtype=dtype)
|
|
115
|
+
i = torch.arange(1, n + 1, device=device, dtype=dtype)
|
|
116
|
+
return 0.5 - 0.5 * torch.cos(2.0 * math.pi * i / (n + 1))
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def encode_dense_sliding(
|
|
120
|
+
encoder,
|
|
121
|
+
batch: torch.Tensor,
|
|
122
|
+
*,
|
|
123
|
+
geometry: DenseGridGeometry,
|
|
124
|
+
window_size: int | None,
|
|
125
|
+
overlap: float = 0.0,
|
|
126
|
+
encode_fn: Callable[[torch.Tensor], torch.Tensor] | None = None,
|
|
127
|
+
) -> torch.Tensor:
|
|
128
|
+
"""Encode a padded ``(B, C, enc_h, enc_w)`` batch into ``(B, d, grid_h, grid_w)``.
|
|
129
|
+
|
|
130
|
+
``window_size is None`` (or any window that covers the whole encoded input) is the
|
|
131
|
+
degenerate single-window case: it short-circuits to one full-tile forward,
|
|
132
|
+
byte-identical to the whole-region path. Otherwise the encoder runs over
|
|
133
|
+
patch-aligned overlapping windows and the per-window token grids are blended with a
|
|
134
|
+
separable raised-cosine importance map. The stitch math runs in fp32 (sub-grids are
|
|
135
|
+
upcast before accumulation) so blended regions don't accumulate autocast-dtype error.
|
|
136
|
+
|
|
137
|
+
``encode_fn`` is the per-window encode callable ``(B, C, wh, ww) -> (B, d, th, tw)``;
|
|
138
|
+
it defaults to ``encoder.encode_tiles_dense`` (the patch-feature grid). The attention
|
|
139
|
+
path passes ``encoder.encode_tiles_attention`` (partial-applied with its
|
|
140
|
+
block/register knobs) so a CLS-attention grid stitches through the identical
|
|
141
|
+
raised-cosine blending — the output is just ``(B, K, grid)`` instead of ``(B, d, grid)``.
|
|
142
|
+
"""
|
|
143
|
+
if encode_fn is None:
|
|
144
|
+
encode_fn = encoder.encode_tiles_dense
|
|
145
|
+
(win_h, win_w), _, starts_h, starts_w = resolve_window_geometry(
|
|
146
|
+
geometry, window_size=window_size, overlap=overlap
|
|
147
|
+
)
|
|
148
|
+
if len(starts_h) == 1 and len(starts_w) == 1:
|
|
149
|
+
# Single window == the whole encoded tile: identical forward to the whole-region path.
|
|
150
|
+
return encode_fn(batch)
|
|
151
|
+
|
|
152
|
+
ph, pw = geometry.patch_size
|
|
153
|
+
grid_h, grid_w = geometry.grid_shape
|
|
154
|
+
wth, wtw = win_h // ph, win_w // pw
|
|
155
|
+
# Raised-cosine weights where windows overlap; uniform along any dim that is not
|
|
156
|
+
# actually tiled (a single window there) — avoids needless edge attenuation.
|
|
157
|
+
fdtype = torch.float32
|
|
158
|
+
wh = (
|
|
159
|
+
_hann_1d(wth, batch.device, fdtype)
|
|
160
|
+
if len(starts_h) > 1
|
|
161
|
+
else torch.ones(wth, device=batch.device, dtype=fdtype)
|
|
162
|
+
)
|
|
163
|
+
ww = (
|
|
164
|
+
_hann_1d(wtw, batch.device, fdtype)
|
|
165
|
+
if len(starts_w) > 1
|
|
166
|
+
else torch.ones(wtw, device=batch.device, dtype=fdtype)
|
|
167
|
+
)
|
|
168
|
+
weight = torch.outer(wh, ww) # (wth, wtw)
|
|
169
|
+
|
|
170
|
+
acc: torch.Tensor | None = None
|
|
171
|
+
wsum = torch.zeros(1, 1, grid_h, grid_w, device=batch.device, dtype=fdtype)
|
|
172
|
+
for sh in starts_h:
|
|
173
|
+
th = sh // ph
|
|
174
|
+
for sw in starts_w:
|
|
175
|
+
tw = sw // pw
|
|
176
|
+
window = batch[:, :, sh : sh + win_h, sw : sw + win_w]
|
|
177
|
+
sub = encode_fn(window).to(fdtype) # (B, d, wth, wtw)
|
|
178
|
+
if acc is None:
|
|
179
|
+
acc = torch.zeros(
|
|
180
|
+
sub.shape[0], sub.shape[1], grid_h, grid_w, device=batch.device, dtype=fdtype
|
|
181
|
+
)
|
|
182
|
+
acc[:, :, th : th + wth, tw : tw + wtw] += sub * weight
|
|
183
|
+
wsum[:, :, th : th + wth, tw : tw + wtw] += weight
|
|
184
|
+
assert acc is not None # at least one window always runs
|
|
185
|
+
return acc / wsum
|
|
@@ -54,6 +54,7 @@ slide2vec/runtime/artifacts_collect.py
|
|
|
54
54
|
slide2vec/runtime/batching.py
|
|
55
55
|
slide2vec/runtime/cpu_budget.py
|
|
56
56
|
slide2vec/runtime/dense_regions.py
|
|
57
|
+
slide2vec/runtime/dense_sliding.py
|
|
57
58
|
slide2vec/runtime/distributed.py
|
|
58
59
|
slide2vec/runtime/distributed_stage.py
|
|
59
60
|
slide2vec/runtime/embedding.py
|
|
@@ -83,8 +84,8 @@ slide2vec/utils/utils.py
|
|
|
83
84
|
tests/test_architecture_runtime_split.py
|
|
84
85
|
tests/test_attention_extraction.py
|
|
85
86
|
tests/test_dense_extraction.py
|
|
86
|
-
tests/test_dense_locality_gated.py
|
|
87
87
|
tests/test_dense_regions.py
|
|
88
|
+
tests/test_dense_sliding.py
|
|
88
89
|
tests/test_encoder_registry.py
|
|
89
90
|
tests/test_hs2p_package_cutover.py
|
|
90
91
|
tests/test_output_consistency.py
|