slide2vec-5.0.0.tar.gz → slide2vec-5.1.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (102)
  1. {slide2vec-5.0.0 → slide2vec-5.1.0}/PKG-INFO +1 -1
  2. {slide2vec-5.0.0 → slide2vec-5.1.0}/pyproject.toml +5 -2
  3. {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/__init__.py +1 -1
  4. {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/encoders/models/hibou.py +9 -2
  5. {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/encoders/models/midnight.py +12 -0
  6. {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/encoders/models/virchow.py +2 -8
  7. {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/runtime/dense_regions.py +70 -27
  8. slide2vec-5.1.0/slide2vec/runtime/dense_sliding.py +185 -0
  9. {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec.egg-info/PKG-INFO +1 -1
  10. {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec.egg-info/SOURCES.txt +2 -1
  11. slide2vec-5.1.0/tests/test_dense_regions.py +221 -0
  12. slide2vec-5.1.0/tests/test_dense_sliding.py +121 -0
  13. {slide2vec-5.0.0 → slide2vec-5.1.0}/tests/test_output_consistency.py +1 -0
  14. slide2vec-5.0.0/tests/test_dense_locality_gated.py +0 -162
  15. slide2vec-5.0.0/tests/test_dense_regions.py +0 -117
  16. {slide2vec-5.0.0 → slide2vec-5.1.0}/LICENSE +0 -0
  17. {slide2vec-5.0.0 → slide2vec-5.1.0}/README.md +0 -0
  18. {slide2vec-5.0.0 → slide2vec-5.1.0}/setup.cfg +0 -0
  19. {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/__main__.py +0 -0
  20. {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/api.py +0 -0
  21. {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/artifacts.py +0 -0
  22. {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/cli.py +0 -0
  23. {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/configs/__init__.py +0 -0
  24. {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/configs/default.yaml +0 -0
  25. {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/configs/resources.py +0 -0
  26. {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/data/__init__.py +0 -0
  27. {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/data/dataset.py +0 -0
  28. {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/data/tile_reader.py +0 -0
  29. {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/data/tile_store.py +0 -0
  30. {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/distributed/__init__.py +0 -0
  31. {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/distributed/direct_embed_worker.py +0 -0
  32. {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/distributed/pipeline_worker.py +0 -0
  33. {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/encoders/__init__.py +0 -0
  34. {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/encoders/base.py +0 -0
  35. {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/encoders/models/__init__.py +0 -0
  36. {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/encoders/models/conch.py +0 -0
  37. {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/encoders/models/gigapath.py +0 -0
  38. {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/encoders/models/hoptimus.py +0 -0
  39. {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/encoders/models/lunit.py +0 -0
  40. {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/encoders/models/moozy/__init__.py +0 -0
  41. {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/encoders/models/moozy/blocks.py +0 -0
  42. {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/encoders/models/moozy/case.py +0 -0
  43. {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/encoders/models/moozy/loading.py +0 -0
  44. {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/encoders/models/moozy/slide.py +0 -0
  45. {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/encoders/models/moozy/types.py +0 -0
  46. {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/encoders/models/musk.py +0 -0
  47. {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/encoders/models/phikon.py +0 -0
  48. {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/encoders/models/prism.py +0 -0
  49. {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/encoders/models/prost40m.py +0 -0
  50. {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/encoders/models/titan.py +0 -0
  51. {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/encoders/models/uni.py +0 -0
  52. {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/encoders/registry.py +0 -0
  53. {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/encoders/validation.py +0 -0
  54. {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/inference.py +0 -0
  55. {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/progress.py +0 -0
  56. {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/runtime/__init__.py +0 -0
  57. {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/runtime/artifacts_collect.py +0 -0
  58. {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/runtime/batching.py +0 -0
  59. {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/runtime/cpu_budget.py +0 -0
  60. {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/runtime/distributed.py +0 -0
  61. {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/runtime/distributed_stage.py +0 -0
  62. {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/runtime/embedding.py +0 -0
  63. {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/runtime/embedding_persist.py +0 -0
  64. {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/runtime/embedding_pipeline.py +0 -0
  65. {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/runtime/hierarchical.py +0 -0
  66. {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/runtime/manifest.py +0 -0
  67. {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/runtime/model_settings.py +0 -0
  68. {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/runtime/patient_pipeline.py +0 -0
  69. {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/runtime/persist_callbacks.py +0 -0
  70. {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/runtime/persistence.py +0 -0
  71. {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/runtime/process_list.py +0 -0
  72. {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/runtime/progress_bridge.py +0 -0
  73. {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/runtime/registry.py +0 -0
  74. {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/runtime/serialization.py +0 -0
  75. {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/runtime/slide_encode.py +0 -0
  76. {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/runtime/tiling.py +0 -0
  77. {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/runtime/tiling_pipeline.py +0 -0
  78. {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/runtime/types.py +0 -0
  79. {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/runtime/worker_io.py +0 -0
  80. {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/utils/__init__.py +0 -0
  81. {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/utils/config.py +0 -0
  82. {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/utils/coordinates.py +0 -0
  83. {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/utils/log_utils.py +0 -0
  84. {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/utils/tiling_io.py +0 -0
  85. {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec/utils/utils.py +0 -0
  86. {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec.egg-info/dependency_links.txt +0 -0
  87. {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec.egg-info/entry_points.txt +0 -0
  88. {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec.egg-info/not-zip-safe +0 -0
  89. {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec.egg-info/requires.txt +0 -0
  90. {slide2vec-5.0.0 → slide2vec-5.1.0}/slide2vec.egg-info/top_level.txt +0 -0
  91. {slide2vec-5.0.0 → slide2vec-5.1.0}/tests/test_architecture_runtime_split.py +0 -0
  92. {slide2vec-5.0.0 → slide2vec-5.1.0}/tests/test_attention_extraction.py +0 -0
  93. {slide2vec-5.0.0 → slide2vec-5.1.0}/tests/test_dense_extraction.py +0 -0
  94. {slide2vec-5.0.0 → slide2vec-5.1.0}/tests/test_encoder_registry.py +0 -0
  95. {slide2vec-5.0.0 → slide2vec-5.1.0}/tests/test_hs2p_package_cutover.py +0 -0
  96. {slide2vec-5.0.0 → slide2vec-5.1.0}/tests/test_progress.py +0 -0
  97. {slide2vec-5.0.0 → slide2vec-5.1.0}/tests/test_regression_core.py +0 -0
  98. {slide2vec-5.0.0 → slide2vec-5.1.0}/tests/test_regression_inference.py +0 -0
  99. {slide2vec-5.0.0 → slide2vec-5.1.0}/tests/test_regression_models.py +0 -0
  100. {slide2vec-5.0.0 → slide2vec-5.1.0}/tests/test_runtime_batching.py +0 -0
  101. {slide2vec-5.0.0 → slide2vec-5.1.0}/tests/test_tile_store.py +0 -0
  102. {slide2vec-5.0.0 → slide2vec-5.1.0}/tests/test_tiling_pipeline.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: slide2vec
3
- Version: 5.0.0
3
+ Version: 5.1.0
4
4
  Summary: Embedding of whole slide images with Foundation Models
5
5
  Author-email: Clément Grisi <clement.grisi@radboudumc.nl>
6
6
  License-Expression: Apache-2.0
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "slide2vec"
7
- version = "5.0.0"
7
+ version = "5.1.0"
8
8
  description = "Embedding of whole slide images with Foundation Models"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.10"
@@ -145,6 +145,9 @@ addopts = "--cov=slide2vec"
145
145
  testpaths = [
146
146
  "tests",
147
147
  ]
148
+ markers = [
149
+ "heavy: real-weight foundation-model inference on CPU; minutes per test. Excluded from the PR suite via `-m 'not heavy'`; run on the scheduled/manual heavy workflow (.github/workflows/nightly-heavy.yaml).",
150
+ ]
148
151
 
149
152
  [tool.mypy]
150
153
  mypy_path = "."
@@ -164,7 +167,7 @@ no_implicit_reexport = true
164
167
  max-line-length = 160
165
168
 
166
169
  [tool.bumpver]
167
- current_version = "5.0.0"
170
+ current_version = "5.1.0"
168
171
  version_pattern = "MAJOR.MINOR.PATCH"
169
172
  commit = false # We do version bumping in CI, not as a commit
170
173
  tag = false # Git tag already exists — we don't auto-tag
@@ -11,7 +11,7 @@ from slide2vec.api import (
11
11
  from slide2vec.artifacts import HierarchicalEmbeddingArtifact, SlideEmbeddingArtifact, TileEmbeddingArtifact
12
12
 
13
13
 
14
- __version__ = "5.0.0"
14
+ __version__ = "5.1.0"
15
15
 
16
16
  __all__ = [
17
17
  "Model",
@@ -54,6 +54,13 @@ class _HibouBase(TileEncoder):
54
54
  v2.Normalize(mean=_HIBOU_MEAN, std=_HIBOU_STD),
55
55
  ])
56
56
 
57
+ @property
58
+ def _num_prefix_tokens(self) -> int:
59
+ # CLS + register tokens. Dinov2-with-registers carries the register tokens
60
+ # between the CLS and patch tokens, so both the dense and attention paths
61
+ # must strip them; deriving the count from config keeps the two in sync.
62
+ return 1 + int(getattr(self._model.config, "num_register_tokens", 0))
63
+
57
64
  def encode_tiles(self, batch: Tensor) -> Tensor:
58
65
  output = self._model(pixel_values=batch)
59
66
  return output.pooler_output
@@ -77,7 +84,7 @@ class _HibouBase(TileEncoder):
77
84
  output.last_hidden_state,
78
85
  grid_h=height // patch,
79
86
  grid_w=width // patch,
80
- num_prefix_tokens=1 + int(getattr(self._model.config, "num_register_tokens", 0)),
87
+ num_prefix_tokens=self._num_prefix_tokens,
81
88
  encoder_name=type(self).__name__,
82
89
  )
83
90
 
@@ -111,7 +118,7 @@ class _HibouBase(TileEncoder):
111
118
  output = self._model(pixel_values=batch, output_attentions=True)
112
119
  return attentions_tuple_to_grids(
113
120
  output.attentions,
114
- num_prefix_tokens=1 + int(getattr(self._model.config, "num_register_tokens", 0)),
121
+ num_prefix_tokens=self._num_prefix_tokens,
115
122
  blocks=blocks,
116
123
  include_registers=include_registers,
117
124
  grid_h=height // patch,
@@ -36,6 +36,18 @@ class Midnight(TileEncoder):
36
36
  self._model = AutoModel.from_pretrained("kaiko-ai/midnight").eval()
37
37
  self._device = preferred_default_device()
38
38
  self._output_variant = resolve_requested_output_variant(output_variant)
39
+ # The pooled, dense, and attention paths all assume a single CLS prefix
40
+ # token (kaiko's reference recipe pools over output[:, 1:]). If a future
41
+ # checkpoint adds register tokens, that assumption silently folds them into
42
+ # the patch mean and mislabels the dense/attention grids — fail loudly here.
43
+ num_register_tokens = int(getattr(self._model.config, "num_register_tokens", 0))
44
+ if num_register_tokens:
45
+ raise ValueError(
46
+ "Midnight encoder assumes a single CLS prefix token, but the loaded "
47
+ f"checkpoint reports num_register_tokens={num_register_tokens}. Update "
48
+ "the pooled/dense/attention paths to strip the register tokens before "
49
+ "using this checkpoint."
50
+ )
39
51
 
40
52
  def get_transform(self) -> Callable:
41
53
  return v2.Compose([
@@ -16,8 +16,6 @@ _VIRCHOW_OUTPUT_DIMS = {
16
16
  class _VirchowBase(TimmTileEncoder):
17
17
  """Base for Virchow models that concat CLS + mean-pooled patch tokens."""
18
18
 
19
- _num_prefix_tokens: int = 1 # Override in subclass if needed
20
-
21
19
  def __init__(self, model_name: str, *, output_variant: str | None = None):
22
20
  self._output_variant = resolve_requested_output_variant(
23
21
  output_variant,
@@ -36,7 +34,7 @@ class _VirchowBase(TimmTileEncoder):
36
34
  cls_token = output[:, 0]
37
35
  if self._output_variant == "cls":
38
36
  return cls_token
39
- patch_tokens = output[:, self._num_prefix_tokens:]
37
+ patch_tokens = output[:, self._model.num_prefix_tokens:]
40
38
  return torch.cat([cls_token, patch_tokens.mean(dim=1)], dim=-1)
41
39
 
42
40
  @property
@@ -57,8 +55,6 @@ class _VirchowBase(TimmTileEncoder):
57
55
  source="paige-ai/Virchow",
58
56
  )
59
57
  class Virchow(_VirchowBase):
60
- _num_prefix_tokens = 1
61
-
62
58
  def __init__(self, *, output_variant: str | None = None):
63
59
  super().__init__("hf-hub:paige-ai/Virchow", output_variant=output_variant)
64
60
 
@@ -71,12 +67,10 @@ class Virchow(_VirchowBase):
71
67
  },
72
68
  default_output_variant="cls_patch_mean",
73
69
  input_size=224,
74
- supported_spacing_um=[0.5, 1.0, 2.0],
70
+ supported_spacing_um=[0.25, 0.5, 1.0, 2.0],
75
71
  precision="fp16",
76
72
  source="paige-ai/Virchow2",
77
73
  )
78
74
  class Virchow2(_VirchowBase):
79
- _num_prefix_tokens = 5 # 1 CLS + 4 register tokens
80
-
81
75
  def __init__(self, *, output_variant: str | None = None):
82
76
  super().__init__("hf-hub:paige-ai/Virchow2", output_variant=output_variant)
@@ -5,7 +5,10 @@ The dense counterpart of the pooled coordinate path (``compute_tile_embeddings_f
5
5
  each sampled ROI is read **spacing-aware** from the slide, run through the encoder's
6
6
  normalization-only dense transform (``get_dense_transform`` — NOT the pooled transform,
7
7
  which crops), padded up to the encoder's patch multiple, and encoded via
8
- ``encode_tiles_dense`` into a ``(d, grid_h, grid_w)`` token grid.
8
+ ``encode_tiles_dense`` into a ``(d, grid_h, grid_w)`` token grid. ``iter_regions_dense``
9
+ **streams** these grids — yielding one per coordinate, in coordinate order, holding at most
10
+ one ``batch_size`` chunk resident — so host memory is bounded by ``batch_size`` rather than
11
+ by a slide's ROI count.
9
12
 
10
13
  This is the extraction half of soma's slide-manifest segmentation path: slide2vec reads
11
14
  regions + encodes (it already owns the region reader and the dense encode); soma sources
@@ -18,21 +21,25 @@ the finest pyramid level ``<=`` the requested µm/px is read and downscaled to t
18
21
  the same spacing. The ``wsi`` is injected (any object exposing ``read_region_at_spacing``),
19
22
  so the loop is unit-testable offline with a fake reader + a random-weight encoder.
20
23
 
21
- Whole-tile only (one padded forward per region). Sliding-window dense extraction over
22
- coordinates (``window_size`` < input) is a deferred follow-up large ROIs that exceed the
23
- encoder's comfortable field are out of scope for the first increment.
24
+ Both dense modes run through one primitive (:func:`~slide2vec.runtime.dense_sliding.encode_dense_sliding`):
25
+ ``window_size=None`` is a single whole-tile forward (byte-identical to the legacy
26
+ whole-region encode), and a ``window_size`` smaller than the encoded tile slides the
27
+ encoder's native field over the padded tile and blends the per-window token grids with a
28
+ separable raised-cosine map — letting a native-field encoder (e.g. 224-px Virchow2/phikon)
29
+ serve a larger ROI without interpolating its position embeddings.
24
30
  """
25
31
 
26
32
  from __future__ import annotations
27
33
 
28
34
  from dataclasses import dataclass
29
- from typing import Callable, Sequence
35
+ from typing import Callable, Iterator, Sequence
30
36
 
31
37
  import numpy as np
32
38
  import torch
33
39
  import torch.nn.functional as F
34
40
  from PIL import Image
35
41
 
42
+ from slide2vec.runtime.dense_sliding import encode_dense_sliding
36
43
  from slide2vec.runtime.slide_encode import slide_encode_autocast_ctx
37
44
 
38
45
  _PAD_MODES = {"reflect", "constant", "zero", "replicate"}
@@ -136,7 +143,7 @@ def _resolve_encode_fn(
136
143
  )
137
144
 
138
145
 
139
- def encode_regions_dense(
146
+ def iter_regions_dense(
140
147
  *,
141
148
  model,
142
149
  device: torch.device | str,
@@ -147,14 +154,21 @@ def encode_regions_dense(
147
154
  tolerance: float = 0.05,
148
155
  pad_mode: str = "reflect",
149
156
  image_pad_value: float | None = None,
157
+ window_size: int | None = None,
158
+ overlap: float = 0.0,
150
159
  feature_kind: str = "patch_features",
151
160
  attention_blocks: tuple[int, ...] = (-1,),
152
161
  attention_include_registers: bool = False,
153
162
  batch_size: int = 1,
154
163
  precision: str = "fp32",
155
164
  dense_transform: Callable | None = None,
156
- ) -> np.ndarray:
157
- """Encode slide regions at ``coordinates`` into dense grids; return ``(N, d, gh, gw)``.
165
+ ) -> Iterator[np.ndarray]:
166
+ """Stream slide regions at ``coordinates`` into dense grids, one per coordinate.
167
+
168
+ Yields one ``(d, grid_h, grid_w)`` ``float32`` grid per coordinate, in coordinate
169
+ order. Regions are read and encoded one ``batch_size`` chunk at a time, so resident
170
+ host memory is bounded by ``batch_size`` rather than by a slide's ROI count (the loop
171
+ holds at most one batch of grids resident — no per-slide accumulation).
158
172
 
159
173
  Injectable core: takes a constructed dense-capable ``model`` (with
160
174
  ``encode_tiles_dense`` / ``encode_tiles_attention`` / ``patch_size`` /
@@ -162,16 +176,30 @@ def encode_regions_dense(
162
176
  ``read_region_at_spacing(location, requested_spacing_um, size, *, tolerance,
163
177
  interpolation)``, so it runs offline in tests with random weights + a fake reader.
164
178
 
179
+ Arguments are validated and geometry is resolved **eagerly** (before any region is
180
+ read): an invalid ``pad_mode`` or ``feature_kind`` raises at the call site, not on the
181
+ first ``next()``. Iteration itself is lazy — reads advance one batch at a time.
182
+
165
183
  Args:
166
184
  coordinates: ``(x, y)`` top-left locations in **level-0** pixel space (the hs2p
167
185
  tiling convention; passed straight to ``read_region_at_spacing``).
168
186
  requested_spacing_um: µm/px to read each region at.
169
187
  target_size: supervision tile size (int or ``(h, w)``); the region is read at this
170
188
  size at ``requested_spacing_um`` and the token grid registers to it.
171
-
172
- Returns a ``float32`` array of dense grids in coordinate order. ``feature_kind``
173
- selects ``encode_tiles_dense`` (patch grid) vs ``encode_tiles_attention`` (CLS-attention
174
- grid); both produce a ``(C, gh, gw)`` grid and share this path.
189
+ window_size: encoder field-of-view chunk fed through the backbone per forward.
190
+ ``None`` (default) is one whole-tile forward, byte-identical to the
191
+ whole-region encode; a value smaller than the encoded tile slides the encoder
192
+ over patch-aligned windows and blends the token grids (raised-cosine map). The
193
+ output grid is always the whole geometry's ``(grid_h, grid_w)`` either way —
194
+ sliding is internal to extraction.
195
+ overlap: fractional window overlap in ``[0, 1)`` for the sliding path (ignored when
196
+ ``window_size is None``); the stride is ``window * (1 - overlap)``.
197
+
198
+ Yields ``float32`` grids in coordinate order; empty ``coordinates`` yields nothing.
199
+ ``feature_kind`` selects ``encode_tiles_dense`` (patch grid) vs
200
+ ``encode_tiles_attention`` (CLS-attention grid); both produce a ``(C, gh, gw)`` grid and
201
+ share this path. Each yielded grid is a standalone contiguous copy, so it does not pin
202
+ the rest of its batch's memory alive.
175
203
  """
176
204
  if pad_mode not in _PAD_MODES:
177
205
  raise ValueError(f"unsupported pad_mode {pad_mode!r}; expected one of {sorted(_PAD_MODES)}")
@@ -185,11 +213,8 @@ def encode_regions_dense(
185
213
  attention_include_registers=attention_include_registers,
186
214
  )
187
215
  target_h, target_w = geometry.target_size
188
-
189
216
  coords = [(int(x), int(y)) for x, y in coordinates]
190
- grid_h, grid_w = geometry.grid_shape
191
- if not coords:
192
- return np.empty((0, 0, grid_h, grid_w), dtype=np.float32)
217
+ step = max(1, int(batch_size))
193
218
 
194
219
  def _read_padded(location: tuple[int, int]) -> torch.Tensor:
195
220
  region = wsi.read_region_at_spacing(
@@ -215,15 +240,33 @@ def encode_regions_dense(
215
240
  tensor, geometry, pad_mode=pad_mode, image_pad_value=image_pad_value
216
241
  )
217
242
 
218
- grids: list[np.ndarray] = []
219
- with torch.inference_mode(), slide_encode_autocast_ctx(device, precision):
220
- for start in range(0, len(coords), max(1, int(batch_size))):
221
- chunk = coords[start : start + max(1, int(batch_size))]
222
- batch = torch.stack([_read_padded(loc) for loc in chunk]).to(device, non_blocking=True)
223
- out = encode_fn(batch)
224
- if out.ndim != 4:
225
- raise ValueError(
226
- f"{feature_kind} encode returned a {out.ndim}-D tensor; expected (B, d, gh, gw)."
243
+ def _stream() -> Iterator[np.ndarray]:
244
+ with torch.inference_mode(), slide_encode_autocast_ctx(device, precision):
245
+ for start in range(0, len(coords), step):
246
+ chunk = coords[start : start + step]
247
+ batch = torch.stack([_read_padded(loc) for loc in chunk]).to(
248
+ device, non_blocking=True
249
+ )
250
+ # Every batch goes through the one windowed primitive: window_size=None
251
+ # short-circuits to a single whole-tile forward (byte-identical to the
252
+ # whole-region encode), so there is no separate whole-region branch.
253
+ out = encode_dense_sliding(
254
+ model,
255
+ batch,
256
+ geometry=geometry,
257
+ window_size=window_size,
258
+ overlap=overlap,
259
+ encode_fn=encode_fn,
227
260
  )
228
- grids.append(out.detach().float().cpu().numpy())
229
- return np.concatenate(grids, axis=0)
261
+ if out.ndim != 4:
262
+ raise ValueError(
263
+ f"{feature_kind} encode returned a {out.ndim}-D tensor; expected (B, d, gh, gw)."
264
+ )
265
+ batch_np = out.detach().float().cpu().numpy()
266
+ for i in range(batch_np.shape[0]):
267
+ # Standalone C-contiguous copy: a per-row view would pin the whole
268
+ # batch alive (the blended sliding output is contiguous, so a view of
269
+ # it would not copy). ``.copy()`` always copies, in C order.
270
+ yield batch_np[i].copy()
271
+
272
+ return _stream()
@@ -0,0 +1,185 @@
1
+ """Sliding-window dense encoding — ``window_size`` + ``overlap`` as a free knob.
2
+
3
+ The ``whole`` path feeds the full padded tile through the encoder in one forward,
4
+ interpolating the positional embeddings to the larger grid. That is one end of a
5
+ single mechanism; the other end is running the encoder over smaller **windows** and
6
+ stitching the per-window token grids. Three sizes that are usually conflated —
7
+
8
+ * **native size** (e.g. 224) — sets the pos-embed table; not a hard input limit
9
+ (``dynamic_img_size`` lets a ViT process a larger field at the correct mpp);
10
+ * **window size** ``W`` — how big a chunk goes through the ViT in one forward;
11
+ * **input size** — the padded ``encoded_size`` we want dense features for.
12
+
13
+ ``whole`` is ``W >= input`` (one window, zero stitching); native sliding is ``W = 224``;
14
+ the useful middle is ``W = 512`` slid over a larger input. So this is **one**
15
+ parametrized path, not a separate mode: :func:`encode_dense_sliding` takes
16
+ ``window_size`` (``None`` ⇒ ``whole``) and ``overlap``, and the ``whole`` case falls out
17
+ as the degenerate single window — which we short-circuit to the exact same
18
+ ``encode_tiles_dense(batch)`` call, so it stays **byte-identical** to the whole-region
19
+ path (the parity anchor).
20
+
21
+ Stitching happens in **token space** (the grid the decoder/head consume), so the output
22
+ is always ``(B, d, grid_h, grid_w)`` for ``geometry.grid_shape`` regardless of
23
+ ``window_size`` — sliding is purely internal to extraction. Windows and strides are kept
24
+ patch-aligned, so each window maps cleanly onto a block of tokens; overlapping windows
25
+ are blended with a separable raised-cosine importance map (the standard frozen-backbone
26
+ dense-inference recipe, cf. MONAI ``sliding_window_inference``) to remove the
27
+ block-boundary seams naive non-overlapping tiling would introduce.
28
+
29
+ Ported from soma's ``soma/dense/sliding.py`` (the window/blend math is encoder
30
+ featurization that belongs in slide2vec); adapted to slide2vec's own
31
+ :class:`~slide2vec.runtime.dense_regions.DenseGridGeometry`.
32
+ """
33
+
34
+ from __future__ import annotations
35
+
36
+ import math
37
+ from typing import TYPE_CHECKING, Callable
38
+
39
+ import torch
40
+
41
+ if TYPE_CHECKING:
42
+ from slide2vec.runtime.dense_regions import DenseGridGeometry
43
+
44
+ __all__ = [
45
+ "cover_origins",
46
+ "encode_dense_sliding",
47
+ "resolve_window_geometry",
48
+ ]
49
+
50
+
51
+ def _round_up(value: int, multiple: int) -> int:
52
+ return ((value + multiple - 1) // multiple) * multiple
53
+
54
+
55
+ def _round_to(value: float, multiple: int) -> int:
56
+ return max(multiple, int(round(value / multiple)) * multiple)
57
+
58
+
59
+ def cover_origins(extent: int, size: int, stride: int) -> list[int]:
60
+ """Start offsets of ``size``-wide windows that fully cover ``[0, extent)``.
61
+
62
+ Walks ``[0, extent - size]`` in ``stride`` steps and, if the last step leaves a gap,
63
+ appends one final start flush to the far edge (``extent - size``) so coverage is
64
+ complete with no partial tail. ``extent``/``size``/``stride`` are patch multiples,
65
+ so every start is too — the edge-flush ``extent - size`` is a difference of patch
66
+ multiples.
67
+ """
68
+ if size >= extent:
69
+ return [0]
70
+ starts = list(range(0, extent - size + 1, stride))
71
+ if starts[-1] + size < extent:
72
+ starts.append(extent - size) # shift the last window flush to the edge
73
+ return starts
74
+
75
+
76
+ def _window_starts(extent: int, win: int, stride: int) -> list[int]:
77
+ """Patch-aligned encoder-window starts — the token-space use of :func:`cover_origins`."""
78
+ return cover_origins(extent, win, stride)
79
+
80
+
81
+ def resolve_window_geometry(
82
+ geometry: DenseGridGeometry, *, window_size: int | None, overlap: float
83
+ ) -> tuple[tuple[int, int], tuple[int, int], list[int], list[int]]:
84
+ """Resolve per-dim window size, stride, and start offsets (all patch-aligned).
85
+
86
+ ``window_size`` is rounded up to the patch multiple and clamped to the encoded
87
+ extent; because ``round_up`` is monotonic, ``window_size >= target_size`` always
88
+ clamps to the full extent ⇒ a single window ⇒ the ``whole`` path. ``stride`` is
89
+ ``window * (1 - overlap)`` rounded to the patch multiple and clamped to
90
+ ``[patch, window]``.
91
+ """
92
+ enc_h, enc_w = geometry.encoded_size
93
+ ph, pw = geometry.patch_size
94
+ if window_size is None:
95
+ return (enc_h, enc_w), (enc_h, enc_w), [0], [0]
96
+
97
+ win_h = min(_round_up(int(window_size), ph), enc_h)
98
+ win_w = min(_round_up(int(window_size), pw), enc_w)
99
+ keep = 1.0 - float(overlap)
100
+ stride_h = min(win_h, _round_to(win_h * keep, ph))
101
+ stride_w = min(win_w, _round_to(win_w * keep, pw))
102
+ starts_h = _window_starts(enc_h, win_h, stride_h)
103
+ starts_w = _window_starts(enc_w, win_w, stride_w)
104
+ return (win_h, win_w), (stride_h, stride_w), starts_h, starts_w
105
+
106
+
107
+ def _hann_1d(n: int, device: torch.device, dtype: torch.dtype) -> torch.Tensor:
108
+ """Strictly-positive raised-cosine weights of length ``n`` (uniform if ``n == 1``).
109
+
110
+ ``0.5 - 0.5*cos(2*pi*(i+1)/(n+1))`` is > 0 for every ``i in [0, n)`` (no zeros at
111
+ the edges), so the accumulated weight map never hits zero where a window covers.
112
+ """
113
+ if n <= 1:
114
+ return torch.ones(n, device=device, dtype=dtype)
115
+ i = torch.arange(1, n + 1, device=device, dtype=dtype)
116
+ return 0.5 - 0.5 * torch.cos(2.0 * math.pi * i / (n + 1))
117
+
118
+
119
+ def encode_dense_sliding(
120
+ encoder,
121
+ batch: torch.Tensor,
122
+ *,
123
+ geometry: DenseGridGeometry,
124
+ window_size: int | None,
125
+ overlap: float = 0.0,
126
+ encode_fn: Callable[[torch.Tensor], torch.Tensor] | None = None,
127
+ ) -> torch.Tensor:
128
+ """Encode a padded ``(B, C, enc_h, enc_w)`` batch into ``(B, d, grid_h, grid_w)``.
129
+
130
+ ``window_size is None`` (or any window that covers the whole encoded input) is the
131
+ degenerate single-window case: it short-circuits to one full-tile forward,
132
+ byte-identical to the whole-region path. Otherwise the encoder runs over
133
+ patch-aligned overlapping windows and the per-window token grids are blended with a
134
+ separable raised-cosine importance map. The stitch math runs in fp32 (sub-grids are
135
+ upcast before accumulation) so blended regions don't accumulate autocast-dtype error.
136
+
137
+ ``encode_fn`` is the per-window encode callable ``(B, C, wh, ww) -> (B, d, th, tw)``;
138
+ it defaults to ``encoder.encode_tiles_dense`` (the patch-feature grid). The attention
139
+ path passes ``encoder.encode_tiles_attention`` (partial-applied with its
140
+ block/register knobs) so a CLS-attention grid stitches through the identical
141
+ raised-cosine blending — the output is just ``(B, K, grid)`` instead of ``(B, d, grid)``.
142
+ """
143
+ if encode_fn is None:
144
+ encode_fn = encoder.encode_tiles_dense
145
+ (win_h, win_w), _, starts_h, starts_w = resolve_window_geometry(
146
+ geometry, window_size=window_size, overlap=overlap
147
+ )
148
+ if len(starts_h) == 1 and len(starts_w) == 1:
149
+ # Single window == the whole encoded tile: identical forward to the whole-region path.
150
+ return encode_fn(batch)
151
+
152
+ ph, pw = geometry.patch_size
153
+ grid_h, grid_w = geometry.grid_shape
154
+ wth, wtw = win_h // ph, win_w // pw
155
+ # Raised-cosine weights where windows overlap; uniform along any dim that is not
156
+ # actually tiled (a single window there) — avoids needless edge attenuation.
157
+ fdtype = torch.float32
158
+ wh = (
159
+ _hann_1d(wth, batch.device, fdtype)
160
+ if len(starts_h) > 1
161
+ else torch.ones(wth, device=batch.device, dtype=fdtype)
162
+ )
163
+ ww = (
164
+ _hann_1d(wtw, batch.device, fdtype)
165
+ if len(starts_w) > 1
166
+ else torch.ones(wtw, device=batch.device, dtype=fdtype)
167
+ )
168
+ weight = torch.outer(wh, ww) # (wth, wtw)
169
+
170
+ acc: torch.Tensor | None = None
171
+ wsum = torch.zeros(1, 1, grid_h, grid_w, device=batch.device, dtype=fdtype)
172
+ for sh in starts_h:
173
+ th = sh // ph
174
+ for sw in starts_w:
175
+ tw = sw // pw
176
+ window = batch[:, :, sh : sh + win_h, sw : sw + win_w]
177
+ sub = encode_fn(window).to(fdtype) # (B, d, wth, wtw)
178
+ if acc is None:
179
+ acc = torch.zeros(
180
+ sub.shape[0], sub.shape[1], grid_h, grid_w, device=batch.device, dtype=fdtype
181
+ )
182
+ acc[:, :, th : th + wth, tw : tw + wtw] += sub * weight
183
+ wsum[:, :, th : th + wth, tw : tw + wtw] += weight
184
+ assert acc is not None # at least one window always runs
185
+ return acc / wsum
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: slide2vec
3
- Version: 5.0.0
3
+ Version: 5.1.0
4
4
  Summary: Embedding of whole slide images with Foundation Models
5
5
  Author-email: Clément Grisi <clement.grisi@radboudumc.nl>
6
6
  License-Expression: Apache-2.0
@@ -54,6 +54,7 @@ slide2vec/runtime/artifacts_collect.py
54
54
  slide2vec/runtime/batching.py
55
55
  slide2vec/runtime/cpu_budget.py
56
56
  slide2vec/runtime/dense_regions.py
57
+ slide2vec/runtime/dense_sliding.py
57
58
  slide2vec/runtime/distributed.py
58
59
  slide2vec/runtime/distributed_stage.py
59
60
  slide2vec/runtime/embedding.py
@@ -83,8 +84,8 @@ slide2vec/utils/utils.py
83
84
  tests/test_architecture_runtime_split.py
84
85
  tests/test_attention_extraction.py
85
86
  tests/test_dense_extraction.py
86
- tests/test_dense_locality_gated.py
87
87
  tests/test_dense_regions.py
88
+ tests/test_dense_sliding.py
88
89
  tests/test_encoder_registry.py
89
90
  tests/test_hs2p_package_cutover.py
90
91
  tests/test_output_consistency.py