slide2vec 4.6.4__tar.gz → 4.7.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (99) hide show
  1. {slide2vec-4.6.4 → slide2vec-4.7.0}/PKG-INFO +1 -1
  2. {slide2vec-4.6.4 → slide2vec-4.7.0}/pyproject.toml +2 -2
  3. {slide2vec-4.6.4 → slide2vec-4.7.0}/slide2vec/__init__.py +1 -1
  4. {slide2vec-4.6.4 → slide2vec-4.7.0}/slide2vec/inference.py +74 -28
  5. slide2vec-4.7.0/slide2vec/runtime/dense_regions.py +229 -0
  6. {slide2vec-4.6.4 → slide2vec-4.7.0}/slide2vec/runtime/persist_callbacks.py +60 -13
  7. {slide2vec-4.6.4 → slide2vec-4.7.0}/slide2vec.egg-info/PKG-INFO +1 -1
  8. {slide2vec-4.6.4 → slide2vec-4.7.0}/slide2vec.egg-info/SOURCES.txt +2 -0
  9. slide2vec-4.7.0/tests/test_dense_regions.py +117 -0
  10. {slide2vec-4.6.4 → slide2vec-4.7.0}/tests/test_regression_inference.py +127 -0
  11. {slide2vec-4.6.4 → slide2vec-4.7.0}/LICENSE +0 -0
  12. {slide2vec-4.6.4 → slide2vec-4.7.0}/README.md +0 -0
  13. {slide2vec-4.6.4 → slide2vec-4.7.0}/setup.cfg +0 -0
  14. {slide2vec-4.6.4 → slide2vec-4.7.0}/slide2vec/__main__.py +0 -0
  15. {slide2vec-4.6.4 → slide2vec-4.7.0}/slide2vec/api.py +0 -0
  16. {slide2vec-4.6.4 → slide2vec-4.7.0}/slide2vec/artifacts.py +0 -0
  17. {slide2vec-4.6.4 → slide2vec-4.7.0}/slide2vec/cli.py +0 -0
  18. {slide2vec-4.6.4 → slide2vec-4.7.0}/slide2vec/configs/__init__.py +0 -0
  19. {slide2vec-4.6.4 → slide2vec-4.7.0}/slide2vec/configs/default.yaml +0 -0
  20. {slide2vec-4.6.4 → slide2vec-4.7.0}/slide2vec/configs/resources.py +0 -0
  21. {slide2vec-4.6.4 → slide2vec-4.7.0}/slide2vec/data/__init__.py +0 -0
  22. {slide2vec-4.6.4 → slide2vec-4.7.0}/slide2vec/data/dataset.py +0 -0
  23. {slide2vec-4.6.4 → slide2vec-4.7.0}/slide2vec/data/tile_reader.py +0 -0
  24. {slide2vec-4.6.4 → slide2vec-4.7.0}/slide2vec/data/tile_store.py +0 -0
  25. {slide2vec-4.6.4 → slide2vec-4.7.0}/slide2vec/distributed/__init__.py +0 -0
  26. {slide2vec-4.6.4 → slide2vec-4.7.0}/slide2vec/distributed/direct_embed_worker.py +0 -0
  27. {slide2vec-4.6.4 → slide2vec-4.7.0}/slide2vec/distributed/pipeline_worker.py +0 -0
  28. {slide2vec-4.6.4 → slide2vec-4.7.0}/slide2vec/encoders/__init__.py +0 -0
  29. {slide2vec-4.6.4 → slide2vec-4.7.0}/slide2vec/encoders/base.py +0 -0
  30. {slide2vec-4.6.4 → slide2vec-4.7.0}/slide2vec/encoders/models/__init__.py +0 -0
  31. {slide2vec-4.6.4 → slide2vec-4.7.0}/slide2vec/encoders/models/conch.py +0 -0
  32. {slide2vec-4.6.4 → slide2vec-4.7.0}/slide2vec/encoders/models/gigapath.py +0 -0
  33. {slide2vec-4.6.4 → slide2vec-4.7.0}/slide2vec/encoders/models/hibou.py +0 -0
  34. {slide2vec-4.6.4 → slide2vec-4.7.0}/slide2vec/encoders/models/hoptimus.py +0 -0
  35. {slide2vec-4.6.4 → slide2vec-4.7.0}/slide2vec/encoders/models/lunit.py +0 -0
  36. {slide2vec-4.6.4 → slide2vec-4.7.0}/slide2vec/encoders/models/midnight.py +0 -0
  37. {slide2vec-4.6.4 → slide2vec-4.7.0}/slide2vec/encoders/models/moozy/__init__.py +0 -0
  38. {slide2vec-4.6.4 → slide2vec-4.7.0}/slide2vec/encoders/models/moozy/blocks.py +0 -0
  39. {slide2vec-4.6.4 → slide2vec-4.7.0}/slide2vec/encoders/models/moozy/case.py +0 -0
  40. {slide2vec-4.6.4 → slide2vec-4.7.0}/slide2vec/encoders/models/moozy/loading.py +0 -0
  41. {slide2vec-4.6.4 → slide2vec-4.7.0}/slide2vec/encoders/models/moozy/slide.py +0 -0
  42. {slide2vec-4.6.4 → slide2vec-4.7.0}/slide2vec/encoders/models/moozy/types.py +0 -0
  43. {slide2vec-4.6.4 → slide2vec-4.7.0}/slide2vec/encoders/models/musk.py +0 -0
  44. {slide2vec-4.6.4 → slide2vec-4.7.0}/slide2vec/encoders/models/phikon.py +0 -0
  45. {slide2vec-4.6.4 → slide2vec-4.7.0}/slide2vec/encoders/models/prism.py +0 -0
  46. {slide2vec-4.6.4 → slide2vec-4.7.0}/slide2vec/encoders/models/prost40m.py +0 -0
  47. {slide2vec-4.6.4 → slide2vec-4.7.0}/slide2vec/encoders/models/titan.py +0 -0
  48. {slide2vec-4.6.4 → slide2vec-4.7.0}/slide2vec/encoders/models/uni.py +0 -0
  49. {slide2vec-4.6.4 → slide2vec-4.7.0}/slide2vec/encoders/models/virchow.py +0 -0
  50. {slide2vec-4.6.4 → slide2vec-4.7.0}/slide2vec/encoders/registry.py +0 -0
  51. {slide2vec-4.6.4 → slide2vec-4.7.0}/slide2vec/encoders/validation.py +0 -0
  52. {slide2vec-4.6.4 → slide2vec-4.7.0}/slide2vec/progress.py +0 -0
  53. {slide2vec-4.6.4 → slide2vec-4.7.0}/slide2vec/runtime/__init__.py +0 -0
  54. {slide2vec-4.6.4 → slide2vec-4.7.0}/slide2vec/runtime/artifacts_collect.py +0 -0
  55. {slide2vec-4.6.4 → slide2vec-4.7.0}/slide2vec/runtime/batching.py +0 -0
  56. {slide2vec-4.6.4 → slide2vec-4.7.0}/slide2vec/runtime/cpu_budget.py +0 -0
  57. {slide2vec-4.6.4 → slide2vec-4.7.0}/slide2vec/runtime/distributed.py +0 -0
  58. {slide2vec-4.6.4 → slide2vec-4.7.0}/slide2vec/runtime/distributed_stage.py +0 -0
  59. {slide2vec-4.6.4 → slide2vec-4.7.0}/slide2vec/runtime/embedding.py +0 -0
  60. {slide2vec-4.6.4 → slide2vec-4.7.0}/slide2vec/runtime/embedding_persist.py +0 -0
  61. {slide2vec-4.6.4 → slide2vec-4.7.0}/slide2vec/runtime/embedding_pipeline.py +0 -0
  62. {slide2vec-4.6.4 → slide2vec-4.7.0}/slide2vec/runtime/hierarchical.py +0 -0
  63. {slide2vec-4.6.4 → slide2vec-4.7.0}/slide2vec/runtime/manifest.py +0 -0
  64. {slide2vec-4.6.4 → slide2vec-4.7.0}/slide2vec/runtime/model_settings.py +0 -0
  65. {slide2vec-4.6.4 → slide2vec-4.7.0}/slide2vec/runtime/patient_pipeline.py +0 -0
  66. {slide2vec-4.6.4 → slide2vec-4.7.0}/slide2vec/runtime/persistence.py +0 -0
  67. {slide2vec-4.6.4 → slide2vec-4.7.0}/slide2vec/runtime/process_list.py +0 -0
  68. {slide2vec-4.6.4 → slide2vec-4.7.0}/slide2vec/runtime/progress_bridge.py +0 -0
  69. {slide2vec-4.6.4 → slide2vec-4.7.0}/slide2vec/runtime/registry.py +0 -0
  70. {slide2vec-4.6.4 → slide2vec-4.7.0}/slide2vec/runtime/serialization.py +0 -0
  71. {slide2vec-4.6.4 → slide2vec-4.7.0}/slide2vec/runtime/slide_encode.py +0 -0
  72. {slide2vec-4.6.4 → slide2vec-4.7.0}/slide2vec/runtime/tiling.py +0 -0
  73. {slide2vec-4.6.4 → slide2vec-4.7.0}/slide2vec/runtime/tiling_pipeline.py +0 -0
  74. {slide2vec-4.6.4 → slide2vec-4.7.0}/slide2vec/runtime/types.py +0 -0
  75. {slide2vec-4.6.4 → slide2vec-4.7.0}/slide2vec/runtime/worker_io.py +0 -0
  76. {slide2vec-4.6.4 → slide2vec-4.7.0}/slide2vec/utils/__init__.py +0 -0
  77. {slide2vec-4.6.4 → slide2vec-4.7.0}/slide2vec/utils/config.py +0 -0
  78. {slide2vec-4.6.4 → slide2vec-4.7.0}/slide2vec/utils/coordinates.py +0 -0
  79. {slide2vec-4.6.4 → slide2vec-4.7.0}/slide2vec/utils/log_utils.py +0 -0
  80. {slide2vec-4.6.4 → slide2vec-4.7.0}/slide2vec/utils/tiling_io.py +0 -0
  81. {slide2vec-4.6.4 → slide2vec-4.7.0}/slide2vec/utils/utils.py +0 -0
  82. {slide2vec-4.6.4 → slide2vec-4.7.0}/slide2vec.egg-info/dependency_links.txt +0 -0
  83. {slide2vec-4.6.4 → slide2vec-4.7.0}/slide2vec.egg-info/entry_points.txt +0 -0
  84. {slide2vec-4.6.4 → slide2vec-4.7.0}/slide2vec.egg-info/not-zip-safe +0 -0
  85. {slide2vec-4.6.4 → slide2vec-4.7.0}/slide2vec.egg-info/requires.txt +0 -0
  86. {slide2vec-4.6.4 → slide2vec-4.7.0}/slide2vec.egg-info/top_level.txt +0 -0
  87. {slide2vec-4.6.4 → slide2vec-4.7.0}/tests/test_architecture_runtime_split.py +0 -0
  88. {slide2vec-4.6.4 → slide2vec-4.7.0}/tests/test_attention_extraction.py +0 -0
  89. {slide2vec-4.6.4 → slide2vec-4.7.0}/tests/test_dense_extraction.py +0 -0
  90. {slide2vec-4.6.4 → slide2vec-4.7.0}/tests/test_dense_locality_gated.py +0 -0
  91. {slide2vec-4.6.4 → slide2vec-4.7.0}/tests/test_encoder_registry.py +0 -0
  92. {slide2vec-4.6.4 → slide2vec-4.7.0}/tests/test_hs2p_package_cutover.py +0 -0
  93. {slide2vec-4.6.4 → slide2vec-4.7.0}/tests/test_output_consistency.py +0 -0
  94. {slide2vec-4.6.4 → slide2vec-4.7.0}/tests/test_progress.py +0 -0
  95. {slide2vec-4.6.4 → slide2vec-4.7.0}/tests/test_regression_core.py +0 -0
  96. {slide2vec-4.6.4 → slide2vec-4.7.0}/tests/test_regression_models.py +0 -0
  97. {slide2vec-4.6.4 → slide2vec-4.7.0}/tests/test_runtime_batching.py +0 -0
  98. {slide2vec-4.6.4 → slide2vec-4.7.0}/tests/test_tile_store.py +0 -0
  99. {slide2vec-4.6.4 → slide2vec-4.7.0}/tests/test_tiling_pipeline.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: slide2vec
3
- Version: 4.6.4
3
+ Version: 4.7.0
4
4
  Summary: Embedding of whole slide images with Foundation Models
5
5
  Author-email: Clément Grisi <clement.grisi@radboudumc.nl>
6
6
  License-Expression: Apache-2.0
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "slide2vec"
7
- version = "4.6.4"
7
+ version = "4.7.0"
8
8
  description = "Embedding of whole slide images with Foundation Models"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.10"
@@ -164,7 +164,7 @@ no_implicit_reexport = true
164
164
  max-line-length = 160
165
165
 
166
166
  [tool.bumpver]
167
- current_version = "4.6.4"
167
+ current_version = "4.7.0"
168
168
  version_pattern = "MAJOR.MINOR.PATCH"
169
169
  commit = false # We do version bumping in CI, not as a commit
170
170
  tag = false # Git tag already exists — we don't auto-tag
@@ -11,7 +11,7 @@ from slide2vec.api import (
11
11
  from slide2vec.artifacts import HierarchicalEmbeddingArtifact, SlideEmbeddingArtifact, TileEmbeddingArtifact
12
12
 
13
13
 
14
- __version__ = "4.6.4"
14
+ __version__ = "4.7.0"
15
15
 
16
16
  __all__ = [
17
17
  "Model",
@@ -148,6 +148,52 @@ def load_model(
148
148
  )
149
149
 
150
150
 
151
+ def _reconcile_embedding_process_list(
152
+ *,
153
+ model,
154
+ preprocessing: PreprocessingConfig,
155
+ execution: ExecutionOptions,
156
+ process_list_path,
157
+ embeddable_slides,
158
+ output_dir,
159
+ ):
160
+ """Reconcile the process_list with the embeddings on disk once, at end of run.
161
+
162
+ The incremental persist callback batches its process_list writes for the tile
163
+ path, so the trailing partial batch is only persisted by this final full-CSV
164
+ reconciliation. Every single-GPU embedding entry point must call it after its
165
+ embed loop. Collecting artifacts from disk (rather than the callback's
166
+ in-memory list) also covers resume-skipped slides. Returns the collected
167
+ (tile, hierarchical, slide) artifact lists.
168
+ """
169
+ persist_tile_embeddings = embedding.should_persist_tile_embeddings(model, execution)
170
+ persist_hierarchical_embeddings = hierarchical.is_hierarchical_preprocessing(preprocessing)
171
+ include_slide_embeddings = model.level == "slide"
172
+ include_tile_embeddings = persist_tile_embeddings and not persist_hierarchical_embeddings
173
+ tile_artifacts, hierarchical_artifacts, slide_artifacts = artifacts_collect.collect_pipeline_artifacts(
174
+ embeddable_slides,
175
+ output_dir=output_dir,
176
+ output_format=execution.output_format,
177
+ include_tile_embeddings=include_tile_embeddings,
178
+ include_hierarchical_embeddings=persist_hierarchical_embeddings,
179
+ include_slide_embeddings=include_slide_embeddings,
180
+ )
181
+ if process_list_path is not None and Path(process_list_path).is_file():
182
+ persistence.update_process_list_after_embedding(
183
+ process_list_path,
184
+ successful_slides=embeddable_slides,
185
+ persist_tile_embeddings=persist_tile_embeddings,
186
+ persist_hierarchical_embeddings=persist_hierarchical_embeddings,
187
+ include_slide_embeddings=include_slide_embeddings,
188
+ encoder_name=model.name,
189
+ output_variant=process_list.resolved_process_list_output_variant(model),
190
+ tile_artifacts=tile_artifacts,
191
+ hierarchical_artifacts=hierarchical_artifacts,
192
+ slide_artifacts=slide_artifacts,
193
+ )
194
+ return tile_artifacts, hierarchical_artifacts, slide_artifacts
195
+
196
+
151
197
  def embed_slides(
152
198
  model,
153
199
  slides,
@@ -266,6 +312,18 @@ def embed_slides(
266
312
  hierarchical_artifacts=hierarchical_artifacts,
267
313
  slide_artifacts=slide_artifacts,
268
314
  )
315
+ elif execution.output_dir is not None:
316
+ # Single-GPU: the incremental callback persisted the embeddings but
317
+ # batches its process_list writes, so reconcile the full CSV once at
318
+ # the end (covers the trailing partial batch on a clean run).
319
+ _reconcile_embedding_process_list(
320
+ model=model,
321
+ preprocessing=preprocessing,
322
+ execution=execution,
323
+ process_list_path=process_list_path,
324
+ embeddable_slides=embeddable_slides,
325
+ output_dir=Path(execution.output_dir),
326
+ )
269
327
  emit_progress(
270
328
  "embedding.finished",
271
329
  slide_count=len(embeddable_slides),
@@ -752,7 +810,6 @@ def run_pipeline(
752
810
  persist_tile_embeddings = embedding.should_persist_tile_embeddings(model, execution)
753
811
  persist_hierarchical_embeddings = hierarchical.is_hierarchical_preprocessing(resolved_preprocessing)
754
812
  include_slide_embeddings = model.level == "slide"
755
- include_tile_embeddings = persist_tile_embeddings and not persist_hierarchical_embeddings
756
813
  pending_slides, pending_tiling_results = persist_callbacks.pending_local_embedding_records(
757
814
  embeddable_slides,
758
815
  embeddable_tiling_results,
@@ -790,25 +847,13 @@ def run_pipeline(
790
847
  on_embedded_slide=local_persist_callback,
791
848
  collect_results=False,
792
849
  )
793
- tile_artifacts, hierarchical_artifacts, slide_artifacts = artifacts_collect.collect_pipeline_artifacts(
794
- embeddable_slides,
850
+ tile_artifacts, hierarchical_artifacts, slide_artifacts = _reconcile_embedding_process_list(
851
+ model=model,
852
+ preprocessing=resolved_preprocessing,
853
+ execution=execution,
854
+ process_list_path=process_list_path,
855
+ embeddable_slides=embeddable_slides,
795
856
  output_dir=output_dir,
796
- output_format=execution.output_format,
797
- include_tile_embeddings=include_tile_embeddings,
798
- include_hierarchical_embeddings=persist_hierarchical_embeddings,
799
- include_slide_embeddings=include_slide_embeddings,
800
- )
801
- persistence.update_process_list_after_embedding(
802
- process_list_path,
803
- successful_slides=embeddable_slides,
804
- persist_tile_embeddings=persist_tile_embeddings,
805
- persist_hierarchical_embeddings=persist_hierarchical_embeddings,
806
- include_slide_embeddings=include_slide_embeddings,
807
- encoder_name=model.name,
808
- output_variant=process_list.resolved_process_list_output_variant(model),
809
- tile_artifacts=tile_artifacts,
810
- hierarchical_artifacts=hierarchical_artifacts,
811
- slide_artifacts=slide_artifacts,
812
857
  )
813
858
  emit_progress(
814
859
  "embedding.finished",
@@ -907,7 +952,7 @@ def run_pipeline_with_coordinates(
907
952
  slide_artifacts=slide_artifacts,
908
953
  process_list_path=process_list_path,
909
954
  )
910
- local_persist_callback, tile_or_hier_artifacts, slide_artifacts = persist_callbacks.build_incremental_persist_callback(
955
+ local_persist_callback, _, _ = persist_callbacks.build_incremental_persist_callback(
911
956
  model=model,
912
957
  preprocessing=resolved_preprocessing,
913
958
  execution=execution,
@@ -922,17 +967,18 @@ def run_pipeline_with_coordinates(
922
967
  on_embedded_slide=local_persist_callback,
923
968
  collect_results=False,
924
969
  )
925
- tile_artifacts: list[TileEmbeddingArtifact] = []
926
- hierarchical_artifacts: list[HierarchicalEmbeddingArtifact] = []
927
- for artifact in tile_or_hier_artifacts:
928
- if isinstance(artifact, HierarchicalEmbeddingArtifact):
929
- hierarchical_artifacts.append(artifact)
930
- elif artifact is not None:
931
- tile_artifacts.append(artifact)
970
+ tile_artifacts, hierarchical_artifacts, slide_artifacts = _reconcile_embedding_process_list(
971
+ model=model,
972
+ preprocessing=resolved_preprocessing,
973
+ execution=execution,
974
+ process_list_path=process_list_path,
975
+ embeddable_slides=embeddable_slides,
976
+ output_dir=output_dir,
977
+ )
932
978
  return RunResult(
933
979
  tile_artifacts=tile_artifacts,
934
980
  hierarchical_artifacts=hierarchical_artifacts,
935
- slide_artifacts=list(slide_artifacts),
981
+ slide_artifacts=slide_artifacts,
936
982
  process_list_path=process_list_path,
937
983
  )
938
984
  except Exception as exc:
@@ -0,0 +1,229 @@
1
+ """Dense ``(d, h, w)`` grid extraction over **slide regions at coordinates**.
2
+
3
+ The dense counterpart of the pooled coordinate path (``compute_tile_embeddings_for_slide``
4
+ → ``run_forward_pass`` → ``encode_tiles``): instead of pooling each region to one vector,
5
+ each sampled ROI is read **spacing-aware** from the slide, run through the encoder's
6
+ normalization-only dense transform (``get_dense_transform`` — NOT the pooled transform,
7
+ which crops), padded up to the encoder's patch multiple, and encoded via
8
+ ``encode_tiles_dense`` into a ``(d, grid_h, grid_w)`` token grid.
9
+
10
+ This is the extraction half of soma's slide-manifest segmentation path: slide2vec reads
11
+ regions + encodes (it already owns the region reader and the dense encode); soma sources
12
+ the ROI coordinates (hs2p annotation sampling) and persists/caches the grids. It mirrors
13
+ the pooled split exactly — extraction here, caching in soma.
14
+
15
+ Region reads are spacing-aware via hs2p (:meth:`hs2p.wsi.wsi.WSI.read_region_at_spacing`):
16
+ the finest pyramid level ``<=`` the requested µm/px is read and downscaled to the exact
17
+ ``target_size`` (``area`` for images), so the token grid registers against a mask read at
18
+ the same spacing. The ``wsi`` is injected (any object exposing ``read_region_at_spacing``),
19
+ so the loop is unit-testable offline with a fake reader + a random-weight encoder.
20
+
21
+ Whole-tile only (one padded forward per region). Sliding-window dense extraction over
22
+ coordinates (``window_size`` < input) is a deferred follow-up — large ROIs that exceed the
23
+ encoder's comfortable field are out of scope for the first increment.
24
+ """
25
+
26
+ from __future__ import annotations
27
+
28
+ from dataclasses import dataclass
29
+ from typing import Callable, Sequence
30
+
31
+ import numpy as np
32
+ import torch
33
+ import torch.nn.functional as F
34
+ from PIL import Image
35
+
36
+ from slide2vec.runtime.slide_encode import slide_encode_autocast_ctx
37
+
38
+ _PAD_MODES = {"reflect", "constant", "zero", "replicate"}
39
+
40
+
41
+ def _normalize_hw(value: int | tuple[int, int], *, name: str) -> tuple[int, int]:
42
+ if isinstance(value, int):
43
+ if value <= 0:
44
+ raise ValueError(f"{name} must be positive, got {value}")
45
+ return value, value
46
+ try:
47
+ h, w = value
48
+ except (TypeError, ValueError) as exc:
49
+ raise ValueError(f"{name} must be an int or an (h, w) pair, got {value!r}") from exc
50
+ h, w = int(h), int(w)
51
+ if h <= 0 or w <= 0:
52
+ raise ValueError(f"{name} must be positive, got {(h, w)}")
53
+ return h, w
54
+
55
+
56
+ def _round_up(value: int, multiple: int) -> int:
57
+ return ((value + multiple - 1) // multiple) * multiple
58
+
59
+
60
+ @dataclass(frozen=True)
61
+ class DenseGridGeometry:
62
+ """Resolved spatial layout for one dense extraction (slide2vec-owned).
63
+
64
+ ``target_size`` is the supervision tile size (h, w); ``encoded_size`` is that rounded
65
+ up to the patch multiple (pad on bottom/right); ``grid_shape`` is the resulting token
66
+ grid (grid_h, grid_w). Mirrors soma's ``DenseGridGeometry`` — the dense-grid geometry
67
+ is extraction geometry and belongs in the extraction engine; soma reads it back from
68
+ the persisted sidecar.
69
+ """
70
+
71
+ target_size: tuple[int, int]
72
+ patch_size: tuple[int, int]
73
+ encoded_size: tuple[int, int]
74
+ grid_shape: tuple[int, int]
75
+ pad: tuple[int, int] # (pad_bottom, pad_right)
76
+
77
+
78
+ def compute_dense_geometry(
79
+ *, target_size: int | tuple[int, int], patch_size: int | tuple[int, int]
80
+ ) -> DenseGridGeometry:
81
+ """Encoded size, token grid, and bottom/right padding for a ``target_size`` tile."""
82
+ target_h, target_w = _normalize_hw(target_size, name="target_size")
83
+ patch_h, patch_w = _normalize_hw(patch_size, name="patch_size")
84
+ encoded_h = _round_up(target_h, patch_h)
85
+ encoded_w = _round_up(target_w, patch_w)
86
+ return DenseGridGeometry(
87
+ target_size=(target_h, target_w),
88
+ patch_size=(patch_h, patch_w),
89
+ encoded_size=(encoded_h, encoded_w),
90
+ grid_shape=(encoded_h // patch_h, encoded_w // patch_w),
91
+ pad=(encoded_h - target_h, encoded_w - target_w),
92
+ )
93
+
94
+
95
+ def pad_image_to_encoded(
96
+ tensor: torch.Tensor,
97
+ geometry: DenseGridGeometry,
98
+ *,
99
+ pad_mode: str,
100
+ image_pad_value: float | None,
101
+ ) -> torch.Tensor:
102
+ """Pad a ``(C, H, W)`` tile (bottom/right) up to ``geometry.encoded_size``."""
103
+ pad_bottom, pad_right = geometry.pad
104
+ if pad_bottom == 0 and pad_right == 0:
105
+ return tensor
106
+ x = tensor.unsqueeze(0) # F.pad's 2-D modes need a batch dim
107
+ pad = (0, pad_right, 0, pad_bottom) # (left, right, top, bottom)
108
+ if pad_mode in ("constant", "zero"):
109
+ x = F.pad(x, pad, mode="constant", value=float(image_pad_value or 0.0))
110
+ else:
111
+ x = F.pad(x, pad, mode=pad_mode)
112
+ return x.squeeze(0)
113
+
114
+
115
+ def _resolve_encode_fn(
116
+ model,
117
+ *,
118
+ feature_kind: str,
119
+ attention_blocks: tuple[int, ...],
120
+ attention_include_registers: bool,
121
+ ) -> Callable[[torch.Tensor], torch.Tensor]:
122
+ if feature_kind == "patch_features":
123
+ return model.encode_tiles_dense
124
+ if feature_kind == "cls_attention":
125
+ blocks = tuple(int(b) for b in attention_blocks)
126
+ include_registers = bool(attention_include_registers)
127
+
128
+ def encode_fn(window: torch.Tensor) -> torch.Tensor:
129
+ return model.encode_tiles_attention(
130
+ window, blocks=blocks, include_registers=include_registers
131
+ )
132
+
133
+ return encode_fn
134
+ raise ValueError(
135
+ f"unsupported feature_kind {feature_kind!r}; expected 'patch_features' or 'cls_attention'"
136
+ )
137
+
138
+
139
+ def encode_regions_dense(
140
+ *,
141
+ model,
142
+ device: torch.device | str,
143
+ wsi,
144
+ coordinates: Sequence[tuple[int, int]],
145
+ requested_spacing_um: float,
146
+ target_size: int | tuple[int, int],
147
+ tolerance: float = 0.05,
148
+ pad_mode: str = "reflect",
149
+ image_pad_value: float | None = None,
150
+ feature_kind: str = "patch_features",
151
+ attention_blocks: tuple[int, ...] = (-1,),
152
+ attention_include_registers: bool = False,
153
+ batch_size: int = 1,
154
+ precision: str = "fp32",
155
+ dense_transform: Callable | None = None,
156
+ ) -> np.ndarray:
157
+ """Encode slide regions at ``coordinates`` into dense grids; return ``(N, d, gh, gw)``.
158
+
159
+ Injectable core: takes a constructed dense-capable ``model`` (with
160
+ ``encode_tiles_dense`` / ``encode_tiles_attention`` / ``patch_size`` /
161
+ ``get_dense_transform``) and a ``wsi`` exposing
162
+ ``read_region_at_spacing(location, requested_spacing_um, size, *, tolerance,
163
+ interpolation)``, so it runs offline in tests with random weights + a fake reader.
164
+
165
+ Args:
166
+ coordinates: ``(x, y)`` top-left locations in **level-0** pixel space (the hs2p
167
+ tiling convention; passed straight to ``read_region_at_spacing``).
168
+ requested_spacing_um: µm/px to read each region at.
169
+ target_size: supervision tile size (int or ``(h, w)``); the region is read at this
170
+ size at ``requested_spacing_um`` and the token grid registers to it.
171
+
172
+ Returns a ``float32`` array of dense grids in coordinate order. ``feature_kind``
173
+ selects ``encode_tiles_dense`` (patch grid) vs ``encode_tiles_attention`` (CLS-attention
174
+ grid); both produce a ``(C, gh, gw)`` grid and share this path.
175
+ """
176
+ if pad_mode not in _PAD_MODES:
177
+ raise ValueError(f"unsupported pad_mode {pad_mode!r}; expected one of {sorted(_PAD_MODES)}")
178
+ geometry = compute_dense_geometry(target_size=target_size, patch_size=model.patch_size)
179
+ if dense_transform is None:
180
+ dense_transform = model.get_dense_transform()
181
+ encode_fn = _resolve_encode_fn(
182
+ model,
183
+ feature_kind=feature_kind,
184
+ attention_blocks=attention_blocks,
185
+ attention_include_registers=attention_include_registers,
186
+ )
187
+ target_h, target_w = geometry.target_size
188
+
189
+ coords = [(int(x), int(y)) for x, y in coordinates]
190
+ grid_h, grid_w = geometry.grid_shape
191
+ if not coords:
192
+ return np.empty((0, 0, grid_h, grid_w), dtype=np.float32)
193
+
194
+ def _read_padded(location: tuple[int, int]) -> torch.Tensor:
195
+ region = wsi.read_region_at_spacing(
196
+ location,
197
+ float(requested_spacing_um),
198
+ (target_w, target_h), # hs2p size is (width, height)
199
+ tolerance=float(tolerance),
200
+ interpolation="area",
201
+ )
202
+ region = np.ascontiguousarray(np.asarray(region)[..., :3])
203
+ tensor = torch.as_tensor(dense_transform(Image.fromarray(region))).as_subclass(torch.Tensor)
204
+ if tensor.ndim != 3:
205
+ raise ValueError(
206
+ f"dense transform at {location} produced a {tensor.ndim}-D tensor; expected (C, H, W)."
207
+ )
208
+ if tuple(int(s) for s in tensor.shape[-2:]) != (target_h, target_w):
209
+ raise ValueError(
210
+ f"region at {location} is {tuple(int(s) for s in tensor.shape[-2:])} after the dense "
211
+ f"transform, but target_size is {(target_h, target_w)}. The dense transform must be "
212
+ "normalization-only (no resize/crop)."
213
+ )
214
+ return pad_image_to_encoded(
215
+ tensor, geometry, pad_mode=pad_mode, image_pad_value=image_pad_value
216
+ )
217
+
218
+ grids: list[np.ndarray] = []
219
+ with torch.inference_mode(), slide_encode_autocast_ctx(device, precision):
220
+ for start in range(0, len(coords), max(1, int(batch_size))):
221
+ chunk = coords[start : start + max(1, int(batch_size))]
222
+ batch = torch.stack([_read_padded(loc) for loc in chunk]).to(device, non_blocking=True)
223
+ out = encode_fn(batch)
224
+ if out.ndim != 4:
225
+ raise ValueError(
226
+ f"{feature_kind} encode returned a {out.ndim}-D tensor; expected (B, d, gh, gw)."
227
+ )
228
+ grids.append(out.detach().float().cpu().numpy())
229
+ return np.concatenate(grids, axis=0)
@@ -18,6 +18,16 @@ from slide2vec.runtime.persistence import update_process_list_after_embedding
18
18
  from slide2vec.runtime.process_list import resolved_process_list_output_variant
19
19
  from slide2vec.utils.tiling_io import load_embedding_process_df
20
20
 
21
+ # Number of completed tile-level samples to buffer before rewriting the
22
+ # process_list CSV. Each rewrite re-reads and re-writes the *entire* CSV, so
23
+ # doing it once per sample is O(N^2) in I/O when every tile is its own sample
24
+ # (e.g. patch-level benchmarks with hundreds of thousands of tiles). Batching
25
+ # makes it O(N) while only risking the re-embedding of at most this many cheap
26
+ # tile samples after a crash (a clean run reconciles the full CSV at the end).
27
+ # Slide- and hierarchical-level runs (sample == slide: few, expensive samples)
28
+ # keep a flush interval of 1 so every completed slide is checkpointed.
29
+ TILE_EMBEDDING_FLUSH_INTERVAL = 1000
30
+
21
31
 
22
32
  def has_complete_local_embedding_outputs(
23
33
  sample_id: str,
@@ -141,6 +151,45 @@ def build_incremental_persist_callback(
141
151
  persist_hierarchical_embeddings = is_hierarchical_preprocessing(preprocessing)
142
152
  include_slide_embeddings = model.level == "slide"
143
153
 
154
+ # Only the pure tile-level path produces the many-cheap-samples workload that
155
+ # makes per-sample CSV rewrites O(N^2). When the model aggregates to slide
156
+ # level (or runs hierarchically) the sample is a slide/region — few and
157
+ # expensive — so checkpoint every one (interval 1). save_tile_embeddings on a
158
+ # slide-level model still iterates per slide, hence the include_slide check.
159
+ is_tile_level = (
160
+ persist_tile_embeddings
161
+ and not persist_hierarchical_embeddings
162
+ and not include_slide_embeddings
163
+ )
164
+ flush_interval = TILE_EMBEDDING_FLUSH_INTERVAL if is_tile_level else 1
165
+
166
+ # Buffered completions awaiting the next batched process_list rewrite.
167
+ pending_slides: list[SlideSpec] = []
168
+ pending_tile_artifacts: list[TileEmbeddingArtifact] = []
169
+ pending_hierarchical_artifacts: list[HierarchicalEmbeddingArtifact] = []
170
+ pending_slide_artifacts: list[SlideEmbeddingArtifact] = []
171
+
172
+ def _flush_process_list() -> None:
173
+ if not pending_slides:
174
+ return
175
+ if process_list_path is not None and process_list_path.is_file():
176
+ update_process_list_after_embedding(
177
+ process_list_path,
178
+ successful_slides=list(pending_slides),
179
+ persist_tile_embeddings=persist_tile_embeddings,
180
+ persist_hierarchical_embeddings=persist_hierarchical_embeddings,
181
+ include_slide_embeddings=include_slide_embeddings,
182
+ encoder_name=model.name,
183
+ output_variant=resolved_process_list_output_variant(model),
184
+ tile_artifacts=list(pending_tile_artifacts),
185
+ hierarchical_artifacts=list(pending_hierarchical_artifacts),
186
+ slide_artifacts=list(pending_slide_artifacts),
187
+ )
188
+ pending_slides.clear()
189
+ pending_tile_artifacts.clear()
190
+ pending_hierarchical_artifacts.clear()
191
+ pending_slide_artifacts.clear()
192
+
144
193
  def _persist_completed_slide(slide: SlideSpec, tiling_result, embedded_slide: EmbeddedSlide) -> None:
145
194
  tile_artifact, slide_artifact = persist_embedded_slide(
146
195
  model,
@@ -153,18 +202,16 @@ def build_incremental_persist_callback(
153
202
  tile_artifacts.append(tile_artifact)
154
203
  if slide_artifact is not None:
155
204
  slide_artifacts.append(slide_artifact)
156
- if process_list_path is not None and process_list_path.is_file():
157
- update_process_list_after_embedding(
158
- process_list_path,
159
- successful_slides=[slide],
160
- persist_tile_embeddings=persist_tile_embeddings,
161
- persist_hierarchical_embeddings=persist_hierarchical_embeddings,
162
- include_slide_embeddings=include_slide_embeddings,
163
- encoder_name=model.name,
164
- output_variant=resolved_process_list_output_variant(model),
165
- tile_artifacts=[tile_artifact] if isinstance(tile_artifact, TileEmbeddingArtifact) else [],
166
- hierarchical_artifacts=[tile_artifact] if isinstance(tile_artifact, HierarchicalEmbeddingArtifact) else [],
167
- slide_artifacts=[slide_artifact] if slide_artifact is not None else [],
168
- )
205
+ # Buffer this completion; a slide with no successful artifact is still
206
+ # recorded so the batched rewrite can mark its feature_status="error".
207
+ pending_slides.append(slide)
208
+ if isinstance(tile_artifact, TileEmbeddingArtifact):
209
+ pending_tile_artifacts.append(tile_artifact)
210
+ elif isinstance(tile_artifact, HierarchicalEmbeddingArtifact):
211
+ pending_hierarchical_artifacts.append(tile_artifact)
212
+ if slide_artifact is not None:
213
+ pending_slide_artifacts.append(slide_artifact)
214
+ if len(pending_slides) >= flush_interval:
215
+ _flush_process_list()
169
216
 
170
217
  return _persist_completed_slide, tile_artifacts, slide_artifacts
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: slide2vec
3
- Version: 4.6.4
3
+ Version: 4.7.0
4
4
  Summary: Embedding of whole slide images with Foundation Models
5
5
  Author-email: Clément Grisi <clement.grisi@radboudumc.nl>
6
6
  License-Expression: Apache-2.0
@@ -53,6 +53,7 @@ slide2vec/runtime/__init__.py
53
53
  slide2vec/runtime/artifacts_collect.py
54
54
  slide2vec/runtime/batching.py
55
55
  slide2vec/runtime/cpu_budget.py
56
+ slide2vec/runtime/dense_regions.py
56
57
  slide2vec/runtime/distributed.py
57
58
  slide2vec/runtime/distributed_stage.py
58
59
  slide2vec/runtime/embedding.py
@@ -83,6 +84,7 @@ tests/test_architecture_runtime_split.py
83
84
  tests/test_attention_extraction.py
84
85
  tests/test_dense_extraction.py
85
86
  tests/test_dense_locality_gated.py
87
+ tests/test_dense_regions.py
86
88
  tests/test_encoder_registry.py
87
89
  tests/test_hs2p_package_cutover.py
88
90
  tests/test_output_consistency.py
@@ -0,0 +1,117 @@
1
+ """Tests for dense grid extraction over slide regions: ``encode_regions_dense``.
2
+
3
+ Fully offline (``pretrained=False`` random weights) + an injected fake reader, so no
4
+ weights, no real WSI. Checks (1) grid shapes over a batch of coordinates and (2) that the
5
+ orchestration is a faithful wrapper — its per-region grid is byte-identical to a direct
6
+ ``encode_tiles_dense(transform → pad)`` of the same region.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import numpy as np
12
+ import pytest
13
+
14
+ torch = pytest.importorskip("torch")
15
+ timm = pytest.importorskip("timm")
16
+
17
+ from slide2vec.encoders.base import TimmTileEncoder # noqa: E402
18
+ from slide2vec.runtime.dense_regions import ( # noqa: E402
19
+ compute_dense_geometry,
20
+ encode_regions_dense,
21
+ pad_image_to_encoded,
22
+ )
23
+
24
+
25
def _encoder(**kwargs) -> TimmTileEncoder:
    """Build the ViT-tiny tile encoder used by these tests, without pretrained weights.

    Any extra keyword arguments are forwarded to ``TimmTileEncoder`` unchanged.
    """
    encoder = TimmTileEncoder(
        "vit_tiny_patch16_224",
        pretrained=False,
        num_classes=0,
        dynamic_img_size=True,
        **kwargs,
    )
    return encoder
28
+
29
+
30
class _FakeWSI:
    """Stand-in slide reader that fabricates a reproducible RGB array per location.

    Every request is appended to ``calls`` so tests can inspect how regions were read.
    """

    def __init__(self, *, target_h: int, target_w: int):
        self._target_h = target_h
        self._target_w = target_w
        # One tuple per read: (location, spacing, size, tolerance, interpolation).
        self.calls: list[tuple] = []

    def read_region_at_spacing(self, location, requested_spacing_um, size, *, tolerance, interpolation):
        """Log the request and return a ``(height, width, 3)`` uint8 array seeded by ``location``."""
        request = (tuple(location), requested_spacing_um, tuple(size), tolerance, interpolation)
        self.calls.append(request)
        width, height = size
        x, y = location
        # The seed depends only on the integer location, so repeated reads agree.
        seed = abs(hash((int(x), int(y)))) % (2**32)
        generator = np.random.default_rng(seed)
        return generator.integers(0, 256, size=(height, width, 3), dtype=np.uint8)
44
+
45
+
46
def test_encode_regions_dense_shapes_over_coordinates():
    """Three regions at a patch-aligned size come back as a (3, dim, 4, 4) float32 stack."""
    encoder = _encoder()
    side = 64  # patch 16 -> grid 4x4, no padding
    reader = _FakeWSI(target_h=side, target_w=side)
    locations = [(0, 0), (64, 0), (0, 64)]

    grids = encode_regions_dense(
        model=encoder,
        device="cpu",
        wsi=reader,
        coordinates=locations,
        requested_spacing_um=0.5,
        target_size=side,
        batch_size=2,
    )

    expected_shape = (3, encoder.encode_dim, 4, 4)
    assert grids.shape == expected_shape
    assert grids.dtype == np.float32
    # Reads are logged in request order, each at (side, side) with area interpolation.
    requested_locations = [call[0] for call in reader.calls]
    assert requested_locations == [(0, 0), (64, 0), (0, 64)]
    assert all(call[2] == (side, side) and call[4] == "area" for call in reader.calls)
67
+
68
+
69
def test_encode_regions_dense_pads_non_multiple_target():
    """A 60-pixel target (not a multiple of the patch size) still produces a 4x4 grid."""
    encoder = _encoder()
    side = 60  # padded up to 64 -> grid 4x4
    reader = _FakeWSI(target_h=side, target_w=side)
    grids = encode_regions_dense(
        model=encoder,
        device="cpu",
        wsi=reader,
        coordinates=[(0, 0)],
        requested_spacing_um=0.5,
        target_size=side,
    )
    expected_shape = (1, encoder.encode_dim, 4, 4)
    assert grids.shape == expected_shape
78
+
79
+
80
def test_encode_regions_dense_matches_direct_encode():
    """The primitive is a faithful wrapper: parity vs a hand-rolled transform+pad+encode."""
    encoder = _encoder()
    side = 64
    reader = _FakeWSI(target_h=side, target_w=side)
    locations = [(0, 0), (128, 256)]

    grids = encode_regions_dense(
        model=encoder,
        device="cpu",
        wsi=reader,
        coordinates=locations,
        requested_spacing_um=0.5,
        target_size=side,
    )

    # Reference path: read the same (deterministic) regions again and encode each by hand.
    from PIL import Image

    geometry = compute_dense_geometry(target_size=side, patch_size=encoder.patch_size)
    dense_transform = encoder.get_dense_transform()
    reference_reader = _FakeWSI(target_h=side, target_w=side)
    with torch.inference_mode():
        for index, location in enumerate(locations):
            pixels = reference_reader.read_region_at_spacing(
                location, 0.5, (side, side), tolerance=0.05, interpolation="area"
            )
            image = Image.fromarray(pixels)
            tensor = torch.as_tensor(dense_transform(image)).as_subclass(torch.Tensor)
            padded = pad_image_to_encoded(tensor, geometry, pad_mode="reflect", image_pad_value=None)
            encoded = encoder.encode_tiles_dense(padded.unsqueeze(0))
            expected = encoded.detach().float().cpu().numpy()[0]
            np.testing.assert_allclose(grids[index], expected, rtol=0, atol=1e-6)
107
+
108
+
109
def test_encode_regions_dense_empty_coordinates():
    """With no coordinates the result has shape (0, 0, 4, 4) and the reader is never called."""
    encoder = _encoder()
    reader = _FakeWSI(target_h=64, target_w=64)
    grids = encode_regions_dense(
        model=encoder,
        device="cpu",
        wsi=reader,
        coordinates=[],
        requested_spacing_um=0.5,
        target_size=64,
    )
    assert grids.shape == (0, 0, 4, 4)
    assert reader.calls == []
@@ -1578,6 +1578,68 @@ def test_run_pipeline_local_persists_completed_embeddings_before_later_slide_fai
1578
1578
  assert recorded.loc["slide-b", "aggregation_status"] == "tbp"
1579
1579
 
1580
1580
 
1581
def _drive_persist_callback(monkeypatch, tmp_path, *, model_level, num_samples):
    """Feed ``num_samples`` completions to an incremental persist callback.

    ``update_process_list_after_embedding`` is replaced by a recorder that
    appends ``len(successful_slides)`` on each call. The recorded list is
    returned and shows how the process_list rewrites were grouped.
    """
    process_list_path = tmp_path / "process_list.csv"
    process_list_path.write_text("sample_id\n", encoding="utf-8")

    # Module-level stand-ins, applied in this order; the flush interval is pinned to 3.
    replacements = {
        "should_persist_tile_embeddings": lambda *a, **k: True,
        "is_hierarchical_preprocessing": lambda *a, **k: False,
        "resolved_process_list_output_variant": lambda *a, **k: None,
        "TILE_EMBEDDING_FLUSH_INTERVAL": 3,
    }
    for attribute, replacement in replacements.items():
        monkeypatch.setattr(persist_callbacks, attribute, replacement)

    def fake_persist_embedded_slide(model, embedded_slide, tiling_result, *, preprocessing, execution):
        # Return a tile artifact (and no slide artifact) without touching disk.
        tile_artifact = persist_callbacks.TileEmbeddingArtifact(
            sample_id=embedded_slide.sample_id,
            path=tmp_path / f"{embedded_slide.sample_id}.pt",
            metadata_path=tmp_path / f"{embedded_slide.sample_id}.meta.json",
            format="pt",
            feature_dim=4,
            num_tiles=2,
        )
        return tile_artifact, None

    monkeypatch.setattr(persist_callbacks, "persist_embedded_slide", fake_persist_embedded_slide)

    batch_sizes: list[int] = []

    def record_batch_size(*args, **kwargs):
        batch_sizes.append(len(kwargs["successful_slides"]))

    monkeypatch.setattr(persist_callbacks, "update_process_list_after_embedding", record_batch_size)

    model = SimpleNamespace(name="enc", level=model_level)
    callback, _tile_artifacts, _slide_artifacts = persist_callbacks.build_incremental_persist_callback(
        model=model,
        preprocessing=SimpleNamespace(),
        execution=SimpleNamespace(output_dir=tmp_path),
        process_list_path=process_list_path,
    )
    for index in range(num_samples):
        callback(make_slide(f"s-{index}"), SimpleNamespace(), SimpleNamespace(sample_id=f"s-{index}"))
    return batch_sizes
1626
+
1627
+
1628
def test_incremental_persist_callback_batches_tile_level_process_list_writes(monkeypatch, tmp_path: Path):
    """Tile-level runs group process_list writes at the flush interval."""
    # Seven completions with the interval patched to 3 give two full writes of
    # three; the trailing sample stays buffered for the caller's final
    # reconciliation.
    writes = _drive_persist_callback(
        monkeypatch,
        tmp_path,
        model_level="tile",
        num_samples=7,
    )
    assert writes == [3, 3]
1634
+
1635
+
1636
def test_incremental_persist_callback_checkpoints_slide_level_every_sample(monkeypatch, tmp_path: Path):
    """Slide-level runs write the process_list after every completed sample."""
    # Few, expensive samples: each completion is checkpointed immediately so a
    # crash never loses an expensive slide embedding.
    writes = _drive_persist_callback(
        monkeypatch,
        tmp_path,
        model_level="slide",
        num_samples=4,
    )
    assert writes == [1, 1, 1, 1]
1641
+
1642
+
1581
1643
  def test_tile_slides_forwards_spacing_at_level_0_to_hs2p(monkeypatch, tmp_path: Path):
1582
1644
  import slide2vec.inference as inference
1583
1645
 
@@ -2532,6 +2594,71 @@ def test_direct_embed_slides_persists_completed_embeddings_before_later_slide_fa
2532
2594
  assert recorded.loc["slide-b", "feature_status"] == "tbp"
2533
2595
  assert recorded.loc["slide-b", "aggregation_status"] == "tbp"
2534
2596
 
2597
+
2598
def test_direct_embed_slides_single_gpu_reconciles_batched_tile_status_on_clean_run(monkeypatch, tmp_path: Path):
    """A clean tile-level single-GPU run ends with feature_status "success" on every row."""
    # With fewer slides than TILE_EMBEDDING_FLUSH_INTERVAL the incremental callback
    # buffers every completion and never flushes mid-run, so feature_status is
    # written only by the end-of-run reconciliation. Without it, a clean run would
    # leave these rows as "tbp" despite the .pt files existing.
    pytest.importorskip("torch")
    import slide2vec.inference as inference

    slides = [make_slide("slide-a"), make_slide("slide-b")]
    tiling_a = SimpleNamespace(
        x=np.array([0, 1]),
        y=np.array([2, 3]),
        tile_size_lv0=224,
        coordinates_npz_path=Path("/tmp/slide-a.coordinates.npz"),
        coordinates_meta_path=Path("/tmp/slide-a.coordinates.meta.json"),
    )
    tiling_b = SimpleNamespace(
        x=np.array([4, 5]),
        y=np.array([6, 7]),
        tile_size_lv0=224,
        coordinates_npz_path=Path("/tmp/slide-b.coordinates.npz"),
        coordinates_meta_path=Path("/tmp/slide-b.coordinates.meta.json"),
    )
    tiling_results = [tiling_a, tiling_b]

    # Seed a process list in which both slides have already been tiled successfully.
    process_list_path = tmp_path / "process_list.csv"
    header = (
        "sample_id,annotation,image_path,mask_path,spacing_at_level_0,tiling_status,num_tiles,coordinates_npz_path,coordinates_meta_path,error,traceback\n"
    )
    row_a = (
        "slide-a,tissue,/tmp/slide-a.svs,,,"
        "success,2,/tmp/slide-a.coordinates.npz,/tmp/slide-a.coordinates.meta.json,,\n"
    )
    row_b = (
        "slide-b,tissue,/tmp/slide-b.svs,,,"
        "success,2,/tmp/slide-b.coordinates.npz,/tmp/slide-b.coordinates.meta.json,,\n"
    )
    process_list_path.write_text(header + row_a + row_b, encoding="utf-8")

    def fake_prepare_tiled_slides(slide_records, preprocessing, output_dir, num_workers):
        return slides, tiling_results, process_list_path

    monkeypatch.setattr(tiling_pipeline, "prepare_tiled_slides", fake_prepare_tiled_slides)

    # Clean run: every slide embeds successfully.
    def fake_compute_tile_embeddings(*args, **kwargs):
        return np.zeros((2, 4), dtype=np.float32)

    monkeypatch.setattr(embedding_pipeline, "compute_tile_embeddings_for_slide", fake_compute_tile_embeddings)

    model = SimpleNamespace(
        name="uni2",
        level="tile",
        _requested_device="cpu",
        _load_backend=lambda: SimpleNamespace(),
    )
    execution = ExecutionOptions(output_dir=tmp_path, save_tile_embeddings=True, num_gpus=1)

    inference.embed_slides(
        model,
        slides,
        preprocessing=DEFAULT_PREPROCESSING,
        execution=execution,
    )

    embeddings_dir = tmp_path / "tile_embeddings"
    assert (embeddings_dir / "slide-a.pt").is_file()
    assert (embeddings_dir / "slide-b.pt").is_file()
    recorded = pd.read_csv(process_list_path).set_index("sample_id")
    assert recorded.loc["slide-a", "feature_status"] == "success"
    assert recorded.loc["slide-b", "feature_status"] == "success"
+
2661
+
2535
2662
  def test_slide_level_pipeline_skips_tile_artifacts_when_save_tile_embeddings_is_false(monkeypatch, tmp_path: Path):
2536
2663
  import slide2vec.inference as inference
2537
2664
 
File without changes
File without changes
File without changes
File without changes
File without changes