slide2vec 3.0.1__tar.gz → 3.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111)
  1. {slide2vec-3.0.1/slide2vec.egg-info → slide2vec-3.1.0}/PKG-INFO +5 -2
  2. {slide2vec-3.0.1 → slide2vec-3.1.0}/pyproject.toml +1 -1
  3. {slide2vec-3.0.1 → slide2vec-3.1.0}/setup.cfg +5 -2
  4. {slide2vec-3.0.1 → slide2vec-3.1.0}/slide2vec/__init__.py +1 -1
  5. {slide2vec-3.0.1 → slide2vec-3.1.0}/slide2vec/api.py +91 -21
  6. {slide2vec-3.0.1 → slide2vec-3.1.0}/slide2vec/cli.py +3 -0
  7. slide2vec-3.1.0/slide2vec/configs/models/conch.yaml +16 -0
  8. slide2vec-3.1.0/slide2vec/configs/models/conchv15.yaml +16 -0
  9. {slide2vec-3.0.1 → slide2vec-3.1.0}/slide2vec/configs/models/default.yaml +6 -5
  10. slide2vec-3.1.0/slide2vec/configs/models/h-optimus-0.yaml +16 -0
  11. slide2vec-3.1.0/slide2vec/configs/models/h-optimus-1.yaml +16 -0
  12. slide2vec-3.1.0/slide2vec/configs/models/h0-mini.yaml +16 -0
  13. slide2vec-3.1.0/slide2vec/configs/models/hibou.yaml +17 -0
  14. slide2vec-3.1.0/slide2vec/configs/models/kaiko-midnight.yaml +16 -0
  15. slide2vec-3.1.0/slide2vec/configs/models/kaiko.yaml +17 -0
  16. slide2vec-3.1.0/slide2vec/configs/models/musk.yaml +16 -0
  17. slide2vec-3.1.0/slide2vec/configs/models/panda-vit-s.yaml +17 -0
  18. slide2vec-3.1.0/slide2vec/configs/models/pathojepa.yaml +36 -0
  19. slide2vec-3.1.0/slide2vec/configs/models/phikonv2.yaml +16 -0
  20. slide2vec-3.1.0/slide2vec/configs/models/prism.yaml +18 -0
  21. slide2vec-3.1.0/slide2vec/configs/models/prov-gigapath-slide.yaml +17 -0
  22. slide2vec-3.1.0/slide2vec/configs/models/prov-gigapath-tile.yaml +16 -0
  23. slide2vec-3.1.0/slide2vec/configs/models/titan.yaml +17 -0
  24. slide2vec-3.1.0/slide2vec/configs/models/uni.yaml +16 -0
  25. slide2vec-3.1.0/slide2vec/configs/models/uni2.yaml +16 -0
  26. slide2vec-3.1.0/slide2vec/configs/models/virchow.yaml +16 -0
  27. slide2vec-3.1.0/slide2vec/configs/models/virchow2.yaml +16 -0
  28. {slide2vec-3.0.1 → slide2vec-3.1.0}/slide2vec/configs/preprocessing/default.yaml +12 -5
  29. slide2vec-3.1.0/slide2vec/data/__init__.py +3 -0
  30. slide2vec-3.1.0/slide2vec/data/cucim_tile_reader.py +303 -0
  31. slide2vec-3.1.0/slide2vec/data/dataset.py +49 -0
  32. slide2vec-3.1.0/slide2vec/data/tile_store.py +55 -0
  33. slide2vec-3.1.0/slide2vec/data/wsd_tile_reader.py +173 -0
  34. {slide2vec-3.0.1 → slide2vec-3.1.0}/slide2vec/distributed/direct_embed_worker.py +9 -1
  35. {slide2vec-3.0.1 → slide2vec-3.1.0}/slide2vec/distributed/pipeline_worker.py +9 -1
  36. {slide2vec-3.0.1 → slide2vec-3.1.0}/slide2vec/inference.py +647 -78
  37. slide2vec-3.1.0/slide2vec/model_settings.py +196 -0
  38. {slide2vec-3.0.1 → slide2vec-3.1.0}/slide2vec/models/models.py +37 -12
  39. {slide2vec-3.0.1 → slide2vec-3.1.0}/slide2vec/progress.py +123 -18
  40. {slide2vec-3.0.1 → slide2vec-3.1.0}/slide2vec/utils/config.py +17 -0
  41. {slide2vec-3.0.1 → slide2vec-3.1.0}/slide2vec/utils/tiling_io.py +12 -8
  42. {slide2vec-3.0.1 → slide2vec-3.1.0/slide2vec.egg-info}/PKG-INFO +5 -2
  43. {slide2vec-3.0.1 → slide2vec-3.1.0}/slide2vec.egg-info/SOURCES.txt +12 -1
  44. {slide2vec-3.0.1 → slide2vec-3.1.0}/slide2vec.egg-info/requires.txt +5 -1
  45. slide2vec-3.1.0/tests/test_batch_collator_timing.py +104 -0
  46. slide2vec-3.1.0/tests/test_benchmark_embedding_throughput.py +767 -0
  47. slide2vec-3.1.0/tests/test_benchmark_end_to_end_paths.py +424 -0
  48. slide2vec-3.1.0/tests/test_benchmark_tile_read_strategies.py +151 -0
  49. {slide2vec-3.0.1 → slide2vec-3.1.0}/tests/test_dependency_split.py +1 -5
  50. {slide2vec-3.0.1 → slide2vec-3.1.0}/tests/test_hs2p_package_cutover.py +5 -4
  51. {slide2vec-3.0.1 → slide2vec-3.1.0}/tests/test_output_consistency.py +7 -4
  52. {slide2vec-3.0.1 → slide2vec-3.1.0}/tests/test_progress.py +204 -0
  53. {slide2vec-3.0.1 → slide2vec-3.1.0}/tests/test_regression_core.py +307 -134
  54. {slide2vec-3.0.1 → slide2vec-3.1.0}/tests/test_regression_inference.py +1123 -31
  55. {slide2vec-3.0.1 → slide2vec-3.1.0}/tests/test_regression_models.py +308 -0
  56. slide2vec-3.1.0/tests/test_release.py +67 -0
  57. slide2vec-3.1.0/tests/test_tile_store.py +74 -0
  58. slide2vec-3.0.1/slide2vec/configs/models/conch.yaml +0 -44
  59. slide2vec-3.0.1/slide2vec/configs/models/h-optimus-0.yaml +0 -37
  60. slide2vec-3.0.1/slide2vec/configs/models/h-optimus-1.yaml +0 -16
  61. slide2vec-3.0.1/slide2vec/configs/models/h0-mini.yaml +0 -20
  62. slide2vec-3.0.1/slide2vec/configs/models/hibou.yaml +0 -19
  63. slide2vec-3.0.1/slide2vec/configs/models/kaiko-midnight.yaml +0 -18
  64. slide2vec-3.0.1/slide2vec/configs/models/kaiko.yaml +0 -19
  65. slide2vec-3.0.1/slide2vec/configs/models/musk.yaml +0 -18
  66. slide2vec-3.0.1/slide2vec/configs/models/panda-vit-s.yaml +0 -23
  67. slide2vec-3.0.1/slide2vec/configs/models/pathojepa.yaml +0 -40
  68. slide2vec-3.0.1/slide2vec/configs/models/phikonv2.yaml +0 -18
  69. slide2vec-3.0.1/slide2vec/configs/models/prism.yaml +0 -24
  70. slide2vec-3.0.1/slide2vec/configs/models/prov-gigapath-slide.yaml +0 -18
  71. slide2vec-3.0.1/slide2vec/configs/models/prov-gigapath-tile.yaml +0 -16
  72. slide2vec-3.0.1/slide2vec/configs/models/titan.yaml +0 -18
  73. slide2vec-3.0.1/slide2vec/configs/models/uni.yaml +0 -39
  74. slide2vec-3.0.1/slide2vec/configs/models/uni2.yaml +0 -24
  75. slide2vec-3.0.1/slide2vec/configs/models/virchow.yaml +0 -34
  76. slide2vec-3.0.1/slide2vec/configs/models/virchow2.yaml +0 -35
  77. slide2vec-3.0.1/slide2vec/data/__init__.py +0 -2
  78. slide2vec-3.0.1/slide2vec/data/dataset.py +0 -79
  79. {slide2vec-3.0.1 → slide2vec-3.1.0}/LICENSE +0 -0
  80. {slide2vec-3.0.1 → slide2vec-3.1.0}/MANIFEST.in +0 -0
  81. {slide2vec-3.0.1 → slide2vec-3.1.0}/README.md +0 -0
  82. {slide2vec-3.0.1 → slide2vec-3.1.0}/setup.py +0 -0
  83. {slide2vec-3.0.1 → slide2vec-3.1.0}/slide2vec/__main__.py +0 -0
  84. {slide2vec-3.0.1 → slide2vec-3.1.0}/slide2vec/artifacts.py +0 -0
  85. {slide2vec-3.0.1 → slide2vec-3.1.0}/slide2vec/configs/__init__.py +0 -0
  86. {slide2vec-3.0.1 → slide2vec-3.1.0}/slide2vec/data/augmentations.py +0 -0
  87. {slide2vec-3.0.1 → slide2vec-3.1.0}/slide2vec/distributed/__init__.py +0 -0
  88. {slide2vec-3.0.1 → slide2vec-3.1.0}/slide2vec/main.py +0 -0
  89. {slide2vec-3.0.1 → slide2vec-3.1.0}/slide2vec/models/__init__.py +0 -0
  90. {slide2vec-3.0.1 → slide2vec-3.1.0}/slide2vec/models/layers/__init__.py +0 -0
  91. {slide2vec-3.0.1 → slide2vec-3.1.0}/slide2vec/models/layers/attention.py +0 -0
  92. {slide2vec-3.0.1 → slide2vec-3.1.0}/slide2vec/models/layers/block.py +0 -0
  93. {slide2vec-3.0.1 → slide2vec-3.1.0}/slide2vec/models/layers/dino_head.py +0 -0
  94. {slide2vec-3.0.1 → slide2vec-3.1.0}/slide2vec/models/layers/drop_path.py +0 -0
  95. {slide2vec-3.0.1 → slide2vec-3.1.0}/slide2vec/models/layers/layer_scale.py +0 -0
  96. {slide2vec-3.0.1 → slide2vec-3.1.0}/slide2vec/models/layers/mlp.py +0 -0
  97. {slide2vec-3.0.1 → slide2vec-3.1.0}/slide2vec/models/layers/patch_embed.py +0 -0
  98. {slide2vec-3.0.1 → slide2vec-3.1.0}/slide2vec/models/layers/swiglu_ffn.py +0 -0
  99. {slide2vec-3.0.1 → slide2vec-3.1.0}/slide2vec/models/vision_transformer_dino.py +0 -0
  100. {slide2vec-3.0.1 → slide2vec-3.1.0}/slide2vec/models/vision_transformer_dinov2.py +0 -0
  101. {slide2vec-3.0.1 → slide2vec-3.1.0}/slide2vec/models/vision_transformer_pathojepa.py +0 -0
  102. {slide2vec-3.0.1 → slide2vec-3.1.0}/slide2vec/resources.py +0 -0
  103. {slide2vec-3.0.1 → slide2vec-3.1.0}/slide2vec/utils/__init__.py +0 -0
  104. {slide2vec-3.0.1 → slide2vec-3.1.0}/slide2vec/utils/coordinates.py +0 -0
  105. {slide2vec-3.0.1 → slide2vec-3.1.0}/slide2vec/utils/log_utils.py +0 -0
  106. {slide2vec-3.0.1 → slide2vec-3.1.0}/slide2vec/utils/paths.py +0 -0
  107. {slide2vec-3.0.1 → slide2vec-3.1.0}/slide2vec/utils/utils.py +0 -0
  108. {slide2vec-3.0.1 → slide2vec-3.1.0}/slide2vec.egg-info/dependency_links.txt +0 -0
  109. {slide2vec-3.0.1 → slide2vec-3.1.0}/slide2vec.egg-info/entry_points.txt +0 -0
  110. {slide2vec-3.0.1 → slide2vec-3.1.0}/slide2vec.egg-info/not-zip-safe +0 -0
  111. {slide2vec-3.0.1 → slide2vec-3.1.0}/slide2vec.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: slide2vec
3
- Version: 3.0.1
3
+ Version: 3.1.0
4
4
  Summary: Embedding of whole slide images with Foundation Models
5
5
  Home-page: https://github.com/clemsgrs/slide2vec
6
6
  Author: Clément Grisi
@@ -20,7 +20,7 @@ Classifier: Programming Language :: Python :: 3.13
20
20
  Requires-Python: >=3.10
21
21
  Description-Content-Type: text/markdown
22
22
  License-File: LICENSE
23
- Requires-Dist: hs2p<3,>=2.3.0
23
+ Requires-Dist: hs2p<3,>=2.4.1
24
24
  Requires-Dist: omegaconf
25
25
  Requires-Dist: h5py
26
26
  Requires-Dist: matplotlib
@@ -36,6 +36,9 @@ Requires-Dist: wandb
36
36
  Requires-Dist: wholeslidedata<0.0.16
37
37
  Requires-Dist: einops
38
38
  Requires-Dist: timm
39
+ Provides-Extra: cucim
40
+ Requires-Dist: hs2p[cucim]<3,>=2.4.1; extra == "cucim"
41
+ Requires-Dist: PyTurboJPEG; extra == "cucim"
39
42
  Provides-Extra: models
40
43
  Requires-Dist: huggingface-hub; extra == "models"
41
44
  Requires-Dist: sacremoses; extra == "models"
@@ -23,7 +23,7 @@ warn_unused_configs = true
23
23
  no_implicit_reexport = true
24
24
 
25
25
  [tool.bumpver]
26
- current_version = "3.0.1"
26
+ current_version = "3.1.0"
27
27
  version_pattern = "MAJOR.MINOR.PATCH"
28
28
  commit = false # We do version bumping in CI, not as a commit
29
29
  tag = false # Git tag already exists — we don't auto-tag
@@ -1,6 +1,6 @@
1
1
  [metadata]
2
2
  name = slide2vec
3
- version = 3.0.1
3
+ version = 3.1.0
4
4
  description = Embedding of whole slide images with Foundation Models
5
5
  author = Clément Grisi
6
6
  platforms = unix, linux, osx, cygwin, win32
@@ -16,7 +16,7 @@ classifiers =
16
16
  packages =
17
17
  slide2vec
18
18
  install_requires =
19
- hs2p>=2.3.0,<3
19
+ hs2p>=2.4.1,<3
20
20
  omegaconf
21
21
  h5py
22
22
  matplotlib
@@ -37,6 +37,9 @@ zip_safe = no
37
37
  include_package_data = True
38
38
 
39
39
  [options.extras_require]
40
+ cucim =
41
+ hs2p[cucim]>=2.4.1,<3
42
+ PyTurboJPEG
40
43
  models =
41
44
  huggingface-hub
42
45
  sacremoses
@@ -2,7 +2,7 @@ from slide2vec.api import EmbeddedSlide, ExecutionOptions, Model, Pipeline, Prep
2
2
  from slide2vec.artifacts import SlideEmbeddingArtifact, TileEmbeddingArtifact
3
3
 
4
4
 
5
- __version__ = "3.0.1"
5
+ __version__ = "3.1.0"
6
6
 
7
7
  __all__ = [
8
8
  "Model",
@@ -3,6 +3,12 @@ from pathlib import Path
3
3
  from typing import TYPE_CHECKING, Any, Mapping, Protocol, Sequence, overload
4
4
 
5
5
  from slide2vec.artifacts import SlideEmbeddingArtifact, TileEmbeddingArtifact
6
+ from slide2vec.model_settings import (
7
+ canonicalize_model_name,
8
+ get_recommended_model_settings,
9
+ normalize_precision_name,
10
+ validate_model_runtime_compatibility,
11
+ )
6
12
 
7
13
  if TYPE_CHECKING:
8
14
  from hs2p import SlideSpec
@@ -17,12 +23,6 @@ DEFAULT_LEVEL_BY_NAME = {
17
23
  "titan": "slide",
18
24
  }
19
25
 
20
- MODEL_NAME_ALIASES = {
21
- "phikon-v2": "phikonv2",
22
- "hibou-b": "hibou",
23
- "hibou-l": "hibou",
24
- }
25
-
26
26
  PathLike = str | Path
27
27
 
28
28
 
@@ -37,9 +37,19 @@ SlideInput = PathLike | Mapping[str, object] | SlideLike | SlideSpec
37
37
  SlideSequence = Sequence[SlideInput]
38
38
  TilingResultsInput = Sequence[Any] | Mapping[str, Any]
39
39
 
40
+
41
+ def _cfg_num_cucim_workers(cfg: Any) -> int:
42
+ speed = getattr(cfg, "speed", None)
43
+ if speed is not None and hasattr(speed, "num_cucim_workers"):
44
+ return int(getattr(speed, "num_cucim_workers"))
45
+ tiling = getattr(cfg, "tiling", None)
46
+ if tiling is not None and hasattr(tiling, "num_cucim_workers"):
47
+ return int(getattr(tiling, "num_cucim_workers"))
48
+ return 4
49
+
40
50
  @dataclass(frozen=True)
41
51
  class PreprocessingConfig:
42
- backend: str = "asap"
52
+ backend: str = "auto"
43
53
  target_spacing_um: float = 0.5
44
54
  target_tile_size_px: int = 224
45
55
  tolerance: float = 0.05
@@ -47,7 +57,14 @@ class PreprocessingConfig:
47
57
  tissue_threshold: float = 0.01
48
58
  drop_holes: bool = False
49
59
  use_padding: bool = True
60
+ read_coordinates_from: Path | None = None
50
61
  read_tiles_from: Path | None = None
62
+ on_the_fly: bool = True
63
+ gpu_decode: bool = False
64
+ adaptive_batching: bool = False
65
+ use_supertiles: bool = True
66
+ jpeg_backend: str = "turbojpeg"
67
+ num_cucim_workers: int = 4
51
68
  resume: bool = False
52
69
  segmentation: dict[str, Any] = field(default_factory=dict)
53
70
  filtering: dict[str, Any] = field(default_factory=dict)
@@ -56,6 +73,12 @@ class PreprocessingConfig:
56
73
  @classmethod
57
74
  def from_config(cls, cfg: Any) -> "PreprocessingConfig":
58
75
  tiling = cfg.tiling
76
+ default_read_coordinates_from = Path(getattr(cfg, "output_dir", "output")) / "coordinates"
77
+ read_coordinates_from = getattr(tiling, "read_coordinates_from", None)
78
+ read_tiles_from = getattr(tiling, "read_tiles_from", None)
79
+ on_the_fly = bool(getattr(tiling, "on_the_fly", True))
80
+ gpu_decode = bool(getattr(tiling, "gpu_decode", False))
81
+ adaptive_batching = bool(getattr(tiling, "adaptive_batching", False))
59
82
  return cls(
60
83
  backend=tiling.backend,
61
84
  target_spacing_um=float(tiling.params.target_spacing_um),
@@ -65,7 +88,18 @@ class PreprocessingConfig:
65
88
  tissue_threshold=float(tiling.params.tissue_threshold),
66
89
  drop_holes=bool(tiling.params.drop_holes),
67
90
  use_padding=bool(tiling.params.use_padding),
68
- read_tiles_from=Path(tiling.read_tiles_from) if tiling.read_tiles_from else None,
91
+ read_coordinates_from=(
92
+ Path(read_coordinates_from) if read_coordinates_from else default_read_coordinates_from
93
+ ),
94
+ read_tiles_from=(
95
+ Path(read_tiles_from) if read_tiles_from else None
96
+ ),
97
+ on_the_fly=on_the_fly,
98
+ gpu_decode=gpu_decode,
99
+ adaptive_batching=adaptive_batching,
100
+ use_supertiles=bool(getattr(tiling, "use_supertiles", True)),
101
+ jpeg_backend=str(getattr(tiling, "jpeg_backend", "turbojpeg")),
102
+ num_cucim_workers=_cfg_num_cucim_workers(cfg),
69
103
  resume=bool(getattr(cfg, "resume", False)),
70
104
  segmentation=dict(tiling.seg_params),
71
105
  filtering=dict(tiling.filter_params),
@@ -86,21 +120,30 @@ class ExecutionOptions:
86
120
  output_format: str = "pt"
87
121
  batch_size: int = 1
88
122
  num_workers: int = 0
123
+ num_preprocessing_workers: int = 8
89
124
  num_gpus: int | None = None
90
- mixed_precision: bool = False
125
+ precision: str | None = None
126
+ prefetch_factor: int = 4
127
+ persistent_workers: bool = True
128
+ gpu_batch_preprocessing: bool = True
91
129
  save_tile_embeddings: bool = False
92
130
  save_latents: bool = False
93
131
 
94
132
  @classmethod
95
133
  def from_config(cls, cfg: Any, *, run_on_cpu: bool = False) -> "ExecutionOptions":
96
134
  configured_num_gpus = getattr(cfg.speed, "num_gpus", None)
135
+ requested_precision = normalize_precision_name(getattr(cfg.speed, "precision", "fp32"))
97
136
  return cls(
98
137
  output_dir=Path(cfg.output_dir),
99
138
  output_format="pt",
100
139
  batch_size=int(getattr(cfg.model, "batch_size", 1)),
101
- num_workers=int(getattr(cfg.speed, "num_workers_embedding", cfg.speed.num_workers)),
140
+ num_workers=int(getattr(cfg.speed, "num_dataloader_workers", getattr(cfg.speed, "num_workers_embedding", cfg.speed.num_workers))),
141
+ num_preprocessing_workers=int(getattr(cfg.speed, "num_preprocessing_workers", cfg.speed.num_workers)),
102
142
  num_gpus=1 if run_on_cpu else _coerce_num_gpus(configured_num_gpus),
103
- mixed_precision=bool(cfg.speed.fp16 and not run_on_cpu),
143
+ precision="fp32" if run_on_cpu else requested_precision,
144
+ prefetch_factor=int(getattr(cfg.speed, "prefetch_factor_embedding", 4)),
145
+ persistent_workers=bool(getattr(cfg.speed, "persistent_workers_embedding", True)),
146
+ gpu_batch_preprocessing=bool(getattr(cfg.speed, "gpu_batch_preprocessing", True)),
104
147
  save_tile_embeddings=bool(getattr(cfg.model, "save_tile_embeddings", False)),
105
148
  save_latents=bool(getattr(cfg.model, "save_latents", False)),
106
149
  )
@@ -108,8 +151,11 @@ class ExecutionOptions:
108
151
  def __post_init__(self) -> None:
109
152
  resolved_num_gpus = _default_num_gpus() if self.num_gpus is None else self.num_gpus
110
153
  object.__setattr__(self, "num_gpus", resolved_num_gpus)
154
+ object.__setattr__(self, "precision", normalize_precision_name(self.precision))
111
155
  if resolved_num_gpus < 1:
112
156
  raise ValueError("ExecutionOptions.num_gpus must be at least 1")
157
+ if self.prefetch_factor < 1:
158
+ raise ValueError("ExecutionOptions.prefetch_factor must be at least 1")
113
159
 
114
160
  def with_output_dir(self, output_dir: PathLike | None) -> "ExecutionOptions":
115
161
  if output_dir is None:
@@ -150,10 +196,12 @@ class Model:
150
196
  patch_size: int | None = None,
151
197
  token_size: int | None = None,
152
198
  normalize_embeddings: bool | None = None,
199
+ allow_non_recommended_settings: bool = False,
153
200
  ) -> None:
154
201
  self.name = _canonical_model_name(name)
155
202
  self.level = level
156
203
  self._requested_device = device
204
+ self.allow_non_recommended_settings = bool(allow_non_recommended_settings)
157
205
  self._model_kwargs = {
158
206
  "mode": mode,
159
207
  "arch": arch,
@@ -178,6 +226,7 @@ class Model:
178
226
  patch_size: int | None = None,
179
227
  token_size: int | None = None,
180
228
  normalize_embeddings: bool | None = None,
229
+ allow_non_recommended_settings: bool = False,
181
230
  device: str = "auto",
182
231
  ) -> "Model":
183
232
  canonical_name = _canonical_model_name(name)
@@ -193,6 +242,7 @@ class Model:
193
242
  patch_size=patch_size,
194
243
  token_size=token_size,
195
244
  normalize_embeddings=normalize_embeddings,
245
+ allow_non_recommended_settings=allow_non_recommended_settings,
196
246
  )
197
247
 
198
248
  @property
@@ -213,8 +263,10 @@ class Model:
213
263
  ) -> list[TileEmbeddingArtifact]:
214
264
  from slide2vec.inference import embed_tiles
215
265
 
216
- resolved = _coerce_execution_options(execution)
266
+ resolved = _coerce_execution_options(execution, model=self)
217
267
  _require_output_dir_for_persistence(resolved, method_name="Model.embed_tiles(...)")
268
+ if preprocessing is not None:
269
+ validate_model_runtime_compatibility(self, preprocessing, resolved)
218
270
  return embed_tiles(self, slides, tiling_results, execution=resolved, preprocessing=preprocessing)
219
271
 
220
272
  def aggregate_tiles(
@@ -226,7 +278,7 @@ class Model:
226
278
  ) -> list[SlideEmbeddingArtifact]:
227
279
  from slide2vec.inference import aggregate_tiles
228
280
 
229
- resolved = _coerce_execution_options(execution)
281
+ resolved = _coerce_execution_options(execution, model=self)
230
282
  _require_output_dir_for_persistence(resolved, method_name="Model.aggregate_tiles(...)")
231
283
  return aggregate_tiles(self, tile_artifacts, execution=resolved, preprocessing=preprocessing)
232
284
 
@@ -292,7 +344,8 @@ class Model:
292
344
  ) -> list[EmbeddedSlide]:
293
345
  from slide2vec.inference import embed_slides
294
346
 
295
- resolved = _coerce_execution_options(execution)
347
+ resolved = _coerce_execution_options(execution, model=self)
348
+ validate_model_runtime_compatibility(self, preprocessing, resolved)
296
349
  return embed_slides(
297
350
  self,
298
351
  slides,
@@ -303,13 +356,16 @@ class Model:
303
356
  def _load_backend(self) -> "LoadedModel":
304
357
  if self._backend is None:
305
358
  from slide2vec.inference import load_model
359
+ from slide2vec.progress import emit_progress
306
360
 
361
+ emit_progress("model.loading", model_name=self.name)
307
362
  self._backend = load_model(
308
363
  name=self.name,
309
364
  level=self.level,
310
365
  device=self._requested_device,
311
366
  **self._model_kwargs,
312
367
  )
368
+ emit_progress("model.ready", model_name=self.name, device=str(self._backend.device))
313
369
  return self._backend
314
370
 
315
371
 
@@ -323,7 +379,7 @@ class Pipeline:
323
379
  ) -> None:
324
380
  self.model = model
325
381
  self.preprocessing = preprocessing
326
- self.execution = _coerce_execution_options(execution)
382
+ self.execution = _coerce_execution_options(execution, model=model)
327
383
 
328
384
  def run(
329
385
  self,
@@ -334,6 +390,8 @@ class Pipeline:
334
390
  ) -> RunResult:
335
391
  from slide2vec.inference import run_pipeline
336
392
 
393
+ if not tiling_only:
394
+ validate_model_runtime_compatibility(self.model, self.preprocessing, self.execution)
337
395
  return run_pipeline(
338
396
  self.model,
339
397
  slides=slides,
@@ -345,14 +403,19 @@ class Pipeline:
345
403
 
346
404
 
347
405
  def _canonical_model_name(name: str) -> str:
348
- normalized = name.strip().lower()
349
- return MODEL_NAME_ALIASES.get(normalized, normalized)
406
+ return canonicalize_model_name(name)
350
407
 
351
408
 
352
- def _coerce_execution_options(options: ExecutionOptions | None) -> ExecutionOptions:
353
- if options is None:
354
- return ExecutionOptions()
355
- return options
409
+ def _coerce_execution_options(
410
+ options: ExecutionOptions | None,
411
+ *,
412
+ model: Model | None = None,
413
+ ) -> ExecutionOptions:
414
+ resolved = ExecutionOptions() if options is None else options
415
+ if resolved.precision is not None:
416
+ return resolved
417
+ recommended = _recommended_execution_precision(model)
418
+ return replace(resolved, precision=recommended)
356
419
 
357
420
 
358
421
  def _coerce_num_gpus(value: Any) -> int | None:
@@ -374,3 +437,10 @@ def _default_num_gpus() -> int:
374
437
  def _require_output_dir_for_persistence(execution: ExecutionOptions, *, method_name: str) -> None:
375
438
  if execution.output_dir is None:
376
439
  raise ValueError(f"ExecutionOptions.output_dir is required for {method_name}")
440
+
441
+
442
+ def _recommended_execution_precision(model: Model | None) -> str:
443
+ settings = get_recommended_model_settings(getattr(model, "name", None))
444
+ if settings is not None and settings.precision is not None:
445
+ return settings.precision
446
+ return "fp32"
@@ -33,6 +33,9 @@ def build_model_and_pipeline(args):
33
33
  patch_size=cfg.model.patch_size,
34
34
  token_size=cfg.model.token_size,
35
35
  normalize_embeddings=getattr(cfg.model, "normalize_embeddings", None),
36
+ allow_non_recommended_settings=bool(
37
+ getattr(cfg.model, "allow_non_recommended_settings", False)
38
+ ),
36
39
  device="cpu" if args.run_on_cpu else "auto",
37
40
  )
38
41
  preprocessing = PreprocessingConfig.from_config(cfg)
@@ -0,0 +1,16 @@
1
+ csv:
2
+
3
+ output_dir:
4
+ save_previews: false
5
+
6
+ tiling:
7
+ params:
8
+ target_spacing_um: 0.5
9
+ target_tile_size_px: 448
10
+
11
+ model:
12
+ level: "tile"
13
+ name: "conch"
14
+
15
+ speed:
16
+ precision: "fp32"
@@ -0,0 +1,16 @@
1
+ csv:
2
+
3
+ output_dir:
4
+ save_previews: false
5
+
6
+ tiling:
7
+ params:
8
+ target_spacing_um: 0.5
9
+ target_tile_size_px: 448
10
+
11
+ model:
12
+ level: "tile"
13
+ name: "conchv15"
14
+
15
+ speed:
16
+ precision: "fp16"
@@ -8,21 +8,22 @@ seed: 0 # seed for reproducibility
8
8
 
9
9
  model:
10
10
  level: "tile" # level at which to extract the features ("tile", "region" or "slide")
11
- name: # foundation model name ["uni", "uni2", "virchow", "virchow2", "prov-gigapath", "h-optimus-0", "h-optimus-1", "pathojepa", "titan", "prism"] (leave empty when using a custom model)
12
- mode: "cls" # embedding mode ["cls", "full"]
11
+ name: # foundation model name; see docs/models.md for supported request strings (leave empty when using a custom model)
12
+ mode: # embedding mode override ["cls", "full"]; leave empty for the model default
13
13
  arch: # architecture of custom model
14
14
  pretrained_weights: # path to the pretrained weights when using a custom model
15
- batch_size: 256
15
+ batch_size: 32
16
16
  input_size: ${tiling.params.target_tile_size_px}
17
17
  patch_size: 256 # if level is "region", size used to unroll the region into patches
18
18
  token_size: 16 # size of the tokens used model is a custom pretrained ViT
19
19
  save_tile_embeddings: false # whether to save tile embeddings alongside the pooled slide embedding when level is "slide"
20
20
  save_latents: false # whether to save the latent representations from the model alongside the slide embedding (only supported for 'prism')
21
21
  normalize_embeddings: false # L2 normalize tile embeddings (used by some custom checkpoints such as pathojepa)
22
+ allow_non_recommended_settings: false # when true, non-recommended model input size / spacing / precision combinations warn instead of erroring
22
23
 
23
24
  speed:
24
- fp16: false # use mixed precision during model inference
25
- num_workers_embedding: 8 # number of workers for data loading when embedding slides
25
+ precision: fp32 # model inference precision ["fp32", "fp16", "bf16"]
26
+ num_dataloader_workers: 8 # number of DataLoader worker processes for reading tiles during embedding (tar path); on-the-fly path derives this automatically from cpu_count // speed.num_cucim_workers
26
27
  num_gpus: # number of GPUs to use for feature extraction; defaults to all available GPUs
27
28
 
28
29
  wandb:
@@ -0,0 +1,16 @@
1
+ csv:
2
+
3
+ output_dir: "output"
4
+ save_previews: false
5
+
6
+ tiling:
7
+ params:
8
+ target_spacing_um: 0.5
9
+ target_tile_size_px: 224
10
+
11
+ model:
12
+ level: "tile"
13
+ name: "h-optimus-0"
14
+
15
+ speed:
16
+ precision: "fp16"
@@ -0,0 +1,16 @@
1
+ csv:
2
+
3
+ output_dir: "output"
4
+ save_previews: false
5
+
6
+ tiling:
7
+ params:
8
+ target_spacing_um: 0.5
9
+ target_tile_size_px: 224
10
+
11
+ model:
12
+ level: "tile"
13
+ name: "h-optimus-1"
14
+
15
+ speed:
16
+ precision: "fp16"
@@ -0,0 +1,16 @@
1
+ csv:
2
+
3
+ output_dir: "output"
4
+ save_previews: false
5
+
6
+ tiling:
7
+ params:
8
+ target_spacing_um: 0.5
9
+ target_tile_size_px: 224
10
+
11
+ model:
12
+ level: "tile"
13
+ name: "h0-mini"
14
+
15
+ speed:
16
+ precision: "fp16"
@@ -0,0 +1,17 @@
1
+ csv:
2
+
3
+ output_dir: "output"
4
+ save_previews: false
5
+
6
+ tiling:
7
+ params:
8
+ target_spacing_um: 0.5
9
+ target_tile_size_px: 224
10
+
11
+ model:
12
+ level: "tile"
13
+ arch: "hibou-b"
14
+ name: "hibou"
15
+
16
+ speed:
17
+ precision: "fp16"
@@ -0,0 +1,16 @@
1
+ csv:
2
+
3
+ output_dir: "output"
4
+ save_previews: false
5
+
6
+ tiling:
7
+ params:
8
+ target_spacing_um: 0.5
9
+ target_tile_size_px: 224
10
+
11
+ model:
12
+ level: "tile"
13
+ name: "kaiko-midnight"
14
+
15
+ speed:
16
+ precision: "fp16"
@@ -0,0 +1,17 @@
1
+ csv:
2
+
3
+ output_dir: "output"
4
+ save_previews: false
5
+
6
+ tiling:
7
+ params:
8
+ target_spacing_um: 0.5
9
+ target_tile_size_px: 224
10
+
11
+ model:
12
+ level: "tile"
13
+ name: "kaiko"
14
+ arch: "vitl14"
15
+
16
+ speed:
17
+ precision: "fp32"
@@ -0,0 +1,16 @@
1
+ csv:
2
+
3
+ output_dir: "output"
4
+ save_previews: false
5
+
6
+ tiling:
7
+ params:
8
+ target_spacing_um: 0.5
9
+ target_tile_size_px: 384
10
+
11
+ model:
12
+ level: "tile"
13
+ name: "musk"
14
+
15
+ speed:
16
+ precision: "fp16"
@@ -0,0 +1,17 @@
1
+ csv: ""
2
+
3
+ output_dir: "output"
4
+ save_previews: false
5
+
6
+ tiling:
7
+ params:
8
+ target_spacing_um: 0.5
9
+ target_tile_size_px: 224
10
+
11
+ model:
12
+ level: "tile"
13
+ name: "panda-vit-s"
14
+ pretrained_weights: "/path/to/model/weights.pt"
15
+
16
+ speed:
17
+ precision: "fp32"
@@ -0,0 +1,36 @@
1
+ csv: "/data/pathology/projects/clement/leopard/csvs/brazil-slide2vec-august-2025-revision.csv"
2
+
3
+ output_dir: "/data/pathology/projects/clement/discern/pathojepa/slide2vec"
4
+ save_previews: false
5
+
6
+ tiling:
7
+ params:
8
+ target_spacing_um: 0.5
9
+ tolerance: 0.05
10
+ target_tile_size_px: 2048
11
+ tissue_threshold: 0.1
12
+ seg_params:
13
+ downsample: 64
14
+ filter_params:
15
+ ref_tile_size: 256
16
+
17
+ model:
18
+ level: "region"
19
+ name: "pathojepa"
20
+ arch: "vit_small"
21
+ pretrained_weights: "/data/pathology/projects/clement/discern/pathojepa/runs/dmky8lh7/jepa-pathorob-latest.pth.tar"
22
+ input_size: 224
23
+ patch_size: 256
24
+ token_size: 16
25
+ normalize_embeddings: false
26
+ batch_size: 1
27
+
28
+ speed:
29
+ precision: "fp32"
30
+
31
+ wandb:
32
+ enable: true
33
+ project: "leopard"
34
+ username: "clemsg"
35
+ exp_name: "features"
36
+ tags: ["features", "dev", "${model.name}", "${model.level}", "${tiling.params.target_tile_size_px}"]
@@ -0,0 +1,16 @@
1
+ csv:
2
+
3
+ output_dir: "output"
4
+ save_previews: false
5
+
6
+ tiling:
7
+ params:
8
+ target_spacing_um: 0.5
9
+ target_tile_size_px: 224
10
+
11
+ model:
12
+ level: "tile"
13
+ name: "phikonv2"
14
+
15
+ speed:
16
+ precision: "fp32"
@@ -0,0 +1,18 @@
1
+ csv:
2
+
3
+ output_dir: "output"
4
+ save_previews: false
5
+
6
+ tiling:
7
+ params:
8
+ target_spacing_um: 0.5
9
+ target_tile_size_px: 224
10
+
11
+ model:
12
+ level: "slide"
13
+ name: "prism"
14
+ save_tile_embeddings: true
15
+ save_latents: false
16
+
17
+ speed:
18
+ precision: "fp16"
@@ -0,0 +1,17 @@
1
+ csv:
2
+
3
+ output_dir: "output"
4
+ save_previews: false
5
+
6
+ tiling:
7
+ params:
8
+ target_spacing_um: 0.5
9
+ target_tile_size_px: 256
10
+
11
+ model:
12
+ level: "slide"
13
+ name: "prov-gigapath"
14
+ save_tile_embeddings: true
15
+
16
+ speed:
17
+ precision: "fp16"
@@ -0,0 +1,16 @@
1
+ csv:
2
+
3
+ output_dir: "output"
4
+ save_previews: false
5
+
6
+ tiling:
7
+ params:
8
+ target_spacing_um: 0.5
9
+ target_tile_size_px: 256
10
+
11
+ model:
12
+ level: "tile"
13
+ name: "prov-gigapath"
14
+
15
+ speed:
16
+ precision: "fp16"