slide2vec 3.1.0__tar.gz → 3.2.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90)
  1. {slide2vec-3.1.0/slide2vec.egg-info → slide2vec-3.2.1}/PKG-INFO +20 -20
  2. {slide2vec-3.1.0 → slide2vec-3.2.1}/README.md +16 -16
  3. {slide2vec-3.1.0 → slide2vec-3.2.1}/pyproject.toml +1 -1
  4. {slide2vec-3.1.0 → slide2vec-3.2.1}/setup.cfg +4 -4
  5. {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/__init__.py +1 -1
  6. {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/api.py +132 -27
  7. {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/cli.py +1 -1
  8. {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/configs/preprocessing/default.yaml +1 -1
  9. {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/data/cucim_tile_reader.py +12 -29
  10. {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/data/wsd_tile_reader.py +3 -1
  11. {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/distributed/direct_embed_worker.py +1 -1
  12. {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/distributed/pipeline_worker.py +1 -1
  13. {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/inference.py +56 -11
  14. {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/model_settings.py +1 -1
  15. {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/progress.py +43 -0
  16. {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/utils/log_utils.py +19 -0
  17. {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/utils/tiling_io.py +8 -0
  18. {slide2vec-3.1.0 → slide2vec-3.2.1/slide2vec.egg-info}/PKG-INFO +20 -20
  19. {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec.egg-info/requires.txt +3 -3
  20. {slide2vec-3.1.0 → slide2vec-3.2.1}/tests/test_hs2p_package_cutover.py +37 -2
  21. {slide2vec-3.1.0 → slide2vec-3.2.1}/tests/test_output_consistency.py +20 -3
  22. {slide2vec-3.1.0 → slide2vec-3.2.1}/tests/test_progress.py +15 -1
  23. {slide2vec-3.1.0 → slide2vec-3.2.1}/tests/test_regression_core.py +56 -6
  24. {slide2vec-3.1.0 → slide2vec-3.2.1}/tests/test_regression_inference.py +77 -10
  25. {slide2vec-3.1.0 → slide2vec-3.2.1}/tests/test_regression_models.py +203 -13
  26. {slide2vec-3.1.0 → slide2vec-3.2.1}/LICENSE +0 -0
  27. {slide2vec-3.1.0 → slide2vec-3.2.1}/MANIFEST.in +0 -0
  28. {slide2vec-3.1.0 → slide2vec-3.2.1}/setup.py +0 -0
  29. {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/__main__.py +0 -0
  30. {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/artifacts.py +0 -0
  31. {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/configs/__init__.py +0 -0
  32. {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/configs/models/conch.yaml +0 -0
  33. {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/configs/models/conchv15.yaml +0 -0
  34. {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/configs/models/default.yaml +0 -0
  35. {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/configs/models/h-optimus-0.yaml +0 -0
  36. {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/configs/models/h-optimus-1.yaml +0 -0
  37. {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/configs/models/h0-mini.yaml +0 -0
  38. {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/configs/models/hibou.yaml +0 -0
  39. {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/configs/models/kaiko-midnight.yaml +0 -0
  40. {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/configs/models/kaiko.yaml +0 -0
  41. {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/configs/models/musk.yaml +0 -0
  42. {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/configs/models/panda-vit-s.yaml +0 -0
  43. {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/configs/models/pathojepa.yaml +0 -0
  44. {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/configs/models/phikonv2.yaml +0 -0
  45. {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/configs/models/prism.yaml +0 -0
  46. {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/configs/models/prov-gigapath-slide.yaml +0 -0
  47. {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/configs/models/prov-gigapath-tile.yaml +0 -0
  48. {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/configs/models/titan.yaml +0 -0
  49. {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/configs/models/uni.yaml +0 -0
  50. {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/configs/models/uni2.yaml +0 -0
  51. {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/configs/models/virchow.yaml +0 -0
  52. {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/configs/models/virchow2.yaml +0 -0
  53. {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/data/__init__.py +0 -0
  54. {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/data/augmentations.py +0 -0
  55. {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/data/dataset.py +0 -0
  56. {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/data/tile_store.py +0 -0
  57. {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/distributed/__init__.py +0 -0
  58. {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/main.py +0 -0
  59. {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/models/__init__.py +0 -0
  60. {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/models/layers/__init__.py +0 -0
  61. {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/models/layers/attention.py +0 -0
  62. {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/models/layers/block.py +0 -0
  63. {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/models/layers/dino_head.py +0 -0
  64. {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/models/layers/drop_path.py +0 -0
  65. {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/models/layers/layer_scale.py +0 -0
  66. {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/models/layers/mlp.py +0 -0
  67. {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/models/layers/patch_embed.py +0 -0
  68. {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/models/layers/swiglu_ffn.py +0 -0
  69. {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/models/models.py +0 -0
  70. {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/models/vision_transformer_dino.py +0 -0
  71. {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/models/vision_transformer_dinov2.py +0 -0
  72. {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/models/vision_transformer_pathojepa.py +0 -0
  73. {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/resources.py +0 -0
  74. {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/utils/__init__.py +0 -0
  75. {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/utils/config.py +0 -0
  76. {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/utils/coordinates.py +0 -0
  77. {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/utils/paths.py +0 -0
  78. {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/utils/utils.py +0 -0
  79. {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec.egg-info/SOURCES.txt +0 -0
  80. {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec.egg-info/dependency_links.txt +0 -0
  81. {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec.egg-info/entry_points.txt +0 -0
  82. {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec.egg-info/not-zip-safe +0 -0
  83. {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec.egg-info/top_level.txt +0 -0
  84. {slide2vec-3.1.0 → slide2vec-3.2.1}/tests/test_batch_collator_timing.py +0 -0
  85. {slide2vec-3.1.0 → slide2vec-3.2.1}/tests/test_benchmark_embedding_throughput.py +0 -0
  86. {slide2vec-3.1.0 → slide2vec-3.2.1}/tests/test_benchmark_end_to_end_paths.py +0 -0
  87. {slide2vec-3.1.0 → slide2vec-3.2.1}/tests/test_benchmark_tile_read_strategies.py +0 -0
  88. {slide2vec-3.1.0 → slide2vec-3.2.1}/tests/test_dependency_split.py +0 -0
  89. {slide2vec-3.1.0 → slide2vec-3.2.1}/tests/test_release.py +0 -0
  90. {slide2vec-3.1.0 → slide2vec-3.2.1}/tests/test_tile_store.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: slide2vec
3
- Version: 3.1.0
3
+ Version: 3.2.1
4
4
  Summary: Embedding of whole slide images with Foundation Models
5
5
  Home-page: https://github.com/clemsgrs/slide2vec
6
6
  Author: Clément Grisi
@@ -20,13 +20,14 @@ Classifier: Programming Language :: Python :: 3.13
20
20
  Requires-Python: >=3.10
21
21
  Description-Content-Type: text/markdown
22
22
  License-File: LICENSE
23
- Requires-Dist: hs2p<3,>=2.4.1
23
+ Requires-Dist: hs2p<3,>=2.5.1
24
24
  Requires-Dist: omegaconf
25
25
  Requires-Dist: h5py
26
26
  Requires-Dist: matplotlib
27
27
  Requires-Dist: numpy<2
28
28
  Requires-Dist: pandas
29
29
  Requires-Dist: pillow
30
+ Requires-Dist: PyTurboJPEG
30
31
  Requires-Dist: rich
31
32
  Requires-Dist: tqdm
32
33
  Requires-Dist: torch
@@ -37,8 +38,7 @@ Requires-Dist: wholeslidedata<0.0.16
37
38
  Requires-Dist: einops
38
39
  Requires-Dist: timm
39
40
  Provides-Extra: cucim
40
- Requires-Dist: hs2p[cucim]<3,>=2.4.1; extra == "cucim"
41
- Requires-Dist: PyTurboJPEG; extra == "cucim"
41
+ Requires-Dist: hs2p[cucim]<3,>=2.5.1; extra == "cucim"
42
42
  Provides-Extra: models
43
43
  Requires-Dist: huggingface-hub; extra == "models"
44
44
  Requires-Dist: sacremoses; extra == "models"
@@ -80,38 +80,37 @@ pip install "slide2vec[models]"
80
80
  ## Python API
81
81
 
82
82
  ```python
83
- from slide2vec import Model, PreprocessingConfig
83
+ from slide2vec import Model
84
+ from slide2vec.utils.config import hf_login
84
85
 
85
- model = Model.from_pretrained("virchow2", level="tile")
86
- preprocessing = PreprocessingConfig(
87
- target_spacing_um=0.5,
88
- target_tile_size_px=224,
89
- tissue_threshold=0.1,
90
- )
91
- embedded = model.embed_slide(
92
- "/path/to/slide.svs",
93
- preprocessing=preprocessing,
94
- )
86
+ hf_login()
87
+
88
+ model = Model.from_preset("virchow2")
89
+ embedded = model.embed_slide("/path/to/slide.svs")
95
90
 
96
91
  tile_embeddings = embedded.tile_embeddings
97
92
  coordinates = embedded.coordinates
98
93
  ```
99
94
 
100
- By default, `ExecutionOptions()` uses all available GPUs. Set `ExecutionOptions(num_gpus=4)` when you want to cap the sharding explicitly.
101
-
102
95
  Use `Pipeline(...)` for manifest-driven batch processing when you want artifacts written to disk instead of only in-memory outputs:
103
96
 
104
97
  ```python
105
- from slide2vec import ExecutionOptions, Pipeline
98
+ from slide2vec import ExecutionOptions, Pipeline, PreprocessingConfig
106
99
 
107
100
  pipeline = Pipeline(
108
101
  model=model,
109
- preprocessing=preprocessing,
102
+ preprocessing=PreprocessingConfig(
103
+ target_spacing_um=0.5,
104
+ target_tile_size_px=224,
105
+ tissue_threshold=0.1,
106
+ ),
110
107
  execution=ExecutionOptions(output_dir="outputs/demo"),
111
108
  )
112
109
  result = pipeline.run(manifest_path="/path/to/slides.csv")
113
110
  ```
114
111
 
112
+ By default, `ExecutionOptions()` uses all available GPUs. Set `ExecutionOptions(num_gpus=4)` when you want to cap the sharding explicitly.
113
+
115
114
  ### Input Manifest
116
115
 
117
116
  Manifest-driven runs use the schema below. `mask_path` and `spacing_at_level_0` are optional.
@@ -140,7 +139,7 @@ The package writes explicit artifact directories:
140
139
 
141
140
  ### Supported Models
142
141
 
143
- `slide2vec` currently ships preset configs for 10 tile-level models and 3 slide-level models.
142
+ `slide2vec` currently ships preset configs for 20 tile-level models and 3 slide-level models.
144
143
  For the full catalog and preset names, see [`docs/models.md`](docs/models.md).
145
144
 
146
145
  ## CLI
@@ -174,4 +173,5 @@ docker run --rm -it \
174
173
 
175
174
  - [`docs/cli.md`](docs/cli.md) for the config-driven CLI guide
176
175
  - [`docs/python-api.md`](docs/python-api.md) for the detailed API reference
176
+ - [`tutorials/api_walkthrough.ipynb`](tutorials/api_walkthrough.ipynb) for a notebook walkthrough of the API
177
177
  - [`docs/models.md`](docs/models.md) for the full supported-model catalog
@@ -21,38 +21,37 @@ pip install "slide2vec[models]"
21
21
  ## Python API
22
22
 
23
23
  ```python
24
- from slide2vec import Model, PreprocessingConfig
24
+ from slide2vec import Model
25
+ from slide2vec.utils.config import hf_login
25
26
 
26
- model = Model.from_pretrained("virchow2", level="tile")
27
- preprocessing = PreprocessingConfig(
28
- target_spacing_um=0.5,
29
- target_tile_size_px=224,
30
- tissue_threshold=0.1,
31
- )
32
- embedded = model.embed_slide(
33
- "/path/to/slide.svs",
34
- preprocessing=preprocessing,
35
- )
27
+ hf_login()
28
+
29
+ model = Model.from_preset("virchow2")
30
+ embedded = model.embed_slide("/path/to/slide.svs")
36
31
 
37
32
  tile_embeddings = embedded.tile_embeddings
38
33
  coordinates = embedded.coordinates
39
34
  ```
40
35
 
41
- By default, `ExecutionOptions()` uses all available GPUs. Set `ExecutionOptions(num_gpus=4)` when you want to cap the sharding explicitly.
42
-
43
36
  Use `Pipeline(...)` for manifest-driven batch processing when you want artifacts written to disk instead of only in-memory outputs:
44
37
 
45
38
  ```python
46
- from slide2vec import ExecutionOptions, Pipeline
39
+ from slide2vec import ExecutionOptions, Pipeline, PreprocessingConfig
47
40
 
48
41
  pipeline = Pipeline(
49
42
  model=model,
50
- preprocessing=preprocessing,
43
+ preprocessing=PreprocessingConfig(
44
+ target_spacing_um=0.5,
45
+ target_tile_size_px=224,
46
+ tissue_threshold=0.1,
47
+ ),
51
48
  execution=ExecutionOptions(output_dir="outputs/demo"),
52
49
  )
53
50
  result = pipeline.run(manifest_path="/path/to/slides.csv")
54
51
  ```
55
52
 
53
+ By default, `ExecutionOptions()` uses all available GPUs. Set `ExecutionOptions(num_gpus=4)` when you want to cap the sharding explicitly.
54
+
56
55
  ### Input Manifest
57
56
 
58
57
  Manifest-driven runs use the schema below. `mask_path` and `spacing_at_level_0` are optional.
@@ -81,7 +80,7 @@ The package writes explicit artifact directories:
81
80
 
82
81
  ### Supported Models
83
82
 
84
- `slide2vec` currently ships preset configs for 10 tile-level models and 3 slide-level models.
83
+ `slide2vec` currently ships preset configs for 20 tile-level models and 3 slide-level models.
85
84
  For the full catalog and preset names, see [`docs/models.md`](docs/models.md).
86
85
 
87
86
  ## CLI
@@ -115,4 +114,5 @@ docker run --rm -it \
115
114
 
116
115
  - [`docs/cli.md`](docs/cli.md) for the config-driven CLI guide
117
116
  - [`docs/python-api.md`](docs/python-api.md) for the detailed API reference
117
+ - [`tutorials/api_walkthrough.ipynb`](tutorials/api_walkthrough.ipynb) for a notebook walkthrough of the API
118
118
  - [`docs/models.md`](docs/models.md) for the full supported-model catalog
@@ -23,7 +23,7 @@ warn_unused_configs = true
23
23
  no_implicit_reexport = true
24
24
 
25
25
  [tool.bumpver]
26
- current_version = "3.1.0"
26
+ current_version = "3.2.1"
27
27
  version_pattern = "MAJOR.MINOR.PATCH"
28
28
  commit = false # We do version bumping in CI, not as a commit
29
29
  tag = false # Git tag already exists — we don't auto-tag
@@ -1,6 +1,6 @@
1
1
  [metadata]
2
2
  name = slide2vec
3
- version = 3.1.0
3
+ version = 3.2.1
4
4
  description = Embedding of whole slide images with Foundation Models
5
5
  author = Clément Grisi
6
6
  platforms = unix, linux, osx, cygwin, win32
@@ -16,13 +16,14 @@ classifiers =
16
16
  packages =
17
17
  slide2vec
18
18
  install_requires =
19
- hs2p>=2.4.1,<3
19
+ hs2p>=2.5.1,<3
20
20
  omegaconf
21
21
  h5py
22
22
  matplotlib
23
23
  numpy<2
24
24
  pandas
25
25
  pillow
26
+ PyTurboJPEG
26
27
  rich
27
28
  tqdm
28
29
  torch
@@ -38,8 +39,7 @@ include_package_data = True
38
39
 
39
40
  [options.extras_require]
40
41
  cucim =
41
- hs2p[cucim]>=2.4.1,<3
42
- PyTurboJPEG
42
+ hs2p[cucim]>=2.5.1,<3
43
43
  models =
44
44
  huggingface-hub
45
45
  sacremoses
@@ -2,7 +2,7 @@ from slide2vec.api import EmbeddedSlide, ExecutionOptions, Model, Pipeline, Prep
2
2
  from slide2vec.artifacts import SlideEmbeddingArtifact, TileEmbeddingArtifact
3
3
 
4
4
 
5
- __version__ = "3.1.0"
5
+ __version__ = "3.2.1"
6
6
 
7
7
  __all__ = [
8
8
  "Model",
@@ -1,4 +1,7 @@
1
+ import logging
2
+ import os
1
3
  from dataclasses import dataclass, field, replace
4
+ from contextlib import contextmanager
2
5
  from pathlib import Path
3
6
  from typing import TYPE_CHECKING, Any, Mapping, Protocol, Sequence, overload
4
7
 
@@ -23,6 +26,8 @@ DEFAULT_LEVEL_BY_NAME = {
23
26
  "titan": "slide",
24
27
  }
25
28
 
29
+ logger = logging.getLogger("slide2vec")
30
+
26
31
  PathLike = str | Path
27
32
 
28
33
 
@@ -137,8 +142,8 @@ class ExecutionOptions:
137
142
  output_dir=Path(cfg.output_dir),
138
143
  output_format="pt",
139
144
  batch_size=int(getattr(cfg.model, "batch_size", 1)),
140
- num_workers=int(getattr(cfg.speed, "num_dataloader_workers", getattr(cfg.speed, "num_workers_embedding", cfg.speed.num_workers))),
141
- num_preprocessing_workers=int(getattr(cfg.speed, "num_preprocessing_workers", cfg.speed.num_workers)),
145
+ num_workers=int(getattr(cfg.speed, "num_dataloader_workers", getattr(cfg.speed, "num_workers_embedding", 8))),
146
+ num_preprocessing_workers=int(getattr(cfg.speed, "num_preprocessing_workers", 8)),
142
147
  num_gpus=1 if run_on_cpu else _coerce_num_gpus(configured_num_gpus),
143
148
  precision="fp32" if run_on_cpu else requested_precision,
144
149
  prefetch_factor=int(getattr(cfg.speed, "prefetch_factor_embedding", 4)),
@@ -156,6 +161,15 @@ class ExecutionOptions:
156
161
  raise ValueError("ExecutionOptions.num_gpus must be at least 1")
157
162
  if self.prefetch_factor < 1:
158
163
  raise ValueError("ExecutionOptions.prefetch_factor must be at least 1")
164
+ slurm_cpu_limit = None
165
+ for env_name in ("SLURM_CPUS_PER_TASK", "SLURM_CPUS_ON_NODE", "SLURM_JOB_CPUS_PER_NODE"):
166
+ value = os.environ.get(env_name)
167
+ if value and value.strip().isdigit() and int(value.strip()) > 0:
168
+ slurm_cpu_limit = int(value.strip())
169
+ break
170
+ if slurm_cpu_limit is not None:
171
+ object.__setattr__(self, "num_workers", min(self.num_workers, slurm_cpu_limit))
172
+ object.__setattr__(self, "num_preprocessing_workers", min(self.num_preprocessing_workers, slurm_cpu_limit))
159
173
 
160
174
  def with_output_dir(self, output_dir: PathLike | None) -> "ExecutionOptions":
161
175
  if output_dir is None:
@@ -179,6 +193,9 @@ class EmbeddedSlide:
179
193
  tile_size_lv0: int
180
194
  image_path: Path
181
195
  mask_path: Path | None = None
196
+ num_tiles: int | None = None
197
+ mask_preview_path: Path | None = None
198
+ tiling_preview_path: Path | None = None
182
199
  latents: Any | None = None
183
200
 
184
201
 
@@ -214,7 +231,7 @@ class Model:
214
231
  self._backend: LoadedModel | None = None
215
232
 
216
233
  @classmethod
217
- def from_pretrained(
234
+ def from_preset(
218
235
  cls,
219
236
  name: str,
220
237
  *,
@@ -230,7 +247,7 @@ class Model:
230
247
  device: str = "auto",
231
248
  ) -> "Model":
232
249
  canonical_name = _canonical_model_name(name)
233
- resolved_level = level or DEFAULT_LEVEL_BY_NAME.get(canonical_name, "tile")
250
+ resolved_level = _resolve_model_level(canonical_name, requested_level=level)
234
251
  return cls(
235
252
  name=canonical_name,
236
253
  level=resolved_level,
@@ -267,7 +284,8 @@ class Model:
267
284
  _require_output_dir_for_persistence(resolved, method_name="Model.embed_tiles(...)")
268
285
  if preprocessing is not None:
269
286
  validate_model_runtime_compatibility(self, preprocessing, resolved)
270
- return embed_tiles(self, slides, tiling_results, execution=resolved, preprocessing=preprocessing)
287
+ with _auto_progress_reporting(output_dir=resolved.output_dir):
288
+ return embed_tiles(self, slides, tiling_results, execution=resolved, preprocessing=preprocessing)
271
289
 
272
290
  def aggregate_tiles(
273
291
  self,
@@ -280,14 +298,15 @@ class Model:
280
298
 
281
299
  resolved = _coerce_execution_options(execution, model=self)
282
300
  _require_output_dir_for_persistence(resolved, method_name="Model.aggregate_tiles(...)")
283
- return aggregate_tiles(self, tile_artifacts, execution=resolved, preprocessing=preprocessing)
301
+ with _auto_progress_reporting(output_dir=resolved.output_dir):
302
+ return aggregate_tiles(self, tile_artifacts, execution=resolved, preprocessing=preprocessing)
284
303
 
285
304
  @overload
286
305
  def embed_slide(
287
306
  self,
288
307
  slide: PathLike,
289
308
  *,
290
- preprocessing: PreprocessingConfig,
309
+ preprocessing: PreprocessingConfig | None = None,
291
310
  execution: ExecutionOptions | None = None,
292
311
  sample_id: str | None = None,
293
312
  mask_path: PathLike | None = None,
@@ -300,7 +319,7 @@ class Model:
300
319
  self,
301
320
  slide: Mapping[str, object] | SlideLike | SlideSpec,
302
321
  *,
303
- preprocessing: PreprocessingConfig,
322
+ preprocessing: PreprocessingConfig | None = None,
304
323
  execution: ExecutionOptions | None = None,
305
324
  sample_id: None = None,
306
325
  mask_path: None = None,
@@ -312,7 +331,7 @@ class Model:
312
331
  self,
313
332
  slide: SlideInput,
314
333
  *,
315
- preprocessing: PreprocessingConfig,
334
+ preprocessing: PreprocessingConfig | None = None,
316
335
  execution: ExecutionOptions | None = None,
317
336
  sample_id: str | None = None,
318
337
  mask_path: PathLike | None = None,
@@ -339,19 +358,21 @@ class Model:
339
358
  self,
340
359
  slides: SlideSequence,
341
360
  *,
342
- preprocessing: PreprocessingConfig,
361
+ preprocessing: PreprocessingConfig | None = None,
343
362
  execution: ExecutionOptions | None = None,
344
363
  ) -> list[EmbeddedSlide]:
345
364
  from slide2vec.inference import embed_slides
346
365
 
347
366
  resolved = _coerce_execution_options(execution, model=self)
348
- validate_model_runtime_compatibility(self, preprocessing, resolved)
349
- return embed_slides(
350
- self,
351
- slides,
352
- preprocessing=preprocessing,
353
- execution=resolved,
354
- )
367
+ resolved_preprocessing = _resolve_direct_api_preprocessing(self, preprocessing)
368
+ with _auto_progress_reporting(output_dir=resolved.output_dir):
369
+ validate_model_runtime_compatibility(self, resolved_preprocessing, resolved)
370
+ return embed_slides(
371
+ self,
372
+ slides,
373
+ preprocessing=resolved_preprocessing,
374
+ execution=resolved,
375
+ )
355
376
 
356
377
  def _load_backend(self) -> "LoadedModel":
357
378
  if self._backend is None:
@@ -390,22 +411,29 @@ class Pipeline:
390
411
  ) -> RunResult:
391
412
  from slide2vec.inference import run_pipeline
392
413
 
393
- if not tiling_only:
394
- validate_model_runtime_compatibility(self.model, self.preprocessing, self.execution)
395
- return run_pipeline(
396
- self.model,
397
- slides=slides,
398
- manifest_path=manifest_path,
399
- preprocessing=self.preprocessing,
400
- tiling_only=tiling_only,
401
- execution=self.execution,
402
- )
414
+ with _auto_progress_reporting(output_dir=self.execution.output_dir):
415
+ if not tiling_only:
416
+ validate_model_runtime_compatibility(self.model, self.preprocessing, self.execution)
417
+ return run_pipeline(
418
+ self.model,
419
+ slides=slides,
420
+ manifest_path=manifest_path,
421
+ preprocessing=self.preprocessing,
422
+ tiling_only=tiling_only,
423
+ execution=self.execution,
424
+ )
403
425
 
404
426
 
405
427
  def _canonical_model_name(name: str) -> str:
406
428
  return canonicalize_model_name(name)
407
429
 
408
430
 
431
+ def _resolve_model_level(name: str, *, requested_level: str | None) -> str:
432
+ if requested_level is not None:
433
+ return requested_level
434
+ return DEFAULT_LEVEL_BY_NAME.get(name, "tile")
435
+
436
+
409
437
  def _coerce_execution_options(
410
438
  options: ExecutionOptions | None,
411
439
  *,
@@ -444,3 +472,80 @@ def _recommended_execution_precision(model: Model | None) -> str:
444
472
  if settings is not None and settings.precision is not None:
445
473
  return settings.precision
446
474
  return "fp32"
475
+
476
+
477
+ def _resolve_direct_api_preprocessing(
478
+ model: Model,
479
+ preprocessing: PreprocessingConfig | None,
480
+ ) -> PreprocessingConfig:
481
+ if preprocessing is not None:
482
+ return preprocessing
483
+
484
+ settings = get_recommended_model_settings(getattr(model, "name", None))
485
+ target_tile_size_px = _default_target_tile_size_px(model, settings)
486
+ target_spacing_um = _default_target_spacing_um(model, settings)
487
+ return PreprocessingConfig(
488
+ backend="auto",
489
+ target_spacing_um=target_spacing_um,
490
+ target_tile_size_px=target_tile_size_px,
491
+ )
492
+
493
+
494
+ def _default_target_tile_size_px(model: Model, settings) -> int:
495
+ explicit_input_size = getattr(model, "_model_kwargs", {}).get("input_size")
496
+ if explicit_input_size is not None:
497
+ return int(explicit_input_size)
498
+ if settings is not None:
499
+ return int(settings.input_size[0])
500
+ return int(PreprocessingConfig().target_tile_size_px)
501
+
502
+
503
+ def _default_target_spacing_um(model: Model, settings) -> float:
504
+ if settings is None or not getattr(settings, "spacings_um", ()):
505
+ default_spacing = float(PreprocessingConfig().target_spacing_um)
506
+ logger.warning(
507
+ "No recommended preprocessing spacing is known for model '%s'; defaulting direct API calls to "
508
+ "target_spacing_um=%g. Pass PreprocessingConfig(...) to override.",
509
+ getattr(model, "name", None),
510
+ default_spacing,
511
+ )
512
+ return default_spacing
513
+
514
+ supported_spacings = tuple(float(value) for value in settings.spacings_um)
515
+ if len(supported_spacings) == 1:
516
+ return supported_spacings[0]
517
+
518
+ if any(abs(value - 0.5) <= 1e-8 for value in supported_spacings):
519
+ chosen = 0.5
520
+ else:
521
+ chosen = min(supported_spacings)
522
+ supported_text = ", ".join(f"{spacing:g}" for spacing in supported_spacings)
523
+ logger.warning(
524
+ "Model '%s' supports multiple spacings [%s]; defaulting direct API calls to target_spacing_um=%g. "
525
+ "Pass PreprocessingConfig(target_spacing_um=...) to choose another supported spacing.",
526
+ getattr(model, "name", None),
527
+ supported_text,
528
+ chosen,
529
+ )
530
+ return chosen
531
+
532
+
533
+ @contextmanager
534
+ def _auto_progress_reporting(*, output_dir: PathLike | None):
535
+ from slide2vec.progress import (
536
+ NullProgressReporter,
537
+ activate_progress_reporter,
538
+ create_api_progress_reporter,
539
+ get_progress_reporter,
540
+ )
541
+
542
+ active = get_progress_reporter()
543
+ if not isinstance(active, NullProgressReporter):
544
+ yield
545
+ return
546
+ reporter = create_api_progress_reporter(output_dir=output_dir)
547
+ if isinstance(reporter, NullProgressReporter):
548
+ yield
549
+ return
550
+ with activate_progress_reporter(reporter):
551
+ yield
@@ -23,7 +23,7 @@ def get_args_parser(add_help: bool = True):
23
23
  def build_model_and_pipeline(args):
24
24
  cfg, _cfg_path = _setup_cli_config(args)
25
25
  _hf_login()
26
- model = Model.from_pretrained(
26
+ model = Model.from_preset(
27
27
  cfg.model.name,
28
28
  level=cfg.model.level,
29
29
  mode=cfg.model.mode,
@@ -10,7 +10,7 @@ seed: 0 # seed for reproducibility
10
10
 
11
11
  tiling:
12
12
  on_the_fly: true # read tiles directly from WSI during embedding (requires cucim backend)
13
- gpu_decode: false # attempt GPU-accelerated JPEG decoding via nvImageCodec (experimental)
13
+ gpu_decode: false # GPU-accelerated batch decoding via device="cuda" in cucim read_region; set true to opt in when the runtime is configured for it
14
14
  adaptive_batching: false # when true, vary batch size to align with super tile boundaries (avoids redundant reads but batch size fluctuates)
15
15
  use_supertiles: true # group tiles into 8x8/4x4/2x2 super tile reads to reduce WSI read calls (on-the-fly path only)
16
16
  jpeg_backend: "turbojpeg" # JPEG encoder for tar extraction: "turbojpeg" (faster) or "pil" (compatible with older ground truth fixtures)
@@ -115,17 +115,16 @@ class CuCIMTileReader:
115
115
  tiling_result: TilingResult,
116
116
  *,
117
117
  num_cucim_workers: int = 4,
118
- gpu_decode: bool = False,
118
+ gpu_decode: bool = True,
119
119
  use_supertiles: bool = True,
120
120
  ):
121
- self._image_path = image_path
121
+ from hs2p.wsi.cucim_reader import CuImageReader
122
122
  self._x = tiling_result.x
123
123
  self._y = tiling_result.y
124
124
  self._read_level = tiling_result.read_level
125
125
  self._tile_size_px = int(tiling_result.read_tile_size_px)
126
126
  self._num_cucim_workers = num_cucim_workers
127
- self._gpu_decode = gpu_decode
128
- self._cu_image = None
127
+ self._reader = CuImageReader(image_path, gpu_decode=gpu_decode)
129
128
 
130
129
  self._use_supertiles = use_supertiles
131
130
  if use_supertiles:
@@ -141,29 +140,13 @@ class CuCIMTileReader:
141
140
  self._tile_to_st = None
142
141
  self.ordered_indices = None
143
142
 
144
- def _ensure_open(self):
145
- if self._cu_image is None:
146
- try:
147
- from cucim import CuImage
148
- except ImportError as exc:
149
- raise ImportError(
150
- "cucim is required for on-the-fly tile reading. "
151
- "Install it with: pip install cucim-cuXX (where XX matches your CUDA version)"
152
- ) from exc
153
- self._cu_image = CuImage(str(self._image_path))
154
-
155
143
  def _read_region(self, locations, size):
156
- kwargs = {
157
- "level": int(self._read_level),
158
- "num_workers": max(1, self._num_cucim_workers),
159
- }
160
- if self._gpu_decode:
161
- kwargs["device"] = "cuda"
162
- try:
163
- return self._cu_image.read_region(locations, size, **kwargs)
164
- except TypeError:
165
- kwargs.pop("device", None)
166
- return self._cu_image.read_region(locations, size, **kwargs)
144
+ return self._reader.read_region(
145
+ locations,
146
+ size,
147
+ level=int(self._read_level),
148
+ num_workers=self._num_cucim_workers,
149
+ )
167
150
 
168
151
  def read_batch(self, tile_indices: np.ndarray) -> torch.Tensor:
169
152
  tensor, _timing = self.read_batch_with_timing(tile_indices)
@@ -174,9 +157,9 @@ class CuCIMTileReader:
174
157
  return torch.empty(
175
158
  (0, 3, self._tile_size_px, self._tile_size_px), dtype=torch.uint8
176
159
  ), {"reader_open_ms": 0.0, "reader_read_ms": 0.0}
177
- was_closed = self._cu_image is None
160
+ was_closed = self._reader._cu_image is None
178
161
  open_start = time.perf_counter()
179
- self._ensure_open()
162
+ self._reader._ensure_open()
180
163
  reader_open_ms = (time.perf_counter() - open_start) * 1000.0 if was_closed else 0.0
181
164
  read_start = time.perf_counter()
182
165
 
@@ -242,7 +225,7 @@ class OnTheFlyBatchTileCollator:
242
225
  image_path: Path,
243
226
  tiling_result: TilingResult,
244
227
  num_cucim_workers: int = 4,
245
- gpu_decode: bool = False,
228
+ gpu_decode: bool = True,
246
229
  use_supertiles: bool = True,
247
230
  ):
248
231
  self.tile_size = int(tiling_result.read_tile_size_px)
@@ -58,7 +58,9 @@ class WSDTileReader:
58
58
 
59
59
  def _ensure_open(self) -> None:
60
60
  if self._wsi is None:
61
- import wholeslidedata as wsd
61
+ from slide2vec.utils.log_utils import suppress_c_stderr
62
+ with suppress_c_stderr():
63
+ import wholeslidedata as wsd
62
64
  from hs2p.wsi.backend import coerce_wsd_path
63
65
 
64
66
  self._wsi = wsd.WholeSlideImage(
@@ -41,7 +41,7 @@ def main(argv=None) -> int:
41
41
  local_rank = distributed.get_local_rank()
42
42
 
43
43
  model_spec = dict(request["model"])
44
- model = Model.from_pretrained(
44
+ model = Model.from_preset(
45
45
  model_spec["name"],
46
46
  level=model_spec["level"],
47
47
  device=f"cuda:{local_rank}",
@@ -37,7 +37,7 @@ def main(argv=None) -> int:
37
37
  world_size = distributed.get_global_size()
38
38
 
39
39
  model_spec = dict(request["model"])
40
- model = Model.from_pretrained(
40
+ model = Model.from_preset(
41
41
  model_spec["name"],
42
42
  level=model_spec["level"],
43
43
  device=f"cuda:{local_rank}",