slide2vec 3.1.0__tar.gz → 3.2.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {slide2vec-3.1.0/slide2vec.egg-info → slide2vec-3.2.1}/PKG-INFO +20 -20
- {slide2vec-3.1.0 → slide2vec-3.2.1}/README.md +16 -16
- {slide2vec-3.1.0 → slide2vec-3.2.1}/pyproject.toml +1 -1
- {slide2vec-3.1.0 → slide2vec-3.2.1}/setup.cfg +4 -4
- {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/__init__.py +1 -1
- {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/api.py +132 -27
- {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/cli.py +1 -1
- {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/configs/preprocessing/default.yaml +1 -1
- {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/data/cucim_tile_reader.py +12 -29
- {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/data/wsd_tile_reader.py +3 -1
- {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/distributed/direct_embed_worker.py +1 -1
- {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/distributed/pipeline_worker.py +1 -1
- {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/inference.py +56 -11
- {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/model_settings.py +1 -1
- {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/progress.py +43 -0
- {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/utils/log_utils.py +19 -0
- {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/utils/tiling_io.py +8 -0
- {slide2vec-3.1.0 → slide2vec-3.2.1/slide2vec.egg-info}/PKG-INFO +20 -20
- {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec.egg-info/requires.txt +3 -3
- {slide2vec-3.1.0 → slide2vec-3.2.1}/tests/test_hs2p_package_cutover.py +37 -2
- {slide2vec-3.1.0 → slide2vec-3.2.1}/tests/test_output_consistency.py +20 -3
- {slide2vec-3.1.0 → slide2vec-3.2.1}/tests/test_progress.py +15 -1
- {slide2vec-3.1.0 → slide2vec-3.2.1}/tests/test_regression_core.py +56 -6
- {slide2vec-3.1.0 → slide2vec-3.2.1}/tests/test_regression_inference.py +77 -10
- {slide2vec-3.1.0 → slide2vec-3.2.1}/tests/test_regression_models.py +203 -13
- {slide2vec-3.1.0 → slide2vec-3.2.1}/LICENSE +0 -0
- {slide2vec-3.1.0 → slide2vec-3.2.1}/MANIFEST.in +0 -0
- {slide2vec-3.1.0 → slide2vec-3.2.1}/setup.py +0 -0
- {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/__main__.py +0 -0
- {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/artifacts.py +0 -0
- {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/configs/__init__.py +0 -0
- {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/configs/models/conch.yaml +0 -0
- {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/configs/models/conchv15.yaml +0 -0
- {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/configs/models/default.yaml +0 -0
- {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/configs/models/h-optimus-0.yaml +0 -0
- {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/configs/models/h-optimus-1.yaml +0 -0
- {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/configs/models/h0-mini.yaml +0 -0
- {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/configs/models/hibou.yaml +0 -0
- {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/configs/models/kaiko-midnight.yaml +0 -0
- {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/configs/models/kaiko.yaml +0 -0
- {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/configs/models/musk.yaml +0 -0
- {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/configs/models/panda-vit-s.yaml +0 -0
- {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/configs/models/pathojepa.yaml +0 -0
- {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/configs/models/phikonv2.yaml +0 -0
- {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/configs/models/prism.yaml +0 -0
- {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/configs/models/prov-gigapath-slide.yaml +0 -0
- {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/configs/models/prov-gigapath-tile.yaml +0 -0
- {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/configs/models/titan.yaml +0 -0
- {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/configs/models/uni.yaml +0 -0
- {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/configs/models/uni2.yaml +0 -0
- {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/configs/models/virchow.yaml +0 -0
- {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/configs/models/virchow2.yaml +0 -0
- {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/data/__init__.py +0 -0
- {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/data/augmentations.py +0 -0
- {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/data/dataset.py +0 -0
- {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/data/tile_store.py +0 -0
- {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/distributed/__init__.py +0 -0
- {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/main.py +0 -0
- {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/models/__init__.py +0 -0
- {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/models/layers/__init__.py +0 -0
- {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/models/layers/attention.py +0 -0
- {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/models/layers/block.py +0 -0
- {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/models/layers/dino_head.py +0 -0
- {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/models/layers/drop_path.py +0 -0
- {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/models/layers/layer_scale.py +0 -0
- {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/models/layers/mlp.py +0 -0
- {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/models/layers/patch_embed.py +0 -0
- {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/models/layers/swiglu_ffn.py +0 -0
- {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/models/models.py +0 -0
- {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/models/vision_transformer_dino.py +0 -0
- {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/models/vision_transformer_dinov2.py +0 -0
- {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/models/vision_transformer_pathojepa.py +0 -0
- {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/resources.py +0 -0
- {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/utils/__init__.py +0 -0
- {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/utils/config.py +0 -0
- {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/utils/coordinates.py +0 -0
- {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/utils/paths.py +0 -0
- {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec/utils/utils.py +0 -0
- {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec.egg-info/SOURCES.txt +0 -0
- {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec.egg-info/dependency_links.txt +0 -0
- {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec.egg-info/entry_points.txt +0 -0
- {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec.egg-info/not-zip-safe +0 -0
- {slide2vec-3.1.0 → slide2vec-3.2.1}/slide2vec.egg-info/top_level.txt +0 -0
- {slide2vec-3.1.0 → slide2vec-3.2.1}/tests/test_batch_collator_timing.py +0 -0
- {slide2vec-3.1.0 → slide2vec-3.2.1}/tests/test_benchmark_embedding_throughput.py +0 -0
- {slide2vec-3.1.0 → slide2vec-3.2.1}/tests/test_benchmark_end_to_end_paths.py +0 -0
- {slide2vec-3.1.0 → slide2vec-3.2.1}/tests/test_benchmark_tile_read_strategies.py +0 -0
- {slide2vec-3.1.0 → slide2vec-3.2.1}/tests/test_dependency_split.py +0 -0
- {slide2vec-3.1.0 → slide2vec-3.2.1}/tests/test_release.py +0 -0
- {slide2vec-3.1.0 → slide2vec-3.2.1}/tests/test_tile_store.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: slide2vec
|
|
3
|
-
Version: 3.1
|
|
3
|
+
Version: 3.2.1
|
|
4
4
|
Summary: Embedding of whole slide images with Foundation Models
|
|
5
5
|
Home-page: https://github.com/clemsgrs/slide2vec
|
|
6
6
|
Author: Clément Grisi
|
|
@@ -20,13 +20,14 @@ Classifier: Programming Language :: Python :: 3.13
|
|
|
20
20
|
Requires-Python: >=3.10
|
|
21
21
|
Description-Content-Type: text/markdown
|
|
22
22
|
License-File: LICENSE
|
|
23
|
-
Requires-Dist: hs2p<3,>=2.
|
|
23
|
+
Requires-Dist: hs2p<3,>=2.5.1
|
|
24
24
|
Requires-Dist: omegaconf
|
|
25
25
|
Requires-Dist: h5py
|
|
26
26
|
Requires-Dist: matplotlib
|
|
27
27
|
Requires-Dist: numpy<2
|
|
28
28
|
Requires-Dist: pandas
|
|
29
29
|
Requires-Dist: pillow
|
|
30
|
+
Requires-Dist: PyTurboJPEG
|
|
30
31
|
Requires-Dist: rich
|
|
31
32
|
Requires-Dist: tqdm
|
|
32
33
|
Requires-Dist: torch
|
|
@@ -37,8 +38,7 @@ Requires-Dist: wholeslidedata<0.0.16
|
|
|
37
38
|
Requires-Dist: einops
|
|
38
39
|
Requires-Dist: timm
|
|
39
40
|
Provides-Extra: cucim
|
|
40
|
-
Requires-Dist: hs2p[cucim]<3,>=2.
|
|
41
|
-
Requires-Dist: PyTurboJPEG; extra == "cucim"
|
|
41
|
+
Requires-Dist: hs2p[cucim]<3,>=2.5.1; extra == "cucim"
|
|
42
42
|
Provides-Extra: models
|
|
43
43
|
Requires-Dist: huggingface-hub; extra == "models"
|
|
44
44
|
Requires-Dist: sacremoses; extra == "models"
|
|
@@ -80,38 +80,37 @@ pip install "slide2vec[models]"
|
|
|
80
80
|
## Python API
|
|
81
81
|
|
|
82
82
|
```python
|
|
83
|
-
from slide2vec import Model
|
|
83
|
+
from slide2vec import Model
|
|
84
|
+
from slide2vec.utils.config import hf_login
|
|
84
85
|
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
tissue_threshold=0.1,
|
|
90
|
-
)
|
|
91
|
-
embedded = model.embed_slide(
|
|
92
|
-
"/path/to/slide.svs",
|
|
93
|
-
preprocessing=preprocessing,
|
|
94
|
-
)
|
|
86
|
+
hf_login()
|
|
87
|
+
|
|
88
|
+
model = Model.from_preset("virchow2")
|
|
89
|
+
embedded = model.embed_slide("/path/to/slide.svs")
|
|
95
90
|
|
|
96
91
|
tile_embeddings = embedded.tile_embeddings
|
|
97
92
|
coordinates = embedded.coordinates
|
|
98
93
|
```
|
|
99
94
|
|
|
100
|
-
By default, `ExecutionOptions()` uses all available GPUs. Set `ExecutionOptions(num_gpus=4)` when you want to cap the sharding explicitly.
|
|
101
|
-
|
|
102
95
|
Use `Pipeline(...)` for manifest-driven batch processing when you want artifacts written to disk instead of only in-memory outputs:
|
|
103
96
|
|
|
104
97
|
```python
|
|
105
|
-
from slide2vec import ExecutionOptions, Pipeline
|
|
98
|
+
from slide2vec import ExecutionOptions, Pipeline, PreprocessingConfig
|
|
106
99
|
|
|
107
100
|
pipeline = Pipeline(
|
|
108
101
|
model=model,
|
|
109
|
-
preprocessing=
|
|
102
|
+
preprocessing=PreprocessingConfig(
|
|
103
|
+
target_spacing_um=0.5,
|
|
104
|
+
target_tile_size_px=224,
|
|
105
|
+
tissue_threshold=0.1,
|
|
106
|
+
),
|
|
110
107
|
execution=ExecutionOptions(output_dir="outputs/demo"),
|
|
111
108
|
)
|
|
112
109
|
result = pipeline.run(manifest_path="/path/to/slides.csv")
|
|
113
110
|
```
|
|
114
111
|
|
|
112
|
+
By default, `ExecutionOptions()` uses all available GPUs. Set `ExecutionOptions(num_gpus=4)` when you want to cap the sharding explicitly.
|
|
113
|
+
|
|
115
114
|
### Input Manifest
|
|
116
115
|
|
|
117
116
|
Manifest-driven runs use the schema below. `mask_path` and `spacing_at_level_0` are optional.
|
|
@@ -140,7 +139,7 @@ The package writes explicit artifact directories:
|
|
|
140
139
|
|
|
141
140
|
### Supported Models
|
|
142
141
|
|
|
143
|
-
`slide2vec` currently ships preset configs for
|
|
142
|
+
`slide2vec` currently ships preset configs for 20 tile-level models and 3 slide-level models.
|
|
144
143
|
For the full catalog and preset names, see [`docs/models.md`](docs/models.md).
|
|
145
144
|
|
|
146
145
|
## CLI
|
|
@@ -174,4 +173,5 @@ docker run --rm -it \
|
|
|
174
173
|
|
|
175
174
|
- [`docs/cli.md`](docs/cli.md) for the config-driven CLI guide
|
|
176
175
|
- [`docs/python-api.md`](docs/python-api.md) for the detailed API reference
|
|
176
|
+
- [`tutorials/api_walkthrough.ipynb`](tutorials/api_walkthrough.ipynb) for a notebook walkthrough of the API
|
|
177
177
|
- [`docs/models.md`](docs/models.md) for the full supported-model catalog
|
|
@@ -21,38 +21,37 @@ pip install "slide2vec[models]"
|
|
|
21
21
|
## Python API
|
|
22
22
|
|
|
23
23
|
```python
|
|
24
|
-
from slide2vec import Model
|
|
24
|
+
from slide2vec import Model
|
|
25
|
+
from slide2vec.utils.config import hf_login
|
|
25
26
|
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
tissue_threshold=0.1,
|
|
31
|
-
)
|
|
32
|
-
embedded = model.embed_slide(
|
|
33
|
-
"/path/to/slide.svs",
|
|
34
|
-
preprocessing=preprocessing,
|
|
35
|
-
)
|
|
27
|
+
hf_login()
|
|
28
|
+
|
|
29
|
+
model = Model.from_preset("virchow2")
|
|
30
|
+
embedded = model.embed_slide("/path/to/slide.svs")
|
|
36
31
|
|
|
37
32
|
tile_embeddings = embedded.tile_embeddings
|
|
38
33
|
coordinates = embedded.coordinates
|
|
39
34
|
```
|
|
40
35
|
|
|
41
|
-
By default, `ExecutionOptions()` uses all available GPUs. Set `ExecutionOptions(num_gpus=4)` when you want to cap the sharding explicitly.
|
|
42
|
-
|
|
43
36
|
Use `Pipeline(...)` for manifest-driven batch processing when you want artifacts written to disk instead of only in-memory outputs:
|
|
44
37
|
|
|
45
38
|
```python
|
|
46
|
-
from slide2vec import ExecutionOptions, Pipeline
|
|
39
|
+
from slide2vec import ExecutionOptions, Pipeline, PreprocessingConfig
|
|
47
40
|
|
|
48
41
|
pipeline = Pipeline(
|
|
49
42
|
model=model,
|
|
50
|
-
preprocessing=
|
|
43
|
+
preprocessing=PreprocessingConfig(
|
|
44
|
+
target_spacing_um=0.5,
|
|
45
|
+
target_tile_size_px=224,
|
|
46
|
+
tissue_threshold=0.1,
|
|
47
|
+
),
|
|
51
48
|
execution=ExecutionOptions(output_dir="outputs/demo"),
|
|
52
49
|
)
|
|
53
50
|
result = pipeline.run(manifest_path="/path/to/slides.csv")
|
|
54
51
|
```
|
|
55
52
|
|
|
53
|
+
By default, `ExecutionOptions()` uses all available GPUs. Set `ExecutionOptions(num_gpus=4)` when you want to cap the sharding explicitly.
|
|
54
|
+
|
|
56
55
|
### Input Manifest
|
|
57
56
|
|
|
58
57
|
Manifest-driven runs use the schema below. `mask_path` and `spacing_at_level_0` are optional.
|
|
@@ -81,7 +80,7 @@ The package writes explicit artifact directories:
|
|
|
81
80
|
|
|
82
81
|
### Supported Models
|
|
83
82
|
|
|
84
|
-
`slide2vec` currently ships preset configs for
|
|
83
|
+
`slide2vec` currently ships preset configs for 20 tile-level models and 3 slide-level models.
|
|
85
84
|
For the full catalog and preset names, see [`docs/models.md`](docs/models.md).
|
|
86
85
|
|
|
87
86
|
## CLI
|
|
@@ -115,4 +114,5 @@ docker run --rm -it \
|
|
|
115
114
|
|
|
116
115
|
- [`docs/cli.md`](docs/cli.md) for the config-driven CLI guide
|
|
117
116
|
- [`docs/python-api.md`](docs/python-api.md) for the detailed API reference
|
|
117
|
+
- [`tutorials/api_walkthrough.ipynb`](tutorials/api_walkthrough.ipynb) for a notebook walkthrough of the API
|
|
118
118
|
- [`docs/models.md`](docs/models.md) for the full supported-model catalog
|
|
@@ -23,7 +23,7 @@ warn_unused_configs = true
|
|
|
23
23
|
no_implicit_reexport = true
|
|
24
24
|
|
|
25
25
|
[tool.bumpver]
|
|
26
|
-
current_version = "3.1
|
|
26
|
+
current_version = "3.2.1"
|
|
27
27
|
version_pattern = "MAJOR.MINOR.PATCH"
|
|
28
28
|
commit = false # We do version bumping in CI, not as a commit
|
|
29
29
|
tag = false # Git tag already exists — we don't auto-tag
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[metadata]
|
|
2
2
|
name = slide2vec
|
|
3
|
-
version = 3.1
|
|
3
|
+
version = 3.2.1
|
|
4
4
|
description = Embedding of whole slide images with Foundation Models
|
|
5
5
|
author = Clément Grisi
|
|
6
6
|
platforms = unix, linux, osx, cygwin, win32
|
|
@@ -16,13 +16,14 @@ classifiers =
|
|
|
16
16
|
packages =
|
|
17
17
|
slide2vec
|
|
18
18
|
install_requires =
|
|
19
|
-
hs2p>=2.
|
|
19
|
+
hs2p>=2.5.1,<3
|
|
20
20
|
omegaconf
|
|
21
21
|
h5py
|
|
22
22
|
matplotlib
|
|
23
23
|
numpy<2
|
|
24
24
|
pandas
|
|
25
25
|
pillow
|
|
26
|
+
PyTurboJPEG
|
|
26
27
|
rich
|
|
27
28
|
tqdm
|
|
28
29
|
torch
|
|
@@ -38,8 +39,7 @@ include_package_data = True
|
|
|
38
39
|
|
|
39
40
|
[options.extras_require]
|
|
40
41
|
cucim =
|
|
41
|
-
hs2p[cucim]>=2.
|
|
42
|
-
PyTurboJPEG
|
|
42
|
+
hs2p[cucim]>=2.5.1,<3
|
|
43
43
|
models =
|
|
44
44
|
huggingface-hub
|
|
45
45
|
sacremoses
|
|
@@ -1,4 +1,7 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import os
|
|
1
3
|
from dataclasses import dataclass, field, replace
|
|
4
|
+
from contextlib import contextmanager
|
|
2
5
|
from pathlib import Path
|
|
3
6
|
from typing import TYPE_CHECKING, Any, Mapping, Protocol, Sequence, overload
|
|
4
7
|
|
|
@@ -23,6 +26,8 @@ DEFAULT_LEVEL_BY_NAME = {
|
|
|
23
26
|
"titan": "slide",
|
|
24
27
|
}
|
|
25
28
|
|
|
29
|
+
logger = logging.getLogger("slide2vec")
|
|
30
|
+
|
|
26
31
|
PathLike = str | Path
|
|
27
32
|
|
|
28
33
|
|
|
@@ -137,8 +142,8 @@ class ExecutionOptions:
|
|
|
137
142
|
output_dir=Path(cfg.output_dir),
|
|
138
143
|
output_format="pt",
|
|
139
144
|
batch_size=int(getattr(cfg.model, "batch_size", 1)),
|
|
140
|
-
num_workers=int(getattr(cfg.speed, "num_dataloader_workers", getattr(cfg.speed, "num_workers_embedding",
|
|
141
|
-
num_preprocessing_workers=int(getattr(cfg.speed, "num_preprocessing_workers",
|
|
145
|
+
num_workers=int(getattr(cfg.speed, "num_dataloader_workers", getattr(cfg.speed, "num_workers_embedding", 8))),
|
|
146
|
+
num_preprocessing_workers=int(getattr(cfg.speed, "num_preprocessing_workers", 8)),
|
|
142
147
|
num_gpus=1 if run_on_cpu else _coerce_num_gpus(configured_num_gpus),
|
|
143
148
|
precision="fp32" if run_on_cpu else requested_precision,
|
|
144
149
|
prefetch_factor=int(getattr(cfg.speed, "prefetch_factor_embedding", 4)),
|
|
@@ -156,6 +161,15 @@ class ExecutionOptions:
|
|
|
156
161
|
raise ValueError("ExecutionOptions.num_gpus must be at least 1")
|
|
157
162
|
if self.prefetch_factor < 1:
|
|
158
163
|
raise ValueError("ExecutionOptions.prefetch_factor must be at least 1")
|
|
164
|
+
slurm_cpu_limit = None
|
|
165
|
+
for env_name in ("SLURM_CPUS_PER_TASK", "SLURM_CPUS_ON_NODE", "SLURM_JOB_CPUS_PER_NODE"):
|
|
166
|
+
value = os.environ.get(env_name)
|
|
167
|
+
if value and value.strip().isdigit() and int(value.strip()) > 0:
|
|
168
|
+
slurm_cpu_limit = int(value.strip())
|
|
169
|
+
break
|
|
170
|
+
if slurm_cpu_limit is not None:
|
|
171
|
+
object.__setattr__(self, "num_workers", min(self.num_workers, slurm_cpu_limit))
|
|
172
|
+
object.__setattr__(self, "num_preprocessing_workers", min(self.num_preprocessing_workers, slurm_cpu_limit))
|
|
159
173
|
|
|
160
174
|
def with_output_dir(self, output_dir: PathLike | None) -> "ExecutionOptions":
|
|
161
175
|
if output_dir is None:
|
|
@@ -179,6 +193,9 @@ class EmbeddedSlide:
|
|
|
179
193
|
tile_size_lv0: int
|
|
180
194
|
image_path: Path
|
|
181
195
|
mask_path: Path | None = None
|
|
196
|
+
num_tiles: int | None = None
|
|
197
|
+
mask_preview_path: Path | None = None
|
|
198
|
+
tiling_preview_path: Path | None = None
|
|
182
199
|
latents: Any | None = None
|
|
183
200
|
|
|
184
201
|
|
|
@@ -214,7 +231,7 @@ class Model:
|
|
|
214
231
|
self._backend: LoadedModel | None = None
|
|
215
232
|
|
|
216
233
|
@classmethod
|
|
217
|
-
def
|
|
234
|
+
def from_preset(
|
|
218
235
|
cls,
|
|
219
236
|
name: str,
|
|
220
237
|
*,
|
|
@@ -230,7 +247,7 @@ class Model:
|
|
|
230
247
|
device: str = "auto",
|
|
231
248
|
) -> "Model":
|
|
232
249
|
canonical_name = _canonical_model_name(name)
|
|
233
|
-
resolved_level =
|
|
250
|
+
resolved_level = _resolve_model_level(canonical_name, requested_level=level)
|
|
234
251
|
return cls(
|
|
235
252
|
name=canonical_name,
|
|
236
253
|
level=resolved_level,
|
|
@@ -267,7 +284,8 @@ class Model:
|
|
|
267
284
|
_require_output_dir_for_persistence(resolved, method_name="Model.embed_tiles(...)")
|
|
268
285
|
if preprocessing is not None:
|
|
269
286
|
validate_model_runtime_compatibility(self, preprocessing, resolved)
|
|
270
|
-
|
|
287
|
+
with _auto_progress_reporting(output_dir=resolved.output_dir):
|
|
288
|
+
return embed_tiles(self, slides, tiling_results, execution=resolved, preprocessing=preprocessing)
|
|
271
289
|
|
|
272
290
|
def aggregate_tiles(
|
|
273
291
|
self,
|
|
@@ -280,14 +298,15 @@ class Model:
|
|
|
280
298
|
|
|
281
299
|
resolved = _coerce_execution_options(execution, model=self)
|
|
282
300
|
_require_output_dir_for_persistence(resolved, method_name="Model.aggregate_tiles(...)")
|
|
283
|
-
|
|
301
|
+
with _auto_progress_reporting(output_dir=resolved.output_dir):
|
|
302
|
+
return aggregate_tiles(self, tile_artifacts, execution=resolved, preprocessing=preprocessing)
|
|
284
303
|
|
|
285
304
|
@overload
|
|
286
305
|
def embed_slide(
|
|
287
306
|
self,
|
|
288
307
|
slide: PathLike,
|
|
289
308
|
*,
|
|
290
|
-
preprocessing: PreprocessingConfig,
|
|
309
|
+
preprocessing: PreprocessingConfig | None = None,
|
|
291
310
|
execution: ExecutionOptions | None = None,
|
|
292
311
|
sample_id: str | None = None,
|
|
293
312
|
mask_path: PathLike | None = None,
|
|
@@ -300,7 +319,7 @@ class Model:
|
|
|
300
319
|
self,
|
|
301
320
|
slide: Mapping[str, object] | SlideLike | SlideSpec,
|
|
302
321
|
*,
|
|
303
|
-
preprocessing: PreprocessingConfig,
|
|
322
|
+
preprocessing: PreprocessingConfig | None = None,
|
|
304
323
|
execution: ExecutionOptions | None = None,
|
|
305
324
|
sample_id: None = None,
|
|
306
325
|
mask_path: None = None,
|
|
@@ -312,7 +331,7 @@ class Model:
|
|
|
312
331
|
self,
|
|
313
332
|
slide: SlideInput,
|
|
314
333
|
*,
|
|
315
|
-
preprocessing: PreprocessingConfig,
|
|
334
|
+
preprocessing: PreprocessingConfig | None = None,
|
|
316
335
|
execution: ExecutionOptions | None = None,
|
|
317
336
|
sample_id: str | None = None,
|
|
318
337
|
mask_path: PathLike | None = None,
|
|
@@ -339,19 +358,21 @@ class Model:
|
|
|
339
358
|
self,
|
|
340
359
|
slides: SlideSequence,
|
|
341
360
|
*,
|
|
342
|
-
preprocessing: PreprocessingConfig,
|
|
361
|
+
preprocessing: PreprocessingConfig | None = None,
|
|
343
362
|
execution: ExecutionOptions | None = None,
|
|
344
363
|
) -> list[EmbeddedSlide]:
|
|
345
364
|
from slide2vec.inference import embed_slides
|
|
346
365
|
|
|
347
366
|
resolved = _coerce_execution_options(execution, model=self)
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
self,
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
367
|
+
resolved_preprocessing = _resolve_direct_api_preprocessing(self, preprocessing)
|
|
368
|
+
with _auto_progress_reporting(output_dir=resolved.output_dir):
|
|
369
|
+
validate_model_runtime_compatibility(self, resolved_preprocessing, resolved)
|
|
370
|
+
return embed_slides(
|
|
371
|
+
self,
|
|
372
|
+
slides,
|
|
373
|
+
preprocessing=resolved_preprocessing,
|
|
374
|
+
execution=resolved,
|
|
375
|
+
)
|
|
355
376
|
|
|
356
377
|
def _load_backend(self) -> "LoadedModel":
|
|
357
378
|
if self._backend is None:
|
|
@@ -390,22 +411,29 @@ class Pipeline:
|
|
|
390
411
|
) -> RunResult:
|
|
391
412
|
from slide2vec.inference import run_pipeline
|
|
392
413
|
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
414
|
+
with _auto_progress_reporting(output_dir=self.execution.output_dir):
|
|
415
|
+
if not tiling_only:
|
|
416
|
+
validate_model_runtime_compatibility(self.model, self.preprocessing, self.execution)
|
|
417
|
+
return run_pipeline(
|
|
418
|
+
self.model,
|
|
419
|
+
slides=slides,
|
|
420
|
+
manifest_path=manifest_path,
|
|
421
|
+
preprocessing=self.preprocessing,
|
|
422
|
+
tiling_only=tiling_only,
|
|
423
|
+
execution=self.execution,
|
|
424
|
+
)
|
|
403
425
|
|
|
404
426
|
|
|
405
427
|
def _canonical_model_name(name: str) -> str:
|
|
406
428
|
return canonicalize_model_name(name)
|
|
407
429
|
|
|
408
430
|
|
|
431
|
+
def _resolve_model_level(name: str, *, requested_level: str | None) -> str:
|
|
432
|
+
if requested_level is not None:
|
|
433
|
+
return requested_level
|
|
434
|
+
return DEFAULT_LEVEL_BY_NAME.get(name, "tile")
|
|
435
|
+
|
|
436
|
+
|
|
409
437
|
def _coerce_execution_options(
|
|
410
438
|
options: ExecutionOptions | None,
|
|
411
439
|
*,
|
|
@@ -444,3 +472,80 @@ def _recommended_execution_precision(model: Model | None) -> str:
|
|
|
444
472
|
if settings is not None and settings.precision is not None:
|
|
445
473
|
return settings.precision
|
|
446
474
|
return "fp32"
|
|
475
|
+
|
|
476
|
+
|
|
477
|
+
def _resolve_direct_api_preprocessing(
|
|
478
|
+
model: Model,
|
|
479
|
+
preprocessing: PreprocessingConfig | None,
|
|
480
|
+
) -> PreprocessingConfig:
|
|
481
|
+
if preprocessing is not None:
|
|
482
|
+
return preprocessing
|
|
483
|
+
|
|
484
|
+
settings = get_recommended_model_settings(getattr(model, "name", None))
|
|
485
|
+
target_tile_size_px = _default_target_tile_size_px(model, settings)
|
|
486
|
+
target_spacing_um = _default_target_spacing_um(model, settings)
|
|
487
|
+
return PreprocessingConfig(
|
|
488
|
+
backend="auto",
|
|
489
|
+
target_spacing_um=target_spacing_um,
|
|
490
|
+
target_tile_size_px=target_tile_size_px,
|
|
491
|
+
)
|
|
492
|
+
|
|
493
|
+
|
|
494
|
+
def _default_target_tile_size_px(model: Model, settings) -> int:
|
|
495
|
+
explicit_input_size = getattr(model, "_model_kwargs", {}).get("input_size")
|
|
496
|
+
if explicit_input_size is not None:
|
|
497
|
+
return int(explicit_input_size)
|
|
498
|
+
if settings is not None:
|
|
499
|
+
return int(settings.input_size[0])
|
|
500
|
+
return int(PreprocessingConfig().target_tile_size_px)
|
|
501
|
+
|
|
502
|
+
|
|
503
|
+
def _default_target_spacing_um(model: Model, settings) -> float:
|
|
504
|
+
if settings is None or not getattr(settings, "spacings_um", ()):
|
|
505
|
+
default_spacing = float(PreprocessingConfig().target_spacing_um)
|
|
506
|
+
logger.warning(
|
|
507
|
+
"No recommended preprocessing spacing is known for model '%s'; defaulting direct API calls to "
|
|
508
|
+
"target_spacing_um=%g. Pass PreprocessingConfig(...) to override.",
|
|
509
|
+
getattr(model, "name", None),
|
|
510
|
+
default_spacing,
|
|
511
|
+
)
|
|
512
|
+
return default_spacing
|
|
513
|
+
|
|
514
|
+
supported_spacings = tuple(float(value) for value in settings.spacings_um)
|
|
515
|
+
if len(supported_spacings) == 1:
|
|
516
|
+
return supported_spacings[0]
|
|
517
|
+
|
|
518
|
+
if any(abs(value - 0.5) <= 1e-8 for value in supported_spacings):
|
|
519
|
+
chosen = 0.5
|
|
520
|
+
else:
|
|
521
|
+
chosen = min(supported_spacings)
|
|
522
|
+
supported_text = ", ".join(f"{spacing:g}" for spacing in supported_spacings)
|
|
523
|
+
logger.warning(
|
|
524
|
+
"Model '%s' supports multiple spacings [%s]; defaulting direct API calls to target_spacing_um=%g. "
|
|
525
|
+
"Pass PreprocessingConfig(target_spacing_um=...) to choose another supported spacing.",
|
|
526
|
+
getattr(model, "name", None),
|
|
527
|
+
supported_text,
|
|
528
|
+
chosen,
|
|
529
|
+
)
|
|
530
|
+
return chosen
|
|
531
|
+
|
|
532
|
+
|
|
533
|
+
@contextmanager
|
|
534
|
+
def _auto_progress_reporting(*, output_dir: PathLike | None):
|
|
535
|
+
from slide2vec.progress import (
|
|
536
|
+
NullProgressReporter,
|
|
537
|
+
activate_progress_reporter,
|
|
538
|
+
create_api_progress_reporter,
|
|
539
|
+
get_progress_reporter,
|
|
540
|
+
)
|
|
541
|
+
|
|
542
|
+
active = get_progress_reporter()
|
|
543
|
+
if not isinstance(active, NullProgressReporter):
|
|
544
|
+
yield
|
|
545
|
+
return
|
|
546
|
+
reporter = create_api_progress_reporter(output_dir=output_dir)
|
|
547
|
+
if isinstance(reporter, NullProgressReporter):
|
|
548
|
+
yield
|
|
549
|
+
return
|
|
550
|
+
with activate_progress_reporter(reporter):
|
|
551
|
+
yield
|
|
@@ -23,7 +23,7 @@ def get_args_parser(add_help: bool = True):
|
|
|
23
23
|
def build_model_and_pipeline(args):
|
|
24
24
|
cfg, _cfg_path = _setup_cli_config(args)
|
|
25
25
|
_hf_login()
|
|
26
|
-
model = Model.
|
|
26
|
+
model = Model.from_preset(
|
|
27
27
|
cfg.model.name,
|
|
28
28
|
level=cfg.model.level,
|
|
29
29
|
mode=cfg.model.mode,
|
|
@@ -10,7 +10,7 @@ seed: 0 # seed for reproducibility
|
|
|
10
10
|
|
|
11
11
|
tiling:
|
|
12
12
|
on_the_fly: true # read tiles directly from WSI during embedding (requires cucim backend)
|
|
13
|
-
gpu_decode: false #
|
|
13
|
+
gpu_decode: false # GPU-accelerated batch decoding via device="cuda" in cucim read_region; set true to opt in when the runtime is configured for it
|
|
14
14
|
adaptive_batching: false # when true, vary batch size to align with super tile boundaries (avoids redundant reads but batch size fluctuates)
|
|
15
15
|
use_supertiles: true # group tiles into 8x8/4x4/2x2 super tile reads to reduce WSI read calls (on-the-fly path only)
|
|
16
16
|
jpeg_backend: "turbojpeg" # JPEG encoder for tar extraction: "turbojpeg" (faster) or "pil" (compatible with older ground truth fixtures)
|
|
@@ -115,17 +115,16 @@ class CuCIMTileReader:
|
|
|
115
115
|
tiling_result: TilingResult,
|
|
116
116
|
*,
|
|
117
117
|
num_cucim_workers: int = 4,
|
|
118
|
-
gpu_decode: bool =
|
|
118
|
+
gpu_decode: bool = True,
|
|
119
119
|
use_supertiles: bool = True,
|
|
120
120
|
):
|
|
121
|
-
|
|
121
|
+
from hs2p.wsi.cucim_reader import CuImageReader
|
|
122
122
|
self._x = tiling_result.x
|
|
123
123
|
self._y = tiling_result.y
|
|
124
124
|
self._read_level = tiling_result.read_level
|
|
125
125
|
self._tile_size_px = int(tiling_result.read_tile_size_px)
|
|
126
126
|
self._num_cucim_workers = num_cucim_workers
|
|
127
|
-
self.
|
|
128
|
-
self._cu_image = None
|
|
127
|
+
self._reader = CuImageReader(image_path, gpu_decode=gpu_decode)
|
|
129
128
|
|
|
130
129
|
self._use_supertiles = use_supertiles
|
|
131
130
|
if use_supertiles:
|
|
@@ -141,29 +140,13 @@ class CuCIMTileReader:
|
|
|
141
140
|
self._tile_to_st = None
|
|
142
141
|
self.ordered_indices = None
|
|
143
142
|
|
|
144
|
-
def _ensure_open(self):
|
|
145
|
-
if self._cu_image is None:
|
|
146
|
-
try:
|
|
147
|
-
from cucim import CuImage
|
|
148
|
-
except ImportError as exc:
|
|
149
|
-
raise ImportError(
|
|
150
|
-
"cucim is required for on-the-fly tile reading. "
|
|
151
|
-
"Install it with: pip install cucim-cuXX (where XX matches your CUDA version)"
|
|
152
|
-
) from exc
|
|
153
|
-
self._cu_image = CuImage(str(self._image_path))
|
|
154
|
-
|
|
155
143
|
def _read_region(self, locations, size):
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
try:
|
|
163
|
-
return self._cu_image.read_region(locations, size, **kwargs)
|
|
164
|
-
except TypeError:
|
|
165
|
-
kwargs.pop("device", None)
|
|
166
|
-
return self._cu_image.read_region(locations, size, **kwargs)
|
|
144
|
+
return self._reader.read_region(
|
|
145
|
+
locations,
|
|
146
|
+
size,
|
|
147
|
+
level=int(self._read_level),
|
|
148
|
+
num_workers=self._num_cucim_workers,
|
|
149
|
+
)
|
|
167
150
|
|
|
168
151
|
def read_batch(self, tile_indices: np.ndarray) -> torch.Tensor:
|
|
169
152
|
tensor, _timing = self.read_batch_with_timing(tile_indices)
|
|
@@ -174,9 +157,9 @@ class CuCIMTileReader:
|
|
|
174
157
|
return torch.empty(
|
|
175
158
|
(0, 3, self._tile_size_px, self._tile_size_px), dtype=torch.uint8
|
|
176
159
|
), {"reader_open_ms": 0.0, "reader_read_ms": 0.0}
|
|
177
|
-
was_closed = self._cu_image is None
|
|
160
|
+
was_closed = self._reader._cu_image is None
|
|
178
161
|
open_start = time.perf_counter()
|
|
179
|
-
self._ensure_open()
|
|
162
|
+
self._reader._ensure_open()
|
|
180
163
|
reader_open_ms = (time.perf_counter() - open_start) * 1000.0 if was_closed else 0.0
|
|
181
164
|
read_start = time.perf_counter()
|
|
182
165
|
|
|
@@ -242,7 +225,7 @@ class OnTheFlyBatchTileCollator:
|
|
|
242
225
|
image_path: Path,
|
|
243
226
|
tiling_result: TilingResult,
|
|
244
227
|
num_cucim_workers: int = 4,
|
|
245
|
-
gpu_decode: bool =
|
|
228
|
+
gpu_decode: bool = True,
|
|
246
229
|
use_supertiles: bool = True,
|
|
247
230
|
):
|
|
248
231
|
self.tile_size = int(tiling_result.read_tile_size_px)
|
|
@@ -58,7 +58,9 @@ class WSDTileReader:
|
|
|
58
58
|
|
|
59
59
|
def _ensure_open(self) -> None:
|
|
60
60
|
if self._wsi is None:
|
|
61
|
-
|
|
61
|
+
from slide2vec.utils.log_utils import suppress_c_stderr
|
|
62
|
+
with suppress_c_stderr():
|
|
63
|
+
import wholeslidedata as wsd
|
|
62
64
|
from hs2p.wsi.backend import coerce_wsd_path
|
|
63
65
|
|
|
64
66
|
self._wsi = wsd.WholeSlideImage(
|
|
@@ -41,7 +41,7 @@ def main(argv=None) -> int:
|
|
|
41
41
|
local_rank = distributed.get_local_rank()
|
|
42
42
|
|
|
43
43
|
model_spec = dict(request["model"])
|
|
44
|
-
model = Model.
|
|
44
|
+
model = Model.from_preset(
|
|
45
45
|
model_spec["name"],
|
|
46
46
|
level=model_spec["level"],
|
|
47
47
|
device=f"cuda:{local_rank}",
|
|
@@ -37,7 +37,7 @@ def main(argv=None) -> int:
|
|
|
37
37
|
world_size = distributed.get_global_size()
|
|
38
38
|
|
|
39
39
|
model_spec = dict(request["model"])
|
|
40
|
-
model = Model.
|
|
40
|
+
model = Model.from_preset(
|
|
41
41
|
model_spec["name"],
|
|
42
42
|
level=model_spec["level"],
|
|
43
43
|
device=f"cuda:{local_rank}",
|