slide2vec 4.1.1__tar.gz → 4.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {slide2vec-4.1.1 → slide2vec-4.3.0}/PKG-INFO +31 -6
- {slide2vec-4.1.1 → slide2vec-4.3.0}/README.md +20 -3
- {slide2vec-4.1.1 → slide2vec-4.3.0}/pyproject.toml +15 -5
- {slide2vec-4.1.1 → slide2vec-4.3.0}/slide2vec/__init__.py +11 -2
- {slide2vec-4.1.1 → slide2vec-4.3.0}/slide2vec/api.py +122 -9
- {slide2vec-4.1.1 → slide2vec-4.3.0}/slide2vec/artifacts.py +53 -0
- {slide2vec-4.1.1 → slide2vec-4.3.0}/slide2vec/cli.py +12 -9
- slide2vec-4.3.0/slide2vec/configs/__init__.py +4 -0
- {slide2vec-4.1.1 → slide2vec-4.3.0}/slide2vec/configs/default.yaml +9 -5
- {slide2vec-4.1.1/slide2vec → slide2vec-4.3.0/slide2vec/configs}/resources.py +2 -1
- {slide2vec-4.1.1 → slide2vec-4.3.0}/slide2vec/distributed/direct_embed_worker.py +2 -2
- {slide2vec-4.1.1 → slide2vec-4.3.0}/slide2vec/distributed/pipeline_worker.py +4 -4
- {slide2vec-4.1.1 → slide2vec-4.3.0}/slide2vec/encoders/__init__.py +2 -0
- {slide2vec-4.1.1 → slide2vec-4.3.0}/slide2vec/encoders/base.py +27 -0
- {slide2vec-4.1.1 → slide2vec-4.3.0}/slide2vec/encoders/models/__init__.py +4 -0
- slide2vec-4.3.0/slide2vec/encoders/models/lunit.py +21 -0
- slide2vec-4.3.0/slide2vec/encoders/models/moozy/__init__.py +114 -0
- slide2vec-4.3.0/slide2vec/encoders/models/moozy/blocks.py +272 -0
- slide2vec-4.3.0/slide2vec/encoders/models/moozy/case.py +91 -0
- slide2vec-4.3.0/slide2vec/encoders/models/moozy/loading.py +103 -0
- slide2vec-4.3.0/slide2vec/encoders/models/moozy/slide.py +152 -0
- slide2vec-4.3.0/slide2vec/encoders/models/moozy/types.py +13 -0
- {slide2vec-4.1.1 → slide2vec-4.3.0}/slide2vec/encoders/registry.py +7 -6
- {slide2vec-4.1.1 → slide2vec-4.3.0}/slide2vec/encoders/validation.py +1 -1
- {slide2vec-4.1.1 → slide2vec-4.3.0}/slide2vec/inference.py +531 -1296
- {slide2vec-4.1.1 → slide2vec-4.3.0}/slide2vec/progress.py +135 -5
- slide2vec-4.3.0/slide2vec/runtime/__init__.py +2 -0
- slide2vec-4.3.0/slide2vec/runtime/batching.py +441 -0
- slide2vec-4.3.0/slide2vec/runtime/distributed.py +194 -0
- slide2vec-4.3.0/slide2vec/runtime/embedding.py +157 -0
- slide2vec-4.3.0/slide2vec/runtime/hierarchical.py +105 -0
- {slide2vec-4.1.1/slide2vec → slide2vec-4.3.0/slide2vec/runtime}/model_settings.py +1 -0
- slide2vec-4.3.0/slide2vec/runtime/persistence.py +165 -0
- slide2vec-4.3.0/slide2vec/runtime/progress_bridge.py +52 -0
- {slide2vec-4.1.1/slide2vec → slide2vec-4.3.0/slide2vec/runtime}/registry.py +1 -0
- slide2vec-4.3.0/slide2vec/runtime/serialization.py +119 -0
- slide2vec-4.3.0/slide2vec/runtime/tiling.py +97 -0
- slide2vec-4.3.0/slide2vec/runtime/types.py +48 -0
- {slide2vec-4.1.1 → slide2vec-4.3.0}/slide2vec/utils/config.py +11 -6
- {slide2vec-4.1.1 → slide2vec-4.3.0}/slide2vec/utils/tiling_io.py +21 -0
- {slide2vec-4.1.1 → slide2vec-4.3.0}/slide2vec.egg-info/PKG-INFO +31 -6
- {slide2vec-4.1.1 → slide2vec-4.3.0}/slide2vec.egg-info/SOURCES.txt +21 -7
- slide2vec-4.3.0/slide2vec.egg-info/entry_points.txt +2 -0
- {slide2vec-4.1.1 → slide2vec-4.3.0}/slide2vec.egg-info/requires.txt +12 -2
- slide2vec-4.3.0/tests/test_architecture_runtime_split.py +60 -0
- {slide2vec-4.1.1 → slide2vec-4.3.0}/tests/test_hs2p_package_cutover.py +6 -4
- {slide2vec-4.1.1 → slide2vec-4.3.0}/tests/test_progress.py +416 -8
- {slide2vec-4.1.1 → slide2vec-4.3.0}/tests/test_regression_core.py +125 -8
- {slide2vec-4.1.1 → slide2vec-4.3.0}/tests/test_regression_inference.py +342 -104
- {slide2vec-4.1.1 → slide2vec-4.3.0}/tests/test_regression_models.py +1 -1
- slide2vec-4.1.1/slide2vec/configs/__init__.py +0 -4
- slide2vec-4.1.1/slide2vec/runtime_types.py +0 -14
- slide2vec-4.1.1/slide2vec.egg-info/entry_points.txt +0 -2
- slide2vec-4.1.1/tests/test_batch_collator_timing.py +0 -161
- slide2vec-4.1.1/tests/test_output_consistency.py +0 -186
- slide2vec-4.1.1/tests/test_packaging_metadata.py +0 -23
- {slide2vec-4.1.1 → slide2vec-4.3.0}/LICENSE +0 -0
- {slide2vec-4.1.1 → slide2vec-4.3.0}/setup.cfg +0 -0
- {slide2vec-4.1.1 → slide2vec-4.3.0}/slide2vec/__main__.py +0 -0
- {slide2vec-4.1.1 → slide2vec-4.3.0}/slide2vec/data/__init__.py +0 -0
- {slide2vec-4.1.1 → slide2vec-4.3.0}/slide2vec/data/dataset.py +0 -0
- {slide2vec-4.1.1 → slide2vec-4.3.0}/slide2vec/data/tile_reader.py +0 -0
- {slide2vec-4.1.1 → slide2vec-4.3.0}/slide2vec/data/tile_store.py +0 -0
- {slide2vec-4.1.1 → slide2vec-4.3.0}/slide2vec/distributed/__init__.py +0 -0
- {slide2vec-4.1.1 → slide2vec-4.3.0}/slide2vec/encoders/models/conch.py +0 -0
- {slide2vec-4.1.1 → slide2vec-4.3.0}/slide2vec/encoders/models/gigapath.py +0 -0
- {slide2vec-4.1.1 → slide2vec-4.3.0}/slide2vec/encoders/models/hibou.py +0 -0
- {slide2vec-4.1.1 → slide2vec-4.3.0}/slide2vec/encoders/models/hoptimus.py +0 -0
- {slide2vec-4.1.1 → slide2vec-4.3.0}/slide2vec/encoders/models/midnight.py +0 -0
- {slide2vec-4.1.1 → slide2vec-4.3.0}/slide2vec/encoders/models/musk.py +0 -0
- {slide2vec-4.1.1 → slide2vec-4.3.0}/slide2vec/encoders/models/phikon.py +0 -0
- {slide2vec-4.1.1 → slide2vec-4.3.0}/slide2vec/encoders/models/prism.py +0 -0
- {slide2vec-4.1.1 → slide2vec-4.3.0}/slide2vec/encoders/models/prost40m.py +0 -0
- {slide2vec-4.1.1 → slide2vec-4.3.0}/slide2vec/encoders/models/titan.py +0 -0
- {slide2vec-4.1.1 → slide2vec-4.3.0}/slide2vec/encoders/models/uni.py +0 -0
- {slide2vec-4.1.1 → slide2vec-4.3.0}/slide2vec/encoders/models/virchow.py +0 -0
- {slide2vec-4.1.1 → slide2vec-4.3.0}/slide2vec/main.py +0 -0
- {slide2vec-4.1.1 → slide2vec-4.3.0}/slide2vec/utils/__init__.py +0 -0
- {slide2vec-4.1.1 → slide2vec-4.3.0}/slide2vec/utils/coordinates.py +0 -0
- {slide2vec-4.1.1 → slide2vec-4.3.0}/slide2vec/utils/log_utils.py +0 -0
- {slide2vec-4.1.1 → slide2vec-4.3.0}/slide2vec/utils/utils.py +0 -0
- {slide2vec-4.1.1 → slide2vec-4.3.0}/slide2vec.egg-info/dependency_links.txt +0 -0
- {slide2vec-4.1.1 → slide2vec-4.3.0}/slide2vec.egg-info/not-zip-safe +0 -0
- {slide2vec-4.1.1 → slide2vec-4.3.0}/slide2vec.egg-info/top_level.txt +0 -0
- {slide2vec-4.1.1 → slide2vec-4.3.0}/tests/test_encoder_registry.py +0 -0
- {slide2vec-4.1.1 → slide2vec-4.3.0}/tests/test_tile_store.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: slide2vec
|
|
3
|
-
Version: 4.
|
|
3
|
+
Version: 4.3.0
|
|
4
4
|
Summary: Embedding of whole slide images with Foundation Models
|
|
5
5
|
Author-email: Clément Grisi <clement.grisi@radboudumc.nl>
|
|
6
6
|
License-Expression: Apache-2.0
|
|
@@ -15,7 +15,7 @@ Classifier: Programming Language :: Python :: 3.13
|
|
|
15
15
|
Requires-Python: >=3.10
|
|
16
16
|
Description-Content-Type: text/markdown
|
|
17
17
|
License-File: LICENSE
|
|
18
|
-
Requires-Dist: hs2p[asap,cucim,openslide,vips]>=
|
|
18
|
+
Requires-Dist: hs2p[asap,cucim,openslide,sam2,vips]>=4.0.0
|
|
19
19
|
Requires-Dist: omegaconf
|
|
20
20
|
Requires-Dist: matplotlib
|
|
21
21
|
Requires-Dist: numpy<2
|
|
@@ -50,6 +50,8 @@ Requires-Dist: xformers==0.0.31; extra == "prism"
|
|
|
50
50
|
Provides-Extra: hibou
|
|
51
51
|
Requires-Dist: scipy~=1.8.1; extra == "hibou"
|
|
52
52
|
Requires-Dist: scikit-image~=0.19.3; extra == "hibou"
|
|
53
|
+
Provides-Extra: moozy
|
|
54
|
+
Requires-Dist: huggingface_hub<1.0,>=0.30.0; extra == "moozy"
|
|
53
55
|
Provides-Extra: titan
|
|
54
56
|
Requires-Dist: torch==2.0.1; extra == "titan"
|
|
55
57
|
Requires-Dist: timm==1.0.3; extra == "titan"
|
|
@@ -63,7 +65,7 @@ Requires-Dist: numpy<2; extra == "fm"
|
|
|
63
65
|
Requires-Dist: pandas; extra == "fm"
|
|
64
66
|
Requires-Dist: pillow; extra == "fm"
|
|
65
67
|
Requires-Dist: rich; extra == "fm"
|
|
66
|
-
Requires-Dist: hs2p[asap,cucim,openslide,vips]>=
|
|
68
|
+
Requires-Dist: hs2p[asap,cucim,openslide,sam2,vips]>=4.0.0; extra == "fm"
|
|
67
69
|
Requires-Dist: wandb; extra == "fm"
|
|
68
70
|
Requires-Dist: torch<2.8,>=2.3; extra == "fm"
|
|
69
71
|
Requires-Dist: torchvision>=0.18.0; extra == "fm"
|
|
@@ -87,6 +89,12 @@ Requires-Dist: fairscale; extra == "fm"
|
|
|
87
89
|
Requires-Dist: packaging==23.2; extra == "fm"
|
|
88
90
|
Requires-Dist: ninja==1.11.1.1; extra == "fm"
|
|
89
91
|
Requires-Dist: psutil<6; extra == "fm"
|
|
92
|
+
Provides-Extra: docs
|
|
93
|
+
Requires-Dist: sphinx>=8.1; extra == "docs"
|
|
94
|
+
Requires-Dist: furo; extra == "docs"
|
|
95
|
+
Requires-Dist: myst-parser; extra == "docs"
|
|
96
|
+
Requires-Dist: sphinx-copybutton; extra == "docs"
|
|
97
|
+
Requires-Dist: sphinx-autodoc-typehints; extra == "docs"
|
|
90
98
|
Provides-Extra: testing
|
|
91
99
|
Requires-Dist: pytest>=6.0; extra == "testing"
|
|
92
100
|
Requires-Dist: pytest-cov>=2.0; extra == "testing"
|
|
@@ -99,9 +107,12 @@ Dynamic: license-file
|
|
|
99
107
|
# slide2vec
|
|
100
108
|
|
|
101
109
|
[](https://pypi.org/project/slide2vec/)
|
|
110
|
+
[](https://clemsgrs.github.io/slide2vec/)
|
|
102
111
|
|
|
103
112
|
`slide2vec` is a Python package for efficient encoding of whole-slide images using publicly available foundation models. It builds on [`hs2p`](https://pypi.org/project/hs2p/) for fast preprocessing and exposes a focused surface around `Model`, `Pipeline`, and `ExecutionOptions`.
|
|
104
113
|
|
|
114
|
+
Documentation site: [https://clemsgrs.github.io/slide2vec/](https://clemsgrs.github.io/slide2vec/)
|
|
115
|
+
|
|
105
116
|
## Installation
|
|
106
117
|
|
|
107
118
|
```shell
|
|
@@ -119,6 +130,8 @@ pip install git+https://github.com/Mahmoodlab/CONCH.git
|
|
|
119
130
|
pip install git+https://github.com/prov-gigapath/prov-gigapath.git
|
|
120
131
|
```
|
|
121
132
|
|
|
133
|
+
AtlasPatch-backed tissue segmentation is available through hs2p's `sam2` path in the bundled install.
|
|
134
|
+
|
|
122
135
|
## Python API
|
|
123
136
|
|
|
124
137
|
```python
|
|
@@ -135,6 +148,17 @@ x = embedded.x
|
|
|
135
148
|
y = embedded.y
|
|
136
149
|
```
|
|
137
150
|
|
|
151
|
+
Use `list_models()` when you want to inspect the shipped presets programmatically:
|
|
152
|
+
|
|
153
|
+
```python
|
|
154
|
+
from slide2vec import list_models
|
|
155
|
+
|
|
156
|
+
all_models = list_models()
|
|
157
|
+
tile_models = list_models("tile")
|
|
158
|
+
slide_models = list_models("slide")
|
|
159
|
+
patient_models = list_models("patient")
|
|
160
|
+
```
|
|
161
|
+
|
|
138
162
|
Use `Pipeline(...)` for manifest-driven batch processing when you want artifacts written to disk instead of only in-memory outputs:
|
|
139
163
|
|
|
140
164
|
```python
|
|
@@ -210,7 +234,7 @@ The CLI is a thin wrapper over the package API.
|
|
|
210
234
|
Bundled configs live under `slide2vec/configs/preprocessing/` and `slide2vec/configs/models/`.
|
|
211
235
|
|
|
212
236
|
```shell
|
|
213
|
-
|
|
237
|
+
slide2vec /path/to/config.yaml
|
|
214
238
|
```
|
|
215
239
|
|
|
216
240
|
By default, manifest-driven CLI runs use all available GPUs. Set `speed.num_gpus=4` when you want to cap the sharding explicitly.
|
|
@@ -233,7 +257,8 @@ docker run --rm -it \
|
|
|
233
257
|
|
|
234
258
|
## Documentation
|
|
235
259
|
|
|
236
|
-
- [
|
|
260
|
+
- [Documentation website](https://clemsgrs.github.io/slide2vec/) for the polished docs site
|
|
237
261
|
- [`docs/python-api.md`](docs/python-api.md) for the detailed API reference
|
|
238
|
-
- [`
|
|
262
|
+
- [`docs/cli.md`](docs/cli.md) for the config-driven CLI guide
|
|
239
263
|
- [`docs/models.md`](docs/models.md) for the full supported-model catalog
|
|
264
|
+
- [`tutorials/api_walkthrough.ipynb`](tutorials/api_walkthrough.ipynb) for a notebook walkthrough of the API
|
|
@@ -1,9 +1,12 @@
|
|
|
1
1
|
# slide2vec
|
|
2
2
|
|
|
3
3
|
[](https://pypi.org/project/slide2vec/)
|
|
4
|
+
[](https://clemsgrs.github.io/slide2vec/)
|
|
4
5
|
|
|
5
6
|
`slide2vec` is a Python package for efficient encoding of whole-slide images using publicly available foundation models. It builds on [`hs2p`](https://pypi.org/project/hs2p/) for fast preprocessing and exposes a focused surface around `Model`, `Pipeline`, and `ExecutionOptions`.
|
|
6
7
|
|
|
8
|
+
Documentation site: [https://clemsgrs.github.io/slide2vec/](https://clemsgrs.github.io/slide2vec/)
|
|
9
|
+
|
|
7
10
|
## Installation
|
|
8
11
|
|
|
9
12
|
```shell
|
|
@@ -21,6 +24,8 @@ pip install git+https://github.com/Mahmoodlab/CONCH.git
|
|
|
21
24
|
pip install git+https://github.com/prov-gigapath/prov-gigapath.git
|
|
22
25
|
```
|
|
23
26
|
|
|
27
|
+
AtlasPatch-backed tissue segmentation is available through hs2p's `sam2` path in the bundled install.
|
|
28
|
+
|
|
24
29
|
## Python API
|
|
25
30
|
|
|
26
31
|
```python
|
|
@@ -37,6 +42,17 @@ x = embedded.x
|
|
|
37
42
|
y = embedded.y
|
|
38
43
|
```
|
|
39
44
|
|
|
45
|
+
Use `list_models()` when you want to inspect the shipped presets programmatically:
|
|
46
|
+
|
|
47
|
+
```python
|
|
48
|
+
from slide2vec import list_models
|
|
49
|
+
|
|
50
|
+
all_models = list_models()
|
|
51
|
+
tile_models = list_models("tile")
|
|
52
|
+
slide_models = list_models("slide")
|
|
53
|
+
patient_models = list_models("patient")
|
|
54
|
+
```
|
|
55
|
+
|
|
40
56
|
Use `Pipeline(...)` for manifest-driven batch processing when you want artifacts written to disk instead of only in-memory outputs:
|
|
41
57
|
|
|
42
58
|
```python
|
|
@@ -112,7 +128,7 @@ The CLI is a thin wrapper over the package API.
|
|
|
112
128
|
Bundled configs live under `slide2vec/configs/preprocessing/` and `slide2vec/configs/models/`.
|
|
113
129
|
|
|
114
130
|
```shell
|
|
115
|
-
|
|
131
|
+
slide2vec /path/to/config.yaml
|
|
116
132
|
```
|
|
117
133
|
|
|
118
134
|
By default, manifest-driven CLI runs use all available GPUs. Set `speed.num_gpus=4` when you want to cap the sharding explicitly.
|
|
@@ -135,7 +151,8 @@ docker run --rm -it \
|
|
|
135
151
|
|
|
136
152
|
## Documentation
|
|
137
153
|
|
|
138
|
-
- [
|
|
154
|
+
- [Documentation website](https://clemsgrs.github.io/slide2vec/) for the polished docs site
|
|
139
155
|
- [`docs/python-api.md`](docs/python-api.md) for the detailed API reference
|
|
140
|
-
- [`
|
|
156
|
+
- [`docs/cli.md`](docs/cli.md) for the config-driven CLI guide
|
|
141
157
|
- [`docs/models.md`](docs/models.md) for the full supported-model catalog
|
|
158
|
+
- [`tutorials/api_walkthrough.ipynb`](tutorials/api_walkthrough.ipynb) for a notebook walkthrough of the API
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "slide2vec"
|
|
7
|
-
version = "4.
|
|
7
|
+
version = "4.3.0"
|
|
8
8
|
description = "Embedding of whole slide images with Foundation Models"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
requires-python = ">=3.10"
|
|
@@ -21,7 +21,7 @@ classifiers = [
|
|
|
21
21
|
"Programming Language :: Python :: 3.13",
|
|
22
22
|
]
|
|
23
23
|
dependencies = [
|
|
24
|
-
"hs2p[asap,cucim,openslide,vips]>=
|
|
24
|
+
"hs2p[asap,cucim,openslide,sam2,vips]>=4.0.0",
|
|
25
25
|
"omegaconf",
|
|
26
26
|
"matplotlib",
|
|
27
27
|
"numpy<2",
|
|
@@ -42,7 +42,7 @@ Homepage = "https://github.com/clemsgrs/slide2vec"
|
|
|
42
42
|
"Bug Tracker" = "https://github.com/clemsgrs/slide2vec/issues"
|
|
43
43
|
|
|
44
44
|
[project.scripts]
|
|
45
|
-
slide2vec = "slide2vec.cli:
|
|
45
|
+
slide2vec = "slide2vec.cli:entrypoint"
|
|
46
46
|
|
|
47
47
|
[project.optional-dependencies]
|
|
48
48
|
hoptimus = [
|
|
@@ -71,6 +71,9 @@ hibou = [
|
|
|
71
71
|
"scipy~=1.8.1",
|
|
72
72
|
"scikit-image~=0.19.3",
|
|
73
73
|
]
|
|
74
|
+
moozy = [
|
|
75
|
+
"huggingface_hub>=0.30.0,<1.0",
|
|
76
|
+
]
|
|
74
77
|
titan = [
|
|
75
78
|
"torch==2.0.1",
|
|
76
79
|
"timm==1.0.3",
|
|
@@ -85,7 +88,7 @@ fm = [
|
|
|
85
88
|
"pandas",
|
|
86
89
|
"pillow",
|
|
87
90
|
"rich",
|
|
88
|
-
"hs2p[asap,cucim,openslide,vips]>=
|
|
91
|
+
"hs2p[asap,cucim,openslide,sam2,vips]>=4.0.0",
|
|
89
92
|
"wandb",
|
|
90
93
|
"torch>=2.3,<2.8",
|
|
91
94
|
"torchvision>=0.18.0",
|
|
@@ -110,6 +113,13 @@ fm = [
|
|
|
110
113
|
"ninja==1.11.1.1",
|
|
111
114
|
"psutil<6",
|
|
112
115
|
]
|
|
116
|
+
docs = [
|
|
117
|
+
"sphinx>=8.1",
|
|
118
|
+
"furo",
|
|
119
|
+
"myst-parser",
|
|
120
|
+
"sphinx-copybutton",
|
|
121
|
+
"sphinx-autodoc-typehints",
|
|
122
|
+
]
|
|
113
123
|
testing = [
|
|
114
124
|
"pytest>=6.0",
|
|
115
125
|
"pytest-cov>=2.0",
|
|
@@ -154,7 +164,7 @@ no_implicit_reexport = true
|
|
|
154
164
|
max-line-length = 160
|
|
155
165
|
|
|
156
166
|
[tool.bumpver]
|
|
157
|
-
current_version = "4.
|
|
167
|
+
current_version = "4.3.0"
|
|
158
168
|
version_pattern = "MAJOR.MINOR.PATCH"
|
|
159
169
|
commit = false # We do version bumping in CI, not as a commit
|
|
160
170
|
tag = false # Git tag already exists — we don't auto-tag
|
|
@@ -1,11 +1,20 @@
|
|
|
1
|
-
from slide2vec.api import
|
|
1
|
+
from slide2vec.api import (
|
|
2
|
+
EmbeddedSlide,
|
|
3
|
+
ExecutionOptions,
|
|
4
|
+
Model,
|
|
5
|
+
Pipeline,
|
|
6
|
+
PreprocessingConfig,
|
|
7
|
+
RunResult,
|
|
8
|
+
list_models,
|
|
9
|
+
)
|
|
2
10
|
from slide2vec.artifacts import HierarchicalEmbeddingArtifact, SlideEmbeddingArtifact, TileEmbeddingArtifact
|
|
3
11
|
|
|
4
12
|
|
|
5
|
-
__version__ = "4.
|
|
13
|
+
__version__ = "4.3.0"
|
|
6
14
|
|
|
7
15
|
__all__ = [
|
|
8
16
|
"Model",
|
|
17
|
+
"list_models",
|
|
9
18
|
"Pipeline",
|
|
10
19
|
"PreprocessingConfig",
|
|
11
20
|
"ExecutionOptions",
|
|
@@ -11,6 +11,7 @@ from hs2p import SlideSpec
|
|
|
11
11
|
|
|
12
12
|
from slide2vec.artifacts import (
|
|
13
13
|
HierarchicalEmbeddingArtifact,
|
|
14
|
+
PatientEmbeddingArtifact,
|
|
14
15
|
SlideEmbeddingArtifact,
|
|
15
16
|
TileEmbeddingArtifact,
|
|
16
17
|
)
|
|
@@ -19,9 +20,9 @@ from slide2vec.encoders.registry import (
|
|
|
19
20
|
resolve_preprocessing_defaults,
|
|
20
21
|
)
|
|
21
22
|
from slide2vec.encoders.validation import validate_encoder_config
|
|
22
|
-
from slide2vec.model_settings import canonicalize_model_name, normalize_precision_name
|
|
23
|
+
from slide2vec.runtime.model_settings import canonicalize_model_name, normalize_precision_name
|
|
23
24
|
from slide2vec.progress import emit_progress
|
|
24
|
-
from slide2vec.
|
|
25
|
+
from slide2vec.runtime.types import LoadedModel
|
|
25
26
|
from slide2vec.utils.utils import cpu_worker_limit, slurm_cpu_limit
|
|
26
27
|
|
|
27
28
|
PathLike = str | Path
|
|
@@ -71,8 +72,17 @@ class PreprocessingConfig:
|
|
|
71
72
|
gpu_decode = bool(tiling.gpu_decode)
|
|
72
73
|
adaptive_batching = bool(tiling.adaptive_batching)
|
|
73
74
|
preview_cfg = tiling.preview
|
|
74
|
-
preview_save = bool(preview_cfg.
|
|
75
|
-
|
|
75
|
+
preview_save = bool(preview_cfg.save_mask_preview)
|
|
76
|
+
preview_tiling_save = bool(preview_cfg.save_tiling_preview)
|
|
77
|
+
preview_kwargs: dict[str, Any] = {
|
|
78
|
+
"save_mask_preview": preview_save,
|
|
79
|
+
"save_tiling_preview": preview_tiling_save,
|
|
80
|
+
"downsample": int(preview_cfg.downsample),
|
|
81
|
+
}
|
|
82
|
+
preview_kwargs["tissue_contour_color"] = tuple(
|
|
83
|
+
int(channel) for channel in preview_cfg.tissue_contour_color
|
|
84
|
+
)
|
|
85
|
+
preview_kwargs["mask_overlay_alpha"] = float(preview_cfg.mask_overlay_alpha)
|
|
76
86
|
return cls(
|
|
77
87
|
backend=tiling.backend,
|
|
78
88
|
requested_spacing_um=float(tiling.params.requested_spacing_um),
|
|
@@ -103,11 +113,7 @@ class PreprocessingConfig:
|
|
|
103
113
|
resume=bool(cfg.resume),
|
|
104
114
|
segmentation=dict(tiling.seg_params),
|
|
105
115
|
filtering=dict(tiling.filter_params),
|
|
106
|
-
preview=
|
|
107
|
-
"save_mask_preview": preview_save,
|
|
108
|
-
"save_tiling_preview": preview_save,
|
|
109
|
-
"downsample": preview_downsample,
|
|
110
|
-
},
|
|
116
|
+
preview=preview_kwargs,
|
|
111
117
|
)
|
|
112
118
|
|
|
113
119
|
def with_backend(self, backend: str) -> "PreprocessingConfig":
|
|
@@ -127,6 +133,7 @@ class ExecutionOptions:
|
|
|
127
133
|
prefetch_factor: int = 4
|
|
128
134
|
persistent_workers: bool = True
|
|
129
135
|
save_tile_embeddings: bool = False
|
|
136
|
+
save_slide_embeddings: bool = False
|
|
130
137
|
save_latents: bool = False
|
|
131
138
|
|
|
132
139
|
@classmethod
|
|
@@ -151,6 +158,7 @@ class ExecutionOptions:
|
|
|
151
158
|
prefetch_factor=prefetch_factor,
|
|
152
159
|
persistent_workers=persistent_workers,
|
|
153
160
|
save_tile_embeddings=bool(cfg.model.save_tile_embeddings),
|
|
161
|
+
save_slide_embeddings=bool(cfg.model.save_slide_embeddings),
|
|
154
162
|
save_latents=bool(cfg.model.save_latents),
|
|
155
163
|
)
|
|
156
164
|
|
|
@@ -200,9 +208,17 @@ class RunResult:
|
|
|
200
208
|
tile_artifacts: list[TileEmbeddingArtifact]
|
|
201
209
|
hierarchical_artifacts: list[HierarchicalEmbeddingArtifact]
|
|
202
210
|
slide_artifacts: list[SlideEmbeddingArtifact]
|
|
211
|
+
patient_artifacts: list[PatientEmbeddingArtifact] = field(default_factory=list)
|
|
203
212
|
process_list_path: Path | None = None
|
|
204
213
|
|
|
205
214
|
|
|
215
|
+
@dataclass(frozen=True, kw_only=True)
|
|
216
|
+
class EmbeddedPatient:
|
|
217
|
+
patient_id: str
|
|
218
|
+
patient_embedding: Any # torch.Tensor [D]
|
|
219
|
+
slide_embeddings: dict[str, Any] # {sample_id: torch.Tensor [D]}
|
|
220
|
+
|
|
221
|
+
|
|
206
222
|
@dataclass(frozen=True, kw_only=True)
|
|
207
223
|
class EmbeddedSlide:
|
|
208
224
|
sample_id: str
|
|
@@ -343,6 +359,82 @@ class Model:
|
|
|
343
359
|
execution=resolved,
|
|
344
360
|
)
|
|
345
361
|
|
|
362
|
+
def embed_patient(
|
|
363
|
+
self,
|
|
364
|
+
slides: SlideSequence,
|
|
365
|
+
patient_id: str | None = None,
|
|
366
|
+
*,
|
|
367
|
+
preprocessing: PreprocessingConfig | None = None,
|
|
368
|
+
execution: ExecutionOptions | None = None,
|
|
369
|
+
) -> "EmbeddedPatient":
|
|
370
|
+
"""Embed a single patient's slides and return one ``EmbeddedPatient``.
|
|
371
|
+
|
|
372
|
+
Convenience wrapper around :meth:`embed_patients` for the common case
|
|
373
|
+
where all *slides* belong to the same patient.
|
|
374
|
+
|
|
375
|
+
Args:
|
|
376
|
+
slides: All slides for this patient.
|
|
377
|
+
patient_id: Optional patient identifier applied to every slide.
|
|
378
|
+
When omitted, ``patient_id`` is read from slide dict keys or
|
|
379
|
+
object attributes; slides that carry no ``patient_id`` fall
|
|
380
|
+
back to ``sample_id``.
|
|
381
|
+
"""
|
|
382
|
+
patient_id_map: dict | None = None
|
|
383
|
+
if patient_id is not None:
|
|
384
|
+
patient_id_map = {}
|
|
385
|
+
for s in slides:
|
|
386
|
+
if isinstance(s, (str, Path)):
|
|
387
|
+
patient_id_map[Path(s).stem] = patient_id
|
|
388
|
+
elif isinstance(s, dict):
|
|
389
|
+
patient_id_map[str(s["sample_id"])] = patient_id
|
|
390
|
+
else:
|
|
391
|
+
patient_id_map[str(s.sample_id)] = patient_id
|
|
392
|
+
return self.embed_patients(
|
|
393
|
+
slides,
|
|
394
|
+
patient_id_map=patient_id_map,
|
|
395
|
+
preprocessing=preprocessing,
|
|
396
|
+
execution=execution,
|
|
397
|
+
)[0]
|
|
398
|
+
|
|
399
|
+
def embed_patients(
|
|
400
|
+
self,
|
|
401
|
+
slides: SlideSequence,
|
|
402
|
+
patient_id_map: dict | None = None,
|
|
403
|
+
*,
|
|
404
|
+
preprocessing: PreprocessingConfig | None = None,
|
|
405
|
+
execution: ExecutionOptions | None = None,
|
|
406
|
+
) -> "list[EmbeddedPatient]":
|
|
407
|
+
"""Embed slides and aggregate them into patient-level embeddings.
|
|
408
|
+
|
|
409
|
+
Requires a patient-level model (e.g. ``moozy``). For each patient
|
|
410
|
+
all contributing slide embeddings are aggregated by the model's
|
|
411
|
+
``encode_patient`` method.
|
|
412
|
+
|
|
413
|
+
Args:
|
|
414
|
+
slides: Slides to process. Each entry may be a path, a
|
|
415
|
+
``SlideSpec``, or a dict with ``sample_id`` / ``image_path``
|
|
416
|
+
keys. When *patient_id_map* is ``None`` a ``patient_id``
|
|
417
|
+
key in each dict is used to group slides.
|
|
418
|
+
patient_id_map: Optional explicit ``{sample_id: patient_id}``
|
|
419
|
+
mapping. When provided it takes precedence over any
|
|
420
|
+
``patient_id`` key embedded in the slide dicts. When
|
|
421
|
+
omitted and the slide dicts carry no ``patient_id``, each
|
|
422
|
+
slide is treated as its own patient.
|
|
423
|
+
"""
|
|
424
|
+
from slide2vec.inference import embed_patients
|
|
425
|
+
|
|
426
|
+
resolved = _coerce_execution_options(execution, model=self)
|
|
427
|
+
resolved_preprocessing = _resolve_direct_api_preprocessing(self, preprocessing)
|
|
428
|
+
with _auto_progress_reporting(output_dir=resolved.output_dir):
|
|
429
|
+
_validate_model_config(self, resolved_preprocessing, resolved)
|
|
430
|
+
return embed_patients(
|
|
431
|
+
self,
|
|
432
|
+
slides,
|
|
433
|
+
patient_id_map=patient_id_map,
|
|
434
|
+
preprocessing=resolved_preprocessing,
|
|
435
|
+
execution=resolved,
|
|
436
|
+
)
|
|
437
|
+
|
|
346
438
|
def _load_backend(self) -> LoadedModel:
|
|
347
439
|
if self._backend is None:
|
|
348
440
|
from slide2vec.inference import load_model
|
|
@@ -357,6 +449,27 @@ class Model:
|
|
|
357
449
|
return self._backend
|
|
358
450
|
|
|
359
451
|
|
|
452
|
+
def list_models(level: str | None = None) -> list[str]:
|
|
453
|
+
"""Return the available preset model names in a stable order.
|
|
454
|
+
|
|
455
|
+
Args:
|
|
456
|
+
level: Optional model level filter. Supported values are ``"tile"``,
|
|
457
|
+
``"slide"``, and ``"patient"``.
|
|
458
|
+
"""
|
|
459
|
+
if level is None:
|
|
460
|
+
return sorted(encoder_registry.names())
|
|
461
|
+
|
|
462
|
+
normalized_level = str(level).strip().lower()
|
|
463
|
+
if normalized_level not in {"tile", "slide", "patient"}:
|
|
464
|
+
raise ValueError("list_models(level=...) must be one of: tile, slide, patient")
|
|
465
|
+
|
|
466
|
+
return sorted(
|
|
467
|
+
name
|
|
468
|
+
for name in encoder_registry.names()
|
|
469
|
+
if encoder_registry.info(name)["level"] == normalized_level
|
|
470
|
+
)
|
|
471
|
+
|
|
472
|
+
|
|
360
473
|
class Pipeline:
|
|
361
474
|
def __init__(
|
|
362
475
|
self,
|
|
@@ -35,6 +35,20 @@ class SlideEmbeddingArtifact:
|
|
|
35
35
|
return load_metadata(self.metadata_path)
|
|
36
36
|
|
|
37
37
|
|
|
38
|
+
@dataclass(frozen=True, kw_only=True)
|
|
39
|
+
class PatientEmbeddingArtifact:
|
|
40
|
+
patient_id: str
|
|
41
|
+
path: Path
|
|
42
|
+
metadata_path: Path
|
|
43
|
+
format: str
|
|
44
|
+
feature_dim: int
|
|
45
|
+
num_slides: int
|
|
46
|
+
|
|
47
|
+
@property
|
|
48
|
+
def metadata(self) -> dict[str, Any]:
|
|
49
|
+
return load_metadata(self.metadata_path)
|
|
50
|
+
|
|
51
|
+
|
|
38
52
|
@dataclass(frozen=True, kw_only=True)
|
|
39
53
|
class HierarchicalEmbeddingArtifact:
|
|
40
54
|
sample_id: str
|
|
@@ -223,6 +237,45 @@ def write_slide_embeddings(
|
|
|
223
237
|
)
|
|
224
238
|
|
|
225
239
|
|
|
240
|
+
def write_patient_embeddings(
|
|
241
|
+
patient_id: str,
|
|
242
|
+
embedding,
|
|
243
|
+
*,
|
|
244
|
+
output_dir: str | Path,
|
|
245
|
+
output_format: str = "pt",
|
|
246
|
+
metadata: dict[str, Any] | None = None,
|
|
247
|
+
num_slides: int = 0,
|
|
248
|
+
) -> PatientEmbeddingArtifact:
|
|
249
|
+
output_format = _validate_output_format(output_format)
|
|
250
|
+
artifact_path, metadata_path = _setup_artifact_paths(
|
|
251
|
+
output_dir, "patient_embeddings", patient_id, output_format
|
|
252
|
+
)
|
|
253
|
+
embedding_array = _ensure_array(embedding)
|
|
254
|
+
if output_format == "pt":
|
|
255
|
+
torch.save(_ensure_tensor(embedding), artifact_path)
|
|
256
|
+
else:
|
|
257
|
+
np.savez_compressed(artifact_path, features=embedding_array)
|
|
258
|
+
|
|
259
|
+
patient_metadata = {
|
|
260
|
+
"patient_id": patient_id,
|
|
261
|
+
"artifact_type": "patient_embeddings",
|
|
262
|
+
"format": output_format,
|
|
263
|
+
"feature_dim": int(embedding_array.shape[-1]) if embedding_array.ndim else 1,
|
|
264
|
+
"num_slides": num_slides,
|
|
265
|
+
}
|
|
266
|
+
if metadata:
|
|
267
|
+
patient_metadata.update(metadata)
|
|
268
|
+
_write_metadata(metadata_path, patient_metadata)
|
|
269
|
+
return PatientEmbeddingArtifact(
|
|
270
|
+
patient_id=patient_id,
|
|
271
|
+
path=artifact_path,
|
|
272
|
+
metadata_path=metadata_path,
|
|
273
|
+
format=output_format,
|
|
274
|
+
feature_dim=patient_metadata["feature_dim"],
|
|
275
|
+
num_slides=num_slides,
|
|
276
|
+
)
|
|
277
|
+
|
|
278
|
+
|
|
226
279
|
def write_hierarchical_embeddings(
|
|
227
280
|
sample_id: str,
|
|
228
281
|
features,
|
|
@@ -7,20 +7,21 @@ import slide2vec.progress as progress
|
|
|
7
7
|
|
|
8
8
|
def get_args_parser(add_help: bool = True):
|
|
9
9
|
parser = argparse.ArgumentParser("slide2vec", add_help=add_help)
|
|
10
|
-
parser.add_argument("
|
|
10
|
+
parser.add_argument("config_file", metavar="CONFIG", help="path to config file")
|
|
11
11
|
parser.add_argument("--skip-datetime", action="store_true", help="skip run id datetime prefix")
|
|
12
12
|
parser.add_argument("--tiling-only", action="store_true", help="only run slide tiling")
|
|
13
13
|
parser.add_argument("--run-on-cpu", action="store_true", help="run inference on cpu")
|
|
14
14
|
parser.add_argument("--output-dir", type=str, default=None, help="output directory to save artifacts")
|
|
15
|
-
parser.add_argument(
|
|
16
|
-
"opts",
|
|
17
|
-
help='Modify config options at the end of the command using "path.key=value".',
|
|
18
|
-
default=None,
|
|
19
|
-
nargs=argparse.REMAINDER,
|
|
20
|
-
)
|
|
21
15
|
return parser
|
|
22
16
|
|
|
23
17
|
|
|
18
|
+
def parse_args(argv=None):
|
|
19
|
+
parser = get_args_parser(add_help=True)
|
|
20
|
+
args, opts = parser.parse_known_args(argv)
|
|
21
|
+
args.opts = opts
|
|
22
|
+
return args
|
|
23
|
+
|
|
24
|
+
|
|
24
25
|
def build_model_and_pipeline(args):
|
|
25
26
|
cfg, _cfg_path = setup(args)
|
|
26
27
|
hf_login()
|
|
@@ -39,8 +40,7 @@ def build_model_and_pipeline(args):
|
|
|
39
40
|
|
|
40
41
|
|
|
41
42
|
def main(argv=None):
|
|
42
|
-
|
|
43
|
-
args = parser.parse_args(argv)
|
|
43
|
+
args = parse_args(argv)
|
|
44
44
|
pipeline, cfg = build_model_and_pipeline(args)
|
|
45
45
|
reporter = progress.create_cli_progress_reporter(output_dir=getattr(cfg, "output_dir", None))
|
|
46
46
|
with progress.activate_progress_reporter(reporter):
|
|
@@ -50,3 +50,6 @@ def main(argv=None):
|
|
|
50
50
|
)
|
|
51
51
|
|
|
52
52
|
|
|
53
|
+
def entrypoint(argv=None):
|
|
54
|
+
main(argv)
|
|
55
|
+
return 0
|
|
@@ -13,6 +13,7 @@ model:
|
|
|
13
13
|
output_variant: # requested output variant for presets that expose multiple outputs
|
|
14
14
|
batch_size: 32
|
|
15
15
|
save_tile_embeddings: false # whether to save tile embeddings alongside the pooled slide embedding when level is "slide"
|
|
16
|
+
save_slide_embeddings: false # whether to save per-slide embeddings when level is "patient" (e.g. moozy); requires a 'patient_id' column in the input CSV
|
|
16
17
|
save_latents: false # whether to save the latent representations from the model alongside the slide embedding (only supported for 'prism')
|
|
17
18
|
allow_non_recommended_settings: false # when true, non-recommended spacing / tile size / precision combinations warn instead of erroring
|
|
18
19
|
|
|
@@ -37,12 +38,14 @@ tiling:
|
|
|
37
38
|
# downsample controls which pyramid level is read for tissue segmentation.
|
|
38
39
|
# Larger values are faster and use less memory; smaller values can improve mask precision.
|
|
39
40
|
downsample: 64 # find the closest downsample in the slide for tissue segmentation
|
|
40
|
-
sthresh: 8 # segmentation threshold (positive integer, using a higher threshold leads to less foreground and more background detection) (not used when
|
|
41
|
+
sthresh: 8 # segmentation threshold (positive integer, using a higher threshold leads to less foreground and more background detection) (not used when method="otsu")
|
|
41
42
|
sthresh_up: 255 # upper threshold value for scaling the binary mask
|
|
42
43
|
mthresh: 7 # median filter size (positive, odd integer)
|
|
43
44
|
close: 4 # additional morphological closing to apply following initial thresholding (positive integer)
|
|
44
|
-
|
|
45
|
-
|
|
45
|
+
method: "hsv" # tissue segmentation method: "hsv", "otsu", "threshold", or "sam2"
|
|
46
|
+
sam2_checkpoint_path: # optional when method="sam2"; if empty, hs2p downloads the default AtlasPatch checkpoint from Hugging Face
|
|
47
|
+
sam2_config_path: # optional local override for the SAM2 model config; if empty, hs2p downloads the default AtlasPatch config from Hugging Face
|
|
48
|
+
sam2_device: "cpu" # device for SAM2 inference, e.g. "cpu", "cuda", or "cuda:0"
|
|
46
49
|
filter_params:
|
|
47
50
|
ref_tile_size: ${tiling.params.requested_tile_size_px} # reference tile size at the target spacing
|
|
48
51
|
a_t: 4 # area filter threshold for tissue (positive integer, the minimum size of detected foreground contours to consider, relative to the reference tile size ref_tile_size, e.g. a value 10 means only detected foreground contours of size greater than 10 [ref_tile_size, ref_tile_size] tiles at spacing tiling.params.requested_spacing_um will be kept)
|
|
@@ -59,9 +62,10 @@ tiling:
|
|
|
59
62
|
blur_threshold: 50.0 # minimum blur score (higher is sharper)
|
|
60
63
|
qc_spacing_um: 2.0 # spacing at which pixel-based QC is evaluated
|
|
61
64
|
preview:
|
|
62
|
-
|
|
65
|
+
save_mask_preview: true # save preview images of mask overlays
|
|
66
|
+
save_tiling_preview: true # save preview images of tile layouts
|
|
63
67
|
downsample: 32 # downsample to use for preview rendering
|
|
64
|
-
|
|
68
|
+
tissue_contour_color: [157, 219, 129] # RGB color used for tissue contours in batch mask previews
|
|
65
69
|
mask_overlay_alpha: 0.5 # alpha used for tissue overlays in batch mask previews
|
|
66
70
|
|
|
67
71
|
speed:
|
|
@@ -1,7 +1,7 @@
|
|
|
1
|
+
from contextlib import contextmanager
|
|
1
2
|
from importlib.resources import as_file, files
|
|
2
3
|
from pathlib import Path
|
|
3
4
|
from typing import Iterator
|
|
4
|
-
from contextlib import contextmanager
|
|
5
5
|
|
|
6
6
|
|
|
7
7
|
def config_resource(*parts: str):
|
|
@@ -24,3 +24,4 @@ def config_path(*parts: str) -> Iterator[Path]:
|
|
|
24
24
|
resource = config_resource(*parts)
|
|
25
25
|
with as_file(resource) as resolved:
|
|
26
26
|
yield resolved
|
|
27
|
+
|
|
@@ -26,11 +26,10 @@ def main(argv=None) -> int:
|
|
|
26
26
|
_compute_tile_embeddings_for_slide,
|
|
27
27
|
_is_hierarchical_preprocessing,
|
|
28
28
|
_resolve_hierarchical_geometry,
|
|
29
|
-
deserialize_execution,
|
|
30
|
-
deserialize_preprocessing,
|
|
31
29
|
load_successful_tiled_slides,
|
|
32
30
|
)
|
|
33
31
|
from slide2vec.progress import JsonlProgressReporter, activate_progress_reporter
|
|
32
|
+
from slide2vec.runtime.serialization import deserialize_execution, deserialize_preprocessing
|
|
34
33
|
|
|
35
34
|
parser = get_args_parser(add_help=True)
|
|
36
35
|
args = parser.parse_args(argv)
|
|
@@ -49,6 +48,7 @@ def main(argv=None) -> int:
|
|
|
49
48
|
model_spec["name"],
|
|
50
49
|
device=f"cuda:{local_rank}",
|
|
51
50
|
output_variant=model_spec.get("output_variant"),
|
|
51
|
+
allow_non_recommended_settings=bool(model_spec["allow_non_recommended_settings"]),
|
|
52
52
|
)
|
|
53
53
|
preprocessing = deserialize_preprocessing(request["preprocessing"])
|
|
54
54
|
execution = deserialize_execution(request["execution"])
|