slide2vec 4.1.1__tar.gz → 4.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86) hide show
  1. {slide2vec-4.1.1 → slide2vec-4.3.0}/PKG-INFO +31 -6
  2. {slide2vec-4.1.1 → slide2vec-4.3.0}/README.md +20 -3
  3. {slide2vec-4.1.1 → slide2vec-4.3.0}/pyproject.toml +15 -5
  4. {slide2vec-4.1.1 → slide2vec-4.3.0}/slide2vec/__init__.py +11 -2
  5. {slide2vec-4.1.1 → slide2vec-4.3.0}/slide2vec/api.py +122 -9
  6. {slide2vec-4.1.1 → slide2vec-4.3.0}/slide2vec/artifacts.py +53 -0
  7. {slide2vec-4.1.1 → slide2vec-4.3.0}/slide2vec/cli.py +12 -9
  8. slide2vec-4.3.0/slide2vec/configs/__init__.py +4 -0
  9. {slide2vec-4.1.1 → slide2vec-4.3.0}/slide2vec/configs/default.yaml +9 -5
  10. {slide2vec-4.1.1/slide2vec → slide2vec-4.3.0/slide2vec/configs}/resources.py +2 -1
  11. {slide2vec-4.1.1 → slide2vec-4.3.0}/slide2vec/distributed/direct_embed_worker.py +2 -2
  12. {slide2vec-4.1.1 → slide2vec-4.3.0}/slide2vec/distributed/pipeline_worker.py +4 -4
  13. {slide2vec-4.1.1 → slide2vec-4.3.0}/slide2vec/encoders/__init__.py +2 -0
  14. {slide2vec-4.1.1 → slide2vec-4.3.0}/slide2vec/encoders/base.py +27 -0
  15. {slide2vec-4.1.1 → slide2vec-4.3.0}/slide2vec/encoders/models/__init__.py +4 -0
  16. slide2vec-4.3.0/slide2vec/encoders/models/lunit.py +21 -0
  17. slide2vec-4.3.0/slide2vec/encoders/models/moozy/__init__.py +114 -0
  18. slide2vec-4.3.0/slide2vec/encoders/models/moozy/blocks.py +272 -0
  19. slide2vec-4.3.0/slide2vec/encoders/models/moozy/case.py +91 -0
  20. slide2vec-4.3.0/slide2vec/encoders/models/moozy/loading.py +103 -0
  21. slide2vec-4.3.0/slide2vec/encoders/models/moozy/slide.py +152 -0
  22. slide2vec-4.3.0/slide2vec/encoders/models/moozy/types.py +13 -0
  23. {slide2vec-4.1.1 → slide2vec-4.3.0}/slide2vec/encoders/registry.py +7 -6
  24. {slide2vec-4.1.1 → slide2vec-4.3.0}/slide2vec/encoders/validation.py +1 -1
  25. {slide2vec-4.1.1 → slide2vec-4.3.0}/slide2vec/inference.py +531 -1296
  26. {slide2vec-4.1.1 → slide2vec-4.3.0}/slide2vec/progress.py +135 -5
  27. slide2vec-4.3.0/slide2vec/runtime/__init__.py +2 -0
  28. slide2vec-4.3.0/slide2vec/runtime/batching.py +441 -0
  29. slide2vec-4.3.0/slide2vec/runtime/distributed.py +194 -0
  30. slide2vec-4.3.0/slide2vec/runtime/embedding.py +157 -0
  31. slide2vec-4.3.0/slide2vec/runtime/hierarchical.py +105 -0
  32. {slide2vec-4.1.1/slide2vec → slide2vec-4.3.0/slide2vec/runtime}/model_settings.py +1 -0
  33. slide2vec-4.3.0/slide2vec/runtime/persistence.py +165 -0
  34. slide2vec-4.3.0/slide2vec/runtime/progress_bridge.py +52 -0
  35. {slide2vec-4.1.1/slide2vec → slide2vec-4.3.0/slide2vec/runtime}/registry.py +1 -0
  36. slide2vec-4.3.0/slide2vec/runtime/serialization.py +119 -0
  37. slide2vec-4.3.0/slide2vec/runtime/tiling.py +97 -0
  38. slide2vec-4.3.0/slide2vec/runtime/types.py +48 -0
  39. {slide2vec-4.1.1 → slide2vec-4.3.0}/slide2vec/utils/config.py +11 -6
  40. {slide2vec-4.1.1 → slide2vec-4.3.0}/slide2vec/utils/tiling_io.py +21 -0
  41. {slide2vec-4.1.1 → slide2vec-4.3.0}/slide2vec.egg-info/PKG-INFO +31 -6
  42. {slide2vec-4.1.1 → slide2vec-4.3.0}/slide2vec.egg-info/SOURCES.txt +21 -7
  43. slide2vec-4.3.0/slide2vec.egg-info/entry_points.txt +2 -0
  44. {slide2vec-4.1.1 → slide2vec-4.3.0}/slide2vec.egg-info/requires.txt +12 -2
  45. slide2vec-4.3.0/tests/test_architecture_runtime_split.py +60 -0
  46. {slide2vec-4.1.1 → slide2vec-4.3.0}/tests/test_hs2p_package_cutover.py +6 -4
  47. {slide2vec-4.1.1 → slide2vec-4.3.0}/tests/test_progress.py +416 -8
  48. {slide2vec-4.1.1 → slide2vec-4.3.0}/tests/test_regression_core.py +125 -8
  49. {slide2vec-4.1.1 → slide2vec-4.3.0}/tests/test_regression_inference.py +342 -104
  50. {slide2vec-4.1.1 → slide2vec-4.3.0}/tests/test_regression_models.py +1 -1
  51. slide2vec-4.1.1/slide2vec/configs/__init__.py +0 -4
  52. slide2vec-4.1.1/slide2vec/runtime_types.py +0 -14
  53. slide2vec-4.1.1/slide2vec.egg-info/entry_points.txt +0 -2
  54. slide2vec-4.1.1/tests/test_batch_collator_timing.py +0 -161
  55. slide2vec-4.1.1/tests/test_output_consistency.py +0 -186
  56. slide2vec-4.1.1/tests/test_packaging_metadata.py +0 -23
  57. {slide2vec-4.1.1 → slide2vec-4.3.0}/LICENSE +0 -0
  58. {slide2vec-4.1.1 → slide2vec-4.3.0}/setup.cfg +0 -0
  59. {slide2vec-4.1.1 → slide2vec-4.3.0}/slide2vec/__main__.py +0 -0
  60. {slide2vec-4.1.1 → slide2vec-4.3.0}/slide2vec/data/__init__.py +0 -0
  61. {slide2vec-4.1.1 → slide2vec-4.3.0}/slide2vec/data/dataset.py +0 -0
  62. {slide2vec-4.1.1 → slide2vec-4.3.0}/slide2vec/data/tile_reader.py +0 -0
  63. {slide2vec-4.1.1 → slide2vec-4.3.0}/slide2vec/data/tile_store.py +0 -0
  64. {slide2vec-4.1.1 → slide2vec-4.3.0}/slide2vec/distributed/__init__.py +0 -0
  65. {slide2vec-4.1.1 → slide2vec-4.3.0}/slide2vec/encoders/models/conch.py +0 -0
  66. {slide2vec-4.1.1 → slide2vec-4.3.0}/slide2vec/encoders/models/gigapath.py +0 -0
  67. {slide2vec-4.1.1 → slide2vec-4.3.0}/slide2vec/encoders/models/hibou.py +0 -0
  68. {slide2vec-4.1.1 → slide2vec-4.3.0}/slide2vec/encoders/models/hoptimus.py +0 -0
  69. {slide2vec-4.1.1 → slide2vec-4.3.0}/slide2vec/encoders/models/midnight.py +0 -0
  70. {slide2vec-4.1.1 → slide2vec-4.3.0}/slide2vec/encoders/models/musk.py +0 -0
  71. {slide2vec-4.1.1 → slide2vec-4.3.0}/slide2vec/encoders/models/phikon.py +0 -0
  72. {slide2vec-4.1.1 → slide2vec-4.3.0}/slide2vec/encoders/models/prism.py +0 -0
  73. {slide2vec-4.1.1 → slide2vec-4.3.0}/slide2vec/encoders/models/prost40m.py +0 -0
  74. {slide2vec-4.1.1 → slide2vec-4.3.0}/slide2vec/encoders/models/titan.py +0 -0
  75. {slide2vec-4.1.1 → slide2vec-4.3.0}/slide2vec/encoders/models/uni.py +0 -0
  76. {slide2vec-4.1.1 → slide2vec-4.3.0}/slide2vec/encoders/models/virchow.py +0 -0
  77. {slide2vec-4.1.1 → slide2vec-4.3.0}/slide2vec/main.py +0 -0
  78. {slide2vec-4.1.1 → slide2vec-4.3.0}/slide2vec/utils/__init__.py +0 -0
  79. {slide2vec-4.1.1 → slide2vec-4.3.0}/slide2vec/utils/coordinates.py +0 -0
  80. {slide2vec-4.1.1 → slide2vec-4.3.0}/slide2vec/utils/log_utils.py +0 -0
  81. {slide2vec-4.1.1 → slide2vec-4.3.0}/slide2vec/utils/utils.py +0 -0
  82. {slide2vec-4.1.1 → slide2vec-4.3.0}/slide2vec.egg-info/dependency_links.txt +0 -0
  83. {slide2vec-4.1.1 → slide2vec-4.3.0}/slide2vec.egg-info/not-zip-safe +0 -0
  84. {slide2vec-4.1.1 → slide2vec-4.3.0}/slide2vec.egg-info/top_level.txt +0 -0
  85. {slide2vec-4.1.1 → slide2vec-4.3.0}/tests/test_encoder_registry.py +0 -0
  86. {slide2vec-4.1.1 → slide2vec-4.3.0}/tests/test_tile_store.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: slide2vec
3
- Version: 4.1.1
3
+ Version: 4.3.0
4
4
  Summary: Embedding of whole slide images with Foundation Models
5
5
  Author-email: Clément Grisi <clement.grisi@radboudumc.nl>
6
6
  License-Expression: Apache-2.0
@@ -15,7 +15,7 @@ Classifier: Programming Language :: Python :: 3.13
15
15
  Requires-Python: >=3.10
16
16
  Description-Content-Type: text/markdown
17
17
  License-File: LICENSE
18
- Requires-Dist: hs2p[asap,cucim,openslide,vips]>=3.2.0
18
+ Requires-Dist: hs2p[asap,cucim,openslide,sam2,vips]>=4.0.0
19
19
  Requires-Dist: omegaconf
20
20
  Requires-Dist: matplotlib
21
21
  Requires-Dist: numpy<2
@@ -50,6 +50,8 @@ Requires-Dist: xformers==0.0.31; extra == "prism"
50
50
  Provides-Extra: hibou
51
51
  Requires-Dist: scipy~=1.8.1; extra == "hibou"
52
52
  Requires-Dist: scikit-image~=0.19.3; extra == "hibou"
53
+ Provides-Extra: moozy
54
+ Requires-Dist: huggingface_hub<1.0,>=0.30.0; extra == "moozy"
53
55
  Provides-Extra: titan
54
56
  Requires-Dist: torch==2.0.1; extra == "titan"
55
57
  Requires-Dist: timm==1.0.3; extra == "titan"
@@ -63,7 +65,7 @@ Requires-Dist: numpy<2; extra == "fm"
63
65
  Requires-Dist: pandas; extra == "fm"
64
66
  Requires-Dist: pillow; extra == "fm"
65
67
  Requires-Dist: rich; extra == "fm"
66
- Requires-Dist: hs2p[asap,cucim,openslide,vips]>=3.2.0; extra == "fm"
68
+ Requires-Dist: hs2p[asap,cucim,openslide,sam2,vips]>=4.0.0; extra == "fm"
67
69
  Requires-Dist: wandb; extra == "fm"
68
70
  Requires-Dist: torch<2.8,>=2.3; extra == "fm"
69
71
  Requires-Dist: torchvision>=0.18.0; extra == "fm"
@@ -87,6 +89,12 @@ Requires-Dist: fairscale; extra == "fm"
87
89
  Requires-Dist: packaging==23.2; extra == "fm"
88
90
  Requires-Dist: ninja==1.11.1.1; extra == "fm"
89
91
  Requires-Dist: psutil<6; extra == "fm"
92
+ Provides-Extra: docs
93
+ Requires-Dist: sphinx>=8.1; extra == "docs"
94
+ Requires-Dist: furo; extra == "docs"
95
+ Requires-Dist: myst-parser; extra == "docs"
96
+ Requires-Dist: sphinx-copybutton; extra == "docs"
97
+ Requires-Dist: sphinx-autodoc-typehints; extra == "docs"
90
98
  Provides-Extra: testing
91
99
  Requires-Dist: pytest>=6.0; extra == "testing"
92
100
  Requires-Dist: pytest-cov>=2.0; extra == "testing"
@@ -99,9 +107,12 @@ Dynamic: license-file
99
107
  # slide2vec
100
108
 
101
109
  [![PyPI version](https://img.shields.io/pypi/v/slide2vec?label=pypi&logo=pypi&color=3776AB)](https://pypi.org/project/slide2vec/)
110
+ [![Docs](https://img.shields.io/badge/docs-website-blue)](https://clemsgrs.github.io/slide2vec/)
102
111
 
103
112
  `slide2vec` is a Python package for efficient encoding of whole-slide images using publicly available foundation models. It builds on [`hs2p`](https://pypi.org/project/hs2p/) for fast preprocessing and exposes a focused surface around `Model`, `Pipeline`, and `ExecutionOptions`.
104
113
 
114
+ Documentation site: [https://clemsgrs.github.io/slide2vec/](https://clemsgrs.github.io/slide2vec/)
115
+
105
116
  ## Installation
106
117
 
107
118
  ```shell
@@ -119,6 +130,8 @@ pip install git+https://github.com/Mahmoodlab/CONCH.git
119
130
  pip install git+https://github.com/prov-gigapath/prov-gigapath.git
120
131
  ```
121
132
 
133
+ AtlasPatch-backed tissue segmentation is available through hs2p's `sam2` path in the bundled install.
134
+
122
135
  ## Python API
123
136
 
124
137
  ```python
@@ -135,6 +148,17 @@ x = embedded.x
135
148
  y = embedded.y
136
149
  ```
137
150
 
151
+ Use `list_models()` when you want to inspect the shipped presets programmatically:
152
+
153
+ ```python
154
+ from slide2vec import list_models
155
+
156
+ all_models = list_models()
157
+ tile_models = list_models("tile")
158
+ slide_models = list_models("slide")
159
+ patient_models = list_models("patient")
160
+ ```
161
+
138
162
  Use `Pipeline(...)` for manifest-driven batch processing when you want artifacts written to disk instead of only in-memory outputs:
139
163
 
140
164
  ```python
@@ -210,7 +234,7 @@ The CLI is a thin wrapper over the package API.
210
234
  Bundled configs live under `slide2vec/configs/preprocessing/` and `slide2vec/configs/models/`.
211
235
 
212
236
  ```shell
213
- python -m slide2vec --config-file /path/to/config.yaml
237
+ slide2vec /path/to/config.yaml
214
238
  ```
215
239
 
216
240
  By default, manifest-driven CLI runs use all available GPUs. Set `speed.num_gpus=4` when you want to cap the sharding explicitly.
@@ -233,7 +257,8 @@ docker run --rm -it \
233
257
 
234
258
  ## Documentation
235
259
 
236
- - [`docs/cli.md`](docs/cli.md) for the config-driven CLI guide
260
+ - [Documentation website](https://clemsgrs.github.io/slide2vec/) for the polished docs site
237
261
  - [`docs/python-api.md`](docs/python-api.md) for the detailed API reference
238
- - [`tutorials/api_walkthrough.ipynb`](tutorials/api_walkthrough.ipynb) for a notebook walkthrough of the API
262
+ - [`docs/cli.md`](docs/cli.md) for the config-driven CLI guide
239
263
  - [`docs/models.md`](docs/models.md) for the full supported-model catalog
264
+ - [`tutorials/api_walkthrough.ipynb`](tutorials/api_walkthrough.ipynb) for a notebook walkthrough of the API
@@ -1,9 +1,12 @@
1
1
  # slide2vec
2
2
 
3
3
  [![PyPI version](https://img.shields.io/pypi/v/slide2vec?label=pypi&logo=pypi&color=3776AB)](https://pypi.org/project/slide2vec/)
4
+ [![Docs](https://img.shields.io/badge/docs-website-blue)](https://clemsgrs.github.io/slide2vec/)
4
5
 
5
6
  `slide2vec` is a Python package for efficient encoding of whole-slide images using publicly available foundation models. It builds on [`hs2p`](https://pypi.org/project/hs2p/) for fast preprocessing and exposes a focused surface around `Model`, `Pipeline`, and `ExecutionOptions`.
6
7
 
8
+ Documentation site: [https://clemsgrs.github.io/slide2vec/](https://clemsgrs.github.io/slide2vec/)
9
+
7
10
  ## Installation
8
11
 
9
12
  ```shell
@@ -21,6 +24,8 @@ pip install git+https://github.com/Mahmoodlab/CONCH.git
21
24
  pip install git+https://github.com/prov-gigapath/prov-gigapath.git
22
25
  ```
23
26
 
27
+ AtlasPatch-backed tissue segmentation is available through hs2p's `sam2` path in the bundled install.
28
+
24
29
  ## Python API
25
30
 
26
31
  ```python
@@ -37,6 +42,17 @@ x = embedded.x
37
42
  y = embedded.y
38
43
  ```
39
44
 
45
+ Use `list_models()` when you want to inspect the shipped presets programmatically:
46
+
47
+ ```python
48
+ from slide2vec import list_models
49
+
50
+ all_models = list_models()
51
+ tile_models = list_models("tile")
52
+ slide_models = list_models("slide")
53
+ patient_models = list_models("patient")
54
+ ```
55
+
40
56
  Use `Pipeline(...)` for manifest-driven batch processing when you want artifacts written to disk instead of only in-memory outputs:
41
57
 
42
58
  ```python
@@ -112,7 +128,7 @@ The CLI is a thin wrapper over the package API.
112
128
  Bundled configs live under `slide2vec/configs/preprocessing/` and `slide2vec/configs/models/`.
113
129
 
114
130
  ```shell
115
- python -m slide2vec --config-file /path/to/config.yaml
131
+ slide2vec /path/to/config.yaml
116
132
  ```
117
133
 
118
134
  By default, manifest-driven CLI runs use all available GPUs. Set `speed.num_gpus=4` when you want to cap the sharding explicitly.
@@ -135,7 +151,8 @@ docker run --rm -it \
135
151
 
136
152
  ## Documentation
137
153
 
138
- - [`docs/cli.md`](docs/cli.md) for the config-driven CLI guide
154
+ - [Documentation website](https://clemsgrs.github.io/slide2vec/) for the polished docs site
139
155
  - [`docs/python-api.md`](docs/python-api.md) for the detailed API reference
140
- - [`tutorials/api_walkthrough.ipynb`](tutorials/api_walkthrough.ipynb) for a notebook walkthrough of the API
156
+ - [`docs/cli.md`](docs/cli.md) for the config-driven CLI guide
141
157
  - [`docs/models.md`](docs/models.md) for the full supported-model catalog
158
+ - [`tutorials/api_walkthrough.ipynb`](tutorials/api_walkthrough.ipynb) for a notebook walkthrough of the API
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "slide2vec"
7
- version = "4.1.1"
7
+ version = "4.3.0"
8
8
  description = "Embedding of whole slide images with Foundation Models"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.10"
@@ -21,7 +21,7 @@ classifiers = [
21
21
  "Programming Language :: Python :: 3.13",
22
22
  ]
23
23
  dependencies = [
24
- "hs2p[asap,cucim,openslide,vips]>=3.2.0",
24
+ "hs2p[asap,cucim,openslide,sam2,vips]>=4.0.0",
25
25
  "omegaconf",
26
26
  "matplotlib",
27
27
  "numpy<2",
@@ -42,7 +42,7 @@ Homepage = "https://github.com/clemsgrs/slide2vec"
42
42
  "Bug Tracker" = "https://github.com/clemsgrs/slide2vec/issues"
43
43
 
44
44
  [project.scripts]
45
- slide2vec = "slide2vec.cli:main"
45
+ slide2vec = "slide2vec.cli:entrypoint"
46
46
 
47
47
  [project.optional-dependencies]
48
48
  hoptimus = [
@@ -71,6 +71,9 @@ hibou = [
71
71
  "scipy~=1.8.1",
72
72
  "scikit-image~=0.19.3",
73
73
  ]
74
+ moozy = [
75
+ "huggingface_hub>=0.30.0,<1.0",
76
+ ]
74
77
  titan = [
75
78
  "torch==2.0.1",
76
79
  "timm==1.0.3",
@@ -85,7 +88,7 @@ fm = [
85
88
  "pandas",
86
89
  "pillow",
87
90
  "rich",
88
- "hs2p[asap,cucim,openslide,vips]>=3.2.0",
91
+ "hs2p[asap,cucim,openslide,sam2,vips]>=4.0.0",
89
92
  "wandb",
90
93
  "torch>=2.3,<2.8",
91
94
  "torchvision>=0.18.0",
@@ -110,6 +113,13 @@ fm = [
110
113
  "ninja==1.11.1.1",
111
114
  "psutil<6",
112
115
  ]
116
+ docs = [
117
+ "sphinx>=8.1",
118
+ "furo",
119
+ "myst-parser",
120
+ "sphinx-copybutton",
121
+ "sphinx-autodoc-typehints",
122
+ ]
113
123
  testing = [
114
124
  "pytest>=6.0",
115
125
  "pytest-cov>=2.0",
@@ -154,7 +164,7 @@ no_implicit_reexport = true
154
164
  max-line-length = 160
155
165
 
156
166
  [tool.bumpver]
157
- current_version = "4.1.1"
167
+ current_version = "4.3.0"
158
168
  version_pattern = "MAJOR.MINOR.PATCH"
159
169
  commit = false # We do version bumping in CI, not as a commit
160
170
  tag = false # Git tag already exists — we don't auto-tag
@@ -1,11 +1,20 @@
1
- from slide2vec.api import EmbeddedSlide, ExecutionOptions, Model, Pipeline, PreprocessingConfig, RunResult
1
+ from slide2vec.api import (
2
+ EmbeddedSlide,
3
+ ExecutionOptions,
4
+ Model,
5
+ Pipeline,
6
+ PreprocessingConfig,
7
+ RunResult,
8
+ list_models,
9
+ )
2
10
  from slide2vec.artifacts import HierarchicalEmbeddingArtifact, SlideEmbeddingArtifact, TileEmbeddingArtifact
3
11
 
4
12
 
5
- __version__ = "4.1.1"
13
+ __version__ = "4.3.0"
6
14
 
7
15
  __all__ = [
8
16
  "Model",
17
+ "list_models",
9
18
  "Pipeline",
10
19
  "PreprocessingConfig",
11
20
  "ExecutionOptions",
@@ -11,6 +11,7 @@ from hs2p import SlideSpec
11
11
 
12
12
  from slide2vec.artifacts import (
13
13
  HierarchicalEmbeddingArtifact,
14
+ PatientEmbeddingArtifact,
14
15
  SlideEmbeddingArtifact,
15
16
  TileEmbeddingArtifact,
16
17
  )
@@ -19,9 +20,9 @@ from slide2vec.encoders.registry import (
19
20
  resolve_preprocessing_defaults,
20
21
  )
21
22
  from slide2vec.encoders.validation import validate_encoder_config
22
- from slide2vec.model_settings import canonicalize_model_name, normalize_precision_name
23
+ from slide2vec.runtime.model_settings import canonicalize_model_name, normalize_precision_name
23
24
  from slide2vec.progress import emit_progress
24
- from slide2vec.runtime_types import LoadedModel
25
+ from slide2vec.runtime.types import LoadedModel
25
26
  from slide2vec.utils.utils import cpu_worker_limit, slurm_cpu_limit
26
27
 
27
28
  PathLike = str | Path
@@ -71,8 +72,17 @@ class PreprocessingConfig:
71
72
  gpu_decode = bool(tiling.gpu_decode)
72
73
  adaptive_batching = bool(tiling.adaptive_batching)
73
74
  preview_cfg = tiling.preview
74
- preview_save = bool(preview_cfg.save)
75
- preview_downsample = int(preview_cfg.downsample)
75
+ preview_save = bool(preview_cfg.save_mask_preview)
76
+ preview_tiling_save = bool(preview_cfg.save_tiling_preview)
77
+ preview_kwargs: dict[str, Any] = {
78
+ "save_mask_preview": preview_save,
79
+ "save_tiling_preview": preview_tiling_save,
80
+ "downsample": int(preview_cfg.downsample),
81
+ }
82
+ preview_kwargs["tissue_contour_color"] = tuple(
83
+ int(channel) for channel in preview_cfg.tissue_contour_color
84
+ )
85
+ preview_kwargs["mask_overlay_alpha"] = float(preview_cfg.mask_overlay_alpha)
76
86
  return cls(
77
87
  backend=tiling.backend,
78
88
  requested_spacing_um=float(tiling.params.requested_spacing_um),
@@ -103,11 +113,7 @@ class PreprocessingConfig:
103
113
  resume=bool(cfg.resume),
104
114
  segmentation=dict(tiling.seg_params),
105
115
  filtering=dict(tiling.filter_params),
106
- preview={
107
- "save_mask_preview": preview_save,
108
- "save_tiling_preview": preview_save,
109
- "downsample": preview_downsample,
110
- },
116
+ preview=preview_kwargs,
111
117
  )
112
118
 
113
119
  def with_backend(self, backend: str) -> "PreprocessingConfig":
@@ -127,6 +133,7 @@ class ExecutionOptions:
127
133
  prefetch_factor: int = 4
128
134
  persistent_workers: bool = True
129
135
  save_tile_embeddings: bool = False
136
+ save_slide_embeddings: bool = False
130
137
  save_latents: bool = False
131
138
 
132
139
  @classmethod
@@ -151,6 +158,7 @@ class ExecutionOptions:
151
158
  prefetch_factor=prefetch_factor,
152
159
  persistent_workers=persistent_workers,
153
160
  save_tile_embeddings=bool(cfg.model.save_tile_embeddings),
161
+ save_slide_embeddings=bool(cfg.model.save_slide_embeddings),
154
162
  save_latents=bool(cfg.model.save_latents),
155
163
  )
156
164
 
@@ -200,9 +208,17 @@ class RunResult:
200
208
  tile_artifacts: list[TileEmbeddingArtifact]
201
209
  hierarchical_artifacts: list[HierarchicalEmbeddingArtifact]
202
210
  slide_artifacts: list[SlideEmbeddingArtifact]
211
+ patient_artifacts: list[PatientEmbeddingArtifact] = field(default_factory=list)
203
212
  process_list_path: Path | None = None
204
213
 
205
214
 
215
+ @dataclass(frozen=True, kw_only=True)
216
+ class EmbeddedPatient:
217
+ patient_id: str
218
+ patient_embedding: Any # torch.Tensor [D]
219
+ slide_embeddings: dict[str, Any] # {sample_id: torch.Tensor [D]}
220
+
221
+
206
222
  @dataclass(frozen=True, kw_only=True)
207
223
  class EmbeddedSlide:
208
224
  sample_id: str
@@ -343,6 +359,82 @@ class Model:
343
359
  execution=resolved,
344
360
  )
345
361
 
362
+ def embed_patient(
363
+ self,
364
+ slides: SlideSequence,
365
+ patient_id: str | None = None,
366
+ *,
367
+ preprocessing: PreprocessingConfig | None = None,
368
+ execution: ExecutionOptions | None = None,
369
+ ) -> "EmbeddedPatient":
370
+ """Embed a single patient's slides and return one ``EmbeddedPatient``.
371
+
372
+ Convenience wrapper around :meth:`embed_patients` for the common case
373
+ where all *slides* belong to the same patient.
374
+
375
+ Args:
376
+ slides: All slides for this patient.
377
+ patient_id: Optional patient identifier applied to every slide.
378
+ When omitted, ``patient_id`` is read from slide dict keys or
379
+ object attributes; slides that carry no ``patient_id`` fall
380
+ back to ``sample_id``.
381
+ """
382
+ patient_id_map: dict | None = None
383
+ if patient_id is not None:
384
+ patient_id_map = {}
385
+ for s in slides:
386
+ if isinstance(s, (str, Path)):
387
+ patient_id_map[Path(s).stem] = patient_id
388
+ elif isinstance(s, dict):
389
+ patient_id_map[str(s["sample_id"])] = patient_id
390
+ else:
391
+ patient_id_map[str(s.sample_id)] = patient_id
392
+ return self.embed_patients(
393
+ slides,
394
+ patient_id_map=patient_id_map,
395
+ preprocessing=preprocessing,
396
+ execution=execution,
397
+ )[0]
398
+
399
+ def embed_patients(
400
+ self,
401
+ slides: SlideSequence,
402
+ patient_id_map: dict | None = None,
403
+ *,
404
+ preprocessing: PreprocessingConfig | None = None,
405
+ execution: ExecutionOptions | None = None,
406
+ ) -> "list[EmbeddedPatient]":
407
+ """Embed slides and aggregate them into patient-level embeddings.
408
+
409
+ Requires a patient-level model (e.g. ``moozy``). For each patient
410
+ all contributing slide embeddings are aggregated by the model's
411
+ ``encode_patient`` method.
412
+
413
+ Args:
414
+ slides: Slides to process. Each entry may be a path, a
415
+ ``SlideSpec``, or a dict with ``sample_id`` / ``image_path``
416
+ keys. When *patient_id_map* is ``None`` a ``patient_id``
417
+ key in each dict is used to group slides.
418
+ patient_id_map: Optional explicit ``{sample_id: patient_id}``
419
+ mapping. When provided it takes precedence over any
420
+ ``patient_id`` key embedded in the slide dicts. When
421
+ omitted and the slide dicts carry no ``patient_id``, each
422
+ slide is treated as its own patient.
423
+ """
424
+ from slide2vec.inference import embed_patients
425
+
426
+ resolved = _coerce_execution_options(execution, model=self)
427
+ resolved_preprocessing = _resolve_direct_api_preprocessing(self, preprocessing)
428
+ with _auto_progress_reporting(output_dir=resolved.output_dir):
429
+ _validate_model_config(self, resolved_preprocessing, resolved)
430
+ return embed_patients(
431
+ self,
432
+ slides,
433
+ patient_id_map=patient_id_map,
434
+ preprocessing=resolved_preprocessing,
435
+ execution=resolved,
436
+ )
437
+
346
438
  def _load_backend(self) -> LoadedModel:
347
439
  if self._backend is None:
348
440
  from slide2vec.inference import load_model
@@ -357,6 +449,27 @@ class Model:
357
449
  return self._backend
358
450
 
359
451
 
452
+ def list_models(level: str | None = None) -> list[str]:
453
+ """Return the available preset model names in a stable order.
454
+
455
+ Args:
456
+ level: Optional model level filter. Supported values are ``"tile"``,
457
+ ``"slide"``, and ``"patient"``.
458
+ """
459
+ if level is None:
460
+ return sorted(encoder_registry.names())
461
+
462
+ normalized_level = str(level).strip().lower()
463
+ if normalized_level not in {"tile", "slide", "patient"}:
464
+ raise ValueError("list_models(level=...) must be one of: tile, slide, patient")
465
+
466
+ return sorted(
467
+ name
468
+ for name in encoder_registry.names()
469
+ if encoder_registry.info(name)["level"] == normalized_level
470
+ )
471
+
472
+
360
473
  class Pipeline:
361
474
  def __init__(
362
475
  self,
@@ -35,6 +35,20 @@ class SlideEmbeddingArtifact:
35
35
  return load_metadata(self.metadata_path)
36
36
 
37
37
 
38
+ @dataclass(frozen=True, kw_only=True)
39
+ class PatientEmbeddingArtifact:
40
+ patient_id: str
41
+ path: Path
42
+ metadata_path: Path
43
+ format: str
44
+ feature_dim: int
45
+ num_slides: int
46
+
47
+ @property
48
+ def metadata(self) -> dict[str, Any]:
49
+ return load_metadata(self.metadata_path)
50
+
51
+
38
52
  @dataclass(frozen=True, kw_only=True)
39
53
  class HierarchicalEmbeddingArtifact:
40
54
  sample_id: str
@@ -223,6 +237,45 @@ def write_slide_embeddings(
223
237
  )
224
238
 
225
239
 
240
+ def write_patient_embeddings(
241
+ patient_id: str,
242
+ embedding,
243
+ *,
244
+ output_dir: str | Path,
245
+ output_format: str = "pt",
246
+ metadata: dict[str, Any] | None = None,
247
+ num_slides: int = 0,
248
+ ) -> PatientEmbeddingArtifact:
249
+ output_format = _validate_output_format(output_format)
250
+ artifact_path, metadata_path = _setup_artifact_paths(
251
+ output_dir, "patient_embeddings", patient_id, output_format
252
+ )
253
+ embedding_array = _ensure_array(embedding)
254
+ if output_format == "pt":
255
+ torch.save(_ensure_tensor(embedding), artifact_path)
256
+ else:
257
+ np.savez_compressed(artifact_path, features=embedding_array)
258
+
259
+ patient_metadata = {
260
+ "patient_id": patient_id,
261
+ "artifact_type": "patient_embeddings",
262
+ "format": output_format,
263
+ "feature_dim": int(embedding_array.shape[-1]) if embedding_array.ndim else 1,
264
+ "num_slides": num_slides,
265
+ }
266
+ if metadata:
267
+ patient_metadata.update(metadata)
268
+ _write_metadata(metadata_path, patient_metadata)
269
+ return PatientEmbeddingArtifact(
270
+ patient_id=patient_id,
271
+ path=artifact_path,
272
+ metadata_path=metadata_path,
273
+ format=output_format,
274
+ feature_dim=patient_metadata["feature_dim"],
275
+ num_slides=num_slides,
276
+ )
277
+
278
+
226
279
  def write_hierarchical_embeddings(
227
280
  sample_id: str,
228
281
  features,
@@ -7,20 +7,21 @@ import slide2vec.progress as progress
7
7
 
8
8
  def get_args_parser(add_help: bool = True):
9
9
  parser = argparse.ArgumentParser("slide2vec", add_help=add_help)
10
- parser.add_argument("--config-file", default="", metavar="FILE", help="path to config file")
10
+ parser.add_argument("config_file", metavar="CONFIG", help="path to config file")
11
11
  parser.add_argument("--skip-datetime", action="store_true", help="skip run id datetime prefix")
12
12
  parser.add_argument("--tiling-only", action="store_true", help="only run slide tiling")
13
13
  parser.add_argument("--run-on-cpu", action="store_true", help="run inference on cpu")
14
14
  parser.add_argument("--output-dir", type=str, default=None, help="output directory to save artifacts")
15
- parser.add_argument(
16
- "opts",
17
- help='Modify config options at the end of the command using "path.key=value".',
18
- default=None,
19
- nargs=argparse.REMAINDER,
20
- )
21
15
  return parser
22
16
 
23
17
 
18
+ def parse_args(argv=None):
19
+ parser = get_args_parser(add_help=True)
20
+ args, opts = parser.parse_known_args(argv)
21
+ args.opts = opts
22
+ return args
23
+
24
+
24
25
  def build_model_and_pipeline(args):
25
26
  cfg, _cfg_path = setup(args)
26
27
  hf_login()
@@ -39,8 +40,7 @@ def build_model_and_pipeline(args):
39
40
 
40
41
 
41
42
  def main(argv=None):
42
- parser = get_args_parser(add_help=True)
43
- args = parser.parse_args(argv)
43
+ args = parse_args(argv)
44
44
  pipeline, cfg = build_model_and_pipeline(args)
45
45
  reporter = progress.create_cli_progress_reporter(output_dir=getattr(cfg, "output_dir", None))
46
46
  with progress.activate_progress_reporter(reporter):
@@ -50,3 +50,6 @@ def main(argv=None):
50
50
  )
51
51
 
52
52
 
53
+ def entrypoint(argv=None):
54
+ main(argv)
55
+ return 0
@@ -0,0 +1,4 @@
1
+ from slide2vec.configs.resources import load_config
2
+
3
+
4
+ default_config = load_config("default")
@@ -13,6 +13,7 @@ model:
13
13
  output_variant: # requested output variant for presets that expose multiple outputs
14
14
  batch_size: 32
15
15
  save_tile_embeddings: false # whether to save tile embeddings alongside the pooled slide embedding when level is "slide"
16
+ save_slide_embeddings: false # whether to save per-slide embeddings when level is "patient" (e.g. moozy); requires a 'patient_id' column in the input CSV
16
17
  save_latents: false # whether to save the latent representations from the model alongside the slide embedding (only supported for 'prism')
17
18
  allow_non_recommended_settings: false # when true, non-recommended spacing / tile size / precision combinations warn instead of erroring
18
19
 
@@ -37,12 +38,14 @@ tiling:
37
38
  # downsample controls which pyramid level is read for tissue segmentation.
38
39
  # Larger values are faster and use less memory; smaller values can improve mask precision.
39
40
  downsample: 64 # find the closest downsample in the slide for tissue segmentation
40
- sthresh: 8 # segmentation threshold (positive integer, using a higher threshold leads to less foreground and more background detection) (not used when use_otsu=True)
41
+ sthresh: 8 # segmentation threshold (positive integer, using a higher threshold leads to less foreground and more background detection) (not used when method="otsu")
41
42
  sthresh_up: 255 # upper threshold value for scaling the binary mask
42
43
  mthresh: 7 # median filter size (positive, odd integer)
43
44
  close: 4 # additional morphological closing to apply following initial thresholding (positive integer)
44
- use_otsu: false # use otsu's method instead of simple binary thresholding
45
- use_hsv: true # use HSV thresholding instead of simple binary thresholding
45
+ method: "hsv" # tissue segmentation method: "hsv", "otsu", "threshold", or "sam2"
46
+ sam2_checkpoint_path: # optional when method="sam2"; if empty, hs2p downloads the default AtlasPatch checkpoint from Hugging Face
47
+ sam2_config_path: # optional local override for the SAM2 model config; if empty, hs2p downloads the default AtlasPatch config from Hugging Face
48
+ sam2_device: "cpu" # device for SAM2 inference, e.g. "cpu", "cuda", or "cuda:0"
46
49
  filter_params:
47
50
  ref_tile_size: ${tiling.params.requested_tile_size_px} # reference tile size at the target spacing
48
51
  a_t: 4 # area filter threshold for tissue (positive integer, the minimum size of detected foreground contours to consider, relative to the reference tile size ref_tile_size, e.g. a value 10 means only detected foreground contours of size greater than 10 [ref_tile_size, ref_tile_size] tiles at spacing tiling.params.requested_spacing_um will be kept)
@@ -59,9 +62,10 @@ tiling:
59
62
  blur_threshold: 50.0 # minimum blur score (higher is sharper)
60
63
  qc_spacing_um: 2.0 # spacing at which pixel-based QC is evaluated
61
64
  preview:
62
- save: true # save preview images of slide tiling and mask overlays
65
+ save_mask_preview: true # save preview images of mask overlays
66
+ save_tiling_preview: true # save preview images of tile layouts
63
67
  downsample: 32 # downsample to use for preview rendering
64
- mask_overlay_color: [157, 219, 129] # RGB color used for tissue overlays in batch mask previews
68
+ tissue_contour_color: [157, 219, 129] # RGB color used for tissue contours in batch mask previews
65
69
  mask_overlay_alpha: 0.5 # alpha used for tissue overlays in batch mask previews
66
70
 
67
71
  speed:
@@ -1,7 +1,7 @@
1
+ from contextlib import contextmanager
1
2
  from importlib.resources import as_file, files
2
3
  from pathlib import Path
3
4
  from typing import Iterator
4
- from contextlib import contextmanager
5
5
 
6
6
 
7
7
  def config_resource(*parts: str):
@@ -24,3 +24,4 @@ def config_path(*parts: str) -> Iterator[Path]:
24
24
  resource = config_resource(*parts)
25
25
  with as_file(resource) as resolved:
26
26
  yield resolved
27
+
@@ -26,11 +26,10 @@ def main(argv=None) -> int:
26
26
  _compute_tile_embeddings_for_slide,
27
27
  _is_hierarchical_preprocessing,
28
28
  _resolve_hierarchical_geometry,
29
- deserialize_execution,
30
- deserialize_preprocessing,
31
29
  load_successful_tiled_slides,
32
30
  )
33
31
  from slide2vec.progress import JsonlProgressReporter, activate_progress_reporter
32
+ from slide2vec.runtime.serialization import deserialize_execution, deserialize_preprocessing
34
33
 
35
34
  parser = get_args_parser(add_help=True)
36
35
  args = parser.parse_args(argv)
@@ -49,6 +48,7 @@ def main(argv=None) -> int:
49
48
  model_spec["name"],
50
49
  device=f"cuda:{local_rank}",
51
50
  output_variant=model_spec.get("output_variant"),
51
+ allow_non_recommended_settings=bool(model_spec["allow_non_recommended_settings"]),
52
52
  )
53
53
  preprocessing = deserialize_preprocessing(request["preprocessing"])
54
54
  execution = deserialize_execution(request["execution"])