slide2vec 4.1.0__tar.gz → 4.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74)
  1. {slide2vec-4.1.0 → slide2vec-4.2.0}/PKG-INFO +10 -8
  2. {slide2vec-4.1.0 → slide2vec-4.2.0}/README.md +5 -5
  3. {slide2vec-4.1.0 → slide2vec-4.2.0}/pyproject.toml +8 -5
  4. {slide2vec-4.1.0 → slide2vec-4.2.0}/slide2vec/__init__.py +1 -1
  5. {slide2vec-4.1.0 → slide2vec-4.2.0}/slide2vec/api.py +122 -35
  6. {slide2vec-4.1.0 → slide2vec-4.2.0}/slide2vec/artifacts.py +53 -0
  7. {slide2vec-4.1.0 → slide2vec-4.2.0}/slide2vec/cli.py +12 -9
  8. {slide2vec-4.1.0 → slide2vec-4.2.0}/slide2vec/configs/default.yaml +7 -6
  9. {slide2vec-4.1.0 → slide2vec-4.2.0}/slide2vec/data/tile_reader.py +6 -6
  10. {slide2vec-4.1.0 → slide2vec-4.2.0}/slide2vec/encoders/__init__.py +2 -0
  11. {slide2vec-4.1.0 → slide2vec-4.2.0}/slide2vec/encoders/base.py +28 -1
  12. {slide2vec-4.1.0 → slide2vec-4.2.0}/slide2vec/encoders/models/__init__.py +4 -0
  13. {slide2vec-4.1.0 → slide2vec-4.2.0}/slide2vec/encoders/models/gigapath.py +2 -2
  14. slide2vec-4.2.0/slide2vec/encoders/models/lunit.py +21 -0
  15. slide2vec-4.2.0/slide2vec/encoders/models/moozy/__init__.py +114 -0
  16. slide2vec-4.2.0/slide2vec/encoders/models/moozy/blocks.py +272 -0
  17. slide2vec-4.2.0/slide2vec/encoders/models/moozy/case.py +91 -0
  18. slide2vec-4.2.0/slide2vec/encoders/models/moozy/loading.py +103 -0
  19. slide2vec-4.2.0/slide2vec/encoders/models/moozy/slide.py +152 -0
  20. slide2vec-4.2.0/slide2vec/encoders/models/moozy/types.py +13 -0
  21. {slide2vec-4.1.0 → slide2vec-4.2.0}/slide2vec/encoders/registry.py +8 -7
  22. {slide2vec-4.1.0 → slide2vec-4.2.0}/slide2vec/encoders/validation.py +8 -8
  23. {slide2vec-4.1.0 → slide2vec-4.2.0}/slide2vec/inference.py +479 -88
  24. {slide2vec-4.1.0 → slide2vec-4.2.0}/slide2vec/progress.py +12 -0
  25. {slide2vec-4.1.0 → slide2vec-4.2.0}/slide2vec/utils/config.py +18 -13
  26. {slide2vec-4.1.0 → slide2vec-4.2.0}/slide2vec/utils/log_utils.py +7 -1
  27. {slide2vec-4.1.0 → slide2vec-4.2.0}/slide2vec/utils/tiling_io.py +16 -0
  28. {slide2vec-4.1.0 → slide2vec-4.2.0}/slide2vec.egg-info/PKG-INFO +10 -8
  29. {slide2vec-4.1.0 → slide2vec-4.2.0}/slide2vec.egg-info/SOURCES.txt +7 -0
  30. slide2vec-4.2.0/slide2vec.egg-info/entry_points.txt +2 -0
  31. {slide2vec-4.1.0 → slide2vec-4.2.0}/slide2vec.egg-info/requires.txt +5 -2
  32. {slide2vec-4.1.0 → slide2vec-4.2.0}/tests/test_batch_collator_timing.py +5 -5
  33. {slide2vec-4.1.0 → slide2vec-4.2.0}/tests/test_output_consistency.py +5 -4
  34. {slide2vec-4.1.0 → slide2vec-4.2.0}/tests/test_progress.py +42 -2
  35. {slide2vec-4.1.0 → slide2vec-4.2.0}/tests/test_regression_core.py +82 -48
  36. {slide2vec-4.1.0 → slide2vec-4.2.0}/tests/test_regression_inference.py +149 -52
  37. {slide2vec-4.1.0 → slide2vec-4.2.0}/tests/test_regression_models.py +7 -7
  38. slide2vec-4.1.0/slide2vec.egg-info/entry_points.txt +0 -2
  39. {slide2vec-4.1.0 → slide2vec-4.2.0}/LICENSE +0 -0
  40. {slide2vec-4.1.0 → slide2vec-4.2.0}/setup.cfg +0 -0
  41. {slide2vec-4.1.0 → slide2vec-4.2.0}/slide2vec/__main__.py +0 -0
  42. {slide2vec-4.1.0 → slide2vec-4.2.0}/slide2vec/configs/__init__.py +0 -0
  43. {slide2vec-4.1.0 → slide2vec-4.2.0}/slide2vec/data/__init__.py +0 -0
  44. {slide2vec-4.1.0 → slide2vec-4.2.0}/slide2vec/data/dataset.py +0 -0
  45. {slide2vec-4.1.0 → slide2vec-4.2.0}/slide2vec/data/tile_store.py +0 -0
  46. {slide2vec-4.1.0 → slide2vec-4.2.0}/slide2vec/distributed/__init__.py +0 -0
  47. {slide2vec-4.1.0 → slide2vec-4.2.0}/slide2vec/distributed/direct_embed_worker.py +0 -0
  48. {slide2vec-4.1.0 → slide2vec-4.2.0}/slide2vec/distributed/pipeline_worker.py +0 -0
  49. {slide2vec-4.1.0 → slide2vec-4.2.0}/slide2vec/encoders/models/conch.py +0 -0
  50. {slide2vec-4.1.0 → slide2vec-4.2.0}/slide2vec/encoders/models/hibou.py +0 -0
  51. {slide2vec-4.1.0 → slide2vec-4.2.0}/slide2vec/encoders/models/hoptimus.py +0 -0
  52. {slide2vec-4.1.0 → slide2vec-4.2.0}/slide2vec/encoders/models/midnight.py +0 -0
  53. {slide2vec-4.1.0 → slide2vec-4.2.0}/slide2vec/encoders/models/musk.py +0 -0
  54. {slide2vec-4.1.0 → slide2vec-4.2.0}/slide2vec/encoders/models/phikon.py +0 -0
  55. {slide2vec-4.1.0 → slide2vec-4.2.0}/slide2vec/encoders/models/prism.py +0 -0
  56. {slide2vec-4.1.0 → slide2vec-4.2.0}/slide2vec/encoders/models/prost40m.py +0 -0
  57. {slide2vec-4.1.0 → slide2vec-4.2.0}/slide2vec/encoders/models/titan.py +0 -0
  58. {slide2vec-4.1.0 → slide2vec-4.2.0}/slide2vec/encoders/models/uni.py +0 -0
  59. {slide2vec-4.1.0 → slide2vec-4.2.0}/slide2vec/encoders/models/virchow.py +0 -0
  60. {slide2vec-4.1.0 → slide2vec-4.2.0}/slide2vec/main.py +0 -0
  61. {slide2vec-4.1.0 → slide2vec-4.2.0}/slide2vec/model_settings.py +0 -0
  62. {slide2vec-4.1.0 → slide2vec-4.2.0}/slide2vec/registry.py +0 -0
  63. {slide2vec-4.1.0 → slide2vec-4.2.0}/slide2vec/resources.py +0 -0
  64. {slide2vec-4.1.0 → slide2vec-4.2.0}/slide2vec/runtime_types.py +0 -0
  65. {slide2vec-4.1.0 → slide2vec-4.2.0}/slide2vec/utils/__init__.py +0 -0
  66. {slide2vec-4.1.0 → slide2vec-4.2.0}/slide2vec/utils/coordinates.py +0 -0
  67. {slide2vec-4.1.0 → slide2vec-4.2.0}/slide2vec/utils/utils.py +0 -0
  68. {slide2vec-4.1.0 → slide2vec-4.2.0}/slide2vec.egg-info/dependency_links.txt +0 -0
  69. {slide2vec-4.1.0 → slide2vec-4.2.0}/slide2vec.egg-info/not-zip-safe +0 -0
  70. {slide2vec-4.1.0 → slide2vec-4.2.0}/slide2vec.egg-info/top_level.txt +0 -0
  71. {slide2vec-4.1.0 → slide2vec-4.2.0}/tests/test_encoder_registry.py +0 -0
  72. {slide2vec-4.1.0 → slide2vec-4.2.0}/tests/test_hs2p_package_cutover.py +0 -0
  73. {slide2vec-4.1.0 → slide2vec-4.2.0}/tests/test_packaging_metadata.py +0 -0
  74. {slide2vec-4.1.0 → slide2vec-4.2.0}/tests/test_tile_store.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: slide2vec
3
- Version: 4.1.0
3
+ Version: 4.2.0
4
4
  Summary: Embedding of whole slide images with Foundation Models
5
5
  Author-email: Clément Grisi <clement.grisi@radboudumc.nl>
6
6
  License-Expression: Apache-2.0
@@ -15,7 +15,7 @@ Classifier: Programming Language :: Python :: 3.13
15
15
  Requires-Python: >=3.10
16
16
  Description-Content-Type: text/markdown
17
17
  License-File: LICENSE
18
- Requires-Dist: hs2p[asap,cucim,openslide,vips]>=3.1.4
18
+ Requires-Dist: hs2p[asap,cucim,openslide,vips]>=3.2.1
19
19
  Requires-Dist: omegaconf
20
20
  Requires-Dist: matplotlib
21
21
  Requires-Dist: numpy<2
@@ -50,6 +50,8 @@ Requires-Dist: xformers==0.0.31; extra == "prism"
50
50
  Provides-Extra: hibou
51
51
  Requires-Dist: scipy~=1.8.1; extra == "hibou"
52
52
  Requires-Dist: scikit-image~=0.19.3; extra == "hibou"
53
+ Provides-Extra: moozy
54
+ Requires-Dist: huggingface_hub<1.0,>=0.30.0; extra == "moozy"
53
55
  Provides-Extra: titan
54
56
  Requires-Dist: torch==2.0.1; extra == "titan"
55
57
  Requires-Dist: timm==1.0.3; extra == "titan"
@@ -63,7 +65,7 @@ Requires-Dist: numpy<2; extra == "fm"
63
65
  Requires-Dist: pandas; extra == "fm"
64
66
  Requires-Dist: pillow; extra == "fm"
65
67
  Requires-Dist: rich; extra == "fm"
66
- Requires-Dist: hs2p[asap,cucim,openslide,vips]>=3.1.4; extra == "fm"
68
+ Requires-Dist: hs2p[asap,cucim,openslide,vips]>=3.2.1; extra == "fm"
67
69
  Requires-Dist: wandb; extra == "fm"
68
70
  Requires-Dist: torch<2.8,>=2.3; extra == "fm"
69
71
  Requires-Dist: torchvision>=0.18.0; extra == "fm"
@@ -143,8 +145,8 @@ from slide2vec import ExecutionOptions, Pipeline, PreprocessingConfig
143
145
  pipeline = Pipeline(
144
146
  model=model,
145
147
  preprocessing=PreprocessingConfig(
146
- target_spacing_um=0.5,
147
- target_tile_size_px=224,
148
+ requested_spacing_um=0.5,
149
+ requested_tile_size_px=224,
148
150
  tissue_threshold=0.1,
149
151
  ),
150
152
  execution=ExecutionOptions(output_dir="outputs/demo"),
@@ -160,8 +162,8 @@ Tile embeddings can be spatially grouped into regions for downstream models that
160
162
 
161
163
  ```python
162
164
  preprocessing = PreprocessingConfig(
163
- target_spacing_um=0.5,
164
- target_tile_size_px=224,
165
+ requested_spacing_um=0.5,
166
+ requested_tile_size_px=224,
165
167
  region_tile_multiple=6, # 6x6 tiles per region
166
168
  )
167
169
  embedded = model.embed_slide("/path/to/slide.svs", preprocessing=preprocessing)
@@ -210,7 +212,7 @@ The CLI is a thin wrapper over the package API.
210
212
  Bundled configs live under `slide2vec/configs/preprocessing/` and `slide2vec/configs/models/`.
211
213
 
212
214
  ```shell
213
- python -m slide2vec --config-file /path/to/config.yaml
215
+ slide2vec /path/to/config.yaml
214
216
  ```
215
217
 
216
218
  By default, manifest-driven CLI runs use all available GPUs. Set `speed.num_gpus=4` when you want to cap the sharding explicitly.
@@ -45,8 +45,8 @@ from slide2vec import ExecutionOptions, Pipeline, PreprocessingConfig
45
45
  pipeline = Pipeline(
46
46
  model=model,
47
47
  preprocessing=PreprocessingConfig(
48
- target_spacing_um=0.5,
49
- target_tile_size_px=224,
48
+ requested_spacing_um=0.5,
49
+ requested_tile_size_px=224,
50
50
  tissue_threshold=0.1,
51
51
  ),
52
52
  execution=ExecutionOptions(output_dir="outputs/demo"),
@@ -62,8 +62,8 @@ Tile embeddings can be spatially grouped into regions for downstream models that
62
62
 
63
63
  ```python
64
64
  preprocessing = PreprocessingConfig(
65
- target_spacing_um=0.5,
66
- target_tile_size_px=224,
65
+ requested_spacing_um=0.5,
66
+ requested_tile_size_px=224,
67
67
  region_tile_multiple=6, # 6x6 tiles per region
68
68
  )
69
69
  embedded = model.embed_slide("/path/to/slide.svs", preprocessing=preprocessing)
@@ -112,7 +112,7 @@ The CLI is a thin wrapper over the package API.
112
112
  Bundled configs live under `slide2vec/configs/preprocessing/` and `slide2vec/configs/models/`.
113
113
 
114
114
  ```shell
115
- python -m slide2vec --config-file /path/to/config.yaml
115
+ slide2vec /path/to/config.yaml
116
116
  ```
117
117
 
118
118
  By default, manifest-driven CLI runs use all available GPUs. Set `speed.num_gpus=4` when you want to cap the sharding explicitly.
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "slide2vec"
7
- version = "4.1.0"
7
+ version = "4.2.0"
8
8
  description = "Embedding of whole slide images with Foundation Models"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.10"
@@ -21,7 +21,7 @@ classifiers = [
21
21
  "Programming Language :: Python :: 3.13",
22
22
  ]
23
23
  dependencies = [
24
- "hs2p[asap,cucim,openslide,vips]>=3.1.4",
24
+ "hs2p[asap,cucim,openslide,vips]>=3.2.1",
25
25
  "omegaconf",
26
26
  "matplotlib",
27
27
  "numpy<2",
@@ -42,7 +42,7 @@ Homepage = "https://github.com/clemsgrs/slide2vec"
42
42
  "Bug Tracker" = "https://github.com/clemsgrs/slide2vec/issues"
43
43
 
44
44
  [project.scripts]
45
- slide2vec = "slide2vec.cli:main"
45
+ slide2vec = "slide2vec.cli:entrypoint"
46
46
 
47
47
  [project.optional-dependencies]
48
48
  hoptimus = [
@@ -71,6 +71,9 @@ hibou = [
71
71
  "scipy~=1.8.1",
72
72
  "scikit-image~=0.19.3",
73
73
  ]
74
+ moozy = [
75
+ "huggingface_hub>=0.30.0,<1.0",
76
+ ]
74
77
  titan = [
75
78
  "torch==2.0.1",
76
79
  "timm==1.0.3",
@@ -85,7 +88,7 @@ fm = [
85
88
  "pandas",
86
89
  "pillow",
87
90
  "rich",
88
- "hs2p[asap,cucim,openslide,vips]>=3.1.4",
91
+ "hs2p[asap,cucim,openslide,vips]>=3.2.1",
89
92
  "wandb",
90
93
  "torch>=2.3,<2.8",
91
94
  "torchvision>=0.18.0",
@@ -154,7 +157,7 @@ no_implicit_reexport = true
154
157
  max-line-length = 160
155
158
 
156
159
  [tool.bumpver]
157
- current_version = "4.1.0"
160
+ current_version = "4.2.0"
158
161
  version_pattern = "MAJOR.MINOR.PATCH"
159
162
  commit = false # We do version bumping in CI, not as a commit
160
163
  tag = false # Git tag already exists — we don't auto-tag
@@ -2,7 +2,7 @@ from slide2vec.api import EmbeddedSlide, ExecutionOptions, Model, Pipeline, Prep
2
2
  from slide2vec.artifacts import HierarchicalEmbeddingArtifact, SlideEmbeddingArtifact, TileEmbeddingArtifact
3
3
 
4
4
 
5
- __version__ = "4.1.0"
5
+ __version__ = "4.2.0"
6
6
 
7
7
  __all__ = [
8
8
  "Model",
@@ -11,6 +11,7 @@ from hs2p import SlideSpec
11
11
 
12
12
  from slide2vec.artifacts import (
13
13
  HierarchicalEmbeddingArtifact,
14
+ PatientEmbeddingArtifact,
14
15
  SlideEmbeddingArtifact,
15
16
  TileEmbeddingArtifact,
16
17
  )
@@ -42,9 +43,9 @@ TilingResultsInput = Sequence[Any] | Mapping[str, Any]
42
43
  @dataclass(frozen=True, kw_only=True)
43
44
  class PreprocessingConfig:
44
45
  backend: str = "auto"
45
- target_spacing_um: float | None = None
46
- target_tile_size_px: int | None = None
47
- target_region_size_px: int | None = None
46
+ requested_spacing_um: float | None = None
47
+ requested_tile_size_px: int | None = None
48
+ requested_region_size_px: int | None = None
48
49
  region_tile_multiple: int | None = None
49
50
  tolerance: float = 0.05
50
51
  overlap: float = 0.0
@@ -75,11 +76,11 @@ class PreprocessingConfig:
75
76
  preview_downsample = int(preview_cfg.downsample)
76
77
  return cls(
77
78
  backend=tiling.backend,
78
- target_spacing_um=float(tiling.params.target_spacing_um),
79
- target_tile_size_px=int(tiling.params.target_tile_size_px),
80
- target_region_size_px=(
79
+ requested_spacing_um=float(tiling.params.requested_spacing_um),
80
+ requested_tile_size_px=int(tiling.params.requested_tile_size_px),
81
+ requested_region_size_px=(
81
82
  int(v)
82
- if (v := getattr(tiling.params, "target_region_size_px", None)) is not None
83
+ if (v := getattr(tiling.params, "requested_region_size_px", None)) is not None
83
84
  else None
84
85
  ),
85
86
  region_tile_multiple=(
@@ -127,6 +128,7 @@ class ExecutionOptions:
127
128
  prefetch_factor: int = 4
128
129
  persistent_workers: bool = True
129
130
  save_tile_embeddings: bool = False
131
+ save_slide_embeddings: bool = False
130
132
  save_latents: bool = False
131
133
 
132
134
  @classmethod
@@ -151,6 +153,7 @@ class ExecutionOptions:
151
153
  prefetch_factor=prefetch_factor,
152
154
  persistent_workers=persistent_workers,
153
155
  save_tile_embeddings=bool(cfg.model.save_tile_embeddings),
156
+ save_slide_embeddings=bool(cfg.model.save_slide_embeddings),
154
157
  save_latents=bool(cfg.model.save_latents),
155
158
  )
156
159
 
@@ -200,9 +203,17 @@ class RunResult:
200
203
  tile_artifacts: list[TileEmbeddingArtifact]
201
204
  hierarchical_artifacts: list[HierarchicalEmbeddingArtifact]
202
205
  slide_artifacts: list[SlideEmbeddingArtifact]
206
+ patient_artifacts: list[PatientEmbeddingArtifact] = field(default_factory=list)
203
207
  process_list_path: Path | None = None
204
208
 
205
209
 
210
+ @dataclass(frozen=True, kw_only=True)
211
+ class EmbeddedPatient:
212
+ patient_id: str
213
+ patient_embedding: Any # torch.Tensor [D]
214
+ slide_embeddings: dict[str, Any] # {sample_id: torch.Tensor [D]}
215
+
216
+
206
217
  @dataclass(frozen=True, kw_only=True)
207
218
  class EmbeddedSlide:
208
219
  sample_id: str
@@ -343,6 +354,82 @@ class Model:
343
354
  execution=resolved,
344
355
  )
345
356
 
357
+ def embed_patient(
358
+ self,
359
+ slides: SlideSequence,
360
+ patient_id: str | None = None,
361
+ *,
362
+ preprocessing: PreprocessingConfig | None = None,
363
+ execution: ExecutionOptions | None = None,
364
+ ) -> "EmbeddedPatient":
365
+ """Embed a single patient's slides and return one ``EmbeddedPatient``.
366
+
367
+ Convenience wrapper around :meth:`embed_patients` for the common case
368
+ where all *slides* belong to the same patient.
369
+
370
+ Args:
371
+ slides: All slides for this patient.
372
+ patient_id: Optional patient identifier applied to every slide.
373
+ When omitted, ``patient_id`` is read from slide dict keys or
374
+ object attributes; slides that carry no ``patient_id`` fall
375
+ back to ``sample_id``.
376
+ """
377
+ patient_id_map: dict | None = None
378
+ if patient_id is not None:
379
+ patient_id_map = {}
380
+ for s in slides:
381
+ if isinstance(s, (str, Path)):
382
+ patient_id_map[Path(s).stem] = patient_id
383
+ elif isinstance(s, dict):
384
+ patient_id_map[str(s["sample_id"])] = patient_id
385
+ else:
386
+ patient_id_map[str(s.sample_id)] = patient_id
387
+ return self.embed_patients(
388
+ slides,
389
+ patient_id_map=patient_id_map,
390
+ preprocessing=preprocessing,
391
+ execution=execution,
392
+ )[0]
393
+
394
+ def embed_patients(
395
+ self,
396
+ slides: SlideSequence,
397
+ patient_id_map: dict | None = None,
398
+ *,
399
+ preprocessing: PreprocessingConfig | None = None,
400
+ execution: ExecutionOptions | None = None,
401
+ ) -> "list[EmbeddedPatient]":
402
+ """Embed slides and aggregate them into patient-level embeddings.
403
+
404
+ Requires a patient-level model (e.g. ``moozy``). For each patient
405
+ all contributing slide embeddings are aggregated by the model's
406
+ ``encode_patient`` method.
407
+
408
+ Args:
409
+ slides: Slides to process. Each entry may be a path, a
410
+ ``SlideSpec``, or a dict with ``sample_id`` / ``image_path``
411
+ keys. When *patient_id_map* is ``None`` a ``patient_id``
412
+ key in each dict is used to group slides.
413
+ patient_id_map: Optional explicit ``{sample_id: patient_id}``
414
+ mapping. When provided it takes precedence over any
415
+ ``patient_id`` key embedded in the slide dicts. When
416
+ omitted and the slide dicts carry no ``patient_id``, each
417
+ slide is treated as its own patient.
418
+ """
419
+ from slide2vec.inference import embed_patients
420
+
421
+ resolved = _coerce_execution_options(execution, model=self)
422
+ resolved_preprocessing = _resolve_direct_api_preprocessing(self, preprocessing)
423
+ with _auto_progress_reporting(output_dir=resolved.output_dir):
424
+ _validate_model_config(self, resolved_preprocessing, resolved)
425
+ return embed_patients(
426
+ self,
427
+ slides,
428
+ patient_id_map=patient_id_map,
429
+ preprocessing=resolved_preprocessing,
430
+ execution=resolved,
431
+ )
432
+
346
433
  def _load_backend(self) -> LoadedModel:
347
434
  if self._backend is None:
348
435
  from slide2vec.inference import load_model
@@ -454,28 +541,28 @@ def _resolve_direct_api_preprocessing(
454
541
  return defaults
455
542
 
456
543
  if preprocessing is None:
457
- target_tile_size_px, target_spacing_um = ensure_defaults()
544
+ requested_tile_size_px, requested_spacing_um = ensure_defaults()
458
545
  return _resolve_hierarchical_preprocessing(
459
546
  PreprocessingConfig(
460
547
  backend="auto",
461
- target_spacing_um=target_spacing_um,
462
- target_tile_size_px=target_tile_size_px,
548
+ requested_spacing_um=requested_spacing_um,
549
+ requested_tile_size_px=requested_tile_size_px,
463
550
  )
464
551
  )
465
552
 
466
- target_spacing_um = preprocessing.target_spacing_um
467
- target_tile_size_px = preprocessing.target_tile_size_px
468
- if target_spacing_um is None or target_tile_size_px is None:
553
+ requested_spacing_um = preprocessing.requested_spacing_um
554
+ requested_tile_size_px = preprocessing.requested_tile_size_px
555
+ if requested_spacing_um is None or requested_tile_size_px is None:
469
556
  default_tile_size_px, default_spacing_um = ensure_defaults()
470
- if target_spacing_um is None:
471
- target_spacing_um = default_spacing_um
472
- if target_tile_size_px is None:
473
- target_tile_size_px = default_tile_size_px
557
+ if requested_spacing_um is None:
558
+ requested_spacing_um = default_spacing_um
559
+ if requested_tile_size_px is None:
560
+ requested_tile_size_px = default_tile_size_px
474
561
  return _resolve_hierarchical_preprocessing(
475
562
  replace(
476
563
  preprocessing,
477
- target_spacing_um=target_spacing_um,
478
- target_tile_size_px=target_tile_size_px,
564
+ requested_spacing_um=requested_spacing_um,
565
+ requested_tile_size_px=requested_tile_size_px,
479
566
  )
480
567
  )
481
568
 
@@ -484,7 +571,7 @@ def _default_preprocessing_from_registry(name: str | None) -> tuple[int, float]:
484
571
  if not name or name not in encoder_registry:
485
572
  raise ValueError(
486
573
  "Cannot infer preprocessing defaults without a registered model. "
487
- "Pass preprocessing.target_spacing_um and preprocessing.target_tile_size_px explicitly."
574
+ "Pass preprocessing.requested_spacing_um and preprocessing.requested_tile_size_px explicitly."
488
575
  )
489
576
 
490
577
  defaults = resolve_preprocessing_defaults(name)
@@ -499,7 +586,7 @@ def _validate_model_config(
499
586
  name = model.name
500
587
  if name not in encoder_registry:
501
588
  return
502
- if preprocessing.region_tile_multiple is not None or preprocessing.target_region_size_px is not None:
589
+ if preprocessing.region_tile_multiple is not None or preprocessing.requested_region_size_px is not None:
503
590
  info = encoder_registry.info(name)
504
591
  if info["level"] != "tile":
505
592
  raise ValueError("Hierarchical preprocessing is only supported for tile encoders")
@@ -508,8 +595,8 @@ def _validate_model_config(
508
595
  precision = None if on_cpu or execution is None else execution.precision
509
596
  validate_encoder_config(
510
597
  name,
511
- target_tile_size_px=preprocessing.target_tile_size_px,
512
- target_spacing_um=preprocessing.target_spacing_um,
598
+ requested_tile_size_px=preprocessing.requested_tile_size_px,
599
+ requested_spacing_um=preprocessing.requested_spacing_um,
513
600
  precision=precision,
514
601
  output_variant=model._output_variant,
515
602
  allow_non_recommended=bool(model.allow_non_recommended_settings),
@@ -518,32 +605,32 @@ def _validate_model_config(
518
605
 
519
606
  def _resolve_hierarchical_preprocessing(preprocessing: PreprocessingConfig) -> PreprocessingConfig:
520
607
  multiple = preprocessing.region_tile_multiple
521
- target_region_size_px = preprocessing.target_region_size_px
608
+ requested_region_size_px = preprocessing.requested_region_size_px
522
609
  if multiple is not None:
523
610
  multiple = int(multiple)
524
611
  if multiple < 2:
525
612
  raise ValueError("region_tile_multiple must be at least 2")
526
- if multiple is None and target_region_size_px is None:
613
+ if multiple is None and requested_region_size_px is None:
527
614
  return preprocessing
528
- if preprocessing.target_tile_size_px is None:
615
+ if preprocessing.requested_tile_size_px is None:
529
616
  raise ValueError(
530
- "target_tile_size_px must be resolved before deriving hierarchical region geometry"
617
+ "requested_tile_size_px must be resolved before deriving hierarchical region geometry"
531
618
  )
532
- if target_region_size_px is None:
533
- target_region_size_px = int(preprocessing.target_tile_size_px) * int(multiple)
619
+ if requested_region_size_px is None:
620
+ requested_region_size_px = int(preprocessing.requested_tile_size_px) * int(multiple)
534
621
  elif multiple is None:
535
- if int(target_region_size_px) % int(preprocessing.target_tile_size_px) != 0:
622
+ if int(requested_region_size_px) % int(preprocessing.requested_tile_size_px) != 0:
536
623
  raise ValueError(
537
- "target_region_size_px must be an exact multiple of target_tile_size_px"
624
+ "requested_region_size_px must be an exact multiple of requested_tile_size_px"
538
625
  )
539
- multiple = int(target_region_size_px) // int(preprocessing.target_tile_size_px)
540
- elif int(target_region_size_px) != int(preprocessing.target_tile_size_px) * int(multiple):
626
+ multiple = int(requested_region_size_px) // int(preprocessing.requested_tile_size_px)
627
+ elif int(requested_region_size_px) != int(preprocessing.requested_tile_size_px) * int(multiple):
541
628
  raise ValueError(
542
- "target_region_size_px must match target_tile_size_px * region_tile_multiple"
629
+ "requested_region_size_px must match requested_tile_size_px * region_tile_multiple"
543
630
  )
544
631
  return replace(
545
632
  preprocessing,
546
- target_region_size_px=int(target_region_size_px),
633
+ requested_region_size_px=int(requested_region_size_px),
547
634
  region_tile_multiple=int(multiple),
548
635
  )
549
636
 
@@ -35,6 +35,20 @@ class SlideEmbeddingArtifact:
35
35
  return load_metadata(self.metadata_path)
36
36
 
37
37
 
38
+ @dataclass(frozen=True, kw_only=True)
39
+ class PatientEmbeddingArtifact:
40
+ patient_id: str
41
+ path: Path
42
+ metadata_path: Path
43
+ format: str
44
+ feature_dim: int
45
+ num_slides: int
46
+
47
+ @property
48
+ def metadata(self) -> dict[str, Any]:
49
+ return load_metadata(self.metadata_path)
50
+
51
+
38
52
  @dataclass(frozen=True, kw_only=True)
39
53
  class HierarchicalEmbeddingArtifact:
40
54
  sample_id: str
@@ -223,6 +237,45 @@ def write_slide_embeddings(
223
237
  )
224
238
 
225
239
 
240
+ def write_patient_embeddings(
241
+ patient_id: str,
242
+ embedding,
243
+ *,
244
+ output_dir: str | Path,
245
+ output_format: str = "pt",
246
+ metadata: dict[str, Any] | None = None,
247
+ num_slides: int = 0,
248
+ ) -> PatientEmbeddingArtifact:
249
+ output_format = _validate_output_format(output_format)
250
+ artifact_path, metadata_path = _setup_artifact_paths(
251
+ output_dir, "patient_embeddings", patient_id, output_format
252
+ )
253
+ embedding_array = _ensure_array(embedding)
254
+ if output_format == "pt":
255
+ torch.save(_ensure_tensor(embedding), artifact_path)
256
+ else:
257
+ np.savez_compressed(artifact_path, features=embedding_array)
258
+
259
+ patient_metadata = {
260
+ "patient_id": patient_id,
261
+ "artifact_type": "patient_embeddings",
262
+ "format": output_format,
263
+ "feature_dim": int(embedding_array.shape[-1]) if embedding_array.ndim else 1,
264
+ "num_slides": num_slides,
265
+ }
266
+ if metadata:
267
+ patient_metadata.update(metadata)
268
+ _write_metadata(metadata_path, patient_metadata)
269
+ return PatientEmbeddingArtifact(
270
+ patient_id=patient_id,
271
+ path=artifact_path,
272
+ metadata_path=metadata_path,
273
+ format=output_format,
274
+ feature_dim=patient_metadata["feature_dim"],
275
+ num_slides=num_slides,
276
+ )
277
+
278
+
226
279
  def write_hierarchical_embeddings(
227
280
  sample_id: str,
228
281
  features,
@@ -7,20 +7,21 @@ import slide2vec.progress as progress
7
7
 
8
8
  def get_args_parser(add_help: bool = True):
9
9
  parser = argparse.ArgumentParser("slide2vec", add_help=add_help)
10
- parser.add_argument("--config-file", default="", metavar="FILE", help="path to config file")
10
+ parser.add_argument("config_file", metavar="CONFIG", help="path to config file")
11
11
  parser.add_argument("--skip-datetime", action="store_true", help="skip run id datetime prefix")
12
12
  parser.add_argument("--tiling-only", action="store_true", help="only run slide tiling")
13
13
  parser.add_argument("--run-on-cpu", action="store_true", help="run inference on cpu")
14
14
  parser.add_argument("--output-dir", type=str, default=None, help="output directory to save artifacts")
15
- parser.add_argument(
16
- "opts",
17
- help='Modify config options at the end of the command using "path.key=value".',
18
- default=None,
19
- nargs=argparse.REMAINDER,
20
- )
21
15
  return parser
22
16
 
23
17
 
18
+ def parse_args(argv=None):
19
+ parser = get_args_parser(add_help=True)
20
+ args, opts = parser.parse_known_args(argv)
21
+ args.opts = opts
22
+ return args
23
+
24
+
24
25
  def build_model_and_pipeline(args):
25
26
  cfg, _cfg_path = setup(args)
26
27
  hf_login()
@@ -39,8 +40,7 @@ def build_model_and_pipeline(args):
39
40
 
40
41
 
41
42
  def main(argv=None):
42
- parser = get_args_parser(add_help=True)
43
- args = parser.parse_args(argv)
43
+ args = parse_args(argv)
44
44
  pipeline, cfg = build_model_and_pipeline(args)
45
45
  reporter = progress.create_cli_progress_reporter(output_dir=getattr(cfg, "output_dir", None))
46
46
  with progress.activate_progress_reporter(reporter):
@@ -50,3 +50,6 @@ def main(argv=None):
50
50
  )
51
51
 
52
52
 
53
+ def entrypoint(argv=None):
54
+ main(argv)
55
+ return 0
@@ -13,6 +13,7 @@ model:
13
13
  output_variant: # requested output variant for presets that expose multiple outputs
14
14
  batch_size: 32
15
15
  save_tile_embeddings: false # whether to save tile embeddings alongside the pooled slide embedding when level is "slide"
16
+ save_slide_embeddings: false # whether to save per-slide embeddings when level is "patient" (e.g. moozy); requires a 'patient_id' column in the input CSV
16
17
  save_latents: false # whether to save the latent representations from the model alongside the slide embedding (only supported for 'prism')
17
18
  allow_non_recommended_settings: false # when true, non-recommended spacing / tile size / precision combinations warn instead of erroring
18
19
 
@@ -26,10 +27,10 @@ tiling:
26
27
  read_tiles_from: # path to an existing directory containing pre-extracted `.tiles.tar` tile stores to reuse instead of starting tiling from scratch
27
28
  backend: "auto" # backend to use for slide reading; "auto" lets hs2p resolve the best backend per slide, preferring cuCIM when available
28
29
  params:
29
- target_spacing_um: # spacing at which to tile the slide, in microns per pixel; filled from a preset model when available
30
+ requested_spacing_um: # spacing at which to tile the slide, in microns per pixel; filled from a preset model when available
30
31
  tolerance: 0.05 # tolerance for matching the spacing (float between 0 and 1, deciding how much the spacing can deviate from the one specified in the slide metadata)
31
- target_tile_size_px: # size of the tiles to extract, in pixels; filled from a preset model when available
32
- target_region_size_px: # size of hierarchical parent regions in pixels; when unset and region_tile_multiple is set, derived from target_tile_size_px * region_tile_multiple
32
+ requested_tile_size_px: # size of the tiles to extract, in pixels; filled from a preset model when available
33
+ requested_region_size_px: # size of hierarchical parent regions in pixels; when unset and region_tile_multiple is set, derived from requested_tile_size_px * region_tile_multiple
33
34
  region_tile_multiple: # hierarchical region grid width/height in tiles; e.g. 6 means 6x6 tiles per region
34
35
  overlap: 0.0 # percentage of overlap between two consecutive tiles (float between 0 and 1)
35
36
  tissue_threshold: 0.1 # minimum fraction of pixels that must be tissue to keep a tile (float between 0 and 1)
@@ -44,8 +45,8 @@ tiling:
44
45
  use_otsu: false # use otsu's method instead of simple binary thresholding
45
46
  use_hsv: true # use HSV thresholding instead of simple binary thresholding
46
47
  filter_params:
47
- ref_tile_size: ${tiling.params.target_tile_size_px} # reference tile size at the target spacing
48
- a_t: 4 # area filter threshold for tissue (positive integer, the minimum size of detected foreground contours to consider, relative to the reference tile size ref_tile_size, e.g. a value 10 means only detected foreground contours of size greater than 10 [ref_tile_size, ref_tile_size] tiles at spacing tiling.params.target_spacing_um will be kept)
48
+ ref_tile_size: ${tiling.params.requested_tile_size_px} # reference tile size at the target spacing
49
+ a_t: 4 # area filter threshold for tissue (positive integer, the minimum size of detected foreground contours to consider, relative to the reference tile size ref_tile_size, e.g. a value 10 means only detected foreground contours of size greater than 10 [ref_tile_size, ref_tile_size] tiles at spacing tiling.params.requested_spacing_um will be kept)
49
50
  a_h: 2 # area filter threshold for holes (positive integer, the minimum size of detected holes/cavities in foreground contours to avoid, once again relative to the reference tile size ref_tile_size)
50
51
  filter_white: false # whether to filter out mostly white tiles
51
52
  filter_black: false # whether to filter out mostly black tiles
@@ -78,7 +79,7 @@ wandb:
78
79
  project: "" # wandb project name
79
80
  username: "" # wandb username
80
81
  exp_name: "" # wandb experiment name
81
- tags: ["features", "${model.name}", "${tiling.params.target_tile_size_px}"] # wandb tags
82
+ tags: ["features", "${model.name}", "${tiling.params.requested_tile_size_px}"] # wandb tags
82
83
  dir: "/home/user/"
83
84
  group:
84
85
  resume_id: "${resume_dirname}"
@@ -89,7 +89,7 @@ class WSITileReader:
89
89
  self._num_cucim_workers = num_cucim_workers
90
90
  self._gpu_decode = gpu_decode
91
91
  self._read_level = int(tiling_result.read_level)
92
- self._tile_size_px = int(tiling_result.effective_tile_size_px)
92
+ self._tile_size_px = int(tiling_result.read_tile_size_px)
93
93
  self._x = tiling_result.x
94
94
  self._y = tiling_result.y
95
95
  self._reader = None
@@ -215,7 +215,7 @@ class OnTheFlyBatchTileCollator:
215
215
  gpu_decode: bool = False,
216
216
  use_supertiles: bool = True,
217
217
  ):
218
- self.tile_size = int(tiling_result.effective_tile_size_px)
218
+ self.tile_size = int(tiling_result.read_tile_size_px)
219
219
  self._reader = WSITileReader(
220
220
  image_path,
221
221
  tiling_result,
@@ -354,8 +354,8 @@ class OnTheFlyHierarchicalBatchCollator:
354
354
  tiling_result: TilingResult,
355
355
  region_index: np.ndarray,
356
356
  subtile_index_within_region: np.ndarray,
357
- effective_region_size_px: int,
358
- effective_tile_size_px: int,
357
+ read_region_size_px: int,
358
+ read_tile_size_px: int,
359
359
  backend: str = "cucim",
360
360
  num_cucim_workers: int = 4,
361
361
  gpu_decode: bool = False,
@@ -363,11 +363,11 @@ class OnTheFlyHierarchicalBatchCollator:
363
363
  self._region_index = np.asarray(region_index, dtype=np.int32)
364
364
  self._subtile_index_within_region = np.asarray(subtile_index_within_region, dtype=np.int32)
365
365
  self._tiles_per_region = int(self._subtile_index_within_region.max()) + 1 if len(self._subtile_index_within_region) else 0
366
- self._tile_size = int(effective_tile_size_px)
366
+ self._tile_size = int(read_tile_size_px)
367
367
  self._reader = WSIRegionReader(
368
368
  image_path,
369
369
  read_level=int(tiling_result.read_level),
370
- region_size_px=int(effective_region_size_px),
370
+ region_size_px=int(read_region_size_px),
371
371
  backend=backend,
372
372
  num_cucim_workers=num_cucim_workers,
373
373
  gpu_decode=gpu_decode,
@@ -6,6 +6,7 @@ the ``models`` subpackage.
6
6
 
7
7
  from slide2vec.encoders.base import (
8
8
  Encoder,
9
+ PatientEncoder,
9
10
  SlideEncoder,
10
11
  TileEncoder,
11
12
  TimmTileEncoder,
@@ -24,6 +25,7 @@ from slide2vec.encoders import models as _models_pkg # noqa: F401
24
25
 
25
26
  __all__ = [
26
27
  "Encoder",
28
+ "PatientEncoder",
27
29
  "TileEncoder",
28
30
  "SlideEncoder",
29
31
  "TimmTileEncoder",