slide2vec 4.1.1__tar.gz → 4.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74)
  1. {slide2vec-4.1.1 → slide2vec-4.2.0}/PKG-INFO +6 -4
  2. {slide2vec-4.1.1 → slide2vec-4.2.0}/README.md +1 -1
  3. {slide2vec-4.1.1 → slide2vec-4.2.0}/pyproject.toml +8 -5
  4. {slide2vec-4.1.1 → slide2vec-4.2.0}/slide2vec/__init__.py +1 -1
  5. {slide2vec-4.1.1 → slide2vec-4.2.0}/slide2vec/api.py +87 -0
  6. {slide2vec-4.1.1 → slide2vec-4.2.0}/slide2vec/artifacts.py +53 -0
  7. {slide2vec-4.1.1 → slide2vec-4.2.0}/slide2vec/cli.py +12 -9
  8. {slide2vec-4.1.1 → slide2vec-4.2.0}/slide2vec/configs/default.yaml +1 -0
  9. {slide2vec-4.1.1 → slide2vec-4.2.0}/slide2vec/encoders/__init__.py +2 -0
  10. {slide2vec-4.1.1 → slide2vec-4.2.0}/slide2vec/encoders/base.py +27 -0
  11. {slide2vec-4.1.1 → slide2vec-4.2.0}/slide2vec/encoders/models/__init__.py +4 -0
  12. slide2vec-4.2.0/slide2vec/encoders/models/lunit.py +21 -0
  13. slide2vec-4.2.0/slide2vec/encoders/models/moozy/__init__.py +114 -0
  14. slide2vec-4.2.0/slide2vec/encoders/models/moozy/blocks.py +272 -0
  15. slide2vec-4.2.0/slide2vec/encoders/models/moozy/case.py +91 -0
  16. slide2vec-4.2.0/slide2vec/encoders/models/moozy/loading.py +103 -0
  17. slide2vec-4.2.0/slide2vec/encoders/models/moozy/slide.py +152 -0
  18. slide2vec-4.2.0/slide2vec/encoders/models/moozy/types.py +13 -0
  19. {slide2vec-4.1.1 → slide2vec-4.2.0}/slide2vec/encoders/registry.py +6 -5
  20. {slide2vec-4.1.1 → slide2vec-4.2.0}/slide2vec/inference.py +396 -17
  21. {slide2vec-4.1.1 → slide2vec-4.2.0}/slide2vec/progress.py +12 -0
  22. {slide2vec-4.1.1 → slide2vec-4.2.0}/slide2vec/utils/config.py +10 -5
  23. {slide2vec-4.1.1 → slide2vec-4.2.0}/slide2vec/utils/tiling_io.py +16 -0
  24. {slide2vec-4.1.1 → slide2vec-4.2.0}/slide2vec.egg-info/PKG-INFO +6 -4
  25. {slide2vec-4.1.1 → slide2vec-4.2.0}/slide2vec.egg-info/SOURCES.txt +7 -0
  26. slide2vec-4.2.0/slide2vec.egg-info/entry_points.txt +2 -0
  27. {slide2vec-4.1.1 → slide2vec-4.2.0}/slide2vec.egg-info/requires.txt +5 -2
  28. {slide2vec-4.1.1 → slide2vec-4.2.0}/tests/test_output_consistency.py +3 -2
  29. {slide2vec-4.1.1 → slide2vec-4.2.0}/tests/test_progress.py +40 -0
  30. {slide2vec-4.1.1 → slide2vec-4.2.0}/tests/test_regression_core.py +36 -2
  31. {slide2vec-4.1.1 → slide2vec-4.2.0}/tests/test_regression_inference.py +87 -11
  32. slide2vec-4.1.1/slide2vec.egg-info/entry_points.txt +0 -2
  33. {slide2vec-4.1.1 → slide2vec-4.2.0}/LICENSE +0 -0
  34. {slide2vec-4.1.1 → slide2vec-4.2.0}/setup.cfg +0 -0
  35. {slide2vec-4.1.1 → slide2vec-4.2.0}/slide2vec/__main__.py +0 -0
  36. {slide2vec-4.1.1 → slide2vec-4.2.0}/slide2vec/configs/__init__.py +0 -0
  37. {slide2vec-4.1.1 → slide2vec-4.2.0}/slide2vec/data/__init__.py +0 -0
  38. {slide2vec-4.1.1 → slide2vec-4.2.0}/slide2vec/data/dataset.py +0 -0
  39. {slide2vec-4.1.1 → slide2vec-4.2.0}/slide2vec/data/tile_reader.py +0 -0
  40. {slide2vec-4.1.1 → slide2vec-4.2.0}/slide2vec/data/tile_store.py +0 -0
  41. {slide2vec-4.1.1 → slide2vec-4.2.0}/slide2vec/distributed/__init__.py +0 -0
  42. {slide2vec-4.1.1 → slide2vec-4.2.0}/slide2vec/distributed/direct_embed_worker.py +0 -0
  43. {slide2vec-4.1.1 → slide2vec-4.2.0}/slide2vec/distributed/pipeline_worker.py +0 -0
  44. {slide2vec-4.1.1 → slide2vec-4.2.0}/slide2vec/encoders/models/conch.py +0 -0
  45. {slide2vec-4.1.1 → slide2vec-4.2.0}/slide2vec/encoders/models/gigapath.py +0 -0
  46. {slide2vec-4.1.1 → slide2vec-4.2.0}/slide2vec/encoders/models/hibou.py +0 -0
  47. {slide2vec-4.1.1 → slide2vec-4.2.0}/slide2vec/encoders/models/hoptimus.py +0 -0
  48. {slide2vec-4.1.1 → slide2vec-4.2.0}/slide2vec/encoders/models/midnight.py +0 -0
  49. {slide2vec-4.1.1 → slide2vec-4.2.0}/slide2vec/encoders/models/musk.py +0 -0
  50. {slide2vec-4.1.1 → slide2vec-4.2.0}/slide2vec/encoders/models/phikon.py +0 -0
  51. {slide2vec-4.1.1 → slide2vec-4.2.0}/slide2vec/encoders/models/prism.py +0 -0
  52. {slide2vec-4.1.1 → slide2vec-4.2.0}/slide2vec/encoders/models/prost40m.py +0 -0
  53. {slide2vec-4.1.1 → slide2vec-4.2.0}/slide2vec/encoders/models/titan.py +0 -0
  54. {slide2vec-4.1.1 → slide2vec-4.2.0}/slide2vec/encoders/models/uni.py +0 -0
  55. {slide2vec-4.1.1 → slide2vec-4.2.0}/slide2vec/encoders/models/virchow.py +0 -0
  56. {slide2vec-4.1.1 → slide2vec-4.2.0}/slide2vec/encoders/validation.py +0 -0
  57. {slide2vec-4.1.1 → slide2vec-4.2.0}/slide2vec/main.py +0 -0
  58. {slide2vec-4.1.1 → slide2vec-4.2.0}/slide2vec/model_settings.py +0 -0
  59. {slide2vec-4.1.1 → slide2vec-4.2.0}/slide2vec/registry.py +0 -0
  60. {slide2vec-4.1.1 → slide2vec-4.2.0}/slide2vec/resources.py +0 -0
  61. {slide2vec-4.1.1 → slide2vec-4.2.0}/slide2vec/runtime_types.py +0 -0
  62. {slide2vec-4.1.1 → slide2vec-4.2.0}/slide2vec/utils/__init__.py +0 -0
  63. {slide2vec-4.1.1 → slide2vec-4.2.0}/slide2vec/utils/coordinates.py +0 -0
  64. {slide2vec-4.1.1 → slide2vec-4.2.0}/slide2vec/utils/log_utils.py +0 -0
  65. {slide2vec-4.1.1 → slide2vec-4.2.0}/slide2vec/utils/utils.py +0 -0
  66. {slide2vec-4.1.1 → slide2vec-4.2.0}/slide2vec.egg-info/dependency_links.txt +0 -0
  67. {slide2vec-4.1.1 → slide2vec-4.2.0}/slide2vec.egg-info/not-zip-safe +0 -0
  68. {slide2vec-4.1.1 → slide2vec-4.2.0}/slide2vec.egg-info/top_level.txt +0 -0
  69. {slide2vec-4.1.1 → slide2vec-4.2.0}/tests/test_batch_collator_timing.py +0 -0
  70. {slide2vec-4.1.1 → slide2vec-4.2.0}/tests/test_encoder_registry.py +0 -0
  71. {slide2vec-4.1.1 → slide2vec-4.2.0}/tests/test_hs2p_package_cutover.py +0 -0
  72. {slide2vec-4.1.1 → slide2vec-4.2.0}/tests/test_packaging_metadata.py +0 -0
  73. {slide2vec-4.1.1 → slide2vec-4.2.0}/tests/test_regression_models.py +0 -0
  74. {slide2vec-4.1.1 → slide2vec-4.2.0}/tests/test_tile_store.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: slide2vec
3
- Version: 4.1.1
3
+ Version: 4.2.0
4
4
  Summary: Embedding of whole slide images with Foundation Models
5
5
  Author-email: Clément Grisi <clement.grisi@radboudumc.nl>
6
6
  License-Expression: Apache-2.0
@@ -15,7 +15,7 @@ Classifier: Programming Language :: Python :: 3.13
15
15
  Requires-Python: >=3.10
16
16
  Description-Content-Type: text/markdown
17
17
  License-File: LICENSE
18
- Requires-Dist: hs2p[asap,cucim,openslide,vips]>=3.2.0
18
+ Requires-Dist: hs2p[asap,cucim,openslide,vips]>=3.2.1
19
19
  Requires-Dist: omegaconf
20
20
  Requires-Dist: matplotlib
21
21
  Requires-Dist: numpy<2
@@ -50,6 +50,8 @@ Requires-Dist: xformers==0.0.31; extra == "prism"
50
50
  Provides-Extra: hibou
51
51
  Requires-Dist: scipy~=1.8.1; extra == "hibou"
52
52
  Requires-Dist: scikit-image~=0.19.3; extra == "hibou"
53
+ Provides-Extra: moozy
54
+ Requires-Dist: huggingface_hub<1.0,>=0.30.0; extra == "moozy"
53
55
  Provides-Extra: titan
54
56
  Requires-Dist: torch==2.0.1; extra == "titan"
55
57
  Requires-Dist: timm==1.0.3; extra == "titan"
@@ -63,7 +65,7 @@ Requires-Dist: numpy<2; extra == "fm"
63
65
  Requires-Dist: pandas; extra == "fm"
64
66
  Requires-Dist: pillow; extra == "fm"
65
67
  Requires-Dist: rich; extra == "fm"
66
- Requires-Dist: hs2p[asap,cucim,openslide,vips]>=3.2.0; extra == "fm"
68
+ Requires-Dist: hs2p[asap,cucim,openslide,vips]>=3.2.1; extra == "fm"
67
69
  Requires-Dist: wandb; extra == "fm"
68
70
  Requires-Dist: torch<2.8,>=2.3; extra == "fm"
69
71
  Requires-Dist: torchvision>=0.18.0; extra == "fm"
@@ -210,7 +212,7 @@ The CLI is a thin wrapper over the package API.
210
212
  Bundled configs live under `slide2vec/configs/preprocessing/` and `slide2vec/configs/models/`.
211
213
 
212
214
  ```shell
213
- python -m slide2vec --config-file /path/to/config.yaml
215
+ slide2vec /path/to/config.yaml
214
216
  ```
215
217
 
216
218
  By default, manifest-driven CLI runs use all available GPUs. Set `speed.num_gpus=4` when you want to cap the sharding explicitly.
@@ -112,7 +112,7 @@ The CLI is a thin wrapper over the package API.
112
112
  Bundled configs live under `slide2vec/configs/preprocessing/` and `slide2vec/configs/models/`.
113
113
 
114
114
  ```shell
115
- python -m slide2vec --config-file /path/to/config.yaml
115
+ slide2vec /path/to/config.yaml
116
116
  ```
117
117
 
118
118
  By default, manifest-driven CLI runs use all available GPUs. Set `speed.num_gpus=4` when you want to cap the sharding explicitly.
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "slide2vec"
7
- version = "4.1.1"
7
+ version = "4.2.0"
8
8
  description = "Embedding of whole slide images with Foundation Models"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.10"
@@ -21,7 +21,7 @@ classifiers = [
21
21
  "Programming Language :: Python :: 3.13",
22
22
  ]
23
23
  dependencies = [
24
- "hs2p[asap,cucim,openslide,vips]>=3.2.0",
24
+ "hs2p[asap,cucim,openslide,vips]>=3.2.1",
25
25
  "omegaconf",
26
26
  "matplotlib",
27
27
  "numpy<2",
@@ -42,7 +42,7 @@ Homepage = "https://github.com/clemsgrs/slide2vec"
42
42
  "Bug Tracker" = "https://github.com/clemsgrs/slide2vec/issues"
43
43
 
44
44
  [project.scripts]
45
- slide2vec = "slide2vec.cli:main"
45
+ slide2vec = "slide2vec.cli:entrypoint"
46
46
 
47
47
  [project.optional-dependencies]
48
48
  hoptimus = [
@@ -71,6 +71,9 @@ hibou = [
71
71
  "scipy~=1.8.1",
72
72
  "scikit-image~=0.19.3",
73
73
  ]
74
+ moozy = [
75
+ "huggingface_hub>=0.30.0,<1.0",
76
+ ]
74
77
  titan = [
75
78
  "torch==2.0.1",
76
79
  "timm==1.0.3",
@@ -85,7 +88,7 @@ fm = [
85
88
  "pandas",
86
89
  "pillow",
87
90
  "rich",
88
- "hs2p[asap,cucim,openslide,vips]>=3.2.0",
91
+ "hs2p[asap,cucim,openslide,vips]>=3.2.1",
89
92
  "wandb",
90
93
  "torch>=2.3,<2.8",
91
94
  "torchvision>=0.18.0",
@@ -154,7 +157,7 @@ no_implicit_reexport = true
154
157
  max-line-length = 160
155
158
 
156
159
  [tool.bumpver]
157
- current_version = "4.1.1"
160
+ current_version = "4.2.0"
158
161
  version_pattern = "MAJOR.MINOR.PATCH"
159
162
  commit = false # We do version bumping in CI, not as a commit
160
163
  tag = false # Git tag already exists — we don't auto-tag
@@ -2,7 +2,7 @@ from slide2vec.api import EmbeddedSlide, ExecutionOptions, Model, Pipeline, Prep
2
2
  from slide2vec.artifacts import HierarchicalEmbeddingArtifact, SlideEmbeddingArtifact, TileEmbeddingArtifact
3
3
 
4
4
 
5
- __version__ = "4.1.1"
5
+ __version__ = "4.2.0"
6
6
 
7
7
  __all__ = [
8
8
  "Model",
@@ -11,6 +11,7 @@ from hs2p import SlideSpec
11
11
 
12
12
  from slide2vec.artifacts import (
13
13
  HierarchicalEmbeddingArtifact,
14
+ PatientEmbeddingArtifact,
14
15
  SlideEmbeddingArtifact,
15
16
  TileEmbeddingArtifact,
16
17
  )
@@ -127,6 +128,7 @@ class ExecutionOptions:
127
128
  prefetch_factor: int = 4
128
129
  persistent_workers: bool = True
129
130
  save_tile_embeddings: bool = False
131
+ save_slide_embeddings: bool = False
130
132
  save_latents: bool = False
131
133
 
132
134
  @classmethod
@@ -151,6 +153,7 @@ class ExecutionOptions:
151
153
  prefetch_factor=prefetch_factor,
152
154
  persistent_workers=persistent_workers,
153
155
  save_tile_embeddings=bool(cfg.model.save_tile_embeddings),
156
+ save_slide_embeddings=bool(cfg.model.save_slide_embeddings),
154
157
  save_latents=bool(cfg.model.save_latents),
155
158
  )
156
159
 
@@ -200,9 +203,17 @@ class RunResult:
200
203
  tile_artifacts: list[TileEmbeddingArtifact]
201
204
  hierarchical_artifacts: list[HierarchicalEmbeddingArtifact]
202
205
  slide_artifacts: list[SlideEmbeddingArtifact]
206
+ patient_artifacts: list[PatientEmbeddingArtifact] = field(default_factory=list)
203
207
  process_list_path: Path | None = None
204
208
 
205
209
 
210
+ @dataclass(frozen=True, kw_only=True)
211
+ class EmbeddedPatient:
212
+ patient_id: str
213
+ patient_embedding: Any # torch.Tensor [D]
214
+ slide_embeddings: dict[str, Any] # {sample_id: torch.Tensor [D]}
215
+
216
+
206
217
  @dataclass(frozen=True, kw_only=True)
207
218
  class EmbeddedSlide:
208
219
  sample_id: str
@@ -343,6 +354,82 @@ class Model:
343
354
  execution=resolved,
344
355
  )
345
356
 
357
+ def embed_patient(
358
+ self,
359
+ slides: SlideSequence,
360
+ patient_id: str | None = None,
361
+ *,
362
+ preprocessing: PreprocessingConfig | None = None,
363
+ execution: ExecutionOptions | None = None,
364
+ ) -> "EmbeddedPatient":
365
+ """Embed a single patient's slides and return one ``EmbeddedPatient``.
366
+
367
+ Convenience wrapper around :meth:`embed_patients` for the common case
368
+ where all *slides* belong to the same patient.
369
+
370
+ Args:
371
+ slides: All slides for this patient.
372
+ patient_id: Optional patient identifier applied to every slide.
373
+ When omitted, ``patient_id`` is read from slide dict keys or
374
+ object attributes; slides that carry no ``patient_id`` fall
375
+ back to ``sample_id``.
376
+ """
377
+ patient_id_map: dict | None = None
378
+ if patient_id is not None:
379
+ patient_id_map = {}
380
+ for s in slides:
381
+ if isinstance(s, (str, Path)):
382
+ patient_id_map[Path(s).stem] = patient_id
383
+ elif isinstance(s, dict):
384
+ patient_id_map[str(s["sample_id"])] = patient_id
385
+ else:
386
+ patient_id_map[str(s.sample_id)] = patient_id
387
+ return self.embed_patients(
388
+ slides,
389
+ patient_id_map=patient_id_map,
390
+ preprocessing=preprocessing,
391
+ execution=execution,
392
+ )[0]
393
+
394
+ def embed_patients(
395
+ self,
396
+ slides: SlideSequence,
397
+ patient_id_map: dict | None = None,
398
+ *,
399
+ preprocessing: PreprocessingConfig | None = None,
400
+ execution: ExecutionOptions | None = None,
401
+ ) -> "list[EmbeddedPatient]":
402
+ """Embed slides and aggregate them into patient-level embeddings.
403
+
404
+ Requires a patient-level model (e.g. ``moozy``). For each patient
405
+ all contributing slide embeddings are aggregated by the model's
406
+ ``encode_patient`` method.
407
+
408
+ Args:
409
+ slides: Slides to process. Each entry may be a path, a
410
+ ``SlideSpec``, or a dict with ``sample_id`` / ``image_path``
411
+ keys. When *patient_id_map* is ``None`` a ``patient_id``
412
+ key in each dict is used to group slides.
413
+ patient_id_map: Optional explicit ``{sample_id: patient_id}``
414
+ mapping. When provided it takes precedence over any
415
+ ``patient_id`` key embedded in the slide dicts. When
416
+ omitted and the slide dicts carry no ``patient_id``, each
417
+ slide is treated as its own patient.
418
+ """
419
+ from slide2vec.inference import embed_patients
420
+
421
+ resolved = _coerce_execution_options(execution, model=self)
422
+ resolved_preprocessing = _resolve_direct_api_preprocessing(self, preprocessing)
423
+ with _auto_progress_reporting(output_dir=resolved.output_dir):
424
+ _validate_model_config(self, resolved_preprocessing, resolved)
425
+ return embed_patients(
426
+ self,
427
+ slides,
428
+ patient_id_map=patient_id_map,
429
+ preprocessing=resolved_preprocessing,
430
+ execution=resolved,
431
+ )
432
+
346
433
  def _load_backend(self) -> LoadedModel:
347
434
  if self._backend is None:
348
435
  from slide2vec.inference import load_model
@@ -35,6 +35,20 @@ class SlideEmbeddingArtifact:
35
35
  return load_metadata(self.metadata_path)
36
36
 
37
37
 
38
+ @dataclass(frozen=True, kw_only=True)
39
+ class PatientEmbeddingArtifact:
40
+ patient_id: str
41
+ path: Path
42
+ metadata_path: Path
43
+ format: str
44
+ feature_dim: int
45
+ num_slides: int
46
+
47
+ @property
48
+ def metadata(self) -> dict[str, Any]:
49
+ return load_metadata(self.metadata_path)
50
+
51
+
38
52
  @dataclass(frozen=True, kw_only=True)
39
53
  class HierarchicalEmbeddingArtifact:
40
54
  sample_id: str
@@ -223,6 +237,45 @@ def write_slide_embeddings(
223
237
  )
224
238
 
225
239
 
240
+ def write_patient_embeddings(
241
+ patient_id: str,
242
+ embedding,
243
+ *,
244
+ output_dir: str | Path,
245
+ output_format: str = "pt",
246
+ metadata: dict[str, Any] | None = None,
247
+ num_slides: int = 0,
248
+ ) -> PatientEmbeddingArtifact:
249
+ output_format = _validate_output_format(output_format)
250
+ artifact_path, metadata_path = _setup_artifact_paths(
251
+ output_dir, "patient_embeddings", patient_id, output_format
252
+ )
253
+ embedding_array = _ensure_array(embedding)
254
+ if output_format == "pt":
255
+ torch.save(_ensure_tensor(embedding), artifact_path)
256
+ else:
257
+ np.savez_compressed(artifact_path, features=embedding_array)
258
+
259
+ patient_metadata = {
260
+ "patient_id": patient_id,
261
+ "artifact_type": "patient_embeddings",
262
+ "format": output_format,
263
+ "feature_dim": int(embedding_array.shape[-1]) if embedding_array.ndim else 1,
264
+ "num_slides": num_slides,
265
+ }
266
+ if metadata:
267
+ patient_metadata.update(metadata)
268
+ _write_metadata(metadata_path, patient_metadata)
269
+ return PatientEmbeddingArtifact(
270
+ patient_id=patient_id,
271
+ path=artifact_path,
272
+ metadata_path=metadata_path,
273
+ format=output_format,
274
+ feature_dim=patient_metadata["feature_dim"],
275
+ num_slides=num_slides,
276
+ )
277
+
278
+
226
279
  def write_hierarchical_embeddings(
227
280
  sample_id: str,
228
281
  features,
@@ -7,20 +7,21 @@ import slide2vec.progress as progress
7
7
 
8
8
  def get_args_parser(add_help: bool = True):
9
9
  parser = argparse.ArgumentParser("slide2vec", add_help=add_help)
10
- parser.add_argument("--config-file", default="", metavar="FILE", help="path to config file")
10
+ parser.add_argument("config_file", metavar="CONFIG", help="path to config file")
11
11
  parser.add_argument("--skip-datetime", action="store_true", help="skip run id datetime prefix")
12
12
  parser.add_argument("--tiling-only", action="store_true", help="only run slide tiling")
13
13
  parser.add_argument("--run-on-cpu", action="store_true", help="run inference on cpu")
14
14
  parser.add_argument("--output-dir", type=str, default=None, help="output directory to save artifacts")
15
- parser.add_argument(
16
- "opts",
17
- help='Modify config options at the end of the command using "path.key=value".',
18
- default=None,
19
- nargs=argparse.REMAINDER,
20
- )
21
15
  return parser
22
16
 
23
17
 
18
+ def parse_args(argv=None):
19
+ parser = get_args_parser(add_help=True)
20
+ args, opts = parser.parse_known_args(argv)
21
+ args.opts = opts
22
+ return args
23
+
24
+
24
25
  def build_model_and_pipeline(args):
25
26
  cfg, _cfg_path = setup(args)
26
27
  hf_login()
@@ -39,8 +40,7 @@ def build_model_and_pipeline(args):
39
40
 
40
41
 
41
42
  def main(argv=None):
42
- parser = get_args_parser(add_help=True)
43
- args = parser.parse_args(argv)
43
+ args = parse_args(argv)
44
44
  pipeline, cfg = build_model_and_pipeline(args)
45
45
  reporter = progress.create_cli_progress_reporter(output_dir=getattr(cfg, "output_dir", None))
46
46
  with progress.activate_progress_reporter(reporter):
@@ -50,3 +50,6 @@ def main(argv=None):
50
50
  )
51
51
 
52
52
 
53
+ def entrypoint(argv=None):
54
+ main(argv)
55
+ return 0
@@ -13,6 +13,7 @@ model:
13
13
  output_variant: # requested output variant for presets that expose multiple outputs
14
14
  batch_size: 32
15
15
  save_tile_embeddings: false # whether to save tile embeddings alongside the pooled slide embedding when level is "slide"
16
+ save_slide_embeddings: false # whether to save per-slide embeddings when level is "patient" (e.g. moozy); requires a 'patient_id' column in the input CSV
16
17
  save_latents: false # whether to save the latent representations from the model alongside the slide embedding (only supported for 'prism')
17
18
  allow_non_recommended_settings: false # when true, non-recommended spacing / tile size / precision combinations warn instead of erroring
18
19
 
@@ -6,6 +6,7 @@ the ``models`` subpackage.
6
6
 
7
7
  from slide2vec.encoders.base import (
8
8
  Encoder,
9
+ PatientEncoder,
9
10
  SlideEncoder,
10
11
  TileEncoder,
11
12
  TimmTileEncoder,
@@ -24,6 +25,7 @@ from slide2vec.encoders import models as _models_pkg # noqa: F401
24
25
 
25
26
  __all__ = [
26
27
  "Encoder",
28
+ "PatientEncoder",
27
29
  "TileEncoder",
28
30
  "SlideEncoder",
29
31
  "TimmTileEncoder",
@@ -96,6 +96,33 @@ class SlideEncoder(Encoder):
96
96
  return coordinates
97
97
 
98
98
 
99
+ class PatientEncoder(Encoder):
100
+ """Base class for encoders that aggregate slide embeddings into patient embeddings."""
101
+
102
+ tile_encoder: TileEncoder | None = None
103
+
104
+ def encode_tiles(self, batch: Tensor) -> Tensor:
105
+ if self.tile_encoder is None:
106
+ raise AttributeError("patient encoders must attach a tile_encoder before encoding tiles")
107
+ return self.tile_encoder.encode_tiles(batch)
108
+
109
+ @abstractmethod
110
+ def encode_slide(
111
+ self,
112
+ tile_features: Tensor,
113
+ coordinates: Tensor | None = None,
114
+ *,
115
+ tile_size_lv0: int | None = None,
116
+ ) -> Tensor:
117
+ """Pool tile-level features into a single slide-level embedding."""
118
+ ...
119
+
120
+ @abstractmethod
121
+ def encode_patient(self, slide_embeddings: Tensor) -> Tensor:
122
+ """Aggregate slide embeddings [S, D] into a single patient-level embedding [D]."""
123
+ ...
124
+
125
+
99
126
  class TimmTileEncoder(TileEncoder):
100
127
  """Convenience base for timm-backed tile encoders."""
101
128
 
@@ -8,7 +8,9 @@ from . import (
8
8
  gigapath,
9
9
  hibou,
10
10
  hoptimus,
11
+ lunit,
11
12
  midnight,
13
+ moozy,
12
14
  musk,
13
15
  phikon,
14
16
  prost40m,
@@ -23,7 +25,9 @@ __all__ = [
23
25
  "gigapath",
24
26
  "hibou",
25
27
  "hoptimus",
28
+ "lunit",
26
29
  "midnight",
30
+ "moozy",
27
31
  "musk",
28
32
  "phikon",
29
33
  "prost40m",
@@ -0,0 +1,21 @@
1
+ """Lunit ViT-S/8 tile encoder implementation."""
2
+
3
+ from slide2vec.encoders.base import TimmTileEncoder
4
+ from slide2vec.encoders.registry import register_encoder
5
+
6
+
7
+ @register_encoder(
8
+ "lunit",
9
+ output_variants={"default": {"encode_dim": 384}},
10
+ default_output_variant="default",
11
+ input_size=224,
12
+ supported_spacing_um=0.5,
13
+ precision="fp32",
14
+ source="1aurent/vit_small_patch8_224.lunit_dino",
15
+ )
16
+ class LunitTileEncoder(TimmTileEncoder):
17
+ def __init__(self, *, output_variant: str | None = None):
18
+ super().__init__(
19
+ "hf_hub:1aurent/vit_small_patch8_224.lunit_dino",
20
+ output_variant=output_variant,
21
+ )
@@ -0,0 +1,114 @@
1
+ """MOOZY slide and patient encoder implementations."""
2
+
3
+ import torch
4
+
5
+ from slide2vec.encoders.base import PatientEncoder, SlideEncoder, preferred_default_device, resolve_requested_output_variant
6
+ from .loading import load_moozy_inference_components
7
+ from slide2vec.encoders.registry import register_encoder
8
+
9
+ __all__ = [
10
+ "MOOZYSlideEncoder",
11
+ "MOOZYPatientEncoder",
12
+ ]
13
+
14
+
15
+ @register_encoder(
16
+ "moozy-slide",
17
+ level="slide",
18
+ tile_encoder="lunit",
19
+ tile_encoder_output_variant="default",
20
+ output_variants={"default": {"encode_dim": 768}},
21
+ default_output_variant="default",
22
+ supported_spacing_um=0.5,
23
+ precision="fp32",
24
+ source="AtlasAnalyticsLab/MOOZY",
25
+ )
26
+ class MOOZYSlideEncoder(SlideEncoder):
27
+ def __init__(self, *, output_variant: str | None = None):
28
+ components = load_moozy_inference_components(device=torch.device("cpu"))
29
+ self._model = components.slide_encoder.eval()
30
+ self._device = preferred_default_device()
31
+ self._output_variant = resolve_requested_output_variant(output_variant)
32
+
33
+ @property
34
+ def encode_dim(self) -> int:
35
+ return 768
36
+
37
+ @property
38
+ def device(self) -> torch.device:
39
+ return self._device
40
+
41
+ def to(self, device: torch.device | str) -> "MOOZYSlideEncoder":
42
+ self._device = torch.device(device)
43
+ self._model = self._model.to(self._device)
44
+ return self
45
+
46
+ def encode_slide(
47
+ self,
48
+ tile_features: torch.Tensor,
49
+ coordinates: torch.Tensor | None = None,
50
+ *,
51
+ tile_size_lv0: int | None = None,
52
+ ) -> torch.Tensor:
53
+ if coordinates is None or tile_size_lv0 is None:
54
+ raise ValueError("MOOZY slide encoding requires coordinates and tile_size_lv0")
55
+ # MOOZYSlideEncoder expects [B, crop_h, crop_w, feat_dim]; use [1, 1, N, D]
56
+ x = tile_features.unsqueeze(0).unsqueeze(0)
57
+ coords = coordinates.unsqueeze(0).to(torch.float32)
58
+ patch_sizes = torch.tensor([tile_size_lv0], dtype=torch.float32, device=tile_features.device)
59
+ cls, _, _ = self._model(x, coords_xy=coords, patch_sizes=patch_sizes)
60
+ return cls.squeeze(0)
61
+
62
+
63
+ @register_encoder(
64
+ "moozy",
65
+ level="patient",
66
+ tile_encoder="lunit",
67
+ tile_encoder_output_variant="default",
68
+ output_variants={"default": {"encode_dim": 768}},
69
+ default_output_variant="default",
70
+ supported_spacing_um=0.5,
71
+ precision="fp32",
72
+ source="AtlasAnalyticsLab/MOOZY",
73
+ )
74
+
75
+
76
+ class MOOZYPatientEncoder(PatientEncoder):
77
+ def __init__(self, *, output_variant: str | None = None):
78
+ components = load_moozy_inference_components(device=torch.device("cpu"))
79
+ self._slide_model = components.slide_encoder.eval()
80
+ self._case_transformer = components.case_transformer.eval()
81
+ self._device = preferred_default_device()
82
+ self._output_variant = resolve_requested_output_variant(output_variant)
83
+
84
+ @property
85
+ def encode_dim(self) -> int:
86
+ return 768
87
+
88
+ @property
89
+ def device(self) -> torch.device:
90
+ return self._device
91
+
92
+ def to(self, device: torch.device | str) -> "MOOZYPatientEncoder":
93
+ self._device = torch.device(device)
94
+ self._slide_model = self._slide_model.to(self._device)
95
+ self._case_transformer = self._case_transformer.to(self._device)
96
+ return self
97
+
98
+ def encode_slide(
99
+ self,
100
+ tile_features: torch.Tensor,
101
+ coordinates: torch.Tensor | None = None,
102
+ *,
103
+ tile_size_lv0: int | None = None,
104
+ ) -> torch.Tensor:
105
+ if coordinates is None or tile_size_lv0 is None:
106
+ raise ValueError("MOOZY patient encoding requires coordinates and tile_size_lv0")
107
+ x = tile_features.unsqueeze(0).unsqueeze(0)
108
+ coords = coordinates.unsqueeze(0).to(torch.float32)
109
+ patch_sizes = torch.tensor([tile_size_lv0], dtype=torch.float32, device=tile_features.device)
110
+ cls, _, _ = self._slide_model(x, coords_xy=coords, patch_sizes=patch_sizes)
111
+ return cls.squeeze(0)
112
+
113
+ def encode_patient(self, slide_embeddings: torch.Tensor) -> torch.Tensor:
114
+ return self._case_transformer(slide_embeddings)