slide2vec 4.8.0__tar.gz → 5.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {slide2vec-4.8.0 → slide2vec-5.0.0}/PKG-INFO +4 -4
- {slide2vec-4.8.0 → slide2vec-5.0.0}/README.md +1 -1
- {slide2vec-4.8.0 → slide2vec-5.0.0}/pyproject.toml +4 -4
- {slide2vec-4.8.0 → slide2vec-5.0.0}/slide2vec/__init__.py +1 -1
- {slide2vec-4.8.0 → slide2vec-5.0.0}/slide2vec/api.py +93 -5
- {slide2vec-4.8.0 → slide2vec-5.0.0}/slide2vec/runtime/artifacts_collect.py +4 -5
- {slide2vec-4.8.0 → slide2vec-5.0.0}/slide2vec/runtime/distributed.py +4 -4
- {slide2vec-4.8.0 → slide2vec-5.0.0}/slide2vec/runtime/embedding_persist.py +1 -0
- {slide2vec-4.8.0 → slide2vec-5.0.0}/slide2vec/runtime/persistence.py +5 -4
- {slide2vec-4.8.0 → slide2vec-5.0.0}/slide2vec/runtime/tiling.py +4 -3
- {slide2vec-4.8.0 → slide2vec-5.0.0}/slide2vec/utils/tiling_io.py +4 -5
- {slide2vec-4.8.0 → slide2vec-5.0.0}/slide2vec.egg-info/PKG-INFO +4 -4
- {slide2vec-4.8.0 → slide2vec-5.0.0}/slide2vec.egg-info/requires.txt +2 -2
- {slide2vec-4.8.0 → slide2vec-5.0.0}/tests/test_regression_core.py +82 -6
- {slide2vec-4.8.0 → slide2vec-5.0.0}/tests/test_regression_inference.py +31 -5
- {slide2vec-4.8.0 → slide2vec-5.0.0}/tests/test_regression_models.py +182 -114
- {slide2vec-4.8.0 → slide2vec-5.0.0}/LICENSE +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.0}/setup.cfg +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.0}/slide2vec/__main__.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.0}/slide2vec/artifacts.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.0}/slide2vec/cli.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.0}/slide2vec/configs/__init__.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.0}/slide2vec/configs/default.yaml +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.0}/slide2vec/configs/resources.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.0}/slide2vec/data/__init__.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.0}/slide2vec/data/dataset.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.0}/slide2vec/data/tile_reader.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.0}/slide2vec/data/tile_store.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.0}/slide2vec/distributed/__init__.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.0}/slide2vec/distributed/direct_embed_worker.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.0}/slide2vec/distributed/pipeline_worker.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.0}/slide2vec/encoders/__init__.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.0}/slide2vec/encoders/base.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.0}/slide2vec/encoders/models/__init__.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.0}/slide2vec/encoders/models/conch.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.0}/slide2vec/encoders/models/gigapath.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.0}/slide2vec/encoders/models/hibou.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.0}/slide2vec/encoders/models/hoptimus.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.0}/slide2vec/encoders/models/lunit.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.0}/slide2vec/encoders/models/midnight.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.0}/slide2vec/encoders/models/moozy/__init__.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.0}/slide2vec/encoders/models/moozy/blocks.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.0}/slide2vec/encoders/models/moozy/case.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.0}/slide2vec/encoders/models/moozy/loading.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.0}/slide2vec/encoders/models/moozy/slide.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.0}/slide2vec/encoders/models/moozy/types.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.0}/slide2vec/encoders/models/musk.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.0}/slide2vec/encoders/models/phikon.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.0}/slide2vec/encoders/models/prism.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.0}/slide2vec/encoders/models/prost40m.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.0}/slide2vec/encoders/models/titan.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.0}/slide2vec/encoders/models/uni.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.0}/slide2vec/encoders/models/virchow.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.0}/slide2vec/encoders/registry.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.0}/slide2vec/encoders/validation.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.0}/slide2vec/inference.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.0}/slide2vec/progress.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.0}/slide2vec/runtime/__init__.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.0}/slide2vec/runtime/batching.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.0}/slide2vec/runtime/cpu_budget.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.0}/slide2vec/runtime/dense_regions.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.0}/slide2vec/runtime/distributed_stage.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.0}/slide2vec/runtime/embedding.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.0}/slide2vec/runtime/embedding_pipeline.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.0}/slide2vec/runtime/hierarchical.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.0}/slide2vec/runtime/manifest.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.0}/slide2vec/runtime/model_settings.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.0}/slide2vec/runtime/patient_pipeline.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.0}/slide2vec/runtime/persist_callbacks.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.0}/slide2vec/runtime/process_list.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.0}/slide2vec/runtime/progress_bridge.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.0}/slide2vec/runtime/registry.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.0}/slide2vec/runtime/serialization.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.0}/slide2vec/runtime/slide_encode.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.0}/slide2vec/runtime/tiling_pipeline.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.0}/slide2vec/runtime/types.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.0}/slide2vec/runtime/worker_io.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.0}/slide2vec/utils/__init__.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.0}/slide2vec/utils/config.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.0}/slide2vec/utils/coordinates.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.0}/slide2vec/utils/log_utils.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.0}/slide2vec/utils/utils.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.0}/slide2vec.egg-info/SOURCES.txt +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.0}/slide2vec.egg-info/dependency_links.txt +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.0}/slide2vec.egg-info/entry_points.txt +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.0}/slide2vec.egg-info/not-zip-safe +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.0}/slide2vec.egg-info/top_level.txt +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.0}/tests/test_architecture_runtime_split.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.0}/tests/test_attention_extraction.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.0}/tests/test_dense_extraction.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.0}/tests/test_dense_locality_gated.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.0}/tests/test_dense_regions.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.0}/tests/test_encoder_registry.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.0}/tests/test_hs2p_package_cutover.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.0}/tests/test_output_consistency.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.0}/tests/test_progress.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.0}/tests/test_runtime_batching.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.0}/tests/test_tile_store.py +0 -0
- {slide2vec-4.8.0 → slide2vec-5.0.0}/tests/test_tiling_pipeline.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: slide2vec
|
|
3
|
-
Version: 4.8.0
|
|
3
|
+
Version: 5.0.0
|
|
4
4
|
Summary: Embedding of whole slide images with Foundation Models
|
|
5
5
|
Author-email: Clément Grisi <clement.grisi@radboudumc.nl>
|
|
6
6
|
License-Expression: Apache-2.0
|
|
@@ -15,7 +15,7 @@ Classifier: Programming Language :: Python :: 3.13
|
|
|
15
15
|
Requires-Python: >=3.10
|
|
16
16
|
Description-Content-Type: text/markdown
|
|
17
17
|
License-File: LICENSE
|
|
18
|
-
Requires-Dist: hs2p[asap,cucim,openslide,sam2,vips]>=4.
|
|
18
|
+
Requires-Dist: hs2p[asap,cucim,openslide,sam2,vips]>=4.2.0
|
|
19
19
|
Requires-Dist: omegaconf
|
|
20
20
|
Requires-Dist: matplotlib
|
|
21
21
|
Requires-Dist: numpy<2
|
|
@@ -65,7 +65,7 @@ Requires-Dist: numpy<2; extra == "fm"
|
|
|
65
65
|
Requires-Dist: pandas; extra == "fm"
|
|
66
66
|
Requires-Dist: pillow; extra == "fm"
|
|
67
67
|
Requires-Dist: rich; extra == "fm"
|
|
68
|
-
Requires-Dist: hs2p[asap,cucim,openslide,sam2,vips]>=4.
|
|
68
|
+
Requires-Dist: hs2p[asap,cucim,openslide,sam2,vips]>=4.2.0; extra == "fm"
|
|
69
69
|
Requires-Dist: wandb; extra == "fm"
|
|
70
70
|
Requires-Dist: torch<2.8,>=2.3; extra == "fm"
|
|
71
71
|
Requires-Dist: torchvision>=0.18.0; extra == "fm"
|
|
@@ -169,7 +169,7 @@ pipeline = Pipeline(
|
|
|
169
169
|
preprocessing=PreprocessingConfig(
|
|
170
170
|
requested_spacing_um=0.5,
|
|
171
171
|
requested_tile_size_px=224,
|
|
172
|
-
|
|
172
|
+
masks={"min_coverage": {"tissue": 0.1}},
|
|
173
173
|
),
|
|
174
174
|
execution=ExecutionOptions(output_dir="outputs/demo"),
|
|
175
175
|
)
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "slide2vec"
|
|
7
|
-
version = "4.8.0"
|
|
7
|
+
version = "5.0.0"
|
|
8
8
|
description = "Embedding of whole slide images with Foundation Models"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
requires-python = ">=3.10"
|
|
@@ -21,7 +21,7 @@ classifiers = [
|
|
|
21
21
|
"Programming Language :: Python :: 3.13",
|
|
22
22
|
]
|
|
23
23
|
dependencies = [
|
|
24
|
-
"hs2p[asap,cucim,openslide,sam2,vips]>=4.
|
|
24
|
+
"hs2p[asap,cucim,openslide,sam2,vips]>=4.2.0",
|
|
25
25
|
"omegaconf",
|
|
26
26
|
"matplotlib",
|
|
27
27
|
"numpy<2",
|
|
@@ -88,7 +88,7 @@ fm = [
|
|
|
88
88
|
"pandas",
|
|
89
89
|
"pillow",
|
|
90
90
|
"rich",
|
|
91
|
-
"hs2p[asap,cucim,openslide,sam2,vips]>=4.
|
|
91
|
+
"hs2p[asap,cucim,openslide,sam2,vips]>=4.2.0",
|
|
92
92
|
"wandb",
|
|
93
93
|
"torch>=2.3,<2.8",
|
|
94
94
|
"torchvision>=0.18.0",
|
|
@@ -164,7 +164,7 @@ no_implicit_reexport = true
|
|
|
164
164
|
max-line-length = 160
|
|
165
165
|
|
|
166
166
|
[tool.bumpver]
|
|
167
|
-
current_version = "4.8.0"
|
|
167
|
+
current_version = "5.0.0"
|
|
168
168
|
version_pattern = "MAJOR.MINOR.PATCH"
|
|
169
169
|
commit = false # We do version bumping in CI, not as a commit
|
|
170
170
|
tag = false # Git tag already exists — we don't auto-tag
|
|
@@ -351,6 +351,11 @@ class EmbeddedSlide:
|
|
|
351
351
|
image_path: Path
|
|
352
352
|
#: Path to the tissue mask used for tiling, if any.
|
|
353
353
|
mask_path: Path | None = None
|
|
354
|
+
#: Annotation class this bag of tiles was sampled for. ``"tissue"`` for the
|
|
355
|
+
#: default tissue-only path, ``"merged"`` for the union output mode, or the
|
|
356
|
+
#: class name (e.g. ``"tumor"``) when annotation-aware sampling fans a slide
|
|
357
|
+
#: out into one bag per class. See the annotation-aware sampling documentation.
|
|
358
|
+
annotation: str | None = None
|
|
354
359
|
#: Number of tiles extracted from the slide.
|
|
355
360
|
num_tiles: int | None = None
|
|
356
361
|
#: Path to the mask preview image, if generated.
|
|
@@ -442,12 +447,13 @@ class Model:
|
|
|
442
447
|
self,
|
|
443
448
|
slide: SlideInput,
|
|
444
449
|
*,
|
|
450
|
+
annotation: str | list[str] | None = None,
|
|
445
451
|
preprocessing: PreprocessingConfig | None = None,
|
|
446
452
|
execution: ExecutionOptions | None = None,
|
|
447
453
|
sample_id: str | None = None,
|
|
448
454
|
mask_path: PathLike | None = None,
|
|
449
455
|
spacing_at_level_0: float | None = None,
|
|
450
|
-
) -> EmbeddedSlide:
|
|
456
|
+
) -> EmbeddedSlide | list[EmbeddedSlide]:
|
|
451
457
|
if isinstance(slide, (str, Path)):
|
|
452
458
|
slide = {
|
|
453
459
|
"sample_id": sample_id or Path(slide).stem,
|
|
@@ -459,31 +465,42 @@ class Model:
|
|
|
459
465
|
raise ValueError(
|
|
460
466
|
"sample_id, mask_path, and spacing_at_level_0 overrides are only supported when slide is a path-like input"
|
|
461
467
|
)
|
|
462
|
-
|
|
468
|
+
requested = None if isinstance(annotation, str) else annotation
|
|
469
|
+
grouped = self.embed_slides(
|
|
463
470
|
[slide],
|
|
471
|
+
annotations=requested,
|
|
464
472
|
preprocessing=preprocessing,
|
|
465
473
|
execution=execution,
|
|
466
|
-
)
|
|
474
|
+
)
|
|
475
|
+
# Single slide in → at most one outer key out. Flatten to the inner
|
|
476
|
+
# {label: EmbeddedSlide} mapping (empty when the run produced nothing).
|
|
477
|
+
bags: dict[str, EmbeddedSlide] = {}
|
|
478
|
+
for inner in grouped.values():
|
|
479
|
+
bags = inner
|
|
480
|
+
break
|
|
481
|
+
return _select_embedded_bag(bags, annotation)
|
|
467
482
|
|
|
468
483
|
def embed_slides(
|
|
469
484
|
self,
|
|
470
485
|
slides: SlideSequence,
|
|
471
486
|
*,
|
|
487
|
+
annotations: list[str] | None = None,
|
|
472
488
|
preprocessing: PreprocessingConfig | None = None,
|
|
473
489
|
execution: ExecutionOptions | None = None,
|
|
474
|
-
) ->
|
|
490
|
+
) -> dict[str, dict[str, EmbeddedSlide]]:
|
|
475
491
|
from slide2vec.inference import embed_slides
|
|
476
492
|
|
|
477
493
|
resolved = _coerce_execution_options(execution, model=self)
|
|
478
494
|
resolved_preprocessing = _resolve_direct_api_preprocessing(self, preprocessing)
|
|
479
495
|
with _auto_progress_reporting(output_dir=resolved.output_dir):
|
|
480
496
|
_validate_model_config(self, resolved_preprocessing, resolved)
|
|
481
|
-
|
|
497
|
+
embedded = embed_slides(
|
|
482
498
|
self,
|
|
483
499
|
slides,
|
|
484
500
|
preprocessing=resolved_preprocessing,
|
|
485
501
|
execution=resolved,
|
|
486
502
|
)
|
|
503
|
+
return _group_embedded_slides(embedded, annotations=annotations)
|
|
487
504
|
|
|
488
505
|
def embed_patient(
|
|
489
506
|
self,
|
|
@@ -650,6 +667,77 @@ class Pipeline:
|
|
|
650
667
|
)
|
|
651
668
|
|
|
652
669
|
|
|
670
|
+
def _select_embedded_bag(
|
|
671
|
+
bags: Mapping[str, EmbeddedSlide],
|
|
672
|
+
annotation: str | list[str] | None,
|
|
673
|
+
) -> EmbeddedSlide | list[EmbeddedSlide]:
|
|
674
|
+
"""Select per-class bag(s) from a single slide's ``{label: EmbeddedSlide}`` map.
|
|
675
|
+
|
|
676
|
+
numpy-style shape-in/shape-out:
|
|
677
|
+
|
|
678
|
+
- a single class string returns one :class:`EmbeddedSlide`;
|
|
679
|
+
- a list of class strings returns a list in the requested order;
|
|
680
|
+
- ``None`` returns the single bag when the run produced exactly one,
|
|
681
|
+
otherwise raises naming the available bags and directing to
|
|
682
|
+
:meth:`Model.embed_slides`.
|
|
683
|
+
|
|
684
|
+
Requesting a class the run did not produce raises naming what is available.
|
|
685
|
+
"""
|
|
686
|
+
available = sorted(bags)
|
|
687
|
+
if isinstance(annotation, str):
|
|
688
|
+
if annotation not in bags:
|
|
689
|
+
raise ValueError(
|
|
690
|
+
f"embed_slide() found no '{annotation}' annotation bag for this "
|
|
691
|
+
f"slide; available bags: {available}."
|
|
692
|
+
)
|
|
693
|
+
return bags[annotation]
|
|
694
|
+
if annotation is not None:
|
|
695
|
+
selected: list[EmbeddedSlide] = []
|
|
696
|
+
for label in annotation:
|
|
697
|
+
if label not in bags:
|
|
698
|
+
raise ValueError(
|
|
699
|
+
f"embed_slide() found no '{label}' annotation bag for this "
|
|
700
|
+
f"slide; available bags: {available}."
|
|
701
|
+
)
|
|
702
|
+
selected.append(bags[label])
|
|
703
|
+
return selected
|
|
704
|
+
if len(bags) == 1:
|
|
705
|
+
return next(iter(bags.values()))
|
|
706
|
+
raise ValueError(
|
|
707
|
+
f"embed_slide() received {len(bags)} annotation bags for this slide "
|
|
708
|
+
f"({available}); annotation-aware sampling produces one bag per class. "
|
|
709
|
+
"Pass annotation=... to select a class, or use Model.embed_slides(...) "
|
|
710
|
+
"to receive every per-class EmbeddedSlide (each carries its .annotation)."
|
|
711
|
+
)
|
|
712
|
+
|
|
713
|
+
|
|
714
|
+
def _group_embedded_slides(
|
|
715
|
+
embedded: Sequence[EmbeddedSlide],
|
|
716
|
+
*,
|
|
717
|
+
annotations: list[str] | None = None,
|
|
718
|
+
) -> dict[str, dict[str, EmbeddedSlide]]:
|
|
719
|
+
"""Group flat per-row :class:`EmbeddedSlide` results into a nested mapping.
|
|
720
|
+
|
|
721
|
+
The outer key is ``sample_id``; the inner key is the bag's informative
|
|
722
|
+
annotation label (``"tissue"``/``"merged"``/class name), never ``None``.
|
|
723
|
+
A bag whose ``.annotation`` is ``None`` (defensive — post-#173 real runs
|
|
724
|
+
always carry a label) does not produce a ``None`` key.
|
|
725
|
+
|
|
726
|
+
When *annotations* is given, the inner keys are restricted to the named
|
|
727
|
+
classes (in encounter order).
|
|
728
|
+
"""
|
|
729
|
+
requested = None if annotations is None else set(annotations)
|
|
730
|
+
grouped: dict[str, dict[str, EmbeddedSlide]] = {}
|
|
731
|
+
for bag in embedded:
|
|
732
|
+
label = bag.annotation
|
|
733
|
+
if label is None:
|
|
734
|
+
continue
|
|
735
|
+
if requested is not None and label not in requested:
|
|
736
|
+
continue
|
|
737
|
+
grouped.setdefault(bag.sample_id, {})[label] = bag
|
|
738
|
+
return grouped
|
|
739
|
+
|
|
740
|
+
|
|
653
741
|
def _coerce_execution_options(
|
|
654
742
|
options: ExecutionOptions | None,
|
|
655
743
|
*,
|
|
@@ -101,15 +101,14 @@ def _normalized_row_annotation(annotation) -> str | None:
|
|
|
101
101
|
"""Collapse a process-list ``annotation`` cell to the per-class key (``None`` for the flat path).
|
|
102
102
|
|
|
103
103
|
Mirrors the in-memory single-GPU path: ``None``/NaN and hs2p's flat-layout sentinels
|
|
104
|
-
(:func:`hs2p.fileops.is_flattened_annotation
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
keys those rows to the flat embedding path with no per-class subdir.
|
|
104
|
+
(:func:`hs2p.fileops.is_flattened_annotation` — the single source of truth, which flattens
|
|
105
|
+
``None``/``"tissue"``/``"merged"``) land flat — so the distributed reconcile keys those rows
|
|
106
|
+
to the flat embedding path with no per-class subdir.
|
|
108
107
|
"""
|
|
109
108
|
if annotation is None or (isinstance(annotation, float) and pd.isna(annotation)):
|
|
110
109
|
return None
|
|
111
110
|
annotation = str(annotation)
|
|
112
|
-
if
|
|
111
|
+
if is_flattened_annotation(annotation):
|
|
113
112
|
return None
|
|
114
113
|
return annotation
|
|
115
114
|
|
|
@@ -32,15 +32,15 @@ def normalize_work_unit_annotation(annotation: str | None) -> str | None:
|
|
|
32
32
|
"""Collapse flat-layout annotations to ``None`` so flat units key by bare ``sample_id``.
|
|
33
33
|
|
|
34
34
|
Mirrors the in-memory single-GPU path and the distributed reconcile
|
|
35
|
-
(:func:`slide2vec.runtime.artifacts_collect._normalized_row_annotation`):
|
|
36
|
-
|
|
37
|
-
|
|
35
|
+
(:func:`slide2vec.runtime.artifacts_collect._normalized_row_annotation`): hs2p's flat-layout
|
|
36
|
+
sentinels (:func:`hs2p.fileops.is_flattened_annotation`, the single source of truth — it
|
|
37
|
+
flattens ``None``/``"tissue"``/``"merged"``) all collapse to ``None``. Only genuine per-class
|
|
38
38
|
annotations survive as a composite key.
|
|
39
39
|
"""
|
|
40
40
|
if annotation is None:
|
|
41
41
|
return None
|
|
42
42
|
annotation = str(annotation)
|
|
43
|
-
if
|
|
43
|
+
if is_flattened_annotation(annotation):
|
|
44
44
|
return None
|
|
45
45
|
return annotation
|
|
46
46
|
|
|
@@ -56,6 +56,7 @@ def make_embedded_slide(
|
|
|
56
56
|
tile_size_lv0=int(tiling_result.tile_size_lv0),
|
|
57
57
|
image_path=slide.image_path,
|
|
58
58
|
mask_path=slide.mask_path,
|
|
59
|
+
annotation=tiling_result_annotation(tiling_result),
|
|
59
60
|
num_tiles=int(n_tiles) if n_tiles is not None else len(x_values),
|
|
60
61
|
mask_preview_path=Path(mask_preview_path) if mask_preview_path is not None else None,
|
|
61
62
|
tiling_preview_path=Path(tiling_preview_path) if tiling_preview_path is not None else None,
|
|
@@ -265,14 +265,15 @@ def _normalized_annotation(annotation: Any) -> str | None:
|
|
|
265
265
|
|
|
266
266
|
Keying the per-class feature-path map on this normalized value lets the flat tissue-only
|
|
267
267
|
path and a real class share one matching rule without the sentinel leaking into lookups.
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
268
|
+
Flattening is decided solely by :func:`hs2p.fileops.is_flattened_annotation` (the single
|
|
269
|
+
source of truth), which flattens ``None``/``"tissue"``/``"merged"`` to the flat root, so
|
|
270
|
+
``"merged"`` (hs2p's merged output-mode label, which carries no class) resolves to the flat
|
|
271
|
+
embedding path rather than being left unmatched.
|
|
271
272
|
"""
|
|
272
273
|
if annotation is None or (isinstance(annotation, float) and pd.isna(annotation)):
|
|
273
274
|
return None
|
|
274
275
|
annotation = str(annotation)
|
|
275
|
-
if
|
|
276
|
+
if is_flattened_annotation(annotation):
|
|
276
277
|
return None
|
|
277
278
|
return annotation
|
|
278
279
|
|
|
@@ -46,9 +46,10 @@ def build_hs2p_configs(
|
|
|
46
46
|
if is_hierarchical_preprocessing(preprocessing)
|
|
47
47
|
else preprocessing.requested_tile_size_px
|
|
48
48
|
)
|
|
49
|
-
# Reuse hs2p's tiling-config resolver so the
|
|
50
|
-
# masks.min_coverage
|
|
51
|
-
# is threaded consistently. The resolver reads
|
|
49
|
+
# Reuse hs2p's tiling-config resolver so the resolved min_coverage map comes from
|
|
50
|
+
# masks.min_coverage (the single source of truth; min_coverage["tissue"] is the tissue
|
|
51
|
+
# threshold) and independent_sampling is threaded consistently. The resolver reads
|
|
52
|
+
# attributes, so wrap the masks dict.
|
|
52
53
|
tiling_adapter = SimpleNamespace(
|
|
53
54
|
tiling=SimpleNamespace(
|
|
54
55
|
masks=SimpleNamespace(**dict(preprocessing.masks)),
|
|
@@ -244,11 +244,10 @@ def load_tiling_result_from_row(row):
|
|
|
244
244
|
annotation = annotation if annotation is None else str(annotation)
|
|
245
245
|
# The merged output mode (hs2p's CoordinateOutputMode.MERGED) emits a single per-slide
|
|
246
246
|
# coordinate set over the union of tiles passing any active class threshold. hs2p labels
|
|
247
|
-
# that process-list row "merged" so it is not mistaken for plain tissue
|
|
248
|
-
#
|
|
249
|
-
#
|
|
250
|
-
|
|
251
|
-
annotation = None
|
|
247
|
+
# that process-list row "merged" so it is not mistaken for plain tissue. The informative
|
|
248
|
+
# label is preserved verbatim here — artifact placement is decided downstream solely by
|
|
249
|
+
# hs2p.fileops.is_flattened_annotation (which flattens None/"tissue"/"merged" to the flat
|
|
250
|
+
# output root), so "merged" still lands flat without erasing its self-describing label.
|
|
252
251
|
setattr(tiling_result, "annotation", annotation)
|
|
253
252
|
setattr(tiling_result, "tiles_tar_path", _optional_path(row.get("tiles_tar_path")))
|
|
254
253
|
setattr(tiling_result, "mask_preview_path", _optional_path(row.get("mask_preview_path")))
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: slide2vec
|
|
3
|
-
Version: 4.8.0
|
|
3
|
+
Version: 5.0.0
|
|
4
4
|
Summary: Embedding of whole slide images with Foundation Models
|
|
5
5
|
Author-email: Clément Grisi <clement.grisi@radboudumc.nl>
|
|
6
6
|
License-Expression: Apache-2.0
|
|
@@ -15,7 +15,7 @@ Classifier: Programming Language :: Python :: 3.13
|
|
|
15
15
|
Requires-Python: >=3.10
|
|
16
16
|
Description-Content-Type: text/markdown
|
|
17
17
|
License-File: LICENSE
|
|
18
|
-
Requires-Dist: hs2p[asap,cucim,openslide,sam2,vips]>=4.
|
|
18
|
+
Requires-Dist: hs2p[asap,cucim,openslide,sam2,vips]>=4.2.0
|
|
19
19
|
Requires-Dist: omegaconf
|
|
20
20
|
Requires-Dist: matplotlib
|
|
21
21
|
Requires-Dist: numpy<2
|
|
@@ -65,7 +65,7 @@ Requires-Dist: numpy<2; extra == "fm"
|
|
|
65
65
|
Requires-Dist: pandas; extra == "fm"
|
|
66
66
|
Requires-Dist: pillow; extra == "fm"
|
|
67
67
|
Requires-Dist: rich; extra == "fm"
|
|
68
|
-
Requires-Dist: hs2p[asap,cucim,openslide,sam2,vips]>=4.
|
|
68
|
+
Requires-Dist: hs2p[asap,cucim,openslide,sam2,vips]>=4.2.0; extra == "fm"
|
|
69
69
|
Requires-Dist: wandb; extra == "fm"
|
|
70
70
|
Requires-Dist: torch<2.8,>=2.3; extra == "fm"
|
|
71
71
|
Requires-Dist: torchvision>=0.18.0; extra == "fm"
|
|
@@ -169,7 +169,7 @@ pipeline = Pipeline(
|
|
|
169
169
|
preprocessing=PreprocessingConfig(
|
|
170
170
|
requested_spacing_um=0.5,
|
|
171
171
|
requested_tile_size_px=224,
|
|
172
|
-
|
|
172
|
+
masks={"min_coverage": {"tissue": 0.1}},
|
|
173
173
|
),
|
|
174
174
|
execution=ExecutionOptions(output_dir="outputs/demo"),
|
|
175
175
|
)
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
hs2p[asap,cucim,openslide,sam2,vips]>=4.
|
|
1
|
+
hs2p[asap,cucim,openslide,sam2,vips]>=4.2.0
|
|
2
2
|
omegaconf
|
|
3
3
|
matplotlib
|
|
4
4
|
numpy<2
|
|
@@ -27,7 +27,7 @@ numpy<2
|
|
|
27
27
|
pandas
|
|
28
28
|
pillow
|
|
29
29
|
rich
|
|
30
|
-
hs2p[asap,cucim,openslide,sam2,vips]>=4.
|
|
30
|
+
hs2p[asap,cucim,openslide,sam2,vips]>=4.2.0
|
|
31
31
|
wandb
|
|
32
32
|
torch<2.8,>=2.3
|
|
33
33
|
torchvision>=0.18.0
|
|
@@ -700,7 +700,7 @@ def test_masks_min_coverage_tissue_drives_derived_tiling_threshold():
|
|
|
700
700
|
|
|
701
701
|
tiling_cfg = build_hs2p_configs(preprocessing)[0]
|
|
702
702
|
|
|
703
|
-
assert tiling_cfg.
|
|
703
|
+
assert tiling_cfg.min_coverage["tissue"] == pytest.approx(0.37)
|
|
704
704
|
assert tiling_cfg.independent_sampling is False
|
|
705
705
|
|
|
706
706
|
|
|
@@ -842,9 +842,10 @@ def test_independent_sampling_toggle_selects_selection_strategy():
|
|
|
842
842
|
assert joint[-2] == "joint_sampling"
|
|
843
843
|
|
|
844
844
|
|
|
845
|
-
def test_merged_annotation_label_collapses_to_flat_root(tmp_path: Path):
|
|
846
|
-
"""A merged tiling row is labelled ``merged`` by hs2p
|
|
847
|
-
|
|
845
|
+
def test_merged_annotation_label_survives_round_trip_to_flat_root(tmp_path: Path):
|
|
846
|
+
"""A merged tiling row is labelled ``merged`` by hs2p. The informative label must
|
|
847
|
+
survive the round-trip (no collapse to ``None``), yet artifacts still land at the flat
|
|
848
|
+
output root because hs2p's ``is_flattened_annotation`` flattens ``"merged"``."""
|
|
848
849
|
from slide2vec.utils.tiling_io import load_tiling_result_from_row
|
|
849
850
|
|
|
850
851
|
coordinates_meta_path = tmp_path / "slide-a.coordinates.meta.json"
|
|
@@ -868,8 +869,8 @@ def test_merged_annotation_label_collapses_to_flat_root(tmp_path: Path):
|
|
|
868
869
|
finally:
|
|
869
870
|
tiling_io.load_tiling_result = original
|
|
870
871
|
|
|
871
|
-
#
|
|
872
|
-
assert result.annotation is None
|
|
872
|
+
# The informative label survives the round-trip; it is not blanked to None.
|
|
873
|
+
assert result.annotation == "merged"
|
|
873
874
|
artifact = write_tile_embeddings(
|
|
874
875
|
"slide-a",
|
|
875
876
|
np.arange(8, dtype=np.float32).reshape(2, 4),
|
|
@@ -877,9 +878,84 @@ def test_merged_annotation_label_collapses_to_flat_root(tmp_path: Path):
|
|
|
877
878
|
output_format="npz",
|
|
878
879
|
annotation=result.annotation,
|
|
879
880
|
)
|
|
881
|
+
# ...but placement is decided by is_flattened_annotation, so it still lands flat.
|
|
880
882
|
assert artifact.path == tmp_path / "tile_embeddings" / "slide-a.npz"
|
|
881
883
|
|
|
882
884
|
|
|
885
|
+
def test_tissue_annotation_survives_round_trip_to_flat_root(tmp_path: Path):
|
|
886
|
+
"""A ``"tissue"`` row keeps its informative label through the round-trip while still
|
|
887
|
+
resolving to flat-root placement via ``is_flattened_annotation``."""
|
|
888
|
+
from slide2vec.utils.tiling_io import load_tiling_result_from_row
|
|
889
|
+
|
|
890
|
+
coordinates_meta_path = tmp_path / "slide-t.coordinates.meta.json"
|
|
891
|
+
coordinates_meta_path.write_text("{}", encoding="utf-8")
|
|
892
|
+
|
|
893
|
+
def fake_load_tiling_result(**kwargs):
|
|
894
|
+
return SimpleNamespace()
|
|
895
|
+
|
|
896
|
+
import slide2vec.utils.tiling_io as tiling_io
|
|
897
|
+
|
|
898
|
+
original = tiling_io.load_tiling_result
|
|
899
|
+
tiling_io.load_tiling_result = fake_load_tiling_result
|
|
900
|
+
try:
|
|
901
|
+
result = load_tiling_result_from_row(
|
|
902
|
+
{
|
|
903
|
+
"annotation": "tissue",
|
|
904
|
+
"coordinates_npz_path": str(tmp_path / "slide-t.coordinates.npz"),
|
|
905
|
+
"coordinates_meta_path": str(coordinates_meta_path),
|
|
906
|
+
}
|
|
907
|
+
)
|
|
908
|
+
finally:
|
|
909
|
+
tiling_io.load_tiling_result = original
|
|
910
|
+
|
|
911
|
+
assert result.annotation == "tissue"
|
|
912
|
+
artifact = write_tile_embeddings(
|
|
913
|
+
"slide-t",
|
|
914
|
+
np.arange(8, dtype=np.float32).reshape(2, 4),
|
|
915
|
+
output_dir=tmp_path,
|
|
916
|
+
output_format="npz",
|
|
917
|
+
annotation=result.annotation,
|
|
918
|
+
)
|
|
919
|
+
assert artifact.path == tmp_path / "tile_embeddings" / "slide-t.npz"
|
|
920
|
+
|
|
921
|
+
|
|
922
|
+
def test_real_class_annotation_survives_round_trip_to_per_class_subdir(tmp_path: Path):
|
|
923
|
+
"""A genuine class label (e.g. ``"tumor"``) survives the round-trip and routes to its
|
|
924
|
+
own per-class subdir, since ``is_flattened_annotation`` does not flatten it."""
|
|
925
|
+
from slide2vec.utils.tiling_io import load_tiling_result_from_row
|
|
926
|
+
|
|
927
|
+
coordinates_meta_path = tmp_path / "slide-u.coordinates.meta.json"
|
|
928
|
+
coordinates_meta_path.write_text("{}", encoding="utf-8")
|
|
929
|
+
|
|
930
|
+
def fake_load_tiling_result(**kwargs):
|
|
931
|
+
return SimpleNamespace()
|
|
932
|
+
|
|
933
|
+
import slide2vec.utils.tiling_io as tiling_io
|
|
934
|
+
|
|
935
|
+
original = tiling_io.load_tiling_result
|
|
936
|
+
tiling_io.load_tiling_result = fake_load_tiling_result
|
|
937
|
+
try:
|
|
938
|
+
result = load_tiling_result_from_row(
|
|
939
|
+
{
|
|
940
|
+
"annotation": "tumor",
|
|
941
|
+
"coordinates_npz_path": str(tmp_path / "slide-u.coordinates.npz"),
|
|
942
|
+
"coordinates_meta_path": str(coordinates_meta_path),
|
|
943
|
+
}
|
|
944
|
+
)
|
|
945
|
+
finally:
|
|
946
|
+
tiling_io.load_tiling_result = original
|
|
947
|
+
|
|
948
|
+
assert result.annotation == "tumor"
|
|
949
|
+
artifact = write_tile_embeddings(
|
|
950
|
+
"slide-u",
|
|
951
|
+
np.arange(8, dtype=np.float32).reshape(2, 4),
|
|
952
|
+
output_dir=tmp_path,
|
|
953
|
+
output_format="npz",
|
|
954
|
+
annotation=result.annotation,
|
|
955
|
+
)
|
|
956
|
+
assert artifact.path == tmp_path / "tile_embeddings" / "tumor" / "slide-u.npz"
|
|
957
|
+
|
|
958
|
+
|
|
883
959
|
def test_invalid_masks_block_with_duplicate_pixel_values_fails_fast():
|
|
884
960
|
from slide2vec.runtime.tiling import build_hs2p_configs
|
|
885
961
|
|
|
@@ -1138,6 +1138,7 @@ def test_model_embed_slide_updates_process_list_feature_status_and_path_in_distr
|
|
|
1138
1138
|
tile_size_lv0=224,
|
|
1139
1139
|
image_path=slide_path,
|
|
1140
1140
|
mask_path=None,
|
|
1141
|
+
annotation="tissue",
|
|
1141
1142
|
num_tiles=1,
|
|
1142
1143
|
)
|
|
1143
1144
|
|
|
@@ -1557,6 +1558,27 @@ def test_make_embedded_slide_carries_tiling_artifact_fields():
|
|
|
1557
1558
|
assert embedded.num_tiles == 7
|
|
1558
1559
|
assert embedded.mask_preview_path == Path("/tmp/slide-a-mask-preview.png")
|
|
1559
1560
|
assert embedded.tiling_preview_path == Path("/tmp/slide-a-tiling-preview.png")
|
|
1561
|
+
# A tiling_result with no annotation attr yields the helper default (None).
|
|
1562
|
+
assert embedded.annotation is None
|
|
1563
|
+
|
|
1564
|
+
|
|
1565
|
+
@pytest.mark.parametrize("annotation", ["tissue", "merged", "tumor"])
|
|
1566
|
+
def test_make_embedded_slide_carries_per_class_annotation(annotation):
|
|
1567
|
+
slide = make_slide("slide-a")
|
|
1568
|
+
tiling_result = SimpleNamespace(
|
|
1569
|
+
x=np.array([0], dtype=np.int64),
|
|
1570
|
+
y=np.array([1], dtype=np.int64),
|
|
1571
|
+
tile_size_lv0=224,
|
|
1572
|
+
num_tiles=3,
|
|
1573
|
+
annotation=annotation,
|
|
1574
|
+
)
|
|
1575
|
+
embedded = embedding_persist.make_embedded_slide(
|
|
1576
|
+
slide=slide,
|
|
1577
|
+
tiling_result=tiling_result,
|
|
1578
|
+
tile_embeddings=np.zeros((1, 2), dtype=np.float32),
|
|
1579
|
+
slide_embedding=None,
|
|
1580
|
+
)
|
|
1581
|
+
assert embedded.annotation == annotation
|
|
1560
1582
|
|
|
1561
1583
|
|
|
1562
1584
|
def test_run_pipeline_local_branch_uses_incremental_persist_callback(monkeypatch, tmp_path: Path):
|
|
@@ -2280,9 +2302,10 @@ def test_tile_slides_forwards_independent_sampling_selection_strategy(monkeypatc
|
|
|
2280
2302
|
assert captured["kwargs"]["output_mode"] == "per_annotation"
|
|
2281
2303
|
|
|
2282
2304
|
|
|
2283
|
-
def test_prepare_tiled_slides_collapses_merged_row_to_flat_root(monkeypatch, tmp_path: Path):
|
|
2284
|
-
"""A merged tiling row (hs2p labels it ``merged``) must load with
|
|
2285
|
-
|
|
2305
|
+
def test_prepare_tiled_slides_preserves_merged_row_label(monkeypatch, tmp_path: Path):
|
|
2306
|
+
"""A merged tiling row (hs2p labels it ``merged``) must load with its informative label
|
|
2307
|
+
intact (annotation == "merged"), not blanked to None. Artifact placement still flattens
|
|
2308
|
+
to the output root downstream via hs2p's ``is_flattened_annotation``."""
|
|
2286
2309
|
process_list_path = tmp_path / "process_list.csv"
|
|
2287
2310
|
process_list_path.write_text(
|
|
2288
2311
|
"sample_id,annotation,image_path,mask_path,requested_backend,backend,tiling_status,num_tiles,coordinates_npz_path,coordinates_meta_path,error,traceback\n"
|
|
@@ -2316,7 +2339,10 @@ def test_prepare_tiled_slides_collapses_merged_row_to_flat_root(monkeypatch, tmp
|
|
|
2316
2339
|
num_workers=0,
|
|
2317
2340
|
)
|
|
2318
2341
|
|
|
2319
|
-
assert captured["annotation"] is None
|
|
2342
|
+
assert captured["annotation"] == "merged"
|
|
2343
|
+
from hs2p.fileops import is_flattened_annotation
|
|
2344
|
+
|
|
2345
|
+
assert is_flattened_annotation(captured["annotation"]) is True
|
|
2320
2346
|
|
|
2321
2347
|
|
|
2322
2348
|
def test_tile_slides_does_not_pre_resolve_backend_auto(monkeypatch, tmp_path: Path):
|
|
@@ -2472,7 +2498,7 @@ def test_build_hs2p_configs_constructs_preview_config():
|
|
|
2472
2498
|
) = runtime_tiling.build_hs2p_configs(preprocessing)
|
|
2473
2499
|
|
|
2474
2500
|
assert tiling_cfg.backend == "asap"
|
|
2475
|
-
assert tiling_cfg.
|
|
2501
|
+
assert tiling_cfg.min_coverage["tissue"] == pytest.approx(0.1) # resolved from masks.min_coverage.tissue
|
|
2476
2502
|
assert segmentation_cfg.downsample == 64
|
|
2477
2503
|
assert segmentation_cfg.method == "hsv"
|
|
2478
2504
|
assert filtering_cfg.ref_tile_size == 224
|