slide2vec 4.8.0__tar.gz → 5.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (99) hide show
  1. {slide2vec-4.8.0 → slide2vec-5.0.1}/PKG-INFO +4 -4
  2. {slide2vec-4.8.0 → slide2vec-5.0.1}/README.md +1 -1
  3. {slide2vec-4.8.0 → slide2vec-5.0.1}/pyproject.toml +7 -4
  4. {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/__init__.py +1 -1
  5. {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/api.py +93 -5
  6. {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/encoders/models/hibou.py +9 -2
  7. {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/encoders/models/midnight.py +12 -0
  8. {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/encoders/models/virchow.py +2 -8
  9. {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/runtime/artifacts_collect.py +4 -5
  10. {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/runtime/distributed.py +4 -4
  11. {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/runtime/embedding_persist.py +1 -0
  12. {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/runtime/persistence.py +5 -4
  13. {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/runtime/tiling.py +4 -3
  14. {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/utils/tiling_io.py +4 -5
  15. {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec.egg-info/PKG-INFO +4 -4
  16. {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec.egg-info/SOURCES.txt +0 -1
  17. {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec.egg-info/requires.txt +2 -2
  18. {slide2vec-4.8.0 → slide2vec-5.0.1}/tests/test_output_consistency.py +1 -0
  19. {slide2vec-4.8.0 → slide2vec-5.0.1}/tests/test_regression_core.py +82 -6
  20. {slide2vec-4.8.0 → slide2vec-5.0.1}/tests/test_regression_inference.py +31 -5
  21. {slide2vec-4.8.0 → slide2vec-5.0.1}/tests/test_regression_models.py +182 -114
  22. slide2vec-4.8.0/tests/test_dense_locality_gated.py +0 -162
  23. {slide2vec-4.8.0 → slide2vec-5.0.1}/LICENSE +0 -0
  24. {slide2vec-4.8.0 → slide2vec-5.0.1}/setup.cfg +0 -0
  25. {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/__main__.py +0 -0
  26. {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/artifacts.py +0 -0
  27. {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/cli.py +0 -0
  28. {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/configs/__init__.py +0 -0
  29. {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/configs/default.yaml +0 -0
  30. {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/configs/resources.py +0 -0
  31. {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/data/__init__.py +0 -0
  32. {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/data/dataset.py +0 -0
  33. {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/data/tile_reader.py +0 -0
  34. {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/data/tile_store.py +0 -0
  35. {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/distributed/__init__.py +0 -0
  36. {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/distributed/direct_embed_worker.py +0 -0
  37. {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/distributed/pipeline_worker.py +0 -0
  38. {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/encoders/__init__.py +0 -0
  39. {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/encoders/base.py +0 -0
  40. {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/encoders/models/__init__.py +0 -0
  41. {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/encoders/models/conch.py +0 -0
  42. {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/encoders/models/gigapath.py +0 -0
  43. {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/encoders/models/hoptimus.py +0 -0
  44. {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/encoders/models/lunit.py +0 -0
  45. {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/encoders/models/moozy/__init__.py +0 -0
  46. {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/encoders/models/moozy/blocks.py +0 -0
  47. {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/encoders/models/moozy/case.py +0 -0
  48. {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/encoders/models/moozy/loading.py +0 -0
  49. {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/encoders/models/moozy/slide.py +0 -0
  50. {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/encoders/models/moozy/types.py +0 -0
  51. {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/encoders/models/musk.py +0 -0
  52. {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/encoders/models/phikon.py +0 -0
  53. {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/encoders/models/prism.py +0 -0
  54. {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/encoders/models/prost40m.py +0 -0
  55. {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/encoders/models/titan.py +0 -0
  56. {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/encoders/models/uni.py +0 -0
  57. {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/encoders/registry.py +0 -0
  58. {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/encoders/validation.py +0 -0
  59. {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/inference.py +0 -0
  60. {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/progress.py +0 -0
  61. {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/runtime/__init__.py +0 -0
  62. {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/runtime/batching.py +0 -0
  63. {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/runtime/cpu_budget.py +0 -0
  64. {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/runtime/dense_regions.py +0 -0
  65. {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/runtime/distributed_stage.py +0 -0
  66. {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/runtime/embedding.py +0 -0
  67. {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/runtime/embedding_pipeline.py +0 -0
  68. {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/runtime/hierarchical.py +0 -0
  69. {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/runtime/manifest.py +0 -0
  70. {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/runtime/model_settings.py +0 -0
  71. {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/runtime/patient_pipeline.py +0 -0
  72. {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/runtime/persist_callbacks.py +0 -0
  73. {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/runtime/process_list.py +0 -0
  74. {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/runtime/progress_bridge.py +0 -0
  75. {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/runtime/registry.py +0 -0
  76. {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/runtime/serialization.py +0 -0
  77. {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/runtime/slide_encode.py +0 -0
  78. {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/runtime/tiling_pipeline.py +0 -0
  79. {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/runtime/types.py +0 -0
  80. {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/runtime/worker_io.py +0 -0
  81. {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/utils/__init__.py +0 -0
  82. {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/utils/config.py +0 -0
  83. {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/utils/coordinates.py +0 -0
  84. {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/utils/log_utils.py +0 -0
  85. {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec/utils/utils.py +0 -0
  86. {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec.egg-info/dependency_links.txt +0 -0
  87. {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec.egg-info/entry_points.txt +0 -0
  88. {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec.egg-info/not-zip-safe +0 -0
  89. {slide2vec-4.8.0 → slide2vec-5.0.1}/slide2vec.egg-info/top_level.txt +0 -0
  90. {slide2vec-4.8.0 → slide2vec-5.0.1}/tests/test_architecture_runtime_split.py +0 -0
  91. {slide2vec-4.8.0 → slide2vec-5.0.1}/tests/test_attention_extraction.py +0 -0
  92. {slide2vec-4.8.0 → slide2vec-5.0.1}/tests/test_dense_extraction.py +0 -0
  93. {slide2vec-4.8.0 → slide2vec-5.0.1}/tests/test_dense_regions.py +0 -0
  94. {slide2vec-4.8.0 → slide2vec-5.0.1}/tests/test_encoder_registry.py +0 -0
  95. {slide2vec-4.8.0 → slide2vec-5.0.1}/tests/test_hs2p_package_cutover.py +0 -0
  96. {slide2vec-4.8.0 → slide2vec-5.0.1}/tests/test_progress.py +0 -0
  97. {slide2vec-4.8.0 → slide2vec-5.0.1}/tests/test_runtime_batching.py +0 -0
  98. {slide2vec-4.8.0 → slide2vec-5.0.1}/tests/test_tile_store.py +0 -0
  99. {slide2vec-4.8.0 → slide2vec-5.0.1}/tests/test_tiling_pipeline.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: slide2vec
3
- Version: 4.8.0
3
+ Version: 5.0.1
4
4
  Summary: Embedding of whole slide images with Foundation Models
5
5
  Author-email: Clément Grisi <clement.grisi@radboudumc.nl>
6
6
  License-Expression: Apache-2.0
@@ -15,7 +15,7 @@ Classifier: Programming Language :: Python :: 3.13
15
15
  Requires-Python: >=3.10
16
16
  Description-Content-Type: text/markdown
17
17
  License-File: LICENSE
18
- Requires-Dist: hs2p[asap,cucim,openslide,sam2,vips]>=4.1.1
18
+ Requires-Dist: hs2p[asap,cucim,openslide,sam2,vips]>=4.2.0
19
19
  Requires-Dist: omegaconf
20
20
  Requires-Dist: matplotlib
21
21
  Requires-Dist: numpy<2
@@ -65,7 +65,7 @@ Requires-Dist: numpy<2; extra == "fm"
65
65
  Requires-Dist: pandas; extra == "fm"
66
66
  Requires-Dist: pillow; extra == "fm"
67
67
  Requires-Dist: rich; extra == "fm"
68
- Requires-Dist: hs2p[asap,cucim,openslide,sam2,vips]>=4.1.1; extra == "fm"
68
+ Requires-Dist: hs2p[asap,cucim,openslide,sam2,vips]>=4.2.0; extra == "fm"
69
69
  Requires-Dist: wandb; extra == "fm"
70
70
  Requires-Dist: torch<2.8,>=2.3; extra == "fm"
71
71
  Requires-Dist: torchvision>=0.18.0; extra == "fm"
@@ -169,7 +169,7 @@ pipeline = Pipeline(
169
169
  preprocessing=PreprocessingConfig(
170
170
  requested_spacing_um=0.5,
171
171
  requested_tile_size_px=224,
172
- tissue_threshold=0.1,
172
+ masks={"min_coverage": {"tissue": 0.1}},
173
173
  ),
174
174
  execution=ExecutionOptions(output_dir="outputs/demo"),
175
175
  )
@@ -63,7 +63,7 @@ pipeline = Pipeline(
63
63
  preprocessing=PreprocessingConfig(
64
64
  requested_spacing_um=0.5,
65
65
  requested_tile_size_px=224,
66
- tissue_threshold=0.1,
66
+ masks={"min_coverage": {"tissue": 0.1}},
67
67
  ),
68
68
  execution=ExecutionOptions(output_dir="outputs/demo"),
69
69
  )
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "slide2vec"
7
- version = "4.8.0"
7
+ version = "5.0.1"
8
8
  description = "Embedding of whole slide images with Foundation Models"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.10"
@@ -21,7 +21,7 @@ classifiers = [
21
21
  "Programming Language :: Python :: 3.13",
22
22
  ]
23
23
  dependencies = [
24
- "hs2p[asap,cucim,openslide,sam2,vips]>=4.1.1",
24
+ "hs2p[asap,cucim,openslide,sam2,vips]>=4.2.0",
25
25
  "omegaconf",
26
26
  "matplotlib",
27
27
  "numpy<2",
@@ -88,7 +88,7 @@ fm = [
88
88
  "pandas",
89
89
  "pillow",
90
90
  "rich",
91
- "hs2p[asap,cucim,openslide,sam2,vips]>=4.1.1",
91
+ "hs2p[asap,cucim,openslide,sam2,vips]>=4.2.0",
92
92
  "wandb",
93
93
  "torch>=2.3,<2.8",
94
94
  "torchvision>=0.18.0",
@@ -145,6 +145,9 @@ addopts = "--cov=slide2vec"
145
145
  testpaths = [
146
146
  "tests",
147
147
  ]
148
+ markers = [
149
+ "heavy: real-weight foundation-model inference on CPU; minutes per test. Excluded from the PR suite via `-m 'not heavy'`; run on the scheduled/manual heavy workflow (.github/workflows/nightly-heavy.yaml).",
150
+ ]
148
151
 
149
152
  [tool.mypy]
150
153
  mypy_path = "."
@@ -164,7 +167,7 @@ no_implicit_reexport = true
164
167
  max-line-length = 160
165
168
 
166
169
  [tool.bumpver]
167
- current_version = "4.8.0"
170
+ current_version = "5.0.1"
168
171
  version_pattern = "MAJOR.MINOR.PATCH"
169
172
  commit = false # We do version bumping in CI, not as a commit
170
173
  tag = false # Git tag already exists — we don't auto-tag
@@ -11,7 +11,7 @@ from slide2vec.api import (
11
11
  from slide2vec.artifacts import HierarchicalEmbeddingArtifact, SlideEmbeddingArtifact, TileEmbeddingArtifact
12
12
 
13
13
 
14
- __version__ = "4.8.0"
14
+ __version__ = "5.0.1"
15
15
 
16
16
  __all__ = [
17
17
  "Model",
@@ -351,6 +351,11 @@ class EmbeddedSlide:
351
351
  image_path: Path
352
352
  #: Path to the tissue mask used for tiling, if any.
353
353
  mask_path: Path | None = None
354
+ #: Annotation class this bag of tiles was sampled for. ``"tissue"`` for the
355
+ #: default tissue-only path, ``"merged"`` for the union output mode, or the
356
+ #: class name (e.g. ``"tumor"``) when annotation-aware sampling fans a slide
357
+ #: out into one bag per class. See the annotation-aware sampling documentation.
358
+ annotation: str | None = None
354
359
  #: Number of tiles extracted from the slide.
355
360
  num_tiles: int | None = None
356
361
  #: Path to the mask preview image, if generated.
@@ -442,12 +447,13 @@ class Model:
442
447
  self,
443
448
  slide: SlideInput,
444
449
  *,
450
+ annotation: str | list[str] | None = None,
445
451
  preprocessing: PreprocessingConfig | None = None,
446
452
  execution: ExecutionOptions | None = None,
447
453
  sample_id: str | None = None,
448
454
  mask_path: PathLike | None = None,
449
455
  spacing_at_level_0: float | None = None,
450
- ) -> EmbeddedSlide:
456
+ ) -> EmbeddedSlide | list[EmbeddedSlide]:
451
457
  if isinstance(slide, (str, Path)):
452
458
  slide = {
453
459
  "sample_id": sample_id or Path(slide).stem,
@@ -459,31 +465,42 @@ class Model:
459
465
  raise ValueError(
460
466
  "sample_id, mask_path, and spacing_at_level_0 overrides are only supported when slide is a path-like input"
461
467
  )
462
- return self.embed_slides(
468
+ requested = None if isinstance(annotation, str) else annotation
469
+ grouped = self.embed_slides(
463
470
  [slide],
471
+ annotations=requested,
464
472
  preprocessing=preprocessing,
465
473
  execution=execution,
466
- )[0]
474
+ )
475
+ # Single slide in → at most one outer key out. Flatten to the inner
476
+ # {label: EmbeddedSlide} mapping (empty when the run produced nothing).
477
+ bags: dict[str, EmbeddedSlide] = {}
478
+ for inner in grouped.values():
479
+ bags = inner
480
+ break
481
+ return _select_embedded_bag(bags, annotation)
467
482
 
468
483
  def embed_slides(
469
484
  self,
470
485
  slides: SlideSequence,
471
486
  *,
487
+ annotations: list[str] | None = None,
472
488
  preprocessing: PreprocessingConfig | None = None,
473
489
  execution: ExecutionOptions | None = None,
474
- ) -> list[EmbeddedSlide]:
490
+ ) -> dict[str, dict[str, EmbeddedSlide]]:
475
491
  from slide2vec.inference import embed_slides
476
492
 
477
493
  resolved = _coerce_execution_options(execution, model=self)
478
494
  resolved_preprocessing = _resolve_direct_api_preprocessing(self, preprocessing)
479
495
  with _auto_progress_reporting(output_dir=resolved.output_dir):
480
496
  _validate_model_config(self, resolved_preprocessing, resolved)
481
- return embed_slides(
497
+ embedded = embed_slides(
482
498
  self,
483
499
  slides,
484
500
  preprocessing=resolved_preprocessing,
485
501
  execution=resolved,
486
502
  )
503
+ return _group_embedded_slides(embedded, annotations=annotations)
487
504
 
488
505
  def embed_patient(
489
506
  self,
@@ -650,6 +667,77 @@ class Pipeline:
650
667
  )
651
668
 
652
669
 
670
+ def _select_embedded_bag(
671
+ bags: Mapping[str, EmbeddedSlide],
672
+ annotation: str | list[str] | None,
673
+ ) -> EmbeddedSlide | list[EmbeddedSlide]:
674
+ """Select per-class bag(s) from a single slide's ``{label: EmbeddedSlide}`` map.
675
+
676
+ numpy-style shape-in/shape-out:
677
+
678
+ - a single class string returns one :class:`EmbeddedSlide`;
679
+ - a list of class strings returns a list in the requested order;
680
+ - ``None`` returns the single bag when the run produced exactly one,
681
+ otherwise raises naming the available bags and directing to
682
+ :meth:`Model.embed_slides`.
683
+
684
+ Requesting a class the run did not produce raises naming what is available.
685
+ """
686
+ available = sorted(bags)
687
+ if isinstance(annotation, str):
688
+ if annotation not in bags:
689
+ raise ValueError(
690
+ f"embed_slide() found no '{annotation}' annotation bag for this "
691
+ f"slide; available bags: {available}."
692
+ )
693
+ return bags[annotation]
694
+ if annotation is not None:
695
+ selected: list[EmbeddedSlide] = []
696
+ for label in annotation:
697
+ if label not in bags:
698
+ raise ValueError(
699
+ f"embed_slide() found no '{label}' annotation bag for this "
700
+ f"slide; available bags: {available}."
701
+ )
702
+ selected.append(bags[label])
703
+ return selected
704
+ if len(bags) == 1:
705
+ return next(iter(bags.values()))
706
+ raise ValueError(
707
+ f"embed_slide() received {len(bags)} annotation bags for this slide "
708
+ f"({available}); annotation-aware sampling produces one bag per class. "
709
+ "Pass annotation=... to select a class, or use Model.embed_slides(...) "
710
+ "to receive every per-class EmbeddedSlide (each carries its .annotation)."
711
+ )
712
+
713
+
714
+ def _group_embedded_slides(
715
+ embedded: Sequence[EmbeddedSlide],
716
+ *,
717
+ annotations: list[str] | None = None,
718
+ ) -> dict[str, dict[str, EmbeddedSlide]]:
719
+ """Group flat per-row :class:`EmbeddedSlide` results into a nested mapping.
720
+
721
+ The outer key is ``sample_id``; the inner key is the bag's informative
722
+ annotation label (``"tissue"``/``"merged"``/class name), never ``None``.
723
+ A bag whose ``.annotation`` is ``None`` (defensive — post-#173 real runs
724
+ always carry a label) does not produce a ``None`` key.
725
+
726
+ When *annotations* is given, the inner keys are restricted to the named
727
+ classes (in encounter order).
728
+ """
729
+ requested = None if annotations is None else set(annotations)
730
+ grouped: dict[str, dict[str, EmbeddedSlide]] = {}
731
+ for bag in embedded:
732
+ label = bag.annotation
733
+ if label is None:
734
+ continue
735
+ if requested is not None and label not in requested:
736
+ continue
737
+ grouped.setdefault(bag.sample_id, {})[label] = bag
738
+ return grouped
739
+
740
+
653
741
  def _coerce_execution_options(
654
742
  options: ExecutionOptions | None,
655
743
  *,
@@ -54,6 +54,13 @@ class _HibouBase(TileEncoder):
54
54
  v2.Normalize(mean=_HIBOU_MEAN, std=_HIBOU_STD),
55
55
  ])
56
56
 
57
+ @property
58
+ def _num_prefix_tokens(self) -> int:
59
+ # CLS + register tokens. Dinov2-with-registers carries the register tokens
60
+ # between the CLS and patch tokens, so both the dense and attention paths
61
+ # must strip them; deriving the count from config keeps the two in sync.
62
+ return 1 + int(getattr(self._model.config, "num_register_tokens", 0))
63
+
57
64
  def encode_tiles(self, batch: Tensor) -> Tensor:
58
65
  output = self._model(pixel_values=batch)
59
66
  return output.pooler_output
@@ -77,7 +84,7 @@ class _HibouBase(TileEncoder):
77
84
  output.last_hidden_state,
78
85
  grid_h=height // patch,
79
86
  grid_w=width // patch,
80
- num_prefix_tokens=1 + int(getattr(self._model.config, "num_register_tokens", 0)),
87
+ num_prefix_tokens=self._num_prefix_tokens,
81
88
  encoder_name=type(self).__name__,
82
89
  )
83
90
 
@@ -111,7 +118,7 @@ class _HibouBase(TileEncoder):
111
118
  output = self._model(pixel_values=batch, output_attentions=True)
112
119
  return attentions_tuple_to_grids(
113
120
  output.attentions,
114
- num_prefix_tokens=1 + int(getattr(self._model.config, "num_register_tokens", 0)),
121
+ num_prefix_tokens=self._num_prefix_tokens,
115
122
  blocks=blocks,
116
123
  include_registers=include_registers,
117
124
  grid_h=height // patch,
@@ -36,6 +36,18 @@ class Midnight(TileEncoder):
36
36
  self._model = AutoModel.from_pretrained("kaiko-ai/midnight").eval()
37
37
  self._device = preferred_default_device()
38
38
  self._output_variant = resolve_requested_output_variant(output_variant)
39
+ # The pooled, dense, and attention paths all assume a single CLS prefix
40
+ # token (kaiko's reference recipe pools over output[:, 1:]). If a future
41
+ # checkpoint adds register tokens, that assumption silently folds them into
42
+ # the patch mean and mislabels the dense/attention grids — fail loudly here.
43
+ num_register_tokens = int(getattr(self._model.config, "num_register_tokens", 0))
44
+ if num_register_tokens:
45
+ raise ValueError(
46
+ "Midnight encoder assumes a single CLS prefix token, but the loaded "
47
+ f"checkpoint reports num_register_tokens={num_register_tokens}. Update "
48
+ "the pooled/dense/attention paths to strip the register tokens before "
49
+ "using this checkpoint."
50
+ )
39
51
 
40
52
  def get_transform(self) -> Callable:
41
53
  return v2.Compose([
@@ -16,8 +16,6 @@ _VIRCHOW_OUTPUT_DIMS = {
16
16
  class _VirchowBase(TimmTileEncoder):
17
17
  """Base for Virchow models that concat CLS + mean-pooled patch tokens."""
18
18
 
19
- _num_prefix_tokens: int = 1 # Override in subclass if needed
20
-
21
19
  def __init__(self, model_name: str, *, output_variant: str | None = None):
22
20
  self._output_variant = resolve_requested_output_variant(
23
21
  output_variant,
@@ -36,7 +34,7 @@ class _VirchowBase(TimmTileEncoder):
36
34
  cls_token = output[:, 0]
37
35
  if self._output_variant == "cls":
38
36
  return cls_token
39
- patch_tokens = output[:, self._num_prefix_tokens:]
37
+ patch_tokens = output[:, self._model.num_prefix_tokens:]
40
38
  return torch.cat([cls_token, patch_tokens.mean(dim=1)], dim=-1)
41
39
 
42
40
  @property
@@ -57,8 +55,6 @@ class _VirchowBase(TimmTileEncoder):
57
55
  source="paige-ai/Virchow",
58
56
  )
59
57
  class Virchow(_VirchowBase):
60
- _num_prefix_tokens = 1
61
-
62
58
  def __init__(self, *, output_variant: str | None = None):
63
59
  super().__init__("hf-hub:paige-ai/Virchow", output_variant=output_variant)
64
60
 
@@ -71,12 +67,10 @@ class Virchow(_VirchowBase):
71
67
  },
72
68
  default_output_variant="cls_patch_mean",
73
69
  input_size=224,
74
- supported_spacing_um=[0.5, 1.0, 2.0],
70
+ supported_spacing_um=[0.25, 0.5, 1.0, 2.0],
75
71
  precision="fp16",
76
72
  source="paige-ai/Virchow2",
77
73
  )
78
74
  class Virchow2(_VirchowBase):
79
- _num_prefix_tokens = 5 # 1 CLS + 4 register tokens
80
-
81
75
  def __init__(self, *, output_variant: str | None = None):
82
76
  super().__init__("hf-hub:paige-ai/Virchow2", output_variant=output_variant)
@@ -101,15 +101,14 @@ def _normalized_row_annotation(annotation) -> str | None:
101
101
  """Collapse a process-list ``annotation`` cell to the per-class key (``None`` for the flat path).
102
102
 
103
103
  Mirrors the in-memory single-GPU path: ``None``/NaN and hs2p's flat-layout sentinels
104
- (:func:`hs2p.fileops.is_flattened_annotation`, e.g. ``"tissue"``) land flat, and the merged
105
- output-mode label ``"merged"`` is collapsed to ``None`` exactly as
106
- :func:`slide2vec.utils.tiling_io.load_tiling_result_from_row` does so the distributed reconcile
107
- keys those rows to the flat embedding path with no per-class subdir.
104
+ (:func:`hs2p.fileops.is_flattened_annotation` — the single source of truth, which flattens
105
+ ``None``/``"tissue"``/``"merged"``) land flat so the distributed reconcile keys those rows
106
+ to the flat embedding path with no per-class subdir.
108
107
  """
109
108
  if annotation is None or (isinstance(annotation, float) and pd.isna(annotation)):
110
109
  return None
111
110
  annotation = str(annotation)
112
- if annotation == "merged" or is_flattened_annotation(annotation):
111
+ if is_flattened_annotation(annotation):
113
112
  return None
114
113
  return annotation
115
114
 
@@ -32,15 +32,15 @@ def normalize_work_unit_annotation(annotation: str | None) -> str | None:
32
32
  """Collapse flat-layout annotations to ``None`` so flat units key by bare ``sample_id``.
33
33
 
34
34
  Mirrors the in-memory single-GPU path and the distributed reconcile
35
- (:func:`slide2vec.runtime.artifacts_collect._normalized_row_annotation`): ``None``, hs2p's
36
- flat-layout sentinels (:func:`hs2p.fileops.is_flattened_annotation`, e.g. ``"tissue"``), and the
37
- merged output-mode label ``"merged"`` all collapse to ``None``. Only genuine per-class
35
+ (:func:`slide2vec.runtime.artifacts_collect._normalized_row_annotation`): hs2p's flat-layout
36
+ sentinels (:func:`hs2p.fileops.is_flattened_annotation`, the single source of truth — it
37
+ flattens ``None``/``"tissue"``/``"merged"``) all collapse to ``None``. Only genuine per-class
38
38
  annotations survive as a composite key.
39
39
  """
40
40
  if annotation is None:
41
41
  return None
42
42
  annotation = str(annotation)
43
- if annotation == "merged" or is_flattened_annotation(annotation):
43
+ if is_flattened_annotation(annotation):
44
44
  return None
45
45
  return annotation
46
46
 
@@ -56,6 +56,7 @@ def make_embedded_slide(
56
56
  tile_size_lv0=int(tiling_result.tile_size_lv0),
57
57
  image_path=slide.image_path,
58
58
  mask_path=slide.mask_path,
59
+ annotation=tiling_result_annotation(tiling_result),
59
60
  num_tiles=int(n_tiles) if n_tiles is not None else len(x_values),
60
61
  mask_preview_path=Path(mask_preview_path) if mask_preview_path is not None else None,
61
62
  tiling_preview_path=Path(tiling_preview_path) if tiling_preview_path is not None else None,
@@ -265,14 +265,15 @@ def _normalized_annotation(annotation: Any) -> str | None:
265
265
 
266
266
  Keying the per-class feature-path map on this normalized value lets the flat tissue-only
267
267
  path and a real class share one matching rule without the sentinel leaking into lookups.
268
- ``"merged"`` (hs2p's merged output-mode label) carries no class and is collapsed to ``None``
269
- here, matching :func:`slide2vec.utils.tiling_io.load_tiling_result_from_row`, so its
270
- process-list row resolves to the flat embedding path rather than being left unmatched.
268
+ Flattening is decided solely by :func:`hs2p.fileops.is_flattened_annotation` (the single
269
+ source of truth), which flattens ``None``/``"tissue"``/``"merged"`` to the flat root, so
270
+ ``"merged"`` (hs2p's merged output-mode label, which carries no class) resolves to the flat
271
+ embedding path rather than being left unmatched.
271
272
  """
272
273
  if annotation is None or (isinstance(annotation, float) and pd.isna(annotation)):
273
274
  return None
274
275
  annotation = str(annotation)
275
- if annotation == "merged" or is_flattened_annotation(annotation):
276
+ if is_flattened_annotation(annotation):
276
277
  return None
277
278
  return annotation
278
279
 
@@ -46,9 +46,10 @@ def build_hs2p_configs(
46
46
  if is_hierarchical_preprocessing(preprocessing)
47
47
  else preprocessing.requested_tile_size_px
48
48
  )
49
- # Reuse hs2p's tiling-config resolver so the derived tissue_threshold comes from
50
- # masks.min_coverage.tissue (the single source of truth) and independent_sampling
51
- # is threaded consistently. The resolver reads attributes, so wrap the masks dict.
49
+ # Reuse hs2p's tiling-config resolver so the resolved min_coverage map comes from
50
+ # masks.min_coverage (the single source of truth; min_coverage["tissue"] is the tissue
51
+ # threshold) and independent_sampling is threaded consistently. The resolver reads
52
+ # attributes, so wrap the masks dict.
52
53
  tiling_adapter = SimpleNamespace(
53
54
  tiling=SimpleNamespace(
54
55
  masks=SimpleNamespace(**dict(preprocessing.masks)),
@@ -244,11 +244,10 @@ def load_tiling_result_from_row(row):
244
244
  annotation = annotation if annotation is None else str(annotation)
245
245
  # The merged output mode (hs2p's CoordinateOutputMode.MERGED) emits a single per-slide
246
246
  # coordinate set over the union of tiles passing any active class threshold. hs2p labels
247
- # that process-list row "merged" so it is not mistaken for plain tissue, but it carries no
248
- # class — collapse it to None here so the flatten rule (is_flattened_annotation) lands its
249
- # artifacts at the flat output root, with no per-class subdir.
250
- if annotation == "merged":
251
- annotation = None
247
+ # that process-list row "merged" so it is not mistaken for plain tissue. The informative
248
+ # label is preserved verbatim here — artifact placement is decided downstream solely by
249
+ # hs2p.fileops.is_flattened_annotation (which flattens None/"tissue"/"merged" to the flat
250
+ # output root), so "merged" still lands flat without erasing its self-describing label.
252
251
  setattr(tiling_result, "annotation", annotation)
253
252
  setattr(tiling_result, "tiles_tar_path", _optional_path(row.get("tiles_tar_path")))
254
253
  setattr(tiling_result, "mask_preview_path", _optional_path(row.get("mask_preview_path")))
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: slide2vec
3
- Version: 4.8.0
3
+ Version: 5.0.1
4
4
  Summary: Embedding of whole slide images with Foundation Models
5
5
  Author-email: Clément Grisi <clement.grisi@radboudumc.nl>
6
6
  License-Expression: Apache-2.0
@@ -15,7 +15,7 @@ Classifier: Programming Language :: Python :: 3.13
15
15
  Requires-Python: >=3.10
16
16
  Description-Content-Type: text/markdown
17
17
  License-File: LICENSE
18
- Requires-Dist: hs2p[asap,cucim,openslide,sam2,vips]>=4.1.1
18
+ Requires-Dist: hs2p[asap,cucim,openslide,sam2,vips]>=4.2.0
19
19
  Requires-Dist: omegaconf
20
20
  Requires-Dist: matplotlib
21
21
  Requires-Dist: numpy<2
@@ -65,7 +65,7 @@ Requires-Dist: numpy<2; extra == "fm"
65
65
  Requires-Dist: pandas; extra == "fm"
66
66
  Requires-Dist: pillow; extra == "fm"
67
67
  Requires-Dist: rich; extra == "fm"
68
- Requires-Dist: hs2p[asap,cucim,openslide,sam2,vips]>=4.1.1; extra == "fm"
68
+ Requires-Dist: hs2p[asap,cucim,openslide,sam2,vips]>=4.2.0; extra == "fm"
69
69
  Requires-Dist: wandb; extra == "fm"
70
70
  Requires-Dist: torch<2.8,>=2.3; extra == "fm"
71
71
  Requires-Dist: torchvision>=0.18.0; extra == "fm"
@@ -169,7 +169,7 @@ pipeline = Pipeline(
169
169
  preprocessing=PreprocessingConfig(
170
170
  requested_spacing_um=0.5,
171
171
  requested_tile_size_px=224,
172
- tissue_threshold=0.1,
172
+ masks={"min_coverage": {"tissue": 0.1}},
173
173
  ),
174
174
  execution=ExecutionOptions(output_dir="outputs/demo"),
175
175
  )
@@ -83,7 +83,6 @@ slide2vec/utils/utils.py
83
83
  tests/test_architecture_runtime_split.py
84
84
  tests/test_attention_extraction.py
85
85
  tests/test_dense_extraction.py
86
- tests/test_dense_locality_gated.py
87
86
  tests/test_dense_regions.py
88
87
  tests/test_encoder_registry.py
89
88
  tests/test_hs2p_package_cutover.py
@@ -1,4 +1,4 @@
1
- hs2p[asap,cucim,openslide,sam2,vips]>=4.1.1
1
+ hs2p[asap,cucim,openslide,sam2,vips]>=4.2.0
2
2
  omegaconf
3
3
  matplotlib
4
4
  numpy<2
@@ -27,7 +27,7 @@ numpy<2
27
27
  pandas
28
28
  pillow
29
29
  rich
30
- hs2p[asap,cucim,openslide,sam2,vips]>=4.1.1
30
+ hs2p[asap,cucim,openslide,sam2,vips]>=4.2.0
31
31
  wandb
32
32
  torch<2.8,>=2.3
33
33
  torchvision>=0.18.0
@@ -101,6 +101,7 @@ def mask_path() -> Path:
101
101
  return p
102
102
 
103
103
 
104
+ @pytest.mark.heavy
104
105
  @pytest.mark.skipif(
105
106
  not os.environ.get("HF_TOKEN"),
106
107
  reason="HF_TOKEN required for model weight download",
@@ -700,7 +700,7 @@ def test_masks_min_coverage_tissue_drives_derived_tiling_threshold():
700
700
 
701
701
  tiling_cfg = build_hs2p_configs(preprocessing)[0]
702
702
 
703
- assert tiling_cfg.tissue_threshold == pytest.approx(0.37)
703
+ assert tiling_cfg.min_coverage["tissue"] == pytest.approx(0.37)
704
704
  assert tiling_cfg.independent_sampling is False
705
705
 
706
706
 
@@ -842,9 +842,10 @@ def test_independent_sampling_toggle_selects_selection_strategy():
842
842
  assert joint[-2] == "joint_sampling"
843
843
 
844
844
 
845
- def test_merged_annotation_label_collapses_to_flat_root(tmp_path: Path):
846
- """A merged tiling row is labelled ``merged`` by hs2p, but carries no class — it must
847
- collapse to the flat output root (no per-class subdir), exactly like tissue/None."""
845
+ def test_merged_annotation_label_survives_round_trip_to_flat_root(tmp_path: Path):
846
+ """A merged tiling row is labelled ``merged`` by hs2p. The informative label must
847
+ survive the round-trip (no collapse to ``None``), yet artifacts still land at the flat
848
+ output root because hs2p's ``is_flattened_annotation`` flattens ``"merged"``."""
848
849
  from slide2vec.utils.tiling_io import load_tiling_result_from_row
849
850
 
850
851
  coordinates_meta_path = tmp_path / "slide-a.coordinates.meta.json"
@@ -868,8 +869,8 @@ def test_merged_annotation_label_collapses_to_flat_root(tmp_path: Path):
868
869
  finally:
869
870
  tiling_io.load_tiling_result = original
870
871
 
871
- # Merged carries no class label; the flatten rule sends it to the flat root.
872
- assert result.annotation is None
872
+ # The informative label survives the round-trip; it is not blanked to None.
873
+ assert result.annotation == "merged"
873
874
  artifact = write_tile_embeddings(
874
875
  "slide-a",
875
876
  np.arange(8, dtype=np.float32).reshape(2, 4),
@@ -877,9 +878,84 @@ def test_merged_annotation_label_collapses_to_flat_root(tmp_path: Path):
877
878
  output_format="npz",
878
879
  annotation=result.annotation,
879
880
  )
881
+ # ...but placement is decided by is_flattened_annotation, so it still lands flat.
880
882
  assert artifact.path == tmp_path / "tile_embeddings" / "slide-a.npz"
881
883
 
882
884
 
885
+ def test_tissue_annotation_survives_round_trip_to_flat_root(tmp_path: Path):
886
+ """A ``"tissue"`` row keeps its informative label through the round-trip while still
887
+ resolving to flat-root placement via ``is_flattened_annotation``."""
888
+ from slide2vec.utils.tiling_io import load_tiling_result_from_row
889
+
890
+ coordinates_meta_path = tmp_path / "slide-t.coordinates.meta.json"
891
+ coordinates_meta_path.write_text("{}", encoding="utf-8")
892
+
893
+ def fake_load_tiling_result(**kwargs):
894
+ return SimpleNamespace()
895
+
896
+ import slide2vec.utils.tiling_io as tiling_io
897
+
898
+ original = tiling_io.load_tiling_result
899
+ tiling_io.load_tiling_result = fake_load_tiling_result
900
+ try:
901
+ result = load_tiling_result_from_row(
902
+ {
903
+ "annotation": "tissue",
904
+ "coordinates_npz_path": str(tmp_path / "slide-t.coordinates.npz"),
905
+ "coordinates_meta_path": str(coordinates_meta_path),
906
+ }
907
+ )
908
+ finally:
909
+ tiling_io.load_tiling_result = original
910
+
911
+ assert result.annotation == "tissue"
912
+ artifact = write_tile_embeddings(
913
+ "slide-t",
914
+ np.arange(8, dtype=np.float32).reshape(2, 4),
915
+ output_dir=tmp_path,
916
+ output_format="npz",
917
+ annotation=result.annotation,
918
+ )
919
+ assert artifact.path == tmp_path / "tile_embeddings" / "slide-t.npz"
920
+
921
+
922
+ def test_real_class_annotation_survives_round_trip_to_per_class_subdir(tmp_path: Path):
923
+ """A genuine class label (e.g. ``"tumor"``) survives the round-trip and routes to its
924
+ own per-class subdir, since ``is_flattened_annotation`` does not flatten it."""
925
+ from slide2vec.utils.tiling_io import load_tiling_result_from_row
926
+
927
+ coordinates_meta_path = tmp_path / "slide-u.coordinates.meta.json"
928
+ coordinates_meta_path.write_text("{}", encoding="utf-8")
929
+
930
+ def fake_load_tiling_result(**kwargs):
931
+ return SimpleNamespace()
932
+
933
+ import slide2vec.utils.tiling_io as tiling_io
934
+
935
+ original = tiling_io.load_tiling_result
936
+ tiling_io.load_tiling_result = fake_load_tiling_result
937
+ try:
938
+ result = load_tiling_result_from_row(
939
+ {
940
+ "annotation": "tumor",
941
+ "coordinates_npz_path": str(tmp_path / "slide-u.coordinates.npz"),
942
+ "coordinates_meta_path": str(coordinates_meta_path),
943
+ }
944
+ )
945
+ finally:
946
+ tiling_io.load_tiling_result = original
947
+
948
+ assert result.annotation == "tumor"
949
+ artifact = write_tile_embeddings(
950
+ "slide-u",
951
+ np.arange(8, dtype=np.float32).reshape(2, 4),
952
+ output_dir=tmp_path,
953
+ output_format="npz",
954
+ annotation=result.annotation,
955
+ )
956
+ assert artifact.path == tmp_path / "tile_embeddings" / "tumor" / "slide-u.npz"
957
+
958
+
883
959
  def test_invalid_masks_block_with_duplicate_pixel_values_fails_fast():
884
960
  from slide2vec.runtime.tiling import build_hs2p_configs
885
961