slide2vec 4.0.1__tar.gz → 4.0.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67)
  1. {slide2vec-4.0.1 → slide2vec-4.0.3}/PKG-INFO +3 -3
  2. {slide2vec-4.0.1 → slide2vec-4.0.3}/pyproject.toml +4 -4
  3. {slide2vec-4.0.1 → slide2vec-4.0.3}/slide2vec/__init__.py +1 -1
  4. {slide2vec-4.0.1 → slide2vec-4.0.3}/slide2vec/api.py +25 -12
  5. {slide2vec-4.0.1 → slide2vec-4.0.3}/slide2vec/configs/default.yaml +2 -2
  6. {slide2vec-4.0.1 → slide2vec-4.0.3}/slide2vec/data/tile_reader.py +48 -31
  7. {slide2vec-4.0.1 → slide2vec-4.0.3}/slide2vec/inference.py +208 -67
  8. {slide2vec-4.0.1 → slide2vec-4.0.3}/slide2vec/utils/tiling_io.py +21 -4
  9. {slide2vec-4.0.1 → slide2vec-4.0.3}/slide2vec/utils/utils.py +26 -2
  10. {slide2vec-4.0.1 → slide2vec-4.0.3}/slide2vec.egg-info/PKG-INFO +3 -3
  11. {slide2vec-4.0.1 → slide2vec-4.0.3}/slide2vec.egg-info/requires.txt +2 -2
  12. slide2vec-4.0.3/tests/test_batch_collator_timing.py +161 -0
  13. {slide2vec-4.0.1 → slide2vec-4.0.3}/tests/test_hs2p_package_cutover.py +17 -13
  14. {slide2vec-4.0.1 → slide2vec-4.0.3}/tests/test_output_consistency.py +1 -2
  15. {slide2vec-4.0.1 → slide2vec-4.0.3}/tests/test_regression_core.py +88 -0
  16. {slide2vec-4.0.1 → slide2vec-4.0.3}/tests/test_regression_inference.py +351 -21
  17. slide2vec-4.0.1/tests/test_batch_collator_timing.py +0 -73
  18. {slide2vec-4.0.1 → slide2vec-4.0.3}/LICENSE +0 -0
  19. {slide2vec-4.0.1 → slide2vec-4.0.3}/README.md +0 -0
  20. {slide2vec-4.0.1 → slide2vec-4.0.3}/setup.cfg +0 -0
  21. {slide2vec-4.0.1 → slide2vec-4.0.3}/slide2vec/__main__.py +0 -0
  22. {slide2vec-4.0.1 → slide2vec-4.0.3}/slide2vec/artifacts.py +0 -0
  23. {slide2vec-4.0.1 → slide2vec-4.0.3}/slide2vec/cli.py +0 -0
  24. {slide2vec-4.0.1 → slide2vec-4.0.3}/slide2vec/configs/__init__.py +0 -0
  25. {slide2vec-4.0.1 → slide2vec-4.0.3}/slide2vec/data/__init__.py +0 -0
  26. {slide2vec-4.0.1 → slide2vec-4.0.3}/slide2vec/data/dataset.py +0 -0
  27. {slide2vec-4.0.1 → slide2vec-4.0.3}/slide2vec/data/tile_store.py +0 -0
  28. {slide2vec-4.0.1 → slide2vec-4.0.3}/slide2vec/distributed/__init__.py +0 -0
  29. {slide2vec-4.0.1 → slide2vec-4.0.3}/slide2vec/distributed/direct_embed_worker.py +0 -0
  30. {slide2vec-4.0.1 → slide2vec-4.0.3}/slide2vec/distributed/pipeline_worker.py +0 -0
  31. {slide2vec-4.0.1 → slide2vec-4.0.3}/slide2vec/encoders/__init__.py +0 -0
  32. {slide2vec-4.0.1 → slide2vec-4.0.3}/slide2vec/encoders/base.py +0 -0
  33. {slide2vec-4.0.1 → slide2vec-4.0.3}/slide2vec/encoders/models/__init__.py +0 -0
  34. {slide2vec-4.0.1 → slide2vec-4.0.3}/slide2vec/encoders/models/conch.py +0 -0
  35. {slide2vec-4.0.1 → slide2vec-4.0.3}/slide2vec/encoders/models/gigapath.py +0 -0
  36. {slide2vec-4.0.1 → slide2vec-4.0.3}/slide2vec/encoders/models/hibou.py +0 -0
  37. {slide2vec-4.0.1 → slide2vec-4.0.3}/slide2vec/encoders/models/hoptimus.py +0 -0
  38. {slide2vec-4.0.1 → slide2vec-4.0.3}/slide2vec/encoders/models/midnight.py +0 -0
  39. {slide2vec-4.0.1 → slide2vec-4.0.3}/slide2vec/encoders/models/musk.py +0 -0
  40. {slide2vec-4.0.1 → slide2vec-4.0.3}/slide2vec/encoders/models/phikon.py +0 -0
  41. {slide2vec-4.0.1 → slide2vec-4.0.3}/slide2vec/encoders/models/prism.py +0 -0
  42. {slide2vec-4.0.1 → slide2vec-4.0.3}/slide2vec/encoders/models/prost40m.py +0 -0
  43. {slide2vec-4.0.1 → slide2vec-4.0.3}/slide2vec/encoders/models/titan.py +0 -0
  44. {slide2vec-4.0.1 → slide2vec-4.0.3}/slide2vec/encoders/models/uni.py +0 -0
  45. {slide2vec-4.0.1 → slide2vec-4.0.3}/slide2vec/encoders/models/virchow.py +0 -0
  46. {slide2vec-4.0.1 → slide2vec-4.0.3}/slide2vec/encoders/registry.py +0 -0
  47. {slide2vec-4.0.1 → slide2vec-4.0.3}/slide2vec/encoders/validation.py +0 -0
  48. {slide2vec-4.0.1 → slide2vec-4.0.3}/slide2vec/main.py +0 -0
  49. {slide2vec-4.0.1 → slide2vec-4.0.3}/slide2vec/model_settings.py +0 -0
  50. {slide2vec-4.0.1 → slide2vec-4.0.3}/slide2vec/progress.py +0 -0
  51. {slide2vec-4.0.1 → slide2vec-4.0.3}/slide2vec/registry.py +0 -0
  52. {slide2vec-4.0.1 → slide2vec-4.0.3}/slide2vec/resources.py +0 -0
  53. {slide2vec-4.0.1 → slide2vec-4.0.3}/slide2vec/runtime_types.py +0 -0
  54. {slide2vec-4.0.1 → slide2vec-4.0.3}/slide2vec/utils/__init__.py +0 -0
  55. {slide2vec-4.0.1 → slide2vec-4.0.3}/slide2vec/utils/config.py +0 -0
  56. {slide2vec-4.0.1 → slide2vec-4.0.3}/slide2vec/utils/coordinates.py +0 -0
  57. {slide2vec-4.0.1 → slide2vec-4.0.3}/slide2vec/utils/log_utils.py +0 -0
  58. {slide2vec-4.0.1 → slide2vec-4.0.3}/slide2vec.egg-info/SOURCES.txt +0 -0
  59. {slide2vec-4.0.1 → slide2vec-4.0.3}/slide2vec.egg-info/dependency_links.txt +0 -0
  60. {slide2vec-4.0.1 → slide2vec-4.0.3}/slide2vec.egg-info/entry_points.txt +0 -0
  61. {slide2vec-4.0.1 → slide2vec-4.0.3}/slide2vec.egg-info/not-zip-safe +0 -0
  62. {slide2vec-4.0.1 → slide2vec-4.0.3}/slide2vec.egg-info/top_level.txt +0 -0
  63. {slide2vec-4.0.1 → slide2vec-4.0.3}/tests/test_encoder_registry.py +0 -0
  64. {slide2vec-4.0.1 → slide2vec-4.0.3}/tests/test_packaging_metadata.py +0 -0
  65. {slide2vec-4.0.1 → slide2vec-4.0.3}/tests/test_progress.py +0 -0
  66. {slide2vec-4.0.1 → slide2vec-4.0.3}/tests/test_regression_models.py +0 -0
  67. {slide2vec-4.0.1 → slide2vec-4.0.3}/tests/test_tile_store.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: slide2vec
3
- Version: 4.0.1
3
+ Version: 4.0.3
4
4
  Summary: Embedding of whole slide images with Foundation Models
5
5
  Author-email: Clément Grisi <clement.grisi@radboudumc.nl>
6
6
  License-Expression: Apache-2.0
@@ -15,7 +15,7 @@ Classifier: Programming Language :: Python :: 3.13
15
15
  Requires-Python: >=3.10
16
16
  Description-Content-Type: text/markdown
17
17
  License-File: LICENSE
18
- Requires-Dist: hs2p[asap,cucim,openslide,vips]>=3.1.3
18
+ Requires-Dist: hs2p[asap,cucim,openslide,vips]>=3.1.4
19
19
  Requires-Dist: omegaconf
20
20
  Requires-Dist: matplotlib
21
21
  Requires-Dist: numpy<2
@@ -63,7 +63,7 @@ Requires-Dist: numpy<2; extra == "fm"
63
63
  Requires-Dist: pandas; extra == "fm"
64
64
  Requires-Dist: pillow; extra == "fm"
65
65
  Requires-Dist: rich; extra == "fm"
66
- Requires-Dist: hs2p[asap,cucim,openslide,vips]>=3.1.3; extra == "fm"
66
+ Requires-Dist: hs2p[asap,cucim,openslide,vips]>=3.1.4; extra == "fm"
67
67
  Requires-Dist: wandb; extra == "fm"
68
68
  Requires-Dist: torch<2.8,>=2.3; extra == "fm"
69
69
  Requires-Dist: torchvision>=0.18.0; extra == "fm"
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "slide2vec"
7
- version = "4.0.1"
7
+ version = "4.0.3"
8
8
  description = "Embedding of whole slide images with Foundation Models"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.10"
@@ -21,7 +21,7 @@ classifiers = [
21
21
  "Programming Language :: Python :: 3.13",
22
22
  ]
23
23
  dependencies = [
24
- "hs2p[asap,cucim,openslide,vips]>=3.1.3",
24
+ "hs2p[asap,cucim,openslide,vips]>=3.1.4",
25
25
  "omegaconf",
26
26
  "matplotlib",
27
27
  "numpy<2",
@@ -85,7 +85,7 @@ fm = [
85
85
  "pandas",
86
86
  "pillow",
87
87
  "rich",
88
- "hs2p[asap,cucim,openslide,vips]>=3.1.3",
88
+ "hs2p[asap,cucim,openslide,vips]>=3.1.4",
89
89
  "wandb",
90
90
  "torch>=2.3,<2.8",
91
91
  "torchvision>=0.18.0",
@@ -154,7 +154,7 @@ no_implicit_reexport = true
154
154
  max-line-length = 160
155
155
 
156
156
  [tool.bumpver]
157
- current_version = "4.0.1"
157
+ current_version = "4.0.3"
158
158
  version_pattern = "MAJOR.MINOR.PATCH"
159
159
  commit = false # We do version bumping in CI, not as a commit
160
160
  tag = false # Git tag already exists — we don't auto-tag
@@ -2,7 +2,7 @@ from slide2vec.api import EmbeddedSlide, ExecutionOptions, Model, Pipeline, Prep
2
2
  from slide2vec.artifacts import HierarchicalEmbeddingArtifact, SlideEmbeddingArtifact, TileEmbeddingArtifact
3
3
 
4
4
 
5
- __version__ = "4.0.1"
5
+ __version__ = "4.0.3"
6
6
 
7
7
  __all__ = [
8
8
  "Model",
@@ -22,7 +22,7 @@ from slide2vec.encoders.validation import validate_encoder_config
22
22
  from slide2vec.model_settings import canonicalize_model_name, normalize_precision_name
23
23
  from slide2vec.progress import emit_progress
24
24
  from slide2vec.runtime_types import LoadedModel
25
- from slide2vec.utils.utils import slurm_cpu_limit
25
+ from slide2vec.utils.utils import cpu_worker_limit, slurm_cpu_limit
26
26
 
27
27
  PathLike = str | Path
28
28
 
@@ -120,8 +120,8 @@ class ExecutionOptions:
120
120
  output_dir: Path | None = None
121
121
  output_format: str = "pt"
122
122
  batch_size: int = 1
123
- num_workers: int = 0
124
- num_preprocessing_workers: int = 8
123
+ num_workers: int | None = None
124
+ num_preprocessing_workers: int | None = None
125
125
  num_gpus: int | None = None
126
126
  precision: str | None = None
127
127
  prefetch_factor: int = 4
@@ -140,8 +140,12 @@ class ExecutionOptions:
140
140
  output_dir=Path(cfg.output_dir),
141
141
  output_format="pt",
142
142
  batch_size=int(cfg.model.batch_size),
143
- num_workers=int(num_workers),
144
- num_preprocessing_workers=int(cfg.speed.num_preprocessing_workers),
143
+ num_workers=int(num_workers) if num_workers is not None else None,
144
+ num_preprocessing_workers=(
145
+ int(cfg.speed.num_preprocessing_workers)
146
+ if cfg.speed.num_preprocessing_workers is not None
147
+ else None
148
+ ),
145
149
  num_gpus=1 if run_on_cpu else (int(configured_num_gpus) if configured_num_gpus is not None else None),
146
150
  precision="fp32" if run_on_cpu else requested_precision,
147
151
  prefetch_factor=prefetch_factor,
@@ -158,24 +162,33 @@ class ExecutionOptions:
158
162
  raise ValueError("ExecutionOptions.num_gpus must be at least 1")
159
163
  if self.prefetch_factor < 1:
160
164
  raise ValueError("ExecutionOptions.prefetch_factor must be at least 1")
165
+ cap = cpu_worker_limit()
161
166
  cpu_count = os.cpu_count() or 1
162
167
  slurm_limit = slurm_cpu_limit()
163
- cap = min(cpu_count, slurm_limit) if slurm_limit is not None else cpu_count
164
- capped_num_workers = min(self.num_workers, cap)
165
- capped_num_preprocessing_workers = min(self.num_preprocessing_workers, cap)
166
- object.__setattr__(self, "num_workers", capped_num_workers)
168
+ capped_num_preprocessing_workers = (
169
+ cap if self.num_preprocessing_workers is None else min(self.num_preprocessing_workers, cap)
170
+ )
167
171
  object.__setattr__(self, "num_preprocessing_workers", capped_num_preprocessing_workers)
168
172
  logger = logging.getLogger(__name__)
169
173
  cap_source = f"slurm_cpu_limit={slurm_limit}" if slurm_limit is not None else f"cpu_count={cpu_count}"
174
+ resolved_num_workers = self.resolved_num_workers()
175
+ num_workers_label = (
176
+ f"{resolved_num_workers} (requested=auto)"
177
+ if self.num_workers is None
178
+ else str(resolved_num_workers)
179
+ )
170
180
  logger.info(
171
- "ExecutionOptions: num_workers=%d, num_preprocessing_workers=%d "
172
- "(cap=%d via %s)",
173
- capped_num_workers,
181
+ "ExecutionOptions: num_workers=%s, num_preprocessing_workers=%d "
182
+ "(preprocessing cap=%d via %s)",
183
+ num_workers_label,
174
184
  capped_num_preprocessing_workers,
175
185
  cap,
176
186
  cap_source,
177
187
  )
178
188
 
189
+ def resolved_num_workers(self) -> int:
190
+ return cpu_worker_limit() if self.num_workers is None else int(self.num_workers)
191
+
179
192
  def with_output_dir(self, output_dir: PathLike | None) -> "ExecutionOptions":
180
193
  if output_dir is None:
181
194
  return self
@@ -66,9 +66,9 @@ tiling:
66
66
 
67
67
  speed:
68
68
  precision: # model inference precision ["fp32", "fp16", "bf16"]; if not set, determined automatically based on model recommendations
69
- num_dataloader_workers: 8 # number of DataLoader worker processes for reading tiles during embedding (tar path); on-the-fly path derives this automatically from cpu_count // speed.num_cucim_workers
69
+ num_dataloader_workers: # number of DataLoader worker processes for reading tiles during embedding; defaults to auto (job CPU budget, except cuCIM on-the-fly uses cpu_budget // speed.num_cucim_workers)
70
70
  num_gpus: # number of GPUs to use for feature extraction; defaults to all available GPUs
71
- num_preprocessing_workers: 8 # number of workers for hs2p tiling (WSI reading, JPEG encoding, tar writing)
71
+ num_preprocessing_workers: # number of workers for hs2p tiling (WSI reading, JPEG encoding, tar writing); defaults to the runtime CPU budget capped at 64
72
72
  num_cucim_workers: 4 # number of internal cucim threads per read_region call (embedding path, on-the-fly only); DataLoader workers are auto-set to cpu_count // num_cucim_workers
73
73
  prefetch_factor_embedding: 4 # prefetch factor for tile embedding dataloaders
74
74
  persistent_workers_embedding: true # keep DataLoader workers alive across epochs/batches
@@ -1,4 +1,5 @@
1
1
  from collections import defaultdict
2
+ from contextlib import nullcontext
2
3
  import time
3
4
  from pathlib import Path
4
5
 
@@ -6,7 +7,9 @@ import numpy as np
6
7
  import torch
7
8
 
8
9
  from hs2p import TilingResult
10
+ from hs2p.utils.stderr import run_with_filtered_stderr
9
11
  from hs2p.wsi.streaming.plans import build_supertile_index
12
+ from slide2vec.utils.log_utils import suppress_c_stderr
10
13
 
11
14
 
12
15
  class SuperTileBatchSampler:
@@ -139,15 +142,17 @@ class WSITileReader:
139
142
  torch.empty((0, 3, ts, ts), dtype=torch.uint8),
140
143
  {"reader_open_ms": 0.0, "reader_read_ms": 0.0},
141
144
  )
142
- was_closed = self._reader is None
143
- open_start = time.perf_counter()
144
- self._ensure_open()
145
- reader_open_ms = (time.perf_counter() - open_start) * 1000.0 if was_closed else 0.0
146
- read_start = time.perf_counter()
147
- if self._use_supertiles:
148
- tensor = self._read_batch_supertiles(tile_indices)
149
- else:
150
- tensor = self._read_batch_simple(tile_indices)
145
+ stderr_context = suppress_c_stderr() if self._backend == "cucim" else nullcontext()
146
+ with stderr_context:
147
+ was_closed = self._reader is None
148
+ open_start = time.perf_counter()
149
+ self._ensure_open()
150
+ reader_open_ms = (time.perf_counter() - open_start) * 1000.0 if was_closed else 0.0
151
+ read_start = time.perf_counter()
152
+ if self._use_supertiles:
153
+ tensor = self._read_batch_supertiles(tile_indices)
154
+ else:
155
+ tensor = self._read_batch_simple(tile_indices)
151
156
  reader_read_ms = (time.perf_counter() - read_start) * 1000.0
152
157
  return tensor, {"reader_open_ms": reader_open_ms, "reader_read_ms": reader_read_ms}
153
158
 
@@ -260,11 +265,16 @@ class OnTheFlyBatchTileCollator:
260
265
  torch.empty((0, 3, self.tile_size, self.tile_size), dtype=torch.uint8),
261
266
  {"worker_batch_ms": 0.0, "reader_open_ms": 0.0, "reader_read_ms": 0.0},
262
267
  )
263
- worker_start = time.perf_counter()
264
- tile_indices = np.asarray(batch_indices, dtype=np.int64)
265
- tensor, timing = self._reader.read_batch_with_timing(tile_indices)
266
- timing["worker_batch_ms"] = (time.perf_counter() - worker_start) * 1000.0
267
- return torch.as_tensor(tile_indices, dtype=torch.long), tensor, timing
268
+ def _run_batch():
269
+ worker_start = time.perf_counter()
270
+ tile_indices = np.asarray(batch_indices, dtype=np.int64)
271
+ tensor, timing = self._reader.read_batch_with_timing(tile_indices)
272
+ timing["worker_batch_ms"] = (time.perf_counter() - worker_start) * 1000.0
273
+ return torch.as_tensor(tile_indices, dtype=torch.long), tensor, timing
274
+
275
+ if getattr(self._reader, "_backend", None) == "cucim":
276
+ return run_with_filtered_stderr(_run_batch)
277
+ return _run_batch()
268
278
 
269
279
 
270
280
  class WSIRegionReader:
@@ -320,12 +330,14 @@ class WSIRegionReader:
320
330
  torch.empty((0, 3, self._region_size_px, self._region_size_px), dtype=torch.uint8),
321
331
  {"reader_open_ms": 0.0, "reader_read_ms": 0.0},
322
332
  )
323
- was_closed = self._reader is None
324
- open_start = time.perf_counter()
325
- self._ensure_open()
326
- reader_open_ms = (time.perf_counter() - open_start) * 1000.0 if was_closed else 0.0
327
- read_start = time.perf_counter()
328
- regions = self._read_regions_batch(locations)
333
+ stderr_context = suppress_c_stderr() if self._backend == "cucim" else nullcontext()
334
+ with stderr_context:
335
+ was_closed = self._reader is None
336
+ open_start = time.perf_counter()
337
+ self._ensure_open()
338
+ reader_open_ms = (time.perf_counter() - open_start) * 1000.0 if was_closed else 0.0
339
+ read_start = time.perf_counter()
340
+ regions = self._read_regions_batch(locations)
329
341
  reader_read_ms = (time.perf_counter() - read_start) * 1000.0
330
342
  batch = np.stack([np.asarray(region)[:, :, :3] for region in regions], axis=0)
331
343
  tensor = torch.from_numpy(batch).permute(0, 3, 1, 2).contiguous()
@@ -385,17 +397,22 @@ class OnTheFlyHierarchicalBatchCollator:
385
397
  torch.empty((0, 3, self._tile_size, self._tile_size), dtype=torch.uint8),
386
398
  {"worker_batch_ms": 0.0, "reader_open_ms": 0.0, "reader_read_ms": 0.0},
387
399
  )
388
- worker_start = time.perf_counter()
389
- flat_indices = np.asarray(batch_indices, dtype=np.int64)
390
- requested_regions = self._region_index[flat_indices]
391
- unique_regions, inverse = np.unique(requested_regions, return_inverse=True)
392
- locations = [self._region_locations[int(region)] for region in unique_regions]
393
- region_tensor, timing = self._reader.read_batch_with_timing(locations)
394
- unfolded = _unfold_region_tensor_uint8(region_tensor, self._tile_size)
395
- subtile_indices = self._subtile_index_within_region[flat_indices]
396
- out = unfolded[torch.as_tensor(inverse, dtype=torch.long), torch.as_tensor(subtile_indices, dtype=torch.long)]
397
- timing["worker_batch_ms"] = (time.perf_counter() - worker_start) * 1000.0
398
- return torch.as_tensor(flat_indices, dtype=torch.long), out, timing
400
+ def _run_batch():
401
+ worker_start = time.perf_counter()
402
+ flat_indices = np.asarray(batch_indices, dtype=np.int64)
403
+ requested_regions = self._region_index[flat_indices]
404
+ unique_regions, inverse = np.unique(requested_regions, return_inverse=True)
405
+ locations = [self._region_locations[int(region)] for region in unique_regions]
406
+ region_tensor, timing = self._reader.read_batch_with_timing(locations)
407
+ unfolded = _unfold_region_tensor_uint8(region_tensor, self._tile_size)
408
+ subtile_indices = self._subtile_index_within_region[flat_indices]
409
+ out = unfolded[torch.as_tensor(inverse, dtype=torch.long), torch.as_tensor(subtile_indices, dtype=torch.long)]
410
+ timing["worker_batch_ms"] = (time.perf_counter() - worker_start) * 1000.0
411
+ return torch.as_tensor(flat_indices, dtype=torch.long), out, timing
412
+
413
+ if getattr(self._reader, "_backend", None) == "cucim":
414
+ return run_with_filtered_stderr(_run_batch)
415
+ return _run_batch()
399
416
 
400
417
 
401
418
  def _unfold_region_tensor_uint8(region_tensor: torch.Tensor, tile_size: int) -> torch.Tensor: