slide2vec 4.0.1.tar.gz → 4.0.2.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67)
  1. {slide2vec-4.0.1 → slide2vec-4.0.2}/PKG-INFO +3 -3
  2. {slide2vec-4.0.1 → slide2vec-4.0.2}/pyproject.toml +4 -4
  3. {slide2vec-4.0.1 → slide2vec-4.0.2}/slide2vec/__init__.py +1 -1
  4. {slide2vec-4.0.1 → slide2vec-4.0.2}/slide2vec/api.py +11 -5
  5. {slide2vec-4.0.1 → slide2vec-4.0.2}/slide2vec/configs/default.yaml +1 -1
  6. {slide2vec-4.0.1 → slide2vec-4.0.2}/slide2vec/data/tile_reader.py +48 -31
  7. {slide2vec-4.0.1 → slide2vec-4.0.2}/slide2vec/inference.py +141 -39
  8. {slide2vec-4.0.1 → slide2vec-4.0.2}/slide2vec/utils/tiling_io.py +12 -4
  9. {slide2vec-4.0.1 → slide2vec-4.0.2}/slide2vec/utils/utils.py +25 -2
  10. {slide2vec-4.0.1 → slide2vec-4.0.2}/slide2vec.egg-info/PKG-INFO +3 -3
  11. {slide2vec-4.0.1 → slide2vec-4.0.2}/slide2vec.egg-info/requires.txt +2 -2
  12. slide2vec-4.0.2/tests/test_batch_collator_timing.py +161 -0
  13. {slide2vec-4.0.1 → slide2vec-4.0.2}/tests/test_hs2p_package_cutover.py +11 -13
  14. {slide2vec-4.0.1 → slide2vec-4.0.2}/tests/test_regression_core.py +34 -0
  15. {slide2vec-4.0.1 → slide2vec-4.0.2}/tests/test_regression_inference.py +195 -16
  16. slide2vec-4.0.1/tests/test_batch_collator_timing.py +0 -73
  17. {slide2vec-4.0.1 → slide2vec-4.0.2}/LICENSE +0 -0
  18. {slide2vec-4.0.1 → slide2vec-4.0.2}/README.md +0 -0
  19. {slide2vec-4.0.1 → slide2vec-4.0.2}/setup.cfg +0 -0
  20. {slide2vec-4.0.1 → slide2vec-4.0.2}/slide2vec/__main__.py +0 -0
  21. {slide2vec-4.0.1 → slide2vec-4.0.2}/slide2vec/artifacts.py +0 -0
  22. {slide2vec-4.0.1 → slide2vec-4.0.2}/slide2vec/cli.py +0 -0
  23. {slide2vec-4.0.1 → slide2vec-4.0.2}/slide2vec/configs/__init__.py +0 -0
  24. {slide2vec-4.0.1 → slide2vec-4.0.2}/slide2vec/data/__init__.py +0 -0
  25. {slide2vec-4.0.1 → slide2vec-4.0.2}/slide2vec/data/dataset.py +0 -0
  26. {slide2vec-4.0.1 → slide2vec-4.0.2}/slide2vec/data/tile_store.py +0 -0
  27. {slide2vec-4.0.1 → slide2vec-4.0.2}/slide2vec/distributed/__init__.py +0 -0
  28. {slide2vec-4.0.1 → slide2vec-4.0.2}/slide2vec/distributed/direct_embed_worker.py +0 -0
  29. {slide2vec-4.0.1 → slide2vec-4.0.2}/slide2vec/distributed/pipeline_worker.py +0 -0
  30. {slide2vec-4.0.1 → slide2vec-4.0.2}/slide2vec/encoders/__init__.py +0 -0
  31. {slide2vec-4.0.1 → slide2vec-4.0.2}/slide2vec/encoders/base.py +0 -0
  32. {slide2vec-4.0.1 → slide2vec-4.0.2}/slide2vec/encoders/models/__init__.py +0 -0
  33. {slide2vec-4.0.1 → slide2vec-4.0.2}/slide2vec/encoders/models/conch.py +0 -0
  34. {slide2vec-4.0.1 → slide2vec-4.0.2}/slide2vec/encoders/models/gigapath.py +0 -0
  35. {slide2vec-4.0.1 → slide2vec-4.0.2}/slide2vec/encoders/models/hibou.py +0 -0
  36. {slide2vec-4.0.1 → slide2vec-4.0.2}/slide2vec/encoders/models/hoptimus.py +0 -0
  37. {slide2vec-4.0.1 → slide2vec-4.0.2}/slide2vec/encoders/models/midnight.py +0 -0
  38. {slide2vec-4.0.1 → slide2vec-4.0.2}/slide2vec/encoders/models/musk.py +0 -0
  39. {slide2vec-4.0.1 → slide2vec-4.0.2}/slide2vec/encoders/models/phikon.py +0 -0
  40. {slide2vec-4.0.1 → slide2vec-4.0.2}/slide2vec/encoders/models/prism.py +0 -0
  41. {slide2vec-4.0.1 → slide2vec-4.0.2}/slide2vec/encoders/models/prost40m.py +0 -0
  42. {slide2vec-4.0.1 → slide2vec-4.0.2}/slide2vec/encoders/models/titan.py +0 -0
  43. {slide2vec-4.0.1 → slide2vec-4.0.2}/slide2vec/encoders/models/uni.py +0 -0
  44. {slide2vec-4.0.1 → slide2vec-4.0.2}/slide2vec/encoders/models/virchow.py +0 -0
  45. {slide2vec-4.0.1 → slide2vec-4.0.2}/slide2vec/encoders/registry.py +0 -0
  46. {slide2vec-4.0.1 → slide2vec-4.0.2}/slide2vec/encoders/validation.py +0 -0
  47. {slide2vec-4.0.1 → slide2vec-4.0.2}/slide2vec/main.py +0 -0
  48. {slide2vec-4.0.1 → slide2vec-4.0.2}/slide2vec/model_settings.py +0 -0
  49. {slide2vec-4.0.1 → slide2vec-4.0.2}/slide2vec/progress.py +0 -0
  50. {slide2vec-4.0.1 → slide2vec-4.0.2}/slide2vec/registry.py +0 -0
  51. {slide2vec-4.0.1 → slide2vec-4.0.2}/slide2vec/resources.py +0 -0
  52. {slide2vec-4.0.1 → slide2vec-4.0.2}/slide2vec/runtime_types.py +0 -0
  53. {slide2vec-4.0.1 → slide2vec-4.0.2}/slide2vec/utils/__init__.py +0 -0
  54. {slide2vec-4.0.1 → slide2vec-4.0.2}/slide2vec/utils/config.py +0 -0
  55. {slide2vec-4.0.1 → slide2vec-4.0.2}/slide2vec/utils/coordinates.py +0 -0
  56. {slide2vec-4.0.1 → slide2vec-4.0.2}/slide2vec/utils/log_utils.py +0 -0
  57. {slide2vec-4.0.1 → slide2vec-4.0.2}/slide2vec.egg-info/SOURCES.txt +0 -0
  58. {slide2vec-4.0.1 → slide2vec-4.0.2}/slide2vec.egg-info/dependency_links.txt +0 -0
  59. {slide2vec-4.0.1 → slide2vec-4.0.2}/slide2vec.egg-info/entry_points.txt +0 -0
  60. {slide2vec-4.0.1 → slide2vec-4.0.2}/slide2vec.egg-info/not-zip-safe +0 -0
  61. {slide2vec-4.0.1 → slide2vec-4.0.2}/slide2vec.egg-info/top_level.txt +0 -0
  62. {slide2vec-4.0.1 → slide2vec-4.0.2}/tests/test_encoder_registry.py +0 -0
  63. {slide2vec-4.0.1 → slide2vec-4.0.2}/tests/test_output_consistency.py +0 -0
  64. {slide2vec-4.0.1 → slide2vec-4.0.2}/tests/test_packaging_metadata.py +0 -0
  65. {slide2vec-4.0.1 → slide2vec-4.0.2}/tests/test_progress.py +0 -0
  66. {slide2vec-4.0.1 → slide2vec-4.0.2}/tests/test_regression_models.py +0 -0
  67. {slide2vec-4.0.1 → slide2vec-4.0.2}/tests/test_tile_store.py +0 -0
{slide2vec-4.0.1 → slide2vec-4.0.2}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: slide2vec
-Version: 4.0.1
+Version: 4.0.2
 Summary: Embedding of whole slide images with Foundation Models
 Author-email: Clément Grisi <clement.grisi@radboudumc.nl>
 License-Expression: Apache-2.0
@@ -15,7 +15,7 @@ Classifier: Programming Language :: Python :: 3.13
 Requires-Python: >=3.10
 Description-Content-Type: text/markdown
 License-File: LICENSE
-Requires-Dist: hs2p[asap,cucim,openslide,vips]>=3.1.3
+Requires-Dist: hs2p[asap,cucim,openslide,vips]>=3.1.4
 Requires-Dist: omegaconf
 Requires-Dist: matplotlib
 Requires-Dist: numpy<2
@@ -63,7 +63,7 @@ Requires-Dist: numpy<2; extra == "fm"
 Requires-Dist: pandas; extra == "fm"
 Requires-Dist: pillow; extra == "fm"
 Requires-Dist: rich; extra == "fm"
-Requires-Dist: hs2p[asap,cucim,openslide,vips]>=3.1.3; extra == "fm"
+Requires-Dist: hs2p[asap,cucim,openslide,vips]>=3.1.4; extra == "fm"
 Requires-Dist: wandb; extra == "fm"
 Requires-Dist: torch<2.8,>=2.3; extra == "fm"
 Requires-Dist: torchvision>=0.18.0; extra == "fm"
{slide2vec-4.0.1 → slide2vec-4.0.2}/pyproject.toml

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

 [project]
 name = "slide2vec"
-version = "4.0.1"
+version = "4.0.2"
 description = "Embedding of whole slide images with Foundation Models"
 readme = "README.md"
 requires-python = ">=3.10"
@@ -21,7 +21,7 @@ classifiers = [
     "Programming Language :: Python :: 3.13",
 ]
 dependencies = [
-    "hs2p[asap,cucim,openslide,vips]>=3.1.3",
+    "hs2p[asap,cucim,openslide,vips]>=3.1.4",
     "omegaconf",
     "matplotlib",
     "numpy<2",
@@ -85,7 +85,7 @@ fm = [
     "pandas",
     "pillow",
     "rich",
-    "hs2p[asap,cucim,openslide,vips]>=3.1.3",
+    "hs2p[asap,cucim,openslide,vips]>=3.1.4",
     "wandb",
     "torch>=2.3,<2.8",
     "torchvision>=0.18.0",
@@ -154,7 +154,7 @@ no_implicit_reexport = true
 max-line-length = 160

 [tool.bumpver]
-current_version = "4.0.1"
+current_version = "4.0.2"
 version_pattern = "MAJOR.MINOR.PATCH"
 commit = false # We do version bumping in CI, not as a commit
 tag = false # Git tag already exists — we don't auto-tag
{slide2vec-4.0.1 → slide2vec-4.0.2}/slide2vec/__init__.py

@@ -2,7 +2,7 @@ from slide2vec.api import EmbeddedSlide, ExecutionOptions, Model, Pipeline, Prep
 from slide2vec.artifacts import HierarchicalEmbeddingArtifact, SlideEmbeddingArtifact, TileEmbeddingArtifact


-__version__ = "4.0.1"
+__version__ = "4.0.2"

 __all__ = [
     "Model",
{slide2vec-4.0.1 → slide2vec-4.0.2}/slide2vec/api.py

@@ -22,7 +22,7 @@ from slide2vec.encoders.validation import validate_encoder_config
 from slide2vec.model_settings import canonicalize_model_name, normalize_precision_name
 from slide2vec.progress import emit_progress
 from slide2vec.runtime_types import LoadedModel
-from slide2vec.utils.utils import slurm_cpu_limit
+from slide2vec.utils.utils import cpu_worker_limit, slurm_cpu_limit

 PathLike = str | Path

@@ -121,7 +121,7 @@ class ExecutionOptions:
     output_format: str = "pt"
     batch_size: int = 1
     num_workers: int = 0
-    num_preprocessing_workers: int = 8
+    num_preprocessing_workers: int | None = None
     num_gpus: int | None = None
     precision: str | None = None
     prefetch_factor: int = 4
@@ -141,7 +141,11 @@
             output_format="pt",
             batch_size=int(cfg.model.batch_size),
             num_workers=int(num_workers),
-            num_preprocessing_workers=int(cfg.speed.num_preprocessing_workers),
+            num_preprocessing_workers=(
+                int(cfg.speed.num_preprocessing_workers)
+                if cfg.speed.num_preprocessing_workers is not None
+                else None
+            ),
             num_gpus=1 if run_on_cpu else (int(configured_num_gpus) if configured_num_gpus is not None else None),
             precision="fp32" if run_on_cpu else requested_precision,
             prefetch_factor=prefetch_factor,
@@ -158,11 +162,13 @@
             raise ValueError("ExecutionOptions.num_gpus must be at least 1")
         if self.prefetch_factor < 1:
             raise ValueError("ExecutionOptions.prefetch_factor must be at least 1")
+        cap = cpu_worker_limit()
         cpu_count = os.cpu_count() or 1
         slurm_limit = slurm_cpu_limit()
-        cap = min(cpu_count, slurm_limit) if slurm_limit is not None else cpu_count
         capped_num_workers = min(self.num_workers, cap)
-        capped_num_preprocessing_workers = min(self.num_preprocessing_workers, cap)
+        capped_num_preprocessing_workers = (
+            cap if self.num_preprocessing_workers is None else min(self.num_preprocessing_workers, cap)
+        )
         object.__setattr__(self, "num_workers", capped_num_workers)
         object.__setattr__(self, "num_preprocessing_workers", capped_num_preprocessing_workers)
         logger = logging.getLogger(__name__)
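
Both this validator and inference.py now take their worker cap from a shared cpu_worker_limit() helper in slide2vec/utils/utils.py; that module's diff (+25 -2) is not shown above. A minimal sketch consistent with the inline logic it replaces, assuming the pre-existing slurm_cpu_limit() reads the standard SLURM_CPUS_PER_TASK variable:

import os


def slurm_cpu_limit() -> int | None:
    # Per-task CPU allocation under SLURM, if the job set one.
    value = os.environ.get("SLURM_CPUS_PER_TASK")
    try:
        return int(value) if value else None
    except ValueError:
        return None


def cpu_worker_limit() -> int:
    # Visible CPUs, capped by the SLURM allocation, never below 1.
    cpu_count = os.cpu_count() or 1
    slurm_limit = slurm_cpu_limit()
    if slurm_limit is not None:
        cpu_count = min(cpu_count, slurm_limit)
    return max(1, cpu_count)
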
{slide2vec-4.0.1 → slide2vec-4.0.2}/slide2vec/configs/default.yaml

@@ -68,7 +68,7 @@ speed:
   precision: # model inference precision ["fp32", "fp16", "bf16"]; if not set, determined automatically based on model recommendations
   num_dataloader_workers: 8 # number of DataLoader worker processes for reading tiles during embedding (tar path); on-the-fly path derives this automatically from cpu_count // speed.num_cucim_workers
   num_gpus: # number of GPUs to use for feature extraction; defaults to all available GPUs
-  num_preprocessing_workers: 8 # number of workers for hs2p tiling (WSI reading, JPEG encoding, tar writing)
+  num_preprocessing_workers: # number of workers for hs2p tiling (WSI reading, JPEG encoding, tar writing); defaults to the CPU budget at runtime
   num_cucim_workers: 4 # number of internal cucim threads per read_region call (embedding path, on-the-fly only); DataLoader workers are auto-set to cpu_count // num_cucim_workers
   prefetch_factor_embedding: 4 # prefetch factor for tile embedding dataloaders
   persistent_workers_embedding: true # keep DataLoader workers alive across epochs/batches
{slide2vec-4.0.1 → slide2vec-4.0.2}/slide2vec/data/tile_reader.py

@@ -1,4 +1,5 @@
 from collections import defaultdict
+from contextlib import nullcontext
 import time
 from pathlib import Path

@@ -6,7 +7,9 @@ import numpy as np
 import torch

 from hs2p import TilingResult
+from hs2p.utils.stderr import run_with_filtered_stderr
 from hs2p.wsi.streaming.plans import build_supertile_index
+from slide2vec.utils.log_utils import suppress_c_stderr


 class SuperTileBatchSampler:
@@ -139,15 +142,17 @@ class WSITileReader:
                torch.empty((0, 3, ts, ts), dtype=torch.uint8),
                {"reader_open_ms": 0.0, "reader_read_ms": 0.0},
            )
-        was_closed = self._reader is None
-        open_start = time.perf_counter()
-        self._ensure_open()
-        reader_open_ms = (time.perf_counter() - open_start) * 1000.0 if was_closed else 0.0
-        read_start = time.perf_counter()
-        if self._use_supertiles:
-            tensor = self._read_batch_supertiles(tile_indices)
-        else:
-            tensor = self._read_batch_simple(tile_indices)
+        stderr_context = suppress_c_stderr() if self._backend == "cucim" else nullcontext()
+        with stderr_context:
+            was_closed = self._reader is None
+            open_start = time.perf_counter()
+            self._ensure_open()
+            reader_open_ms = (time.perf_counter() - open_start) * 1000.0 if was_closed else 0.0
+            read_start = time.perf_counter()
+            if self._use_supertiles:
+                tensor = self._read_batch_supertiles(tile_indices)
+            else:
+                tensor = self._read_batch_simple(tile_indices)
         reader_read_ms = (time.perf_counter() - read_start) * 1000.0
         return tensor, {"reader_open_ms": reader_open_ms, "reader_read_ms": reader_read_ms}

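suppress_c_stderr() is imported from slide2vec.utils.log_utils, which is unchanged in this release and therefore absent from the diff. A plain sys.stderr swap would not silence cucim, whose warnings are emitted by native code directly to file descriptor 2, so the context manager has to operate at the fd level. A minimal sketch of that technique:

import os
from contextlib import contextmanager


@contextmanager
def suppress_c_stderr_sketch():
    saved_fd = os.dup(2)                       # keep a handle on the real stderr
    devnull_fd = os.open(os.devnull, os.O_WRONLY)
    try:
        os.dup2(devnull_fd, 2)                 # fd 2 now points at /dev/null
        yield
    finally:
        os.dup2(saved_fd, 2)                   # restore the real stderr
        os.close(devnull_fd)
        os.close(saved_fd)
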
@@ -260,11 +265,16 @@ class OnTheFlyBatchTileCollator:
                torch.empty((0, 3, self.tile_size, self.tile_size), dtype=torch.uint8),
                {"worker_batch_ms": 0.0, "reader_open_ms": 0.0, "reader_read_ms": 0.0},
            )
-        worker_start = time.perf_counter()
-        tile_indices = np.asarray(batch_indices, dtype=np.int64)
-        tensor, timing = self._reader.read_batch_with_timing(tile_indices)
-        timing["worker_batch_ms"] = (time.perf_counter() - worker_start) * 1000.0
-        return torch.as_tensor(tile_indices, dtype=torch.long), tensor, timing
+        def _run_batch():
+            worker_start = time.perf_counter()
+            tile_indices = np.asarray(batch_indices, dtype=np.int64)
+            tensor, timing = self._reader.read_batch_with_timing(tile_indices)
+            timing["worker_batch_ms"] = (time.perf_counter() - worker_start) * 1000.0
+            return torch.as_tensor(tile_indices, dtype=torch.long), tensor, timing
+
+        if getattr(self._reader, "_backend", None) == "cucim":
+            return run_with_filtered_stderr(_run_batch)
+        return _run_batch()


 class WSIRegionReader:
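
run_with_filtered_stderr comes from hs2p.utils.stderr, new in hs2p 3.1.4 (hence the dependency bump), and its source is not part of this diff. The call sites above rely only on run_with_filtered_stderr(fn) executing fn and returning its result while fd-level stderr is filtered rather than discarded. An illustrative stand-in with that contract, teeing fd 2 through a pipe and dropping known-noisy lines (the marker list is hypothetical):

import os
import threading
from typing import Callable, TypeVar

T = TypeVar("T")

NOISE_MARKERS = ("[Warning]",)  # hypothetical substrings to drop


def run_with_filtered_stderr_sketch(fn: Callable[[], T]) -> T:
    read_fd, write_fd = os.pipe()
    saved_fd = os.dup(2)  # keep a handle on the real stderr

    def _drain() -> None:
        # Forward captured lines to the real stderr, minus the noisy ones.
        with os.fdopen(read_fd, "r", errors="replace") as captured:
            for line in captured:
                if not any(marker in line for marker in NOISE_MARKERS):
                    os.write(saved_fd, line.encode())

    drainer = threading.Thread(target=_drain, daemon=True)
    drainer.start()
    os.dup2(write_fd, 2)  # native writes to fd 2 now land in the pipe
    os.close(write_fd)
    try:
        return fn()
    finally:
        os.dup2(saved_fd, 2)  # restore stderr; the pipe write end closes
        drainer.join()        # drainer exits once it sees EOF on the pipe
        os.close(saved_fd)
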
@@ -320,12 +330,14 @@ class WSIRegionReader:
                torch.empty((0, 3, self._region_size_px, self._region_size_px), dtype=torch.uint8),
                {"reader_open_ms": 0.0, "reader_read_ms": 0.0},
            )
-        was_closed = self._reader is None
-        open_start = time.perf_counter()
-        self._ensure_open()
-        reader_open_ms = (time.perf_counter() - open_start) * 1000.0 if was_closed else 0.0
-        read_start = time.perf_counter()
-        regions = self._read_regions_batch(locations)
+        stderr_context = suppress_c_stderr() if self._backend == "cucim" else nullcontext()
+        with stderr_context:
+            was_closed = self._reader is None
+            open_start = time.perf_counter()
+            self._ensure_open()
+            reader_open_ms = (time.perf_counter() - open_start) * 1000.0 if was_closed else 0.0
+            read_start = time.perf_counter()
+            regions = self._read_regions_batch(locations)
         reader_read_ms = (time.perf_counter() - read_start) * 1000.0
         batch = np.stack([np.asarray(region)[:, :, :3] for region in regions], axis=0)
         tensor = torch.from_numpy(batch).permute(0, 3, 1, 2).contiguous()
@@ -385,17 +397,22 @@ class OnTheFlyHierarchicalBatchCollator:
                torch.empty((0, 3, self._tile_size, self._tile_size), dtype=torch.uint8),
                {"worker_batch_ms": 0.0, "reader_open_ms": 0.0, "reader_read_ms": 0.0},
            )
-        worker_start = time.perf_counter()
-        flat_indices = np.asarray(batch_indices, dtype=np.int64)
-        requested_regions = self._region_index[flat_indices]
-        unique_regions, inverse = np.unique(requested_regions, return_inverse=True)
-        locations = [self._region_locations[int(region)] for region in unique_regions]
-        region_tensor, timing = self._reader.read_batch_with_timing(locations)
-        unfolded = _unfold_region_tensor_uint8(region_tensor, self._tile_size)
-        subtile_indices = self._subtile_index_within_region[flat_indices]
-        out = unfolded[torch.as_tensor(inverse, dtype=torch.long), torch.as_tensor(subtile_indices, dtype=torch.long)]
-        timing["worker_batch_ms"] = (time.perf_counter() - worker_start) * 1000.0
-        return torch.as_tensor(flat_indices, dtype=torch.long), out, timing
+        def _run_batch():
+            worker_start = time.perf_counter()
+            flat_indices = np.asarray(batch_indices, dtype=np.int64)
+            requested_regions = self._region_index[flat_indices]
+            unique_regions, inverse = np.unique(requested_regions, return_inverse=True)
+            locations = [self._region_locations[int(region)] for region in unique_regions]
+            region_tensor, timing = self._reader.read_batch_with_timing(locations)
+            unfolded = _unfold_region_tensor_uint8(region_tensor, self._tile_size)
+            subtile_indices = self._subtile_index_within_region[flat_indices]
+            out = unfolded[torch.as_tensor(inverse, dtype=torch.long), torch.as_tensor(subtile_indices, dtype=torch.long)]
+            timing["worker_batch_ms"] = (time.perf_counter() - worker_start) * 1000.0
+            return torch.as_tensor(flat_indices, dtype=torch.long), out, timing
+
+        if getattr(self._reader, "_backend", None) == "cucim":
+            return run_with_filtered_stderr(_run_batch)
+        return _run_batch()


 def _unfold_region_tensor_uint8(region_tensor: torch.Tensor, tile_size: int) -> torch.Tensor:
{slide2vec-4.0.1 → slide2vec-4.0.2}/slide2vec/inference.py

@@ -17,6 +17,7 @@ import logging
 import pandas as pd
 import torch
 from hs2p import SlideSpec, FilterConfig, PreviewConfig, SegmentationConfig, TilingConfig, load_tiling_result, tile_slides
+from hs2p.utils.stderr import run_with_filtered_stderr
 import numpy as np
 from transformers.image_processing_utils import BaseImageProcessor

@@ -58,7 +59,7 @@ from slide2vec.utils.tiling_io import (
     load_tiling_result_from_row,
     _optional_float,
 )
-from slide2vec.utils.utils import slurm_cpu_limit
+from slide2vec.utils.utils import cpu_worker_limit, slurm_cpu_limit


 @dataclass(frozen=True, kw_only=True)
@@ -174,17 +175,58 @@ def _num_embedding_items(tiling_result, preprocessing: PreprocessingConfig | Non

 def _resolve_on_the_fly_num_workers(num_cucim_workers: int) -> tuple[int, str]:
     cpu_count = os.cpu_count() or 1
-    worker_budget = cpu_count
+    worker_budget = cpu_worker_limit()
     details = [f"cpu_count={cpu_count}"]
     slurm_limit = slurm_cpu_limit()
     if slurm_limit is not None:
-        worker_budget = min(worker_budget, slurm_limit)
         details.append(f"slurm_cpu_limit={slurm_limit}")
     effective_num_workers = max(1, worker_budget // num_cucim_workers)
     details.append(f"num_cucim_workers={num_cucim_workers}")
     return effective_num_workers, " // ".join(details)


+def _redirect_worker_output() -> None:
+    worker_log_path = os.path.join(
+        tempfile.gettempdir(),
+        "slide2vec-cucim-workers.log",
+    )
+    worker_log_fd = os.open(
+        worker_log_path,
+        os.O_WRONLY | os.O_CREAT | os.O_APPEND,
+        0o644,
+    )
+    try:
+        os.dup2(worker_log_fd, 1)
+        os.dup2(worker_log_fd, 2)
+    finally:
+        os.close(worker_log_fd)
+
+
+def _configure_cucim_worker_stderr(loader_kwargs: dict[str, Any], *, backend: str) -> None:
+    if backend != "cucim" or int(loader_kwargs.get("num_workers", 0)) <= 0:
+        return
+    existing_worker_init = loader_kwargs.get("worker_init_fn")
+
+    def _worker_init(worker_id: int) -> None:
+        _redirect_worker_output()
+        if existing_worker_init is not None:
+            existing_worker_init(worker_id)
+
+    loader_kwargs["worker_init_fn"] = _worker_init
+
+
+def _should_suppress_cucim_dataloader_stderr(dataloader) -> bool:
+    if int(getattr(dataloader, "num_workers", 0)) <= 0:
+        return False
+    collate_fn = getattr(dataloader, "collate_fn", None)
+    reader = getattr(collate_fn, "_reader", None)
+    return getattr(reader, "_backend", None) == "cucim"
+
+
+def _uses_cuda_runtime(device) -> bool:
+    return str(device).startswith("cuda") and torch.cuda.is_available()
+
+
 def _make_slide_spec(
     *,
     sample_id: str,
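
_configure_cucim_worker_stderr hooks into the standard DataLoader mechanism: torch calls worker_init_fn(worker_id) inside each freshly spawned worker process, so _redirect_worker_output() rewires that process's stdout/stderr to the temp-dir log before any cucim code runs, and any pre-existing worker_init_fn still fires afterwards. A toy sketch of the wiring, assuming the two helpers above are in scope (the dataset here is a placeholder):

from torch.utils.data import DataLoader, Dataset


class _ToyDataset(Dataset):
    # Stand-in for a tile dataset; only exists for the illustration.
    def __len__(self) -> int:
        return 8

    def __getitem__(self, index: int) -> int:
        return index


loader_kwargs: dict = {"num_workers": 2}
_configure_cucim_worker_stderr(loader_kwargs, backend="cucim")
loader = DataLoader(_ToyDataset(), batch_size=4, **loader_kwargs)
for batch in loader:
    ...  # each worker's fds 1 and 2 already point at the temp-dir log
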
@@ -1105,7 +1147,7 @@ def _compute_tile_embeddings_for_slide(
     autocast_dtype = _autocast_dtype(torch, execution.precision)
     autocast_context = (
         torch.autocast(device_type="cuda", dtype=autocast_dtype)
-        if autocast_dtype is not None and str(loaded.device).startswith("cuda")
+        if autocast_dtype is not None and _uses_cuda_runtime(loaded.device)
         else nullcontext()
     )
     resolved_indices = np.arange(_num_tiles(tiling_result), dtype=np.int64)
@@ -1163,6 +1205,7 @@
         tiling_result,
     )
     loader_kwargs = _embedding_dataloader_kwargs(loaded, execution)
+    resolved_backend = _resolve_slide_backend(preprocessing, tiling_result)
     if preprocessing.on_the_fly and preprocessing.read_tiles_from is None:
         effective_num_workers, worker_context = _resolve_on_the_fly_num_workers(preprocessing.num_cucim_workers)
         if effective_num_workers != execution.num_workers:
@@ -1175,6 +1218,7 @@
         if effective_num_workers == 0:
             loader_kwargs.pop("persistent_workers", None)
             loader_kwargs.pop("prefetch_factor", None)
+        _configure_cucim_worker_stderr(loader_kwargs, backend=resolved_backend)
     if batch_sampler is not None:
         loader_kwargs["batch_sampler"] = batch_sampler
     else:
@@ -1185,15 +1229,21 @@
         collate_fn=collate_fn,
         **loader_kwargs,
     )
-    tile_embeddings = _run_forward_pass(
-        dataloader,
-        loaded,
-        autocast_context,
-        batch_preprocessor=batch_preprocessor,
-        sample_id=slide.sample_id,
-        total_items=len(dataset),
-        unit_label="tile",
-    )
+    def _compute_embeddings():
+        return _run_forward_pass(
+            dataloader,
+            loaded,
+            autocast_context,
+            batch_preprocessor=batch_preprocessor,
+            sample_id=slide.sample_id,
+            total_items=len(dataset),
+            unit_label="tile",
+        )
+
+    if resolved_backend == "cucim":
+        tile_embeddings = run_with_filtered_stderr(_compute_embeddings)
+    else:
+        tile_embeddings = _compute_embeddings()
     if _supertile_reorder is not None:
         inverse = np.argsort(_supertile_reorder, kind="stable")
         tile_embeddings = tile_embeddings[torch.as_tensor(inverse, dtype=torch.long)]
@@ -1240,6 +1290,7 @@ def _compute_hierarchical_embeddings_for_slide(
     )
     loader_kwargs = _embedding_dataloader_kwargs(loaded, execution)
     effective_num_workers, worker_context = _resolve_on_the_fly_num_workers(preprocessing.num_cucim_workers)
+    resolved_backend = _resolve_slide_backend(preprocessing, tiling_result)
     if effective_num_workers != execution.num_workers:
         logging.getLogger(__name__).info(
             f"on-the-fly hierarchical mode: setting DataLoader num_workers={effective_num_workers} "
@@ -1250,6 +1301,10 @@
     if effective_num_workers == 0:
         loader_kwargs.pop("persistent_workers", None)
         loader_kwargs.pop("prefetch_factor", None)
+    _configure_cucim_worker_stderr(
+        loader_kwargs,
+        backend=resolved_backend,
+    )
     loader_kwargs["batch_sampler"] = collate_fn.build_batch_sampler(
         batch_size=execution.batch_size,
         dataset_indices=np.asarray(resolved_indices, dtype=np.int64),
@@ -1262,19 +1317,25 @@
     autocast_dtype = _autocast_dtype(torch, execution.precision)
     autocast_context = (
         torch.autocast(device_type="cuda", dtype=autocast_dtype)
-        if autocast_dtype is not None and str(loaded.device).startswith("cuda")
+        if autocast_dtype is not None and _uses_cuda_runtime(loaded.device)
         else nullcontext()
     )
-    batch_flat_indices, flat_embeddings = _run_forward_pass(
-        dataloader,
-        loaded,
-        autocast_context,
-        batch_preprocessor=batch_preprocessor,
-        sample_id=slide.sample_id,
-        total_items=len(dataset),
-        unit_label="tile",
-        return_indices=True,
-    )
+    def _compute_embeddings():
+        return _run_forward_pass(
+            dataloader,
+            loaded,
+            autocast_context,
+            batch_preprocessor=batch_preprocessor,
+            sample_id=slide.sample_id,
+            total_items=len(dataset),
+            unit_label="tile",
+            return_indices=True,
+        )
+
+    if resolved_backend == "cucim":
+        batch_flat_indices, flat_embeddings = run_with_filtered_stderr(_compute_embeddings)
+    else:
+        batch_flat_indices, flat_embeddings = _compute_embeddings()
     result = torch.empty(
         (index.num_regions * index.tiles_per_region, int(flat_embeddings.shape[-1])),
         dtype=flat_embeddings.dtype,
1316
1377
  )
1317
1378
  loader_kwargs = _embedding_dataloader_kwargs(loaded, execution)
1318
1379
  effective_num_workers, _worker_context = _resolve_on_the_fly_num_workers(preprocessing.num_cucim_workers)
1380
+ resolved_backend = _resolve_slide_backend(preprocessing, tiling_result)
1319
1381
  loader_kwargs["num_workers"] = effective_num_workers
1320
1382
  if effective_num_workers == 0:
1321
1383
  loader_kwargs.pop("persistent_workers", None)
1322
1384
  loader_kwargs.pop("prefetch_factor", None)
1385
+ _configure_cucim_worker_stderr(
1386
+ loader_kwargs,
1387
+ backend=resolved_backend,
1388
+ )
1323
1389
  loader_kwargs["batch_sampler"] = collate_fn.build_batch_sampler(
1324
1390
  batch_size=execution.batch_size,
1325
1391
  dataset_indices=resolved_indices,
@@ -1328,19 +1394,25 @@ def _compute_hierarchical_embedding_shard_for_slide(
1328
1394
  autocast_dtype = _autocast_dtype(torch, execution.precision)
1329
1395
  autocast_context = (
1330
1396
  torch.autocast(device_type="cuda", dtype=autocast_dtype)
1331
- if autocast_dtype is not None and str(loaded.device).startswith("cuda")
1397
+ if autocast_dtype is not None and _uses_cuda_runtime(loaded.device)
1332
1398
  else nullcontext()
1333
1399
  )
1334
- batch_flat_indices, flat_embeddings = _run_forward_pass(
1335
- dataloader,
1336
- loaded,
1337
- autocast_context,
1338
- batch_preprocessor=batch_preprocessor,
1339
- sample_id=slide.sample_id,
1340
- total_items=len(dataset),
1341
- unit_label="tile",
1342
- return_indices=True,
1343
- )
1400
+ def _compute_embeddings():
1401
+ return _run_forward_pass(
1402
+ dataloader,
1403
+ loaded,
1404
+ autocast_context,
1405
+ batch_preprocessor=batch_preprocessor,
1406
+ sample_id=slide.sample_id,
1407
+ total_items=len(dataset),
1408
+ unit_label="tile",
1409
+ return_indices=True,
1410
+ )
1411
+
1412
+ if resolved_backend == "cucim":
1413
+ batch_flat_indices, flat_embeddings = run_with_filtered_stderr(_compute_embeddings)
1414
+ else:
1415
+ batch_flat_indices, flat_embeddings = _compute_embeddings()
1344
1416
  return batch_flat_indices.numpy(), flat_embeddings
1345
1417
 
1346
1418
 
@@ -1616,7 +1688,7 @@ def _write_hierarchical_embedding_artifact(
 def _embedding_dataloader_kwargs(loaded: LoadedModel, execution: ExecutionOptions) -> dict[str, Any]:
     kwargs: dict[str, Any] = {
         "num_workers": execution.num_workers,
-        "pin_memory": str(loaded.device).startswith("cuda"),
+        "pin_memory": _uses_cuda_runtime(loaded.device),
     }
     if execution.num_workers > 0:
         kwargs["persistent_workers"] = bool(execution.persistent_workers)
@@ -1836,7 +1908,7 @@ class _BatchPrefetcher:
             raise ValueError("Expected the embedding dataloader to yield (indices, image) or (indices, image, timing)")

     def _make_copy_stream(self):
-        if not str(self.loaded.device).startswith("cuda"):
+        if not _uses_cuda_runtime(self.loaded.device):
             return None
         return torch.cuda.Stream(device=self.loaded.device)

@@ -1867,7 +1939,7 @@ class _BatchPrefetcher:
         if torch.is_tensor(prepared) and prepared.device != self.loaded.device:
             prepared = prepared.to(
                 self.loaded.device,
-                non_blocking=str(self.loaded.device).startswith("cuda"),
+                non_blocking=_uses_cuda_runtime(self.loaded.device),
             )
         preprocess_ms = (time.perf_counter() - preprocess_start) * 1000.0
         return prepared, preprocess_ms
@@ -1947,7 +2019,13 @@ def _run_forward_pass(
     batch_indices = [] if return_indices else None
     processed = 0
     batch_index = 0
-    prefetcher = _BatchPrefetcher(dataloader, loaded, batch_preprocessor)
+    prefetcher_context = (
+        suppress_c_stderr()
+        if _should_suppress_cucim_dataloader_stderr(dataloader)
+        else nullcontext()
+    )
+    with prefetcher_context:
+        prefetcher = _BatchPrefetcher(dataloader, loaded, batch_preprocessor)
     with torch.inference_mode(), autocast_context:
         for prepared_batch in prefetcher:
             image = prepared_batch.image
@@ -2185,6 +2263,7 @@ def _prepare_tiled_slides(
     _record_slide_metadata_in_process_list(
         process_list_path,
         slide_records,
+        preprocessing=preprocessing,
         tiling_artifacts=tiling_artifacts,
     )
     process_df = load_tiling_process_df(process_list_path)
@@ -2298,6 +2377,7 @@ def _record_slide_metadata_in_process_list(
     process_list_path: Path,
     slide_records: Sequence[SlideSpec],
     *,
+    preprocessing: PreprocessingConfig,
     tiling_artifacts: Sequence[Any],
 ) -> None:
     def _resolve_path_str(value: Any) -> str | None:
@@ -2319,18 +2399,40 @@
         for artifact in tiling_artifacts
     }
     process_df = pd.read_csv(process_list_path)
+    if "requested_backend" not in process_df.columns:
+        process_df["requested_backend"] = [None] * len(process_df)
+    if "backend" not in process_df.columns:
+        process_df["backend"] = [None] * len(process_df)
     if "spacing_at_level_0" not in process_df.columns:
         process_df["spacing_at_level_0"] = [None] * len(process_df)
     if "mask_preview_path" not in process_df.columns:
         process_df["mask_preview_path"] = [None] * len(process_df)
     if "tiling_preview_path" not in process_df.columns:
         process_df["tiling_preview_path"] = [None] * len(process_df)
+    requested_backend = str(preprocessing.backend)
+    process_df["requested_backend"] = process_df["requested_backend"].where(
+        process_df["requested_backend"].notna(),
+        requested_backend,
+    )
     if spacing_by_sample_id:
         mapped_spacing = process_df["sample_id"].astype(str).map(spacing_by_sample_id)
         process_df["spacing_at_level_0"] = process_df["spacing_at_level_0"].where(
             process_df["spacing_at_level_0"].notna(),
             mapped_spacing,
         )
+    backend_by_sample_id = {}
+    for row in process_df.to_dict("records"):
+        sample_id = str(row["sample_id"])
+        try:
+            tiling_result = load_tiling_result_from_row(row)
+        except Exception:
+            continue
+        backend = getattr(tiling_result, "backend", None)
+        if backend is not None:
+            backend_by_sample_id[sample_id] = backend
+    if backend_by_sample_id:
+        mapped_backend = process_df["sample_id"].astype(str).map(backend_by_sample_id)
+        process_df["backend"] = process_df["backend"].where(process_df["backend"].notna(), mapped_backend)
     mapped_mask_preview_paths = process_df["sample_id"].astype(str).map(mask_preview_by_sample_id)
     process_df["mask_preview_path"] = process_df["mask_preview_path"].where(
         process_df["mask_preview_path"].notna(),
{slide2vec-4.0.1 → slide2vec-4.0.2}/slide2vec/utils/tiling_io.py

@@ -11,6 +11,8 @@ BASE_PROCESS_COLUMNS = (
     "sample_id",
     "image_path",
     "mask_path",
+    "requested_backend",
+    "backend",
     "tiling_status",
     "num_tiles",
     "coordinates_npz_path",
@@ -22,6 +24,8 @@ BASE_TILING_ORDERED_COLUMNS = (
     "sample_id",
     "image_path",
     "mask_path",
+    "requested_backend",
+    "backend",
     "spacing_at_level_0",
     "tiling_status",
     "num_tiles",
@@ -37,6 +41,8 @@ BASE_EMBEDDING_ORDERED_COLUMNS = (
     "sample_id",
     "image_path",
     "mask_path",
+    "requested_backend",
+    "backend",
     "spacing_at_level_0",
     "tiling_status",
     "num_tiles",
@@ -160,12 +166,14 @@ def load_embedding_process_df(


 def load_tiling_result_from_row(row):
+    coordinates_npz_path = _optional_path(row.get("coordinates_npz_path"))
+    coordinates_meta_path = Path(row["coordinates_meta_path"])
     tiling_result = load_tiling_result(
-        coordinates_npz_path=Path(row["coordinates_npz_path"]),
-        coordinates_meta_path=Path(row["coordinates_meta_path"]),
+        coordinates_npz_path=coordinates_npz_path,
+        coordinates_meta_path=coordinates_meta_path,
     )
-    setattr(tiling_result, "coordinates_npz_path", Path(row["coordinates_npz_path"]))
-    setattr(tiling_result, "coordinates_meta_path", Path(row["coordinates_meta_path"]))
+    setattr(tiling_result, "coordinates_npz_path", coordinates_npz_path)
+    setattr(tiling_result, "coordinates_meta_path", coordinates_meta_path)
     setattr(tiling_result, "tiles_tar_path", _optional_path(row.get("tiles_tar_path")))
     setattr(tiling_result, "mask_preview_path", _optional_path(row.get("mask_preview_path")))
     setattr(tiling_result, "tiling_preview_path", _optional_path(row.get("tiling_preview_path")))
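
The reworked load_tiling_result_from_row makes coordinates_npz_path optional: it is routed through _optional_path instead of an unconditional Path(...) call, which raised TypeError on a None value in 4.0.1. A hypothetical row dict illustrating the changed path, assuming hs2p>=3.1.4's load_tiling_result accepts coordinates_npz_path=None:

# Usage sketch in the context of slide2vec.utils.tiling_io; the paths are
# made up for the illustration.
row = {
    "coordinates_npz_path": None,              # 4.0.1: Path(None) -> TypeError
    "coordinates_meta_path": "/data/meta.json",
    "tiles_tar_path": None,
}
tiling_result = load_tiling_result_from_row(row)  # npz path forwarded as None
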