slide2vec 4.5.0__tar.gz → 4.5.2__tar.gz

This diff shows the changes between two publicly available versions of this package, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (93)
  1. {slide2vec-4.5.0 → slide2vec-4.5.2}/PKG-INFO +3 -3
  2. {slide2vec-4.5.0 → slide2vec-4.5.2}/pyproject.toml +4 -4
  3. {slide2vec-4.5.0 → slide2vec-4.5.2}/slide2vec/__init__.py +1 -1
  4. {slide2vec-4.5.0 → slide2vec-4.5.2}/slide2vec/progress.py +4 -1
  5. {slide2vec-4.5.0 → slide2vec-4.5.2}/slide2vec/runtime/distributed.py +90 -30
  6. {slide2vec-4.5.0 → slide2vec-4.5.2}/slide2vec.egg-info/PKG-INFO +3 -3
  7. {slide2vec-4.5.0 → slide2vec-4.5.2}/slide2vec.egg-info/requires.txt +2 -2
  8. {slide2vec-4.5.0 → slide2vec-4.5.2}/tests/test_progress.py +67 -0
  9. {slide2vec-4.5.0 → slide2vec-4.5.2}/LICENSE +0 -0
  10. {slide2vec-4.5.0 → slide2vec-4.5.2}/README.md +0 -0
  11. {slide2vec-4.5.0 → slide2vec-4.5.2}/setup.cfg +0 -0
  12. {slide2vec-4.5.0 → slide2vec-4.5.2}/slide2vec/__main__.py +0 -0
  13. {slide2vec-4.5.0 → slide2vec-4.5.2}/slide2vec/api.py +0 -0
  14. {slide2vec-4.5.0 → slide2vec-4.5.2}/slide2vec/artifacts.py +0 -0
  15. {slide2vec-4.5.0 → slide2vec-4.5.2}/slide2vec/cli.py +0 -0
  16. {slide2vec-4.5.0 → slide2vec-4.5.2}/slide2vec/configs/__init__.py +0 -0
  17. {slide2vec-4.5.0 → slide2vec-4.5.2}/slide2vec/configs/default.yaml +0 -0
  18. {slide2vec-4.5.0 → slide2vec-4.5.2}/slide2vec/configs/resources.py +0 -0
  19. {slide2vec-4.5.0 → slide2vec-4.5.2}/slide2vec/data/__init__.py +0 -0
  20. {slide2vec-4.5.0 → slide2vec-4.5.2}/slide2vec/data/dataset.py +0 -0
  21. {slide2vec-4.5.0 → slide2vec-4.5.2}/slide2vec/data/tile_reader.py +0 -0
  22. {slide2vec-4.5.0 → slide2vec-4.5.2}/slide2vec/data/tile_store.py +0 -0
  23. {slide2vec-4.5.0 → slide2vec-4.5.2}/slide2vec/distributed/__init__.py +0 -0
  24. {slide2vec-4.5.0 → slide2vec-4.5.2}/slide2vec/distributed/direct_embed_worker.py +0 -0
  25. {slide2vec-4.5.0 → slide2vec-4.5.2}/slide2vec/distributed/pipeline_worker.py +0 -0
  26. {slide2vec-4.5.0 → slide2vec-4.5.2}/slide2vec/encoders/__init__.py +0 -0
  27. {slide2vec-4.5.0 → slide2vec-4.5.2}/slide2vec/encoders/base.py +0 -0
  28. {slide2vec-4.5.0 → slide2vec-4.5.2}/slide2vec/encoders/models/__init__.py +0 -0
  29. {slide2vec-4.5.0 → slide2vec-4.5.2}/slide2vec/encoders/models/conch.py +0 -0
  30. {slide2vec-4.5.0 → slide2vec-4.5.2}/slide2vec/encoders/models/gigapath.py +0 -0
  31. {slide2vec-4.5.0 → slide2vec-4.5.2}/slide2vec/encoders/models/hibou.py +0 -0
  32. {slide2vec-4.5.0 → slide2vec-4.5.2}/slide2vec/encoders/models/hoptimus.py +0 -0
  33. {slide2vec-4.5.0 → slide2vec-4.5.2}/slide2vec/encoders/models/lunit.py +0 -0
  34. {slide2vec-4.5.0 → slide2vec-4.5.2}/slide2vec/encoders/models/midnight.py +0 -0
  35. {slide2vec-4.5.0 → slide2vec-4.5.2}/slide2vec/encoders/models/moozy/__init__.py +0 -0
  36. {slide2vec-4.5.0 → slide2vec-4.5.2}/slide2vec/encoders/models/moozy/blocks.py +0 -0
  37. {slide2vec-4.5.0 → slide2vec-4.5.2}/slide2vec/encoders/models/moozy/case.py +0 -0
  38. {slide2vec-4.5.0 → slide2vec-4.5.2}/slide2vec/encoders/models/moozy/loading.py +0 -0
  39. {slide2vec-4.5.0 → slide2vec-4.5.2}/slide2vec/encoders/models/moozy/slide.py +0 -0
  40. {slide2vec-4.5.0 → slide2vec-4.5.2}/slide2vec/encoders/models/moozy/types.py +0 -0
  41. {slide2vec-4.5.0 → slide2vec-4.5.2}/slide2vec/encoders/models/musk.py +0 -0
  42. {slide2vec-4.5.0 → slide2vec-4.5.2}/slide2vec/encoders/models/phikon.py +0 -0
  43. {slide2vec-4.5.0 → slide2vec-4.5.2}/slide2vec/encoders/models/prism.py +0 -0
  44. {slide2vec-4.5.0 → slide2vec-4.5.2}/slide2vec/encoders/models/prost40m.py +0 -0
  45. {slide2vec-4.5.0 → slide2vec-4.5.2}/slide2vec/encoders/models/titan.py +0 -0
  46. {slide2vec-4.5.0 → slide2vec-4.5.2}/slide2vec/encoders/models/uni.py +0 -0
  47. {slide2vec-4.5.0 → slide2vec-4.5.2}/slide2vec/encoders/models/virchow.py +0 -0
  48. {slide2vec-4.5.0 → slide2vec-4.5.2}/slide2vec/encoders/registry.py +0 -0
  49. {slide2vec-4.5.0 → slide2vec-4.5.2}/slide2vec/encoders/validation.py +0 -0
  50. {slide2vec-4.5.0 → slide2vec-4.5.2}/slide2vec/inference.py +0 -0
  51. {slide2vec-4.5.0 → slide2vec-4.5.2}/slide2vec/runtime/__init__.py +0 -0
  52. {slide2vec-4.5.0 → slide2vec-4.5.2}/slide2vec/runtime/artifacts_collect.py +0 -0
  53. {slide2vec-4.5.0 → slide2vec-4.5.2}/slide2vec/runtime/batching.py +0 -0
  54. {slide2vec-4.5.0 → slide2vec-4.5.2}/slide2vec/runtime/cpu_budget.py +0 -0
  55. {slide2vec-4.5.0 → slide2vec-4.5.2}/slide2vec/runtime/distributed_stage.py +0 -0
  56. {slide2vec-4.5.0 → slide2vec-4.5.2}/slide2vec/runtime/embedding.py +0 -0
  57. {slide2vec-4.5.0 → slide2vec-4.5.2}/slide2vec/runtime/embedding_persist.py +0 -0
  58. {slide2vec-4.5.0 → slide2vec-4.5.2}/slide2vec/runtime/embedding_pipeline.py +0 -0
  59. {slide2vec-4.5.0 → slide2vec-4.5.2}/slide2vec/runtime/hierarchical.py +0 -0
  60. {slide2vec-4.5.0 → slide2vec-4.5.2}/slide2vec/runtime/manifest.py +0 -0
  61. {slide2vec-4.5.0 → slide2vec-4.5.2}/slide2vec/runtime/model_settings.py +0 -0
  62. {slide2vec-4.5.0 → slide2vec-4.5.2}/slide2vec/runtime/patient_pipeline.py +0 -0
  63. {slide2vec-4.5.0 → slide2vec-4.5.2}/slide2vec/runtime/persist_callbacks.py +0 -0
  64. {slide2vec-4.5.0 → slide2vec-4.5.2}/slide2vec/runtime/persistence.py +0 -0
  65. {slide2vec-4.5.0 → slide2vec-4.5.2}/slide2vec/runtime/process_list.py +0 -0
  66. {slide2vec-4.5.0 → slide2vec-4.5.2}/slide2vec/runtime/progress_bridge.py +0 -0
  67. {slide2vec-4.5.0 → slide2vec-4.5.2}/slide2vec/runtime/registry.py +0 -0
  68. {slide2vec-4.5.0 → slide2vec-4.5.2}/slide2vec/runtime/serialization.py +0 -0
  69. {slide2vec-4.5.0 → slide2vec-4.5.2}/slide2vec/runtime/slide_encode.py +0 -0
  70. {slide2vec-4.5.0 → slide2vec-4.5.2}/slide2vec/runtime/tiling.py +0 -0
  71. {slide2vec-4.5.0 → slide2vec-4.5.2}/slide2vec/runtime/tiling_pipeline.py +0 -0
  72. {slide2vec-4.5.0 → slide2vec-4.5.2}/slide2vec/runtime/types.py +0 -0
  73. {slide2vec-4.5.0 → slide2vec-4.5.2}/slide2vec/runtime/worker_io.py +0 -0
  74. {slide2vec-4.5.0 → slide2vec-4.5.2}/slide2vec/utils/__init__.py +0 -0
  75. {slide2vec-4.5.0 → slide2vec-4.5.2}/slide2vec/utils/config.py +0 -0
  76. {slide2vec-4.5.0 → slide2vec-4.5.2}/slide2vec/utils/coordinates.py +0 -0
  77. {slide2vec-4.5.0 → slide2vec-4.5.2}/slide2vec/utils/log_utils.py +0 -0
  78. {slide2vec-4.5.0 → slide2vec-4.5.2}/slide2vec/utils/tiling_io.py +0 -0
  79. {slide2vec-4.5.0 → slide2vec-4.5.2}/slide2vec/utils/utils.py +0 -0
  80. {slide2vec-4.5.0 → slide2vec-4.5.2}/slide2vec.egg-info/SOURCES.txt +0 -0
  81. {slide2vec-4.5.0 → slide2vec-4.5.2}/slide2vec.egg-info/dependency_links.txt +0 -0
  82. {slide2vec-4.5.0 → slide2vec-4.5.2}/slide2vec.egg-info/entry_points.txt +0 -0
  83. {slide2vec-4.5.0 → slide2vec-4.5.2}/slide2vec.egg-info/not-zip-safe +0 -0
  84. {slide2vec-4.5.0 → slide2vec-4.5.2}/slide2vec.egg-info/top_level.txt +0 -0
  85. {slide2vec-4.5.0 → slide2vec-4.5.2}/tests/test_architecture_runtime_split.py +0 -0
  86. {slide2vec-4.5.0 → slide2vec-4.5.2}/tests/test_encoder_registry.py +0 -0
  87. {slide2vec-4.5.0 → slide2vec-4.5.2}/tests/test_hs2p_package_cutover.py +0 -0
  88. {slide2vec-4.5.0 → slide2vec-4.5.2}/tests/test_output_consistency.py +0 -0
  89. {slide2vec-4.5.0 → slide2vec-4.5.2}/tests/test_regression_core.py +0 -0
  90. {slide2vec-4.5.0 → slide2vec-4.5.2}/tests/test_regression_inference.py +0 -0
  91. {slide2vec-4.5.0 → slide2vec-4.5.2}/tests/test_regression_models.py +0 -0
  92. {slide2vec-4.5.0 → slide2vec-4.5.2}/tests/test_runtime_batching.py +0 -0
  93. {slide2vec-4.5.0 → slide2vec-4.5.2}/tests/test_tile_store.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: slide2vec
3
- Version: 4.5.0
3
+ Version: 4.5.2
4
4
  Summary: Embedding of whole slide images with Foundation Models
5
5
  Author-email: Clément Grisi <clement.grisi@radboudumc.nl>
6
6
  License-Expression: Apache-2.0
@@ -15,7 +15,7 @@ Classifier: Programming Language :: Python :: 3.13
15
15
  Requires-Python: >=3.10
16
16
  Description-Content-Type: text/markdown
17
17
  License-File: LICENSE
18
- Requires-Dist: hs2p[asap,cucim,openslide,sam2,vips]>=4.0.5
18
+ Requires-Dist: hs2p[asap,cucim,openslide,sam2,vips]>=4.0.6
19
19
  Requires-Dist: omegaconf
20
20
  Requires-Dist: matplotlib
21
21
  Requires-Dist: numpy<2
@@ -65,7 +65,7 @@ Requires-Dist: numpy<2; extra == "fm"
65
65
  Requires-Dist: pandas; extra == "fm"
66
66
  Requires-Dist: pillow; extra == "fm"
67
67
  Requires-Dist: rich; extra == "fm"
68
- Requires-Dist: hs2p[asap,cucim,openslide,sam2,vips]>=4.0.5; extra == "fm"
68
+ Requires-Dist: hs2p[asap,cucim,openslide,sam2,vips]>=4.0.6; extra == "fm"
69
69
  Requires-Dist: wandb; extra == "fm"
70
70
  Requires-Dist: torch<2.8,>=2.3; extra == "fm"
71
71
  Requires-Dist: torchvision>=0.18.0; extra == "fm"
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "slide2vec"
7
- version = "4.5.0"
7
+ version = "4.5.2"
8
8
  description = "Embedding of whole slide images with Foundation Models"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.10"
@@ -21,7 +21,7 @@ classifiers = [
21
21
  "Programming Language :: Python :: 3.13",
22
22
  ]
23
23
  dependencies = [
24
- "hs2p[asap,cucim,openslide,sam2,vips]>=4.0.5",
24
+ "hs2p[asap,cucim,openslide,sam2,vips]>=4.0.6",
25
25
  "omegaconf",
26
26
  "matplotlib",
27
27
  "numpy<2",
@@ -88,7 +88,7 @@ fm = [
88
88
  "pandas",
89
89
  "pillow",
90
90
  "rich",
91
- "hs2p[asap,cucim,openslide,sam2,vips]>=4.0.5",
91
+ "hs2p[asap,cucim,openslide,sam2,vips]>=4.0.6",
92
92
  "wandb",
93
93
  "torch>=2.3,<2.8",
94
94
  "torchvision>=0.18.0",
@@ -164,7 +164,7 @@ no_implicit_reexport = true
164
164
  max-line-length = 160
165
165
 
166
166
  [tool.bumpver]
167
- current_version = "4.5.0"
167
+ current_version = "4.5.2"
168
168
  version_pattern = "MAJOR.MINOR.PATCH"
169
169
  commit = false # We do version bumping in CI, not as a commit
170
170
  tag = false # Git tag already exists — we don't auto-tag
@@ -11,7 +11,7 @@ from slide2vec.api import (
11
11
  from slide2vec.artifacts import HierarchicalEmbeddingArtifact, SlideEmbeddingArtifact, TileEmbeddingArtifact
12
12
 
13
13
 
14
- __version__ = "4.5.0"
14
+ __version__ = "4.5.2"
15
15
 
16
16
  __all__ = [
17
17
  "Model",
@@ -699,7 +699,10 @@ def read_tiling_progress_snapshot(process_list_path: str | Path, *, expected_tot
699
699
  path = Path(process_list_path)
700
700
  if not path.is_file():
701
701
  return None
702
- df = pd.read_csv(path)
702
+ try:
703
+ df = pd.read_csv(path)
704
+ except (pd.errors.EmptyDataError, pd.errors.ParserError):
705
+ return None
703
706
  if "tiling_status" not in df.columns:
704
707
  return None
705
708
  statuses = df["tiling_status"].fillna("tbp").astype(str)
@@ -1,7 +1,9 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import heapq
4
+ import os
4
5
  import shutil
6
+ import signal
5
7
  import subprocess
6
8
  import sys
7
9
  import tempfile
@@ -55,6 +57,42 @@ def write_worker_logs(module: str, output_dir: Path, stdout_text: str, stderr_te
55
57
  return stdout_log_path, stderr_log_path
56
58
 
57
59
 
60
+ def terminate_process_group(process, *, grace_seconds: float = 10.0) -> None:
61
+ """SIGTERM then SIGKILL the worker's whole process group.
62
+
63
+ The torchrun *agent* and the GPU worker processes it spawns share the session
64
+ we start the agent in (``start_new_session=True``), so signalling the group id
65
+ reaps the agent and every worker at once — including the elastic agent, which
66
+ would otherwise respawn workers if we only killed them individually. A no-op if
67
+ the process already exited or the platform lacks process groups.
68
+ """
69
+ if process.poll() is not None:
70
+ return
71
+ pid = getattr(process, "pid", None)
72
+ if pid is None or not hasattr(os, "killpg"):
73
+ process.terminate()
74
+ return
75
+ try:
76
+ pgid = os.getpgid(pid)
77
+ except (ProcessLookupError, OSError):
78
+ return
79
+ try:
80
+ os.killpg(pgid, signal.SIGTERM)
81
+ except (ProcessLookupError, OSError):
82
+ return
83
+ try:
84
+ process.wait(timeout=grace_seconds)
85
+ except subprocess.TimeoutExpired:
86
+ try:
87
+ os.killpg(pgid, signal.SIGKILL)
88
+ except (ProcessLookupError, OSError):
89
+ pass
90
+ try:
91
+ process.wait(timeout=5.0)
92
+ except subprocess.TimeoutExpired:
93
+ pass
94
+
95
+
58
96
  def run_torchrun_worker(
59
97
  *,
60
98
  module: str,
@@ -79,6 +117,19 @@ def run_torchrun_worker(
79
117
  "--request-path",
80
118
  str(request_path),
81
119
  ]
120
+ # Run the agent in its own session so a single killpg reaps agent + workers
121
+ # (see terminate_process_group). A bare SIGTERM to *this* process would skip
122
+ # the finally block, so while the agent is alive we convert SIGTERM into a
123
+ # KeyboardInterrupt — but only from the main thread, where signal.signal is
124
+ # allowed; the original handler is restored in finally.
125
+ previous_sigterm = None
126
+ if threading.current_thread() is threading.main_thread():
127
+ def _raise_on_sigterm(signum, frame): # noqa: ANN001
128
+ raise KeyboardInterrupt
129
+ try:
130
+ previous_sigterm = signal.signal(signal.SIGTERM, _raise_on_sigterm)
131
+ except (ValueError, OSError):
132
+ previous_sigterm = None
82
133
  process = popen_factory(
83
134
  command,
84
135
  cwd=str(Path(__file__).resolve().parents[2]),
@@ -86,43 +137,52 @@ def run_torchrun_worker(
86
137
  stderr=subprocess.PIPE,
87
138
  text=True,
88
139
  bufsize=1,
140
+ start_new_session=True,
89
141
  )
90
- stdout_chunks: list[str] = []
91
- stderr_chunks: list[str] = []
92
- stdout_thread = threading.Thread(target=drain_stream_to_buffer, args=(process.stdout, stdout_chunks), daemon=True)
93
- stderr_thread = threading.Thread(target=drain_stream_to_buffer, args=(process.stderr, stderr_chunks), daemon=True)
94
- stdout_thread.start()
95
- stderr_thread.start()
96
- offsets: dict[Path, int] = {}
97
- while process.poll() is None:
142
+ try:
143
+ stdout_chunks: list[str] = []
144
+ stderr_chunks: list[str] = []
145
+ stdout_thread = threading.Thread(target=drain_stream_to_buffer, args=(process.stdout, stdout_chunks), daemon=True)
146
+ stderr_thread = threading.Thread(target=drain_stream_to_buffer, args=(process.stderr, stderr_chunks), daemon=True)
147
+ stdout_thread.start()
148
+ stderr_thread.start()
149
+ offsets: dict[Path, int] = {}
150
+ while process.poll() is None:
151
+ if progress_events_path is not None:
152
+ events, offsets = read_progress_events(progress_events_path, offsets=offsets)
153
+ for event in events:
154
+ emit_progress_event(event)
155
+ if progress_event_callback is not None:
156
+ progress_event_callback(event)
157
+ time.sleep(0.1)
98
158
  if progress_events_path is not None:
99
159
  events, offsets = read_progress_events(progress_events_path, offsets=offsets)
100
160
  for event in events:
101
161
  emit_progress_event(event)
102
162
  if progress_event_callback is not None:
103
163
  progress_event_callback(event)
104
- time.sleep(0.1)
105
- if progress_events_path is not None:
106
- events, offsets = read_progress_events(progress_events_path, offsets=offsets)
107
- for event in events:
108
- emit_progress_event(event)
109
- if progress_event_callback is not None:
110
- progress_event_callback(event)
111
- returncode = process.wait()
112
- stdout_thread.join(timeout=1.0)
113
- stderr_thread.join(timeout=1.0)
114
- stdout_text = "".join(stdout_chunks)
115
- stderr_text = "".join(stderr_chunks)
116
- stdout_log_path, stderr_log_path = write_worker_logs(module, output_dir, stdout_text, stderr_text)
117
- if returncode != 0:
118
- raise RuntimeError(
119
- f"{failure_title}.\n"
120
- f"See logs:\n"
121
- f"stdout: {stdout_log_path}\n"
122
- f"stderr: {stderr_log_path}\n"
123
- f"stdout:\n{stdout_text}\n"
124
- f"stderr:\n{stderr_text}"
125
- )
164
+ returncode = process.wait()
165
+ stdout_thread.join(timeout=1.0)
166
+ stderr_thread.join(timeout=1.0)
167
+ stdout_text = "".join(stdout_chunks)
168
+ stderr_text = "".join(stderr_chunks)
169
+ stdout_log_path, stderr_log_path = write_worker_logs(module, output_dir, stdout_text, stderr_text)
170
+ if returncode != 0:
171
+ raise RuntimeError(
172
+ f"{failure_title}.\n"
173
+ f"See logs:\n"
174
+ f"stdout: {stdout_log_path}\n"
175
+ f"stderr: {stderr_log_path}\n"
176
+ f"stdout:\n{stdout_text}\n"
177
+ f"stderr:\n{stderr_text}"
178
+ )
179
+ finally:
180
+ # On any early exit (Ctrl-C, converted SIGTERM, RuntimeError) reap the
181
+ # whole worker group so no orphaned agent/workers keep holding the GPUs.
182
+ # No-op on the normal path: the agent has already exited.
183
+ terminate_process_group(process)
184
+ if previous_sigterm is not None:
185
+ signal.signal(signal.SIGTERM, previous_sigterm)
126
186
 
127
187
 
128
188
  def assign_slides_to_ranks(
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: slide2vec
3
- Version: 4.5.0
3
+ Version: 4.5.2
4
4
  Summary: Embedding of whole slide images with Foundation Models
5
5
  Author-email: Clément Grisi <clement.grisi@radboudumc.nl>
6
6
  License-Expression: Apache-2.0
@@ -15,7 +15,7 @@ Classifier: Programming Language :: Python :: 3.13
15
15
  Requires-Python: >=3.10
16
16
  Description-Content-Type: text/markdown
17
17
  License-File: LICENSE
18
- Requires-Dist: hs2p[asap,cucim,openslide,sam2,vips]>=4.0.5
18
+ Requires-Dist: hs2p[asap,cucim,openslide,sam2,vips]>=4.0.6
19
19
  Requires-Dist: omegaconf
20
20
  Requires-Dist: matplotlib
21
21
  Requires-Dist: numpy<2
@@ -65,7 +65,7 @@ Requires-Dist: numpy<2; extra == "fm"
65
65
  Requires-Dist: pandas; extra == "fm"
66
66
  Requires-Dist: pillow; extra == "fm"
67
67
  Requires-Dist: rich; extra == "fm"
68
- Requires-Dist: hs2p[asap,cucim,openslide,sam2,vips]>=4.0.5; extra == "fm"
68
+ Requires-Dist: hs2p[asap,cucim,openslide,sam2,vips]>=4.0.6; extra == "fm"
69
69
  Requires-Dist: wandb; extra == "fm"
70
70
  Requires-Dist: torch<2.8,>=2.3; extra == "fm"
71
71
  Requires-Dist: torchvision>=0.18.0; extra == "fm"
@@ -1,4 +1,4 @@
1
- hs2p[asap,cucim,openslide,sam2,vips]>=4.0.5
1
+ hs2p[asap,cucim,openslide,sam2,vips]>=4.0.6
2
2
  omegaconf
3
3
  matplotlib
4
4
  numpy<2
@@ -27,7 +27,7 @@ numpy<2
27
27
  pandas
28
28
  pillow
29
29
  rich
30
- hs2p[asap,cucim,openslide,sam2,vips]>=4.0.5
30
+ hs2p[asap,cucim,openslide,sam2,vips]>=4.0.6
31
31
  wandb
32
32
  torch<2.8,>=2.3
33
33
  torchvision>=0.18.0
@@ -555,6 +555,15 @@ def test_read_tiling_progress_snapshot_summarizes_process_list(tmp_path: Path):
555
555
  assert snapshot.discovered_tiles == 10
556
556
 
557
557
 
558
+ def test_read_tiling_progress_snapshot_ignores_transient_empty_process_list(tmp_path: Path):
559
+ import slide2vec.progress as progress
560
+
561
+ process_list_path = tmp_path / "process_list.csv"
562
+ process_list_path.write_text("", encoding="utf-8")
563
+
564
+ assert progress.read_tiling_progress_snapshot(process_list_path, expected_total=3) is None
565
+
566
+
558
567
  def test_build_direct_embed_worker_request_payload_includes_progress_events_path(tmp_path: Path):
559
568
  import slide2vec.inference as inference
560
569
 
@@ -675,6 +684,64 @@ def test_run_torchrun_worker_uses_standalone_rendezvous(monkeypatch, tmp_path: P
675
684
  assert "--rdzv-endpoint" not in " ".join(command)
676
685
 
677
686
 
687
+ def test_run_torchrun_worker_starts_new_session(monkeypatch, tmp_path: Path):
688
+ # The agent must run in its own session so terminate_process_group can reap
689
+ # the agent and every worker it spawns with a single killpg.
690
+ request_path = tmp_path / "request.json"
691
+ request_path.write_text("{}", encoding="utf-8")
692
+ output_dir = tmp_path / "output"
693
+ output_dir.mkdir()
694
+
695
+ observed = {}
696
+
697
+ class FakePopen:
698
+ def __init__(self, command, **kwargs):
699
+ observed["kwargs"] = kwargs
700
+ self.stdout = io.StringIO("")
701
+ self.stderr = io.StringIO("")
702
+
703
+ def poll(self):
704
+ return 0
705
+
706
+ def wait(self, timeout=None):
707
+ return 0
708
+
709
+ monkeypatch.setattr(distributed.time, "sleep", lambda _seconds: None)
710
+
711
+ distributed.run_torchrun_worker(
712
+ module="slide2vec.distributed.direct_embed_worker",
713
+ num_gpus=2,
714
+ output_dir=output_dir,
715
+ request_path=request_path,
716
+ failure_title="boom",
717
+ popen_factory=FakePopen,
718
+ )
719
+
720
+ assert observed["kwargs"].get("start_new_session") is True
721
+
722
+
723
+ def test_terminate_process_group_reaps_whole_group():
724
+ # A real grandchild in the same session must die when we tear down the group.
725
+ parent = subprocess.Popen(
726
+ [
727
+ sys.executable,
728
+ "-c",
729
+ "import subprocess, sys, time; "
730
+ "subprocess.Popen([sys.executable, '-c', 'import time; time.sleep(60)']); "
731
+ "time.sleep(60)",
732
+ ],
733
+ start_new_session=True,
734
+ )
735
+ import os
736
+
737
+ pgid = os.getpgid(parent.pid)
738
+ distributed.terminate_process_group(parent, grace_seconds=5.0)
739
+ assert parent.poll() is not None
740
+ # The whole group is gone: signalling it with 0 must raise.
741
+ with pytest.raises(ProcessLookupError):
742
+ os.killpg(pgid, 0)
743
+
744
+
678
745
  def test_reset_progress_event_logs_is_idempotent(tmp_path: Path):
679
746
  import slide2vec.runtime.distributed as distributed
680
747
 
File without changes
File without changes
File without changes
File without changes
File without changes