experimaestro 2.0.0b4__py3-none-any.whl → 2.0.0b17__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of experimaestro might be problematic. Click here for more details.

Files changed (154) hide show
  1. experimaestro/__init__.py +12 -5
  2. experimaestro/cli/__init__.py +393 -134
  3. experimaestro/cli/filter.py +48 -23
  4. experimaestro/cli/jobs.py +253 -71
  5. experimaestro/cli/refactor.py +1 -2
  6. experimaestro/commandline.py +7 -4
  7. experimaestro/connectors/__init__.py +9 -1
  8. experimaestro/connectors/local.py +43 -3
  9. experimaestro/core/arguments.py +18 -18
  10. experimaestro/core/identifier.py +11 -11
  11. experimaestro/core/objects/config.py +96 -39
  12. experimaestro/core/objects/config_walk.py +3 -3
  13. experimaestro/core/{subparameters.py → partial.py} +16 -16
  14. experimaestro/core/partial_lock.py +394 -0
  15. experimaestro/core/types.py +12 -15
  16. experimaestro/dynamic.py +290 -0
  17. experimaestro/experiments/__init__.py +6 -2
  18. experimaestro/experiments/cli.py +223 -52
  19. experimaestro/experiments/configuration.py +24 -0
  20. experimaestro/generators.py +5 -5
  21. experimaestro/ipc.py +118 -1
  22. experimaestro/launcherfinder/__init__.py +2 -2
  23. experimaestro/launcherfinder/registry.py +6 -7
  24. experimaestro/launcherfinder/specs.py +2 -9
  25. experimaestro/launchers/slurm/__init__.py +2 -2
  26. experimaestro/launchers/slurm/base.py +62 -0
  27. experimaestro/locking.py +957 -1
  28. experimaestro/notifications.py +89 -201
  29. experimaestro/progress.py +63 -366
  30. experimaestro/rpyc.py +0 -2
  31. experimaestro/run.py +29 -2
  32. experimaestro/scheduler/__init__.py +8 -1
  33. experimaestro/scheduler/base.py +650 -53
  34. experimaestro/scheduler/dependencies.py +20 -16
  35. experimaestro/scheduler/experiment.py +764 -169
  36. experimaestro/scheduler/interfaces.py +338 -96
  37. experimaestro/scheduler/jobs.py +58 -20
  38. experimaestro/scheduler/remote/__init__.py +31 -0
  39. experimaestro/scheduler/remote/adaptive_sync.py +265 -0
  40. experimaestro/scheduler/remote/client.py +928 -0
  41. experimaestro/scheduler/remote/protocol.py +282 -0
  42. experimaestro/scheduler/remote/server.py +447 -0
  43. experimaestro/scheduler/remote/sync.py +144 -0
  44. experimaestro/scheduler/services.py +186 -35
  45. experimaestro/scheduler/state_provider.py +811 -2157
  46. experimaestro/scheduler/state_status.py +1247 -0
  47. experimaestro/scheduler/transient.py +31 -0
  48. experimaestro/scheduler/workspace.py +1 -1
  49. experimaestro/scheduler/workspace_state_provider.py +1273 -0
  50. experimaestro/scriptbuilder.py +4 -4
  51. experimaestro/settings.py +36 -0
  52. experimaestro/tests/conftest.py +33 -5
  53. experimaestro/tests/connectors/bin/executable.py +1 -1
  54. experimaestro/tests/fixtures/pre_experiment/experiment_check_env.py +16 -0
  55. experimaestro/tests/fixtures/pre_experiment/experiment_check_mock.py +14 -0
  56. experimaestro/tests/fixtures/pre_experiment/experiment_simple.py +12 -0
  57. experimaestro/tests/fixtures/pre_experiment/pre_setup_env.py +5 -0
  58. experimaestro/tests/fixtures/pre_experiment/pre_setup_error.py +3 -0
  59. experimaestro/tests/fixtures/pre_experiment/pre_setup_mock.py +8 -0
  60. experimaestro/tests/launchers/bin/test.py +1 -0
  61. experimaestro/tests/launchers/test_slurm.py +9 -9
  62. experimaestro/tests/partial_reschedule.py +46 -0
  63. experimaestro/tests/restart.py +3 -3
  64. experimaestro/tests/restart_main.py +1 -0
  65. experimaestro/tests/scripts/notifyandwait.py +1 -0
  66. experimaestro/tests/task_partial.py +38 -0
  67. experimaestro/tests/task_tokens.py +2 -2
  68. experimaestro/tests/tasks/test_dynamic.py +6 -6
  69. experimaestro/tests/test_dependencies.py +3 -3
  70. experimaestro/tests/test_deprecated.py +15 -15
  71. experimaestro/tests/test_dynamic_locking.py +317 -0
  72. experimaestro/tests/test_environment.py +24 -14
  73. experimaestro/tests/test_experiment.py +171 -36
  74. experimaestro/tests/test_identifier.py +25 -25
  75. experimaestro/tests/test_identifier_stability.py +3 -5
  76. experimaestro/tests/test_multitoken.py +2 -4
  77. experimaestro/tests/{test_subparameters.py → test_partial.py} +25 -25
  78. experimaestro/tests/test_partial_paths.py +81 -138
  79. experimaestro/tests/test_pre_experiment.py +219 -0
  80. experimaestro/tests/test_progress.py +2 -8
  81. experimaestro/tests/test_remote_state.py +1132 -0
  82. experimaestro/tests/test_stray_jobs.py +261 -0
  83. experimaestro/tests/test_tasks.py +1 -2
  84. experimaestro/tests/test_token_locking.py +52 -67
  85. experimaestro/tests/test_tokens.py +5 -6
  86. experimaestro/tests/test_transient.py +225 -0
  87. experimaestro/tests/test_workspace_state_provider.py +768 -0
  88. experimaestro/tests/token_reschedule.py +1 -3
  89. experimaestro/tests/utils.py +2 -7
  90. experimaestro/tokens.py +227 -372
  91. experimaestro/tools/diff.py +1 -0
  92. experimaestro/tools/documentation.py +4 -5
  93. experimaestro/tools/jobs.py +1 -2
  94. experimaestro/tui/app.py +459 -1895
  95. experimaestro/tui/app.tcss +162 -0
  96. experimaestro/tui/dialogs.py +172 -0
  97. experimaestro/tui/log_viewer.py +253 -3
  98. experimaestro/tui/messages.py +137 -0
  99. experimaestro/tui/utils.py +54 -0
  100. experimaestro/tui/widgets/__init__.py +23 -0
  101. experimaestro/tui/widgets/experiments.py +468 -0
  102. experimaestro/tui/widgets/global_services.py +238 -0
  103. experimaestro/tui/widgets/jobs.py +972 -0
  104. experimaestro/tui/widgets/log.py +156 -0
  105. experimaestro/tui/widgets/orphans.py +363 -0
  106. experimaestro/tui/widgets/runs.py +185 -0
  107. experimaestro/tui/widgets/services.py +314 -0
  108. experimaestro/tui/widgets/stray_jobs.py +528 -0
  109. experimaestro/utils/__init__.py +1 -1
  110. experimaestro/utils/environment.py +105 -22
  111. experimaestro/utils/fswatcher.py +124 -0
  112. experimaestro/utils/jobs.py +1 -2
  113. experimaestro/utils/jupyter.py +1 -2
  114. experimaestro/utils/logging.py +72 -0
  115. experimaestro/version.py +2 -2
  116. experimaestro/webui/__init__.py +9 -0
  117. experimaestro/webui/app.py +117 -0
  118. experimaestro/{server → webui}/data/index.css +66 -11
  119. experimaestro/webui/data/index.css.map +1 -0
  120. experimaestro/{server → webui}/data/index.js +82763 -87217
  121. experimaestro/webui/data/index.js.map +1 -0
  122. experimaestro/webui/routes/__init__.py +5 -0
  123. experimaestro/webui/routes/auth.py +53 -0
  124. experimaestro/webui/routes/proxy.py +117 -0
  125. experimaestro/webui/server.py +200 -0
  126. experimaestro/webui/state_bridge.py +152 -0
  127. experimaestro/webui/websocket.py +413 -0
  128. {experimaestro-2.0.0b4.dist-info → experimaestro-2.0.0b17.dist-info}/METADATA +8 -9
  129. experimaestro-2.0.0b17.dist-info/RECORD +219 -0
  130. experimaestro/cli/progress.py +0 -269
  131. experimaestro/scheduler/state.py +0 -75
  132. experimaestro/scheduler/state_db.py +0 -388
  133. experimaestro/scheduler/state_sync.py +0 -834
  134. experimaestro/server/__init__.py +0 -467
  135. experimaestro/server/data/index.css.map +0 -1
  136. experimaestro/server/data/index.js.map +0 -1
  137. experimaestro/tests/test_cli_jobs.py +0 -615
  138. experimaestro/tests/test_file_progress.py +0 -425
  139. experimaestro/tests/test_file_progress_integration.py +0 -477
  140. experimaestro/tests/test_state_db.py +0 -434
  141. experimaestro-2.0.0b4.dist-info/RECORD +0 -181
  142. /experimaestro/{server → webui}/data/1815e00441357e01619e.ttf +0 -0
  143. /experimaestro/{server → webui}/data/2463b90d9a316e4e5294.woff2 +0 -0
  144. /experimaestro/{server → webui}/data/2582b0e4bcf85eceead0.ttf +0 -0
  145. /experimaestro/{server → webui}/data/89999bdf5d835c012025.woff2 +0 -0
  146. /experimaestro/{server → webui}/data/914997e1bdfc990d0897.ttf +0 -0
  147. /experimaestro/{server → webui}/data/c210719e60948b211a12.woff2 +0 -0
  148. /experimaestro/{server → webui}/data/favicon.ico +0 -0
  149. /experimaestro/{server → webui}/data/index.html +0 -0
  150. /experimaestro/{server → webui}/data/login.html +0 -0
  151. /experimaestro/{server → webui}/data/manifest.json +0 -0
  152. {experimaestro-2.0.0b4.dist-info → experimaestro-2.0.0b17.dist-info}/WHEEL +0 -0
  153. {experimaestro-2.0.0b4.dist-info → experimaestro-2.0.0b17.dist-info}/entry_points.txt +0 -0
  154. {experimaestro-2.0.0b4.dist-info → experimaestro-2.0.0b17.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,528 @@
1
+ """Orphan jobs tab widget for the TUI
2
+
3
+ Displays orphan jobs: Jobs on disk not referenced by any experiment.
4
+ Running orphan jobs (stray) are highlighted differently and can be killed.
5
+ Non-running orphan jobs can be deleted.
6
+ """
7
+
8
+ import logging
9
+ from typing import Optional
10
+ from textual.app import ComposeResult
11
+ from textual.containers import Vertical, Horizontal
12
+ from textual.widgets import DataTable, Static, Button
13
+ from textual.binding import Binding
14
+
15
+ from experimaestro.scheduler.state_provider import StateProvider
16
+ from experimaestro.tui.utils import get_status_icon
17
+ from experimaestro.tui.dialogs import DeleteConfirmScreen, KillConfirmScreen
18
+ from experimaestro.tui.messages import SizeCalculated
19
+
20
+ logger = logging.getLogger("xpm.tui.orphan_jobs")
21
+
22
+
23
class OrphanJobsTab(Vertical):
    """Tab widget for viewing and managing orphan jobs

    Orphan jobs: Jobs on disk not referenced by any experiment.
    - Running orphan jobs (stray) are shown in yellow and can be killed (ctrl+k)
    - Non-running orphan jobs can be deleted (ctrl+d)

    Folder sizes are computed one job at a time through a worker and stored in
    class-level caches, which are therefore shared by every instance.
    """

    BINDINGS = [
        Binding("r", "refresh", "Refresh"),
        Binding("ctrl+d", "delete_selected", "Delete", show=False),
        Binding("ctrl+k", "kill_selected", "Kill", show=False),
        Binding("T", "sort_by_task", "Sort Task", show=False),
        Binding("Z", "sort_by_size", "Sort Size", show=False),
    ]

    _size_cache: dict = {}  # Class-level cache (formatted strings)
    _size_bytes_cache: dict = {}  # Class-level cache (raw bytes for sorting)

    def __init__(self, state_provider: StateProvider) -> None:
        """Create the tab.

        Args:
            state_provider: Provider used to list, kill and delete orphan jobs.
        """
        super().__init__()
        self.state_provider = state_provider
        self.orphan_jobs = []  # All orphan jobs
        self._pending_jobs = []  # Jobs waiting for size calculation
        self._sort_column: Optional[str] = None
        self._sort_reverse: bool = False

    def compose(self) -> ComposeResult:
        """Yield the warning banner, control buttons, table and info line"""
        yield Static("", id="orphan-warning", classes="warning-banner hidden")
        with Horizontal(id="orphan-controls", classes="controls-bar"):
            yield Button("Refresh", id="orphan-refresh-btn")
            yield Button("Kill All", id="orphan-kill-all-btn", variant="error")
            yield Button("Delete All", id="orphan-delete-all-btn", variant="warning")
        yield DataTable(id="orphan-table", cursor_type="row")
        yield Static("", id="orphan-job-info")

    def on_mount(self) -> None:
        """Initialize the orphan jobs table"""
        table = self.query_one("#orphan-table", DataTable)
        table.add_column("", key="status", width=3)
        table.add_column("Job ID", key="job_id", width=10)
        table.add_column("Task", key="task")
        table.add_column("Size", key="size", width=10)
        self.refresh_orphan_jobs()

    def on_button_pressed(self, event: Button.Pressed) -> None:
        """Handle button presses"""
        if event.button.id == "orphan-refresh-btn":
            self.action_refresh()
        elif event.button.id == "orphan-kill-all-btn":
            self.action_kill_all()
        elif event.button.id == "orphan-delete-all-btn":
            self.action_delete_all()

    def action_sort_by_task(self) -> None:
        """Sort by task name (pressing again toggles the direction)"""
        if self._sort_column == "task":
            self._sort_reverse = not self._sort_reverse
        else:
            self._sort_column = "task"
            self._sort_reverse = False
        self._rebuild_table()
        order = "desc" if self._sort_reverse else "asc"
        self.notify(f"Sorted by task ({order})", severity="information")

    def action_sort_by_size(self) -> None:
        """Sort by size (pressing again toggles the direction)"""
        if self._sort_column == "size":
            self._sort_reverse = not self._sort_reverse
        else:
            self._sort_column = "size"
            self._sort_reverse = True  # Default: largest first
        self._rebuild_table()
        order = "largest first" if self._sort_reverse else "smallest first"
        self.notify(f"Sorted by size ({order})", severity="information")

    @staticmethod
    def _is_running(job) -> bool:
        """Return True when the job has a state and that state reports running"""
        return bool(job.state and job.state.running())

    def refresh_orphan_jobs(self) -> None:
        """Refresh the orphan jobs list"""
        # Check if remote provider
        if self.state_provider.is_remote:
            self._show_warning(
                "Orphan job detection not available for remote workspaces"
            )
            return

        # Get all orphan jobs (only those with existing folders)
        all_orphans = self.state_provider.get_orphan_jobs()
        self.orphan_jobs = [j for j in all_orphans if j.path and j.path.exists()]

        # Update warning based on scheduler status
        self._update_scheduler_warning(self.running_count)

        # Update tab title in parent app
        self._update_tab_title()

        # Collect jobs needing size calculation
        self._pending_jobs = [
            j for j in self.orphan_jobs if j.identifier not in self._size_cache
        ]

        # Rebuild table
        self._rebuild_table()

        # Start calculating sizes
        if self._pending_jobs:
            self._calculate_next_size()

    def _update_scheduler_warning(self, running_count: int) -> None:
        """Update warning banner based on scheduler status

        Args:
            running_count: Number of orphan jobs that are still running.
        """
        warning = self.query_one("#orphan-warning", Static)

        # Check if any experiments are running (ended_at is None means still running)
        running_experiments = [
            e
            for e in self.state_provider.get_experiments()
            if e.run_id and getattr(e, "ended_at", None) is None
        ]

        if running_experiments or self.state_provider.is_live:
            warning.update(
                "WARNING: At least one experiment is running. "
                "Killing stray jobs or deleting orphans may cause issues!"
            )
            warning.remove_class("hidden")
        elif running_count > 0:
            warning.update(
                f"{running_count} orphan jobs are still running (stray). "
                "Use ctrl+k to kill them."
            )
            warning.remove_class("hidden")
        else:
            warning.add_class("hidden")

    def _show_warning(self, message: str) -> None:
        """Show a warning message"""
        warning = self.query_one("#orphan-warning", Static)
        warning.update(f"{message}")
        warning.remove_class("hidden")

    def _update_tab_title(self) -> None:
        """Update the tab title with orphan job count (best effort)"""
        try:
            self.app.update_orphan_tab_title()
        except Exception:
            # Best effort: the title is cosmetic, but keep a trace of the failure
            logger.debug("Could not update the orphan tab title", exc_info=True)

    @property
    def orphan_count(self) -> int:
        """Number of all orphan jobs"""
        return len(self.orphan_jobs)

    @property
    def running_count(self) -> int:
        """Number of running orphan jobs (stray)"""
        return sum(1 for j in self.orphan_jobs if self._is_running(j))

    @property
    def finished_count(self) -> int:
        """Number of non-running orphan jobs"""
        return len(self.orphan_jobs) - self.running_count

    def _get_sorted_jobs(self):
        """Return a copy of the job list sorted by the current sort column

        When sorting by size, jobs whose size is not cached yet are placed at
        the end whatever the direction.
        """
        jobs = self.orphan_jobs[:]
        if self._sort_column == "task":
            jobs.sort(key=lambda j: j.task_id or "", reverse=self._sort_reverse)
        elif self._sort_column == "size":
            sizes = self._size_bytes_cache
            descending = self._sort_reverse

            def size_key(job):
                size = sizes.get(job.identifier)
                missing = size is None
                # reverse=True flips the whole tuple order, so the "missing"
                # flag is flipped as well to keep unsized jobs last.
                return (missing != descending, 0 if missing else size)

            jobs.sort(key=size_key, reverse=descending)
        return jobs

    def _rebuild_table(self) -> None:
        """Rebuild the table with current sort order"""
        from rich.text import Text

        table = self.query_one("#orphan-table", DataTable)
        table.clear()

        for job in self._get_sorted_jobs():
            failure_reason = getattr(job, "failure_reason", None)
            transient = getattr(job, "transient", None)
            status_icon = get_status_icon(
                job.state.name if job.state else "unknown", failure_reason, transient
            )

            # Use different styling for running vs finished jobs
            if self._is_running(job):
                # Running jobs (stray) in yellow/bold
                job_id_text = Text(job.identifier[:7], style="bold yellow")
                task_text = Text(job.task_id or "", style="yellow")
            else:
                # Finished jobs in normal style
                job_id_text = Text(job.identifier[:7])
                task_text = Text(job.task_id or "")

            size_text = self._size_cache.get(job.identifier, "waiting")

            table.add_row(
                status_icon,
                job_id_text,
                task_text,
                size_text,
                key=job.identifier,
            )

    def _calculate_next_size(self) -> None:
        """Calculate size for the next pending job using a worker"""
        if not self._pending_jobs:
            return

        job = self._pending_jobs.pop(0)
        # Update to "calc..."
        self._update_size_cell(job.identifier, "calc...")
        # Run calculation in worker thread
        self.run_worker(
            self._calc_size_worker(job.identifier, job.path),
            thread=True,
        )

    async def _calc_size_worker(self, job_id: str, path):
        """Worker to calculate folder size

        Stores the result in both caches, then posts a SizeCalculated message
        so that the table cell is updated and the next job is processed.
        """
        size_bytes = await self._get_folder_size_async(path)
        size_str = self._format_size(size_bytes)
        self._size_cache[job_id] = size_str
        self._size_bytes_cache[job_id] = size_bytes
        self.post_message(SizeCalculated(job_id, size_str, size_bytes))

    def on_size_calculated(self, message: SizeCalculated) -> None:
        """Handle size calculation completion"""
        self._size_bytes_cache[message.job_id] = message.size_bytes
        self._update_size_cell(message.job_id, message.size)
        # Calculate next one
        self._calculate_next_size()

    @staticmethod
    async def _get_folder_size_async(path) -> int:
        """Calculate total size of a folder using du command if available

        Falls back to the pure Python implementation when ``du`` is missing,
        fails, times out (30 seconds) or prints something unexpected.

        Returns:
            The size in bytes.
        """
        import asyncio
        import shutil
        import sys

        # Try using du command for better performance
        if shutil.which("du"):
            if sys.platform == "darwin":
                # macOS: du -sk gives size in KB
                flag, multiplier = "-sk", 1024
            else:
                # Linux: du -sb gives size in bytes
                flag, multiplier = "-sb", 1

            proc = None
            try:
                proc = await asyncio.create_subprocess_exec(
                    "du",
                    flag,
                    str(path),
                    stdout=asyncio.subprocess.PIPE,
                    stderr=asyncio.subprocess.DEVNULL,
                )
                stdout, _ = await asyncio.wait_for(proc.communicate(), timeout=30)
                if proc.returncode == 0 and stdout:
                    # Output format: "SIZE\tPATH"
                    return int(stdout.decode().split()[0]) * multiplier
            except (asyncio.TimeoutError, ValueError, IndexError, OSError):
                pass  # Fall back to Python implementation
            finally:
                # A timeout (or cancellation) leaves du running: kill and reap
                # it so that no process is left behind.
                if proc is not None and proc.returncode is None:
                    try:
                        proc.kill()
                        await proc.wait()
                    except OSError:
                        pass  # Process already gone

        # Fallback: Python implementation
        return OrphanJobsTab._get_folder_size_sync(path)

    @staticmethod
    def _get_folder_size_sync(path) -> int:
        """Calculate total size of a folder using Python (fallback)

        Entries that cannot be inspected (removed meanwhile, permission
        denied, ...) are skipped instead of aborting the whole computation.

        Returns:
            The sum of the sizes of all readable regular files, in bytes.
        """
        total = 0
        try:
            for entry in path.rglob("*"):
                try:
                    if entry.is_file():
                        total += entry.stat().st_size
                except OSError:
                    continue  # Skip this entry, keep what was counted so far
        except OSError:
            pass  # The directory walk itself failed: return the partial total
        return total

    @staticmethod
    def _format_size(size: int) -> str:
        """Format size in human-readable format

        Uses 1024-based units from B to TB; bytes are shown without decimals,
        other units with one decimal.
        """
        for unit in ["B", "KB", "MB", "GB"]:
            if size < 1024:
                return f"{size:.1f}{unit}" if unit != "B" else f"{size}{unit}"
            size /= 1024
        return f"{size:.1f}TB"

    def _update_size_cell(self, job_id: str, value: Optional[str] = None) -> None:
        """Update the size cell for a job (best effort)

        Args:
            job_id: Row key of the job in the table.
            value: Text to display; when None, the cached size (or "-") is used.
        """
        try:
            table = self.query_one("#orphan-table", DataTable)
            size_text = (
                value if value is not None else self._size_cache.get(job_id, "-")
            )
            table.update_cell(job_id, "size", size_text)
        except Exception:
            # Table may have changed
            logger.debug("Could not update size cell for job %s", job_id, exc_info=True)

    def on_data_table_row_selected(self, event: DataTable.RowSelected) -> None:
        """Show job details when a row is selected"""
        self._update_job_info()

    def on_data_table_row_highlighted(self, event: DataTable.RowHighlighted) -> None:
        """Show job details when cursor moves"""
        self._update_job_info()

    def _update_job_info(self) -> None:
        """Update the job info display for the row under the cursor"""
        info = self.query_one("#orphan-job-info", Static)

        job = self._get_selected_job()
        if not job or not job.path:
            info.update("")
            return

        size = self._size_cache.get(job.identifier, "calculating...")
        state = job.state.name if job.state else "unknown"
        hint = "(ctrl+k to kill)" if self._is_running(job) else "(ctrl+d to delete)"
        info.update(f"Path: {job.path} | Size: {size} | State: {state} {hint}")

    def _get_selected_job(self):
        """Get the currently selected job, or None when there is none"""
        table = self.query_one("#orphan-table", DataTable)
        if table.cursor_row is None:
            return None

        try:
            row_key = list(table.rows.keys())[table.cursor_row]
        except IndexError:
            return None

        if row_key:
            job_id = str(row_key.value)
            return next((j for j in self.orphan_jobs if j.identifier == job_id), None)
        return None

    def action_refresh(self) -> None:
        """Refresh the orphan jobs list"""
        self.refresh_orphan_jobs()
        self.notify("Refreshed orphan jobs list", severity="information")

    def action_delete_selected(self) -> None:
        """Delete the selected orphan job (if not running)"""
        job = self._get_selected_job()
        if not job:
            return

        if self._is_running(job):
            self.notify(
                "Cannot delete a running job - kill it first (ctrl+k)",
                severity="warning",
            )
            return

        self._delete_job(job)

    def action_kill_selected(self) -> None:
        """Kill the selected running orphan job"""
        job = self._get_selected_job()
        if not job:
            return

        if not self._is_running(job):
            self.notify("Job is not running", severity="warning")
            return

        self._kill_job(job)

    def _delete_job(self, job) -> None:
        """Delete a single orphan job with confirmation"""

        def handle_delete(confirmed: bool) -> None:
            if confirmed:
                success, msg = self.state_provider.delete_job_safely(job)
                if success:
                    self.notify(msg, severity="information")
                    self.refresh_orphan_jobs()
                else:
                    self.notify(msg, severity="error")

        self.app.push_screen(
            DeleteConfirmScreen("orphan job", job.identifier),
            handle_delete,
        )

    def _kill_job(self, job) -> None:
        """Kill a running orphan job with confirmation"""

        def handle_kill(confirmed: bool) -> None:
            if confirmed:
                success = self.state_provider.kill_job(job, perform=True)
                if success:
                    self.notify(f"Job {job.identifier} killed", severity="information")
                    self.refresh_orphan_jobs()
                else:
                    self.notify("Failed to kill job", severity="error")

        self.app.push_screen(
            KillConfirmScreen("orphan job", job.identifier),
            handle_kill,
        )

    def action_kill_all(self) -> None:
        """Kill all running orphan jobs (after confirmation)"""
        running_jobs = [j for j in self.orphan_jobs if self._is_running(j)]

        if not running_jobs:
            self.notify("No running orphan jobs to kill", severity="warning")
            return

        def handle_kill_all(confirmed: bool) -> None:
            if confirmed:
                killed = 0
                for job in running_jobs:
                    if self.state_provider.kill_job(job, perform=True):
                        killed += 1

                self.notify(
                    f"Killed {killed} of {len(running_jobs)} running jobs",
                    severity="information",
                )
                self.refresh_orphan_jobs()

        self.app.push_screen(
            KillConfirmScreen("all running orphan jobs", f"{len(running_jobs)} jobs"),
            handle_kill_all,
        )

    def action_delete_all(self) -> None:
        """Delete all non-running orphan jobs (after confirmation)"""
        deletable_jobs = [j for j in self.orphan_jobs if not self._is_running(j)]

        if not deletable_jobs:
            self.notify(
                "No deletable orphan jobs (all are running)", severity="warning"
            )
            return

        def handle_delete_all(confirmed: bool) -> None:
            if confirmed:
                deleted = 0
                for job in deletable_jobs:
                    success, _ = self.state_provider.delete_job_safely(
                        job, cascade_orphans=False
                    )
                    if success:
                        deleted += 1

                # Clean up orphan partials once at the end
                self.state_provider.cleanup_orphan_partials(perform=True)

                self.notify(f"Deleted {deleted} orphan jobs", severity="information")
                self.refresh_orphan_jobs()

        self.app.push_screen(
            DeleteConfirmScreen(
                "all finished orphan jobs",
                f"{len(deletable_jobs)} jobs",
                "This action cannot be undone",
            ),
            handle_delete_all,
        )
525
+
526
+
527
+ # Keep old name for backwards compatibility
528
+ StrayJobsTab = OrphanJobsTab
@@ -6,7 +6,7 @@ import logging
6
6
  import shutil
7
7
  import inspect
8
8
 
9
- logger = logging.getLogger("xpm")
9
+ logger = logging.getLogger("xpm.scheduler")
10
10
 
11
11
 
12
12
  def get_caller_location(skip_frames: int = 1) -> str: