experimaestro 2.0.0a8__py3-none-any.whl → 2.0.0b8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of experimaestro might be problematic. Click here for more details.

Files changed (122) hide show
  1. experimaestro/__init__.py +10 -11
  2. experimaestro/annotations.py +167 -206
  3. experimaestro/cli/__init__.py +278 -7
  4. experimaestro/cli/filter.py +42 -74
  5. experimaestro/cli/jobs.py +157 -106
  6. experimaestro/cli/refactor.py +249 -0
  7. experimaestro/click.py +0 -1
  8. experimaestro/commandline.py +19 -3
  9. experimaestro/connectors/__init__.py +20 -1
  10. experimaestro/connectors/local.py +12 -0
  11. experimaestro/core/arguments.py +182 -46
  12. experimaestro/core/identifier.py +107 -6
  13. experimaestro/core/objects/__init__.py +6 -0
  14. experimaestro/core/objects/config.py +542 -25
  15. experimaestro/core/objects/config_walk.py +20 -0
  16. experimaestro/core/serialization.py +91 -34
  17. experimaestro/core/subparameters.py +164 -0
  18. experimaestro/core/types.py +175 -38
  19. experimaestro/exceptions.py +26 -0
  20. experimaestro/experiments/cli.py +111 -25
  21. experimaestro/generators.py +50 -9
  22. experimaestro/huggingface.py +3 -1
  23. experimaestro/launcherfinder/parser.py +29 -0
  24. experimaestro/launchers/__init__.py +26 -1
  25. experimaestro/launchers/direct.py +12 -0
  26. experimaestro/launchers/slurm/base.py +154 -2
  27. experimaestro/mkdocs/metaloader.py +0 -1
  28. experimaestro/mypy.py +452 -7
  29. experimaestro/notifications.py +63 -13
  30. experimaestro/progress.py +0 -2
  31. experimaestro/rpyc.py +0 -1
  32. experimaestro/run.py +19 -6
  33. experimaestro/scheduler/base.py +510 -125
  34. experimaestro/scheduler/dependencies.py +43 -28
  35. experimaestro/scheduler/dynamic_outputs.py +259 -130
  36. experimaestro/scheduler/experiment.py +256 -31
  37. experimaestro/scheduler/interfaces.py +501 -0
  38. experimaestro/scheduler/jobs.py +216 -206
  39. experimaestro/scheduler/remote/__init__.py +31 -0
  40. experimaestro/scheduler/remote/client.py +874 -0
  41. experimaestro/scheduler/remote/protocol.py +467 -0
  42. experimaestro/scheduler/remote/server.py +423 -0
  43. experimaestro/scheduler/remote/sync.py +144 -0
  44. experimaestro/scheduler/services.py +323 -23
  45. experimaestro/scheduler/state_db.py +437 -0
  46. experimaestro/scheduler/state_provider.py +2766 -0
  47. experimaestro/scheduler/state_sync.py +891 -0
  48. experimaestro/scheduler/workspace.py +52 -10
  49. experimaestro/scriptbuilder.py +7 -0
  50. experimaestro/server/__init__.py +147 -57
  51. experimaestro/server/data/index.css +0 -125
  52. experimaestro/server/data/index.css.map +1 -1
  53. experimaestro/server/data/index.js +194 -58
  54. experimaestro/server/data/index.js.map +1 -1
  55. experimaestro/settings.py +44 -5
  56. experimaestro/sphinx/__init__.py +3 -3
  57. experimaestro/taskglobals.py +20 -0
  58. experimaestro/tests/conftest.py +80 -0
  59. experimaestro/tests/core/test_generics.py +2 -2
  60. experimaestro/tests/identifier_stability.json +45 -0
  61. experimaestro/tests/launchers/bin/sacct +6 -2
  62. experimaestro/tests/launchers/bin/sbatch +4 -2
  63. experimaestro/tests/launchers/test_slurm.py +80 -0
  64. experimaestro/tests/tasks/test_dynamic.py +231 -0
  65. experimaestro/tests/test_cli_jobs.py +615 -0
  66. experimaestro/tests/test_deprecated.py +630 -0
  67. experimaestro/tests/test_environment.py +200 -0
  68. experimaestro/tests/test_file_progress_integration.py +1 -1
  69. experimaestro/tests/test_forward.py +3 -3
  70. experimaestro/tests/test_identifier.py +372 -41
  71. experimaestro/tests/test_identifier_stability.py +458 -0
  72. experimaestro/tests/test_instance.py +3 -3
  73. experimaestro/tests/test_multitoken.py +442 -0
  74. experimaestro/tests/test_mypy.py +433 -0
  75. experimaestro/tests/test_objects.py +312 -5
  76. experimaestro/tests/test_outputs.py +2 -2
  77. experimaestro/tests/test_param.py +8 -12
  78. experimaestro/tests/test_partial_paths.py +231 -0
  79. experimaestro/tests/test_progress.py +0 -48
  80. experimaestro/tests/test_remote_state.py +671 -0
  81. experimaestro/tests/test_resumable_task.py +480 -0
  82. experimaestro/tests/test_serializers.py +141 -1
  83. experimaestro/tests/test_state_db.py +434 -0
  84. experimaestro/tests/test_subparameters.py +160 -0
  85. experimaestro/tests/test_tags.py +136 -0
  86. experimaestro/tests/test_tasks.py +107 -121
  87. experimaestro/tests/test_token_locking.py +252 -0
  88. experimaestro/tests/test_tokens.py +17 -13
  89. experimaestro/tests/test_types.py +123 -1
  90. experimaestro/tests/test_workspace_triggers.py +158 -0
  91. experimaestro/tests/token_reschedule.py +4 -2
  92. experimaestro/tests/utils.py +2 -2
  93. experimaestro/tokens.py +154 -57
  94. experimaestro/tools/diff.py +1 -1
  95. experimaestro/tui/__init__.py +8 -0
  96. experimaestro/tui/app.py +2395 -0
  97. experimaestro/tui/app.tcss +353 -0
  98. experimaestro/tui/log_viewer.py +228 -0
  99. experimaestro/utils/__init__.py +23 -0
  100. experimaestro/utils/environment.py +148 -0
  101. experimaestro/utils/git.py +129 -0
  102. experimaestro/utils/resources.py +1 -1
  103. experimaestro/version.py +34 -0
  104. {experimaestro-2.0.0a8.dist-info → experimaestro-2.0.0b8.dist-info}/METADATA +68 -38
  105. experimaestro-2.0.0b8.dist-info/RECORD +187 -0
  106. {experimaestro-2.0.0a8.dist-info → experimaestro-2.0.0b8.dist-info}/WHEEL +1 -1
  107. experimaestro-2.0.0b8.dist-info/entry_points.txt +16 -0
  108. experimaestro/compat.py +0 -6
  109. experimaestro/core/objects.pyi +0 -221
  110. experimaestro/server/data/0c35d18bf06992036b69.woff2 +0 -0
  111. experimaestro/server/data/219aa9140e099e6c72ed.woff2 +0 -0
  112. experimaestro/server/data/3a4004a46a653d4b2166.woff +0 -0
  113. experimaestro/server/data/3baa5b8f3469222b822d.woff +0 -0
  114. experimaestro/server/data/4d73cb90e394b34b7670.woff +0 -0
  115. experimaestro/server/data/4ef4218c522f1eb6b5b1.woff2 +0 -0
  116. experimaestro/server/data/5d681e2edae8c60630db.woff +0 -0
  117. experimaestro/server/data/6f420cf17cc0d7676fad.woff2 +0 -0
  118. experimaestro/server/data/c380809fd3677d7d6903.woff2 +0 -0
  119. experimaestro/server/data/f882956fd323fd322f31.woff +0 -0
  120. experimaestro-2.0.0a8.dist-info/RECORD +0 -166
  121. experimaestro-2.0.0a8.dist-info/entry_points.txt +0 -17
  122. {experimaestro-2.0.0a8.dist-info → experimaestro-2.0.0b8.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,2395 @@
1
+ """Main Textual TUI application for experiment monitoring"""
2
+
3
+ import logging
4
+ from pathlib import Path
5
+ from typing import Optional
6
+ from textual.app import App, ComposeResult
7
+ from textual import work
8
+ from textual.containers import Container, Horizontal, Vertical
9
+ from textual.widgets import (
10
+ Header,
11
+ Footer,
12
+ DataTable,
13
+ Label,
14
+ TabbedContent,
15
+ TabPane,
16
+ RichLog,
17
+ Button,
18
+ Static,
19
+ Input,
20
+ )
21
+ from textual.widget import Widget
22
+ from textual.reactive import reactive
23
+ from textual.binding import Binding
24
+ from textual.message import Message
25
+ from textual.screen import ModalScreen, Screen
26
+ from textual import events
27
+ from rich.text import Text
28
+ from experimaestro.scheduler.state_provider import (
29
+ WorkspaceStateProvider,
30
+ StateEvent,
31
+ StateEventType,
32
+ )
33
+ from experimaestro.tui.log_viewer import LogViewerScreen
34
+
35
+
36
def format_duration(seconds: float) -> str:
    """Render a duration in seconds as a compact human-readable string.

    Only the two most significant units are shown (e.g. ``"2h 5m"``), and
    fractional seconds are truncated.

    Args:
        seconds: Duration in seconds.

    Returns:
        ``"-"`` for a negative duration, otherwise one of ``"Ns"``,
        ``"Nm Ns"``, ``"Nh Nm"`` or ``"Nd Nh"`` depending on magnitude.
    """
    if seconds < 0:
        return "-"

    # Break the truncated total down into days / hours / minutes / seconds
    total_minutes, secs = divmod(int(seconds), 60)
    total_hours, mins = divmod(total_minutes, 60)
    days, hours = divmod(total_hours, 24)

    if days:
        return f"{days}d {hours}h"
    if total_hours:
        return f"{hours}h {mins}m"
    if total_minutes:
        return f"{mins}m {secs}s"
    return f"{secs}s"
49
+
50
+
51
class QuitConfirmScreen(ModalScreen[bool]):
    """Modal dialog asking the user to confirm quitting.

    The screen is dismissed with ``True`` when the "Quit" button is pressed
    and with ``False`` for any other button.
    """

    def __init__(self, has_active_experiment: bool = False):
        """Create the dialog.

        Args:
            has_active_experiment: When true, a warning about the running
                experiment is shown instead of the plain confirmation text.
        """
        super().__init__()
        self.has_active_experiment = has_active_experiment

    def compose(self) -> ComposeResult:
        """Build the dialog: title, message (or warning) and the two buttons."""
        # Pick the body text and its widget id up front
        if self.has_active_experiment:
            body_text = (
                "⚠️ The experiment is still in progress.\n"
                "Quitting will prevent new jobs from being launched."
            )
            body_id = "quit-warning"
        else:
            body_text = "Are you sure you want to quit?"
            body_id = "quit-message"

        with Vertical(id="quit-dialog"):
            yield Static("Quit Experimaestro?", id="quit-title")
            yield Static(body_text, id=body_id)

            with Horizontal(id="quit-buttons"):
                yield Button("Quit", variant="error", id="quit-yes")
                yield Button("Cancel", variant="primary", id="quit-no")

    def on_button_pressed(self, event: Button.Pressed) -> None:
        """Dismiss with True only when the "Quit" button was pressed."""
        self.dismiss(event.button.id == "quit-yes")
80
+
81
+
82
class DeleteConfirmScreen(ModalScreen[bool]):
    """Modal dialog asking the user to confirm a deletion.

    The screen is dismissed with ``True`` when "Delete" is pressed and with
    ``False`` for any other button.
    """

    def __init__(
        self, item_type: str, item_name: str, warning: Optional[str] = None
    ) -> None:
        """Create the dialog.

        Args:
            item_type: Kind of item being deleted, shown in the title.
            item_name: Name of the item, shown in the message line.
            warning: Optional extra warning line; omitted when falsy.
        """
        super().__init__()
        self.item_type, self.item_name = item_type, item_name
        self.warning = warning

    def compose(self) -> ComposeResult:
        """Build the dialog: title, message, optional warning, buttons."""
        title = f"Delete {self.item_type}?"
        message = f"This will permanently delete: {self.item_name}"

        with Vertical(id="delete-dialog"):
            yield Static(title, id="delete-title")
            yield Static(message, id="delete-message")

            if self.warning:
                yield Static(f"Warning: {self.warning}", id="delete-warning")

            with Horizontal(id="delete-buttons"):
                yield Button("Delete", variant="error", id="delete-yes")
                yield Button("Cancel", variant="primary", id="delete-no")

    def on_mount(self) -> None:
        """Give initial focus to the Cancel button."""
        cancel_button = self.query_one("#delete-no", Button)
        cancel_button.focus()

    def on_button_pressed(self, event: Button.Pressed) -> None:
        """Dismiss with True only when the "Delete" button was pressed."""
        self.dismiss(event.button.id == "delete-yes")
116
+
117
+
118
class KillConfirmScreen(ModalScreen[bool]):
    """Modal dialog asking the user to confirm killing something.

    The screen is dismissed with ``True`` when "Kill" is pressed and with
    ``False`` for any other button.
    """

    def __init__(self, item_type: str, item_name: str) -> None:
        """Create the dialog.

        Args:
            item_type: Kind of item being killed, shown in the title.
            item_name: Name of the item, shown in the message line.
        """
        super().__init__()
        self.item_type, self.item_name = item_type, item_name

    def compose(self) -> ComposeResult:
        """Build the dialog: title, message and the two buttons."""
        title = f"Kill {self.item_type}?"
        message = f"This will terminate: {self.item_name}"

        with Vertical(id="kill-dialog"):
            yield Static(title, id="kill-title")
            yield Static(message, id="kill-message")

            with Horizontal(id="kill-buttons"):
                yield Button("Kill", variant="warning", id="kill-yes")
                yield Button("Cancel", variant="primary", id="kill-no")

    def on_mount(self) -> None:
        """Give initial focus to the Cancel button."""
        cancel_button = self.query_one("#kill-no", Button)
        cancel_button.focus()

    def on_button_pressed(self, event: Button.Pressed) -> None:
        """Dismiss with True only when the "Kill" button was pressed."""
        self.dismiss(event.button.id == "kill-yes")
144
+
145
+
146
def get_status_icon(status: str, failure_reason=None):
    """Map a job state name to the icon shown in the UI.

    Args:
        status: Job state name (e.g., "done", "error", "running")
        failure_reason: Optional JobFailureStatus enum for error states

    Returns:
        Status icon string
    """
    # States whose icon does not depend on anything else
    fixed_icons = {
        "done": "✓",
        "running": "▶",
        "waiting": "⌛",  # Waiting for dependencies
    }
    if status in fixed_icons:
        return fixed_icons[status]

    if status != "error":
        # phantom, unscheduled or unknown
        return "👻"

    # Error state: pick a more specific icon when the failure type is known
    if failure_reason is not None:
        # Imported lazily, only when a failure reason has to be inspected
        from experimaestro.scheduler.interfaces import JobFailureStatus

        specific_icons = (
            (JobFailureStatus.DEPENDENCY, "🔗"),  # Dependency failed
            (JobFailureStatus.TIMEOUT, "⏱"),  # Timeout
            (JobFailureStatus.MEMORY, "💾"),  # Memory issue
        )
        for reason, icon in specific_icons:
            if failure_reason == reason:
                return icon

    # FAILED or unknown - use default error icon
    return "❌"
178
+
179
+
180
class CaptureLog(RichLog):
    """RichLog widget that captures printed output and styles it by log level."""

    def on_mount(self) -> None:
        """Start capturing print output once the widget is mounted."""
        self.begin_capture_print()

    def on_unmount(self) -> None:
        """Stop capturing print output when the widget is unmounted."""
        self.end_capture_print()

    def _format_log_line(self, text: str) -> Text:
        """Wrap ``text`` in a Text object styled according to its log level.

        A level matches when the line starts with ``LEVEL:`` or contains
        ``:LEVEL:``. Levels are tried in the order listed below and the
        first match wins; a line matching no level is left unstyled.
        """
        level_styles = (
            ("ERROR", "bold red"),
            ("WARNING", "yellow"),
            ("INFO", "green"),
            ("DEBUG", "dim"),
            ("CRITICAL", "bold white on red"),
        )

        line = Text()
        for level, style in level_styles:
            if text.startswith(f"{level}:") or f":{level}:" in text:
                line.append(text, style=style)
                return line

        line.append(text)
        return line

    def on_print(self, event: events.Print) -> None:
        """Write captured print output to the log, skipping blank text."""
        stripped = event.text.strip()
        if stripped:
            self.write(self._format_log_line(stripped))
215
+
216
+
217
class ExperimentsList(Widget):
    """Widget displaying list of experiments

    The widget has two visual states: the full experiments table, and a
    collapsed one-line header shown once an experiment has been selected.
    A click on the widget while it is collapsed expands the table again.
    """

    BINDINGS = [
        Binding("d", "delete_experiment", "Delete", show=False),
        Binding("k", "kill_experiment", "Kill", show=False),
    ]

    # (label, key) of every table column, in display order. Shared by
    # on_mount (column creation) and refresh_experiments (cell updates) so
    # the two cannot drift apart.
    _COLUMNS = (
        ("ID", "id"),
        ("Host", "host"),
        ("Jobs", "jobs"),
        ("Status", "status"),
        ("Started", "started"),
        ("Duration", "duration"),
    )

    # ID of the experiment currently being viewed (None when none is selected)
    current_experiment: reactive[Optional[str]] = reactive(None)
    # True while the table is hidden and only the collapsed header is shown
    collapsed: reactive[bool] = reactive(False)

    def __init__(self, state_provider: WorkspaceStateProvider) -> None:
        """Create the widget.

        Args:
            state_provider: Source of experiment information; its
                ``get_experiments()`` method is called on every refresh.
        """
        super().__init__()
        self.state_provider = state_provider
        self.experiments = []

    @staticmethod
    def _format_status(total: int, finished: int, failed: int) -> str:
        """Build the status text shown for an experiment.

        Failures take precedence, then completion, then progress; an
        experiment matching none of these is reported as "Empty".
        """
        if failed > 0:
            return f"❌ {failed} failed"
        if finished == total and total > 0:
            return "✓ Done"
        if finished < total:
            return f"▶ {finished}/{total}"
        return "Empty"

    def _get_selected_experiment_id(self) -> Optional[str]:
        """Get the experiment ID from the currently selected row

        Returns:
            The row key of the row under the cursor, or None when there is
            no such row (in particular when the table is empty).
        """
        table = self.query_one("#experiments-table", DataTable)
        row_index = table.cursor_row
        row_keys = list(table.rows.keys())

        # Guard against an empty table / out-of-range cursor: indexing
        # blindly would raise IndexError when "d" or "k" is pressed with no
        # experiment listed.
        if row_index is None or not 0 <= row_index < len(row_keys):
            return None

        row_key = row_keys[row_index]
        if row_key:
            return str(row_key.value)
        return None

    def action_delete_experiment(self) -> None:
        """Request to delete the selected experiment"""
        exp_id = self._get_selected_experiment_id()
        if exp_id:
            self.post_message(DeleteExperimentRequest(exp_id))

    def action_kill_experiment(self) -> None:
        """Request to kill all running jobs in the selected experiment"""
        exp_id = self._get_selected_experiment_id()
        if exp_id:
            self.post_message(KillExperimentRequest(exp_id))

    def compose(self) -> ComposeResult:
        """Build the collapsed header (hidden at first) and the full table."""
        # Collapsed header (hidden initially)
        with Horizontal(id="collapsed-header", classes="hidden"):
            yield Label("", id="collapsed-experiment-info")

        # Full experiments table
        with Container(id="experiments-table-container"):
            yield Label("Experiments", classes="section-title")
            yield DataTable(id="experiments-table", cursor_type="row")

    def on_mount(self) -> None:
        """Initialize the experiments table"""
        table = self.query_one("#experiments-table", DataTable)
        for label, key in self._COLUMNS:
            table.add_column(label, key=key)
        self.refresh_experiments()

        # If there's only one experiment, automatically select it
        if len(self.experiments) == 1:
            exp_id = self.experiments[0].experiment_id
            self.current_experiment = exp_id
            self.collapse_to_experiment(exp_id)
            self.post_message(ExperimentSelected(exp_id))

    def refresh_experiments(self) -> None:
        """Refresh the experiments list from state provider

        Existing rows are updated in place, new experiments get a new row,
        and rows of experiments no longer reported are removed. If the
        state provider raises, the error is logged, ``self.experiments`` is
        reset to an empty list and the table is left untouched.
        """
        from datetime import datetime
        import time as time_module

        table = self.query_one("#experiments-table", DataTable)

        try:
            self.experiments = self.state_provider.get_experiments()
            self.log.debug(
                f"Refreshing experiments: found {len(self.experiments)} experiments"
            )
        except Exception as e:
            import traceback

            self.log.error(f"ERROR refreshing experiments: {e}")
            self.log.error(traceback.format_exc())
            self.experiments = []
            return

        # Get existing row keys
        existing_keys = set(table.rows.keys())
        current_exp_ids = set()

        for exp in self.experiments:
            exp_id = exp.experiment_id
            current_exp_ids.add(exp_id)
            total = exp.total_jobs
            finished = exp.finished_jobs

            # Format started time and duration
            started = "-"
            duration = "-"
            if exp.started_at:
                started = datetime.fromtimestamp(exp.started_at).strftime(
                    "%Y-%m-%d %H:%M"
                )
                # No end time means still running - show elapsed time
                end_time = exp.ended_at if exp.ended_at else time_module.time()
                duration = format_duration(end_time - exp.started_at)

            # Cell values keyed by column key, in the same order as _COLUMNS
            cells = {
                "id": exp_id,
                # Get hostname (may be None for older experiments)
                "host": getattr(exp, "hostname", None) or "-",
                "jobs": f"{finished}/{total}",
                "status": self._format_status(total, finished, exp.failed_jobs),
                "started": started,
                "duration": duration,
            }

            # Update existing row or add new one
            if exp_id in existing_keys:
                for column_key, value in cells.items():
                    table.update_cell(exp_id, column_key, value, update_width=True)
            else:
                table.add_row(*cells.values(), key=exp_id)

        # Remove rows for experiments that no longer exist
        for old_exp_id in existing_keys - current_exp_ids:
            table.remove_row(old_exp_id)

        # Update collapsed header if viewing an experiment
        if self.collapsed and self.current_experiment:
            self._update_collapsed_header(self.current_experiment)

    def on_data_table_row_selected(self, event: DataTable.RowSelected) -> None:
        """Handle experiment selection"""
        if event.row_key:
            self.current_experiment = str(event.row_key.value)
            self.collapse_to_experiment(self.current_experiment)
            self.post_message(ExperimentSelected(str(event.row_key.value)))

    def _update_collapsed_header(self, experiment_id: str) -> None:
        """Update the collapsed experiment header with current stats

        Does nothing when ``experiment_id`` is not in ``self.experiments``.
        """
        exp_info = next(
            (exp for exp in self.experiments if exp.experiment_id == experiment_id),
            None,
        )
        if not exp_info:
            return

        status = self._format_status(
            exp_info.total_jobs, exp_info.finished_jobs, exp_info.failed_jobs
        )

        collapsed_label = self.query_one("#collapsed-experiment-info", Label)
        collapsed_label.update(f"📊 {experiment_id} - {status} (click to go back)")

    def collapse_to_experiment(self, experiment_id: str) -> None:
        """Collapse the experiments list to show only the selected experiment"""
        self._update_collapsed_header(experiment_id)

        # Hide table, show collapsed header
        self.query_one("#experiments-table-container").add_class("hidden")
        self.query_one("#collapsed-header").remove_class("hidden")
        self.collapsed = True

    def expand_experiments(self) -> None:
        """Expand back to full experiments list"""
        # Show table, hide collapsed header
        self.query_one("#collapsed-header").add_class("hidden")
        self.query_one("#experiments-table-container").remove_class("hidden")
        self.collapsed = False
        self.current_experiment = None

        # Focus the experiments table
        table = self.query_one("#experiments-table", DataTable)
        table.focus()

    def on_click(self) -> None:
        """Handle clicks on the widget"""
        if self.collapsed:
            self.expand_experiments()
            self.post_message(ExperimentDeselected())
427
+
428
+
429
class ExperimentSelected(Message):
    """Message posted when an experiment has been selected.

    Attributes:
        experiment_id: ID of the selected experiment.
    """

    def __init__(self, experiment_id: str) -> None:
        super().__init__()
        self.experiment_id = experiment_id
435
+
436
+
437
class ExperimentDeselected(Message):
    """Message posted when the current experiment has been deselected.

    Carries no payload.
    """
441
+
442
+
443
class JobSelected(Message):
    """Message posted when a job has been selected.

    Attributes:
        job_id: ID of the selected job.
        experiment_id: ID of the experiment the job is shown under.
    """

    def __init__(self, job_id: str, experiment_id: str) -> None:
        super().__init__()
        self.job_id, self.experiment_id = job_id, experiment_id
450
+
451
+
452
class JobDeselected(Message):
    """Message posted when returning from the job detail view.

    Carries no payload.
    """
456
+
457
+
458
class ViewJobLogs(Message):
    """Message posted when the user wants to view the logs of a job.

    Attributes:
        job_path: Path of the job, as a string.
        task_id: Task identifier of the job.
    """

    def __init__(self, job_path: str, task_id: str) -> None:
        super().__init__()
        self.job_path, self.task_id = job_path, task_id
465
+
466
+
467
class ViewJobLogsRequest(Message):
    """Message posted when the user requests logs from the jobs table.

    Attributes:
        job_id: ID of the job whose logs are requested.
        experiment_id: ID of the experiment the job is shown under.
    """

    def __init__(self, job_id: str, experiment_id: str) -> None:
        super().__init__()
        self.job_id, self.experiment_id = job_id, experiment_id
474
+
475
+
476
class LogsSyncComplete(Message):
    """Message posted when a remote log sync has completed.

    Attributes:
        log_files: List of log files made available by the sync.
        job_id: ID of the job the logs belong to.
    """

    def __init__(self, log_files: list, job_id: str) -> None:
        super().__init__()
        self.log_files, self.job_id = log_files, job_id
483
+
484
+
485
class LogsSyncFailed(Message):
    """Message posted when a remote log sync has failed.

    Attributes:
        error: Description of the failure.
    """

    def __init__(self, error: str) -> None:
        super().__init__()
        self.error = error
491
+
492
+
493
class DeleteJobRequest(Message):
    """Message posted when the user requests deletion of a job.

    Attributes:
        job_id: ID of the job to delete.
        experiment_id: ID of the experiment the job is shown under.
    """

    def __init__(self, job_id: str, experiment_id: str) -> None:
        super().__init__()
        self.job_id, self.experiment_id = job_id, experiment_id
500
+
501
+
502
class DeleteExperimentRequest(Message):
    """Message posted when the user requests deletion of an experiment.

    Attributes:
        experiment_id: ID of the experiment to delete.
    """

    def __init__(self, experiment_id: str) -> None:
        super().__init__()
        self.experiment_id = experiment_id
508
+
509
+
510
class KillJobRequest(Message):
    """Message posted when the user requests killing a running job.

    Attributes:
        job_id: ID of the job to kill.
        experiment_id: ID of the experiment the job is shown under.
    """

    def __init__(self, job_id: str, experiment_id: str) -> None:
        super().__init__()
        self.job_id, self.experiment_id = job_id, experiment_id
517
+
518
+
519
class KillExperimentRequest(Message):
    """Message posted when the user requests killing all running jobs of an experiment.

    Attributes:
        experiment_id: ID of the experiment whose jobs should be killed.
    """

    def __init__(self, experiment_id: str) -> None:
        super().__init__()
        self.experiment_id = experiment_id
525
+
526
+
527
class FilterChanged(Message):
    """Message posted when the search filter changes.

    Attributes:
        filter_fn: The new filter, stored as given by the sender.
            NOTE(review): presumably a predicate over jobs (or None to
            clear the filter) - confirm against the sender and handler.
    """

    def __init__(self, filter_fn) -> None:
        super().__init__()
        self.filter_fn = filter_fn
533
+
534
+
535
class ServicesList(Vertical):
    """Widget displaying services for selected experiment

    Services are retrieved from WorkspaceStateProvider.get_services() which
    abstracts away whether services are live (from scheduler) or recreated
    from database state_dict. The UI treats all services uniformly.

    Service objects are accessed defensively: ``state``, ``url``,
    ``description``, ``get_url`` and ``stop`` are all treated as optional.
    """

    BINDINGS = [
        Binding("s", "start_service", "Start"),
        Binding("x", "stop_service", "Stop"),
        Binding("u", "copy_url", "Copy URL", show=False),
    ]

    # State icons for display
    STATE_ICONS = {
        "STOPPED": "⏹",
        "STARTING": "⏳",
        "RUNNING": "▶",
        "STOPPING": "⏳",
    }

    def __init__(self, state_provider: WorkspaceStateProvider) -> None:
        """Create the widget.

        Args:
            state_provider: Source of service information; its
                ``get_services(experiment_id)`` method is called on refresh.
        """
        super().__init__()
        self.state_provider = state_provider
        self.current_experiment: Optional[str] = None
        self._services: dict = {}  # service_id -> Service object

    def compose(self) -> ComposeResult:
        """Build the services table."""
        yield DataTable(id="services-table", cursor_type="row")

    def on_mount(self) -> None:
        """Set up the services table"""
        table = self.query_one("#services-table", DataTable)
        table.add_columns("ID", "Description", "State", "URL")
        table.cursor_type = "row"

    def set_experiment(self, experiment_id: Optional[str]) -> None:
        """Set the current experiment and refresh services"""
        self.current_experiment = experiment_id
        self.refresh_services()

    def refresh_services(self) -> None:
        """Refresh the services list from state provider

        The table and the internal service map are rebuilt from scratch;
        both are left empty when no experiment is selected.
        """
        table = self.query_one("#services-table", DataTable)
        table.clear()
        self._services = {}

        if not self.current_experiment:
            return

        # Get services from state provider (handles live vs DB automatically)
        services = self.state_provider.get_services(self.current_experiment)
        self.log.info(
            f"refresh_services got {len(services)} services: "
            f"{[(s.id, id(s), getattr(s, 'url', None)) for s in services]}"
        )

        for service in services:
            service_id = service.id
            self._services[service_id] = service

            state_name = service.state.name if hasattr(service, "state") else "UNKNOWN"
            state_icon = self.STATE_ICONS.get(state_name, "?")
            url = getattr(service, "url", None) or "-"
            description = (
                service.description() if hasattr(service, "description") else ""
            )

            table.add_row(
                service_id,
                description,
                f"{state_icon} {state_name}",
                url,
                key=service_id,
            )

    def _get_selected_service(self):
        """Get the currently selected Service object

        Returns:
            The service for the row under the cursor, or None when the
            table is empty, the cursor is out of range, or the row's
            service is not in the internal map.
        """
        table = self.query_one("#services-table", DataTable)
        row_index = table.cursor_row
        row_keys = list(table.rows.keys())

        # Bounds-check the cursor before indexing the key list
        if row_index is None or not 0 <= row_index < len(row_keys):
            return None

        row_key = row_keys[row_index]
        if row_key:
            return self._services.get(str(row_key.value))
        return None

    def action_start_service(self) -> None:
        """Start the selected service"""
        service = self._get_selected_service()
        if not service:
            return

        self.log.info(f"Starting service {service.id} (id={id(service)})")

        try:
            if hasattr(service, "get_url"):
                url = service.get_url()
                # Read `url` defensively, as elsewhere in this class: a
                # missing attribute must not turn a successful start into
                # a "Failed to start service" notification.
                service_url = getattr(service, "url", None)
                self.log.info(f"Service started, url={url}, service.url={service_url}")
                self.notify(f"Service started: {url}", severity="information")
            else:
                self.notify("Service does not support starting", severity="warning")
            self.refresh_services()
        except Exception as e:
            self.notify(f"Failed to start service: {e}", severity="error")

    def action_stop_service(self) -> None:
        """Stop the selected service"""
        service = self._get_selected_service()
        if not service:
            return

        from experimaestro.scheduler.services import ServiceState

        # `state` is optional on service objects (see refresh_services);
        # read it defensively so a stateless service does not crash here.
        if getattr(service, "state", None) == ServiceState.STOPPED:
            self.notify("Service is not running", severity="warning")
            return

        try:
            if hasattr(service, "stop"):
                service.stop()
                self.notify(f"Service stopped: {service.id}", severity="information")
            else:
                self.notify("Service does not support stopping", severity="warning")
            self.refresh_services()
        except Exception as e:
            self.notify(f"Failed to stop service: {e}", severity="error")

    def action_copy_url(self) -> None:
        """Copy the service URL to clipboard"""
        service = self._get_selected_service()
        if not service:
            return

        url = getattr(service, "url", None)
        if not url:
            self.notify("Start the service first to get URL", severity="warning")
            return

        try:
            # Imported lazily; an import failure is reported like any
            # other copy failure.
            import pyperclip

            pyperclip.copy(url)
            self.notify(f"URL copied: {url}", severity="information")
        except Exception as e:
            self.notify(f"Failed to copy: {e}", severity="error")
680
+
681
+
682
+ class JobDetailView(Widget):
683
+ """Widget displaying detailed job information"""
684
+
685
+ BINDINGS = [
686
+ Binding("l", "view_logs", "View Logs", priority=True),
687
+ ]
688
+
689
def __init__(self, state_provider: WorkspaceStateProvider) -> None:
    """Create the detail view with no job selected.

    Args:
        state_provider: Provider used to look up the job to display.
    """
    super().__init__()
    self.state_provider = state_provider

    # Nothing is selected until set_job() is called
    self.current_experiment_id: Optional[str] = None
    self.current_job_id: Optional[str] = None
    # NOTE(review): annotated as dict, but action_view_logs reads it via
    # attributes (.path, .task_id) - the annotation looks stale; confirm.
    self.job_data: Optional[dict] = None
695
+
696
def compose(self) -> ComposeResult:
    """Build the section title and the (initially empty) detail labels."""
    yield Label("Job Details", classes="section-title")

    # NOTE(review): nesting reconstructed from a listing that lost its
    # indentation; every label below is assumed to sit inside the
    # "job-detail-content" container - confirm against the real file.
    with Vertical(id="job-detail-content"):
        for label_id in (
            "job-id-label",
            "job-task-label",
            "job-status-label",
            "job-path-label",
            "job-times-label",
        ):
            yield Label("", id=label_id)

        yield Label("Tags:", classes="subsection-title")
        yield Label("", id="job-tags-label")

        yield Label("Progress:", classes="subsection-title")
        yield Label("", id="job-progress-label")

        yield Label("", id="job-logs-hint")
709
+
710
def action_view_logs(self) -> None:
    """Post a ViewJobLogs message for the displayed job.

    Does nothing unless job data is loaded and has both a path and a
    task ID.
    """
    job = self.job_data
    if not (job and job.path and job.task_id):
        return

    self.post_message(ViewJobLogs(str(job.path), job.task_id))
716
+
717
def set_job(self, job_id: str, experiment_id: str) -> None:
    """Select the job to display and refresh the view.

    Args:
        job_id: ID of the job to show.
        experiment_id: ID of the experiment the job is shown under.
    """
    self.current_job_id, self.current_experiment_id = job_id, experiment_id
    self.refresh_job_detail()
722
+
723
def refresh_job_detail(self) -> None:
    """Reload the current job from the state provider and update every label

    Returns silently when no job/experiment is selected; logs and returns
    when the state provider does not know the job.
    """
    if not (self.current_job_id and self.current_experiment_id):
        return

    job = self.state_provider.get_job(
        self.current_job_id, self.current_experiment_id
    )
    if not job:
        self.log(f"Job not found: {self.current_job_id}")
        return

    from datetime import datetime
    import time as time_module

    self.job_data = job

    def show(selector: str, text: str) -> None:
        # All detail widgets are labels looked up by CSS id
        self.query_one(selector, Label).update(text)

    def as_timestamp(ts) -> str:
        # Missing (falsy) timestamps are rendered as a dash
        if not ts:
            return "-"
        return datetime.fromtimestamp(ts).strftime("%Y-%m-%d %H:%M:%S")

    # Identity
    show("#job-id-label", f"Job ID: {job.identifier}")
    show("#job-task-label", f"Task: {job.task_id}")

    # Status: icon, state name and (when present) the failure reason
    state_name = job.state.name if job.state else "unknown"
    reason = getattr(job, "failure_reason", None)
    status_line = f"{get_status_icon(state_name, reason)} {state_name}"
    if reason:
        status_line += f" ({reason.name})"
    show("#job-status-label", f"Status: {status_line}")

    # Locator
    show("#job-path-label", f"Locator: {job.locator or '-'}")

    # Timestamps and elapsed time
    submitted = as_timestamp(job.submittime)
    start = as_timestamp(job.starttime)
    end = as_timestamp(job.endtime)

    if not job.starttime:
        duration = "-"
    elif job.endtime:
        duration = format_duration(job.endtime - job.starttime)
    else:
        # Still running: measure against the current time
        duration = format_duration(time_module.time() - job.starttime) + " (running)"

    show(
        "#job-times-label",
        f"Submitted: {submitted} | Start: {start} | End: {end} | Duration: {duration}",
    )

    # Tags (a mapping of tag name to value)
    tags = job.tags
    show(
        "#job-tags-label",
        ", ".join(f"{k}={v}" for k, v in tags.items()) if tags else "(no tags)",
    )

    # Progress: one line per reported level, indented by its depth
    progress_lines = [
        f"{' ' * entry.get('level', 0)}{entry.get('progress', 0) * 100:.1f}% "
        f"{entry.get('desc', '')}"
        for entry in (job.progress or [])
    ]
    show("#job-progress-label", "\n".join(progress_lines) or "-")

    # Log hint: log files are named after the last component of the task ID
    # (e.g. "evaluate" for "mnist_xp.learn.evaluate")
    if job.path and job.task_id:
        stem = job.task_id.split(".")[-1]
        has_logs = (job.path / f"{stem}.out").exists() or (
            job.path / f"{stem}.err"
        ).exists()
        hint = (
            "[bold cyan]Press 'l' to view logs[/bold cyan]"
            if has_logs
            else "(no log files found)"
        )
    else:
        hint = ""
    show("#job-logs-hint", hint)
821
+
822
+
823
class SearchBar(Widget):
    """Filter input for the jobs list, with syntax hints and error feedback

    The widget is either fully hidden (no filter), showing the input
    (editing), or showing a one-line indicator (filter applied, input
    hidden). Every change of the input posts a ``FilterChanged`` message.
    """

    visible: reactive[bool] = reactive(False)
    _keep_filter: bool = False  # Set just before hiding to keep the filter
    _query_valid: bool = False  # Whether the last typed query could be parsed

    def __init__(self) -> None:
        super().__init__()
        self.active_query = ""  # Text of the last successfully parsed query
        self.filter_fn = None

    def compose(self) -> ComposeResult:
        # Indicator shown while a filter is applied but the input is hidden
        yield Static("", id="active-filter")
        # Input, syntax hints and error message
        with Vertical(id="search-container"):
            yield Input(
                placeholder="Filter: @state = 'done', @name ~ 'pattern', tag = 'value'",
                id="search-input",
            )
            yield Static(
                "Syntax: @state = 'done' | @name ~ 'regex' | tag = 'value' | and/or",
                id="search-hints",
            )
            yield Static("", id="search-error")

    def on_mount(self) -> None:
        """Start with the widget and all of its parts hidden"""
        self.display = False
        for selector in ("#search-container", "#active-filter", "#search-error"):
            self.query_one(selector).display = False

    def watch_visible(self, visible: bool) -> None:
        """React to ``visible`` changes by showing or hiding the parts"""
        container = self.query_one("#search-container")
        indicator = self.query_one("#active-filter")
        error_box = self.query_one("#search-error")

        if visible:
            # Editing mode: show the input and give it the focus
            self.display = True
            container.display = True
            indicator.display = False
            self.query_one("#search-input", Input).focus()
            return

        # Hiding: drop the filter unless the caller asked to keep it
        if not self._keep_filter:
            self.query_one("#search-input", Input).value = ""
            self.filter_fn = None
            self.active_query = ""
            self._query_valid = False
        self._keep_filter = False

        # With an active filter only the indicator stays on screen;
        # otherwise the whole widget disappears
        filter_active = self.filter_fn is not None
        self.display = filter_active
        container.display = False
        error_box.display = False
        if filter_active:
            indicator.update(
                f"Filter: {self.active_query} (/ to edit, c to clear)"
            )
        indicator.display = filter_active

    def on_input_changed(self, event: Input.Changed) -> None:
        """Re-parse the filter expression each time the input changes"""
        text = event.value.strip()
        field = self.query_one("#search-input", Input)
        error_box = self.query_one("#search-error", Static)

        if text:
            try:
                from experimaestro.cli.filter import createFilter

                self.filter_fn = createFilter(text)
                self._query_valid = True
                self.active_query = text
                self.post_message(FilterChanged(self.filter_fn))
                field.remove_class("error")
                field.add_class("valid")
                error_box.display = False
            except Exception as exc:
                # Any failure is reported as an invalid query and clears
                # the filter
                self.filter_fn = None
                self._query_valid = False
                self.post_message(FilterChanged(None))
                field.remove_class("valid")
                field.add_class("error")
                error_box.update(f"Invalid query: {str(exc)[:50]}")
                error_box.display = True
            return

        # Empty input: no filter, neutral styling, no error
        self.filter_fn = None
        self._query_valid = False
        self.post_message(FilterChanged(None))
        field.remove_class("error", "valid")
        error_box.display = False

    def on_input_submitted(self, event: Input.Submitted) -> None:
        """Apply the filter and hide the input, but only for a valid query"""
        if not self._query_valid or self.filter_fn is None:
            # Invalid query: leave the input focused so it can be corrected
            return

        # Tell watch_visible not to clear the filter while hiding
        self._keep_filter = True
        self.visible = False
        # Let the parent move the focus to the jobs table
        self.post_message(SearchApplied())
937
+
938
+
939
class SearchApplied(Message):
    """Message posted when a search filter is confirmed with Enter

    Carries no payload.
    """
943
+
944
+
945
class JobsTable(Vertical):
    """Widget displaying jobs for selected experiment

    Combines a ``SearchBar`` with a ``DataTable`` holding one row per job.
    Rows are keyed by the full job identifier; the first column only shows
    its first 7 characters.
    """

    BINDINGS = [
        Binding("d", "delete_job", "Delete", show=False),
        Binding("k", "kill_job", "Kill", show=False),
        Binding("l", "view_logs", "Logs"),
        Binding("f", "copy_path", "Copy Path", show=False),
        Binding("/", "toggle_search", "Search"),
        Binding("c", "clear_filter", "Clear", show=False),
        Binding("r", "refresh_live", "Refresh"),
        Binding("S", "sort_by_status", "Sort ⚑", show=False),
        Binding("T", "sort_by_task", "Sort Task", show=False),
        Binding("D", "sort_by_submitted", "Sort Date", show=False),
        Binding("escape", "clear_search", show=False, priority=True),
    ]

    # Track current sort state
    _sort_column: Optional[str] = None
    _sort_reverse: bool = False
    _needs_rebuild: bool = True  # Start with rebuild needed
    # Job id -> state name seen by the last refresh done while sorting by status
    _last_statuses: Optional[dict] = None

    # Status sort order (for sorting by status)
    STATUS_ORDER = {
        "running": 0,
        "waiting": 1,
        "error": 2,
        "done": 3,
        "unscheduled": 4,
        "phantom": 5,
    }

    # Failure reason sort order (within error status)
    # More actionable failures first
    FAILURE_ORDER = {
        "TIMEOUT": 0,  # Might just need retry
        "MEMORY": 1,  # Might need resource adjustment
        "DEPENDENCY": 2,  # Need to fix upstream job first
        "FAILED": 3,  # Generic failure
    }

    # Column key to display name mapping (in table column order)
    COLUMN_LABELS = {
        "job_id": "ID",
        "task": "Task",
        "status": "⚑",
        "tags": "Tags",
        "submitted": "Submitted",
        "duration": "Duration",
    }

    # Columns that support sorting (column key -> sort column name)
    SORTABLE_COLUMNS = {
        "status": "status",
        "task": "task",
        "submitted": "submitted",
    }

    def __init__(self, state_provider: WorkspaceStateProvider) -> None:
        super().__init__()
        self.state_provider = state_provider
        self.filter_fn = None
        self.current_experiment: Optional[str] = None

    def compose(self) -> ComposeResult:
        yield SearchBar()
        yield DataTable(id="jobs-table", cursor_type="row")

    def on_mount(self) -> None:
        """Initialize the jobs table"""
        table = self.query_one("#jobs-table", DataTable)
        table.add_column("ID", key="job_id")
        table.add_column("Task", key="task")
        table.add_column("⚑", key="status", width=6)
        table.add_column("Tags", key="tags")
        table.add_column("Submitted", key="submitted")
        table.add_column("Duration", key="duration")
        table.cursor_type = "row"
        table.zebra_stripes = True

    # --- Search / filter -------------------------------------------------

    def action_toggle_search(self) -> None:
        """Toggle search bar visibility"""
        search_bar = self.query_one(SearchBar)
        search_bar.visible = not search_bar.visible

    def action_clear_filter(self) -> None:
        """Clear the active filter (no-op when no filter is set)"""
        if self.filter_fn is None:
            return

        search_bar = self.query_one(SearchBar)
        search_bar.query_one("#search-input", Input).value = ""
        search_bar.filter_fn = None
        search_bar.active_query = ""
        search_bar._query_valid = False
        # Hide the SearchBar completely
        search_bar.display = False
        search_bar.query_one("#search-container").display = False
        search_bar.query_one("#active-filter").display = False
        search_bar.query_one("#search-error").display = False
        self.filter_fn = None
        self.refresh_jobs()
        self.notify("Filter cleared", severity="information")

    def action_clear_search(self) -> None:
        """Handle escape: hide search bar if visible, or go back"""
        search_bar = self.query_one(SearchBar)
        if search_bar.visible:
            # Search bar visible - hide it and clear filter
            search_bar.visible = False
            self.filter_fn = None
            self.refresh_jobs()
            # Focus the jobs table
            self.query_one("#jobs-table", DataTable).focus()
        else:
            # Search bar hidden - go back (keep filter)
            self.app.action_go_back()

    def on_filter_changed(self, message: FilterChanged) -> None:
        """Apply new filter"""
        self.filter_fn = message.filter_fn
        self.refresh_jobs()

    def on_search_applied(self, message: SearchApplied) -> None:
        """Focus jobs table when search is applied"""
        self.query_one("#jobs-table", DataTable).focus()

    # --- Sorting ---------------------------------------------------------

    def _toggle_sort(self, column: str) -> None:
        """Sort by ``column``, flipping the direction if it is already active"""
        if self._sort_column == column:
            self._sort_reverse = not self._sort_reverse
        else:
            self._sort_column = column
            self._sort_reverse = False
        self._needs_rebuild = True
        self._update_column_headers()
        self.refresh_jobs()

    def action_sort_by_status(self) -> None:
        """Sort jobs by status"""
        self._toggle_sort("status")
        order = "desc" if self._sort_reverse else "asc"
        self.notify(f"Sorted by status ({order})", severity="information")

    def action_sort_by_task(self) -> None:
        """Sort jobs by task"""
        self._toggle_sort("task")
        order = "desc" if self._sort_reverse else "asc"
        self.notify(f"Sorted by task ({order})", severity="information")

    def action_sort_by_submitted(self) -> None:
        """Sort jobs by submission time"""
        self._toggle_sort("submitted")
        order = "newest first" if self._sort_reverse else "oldest first"
        self.notify(f"Sorted by date ({order})", severity="information")

    def on_data_table_header_selected(self, event: DataTable.HeaderSelected) -> None:
        """Handle column header click for sorting"""
        col_key = str(event.column_key.value) if event.column_key else None
        if col_key and col_key in self.SORTABLE_COLUMNS:
            self._toggle_sort(self.SORTABLE_COLUMNS[col_key])

    def _update_column_headers(self) -> None:
        """Update column headers with sort indicators"""
        table = self.query_one("#jobs-table", DataTable)
        for column in table.columns.values():
            col_key = str(column.key.value) if column.key else None
            if not col_key or col_key not in self.COLUMN_LABELS:
                continue
            label = self.COLUMN_LABELS[col_key]
            sort_col = self.SORTABLE_COLUMNS.get(col_key)
            if sort_col and self._sort_column == sort_col:
                # Add sort indicator
                indicator = "▼" if self._sort_reverse else "▲"
                label = f"{label} {indicator}"
            column.label = label

    @classmethod
    def _get_status_sort_key(cls, job):
        """Get sort key for a job based on status and failure reason.

        Returns tuple (status_order, failure_order) for proper sorting.
        Unknown states and unknown failure reasons rank last (99).
        """
        state_name = job.state.name if job.state else "unknown"
        status_order = cls.STATUS_ORDER.get(state_name, 99)

        # Only error jobs are further ordered by failure reason
        if state_name != "error":
            return (status_order, 0)

        failure_reason = getattr(job, "failure_reason", None)
        if not failure_reason:
            return (status_order, 99)  # Unknown failure at end
        return (status_order, cls.FAILURE_ORDER.get(failure_reason.name, 99))

    def _sort_jobs(self, jobs) -> list:
        """Return a new list of ``jobs`` ordered by the current sort state

        A new list is always returned so the list handed out by the state
        provider is never reordered in place.
        """
        if self._sort_column == "status":
            # Status priority, then failure reason for errors
            return sorted(
                jobs, key=self._get_status_sort_key, reverse=self._sort_reverse
            )
        if self._sort_column == "task":
            return sorted(
                jobs, key=lambda j: j.task_id or "", reverse=self._sort_reverse
            )

        # Default: submission time, oldest first unless reversed. The first
        # key component keeps jobs without a submit time at the end in BOTH
        # directions; the direction is applied by negating the timestamp
        # (which, like reverse=True, keeps equal keys in their input order).
        sign = -1 if self._sort_reverse else 1
        return sorted(
            jobs, key=lambda j: (not j.submittime, sign * (j.submittime or 0))
        )

    # --- Selection helpers and job actions -------------------------------

    def _cursor_job_id(self) -> Optional[str]:
        """Return the full job id (row key) under the cursor

        Returns None when the table is empty or the cursor does not point at
        an existing row, so callers never index past the end of the rows.
        """
        table = self.query_one("#jobs-table", DataTable)
        row_index = table.cursor_row
        if row_index is None:
            return None
        row_keys = list(table.rows.keys())
        if not 0 <= row_index < len(row_keys):
            return None
        return str(row_keys[row_index].value)

    def _get_selected_job_id(self) -> Optional[str]:
        """Get the job ID from the currently selected row

        Returns the full identifier stored as the row key (not the shortened
        text displayed in the first column), or None when nothing is selected.
        """
        return self._cursor_job_id()

    def _post_for_selected_job(self, message_cls) -> None:
        """Post ``message_cls(job_id, experiment_id)`` for the selected job"""
        if not self.current_experiment:
            return
        job_id = self._cursor_job_id()
        if job_id is not None:
            self.post_message(message_cls(job_id, self.current_experiment))

    def action_delete_job(self) -> None:
        """Request to delete the selected job"""
        self._post_for_selected_job(DeleteJobRequest)

    def action_kill_job(self) -> None:
        """Request to kill the selected job"""
        self._post_for_selected_job(KillJobRequest)

    def action_view_logs(self) -> None:
        """Request to view logs for the selected job"""
        self._post_for_selected_job(ViewJobLogsRequest)

    def action_copy_path(self) -> None:
        """Copy the job folder path to clipboard"""
        if not self.current_experiment:
            return
        job_id = self._cursor_job_id()
        if job_id is None:
            return

        job = self.state_provider.get_job(job_id, self.current_experiment)
        if not (job and job.path):
            self.notify("No path available for this job", severity="warning")
            return

        try:
            # Imported here so that a missing pyperclip install is reported
            # through the notification below instead of crashing the action
            import pyperclip

            pyperclip.copy(str(job.path))
            self.notify(f"Path copied: {job.path}", severity="information")
        except Exception as e:
            self.notify(f"Failed to copy: {e}", severity="error")

    def action_refresh_live(self) -> None:
        """Refresh the jobs table"""
        self.refresh_jobs()
        self.notify("Jobs refreshed", severity="information")

    # --- Table content ---------------------------------------------------

    def set_experiment(self, experiment_id: Optional[str]) -> None:
        """Set the current experiment and refresh jobs"""
        self.current_experiment = experiment_id
        self.refresh_jobs()

    def _build_row(self, job, now: float) -> tuple:
        """Build the cell values of one job, in ``COLUMN_LABELS`` order

        Args:
            job: Job object returned by the state provider
            now: Current time, used as the end time of unfinished jobs
        """
        from datetime import datetime

        status = job.state.name if job.state else "unknown"

        # Status icon (running jobs show the latest reported progress)
        if status == "running":
            progress_list = job.progress or []
            if progress_list:
                progress_pct = progress_list[-1].get("progress", 0) * 100
                status_text = f"▶ {progress_pct:.0f}%"
            else:
                status_text = "▶"
        else:
            status_text = get_status_icon(status, getattr(job, "failure_reason", None))

        # All tags on a single line, tag names in bold
        tags = job.tags
        if tags:
            tags_text = Text()
            for i, (k, v) in enumerate(tags.items()):
                if i > 0:
                    tags_text.append(", ")
                tags_text.append(f"{k}", style="bold")
                tags_text.append(f"={v}")
        else:
            tags_text = Text("-")

        submitted = "-"
        if job.submittime:
            submitted = datetime.fromtimestamp(job.submittime).strftime(
                "%Y-%m-%d %H:%M"
            )

        duration = "-"
        if job.starttime:
            duration = self._format_duration((job.endtime or now) - job.starttime)

        return (
            job.identifier[:7],
            job.task_id,
            status_text,
            tags_text,
            submitted,
            duration,
        )

    def refresh_jobs(self) -> None:
        """Refresh the jobs list from state provider

        Rows are rebuilt from scratch when the sort order changed, when the
        set of jobs changed, or when a status changed while sorting by
        status; otherwise cells are updated in place so the cursor and
        scroll position are left alone.
        """
        import time as time_module

        table = self.query_one("#jobs-table", DataTable)

        if not self.current_experiment:
            return

        jobs = self.state_provider.get_jobs(self.current_experiment)
        self.log.debug(
            f"Refreshing jobs for {self.current_experiment}: {len(jobs)} jobs"
        )

        # Apply filter if set
        if self.filter_fn:
            jobs = [j for j in jobs if self.filter_fn(j)]
            self.log.debug(f"After filter: {len(jobs)} jobs")

        jobs = self._sort_jobs(jobs)

        # Did the set of displayed jobs change?
        existing_keys = {str(k.value) for k in table.rows.keys()}
        jobs_changed = existing_keys != {job.identifier for job in jobs}

        # When sorting by status, a status change alters the row order
        status_changed = False
        if self._sort_column == "status":
            current_statuses = {
                job.identifier: (job.state.name if job.state else "unknown")
                for job in jobs
            }
            status_changed = (
                not jobs_changed
                and self._last_statuses is not None
                and self._last_statuses != current_statuses
            )
            # Always remember the latest snapshot so that the next
            # comparison is not made against stale data
            self._last_statuses = current_statuses

        needs_rebuild = self._needs_rebuild or jobs_changed or status_changed
        self._needs_rebuild = False

        now = time_module.time()
        rows_data = {job.identifier: self._build_row(job, now) for job in jobs}

        if needs_rebuild:
            # Full rebuild - remember the selection, clear, re-add in order
            selected_key = self._cursor_job_id()

            table.clear()
            new_cursor_row = None
            for idx, job in enumerate(jobs):
                table.add_row(*rows_data[job.identifier], key=job.identifier)
                if selected_key == job.identifier:
                    new_cursor_row = idx

            if new_cursor_row is not None:
                table.move_cursor(row=new_cursor_row)
        else:
            # Same rows in the same order - just update cells in place
            for job_id, row_data in rows_data.items():
                for column_key, value in zip(self.COLUMN_LABELS, row_data):
                    table.update_cell(job_id, column_key, value, update_width=True)

        self.log.debug(
            f"Jobs table now has {table.row_count} rows (rebuild={needs_rebuild})"
        )

    def _format_duration(self, seconds: float) -> str:
        """Format duration in seconds to human-readable string

        Negative durations give "-". Only the two most significant units are
        shown (e.g. "2m 5s", "3h 12m", "1d 4h").
        """
        if seconds < 0:
            return "-"

        total = int(seconds)
        if total < 60:
            return f"{total}s"

        minutes, secs = divmod(total, 60)
        if total < 3600:
            return f"{minutes}m {secs}s"

        hours, minutes = divmod(minutes, 60)
        if total < 86400:
            return f"{hours}h {minutes}m"

        days, hours = divmod(hours, 24)
        return f"{days}d {hours}h"

    def on_data_table_row_selected(self, event: DataTable.RowSelected) -> None:
        """Handle job selection"""
        if event.row_key and self.current_experiment:
            job_id = str(event.row_key.value)
            self.post_message(JobSelected(job_id, self.current_experiment))
1429
+
1430
+
1431
class SizeCalculated(Message):
    """Message posted once the size of a job folder is known

    Attributes are set from the constructor arguments:
        job_id: Identifier of the job the size belongs to
        size: Size as a display string
        size_bytes: Size in bytes
    """

    def __init__(self, job_id: str, size: str, size_bytes: int) -> None:
        super().__init__()
        self.job_id, self.size, self.size_bytes = job_id, size, size_bytes
1439
+
1440
+
1441
+ class OrphanJobsScreen(Screen):
1442
+ """Screen for viewing and managing orphan jobs"""
1443
+
1444
+ BINDINGS = [
1445
+ Binding("d", "delete_selected", "Delete"),
1446
+ Binding("D", "delete_all", "Delete All", key_display="D"),
1447
+ Binding("escape", "go_back", "Back"),
1448
+ Binding("q", "go_back", "Quit"),
1449
+ Binding("r", "refresh", "Refresh"),
1450
+ Binding("f", "copy_path", "Copy Path", show=False),
1451
+ Binding("T", "sort_by_task", "Sort Task", show=False),
1452
+ Binding("Z", "sort_by_size", "Sort Size", show=False),
1453
+ ]
1454
+
1455
+ _size_cache: dict = {} # Class-level cache (formatted strings)
1456
+ _size_bytes_cache: dict = {} # Class-level cache (raw bytes for sorting)
1457
+
1458
def __init__(self, state_provider: WorkspaceStateProvider) -> None:
    """Create the screen

    Args:
        state_provider: Provider queried for the orphan jobs
    """
    super().__init__()
    self.state_provider = state_provider
    # Orphan jobs currently listed, and those still waiting for a size
    self.orphan_jobs = []
    self._pending_jobs = []
    # No explicit sort until the user asks for one
    self._sort_column: Optional[str] = None
    self._sort_reverse: bool = False
1465
+
1466
def compose(self) -> ComposeResult:
    """Lay out the screen: header, orphan jobs panel, footer"""
    yield Header()
    with Vertical(id="orphan-container"):
        # Title and summary line
        yield Static("Orphan Jobs", id="orphan-title")
        yield Static("", id="orphan-stats")
        # One row per orphan job
        yield DataTable(id="orphan-table", cursor_type="row")
        # Details of the job under the cursor
        yield Static("", id="orphan-job-info")
    yield Footer()
1474
+
1475
def on_mount(self) -> None:
    """Create the table columns and load the orphan jobs"""
    table = self.query_one("#orphan-table", DataTable)
    # (label, key, fixed width or None)
    for label, key, width in (
        ("⚑", "status", 3),
        ("Job ID", "job_id", 10),
        ("Task", "task", None),
        ("Size", "size", 10),
    ):
        if width is None:
            table.add_column(label, key=key)
        else:
            table.add_column(label, key=key, width=width)
    self.refresh_orphans()
1483
+
1484
def action_sort_by_task(self) -> None:
    """Sort the table by task name, flipping direction on repeated use"""
    already_by_task = self._sort_column == "task"
    self._sort_column = "task"
    # First use sorts ascending; later uses toggle the direction
    self._sort_reverse = already_by_task and not self._sort_reverse
    self._rebuild_table()
    direction = "desc" if self._sort_reverse else "asc"
    self.notify(f"Sorted by task ({direction})", severity="information")
1494
+
1495
def action_sort_by_size(self) -> None:
    """Sort the table by folder size, flipping direction on repeated use"""
    if self._sort_column != "size":
        # First use: largest folders first
        self._sort_column, self._sort_reverse = "size", True
    else:
        self._sort_reverse = not self._sort_reverse
    self._rebuild_table()
    direction = "largest first" if self._sort_reverse else "smallest first"
    self.notify(f"Sorted by size ({direction})", severity="information")
1505
+
1506
+ def _get_sorted_jobs(self):
1507
+ """Return jobs sorted by current sort column"""
1508
+ jobs = self.orphan_jobs[:]
1509
+ if self._sort_column == "task":
1510
+ jobs.sort(key=lambda j: j.task_id or "", reverse=self._sort_reverse)
1511
+ elif self._sort_column == "size":
1512
+ # Sort by raw bytes, jobs not in cache go to end
1513
+ jobs.sort(
1514
+ key=lambda j: self._size_bytes_cache.get(j.identifier, -1),
1515
+ reverse=self._sort_reverse,
1516
+ )
1517
+ return jobs
1518
+
1519
def _rebuild_table(self) -> None:
    """Clear the table and re-add every orphan job in the current sort order"""
    table = self.query_one("#orphan-table", DataTable)
    table.clear()

    for job in self._get_sorted_jobs():
        state_name = job.state.name if job.state else "unknown"
        icon = get_status_icon(state_name, getattr(job, "failure_reason", None))
        table.add_row(
            icon,
            job.identifier[:7],
            job.task_id,
            # Sizes are computed in the background; show a placeholder
            # until the cache has an entry for this job
            self._size_cache.get(job.identifier, "waiting"),
            key=job.identifier,
        )
1540
+
1541
def refresh_orphans(self) -> None:
    """Reload the orphan jobs, redraw the table and start size calculation"""
    # Keep only the orphan jobs whose folder still exists
    self.orphan_jobs = [
        job
        for job in self.state_provider.get_orphan_jobs()
        if job.path and job.path.exists()
    ]

    count = len(self.orphan_jobs)
    self.query_one("#orphan-stats", Static).update(f"Found {count} orphan jobs")

    # Jobs without a cached size still need one
    self._pending_jobs = [
        job for job in self.orphan_jobs if job.identifier not in self._size_cache
    ]

    self._rebuild_table()

    # Sizes are computed one job at a time, starting here
    if self._pending_jobs:
        self._calculate_next_size()
1562
+
1563
+ def _calculate_next_size(self) -> None:
1564
+ """Calculate size for the next pending job using a worker"""
1565
+ if not self._pending_jobs:
1566
+ return
1567
+
1568
+ job = self._pending_jobs.pop(0)
1569
+ # Update to "calc..."
1570
+ self._update_size_cell(job.identifier, "calc...")
1571
+ # Run calculation in worker thread
1572
+ self.run_worker(
1573
+ self._calc_size_worker(job.identifier, job.path),
1574
+ thread=True,
1575
+ )
1576
+
1577
async def _calc_size_worker(self, job_id: str, path):
    """Compute the size of ``path``, cache it and announce the result

    Stores the formatted size and the raw byte count in the two size
    caches under ``job_id``, then posts a ``SizeCalculated`` message.
    """
    raw_bytes = await self._get_folder_size_async(path)
    formatted = self._format_size(raw_bytes)
    self._size_cache[job_id], self._size_bytes_cache[job_id] = formatted, raw_bytes
    self.post_message(SizeCalculated(job_id, formatted, raw_bytes))
1584
+
1585
def on_size_calculated(self, message: SizeCalculated) -> None:
    """Record a computed size, show it, then move on to the next job"""
    job_id = message.job_id
    self._size_bytes_cache[job_id] = message.size_bytes
    self._update_size_cell(job_id, message.size)
    # Sizes are computed one at a time: start the next pending job
    self._calculate_next_size()
1591
+
1592
+ @staticmethod
1593
+ async def _get_folder_size_async(path) -> int:
1594
+ """Calculate total size of a folder using du command if available"""
1595
+ import asyncio
1596
+ import shutil
1597
+ import sys
1598
+
1599
+ # Try using du command for better performance
1600
+ if shutil.which("du"):
1601
+ try:
1602
+ if sys.platform == "darwin":
1603
+ # macOS: du -sk gives size in KB
1604
+ proc = await asyncio.create_subprocess_exec(
1605
+ "du",
1606
+ "-sk",
1607
+ str(path),
1608
+ stdout=asyncio.subprocess.PIPE,
1609
+ stderr=asyncio.subprocess.DEVNULL,
1610
+ )
1611
+ stdout, _ = await asyncio.wait_for(proc.communicate(), timeout=30)
1612
+ if proc.returncode == 0 and stdout:
1613
+ # Output format: "SIZE\tPATH"
1614
+ size_kb = int(stdout.decode().split()[0])
1615
+ return size_kb * 1024
1616
+ else:
1617
+ # Linux: du -sb gives size in bytes
1618
+ proc = await asyncio.create_subprocess_exec(
1619
+ "du",
1620
+ "-sb",
1621
+ str(path),
1622
+ stdout=asyncio.subprocess.PIPE,
1623
+ stderr=asyncio.subprocess.DEVNULL,
1624
+ )
1625
+ stdout, _ = await asyncio.wait_for(proc.communicate(), timeout=30)
1626
+ if proc.returncode == 0 and stdout:
1627
+ # Output format: "SIZE\tPATH"
1628
+ return int(stdout.decode().split()[0])
1629
+ except (asyncio.TimeoutError, ValueError, IndexError, OSError):
1630
+ pass # Fall back to Python implementation
1631
+
1632
+ # Fallback: Python implementation
1633
+ return OrphanJobsScreen._get_folder_size_sync(path)
1634
+
1635
+ @staticmethod
1636
+ def _get_folder_size_sync(path) -> int:
1637
+ """Calculate total size of a folder using Python (fallback)"""
1638
+ total = 0
1639
+ try:
1640
+ for entry in path.rglob("*"):
1641
+ if entry.is_file():
1642
+ total += entry.stat().st_size
1643
+ except (OSError, PermissionError):
1644
+ pass
1645
+ return total
1646
+
1647
+ @staticmethod
1648
+ def _format_size(size: int) -> str:
1649
+ """Format size in human-readable format"""
1650
+ for unit in ["B", "KB", "MB", "GB"]:
1651
+ if size < 1024:
1652
+ return f"{size:.1f}{unit}" if unit != "B" else f"{size}{unit}"
1653
+ size /= 1024
1654
+ return f"{size:.1f}TB"
1655
+
1656
def _update_size_cell(self, job_id: str, value: str = None) -> None:
    """Write a size string into the "size" column of a job's row.

    When ``value`` is None the cached size for ``job_id`` is used, or "-" if
    nothing is cached. Any error is ignored on purpose: the table may have
    changed since the update was requested.
    """
    try:
        orphan_table = self.query_one("#orphan-table", DataTable)
        if value is not None:
            text = value
        else:
            text = self._size_cache.get(job_id, "-")
        orphan_table.update_cell(job_id, "size", text)
    except Exception:
        pass  # Table may have changed
1666
+
1667
def on_data_table_row_selected(self, event: DataTable.RowSelected) -> None:
    """Refresh the job info line when a table row is selected."""
    # The event payload is not needed: the info is read from the cursor
    self._update_job_info()
1670
+
1671
def on_data_table_row_highlighted(self, event: DataTable.RowHighlighted) -> None:
    """Refresh the job info line when the cursor moves to another row."""
    # The event payload is not needed: the info is read from the cursor
    self._update_job_info()
1674
+
1675
def _update_job_info(self) -> None:
    """Update the job info display for the row under the cursor.

    Shows ``Path: ... | Size: ...`` for the highlighted orphan job, using the
    cached size or "calculating..." when none is cached yet. The display is
    cleared when the cursor does not designate an existing row, or when the
    matching job is unknown or has no path.
    """
    table = self.query_one("#orphan-table", DataTable)
    info = self.query_one("#orphan-job-info", Static)

    row_keys = list(table.rows.keys())
    cursor_row = table.cursor_row
    # Guard the lookup: the cursor may not index an existing row
    # (e.g. the table is empty), which used to raise IndexError.
    if cursor_row is None or not 0 <= cursor_row < len(row_keys):
        info.update("")
        return

    row_key = row_keys[cursor_row]
    if not row_key:
        info.update("")
        return

    job_id = str(row_key.value)
    job = next((j for j in self.orphan_jobs if j.identifier == job_id), None)
    if not job or not job.path:
        # Do not leave the previous job's details on screen
        info.update("")
        return

    size = self._size_cache.get(job.identifier, "calculating...")
    info.update(f"Path: {job.path} | Size: {size}")
1693
+
1694
def action_copy_path(self) -> None:
    """Copy the highlighted job's folder path to the clipboard.

    Nothing happens when the cursor does not designate an existing row or the
    job has no path. Any failure, including ``pyperclip`` not being
    importable, is reported through an error notification.
    """
    table = self.query_one("#orphan-table", DataTable)
    row_keys = list(table.rows.keys())
    cursor_row = table.cursor_row
    # Guard the lookup: the cursor may not index an existing row
    # (e.g. the table is empty), which used to raise IndexError.
    if cursor_row is None or not 0 <= cursor_row < len(row_keys):
        return

    row_key = row_keys[cursor_row]
    if not row_key:
        return

    job_id = str(row_key.value)
    job = next((j for j in self.orphan_jobs if j.identifier == job_id), None)
    if not job or not job.path:
        return

    try:
        # Imported inside the try so that a missing clipboard module is
        # reported to the user rather than crashing the action.
        import pyperclip

        pyperclip.copy(str(job.path))
        self.notify("Path copied", severity="information")
    except Exception as e:
        self.notify(f"Failed to copy: {e}", severity="error")
1712
+
1713
def action_delete_selected(self) -> None:
    """Delete the orphan job under the cursor (after confirmation).

    Nothing happens when the cursor does not designate an existing row or no
    orphan job matches the row key.
    """
    table = self.query_one("#orphan-table", DataTable)
    row_keys = list(table.rows.keys())
    cursor_row = table.cursor_row
    # Guard the lookup: the cursor may not index an existing row
    # (e.g. the table is empty), which used to raise IndexError.
    if cursor_row is None or not 0 <= cursor_row < len(row_keys):
        return

    row_key = row_keys[cursor_row]
    if not row_key:
        return

    job_id = str(row_key.value)
    job = next((j for j in self.orphan_jobs if j.identifier == job_id), None)
    if job:
        self._delete_job(job)
1725
+
1726
def _delete_job(self, job) -> None:
    """Ask for confirmation, then delete one orphan job.

    On confirmation the state provider's ``delete_job_safely`` is called; its
    message is shown as a notification and the orphan list is refreshed when
    the deletion succeeded.
    """

    def handle_delete(confirmed: bool) -> None:
        if not confirmed:
            return
        success, msg = self.state_provider.delete_job_safely(job)
        if not success:
            self.notify(msg, severity="error")
            return
        self.notify(msg, severity="information")
        self.refresh_orphans()

    self.app.push_screen(
        DeleteConfirmScreen("orphan job", job.identifier), handle_delete
    )
1742
+
1743
def action_delete_all(self) -> None:
    """Delete every non-running orphan job after a single confirmation.

    Warns and returns when there are no orphan jobs, or when all of them are
    running. Otherwise each deletable job is removed with
    ``cascade_orphans=False`` and orphan partials are cleaned up once at the
    end. The final notification reports failures instead of hiding them.
    """
    if not self.orphan_jobs:
        self.notify("No orphan jobs to delete", severity="warning")
        return

    # Filter out running jobs
    deletable_jobs = [j for j in self.orphan_jobs if not j.state.running()]

    if not deletable_jobs:
        self.notify("All orphan jobs are running", severity="warning")
        return

    def handle_delete_all(confirmed: bool) -> None:
        if not confirmed:
            return

        deleted = 0
        for job in deletable_jobs:
            success, _ = self.state_provider.delete_job_safely(
                job, cascade_orphans=False
            )
            if success:
                deleted += 1

        # Clean up orphan partials once at the end
        self.state_provider.cleanup_orphan_partials(perform=True)

        failed = len(deletable_jobs) - deleted
        if failed:
            # Some deletions were refused: say so rather than report success
            self.notify(
                f"Deleted {deleted} of {len(deletable_jobs)} orphan jobs "
                f"({failed} failed)",
                severity="warning",
            )
        else:
            self.notify(f"Deleted {deleted} orphan jobs", severity="information")
        self.refresh_orphans()

    self.app.push_screen(
        DeleteConfirmScreen(
            "all orphan jobs",
            f"{len(deletable_jobs)} jobs",
            "This action cannot be undone",
        ),
        handle_delete_all,
    )
1780
+
1781
def action_refresh(self) -> None:
    """Reload the list of orphan jobs (bound action)."""
    self.refresh_orphans()
1784
+
1785
def action_go_back(self) -> None:
    """Close this screen and return to the previous one (bound action)."""
    self.dismiss()
1788
+
1789
+
1790
class HelpScreen(ModalScreen[None]):
    """Modal screen showing keyboard shortcuts"""

    # Both Escape and "?" run action_close, which dismisses the modal
    BINDINGS = [
        Binding("escape", "close", "Close"),
        Binding("?", "close", "Close"),
    ]

    def compose(self) -> ComposeResult:
        """Build the dialog: a title, the scrollable help text and a Close button."""
        from textual.containers import VerticalScroll

        # The bracketed tags ([bold], [bold cyan]) are styling markup embedded
        # in the text handed to Static.
        help_text = """
[bold]Keyboard Shortcuts[/bold]

[bold cyan]Navigation[/bold cyan]
q Quit application
Esc Go back / Close dialog
r Refresh data
? Show this help
j Switch to Jobs tab
s Switch to Services tab

[bold cyan]Experiments[/bold cyan]
Enter Select experiment
d Delete experiment
k Kill all running jobs

[bold cyan]Jobs[/bold cyan]
l View job logs
d Delete job
k Kill running job
/ Open search filter
c Clear search filter
S Sort by status
T Sort by task
D Sort by date
f Copy folder path

[bold cyan]Services[/bold cyan]
s Start service
x Stop service
u Copy URL

[bold cyan]Search Filter[/bold cyan]
Enter Apply filter
Esc Close and clear filter

[bold cyan]Orphan Jobs[/bold cyan]
o Show orphan jobs
T Sort by task
Z Sort by size
d Delete selected
D Delete all
f Copy folder path
"""
        with Vertical(id="help-dialog"):
            yield Static("Experimaestro Help", id="help-title")
            # Only the help text scrolls; title and button are outside it
            with VerticalScroll(id="help-scroll"):
                yield Static(help_text, id="help-content")
            yield Button("Close", id="help-close-btn")

    def on_button_pressed(self, event: Button.Pressed) -> None:
        """Dismiss the help screen when a button is pressed."""
        self.dismiss()

    def action_close(self) -> None:
        """Dismiss the help screen (target of the key bindings)."""
        self.dismiss()
1856
+
1857
+
1858
+ class ExperimaestroUI(App):
1859
+ """Textual TUI for monitoring experiments"""
1860
+
1861
+ TITLE = "Experimaestro UI"
1862
+ CSS_PATH = "app.tcss"
1863
+
1864
+ BINDINGS = [
1865
+ Binding("q", "quit", "Quit"),
1866
+ Binding("?", "show_help", "Help"),
1867
+ Binding("escape", "go_back", "Back", show=False),
1868
+ Binding("l", "view_logs", "Logs", show=False),
1869
+ Binding("o", "show_orphans", "Orphans", show=False),
1870
+ Binding("j", "focus_jobs", "Jobs", show=False),
1871
+ Binding("s", "focus_services", "Services", show=False),
1872
+ ]
1873
+
1874
def __init__(
    self,
    workdir: Optional[Path] = None,
    watch: bool = True,
    state_provider: Optional[WorkspaceStateProvider] = None,
    show_logs: bool = False,
):
    """Set up the TUI and its state provider.

    Args:
        workdir: Workspace directory (required if state_provider not provided)
        watch: Enable filesystem watching for workspace mode
        state_provider: Pre-initialized state provider (for active experiments)
        show_logs: Whether to show the logs tab (for active experiments)
    """
    super().__init__()
    self.workdir = workdir
    # NOTE(review): this attribute name may shadow a Textual ``watch``
    # method on the instance - confirm it is not needed on the App.
    self.watch = watch
    self.show_logs = show_logs
    self._listener_registered = False

    # The provider must exist before compose() runs
    if not state_provider:
        from experimaestro.scheduler.state_provider import WorkspaceStateProvider

        # Workspace mode: obtain the singleton provider for this workspace
        self.state_provider = WorkspaceStateProvider.get_instance(
            self.workdir,
            read_only=False,
            sync_on_start=True,
            sync_interval_minutes=5,
        )
        # A singleton is never closed by the UI
        self.owns_provider = False
        # Just viewing, no active experiment
        self._has_active_experiment = False
    else:
        self.state_provider = state_provider
        # An external provider is never closed by the UI
        self.owns_provider = False
        # External provider = active experiment
        self._has_active_experiment = True
1912
+
1913
def compose(self) -> ComposeResult:
    """Yield the application layout: header, monitor view, footer.

    With ``show_logs`` the monitor view sits in a "Monitor" tab next to a
    "Logs" tab; otherwise it is placed directly in a vertical container.
    """
    yield Header()

    if not self.show_logs:
        # Plain layout, no log pane
        with Vertical(id="main-container"):
            yield from self._compose_monitor_view()
    else:
        # Two tabs: the monitor and the captured logs
        with TabbedContent(id="main-tabs"):
            with TabPane("Monitor", id="monitor-tab"):
                yield from self._compose_monitor_view()
            with TabPane("Logs", id="logs-tab"):
                yield CaptureLog(id="logs", auto_scroll=True, wrap=True)

    yield Footer()
1930
+
1931
def _compose_monitor_view(self):
    """Yield the monitor widgets: experiments list, jobs/services tabs, job detail.

    The tabs and the job detail container start with the "hidden" class.
    """
    provider = self.state_provider

    yield ExperimentsList(provider)

    # Jobs and services share a tabbed container, hidden at first
    with TabbedContent(id="experiment-tabs", classes="hidden"):
        with TabPane("Jobs", id="jobs-tab"):
            yield JobsTable(provider)
        with TabPane("Services", id="services-tab"):
            yield ServicesList(provider)

    # Detail view of a single job, hidden at first
    with Vertical(id="job-detail-container", classes="hidden"):
        yield JobDetailView(provider)
1943
+
1944
def on_mount(self) -> None:
    """Reset logging, load the experiments and subscribe to state events."""
    # Force-reset the root logging configuration
    logging.basicConfig(level=logging.INFO, force=True)

    # Initial fill of the experiments list
    self.query_one(ExperimentsList).refresh_experiments()

    # Subscribe to state changes; how notifications are produced is the
    # provider's own business
    if not self.state_provider:
        return
    self.state_provider.add_listener(self._on_state_event)
    self._listener_registered = True
    self.log("Registered state listener for notifications")
1959
+
1960
def _on_state_event(self, event: StateEvent) -> None:
    """Dispatch a state provider event to ``_handle_state_event``.

    The callback may fire on the main thread or on another thread; only in
    the latter case is the handler routed through ``call_from_thread``.
    """
    import threading

    on_main_thread = threading.current_thread() is threading.main_thread()
    if not on_main_thread:
        # Background thread: hand the event over via call_from_thread
        self.call_from_thread(self._handle_state_event, event)
        return

    # Main thread: safe to handle directly
    self._handle_state_event(event)
1974
+
1975
def _handle_state_event(self, event: StateEvent) -> None:
    """Refresh the widgets affected by a state event.

    Experiment and run updates refresh the experiments list. Job updates
    refresh the jobs table showing that experiment, the visible job detail
    for that job, and the experiments list. Service updates refresh the
    services list showing that experiment.
    """
    # query() returns an empty result instead of raising NoMatches when the
    # widgets are not there yet
    jobs_tables = self.query(JobsTable)
    services_lists = self.query(ServicesList)

    self.log.debug(
        f"State event {event.event_type.name}, "
        f"JobsTable found: {len(jobs_tables)}, ServicesList found: {len(services_lists)}"
    )

    def refresh_experiment_lists() -> None:
        for exp_list in self.query(ExperimentsList):
            exp_list.refresh_experiments()

    kind = event.event_type

    if kind == StateEventType.EXPERIMENT_UPDATED:
        refresh_experiment_lists()
        return

    if kind == StateEventType.JOB_UPDATED:
        experiment_id = event.data.get("experimentId")

        # Jobs table, only if it displays the affected experiment
        for jobs_table in jobs_tables:
            if jobs_table.current_experiment == experiment_id:
                jobs_table.refresh_jobs()

        # Job detail, only if visible and showing the affected job
        for container in self.query("#job-detail-container"):
            if container.has_class("hidden"):
                continue
            for detail_view in self.query(JobDetailView):
                job_id = event.data.get("jobId")
                if detail_view.current_job_id == job_id:
                    detail_view.refresh_job_detail()

        # Experiment statistics depend on job states too
        refresh_experiment_lists()
        return

    if kind == StateEventType.RUN_UPDATED:
        # Run information is displayed in the experiments list
        refresh_experiment_lists()
        return

    if kind == StateEventType.SERVICE_UPDATED:
        experiment_id = event.data.get("experimentId")

        # Services list, only if it displays the affected experiment
        for services_list in services_lists:
            if services_list.current_experiment == experiment_id:
                services_list.refresh_services()
2024
+
2025
def on_experiment_selected(self, message: ExperimentSelected) -> None:
    """Show the jobs/services tabs for the experiment that was selected."""
    experiment_id = message.experiment_id
    self.log(f"Experiment selected: {experiment_id}")

    # Point the services list, then the jobs table, at the experiment
    self.query_one(ServicesList).set_experiment(experiment_id)
    self.query_one(JobsTable).set_experiment(experiment_id)

    # Reveal the tabs
    self.query_one("#experiment-tabs", TabbedContent).remove_class("hidden")

    # Give keyboard focus to the jobs table
    self.query_one("#jobs-table", DataTable).focus()
2044
+
2045
def on_experiment_deselected(self, message: ExperimentDeselected) -> None:
    """Hide the jobs/services tabs and the job detail view."""
    self.query_one("#experiment-tabs", TabbedContent).add_class("hidden")
    # The job detail may still be showing: hide it as well
    self.query_one("#job-detail-container").add_class("hidden")
2053
+
2054
def on_job_selected(self, message: JobSelected) -> None:
    """Swap the tabs for the job detail view and load the selected job."""
    self.log(f"Job selected: {message.job_id} from {message.experiment_id}")

    # Tabs out, detail container in
    self.query_one("#experiment-tabs", TabbedContent).add_class("hidden")
    self.query_one("#job-detail-container").remove_class("hidden")

    # Tell the detail view which job to show
    detail_view = self.query_one(JobDetailView)
    detail_view.set_job(message.job_id, message.experiment_id)
2068
+
2069
def on_job_deselected(self, message: JobDeselected) -> None:
    """Leave the job detail view and return to the jobs/services tabs."""
    # Detail container out, tabs in
    self.query_one("#job-detail-container").add_class("hidden")
    self.query_one("#experiment-tabs", TabbedContent).remove_class("hidden")

    # Give keyboard focus back to the jobs table
    self.query_one("#jobs-table", DataTable).focus()
2081
+
2082
def action_refresh(self) -> None:
    """Reload experiments and jobs, plus the job detail when it is visible."""
    exp_widget = self.query_one(ExperimentsList)
    jobs_widget = self.query_one(JobsTable)

    exp_widget.refresh_experiments()
    jobs_widget.refresh_jobs()

    # The detail view is refreshed only while it is on screen
    detail_container = self.query_one("#job-detail-container")
    if detail_container.has_class("hidden"):
        return
    self.query_one(JobDetailView).refresh_job_detail()
2095
+
2096
def action_go_back(self) -> None:
    """Step back one level: job detail -> tabs -> experiments list."""
    # Level 1: a job detail is shown -> return to the jobs/services tabs
    detail_visible = not self.query_one("#job-detail-container").has_class("hidden")
    if detail_visible:
        self.post_message(JobDeselected())
        return

    # Level 2: the tabs are shown -> return to the experiments list
    experiment_tabs = self.query_one("#experiment-tabs", TabbedContent)
    if experiment_tabs.has_class("hidden"):
        return

    experiments_list = self.query_one(ExperimentsList)
    if experiments_list.collapsed:
        experiments_list.expand_experiments()
    experiment_tabs.add_class("hidden")
    self.post_message(ExperimentDeselected())
2112
+
2113
def action_view_logs(self) -> None:
    """Open the logs of the job shown in the detail view, if it is visible."""
    if self.query_one("#job-detail-container").has_class("hidden"):
        return
    # Delegate to the detail view, which knows the current job
    self.query_one(JobDetailView).action_view_logs()
2119
+
2120
def action_show_orphans(self) -> None:
    """Push the orphan jobs screen on top of the current one."""
    orphans_screen = OrphanJobsScreen(self.state_provider)
    self.push_screen(orphans_screen)
2123
+
2124
@work(thread=True, exclusive=True)
def _sync_and_view_logs(self, job_path: Path, task_id: str) -> None:
    """Sync a remote job directory, then announce its log files.

    Runs as a thread worker. The outcome is reported with a message:
    ``LogsSyncComplete`` carrying the existing ``.out``/``.err`` files, or
    ``LogsSyncFailed`` when the sync returns nothing, no log file exists, or
    any exception is raised.
    """
    try:
        # Bring the job directory over from the remote side
        synced_dir = self.state_provider.sync_path(str(job_path))
        if not synced_dir:
            self.post_message(LogsSyncFailed("Failed to sync logs from remote"))
            return

        # Log files carry the last dotted component of the task ID
        task_name = task_id.split(".")[-1]
        candidates = (
            synced_dir / f"{task_name}.out",
            synced_dir / f"{task_name}.err",
        )

        # Keep only the files that are actually there
        log_files = [str(candidate) for candidate in candidates if candidate.exists()]

        if not log_files:
            self.post_message(
                LogsSyncFailed(f"No log files found: {task_name}.out/.err")
            )
            return

        # Success is signalled with a message as well
        self.post_message(LogsSyncComplete(log_files, synced_dir.name))

    except Exception as e:
        self.post_message(LogsSyncFailed(str(e)))
2160
+
2161
def on_logs_sync_complete(self, message: LogsSyncComplete) -> None:
    """Open the log viewer on the files delivered by a finished sync."""
    viewer = LogViewerScreen(message.log_files, message.job_id)
    self.push_screen(viewer)
2164
+
2165
def on_logs_sync_failed(self, message: LogsSyncFailed) -> None:
    """Show the sync error carried by the message as a warning notification."""
    self.notify(message.error, severity="warning")
2168
+
2169
def on_view_job_logs(self, message: ViewJobLogs) -> None:
    """Open the log viewer for a job, syncing first when monitoring remotely.

    For a remote state provider the work is delegated to the sync worker.
    Locally, the ``.out``/``.err`` files named after the last dotted part of
    the task ID are looked up in the job directory; a warning is shown when
    neither exists.
    """
    job_path = Path(message.job_path)

    # Remote monitoring: the directory must be synced first (worker thread)
    if self.state_provider.is_remote:
        self.notify("Syncing logs from remote...", timeout=5)
        self._sync_and_view_logs(job_path, message.task_id)
        return

    # Local monitoring: files can be read in place
    task_name = message.task_id.split(".")[-1]
    candidates = (
        job_path / f"{task_name}.out",
        job_path / f"{task_name}.err",
    )

    # Keep only the files that are actually there
    log_files = [str(candidate) for candidate in candidates if candidate.exists()]

    if not log_files:
        self.notify(
            f"No log files found: {task_name}.out/.err in {job_path}",
            severity="warning",
        )
        return

    # The job directory name is passed to the viewer as the job ID
    self.push_screen(LogViewerScreen(log_files, job_path.name))
2201
+
2202
def on_view_job_logs_request(self, message: ViewJobLogsRequest) -> None:
    """Turn a jobs-table log request into a ``ViewJobLogs`` message.

    Warns instead when the job is unknown or lacks a path or a task ID.
    """
    job = self.state_provider.get_job(message.job_id, message.experiment_id)
    has_logs_location = job and job.path and job.task_id
    if not has_logs_location:
        self.notify("Cannot find job logs", severity="warning")
        return
    self.post_message(ViewJobLogs(str(job.path), job.task_id))
2209
+
2210
def on_delete_job_request(self, message: DeleteJobRequest) -> None:
    """Confirm and perform the deletion of a job.

    Unknown jobs and running jobs are refused with a notification. After a
    successful deletion the data is refreshed and the table cursor is moved
    based on the position it had before the confirmation dialog.
    """
    job = self.state_provider.get_job(message.job_id, message.experiment_id)
    if not job:
        self.notify("Job not found", severity="error")
        return

    if job.state.running():
        self.notify("Cannot delete a running job", severity="warning")
        return

    # Remember where the cursor was so it can be restored afterwards
    table = self.query_one(JobsTable).query_one("#jobs-table", DataTable)
    cursor_row = table.cursor_row

    def handle_delete_response(confirmed: bool) -> None:
        if not confirmed:
            return

        success, msg = self.state_provider.delete_job_safely(job)
        if not success:
            self.notify(msg, severity="error")
            return

        self.notify(msg, severity="information")
        self.action_refresh()

        # Move cursor to previous row (or first if was at top)
        if cursor_row is None or not table.row_count > 0:
            return
        target_row = min(cursor_row, table.row_count - 1)
        if target_row > 0 and cursor_row > 0:
            target_row = cursor_row - 1
        table.move_cursor(row=target_row)

    self.push_screen(
        DeleteConfirmScreen("job", job.identifier),
        handle_delete_response,
    )
2245
+
2246
def on_delete_experiment_request(self, message: DeleteExperimentRequest) -> None:
    """Confirm and perform the deletion of an experiment.

    Refused while any of its jobs is running. The experiment is deleted with
    ``delete_jobs=False``; the confirmation dialog warns about the jobs that
    will remain when there are any.
    """
    jobs = self.state_provider.get_jobs(message.experiment_id)
    running_count = len([j for j in jobs if j.state.running()])

    if running_count:
        self.notify(
            f"Cannot delete: {running_count} jobs are running",
            severity="warning",
        )
        return

    if jobs:
        warning = f"{len(jobs)} jobs will remain (not deleted by default)"
    else:
        warning = None

    def handle_delete_response(confirmed: bool) -> None:
        if not confirmed:
            return

        success, msg = self.state_provider.delete_experiment(
            message.experiment_id, delete_jobs=False
        )
        if not success:
            self.notify(msg, severity="error")
            return

        self.notify(msg, severity="information")
        # Return to the experiments list
        self.query_one(ExperimentsList).expand_experiments()
        self.post_message(ExperimentDeselected())
        self.action_refresh()

    self.push_screen(
        DeleteConfirmScreen("experiment", message.experiment_id, warning),
        handle_delete_response,
    )
2281
+
2282
def on_kill_job_request(self, message: KillJobRequest) -> None:
    """Confirm and kill a running job.

    Unknown jobs and jobs that are not running are refused with a
    notification.
    """
    job = self.state_provider.get_job(message.job_id, message.experiment_id)
    if not job:
        self.notify("Job not found", severity="error")
        return

    if not job.state.running():
        self.notify("Job is not running", severity="warning")
        return

    def handle_kill_response(confirmed: bool) -> None:
        if not confirmed:
            return
        if self.state_provider.kill_job(job, perform=True):
            self.notify(f"Job {job.identifier} killed", severity="information")
            self.action_refresh()
        else:
            self.notify("Failed to kill job", severity="error")

    self.push_screen(
        KillConfirmScreen("job", job.identifier),
        handle_kill_response,
    )
2306
+
2307
def on_kill_experiment_request(self, message: KillExperimentRequest) -> None:
    """Confirm and kill every running job of an experiment.

    Warns and returns when the experiment has no running job. After the
    confirmation, the number of jobs actually killed is reported.
    """
    running_jobs = [
        j
        for j in self.state_provider.get_jobs(message.experiment_id)
        if j.state.running()
    ]

    if not running_jobs:
        self.notify("No running jobs in experiment", severity="warning")
        return

    def handle_kill_response(confirmed: bool) -> None:
        if not confirmed:
            return
        killed = 0
        for job in running_jobs:
            if self.state_provider.kill_job(job, perform=True):
                killed += 1
        self.notify(
            f"Killed {killed} of {len(running_jobs)} running jobs",
            severity="information",
        )
        self.action_refresh()

    self.push_screen(
        KillConfirmScreen("experiment", f"{len(running_jobs)} running jobs"),
        handle_kill_response,
    )
2332
+
2333
def action_focus_jobs(self) -> None:
    """Activate the jobs tab and focus its table, if an experiment is open."""
    tabs = self.query_one("#experiment-tabs", TabbedContent)
    if tabs.has_class("hidden"):
        # No experiment selected yet: nothing to switch to
        self.notify("Select an experiment first", severity="warning")
        return
    tabs.active = "jobs-tab"
    self.query_one("#jobs-table", DataTable).focus()
2342
+
2343
def action_focus_services(self) -> None:
    """Activate the services tab and focus its table, if an experiment is open."""
    tabs = self.query_one("#experiment-tabs", TabbedContent)
    if tabs.has_class("hidden"):
        # No experiment selected yet: nothing to switch to
        self.notify("Select an experiment first", severity="warning")
        return
    tabs.active = "services-tab"
    self.query_one("#services-table", DataTable).focus()
2352
+
2353
def action_switch_focus(self) -> None:
    """Toggle focus between the experiments table and the active tab's table.

    Does nothing when no widget has focus. From the experiments table, and
    only while the tabs are visible, focus goes to the services or jobs
    table depending on the active tab; otherwise it goes to the experiments
    table.
    """
    current = self.focused
    if not current:
        return

    experiments_table = self.query_one("#experiments-table", DataTable)
    tabs = self.query_one("#experiment-tabs", TabbedContent)

    leaving_experiments = current == experiments_table and not tabs.has_class(
        "hidden"
    )
    if not leaving_experiments:
        experiments_table.focus()
        return

    # Pick the table that belongs to the active tab
    if tabs.active == "services-tab":
        target_id = "#services-table"
    else:
        target_id = "#jobs-table"
    self.query_one(target_id, DataTable).focus()
2368
+
2369
def action_quit(self) -> None:
    """Ask for confirmation and exit the application when it is given."""

    def handle_quit_response(confirmed: bool) -> None:
        if not confirmed:
            return
        self.exit()

    confirm_screen = QuitConfirmScreen(
        has_active_experiment=self._has_active_experiment
    )
    self.push_screen(confirm_screen, handle_quit_response)
2380
+
2381
def action_show_help(self) -> None:
    """Push the modal listing the keyboard shortcuts."""
    help_screen = HelpScreen()
    self.push_screen(help_screen)
2384
+
2385
+ def on_unmount(self) -> None:
2386
+ """Clean up when closing"""
2387
+ # Unregister listener
2388
+ if self._listener_registered and self.state_provider:
2389
+ self.state_provider.remove_listener(self._on_state_event)
2390
+ self._listener_registered = False
2391
+ self.log("Unregistered state listener")
2392
+
2393
+ # Only close state provider if we own it (not external/active experiment)
2394
+ if self.state_provider and self.owns_provider:
2395
+ self.state_provider.close()