experimaestro 2.0.0b4__py3-none-any.whl → 2.0.0b17__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of experimaestro might be problematic. Click here for more details.

Files changed (154) hide show
  1. experimaestro/__init__.py +12 -5
  2. experimaestro/cli/__init__.py +393 -134
  3. experimaestro/cli/filter.py +48 -23
  4. experimaestro/cli/jobs.py +253 -71
  5. experimaestro/cli/refactor.py +1 -2
  6. experimaestro/commandline.py +7 -4
  7. experimaestro/connectors/__init__.py +9 -1
  8. experimaestro/connectors/local.py +43 -3
  9. experimaestro/core/arguments.py +18 -18
  10. experimaestro/core/identifier.py +11 -11
  11. experimaestro/core/objects/config.py +96 -39
  12. experimaestro/core/objects/config_walk.py +3 -3
  13. experimaestro/core/{subparameters.py → partial.py} +16 -16
  14. experimaestro/core/partial_lock.py +394 -0
  15. experimaestro/core/types.py +12 -15
  16. experimaestro/dynamic.py +290 -0
  17. experimaestro/experiments/__init__.py +6 -2
  18. experimaestro/experiments/cli.py +223 -52
  19. experimaestro/experiments/configuration.py +24 -0
  20. experimaestro/generators.py +5 -5
  21. experimaestro/ipc.py +118 -1
  22. experimaestro/launcherfinder/__init__.py +2 -2
  23. experimaestro/launcherfinder/registry.py +6 -7
  24. experimaestro/launcherfinder/specs.py +2 -9
  25. experimaestro/launchers/slurm/__init__.py +2 -2
  26. experimaestro/launchers/slurm/base.py +62 -0
  27. experimaestro/locking.py +957 -1
  28. experimaestro/notifications.py +89 -201
  29. experimaestro/progress.py +63 -366
  30. experimaestro/rpyc.py +0 -2
  31. experimaestro/run.py +29 -2
  32. experimaestro/scheduler/__init__.py +8 -1
  33. experimaestro/scheduler/base.py +650 -53
  34. experimaestro/scheduler/dependencies.py +20 -16
  35. experimaestro/scheduler/experiment.py +764 -169
  36. experimaestro/scheduler/interfaces.py +338 -96
  37. experimaestro/scheduler/jobs.py +58 -20
  38. experimaestro/scheduler/remote/__init__.py +31 -0
  39. experimaestro/scheduler/remote/adaptive_sync.py +265 -0
  40. experimaestro/scheduler/remote/client.py +928 -0
  41. experimaestro/scheduler/remote/protocol.py +282 -0
  42. experimaestro/scheduler/remote/server.py +447 -0
  43. experimaestro/scheduler/remote/sync.py +144 -0
  44. experimaestro/scheduler/services.py +186 -35
  45. experimaestro/scheduler/state_provider.py +811 -2157
  46. experimaestro/scheduler/state_status.py +1247 -0
  47. experimaestro/scheduler/transient.py +31 -0
  48. experimaestro/scheduler/workspace.py +1 -1
  49. experimaestro/scheduler/workspace_state_provider.py +1273 -0
  50. experimaestro/scriptbuilder.py +4 -4
  51. experimaestro/settings.py +36 -0
  52. experimaestro/tests/conftest.py +33 -5
  53. experimaestro/tests/connectors/bin/executable.py +1 -1
  54. experimaestro/tests/fixtures/pre_experiment/experiment_check_env.py +16 -0
  55. experimaestro/tests/fixtures/pre_experiment/experiment_check_mock.py +14 -0
  56. experimaestro/tests/fixtures/pre_experiment/experiment_simple.py +12 -0
  57. experimaestro/tests/fixtures/pre_experiment/pre_setup_env.py +5 -0
  58. experimaestro/tests/fixtures/pre_experiment/pre_setup_error.py +3 -0
  59. experimaestro/tests/fixtures/pre_experiment/pre_setup_mock.py +8 -0
  60. experimaestro/tests/launchers/bin/test.py +1 -0
  61. experimaestro/tests/launchers/test_slurm.py +9 -9
  62. experimaestro/tests/partial_reschedule.py +46 -0
  63. experimaestro/tests/restart.py +3 -3
  64. experimaestro/tests/restart_main.py +1 -0
  65. experimaestro/tests/scripts/notifyandwait.py +1 -0
  66. experimaestro/tests/task_partial.py +38 -0
  67. experimaestro/tests/task_tokens.py +2 -2
  68. experimaestro/tests/tasks/test_dynamic.py +6 -6
  69. experimaestro/tests/test_dependencies.py +3 -3
  70. experimaestro/tests/test_deprecated.py +15 -15
  71. experimaestro/tests/test_dynamic_locking.py +317 -0
  72. experimaestro/tests/test_environment.py +24 -14
  73. experimaestro/tests/test_experiment.py +171 -36
  74. experimaestro/tests/test_identifier.py +25 -25
  75. experimaestro/tests/test_identifier_stability.py +3 -5
  76. experimaestro/tests/test_multitoken.py +2 -4
  77. experimaestro/tests/{test_subparameters.py → test_partial.py} +25 -25
  78. experimaestro/tests/test_partial_paths.py +81 -138
  79. experimaestro/tests/test_pre_experiment.py +219 -0
  80. experimaestro/tests/test_progress.py +2 -8
  81. experimaestro/tests/test_remote_state.py +1132 -0
  82. experimaestro/tests/test_stray_jobs.py +261 -0
  83. experimaestro/tests/test_tasks.py +1 -2
  84. experimaestro/tests/test_token_locking.py +52 -67
  85. experimaestro/tests/test_tokens.py +5 -6
  86. experimaestro/tests/test_transient.py +225 -0
  87. experimaestro/tests/test_workspace_state_provider.py +768 -0
  88. experimaestro/tests/token_reschedule.py +1 -3
  89. experimaestro/tests/utils.py +2 -7
  90. experimaestro/tokens.py +227 -372
  91. experimaestro/tools/diff.py +1 -0
  92. experimaestro/tools/documentation.py +4 -5
  93. experimaestro/tools/jobs.py +1 -2
  94. experimaestro/tui/app.py +459 -1895
  95. experimaestro/tui/app.tcss +162 -0
  96. experimaestro/tui/dialogs.py +172 -0
  97. experimaestro/tui/log_viewer.py +253 -3
  98. experimaestro/tui/messages.py +137 -0
  99. experimaestro/tui/utils.py +54 -0
  100. experimaestro/tui/widgets/__init__.py +23 -0
  101. experimaestro/tui/widgets/experiments.py +468 -0
  102. experimaestro/tui/widgets/global_services.py +238 -0
  103. experimaestro/tui/widgets/jobs.py +972 -0
  104. experimaestro/tui/widgets/log.py +156 -0
  105. experimaestro/tui/widgets/orphans.py +363 -0
  106. experimaestro/tui/widgets/runs.py +185 -0
  107. experimaestro/tui/widgets/services.py +314 -0
  108. experimaestro/tui/widgets/stray_jobs.py +528 -0
  109. experimaestro/utils/__init__.py +1 -1
  110. experimaestro/utils/environment.py +105 -22
  111. experimaestro/utils/fswatcher.py +124 -0
  112. experimaestro/utils/jobs.py +1 -2
  113. experimaestro/utils/jupyter.py +1 -2
  114. experimaestro/utils/logging.py +72 -0
  115. experimaestro/version.py +2 -2
  116. experimaestro/webui/__init__.py +9 -0
  117. experimaestro/webui/app.py +117 -0
  118. experimaestro/{server → webui}/data/index.css +66 -11
  119. experimaestro/webui/data/index.css.map +1 -0
  120. experimaestro/{server → webui}/data/index.js +82763 -87217
  121. experimaestro/webui/data/index.js.map +1 -0
  122. experimaestro/webui/routes/__init__.py +5 -0
  123. experimaestro/webui/routes/auth.py +53 -0
  124. experimaestro/webui/routes/proxy.py +117 -0
  125. experimaestro/webui/server.py +200 -0
  126. experimaestro/webui/state_bridge.py +152 -0
  127. experimaestro/webui/websocket.py +413 -0
  128. {experimaestro-2.0.0b4.dist-info → experimaestro-2.0.0b17.dist-info}/METADATA +8 -9
  129. experimaestro-2.0.0b17.dist-info/RECORD +219 -0
  130. experimaestro/cli/progress.py +0 -269
  131. experimaestro/scheduler/state.py +0 -75
  132. experimaestro/scheduler/state_db.py +0 -388
  133. experimaestro/scheduler/state_sync.py +0 -834
  134. experimaestro/server/__init__.py +0 -467
  135. experimaestro/server/data/index.css.map +0 -1
  136. experimaestro/server/data/index.js.map +0 -1
  137. experimaestro/tests/test_cli_jobs.py +0 -615
  138. experimaestro/tests/test_file_progress.py +0 -425
  139. experimaestro/tests/test_file_progress_integration.py +0 -477
  140. experimaestro/tests/test_state_db.py +0 -434
  141. experimaestro-2.0.0b4.dist-info/RECORD +0 -181
  142. /experimaestro/{server → webui}/data/1815e00441357e01619e.ttf +0 -0
  143. /experimaestro/{server → webui}/data/2463b90d9a316e4e5294.woff2 +0 -0
  144. /experimaestro/{server → webui}/data/2582b0e4bcf85eceead0.ttf +0 -0
  145. /experimaestro/{server → webui}/data/89999bdf5d835c012025.woff2 +0 -0
  146. /experimaestro/{server → webui}/data/914997e1bdfc990d0897.ttf +0 -0
  147. /experimaestro/{server → webui}/data/c210719e60948b211a12.woff2 +0 -0
  148. /experimaestro/{server → webui}/data/favicon.ico +0 -0
  149. /experimaestro/{server → webui}/data/index.html +0 -0
  150. /experimaestro/{server → webui}/data/login.html +0 -0
  151. /experimaestro/{server → webui}/data/manifest.json +0 -0
  152. {experimaestro-2.0.0b4.dist-info → experimaestro-2.0.0b17.dist-info}/WHEEL +0 -0
  153. {experimaestro-2.0.0b4.dist-info → experimaestro-2.0.0b17.dist-info}/entry_points.txt +0 -0
  154. {experimaestro-2.0.0b4.dist-info → experimaestro-2.0.0b17.dist-info}/licenses/LICENSE +0 -0
experimaestro/tui/app.py CHANGED
@@ -4,1823 +4,62 @@ import logging
4
4
  from pathlib import Path
5
5
  from typing import Optional
6
6
  from textual.app import App, ComposeResult
7
- from textual.containers import Container, Horizontal, Vertical
7
+ from textual.containers import Vertical
8
8
  from textual.widgets import (
9
9
  Header,
10
10
  Footer,
11
11
  DataTable,
12
- Label,
13
12
  TabbedContent,
14
13
  TabPane,
15
- RichLog,
16
- Button,
17
- Static,
18
- Input,
19
14
  )
20
- from textual.widget import Widget
21
- from textual.reactive import reactive
22
15
  from textual.binding import Binding
23
- from textual.message import Message
24
- from textual.screen import ModalScreen, Screen
25
- from textual import events
26
- from rich.text import Text
27
- from experimaestro.scheduler.state_provider import (
28
- WorkspaceStateProvider,
29
- StateEvent,
30
- StateEventType,
16
+
17
+ from experimaestro.scheduler.state_provider import StateProvider
18
+ from experimaestro.scheduler.state_status import (
19
+ EventBase,
20
+ ExperimentUpdatedEvent,
21
+ RunUpdatedEvent,
22
+ JobStateChangedEvent,
23
+ JobProgressEvent,
24
+ JobSubmittedEvent,
25
+ ServiceAddedEvent,
26
+ ServiceStateChangedEvent,
31
27
  )
32
28
  from experimaestro.tui.log_viewer import LogViewerScreen
33
-
34
-
35
- def format_duration(seconds: float) -> str:
36
- """Format duration in seconds to human-readable string"""
37
- if seconds < 0:
38
- return "-"
39
- seconds = int(seconds)
40
- if seconds < 60:
41
- return f"{seconds}s"
42
- elif seconds < 3600:
43
- return f"{seconds // 60}m {seconds % 60}s"
44
- elif seconds < 86400:
45
- return f"{seconds // 3600}h {(seconds % 3600) // 60}m"
46
- else:
47
- return f"{seconds // 86400}d {(seconds % 86400) // 3600}h"
48
-
49
-
50
- class QuitConfirmScreen(ModalScreen[bool]):
51
- """Modal screen for quit confirmation"""
52
-
53
- def __init__(self, has_active_experiment: bool = False):
54
- super().__init__()
55
- self.has_active_experiment = has_active_experiment
56
-
57
- def compose(self) -> ComposeResult:
58
- with Vertical(id="quit-dialog"):
59
- yield Static("Quit Experimaestro?", id="quit-title")
60
-
61
- if self.has_active_experiment:
62
- yield Static(
63
- "⚠️ The experiment is still in progress.\n"
64
- "Quitting will prevent new jobs from being launched.",
65
- id="quit-warning",
66
- )
67
- else:
68
- yield Static("Are you sure you want to quit?", id="quit-message")
69
-
70
- with Horizontal(id="quit-buttons"):
71
- yield Button("Quit", variant="error", id="quit-yes")
72
- yield Button("Cancel", variant="primary", id="quit-no")
73
-
74
- def on_button_pressed(self, event: Button.Pressed) -> None:
75
- if event.button.id == "quit-yes":
76
- self.dismiss(True)
77
- else:
78
- self.dismiss(False)
79
-
80
-
81
- class DeleteConfirmScreen(ModalScreen[bool]):
82
- """Modal screen for delete confirmation"""
83
-
84
- def __init__(
85
- self, item_type: str, item_name: str, warning: Optional[str] = None
86
- ) -> None:
87
- super().__init__()
88
- self.item_type = item_type
89
- self.item_name = item_name
90
- self.warning = warning
91
-
92
- def compose(self) -> ComposeResult:
93
- with Vertical(id="delete-dialog"):
94
- yield Static(f"Delete {self.item_type}?", id="delete-title")
95
- yield Static(
96
- f"This will permanently delete: {self.item_name}", id="delete-message"
97
- )
98
-
99
- if self.warning:
100
- yield Static(f"Warning: {self.warning}", id="delete-warning")
101
-
102
- with Horizontal(id="delete-buttons"):
103
- yield Button("Delete", variant="error", id="delete-yes")
104
- yield Button("Cancel", variant="primary", id="delete-no")
105
-
106
- def on_mount(self) -> None:
107
- """Focus cancel button by default"""
108
- self.query_one("#delete-no", Button).focus()
109
-
110
- def on_button_pressed(self, event: Button.Pressed) -> None:
111
- if event.button.id == "delete-yes":
112
- self.dismiss(True)
113
- else:
114
- self.dismiss(False)
115
-
116
-
117
- class KillConfirmScreen(ModalScreen[bool]):
118
- """Modal screen for kill confirmation"""
119
-
120
- def __init__(self, item_type: str, item_name: str) -> None:
121
- super().__init__()
122
- self.item_type = item_type
123
- self.item_name = item_name
124
-
125
- def compose(self) -> ComposeResult:
126
- with Vertical(id="kill-dialog"):
127
- yield Static(f"Kill {self.item_type}?", id="kill-title")
128
- yield Static(f"This will terminate: {self.item_name}", id="kill-message")
129
-
130
- with Horizontal(id="kill-buttons"):
131
- yield Button("Kill", variant="warning", id="kill-yes")
132
- yield Button("Cancel", variant="primary", id="kill-no")
133
-
134
- def on_mount(self) -> None:
135
- """Focus cancel button by default"""
136
- self.query_one("#kill-no", Button).focus()
137
-
138
- def on_button_pressed(self, event: Button.Pressed) -> None:
139
- if event.button.id == "kill-yes":
140
- self.dismiss(True)
141
- else:
142
- self.dismiss(False)
143
-
144
-
145
- def get_status_icon(status: str, failure_reason=None):
146
- """Get status icon for a job state.
147
-
148
- Args:
149
- status: Job state name (e.g., "done", "error", "running")
150
- failure_reason: Optional JobFailureStatus enum for error states
151
-
152
- Returns:
153
- Status icon string
154
- """
155
- if status == "done":
156
- return "✓"
157
- elif status == "error":
158
- # Show different icons for different failure types
159
- if failure_reason is not None:
160
- from experimaestro.scheduler.interfaces import JobFailureStatus
161
-
162
- if failure_reason == JobFailureStatus.DEPENDENCY:
163
- return "🔗" # Dependency failed
164
- elif failure_reason == JobFailureStatus.TIMEOUT:
165
- return "⏱" # Timeout
166
- elif failure_reason == JobFailureStatus.MEMORY:
167
- return "💾" # Memory issue
168
- # FAILED or unknown - use default error icon
169
- return "❌"
170
- elif status == "running":
171
- return "▶"
172
- elif status == "waiting":
173
- return "⌛" # Waiting for dependencies
174
- else:
175
- # phantom, unscheduled or unknown
176
- return "👻"
177
-
178
-
179
- class CaptureLog(RichLog):
180
- """Custom RichLog widget that captures print statements with log highlighting"""
181
-
182
- def on_mount(self) -> None:
183
- """Enable print capturing when widget is mounted"""
184
- self.begin_capture_print()
185
-
186
- def on_unmount(self) -> None:
187
- """Stop print capturing when widget is unmounted"""
188
- self.end_capture_print()
189
-
190
- def _format_log_line(self, text: str) -> Text:
191
- """Format a log line with appropriate styling based on log level"""
192
- result = Text()
193
-
194
- # Check for common log level patterns
195
- if text.startswith("ERROR:") or ":ERROR:" in text:
196
- result.append(text, style="bold red")
197
- elif text.startswith("WARNING:") or ":WARNING:" in text:
198
- result.append(text, style="yellow")
199
- elif text.startswith("INFO:") or ":INFO:" in text:
200
- result.append(text, style="green")
201
- elif text.startswith("DEBUG:") or ":DEBUG:" in text:
202
- result.append(text, style="dim")
203
- elif text.startswith("CRITICAL:") or ":CRITICAL:" in text:
204
- result.append(text, style="bold white on red")
205
- else:
206
- result.append(text)
207
-
208
- return result
209
-
210
- def on_print(self, event: events.Print) -> None:
211
- """Handle print events from captured stdout/stderr"""
212
- if text := event.text.strip():
213
- self.write(self._format_log_line(text))
214
-
215
-
216
- class ExperimentsList(Widget):
217
- """Widget displaying list of experiments"""
218
-
219
- BINDINGS = [
220
- Binding("d", "delete_experiment", "Delete", show=False),
221
- Binding("k", "kill_experiment", "Kill", show=False),
222
- ]
223
-
224
- current_experiment: reactive[Optional[str]] = reactive(None)
225
- collapsed: reactive[bool] = reactive(False)
226
-
227
- def __init__(self, state_provider: WorkspaceStateProvider) -> None:
228
- super().__init__()
229
- self.state_provider = state_provider
230
- self.experiments = []
231
-
232
- def _get_selected_experiment_id(self) -> Optional[str]:
233
- """Get the experiment ID from the currently selected row"""
234
- table = self.query_one("#experiments-table", DataTable)
235
- if table.cursor_row is None:
236
- return None
237
- row_key = list(table.rows.keys())[table.cursor_row]
238
- if row_key:
239
- return str(row_key.value)
240
- return None
241
-
242
- def action_delete_experiment(self) -> None:
243
- """Request to delete the selected experiment"""
244
- exp_id = self._get_selected_experiment_id()
245
- if exp_id:
246
- self.post_message(DeleteExperimentRequest(exp_id))
247
-
248
- def action_kill_experiment(self) -> None:
249
- """Request to kill all running jobs in the selected experiment"""
250
- exp_id = self._get_selected_experiment_id()
251
- if exp_id:
252
- self.post_message(KillExperimentRequest(exp_id))
253
-
254
- def compose(self) -> ComposeResult:
255
- # Collapsed header (hidden initially)
256
- with Horizontal(id="collapsed-header", classes="hidden"):
257
- yield Label("", id="collapsed-experiment-info")
258
-
259
- # Full experiments table
260
- with Container(id="experiments-table-container"):
261
- yield Label("Experiments", classes="section-title")
262
- yield DataTable(id="experiments-table", cursor_type="row")
263
-
264
- def on_mount(self) -> None:
265
- """Initialize the experiments table"""
266
- table = self.query_one("#experiments-table", DataTable)
267
- table.add_column("ID", key="id")
268
- table.add_column("Jobs", key="jobs")
269
- table.add_column("Status", key="status")
270
- table.add_column("Started", key="started")
271
- table.add_column("Duration", key="duration")
272
- self.refresh_experiments()
273
-
274
- # If there's only one experiment, automatically select it
275
- if len(self.experiments) == 1:
276
- exp_id = self.experiments[0].experiment_id
277
- self.current_experiment = exp_id
278
- self.collapse_to_experiment(exp_id)
279
- self.post_message(ExperimentSelected(exp_id))
280
-
281
- def refresh_experiments(self) -> None:
282
- """Refresh the experiments list from state provider"""
283
- table = self.query_one("#experiments-table", DataTable)
284
-
285
- try:
286
- self.experiments = self.state_provider.get_experiments()
287
- self.log.debug(
288
- f"Refreshing experiments: found {len(self.experiments)} experiments"
289
- )
290
- except Exception as e:
291
- self.log.error(f"ERROR refreshing experiments: {e}")
292
- import traceback
293
-
294
- self.log.error(traceback.format_exc())
295
- self.experiments = []
296
- return
297
-
298
- # Get existing row keys
299
- existing_keys = set(table.rows.keys())
300
- current_exp_ids = set()
301
-
302
- from datetime import datetime
303
- import time as time_module
304
-
305
- for exp in self.experiments:
306
- exp_id = exp.experiment_id
307
- current_exp_ids.add(exp_id)
308
- total = exp.total_jobs
309
- finished = exp.finished_jobs
310
- failed = exp.failed_jobs
311
-
312
- # Determine status
313
- if failed > 0:
314
- status = f"❌ {failed} failed"
315
- elif finished == total and total > 0:
316
- status = "✓ Done"
317
- elif finished < total:
318
- status = f"▶ {finished}/{total}"
319
- else:
320
- status = "Empty"
321
-
322
- jobs_text = f"{finished}/{total}"
323
-
324
- # Format started time
325
- if exp.started_at:
326
- started = datetime.fromtimestamp(exp.started_at).strftime(
327
- "%Y-%m-%d %H:%M"
328
- )
329
- else:
330
- started = "-"
331
-
332
- # Calculate duration
333
- duration = "-"
334
- if exp.started_at:
335
- if exp.ended_at:
336
- elapsed = exp.ended_at - exp.started_at
337
- else:
338
- # Still running - show elapsed time
339
- elapsed = time_module.time() - exp.started_at
340
- # Format duration
341
- duration = format_duration(elapsed)
342
-
343
- # Update existing row or add new one
344
- if exp_id in existing_keys:
345
- table.update_cell(exp_id, "id", exp_id, update_width=True)
346
- table.update_cell(exp_id, "jobs", jobs_text, update_width=True)
347
- table.update_cell(exp_id, "status", status, update_width=True)
348
- table.update_cell(exp_id, "started", started, update_width=True)
349
- table.update_cell(exp_id, "duration", duration, update_width=True)
350
- else:
351
- table.add_row(exp_id, jobs_text, status, started, duration, key=exp_id)
352
-
353
- # Remove rows for experiments that no longer exist
354
- for old_exp_id in existing_keys - current_exp_ids:
355
- table.remove_row(old_exp_id)
356
-
357
- # Update collapsed header if viewing an experiment
358
- if self.collapsed and self.current_experiment:
359
- self._update_collapsed_header(self.current_experiment)
360
-
361
- def on_data_table_row_selected(self, event: DataTable.RowSelected) -> None:
362
- """Handle experiment selection"""
363
- if event.row_key:
364
- self.current_experiment = str(event.row_key.value)
365
- self.collapse_to_experiment(self.current_experiment)
366
- self.post_message(ExperimentSelected(str(event.row_key.value)))
367
-
368
- def _update_collapsed_header(self, experiment_id: str) -> None:
369
- """Update the collapsed experiment header with current stats"""
370
- exp_info = next(
371
- (exp for exp in self.experiments if exp.experiment_id == experiment_id),
372
- None,
373
- )
374
- if not exp_info:
375
- return
376
-
377
- total = exp_info.total_jobs
378
- finished = exp_info.finished_jobs
379
- failed = exp_info.failed_jobs
380
-
381
- if failed > 0:
382
- status = f"❌ {failed} failed"
383
- elif finished == total and total > 0:
384
- status = "✓ Done"
385
- elif finished < total:
386
- status = f"▶ {finished}/{total}"
387
- else:
388
- status = "Empty"
389
-
390
- collapsed_label = self.query_one("#collapsed-experiment-info", Label)
391
- collapsed_label.update(f"📊 {experiment_id} - {status} (click to go back)")
392
-
393
- def collapse_to_experiment(self, experiment_id: str) -> None:
394
- """Collapse the experiments list to show only the selected experiment"""
395
- self._update_collapsed_header(experiment_id)
396
-
397
- # Hide table, show collapsed header
398
- self.query_one("#experiments-table-container").add_class("hidden")
399
- self.query_one("#collapsed-header").remove_class("hidden")
400
- self.collapsed = True
401
-
402
- def expand_experiments(self) -> None:
403
- """Expand back to full experiments list"""
404
- # Show table, hide collapsed header
405
- self.query_one("#collapsed-header").add_class("hidden")
406
- self.query_one("#experiments-table-container").remove_class("hidden")
407
- self.collapsed = False
408
- self.current_experiment = None
409
-
410
- # Focus the experiments table
411
- table = self.query_one("#experiments-table", DataTable)
412
- table.focus()
413
-
414
- def on_click(self) -> None:
415
- """Handle clicks on the widget"""
416
- if self.collapsed:
417
- self.expand_experiments()
418
- self.post_message(ExperimentDeselected())
419
-
420
-
421
- class ExperimentSelected(Message):
422
- """Message sent when an experiment is selected"""
423
-
424
- def __init__(self, experiment_id: str) -> None:
425
- super().__init__()
426
- self.experiment_id = experiment_id
427
-
428
-
429
- class ExperimentDeselected(Message):
430
- """Message sent when an experiment is deselected"""
431
-
432
- pass
433
-
434
-
435
- class JobSelected(Message):
436
- """Message sent when a job is selected"""
437
-
438
- def __init__(self, job_id: str, experiment_id: str) -> None:
439
- super().__init__()
440
- self.job_id = job_id
441
- self.experiment_id = experiment_id
442
-
443
-
444
- class JobDeselected(Message):
445
- """Message sent when returning from job detail view"""
446
-
447
- pass
448
-
449
-
450
- class ViewJobLogs(Message):
451
- """Message sent when user wants to view job logs"""
452
-
453
- def __init__(self, job_path: str, task_id: str) -> None:
454
- super().__init__()
455
- self.job_path = job_path
456
- self.task_id = task_id
457
-
458
-
459
- class ViewJobLogsRequest(Message):
460
- """Message sent when user requests to view logs from jobs table"""
461
-
462
- def __init__(self, job_id: str, experiment_id: str) -> None:
463
- super().__init__()
464
- self.job_id = job_id
465
- self.experiment_id = experiment_id
466
-
467
-
468
- class DeleteJobRequest(Message):
469
- """Message sent when user requests to delete a job"""
470
-
471
- def __init__(self, job_id: str, experiment_id: str) -> None:
472
- super().__init__()
473
- self.job_id = job_id
474
- self.experiment_id = experiment_id
475
-
476
-
477
- class DeleteExperimentRequest(Message):
478
- """Message sent when user requests to delete an experiment"""
479
-
480
- def __init__(self, experiment_id: str) -> None:
481
- super().__init__()
482
- self.experiment_id = experiment_id
483
-
484
-
485
- class KillJobRequest(Message):
486
- """Message sent when user requests to kill a running job"""
487
-
488
- def __init__(self, job_id: str, experiment_id: str) -> None:
489
- super().__init__()
490
- self.job_id = job_id
491
- self.experiment_id = experiment_id
492
-
493
-
494
- class KillExperimentRequest(Message):
495
- """Message sent when user requests to kill all running jobs in an experiment"""
496
-
497
- def __init__(self, experiment_id: str) -> None:
498
- super().__init__()
499
- self.experiment_id = experiment_id
500
-
501
-
502
- class FilterChanged(Message):
503
- """Message sent when search filter changes"""
504
-
505
- def __init__(self, filter_fn) -> None:
506
- super().__init__()
507
- self.filter_fn = filter_fn
508
-
509
-
510
- class ServicesList(Vertical):
511
- """Widget displaying services for selected experiment
512
-
513
- Services are retrieved from WorkspaceStateProvider.get_services() which
514
- abstracts away whether services are live (from scheduler) or recreated
515
- from database state_dict. The UI treats all services uniformly.
516
- """
517
-
518
- BINDINGS = [
519
- Binding("s", "start_service", "Start"),
520
- Binding("x", "stop_service", "Stop"),
521
- Binding("u", "copy_url", "Copy URL", show=False),
522
- ]
523
-
524
- # State icons for display
525
- STATE_ICONS = {
526
- "STOPPED": "⏹",
527
- "STARTING": "⏳",
528
- "RUNNING": "▶",
529
- "STOPPING": "⏳",
530
- }
531
-
532
- def __init__(self, state_provider: WorkspaceStateProvider) -> None:
533
- super().__init__()
534
- self.state_provider = state_provider
535
- self.current_experiment: Optional[str] = None
536
- self._services: dict = {} # service_id -> Service object
537
-
538
- def compose(self) -> ComposeResult:
539
- yield DataTable(id="services-table", cursor_type="row")
540
-
541
- def on_mount(self) -> None:
542
- """Set up the services table"""
543
- table = self.query_one("#services-table", DataTable)
544
- table.add_columns("ID", "Description", "State", "URL")
545
- table.cursor_type = "row"
546
-
547
- def set_experiment(self, experiment_id: Optional[str]) -> None:
548
- """Set the current experiment and refresh services"""
549
- self.current_experiment = experiment_id
550
- self.refresh_services()
551
-
552
- def refresh_services(self) -> None:
553
- """Refresh the services list from state provider"""
554
- table = self.query_one("#services-table", DataTable)
555
- table.clear()
556
- self._services = {}
557
-
558
- if not self.current_experiment:
559
- return
560
-
561
- # Get services from state provider (handles live vs DB automatically)
562
- services = self.state_provider.get_services(self.current_experiment)
563
-
564
- for service in services:
565
- service_id = service.id
566
- self._services[service_id] = service
567
-
568
- state_name = service.state.name if hasattr(service, "state") else "UNKNOWN"
569
- state_icon = self.STATE_ICONS.get(state_name, "?")
570
- url = getattr(service, "url", None) or "-"
571
- description = (
572
- service.description() if hasattr(service, "description") else ""
573
- )
574
-
575
- table.add_row(
576
- service_id,
577
- description,
578
- f"{state_icon} {state_name}",
579
- url,
580
- key=service_id,
581
- )
582
-
583
- def _get_selected_service(self):
584
- """Get the currently selected Service object"""
585
- table = self.query_one("#services-table", DataTable)
586
- if table.cursor_row is not None and table.row_count > 0:
587
- row_key = list(table.rows.keys())[table.cursor_row]
588
- if row_key:
589
- service_id = str(row_key.value)
590
- return self._services.get(service_id)
591
- return None
592
-
593
- def action_start_service(self) -> None:
594
- """Start the selected service"""
595
- service = self._get_selected_service()
596
- if not service:
597
- return
598
-
599
- try:
600
- if hasattr(service, "get_url"):
601
- url = service.get_url()
602
- self.notify(f"Service started: {url}", severity="information")
603
- else:
604
- self.notify("Service does not support starting", severity="warning")
605
- self.refresh_services()
606
- except Exception as e:
607
- self.notify(f"Failed to start service: {e}", severity="error")
608
-
609
- def action_stop_service(self) -> None:
610
- """Stop the selected service"""
611
- service = self._get_selected_service()
612
- if not service:
613
- return
614
-
615
- from experimaestro.scheduler.services import ServiceState
616
-
617
- if service.state == ServiceState.STOPPED:
618
- self.notify("Service is not running", severity="warning")
619
- return
620
-
621
- try:
622
- if hasattr(service, "stop"):
623
- service.stop()
624
- self.notify(f"Service stopped: {service.id}", severity="information")
625
- else:
626
- self.notify("Service does not support stopping", severity="warning")
627
- self.refresh_services()
628
- except Exception as e:
629
- self.notify(f"Failed to stop service: {e}", severity="error")
630
-
631
- def action_copy_url(self) -> None:
632
- """Copy the service URL to clipboard"""
633
- service = self._get_selected_service()
634
- if not service:
635
- return
636
-
637
- url = getattr(service, "url", None)
638
- if url:
639
- try:
640
- import pyperclip
641
-
642
- pyperclip.copy(url)
643
- self.notify(f"URL copied: {url}", severity="information")
644
- except Exception as e:
645
- self.notify(f"Failed to copy: {e}", severity="error")
646
- else:
647
- self.notify("Start the service first to get URL", severity="warning")
648
-
649
-
650
- class JobDetailView(Widget):
651
- """Widget displaying detailed job information"""
652
-
653
- BINDINGS = [
654
- Binding("l", "view_logs", "View Logs", priority=True),
655
- ]
656
-
657
- def __init__(self, state_provider: WorkspaceStateProvider) -> None:
658
- super().__init__()
659
- self.state_provider = state_provider
660
- self.current_job_id: Optional[str] = None
661
- self.current_experiment_id: Optional[str] = None
662
- self.job_data: Optional[dict] = None
663
-
664
- def compose(self) -> ComposeResult:
665
- yield Label("Job Details", classes="section-title")
666
- with Vertical(id="job-detail-content"):
667
- yield Label("", id="job-id-label")
668
- yield Label("", id="job-task-label")
669
- yield Label("", id="job-status-label")
670
- yield Label("", id="job-path-label")
671
- yield Label("", id="job-times-label")
672
- yield Label("Tags:", classes="subsection-title")
673
- yield Label("", id="job-tags-label")
674
- yield Label("Progress:", classes="subsection-title")
675
- yield Label("", id="job-progress-label")
676
- yield Label("", id="job-logs-hint")
677
-
678
- def action_view_logs(self) -> None:
679
- """View job logs with toolong"""
680
- if self.job_data and self.job_data.path and self.job_data.task_id:
681
- self.post_message(
682
- ViewJobLogs(str(self.job_data.path), self.job_data.task_id)
683
- )
684
-
685
- def set_job(self, job_id: str, experiment_id: str) -> None:
686
- """Set the job to display"""
687
- self.current_job_id = job_id
688
- self.current_experiment_id = experiment_id
689
- self.refresh_job_detail()
690
-
691
- def refresh_job_detail(self) -> None:
692
- """Refresh job details from state provider"""
693
- if not self.current_job_id or not self.current_experiment_id:
694
- return
695
-
696
- job = self.state_provider.get_job(
697
- self.current_job_id, self.current_experiment_id
698
- )
699
- if not job:
700
- self.log(f"Job not found: {self.current_job_id}")
701
- return
702
-
703
- self.job_data = job
704
-
705
- # Update labels
706
- self.query_one("#job-id-label", Label).update(f"Job ID: {job.identifier}")
707
- self.query_one("#job-task-label", Label).update(f"Task: {job.task_id}")
708
-
709
- # Format status with icon and name
710
- status_name = job.state.name if job.state else "unknown"
711
- failure_reason = getattr(job, "failure_reason", None)
712
- status_icon = get_status_icon(status_name, failure_reason)
713
- status_text = f"{status_icon} {status_name}"
714
- if failure_reason:
715
- status_text += f" ({failure_reason.name})"
716
-
717
- self.query_one("#job-status-label", Label).update(f"Status: {status_text}")
718
-
719
- # Path (from locator)
720
- locator = job.locator or "-"
721
- self.query_one("#job-path-label", Label).update(f"Locator: {locator}")
722
-
723
- # Times - format timestamps
724
- from datetime import datetime
725
- import time as time_module
726
-
727
- def format_time(ts):
728
- if ts:
729
- return datetime.fromtimestamp(ts).strftime("%Y-%m-%d %H:%M:%S")
730
- return "-"
731
-
732
- submitted = format_time(job.submittime)
733
- start = format_time(job.starttime)
734
- end = format_time(job.endtime)
735
-
736
- # Calculate duration
737
- duration = "-"
738
- if job.starttime:
739
- if job.endtime:
740
- duration = format_duration(job.endtime - job.starttime)
741
- else:
742
- duration = (
743
- format_duration(time_module.time() - job.starttime) + " (running)"
744
- )
745
-
746
- times_text = f"Submitted: {submitted} | Start: {start} | End: {end} | Duration: {duration}"
747
- self.query_one("#job-times-label", Label).update(times_text)
748
-
749
- # Tags - job.tags is now a dict
750
- tags = job.tags
751
- if tags:
752
- tags_text = ", ".join(f"{k}={v}" for k, v in tags.items())
753
- else:
754
- tags_text = "(no tags)"
755
- self.query_one("#job-tags-label", Label).update(tags_text)
756
-
757
- # Progress
758
- progress_list = job.progress or []
759
- if progress_list:
760
- progress_lines = []
761
- for p in progress_list:
762
- level = p.get("level", 0)
763
- pct = p.get("progress", 0) * 100
764
- desc = p.get("desc", "")
765
- indent = " " * level
766
- progress_lines.append(f"{indent}{pct:.1f}% {desc}")
767
- progress_text = "\n".join(progress_lines) if progress_lines else "-"
768
- else:
769
- progress_text = "-"
770
- self.query_one("#job-progress-label", Label).update(progress_text)
771
-
772
- # Log files hint - log files are named after the last part of the task ID
773
- job_path = job.path
774
- task_id = job.task_id
775
- if job_path and task_id:
776
- # Extract the last component of the task ID (e.g., "evaluate" from "mnist_xp.learn.evaluate")
777
- task_name = task_id.split(".")[-1]
778
- stdout_path = job_path / f"{task_name}.out"
779
- stderr_path = job_path / f"{task_name}.err"
780
- logs_exist = stdout_path.exists() or stderr_path.exists()
781
- if logs_exist:
782
- self.query_one("#job-logs-hint", Label).update(
783
- "[bold cyan]Press 'l' to view logs[/bold cyan]"
784
- )
785
- else:
786
- self.query_one("#job-logs-hint", Label).update("(no log files found)")
787
- else:
788
- self.query_one("#job-logs-hint", Label).update("")
789
-
790
-
791
- class SearchBar(Widget):
792
- """Search bar widget with filter hints for filtering jobs"""
793
-
794
- visible: reactive[bool] = reactive(False)
795
- _keep_filter: bool = False # Flag to keep filter when hiding
796
- _query_valid: bool = False # Track if current query is valid
797
-
798
- def __init__(self) -> None:
799
- super().__init__()
800
- self.filter_fn = None
801
- self.active_query = "" # Store the active query text
802
-
803
- def compose(self) -> ComposeResult:
804
- # Active filter indicator (shown when filter active but bar hidden)
805
- yield Static("", id="active-filter")
806
- # Search input container
807
- with Vertical(id="search-container"):
808
- yield Input(
809
- placeholder="Filter: @state = 'done', @name ~ 'pattern', tag = 'value'",
810
- id="search-input",
811
- )
812
- yield Static(
813
- "Syntax: @state = 'done' | @name ~ 'regex' | tag = 'value' | and/or",
814
- id="search-hints",
815
- )
816
- yield Static("", id="search-error")
817
-
818
- def on_mount(self) -> None:
819
- """Initialize visibility state"""
820
- # Start with everything hidden
821
- self.display = False
822
- self.query_one("#search-container").display = False
823
- self.query_one("#active-filter").display = False
824
- self.query_one("#search-error").display = False
825
-
826
- def watch_visible(self, visible: bool) -> None:
827
- """Show/hide search bar"""
828
- search_container = self.query_one("#search-container")
829
- active_filter = self.query_one("#active-filter")
830
- error_widget = self.query_one("#search-error")
831
-
832
- if visible:
833
- self.display = True
834
- search_container.display = True
835
- active_filter.display = False
836
- self.query_one("#search-input", Input).focus()
837
- else:
838
- if not self._keep_filter:
839
- self.query_one("#search-input", Input).value = ""
840
- self.filter_fn = None
841
- self.active_query = ""
842
- self._query_valid = False
843
- self._keep_filter = False
844
-
845
- # Show/hide based on whether filter is active
846
- if self.filter_fn is not None:
847
- # Filter active - show indicator, hide input
848
- self.display = True
849
- search_container.display = False
850
- error_widget.display = False
851
- active_filter.update(
852
- f"Filter: {self.active_query} (/ to edit, c to clear)"
853
- )
854
- active_filter.display = True
855
- else:
856
- # No filter - hide everything including this widget
857
- self.display = False
858
- search_container.display = False
859
- active_filter.display = False
860
- error_widget.display = False
861
-
862
- def on_input_changed(self, event: Input.Changed) -> None:
863
- """Parse filter expression when input changes"""
864
- query = event.value.strip()
865
- input_widget = self.query_one("#search-input", Input)
866
- error_widget = self.query_one("#search-error", Static)
867
-
868
- if not query:
869
- self.filter_fn = None
870
- self._query_valid = False
871
- self.post_message(FilterChanged(None))
872
- input_widget.remove_class("error")
873
- input_widget.remove_class("valid")
874
- error_widget.display = False
875
- return
876
-
877
- try:
878
- from experimaestro.cli.filter import createFilter
879
-
880
- self.filter_fn = createFilter(query)
881
- self._query_valid = True
882
- self.active_query = query
883
- self.post_message(FilterChanged(self.filter_fn))
884
- input_widget.remove_class("error")
885
- input_widget.add_class("valid")
886
- error_widget.display = False
887
- except Exception as e:
888
- self.filter_fn = None
889
- self._query_valid = False
890
- self.post_message(FilterChanged(None))
891
- input_widget.remove_class("valid")
892
- input_widget.add_class("error")
893
- error_widget.update(f"Invalid query: {str(e)[:50]}")
894
- error_widget.display = True
895
-
896
- def on_input_submitted(self, event: Input.Submitted) -> None:
897
- """Apply filter and hide search bar (only if query is valid)"""
898
- if self._query_valid and self.filter_fn is not None:
899
- # Set flag to keep filter when hiding
900
- self._keep_filter = True
901
- self.visible = False
902
- # Post message to focus jobs table
903
- self.post_message(SearchApplied())
904
- # If invalid, do nothing (keep input focused for correction)
905
-
906
-
907
- class SearchApplied(Message):
908
- """Message sent when search filter is applied via Enter"""
909
-
910
- pass
911
-
912
-
913
- class JobsTable(Vertical):
914
- """Widget displaying jobs for selected experiment"""
915
-
916
- BINDINGS = [
917
- Binding("d", "delete_job", "Delete", show=False),
918
- Binding("k", "kill_job", "Kill", show=False),
919
- Binding("l", "view_logs", "Logs"),
920
- Binding("f", "copy_path", "Copy Path", show=False),
921
- Binding("/", "toggle_search", "Search"),
922
- Binding("c", "clear_filter", "Clear", show=False),
923
- Binding("r", "refresh_live", "Refresh"),
924
- Binding("S", "sort_by_status", "Sort ⚑", show=False),
925
- Binding("T", "sort_by_task", "Sort Task", show=False),
926
- Binding("D", "sort_by_submitted", "Sort Date", show=False),
927
- Binding("escape", "clear_search", show=False, priority=True),
928
- ]
929
-
930
- # Track current sort state
931
- _sort_column: Optional[str] = None
932
- _sort_reverse: bool = False
933
- _needs_rebuild: bool = True # Start with rebuild needed
934
-
935
- def __init__(self, state_provider: WorkspaceStateProvider) -> None:
936
- super().__init__()
937
- self.state_provider = state_provider
938
- self.filter_fn = None
939
- self.current_experiment: Optional[str] = None
940
-
941
- def compose(self) -> ComposeResult:
942
- yield SearchBar()
943
- yield DataTable(id="jobs-table", cursor_type="row")
944
-
945
- def action_toggle_search(self) -> None:
946
- """Toggle search bar visibility"""
947
- search_bar = self.query_one(SearchBar)
948
- search_bar.visible = not search_bar.visible
949
-
950
- def action_clear_filter(self) -> None:
951
- """Clear the active filter"""
952
- if self.filter_fn is not None:
953
- search_bar = self.query_one(SearchBar)
954
- search_bar.query_one("#search-input", Input).value = ""
955
- search_bar.filter_fn = None
956
- search_bar.active_query = ""
957
- search_bar._query_valid = False
958
- # Hide the SearchBar completely
959
- search_bar.display = False
960
- search_bar.query_one("#search-container").display = False
961
- search_bar.query_one("#active-filter").display = False
962
- search_bar.query_one("#search-error").display = False
963
- self.filter_fn = None
964
- self.refresh_jobs()
965
- self.notify("Filter cleared", severity="information")
966
-
967
- def action_sort_by_status(self) -> None:
968
- """Sort jobs by status"""
969
- if self._sort_column == "status":
970
- self._sort_reverse = not self._sort_reverse
971
- else:
972
- self._sort_column = "status"
973
- self._sort_reverse = False
974
- self._needs_rebuild = True
975
- self._update_column_headers()
976
- self.refresh_jobs()
977
- order = "desc" if self._sort_reverse else "asc"
978
- self.notify(f"Sorted by status ({order})", severity="information")
979
-
980
- def action_sort_by_task(self) -> None:
981
- """Sort jobs by task"""
982
- if self._sort_column == "task":
983
- self._sort_reverse = not self._sort_reverse
984
- else:
985
- self._sort_column = "task"
986
- self._sort_reverse = False
987
- self._needs_rebuild = True
988
- self._update_column_headers()
989
- self.refresh_jobs()
990
- order = "desc" if self._sort_reverse else "asc"
991
- self.notify(f"Sorted by task ({order})", severity="information")
992
-
993
- def action_sort_by_submitted(self) -> None:
994
- """Sort jobs by submission time"""
995
- if self._sort_column == "submitted":
996
- self._sort_reverse = not self._sort_reverse
997
- else:
998
- self._sort_column = "submitted"
999
- self._sort_reverse = False
1000
- self._needs_rebuild = True
1001
- self._update_column_headers()
1002
- self.refresh_jobs()
1003
- order = "newest first" if self._sort_reverse else "oldest first"
1004
- self.notify(f"Sorted by date ({order})", severity="information")
1005
-
1006
- def action_clear_search(self) -> None:
1007
- """Handle escape: hide search bar if visible, or go back"""
1008
- search_bar = self.query_one(SearchBar)
1009
- if search_bar.visible:
1010
- # Search bar visible - hide it and clear filter
1011
- search_bar.visible = False
1012
- self.filter_fn = None
1013
- self.refresh_jobs()
1014
- # Focus the jobs table
1015
- self.query_one("#jobs-table", DataTable).focus()
1016
- else:
1017
- # Search bar hidden - go back (keep filter)
1018
- self.app.action_go_back()
1019
-
1020
- def action_refresh_live(self) -> None:
1021
- """Refresh the jobs table"""
1022
- self.refresh_jobs()
1023
- self.notify("Jobs refreshed", severity="information")
1024
-
1025
- def on_filter_changed(self, message: FilterChanged) -> None:
1026
- """Apply new filter"""
1027
- self.filter_fn = message.filter_fn
1028
- self.refresh_jobs()
1029
-
1030
- def on_search_applied(self, message: SearchApplied) -> None:
1031
- """Focus jobs table when search is applied"""
1032
- self.query_one("#jobs-table", DataTable).focus()
1033
-
1034
- def _get_selected_job_id(self) -> Optional[str]:
1035
- """Get the job ID from the currently selected row"""
1036
- table = self.query_one("#jobs-table", DataTable)
1037
- if table.cursor_row is None:
1038
- return None
1039
- row_key = table.get_row_at(table.cursor_row)
1040
- if row_key:
1041
- # The first column is job_id
1042
- return str(table.get_row_at(table.cursor_row)[0])
1043
- return None
1044
-
1045
- def action_delete_job(self) -> None:
1046
- """Request to delete the selected job"""
1047
- table = self.query_one("#jobs-table", DataTable)
1048
- if table.cursor_row is None or not self.current_experiment:
1049
- return
1050
-
1051
- # Get job ID from the row key
1052
- row_key = list(table.rows.keys())[table.cursor_row]
1053
- if row_key:
1054
- job_id = str(row_key.value)
1055
- self.post_message(DeleteJobRequest(job_id, self.current_experiment))
1056
-
1057
- def action_kill_job(self) -> None:
1058
- """Request to kill the selected job"""
1059
- table = self.query_one("#jobs-table", DataTable)
1060
- if table.cursor_row is None or not self.current_experiment:
1061
- return
1062
-
1063
- row_key = list(table.rows.keys())[table.cursor_row]
1064
- if row_key:
1065
- job_id = str(row_key.value)
1066
- self.post_message(KillJobRequest(job_id, self.current_experiment))
1067
-
1068
- def action_view_logs(self) -> None:
1069
- """Request to view logs for the selected job"""
1070
- table = self.query_one("#jobs-table", DataTable)
1071
- if table.cursor_row is None or not self.current_experiment:
1072
- return
1073
-
1074
- row_key = list(table.rows.keys())[table.cursor_row]
1075
- if row_key:
1076
- job_id = str(row_key.value)
1077
- self.post_message(ViewJobLogsRequest(job_id, self.current_experiment))
1078
-
1079
- def action_copy_path(self) -> None:
1080
- """Copy the job folder path to clipboard"""
1081
- import pyperclip
1082
-
1083
- table = self.query_one("#jobs-table", DataTable)
1084
- if table.cursor_row is None or not self.current_experiment:
1085
- return
1086
-
1087
- row_key = list(table.rows.keys())[table.cursor_row]
1088
- if row_key:
1089
- job_id = str(row_key.value)
1090
- job = self.state_provider.get_job(job_id, self.current_experiment)
1091
- if job and job.path:
1092
- try:
1093
- pyperclip.copy(str(job.path))
1094
- self.notify(f"Path copied: {job.path}", severity="information")
1095
- except Exception as e:
1096
- self.notify(f"Failed to copy: {e}", severity="error")
1097
- else:
1098
- self.notify("No path available for this job", severity="warning")
1099
-
1100
- # Status sort order (for sorting by status)
1101
- STATUS_ORDER = {
1102
- "running": 0,
1103
- "waiting": 1,
1104
- "error": 2,
1105
- "done": 3,
1106
- "unscheduled": 4,
1107
- "phantom": 5,
1108
- }
1109
-
1110
- # Failure reason sort order (within error status)
1111
- # More actionable failures first
1112
- FAILURE_ORDER = {
1113
- "TIMEOUT": 0, # Might just need retry
1114
- "MEMORY": 1, # Might need resource adjustment
1115
- "DEPENDENCY": 2, # Need to fix upstream job first
1116
- "FAILED": 3, # Generic failure
1117
- }
1118
-
1119
- @classmethod
1120
- def _get_status_sort_key(cls, job):
1121
- """Get sort key for a job based on status and failure reason.
1122
-
1123
- Returns tuple (status_order, failure_order) for proper sorting.
1124
- """
1125
- state_name = job.state.name if job.state else "unknown"
1126
- status_order = cls.STATUS_ORDER.get(state_name, 99)
1127
-
1128
- # For error jobs, also sort by failure reason
1129
- if state_name == "error":
1130
- failure_reason = getattr(job, "failure_reason", None)
1131
- if failure_reason:
1132
- failure_order = cls.FAILURE_ORDER.get(failure_reason.name, 99)
1133
- else:
1134
- failure_order = 99 # Unknown failure at end
1135
- else:
1136
- failure_order = 0
1137
-
1138
- return (status_order, failure_order)
1139
-
1140
- # Column key to display name mapping
1141
- COLUMN_LABELS = {
1142
- "job_id": "ID",
1143
- "task": "Task",
1144
- "status": "⚑",
1145
- "tags": "Tags",
1146
- "submitted": "Submitted",
1147
- "duration": "Duration",
1148
- }
1149
-
1150
- # Columns that support sorting (column key -> sort column name)
1151
- SORTABLE_COLUMNS = {
1152
- "status": "status",
1153
- "task": "task",
1154
- "submitted": "submitted",
1155
- }
1156
-
1157
- def on_mount(self) -> None:
1158
- """Initialize the jobs table"""
1159
- table = self.query_one("#jobs-table", DataTable)
1160
- table.add_column("ID", key="job_id")
1161
- table.add_column("Task", key="task")
1162
- table.add_column("⚑", key="status", width=6)
1163
- table.add_column("Tags", key="tags")
1164
- table.add_column("Submitted", key="submitted")
1165
- table.add_column("Duration", key="duration")
1166
- table.cursor_type = "row"
1167
- table.zebra_stripes = True
1168
-
1169
- def _update_column_headers(self) -> None:
1170
- """Update column headers with sort indicators"""
1171
- table = self.query_one("#jobs-table", DataTable)
1172
- for column in table.columns.values():
1173
- col_key = str(column.key.value) if column.key else None
1174
- if col_key and col_key in self.COLUMN_LABELS:
1175
- label = self.COLUMN_LABELS[col_key]
1176
- sort_col = self.SORTABLE_COLUMNS.get(col_key)
1177
- if sort_col and self._sort_column == sort_col:
1178
- # Add sort indicator
1179
- indicator = "▼" if self._sort_reverse else "▲"
1180
- new_label = f"{label} {indicator}"
1181
- else:
1182
- new_label = label
1183
- column.label = new_label
1184
-
1185
- def on_data_table_header_selected(self, event: DataTable.HeaderSelected) -> None:
1186
- """Handle column header click for sorting"""
1187
- col_key = str(event.column_key.value) if event.column_key else None
1188
- if col_key and col_key in self.SORTABLE_COLUMNS:
1189
- sort_col = self.SORTABLE_COLUMNS[col_key]
1190
- if self._sort_column == sort_col:
1191
- self._sort_reverse = not self._sort_reverse
1192
- else:
1193
- self._sort_column = sort_col
1194
- self._sort_reverse = False
1195
- self._needs_rebuild = True
1196
- self._update_column_headers()
1197
- self.refresh_jobs()
1198
-
1199
- def set_experiment(self, experiment_id: Optional[str]) -> None:
1200
- """Set the current experiment and refresh jobs"""
1201
- self.current_experiment = experiment_id
1202
- self.refresh_jobs()
1203
-
1204
- def refresh_jobs(self) -> None: # noqa: C901
1205
- """Refresh the jobs list from state provider"""
1206
- table = self.query_one("#jobs-table", DataTable)
1207
-
1208
- if not self.current_experiment:
1209
- return
1210
-
1211
- jobs = self.state_provider.get_jobs(self.current_experiment)
1212
- self.log.debug(
1213
- f"Refreshing jobs for {self.current_experiment}: {len(jobs)} jobs"
1214
- )
1215
-
1216
- # Apply filter if set
1217
- if self.filter_fn:
1218
- jobs = [j for j in jobs if self.filter_fn(j)]
1219
- self.log.debug(f"After filter: {len(jobs)} jobs")
1220
-
1221
- # Sort jobs based on selected column
1222
- if self._sort_column == "status":
1223
- # Sort by status priority, then by failure reason for errors
1224
- jobs.sort(
1225
- key=self._get_status_sort_key,
1226
- reverse=self._sort_reverse,
1227
- )
1228
- elif self._sort_column == "task":
1229
- # Sort by task name
1230
- jobs.sort(
1231
- key=lambda j: j.task_id or "",
1232
- reverse=self._sort_reverse,
1233
- )
1234
- else:
1235
- # Default: sort by submission time (oldest first by default)
1236
- # Jobs without submittime go to the end
1237
- jobs.sort(
1238
- key=lambda j: j.submittime or float("inf"),
1239
- reverse=self._sort_reverse,
1240
- )
1241
-
1242
- # Check if we need to rebuild (new/removed jobs, or status changed when sorting by status)
1243
- from datetime import datetime
1244
- import time as time_module
1245
-
1246
- existing_keys = {str(k.value) for k in table.rows.keys()}
1247
- current_job_ids = {job.identifier for job in jobs}
1248
-
1249
- # Check if job set changed
1250
- jobs_changed = existing_keys != current_job_ids
1251
-
1252
- # Check if status changed when sorting by status
1253
- status_changed = False
1254
- if self._sort_column == "status" and not jobs_changed:
1255
- current_statuses = {
1256
- job.identifier: (job.state.name if job.state else "unknown")
1257
- for job in jobs
1258
- }
1259
- if (
1260
- hasattr(self, "_last_statuses")
1261
- and self._last_statuses != current_statuses
1262
- ):
1263
- status_changed = True
1264
- self._last_statuses = current_statuses
1265
-
1266
- needs_rebuild = self._needs_rebuild or jobs_changed or status_changed
1267
- self._needs_rebuild = False
1268
-
1269
- # Build row data for all jobs
1270
- rows_data = {}
1271
- for job in jobs:
1272
- job_id = job.identifier
1273
- task_id = job.task_id
1274
- status = job.state.name if job.state else "unknown"
1275
-
1276
- # Format status with icon (and progress % if running)
1277
- if status == "running":
1278
- progress_list = job.progress or []
1279
- if progress_list:
1280
- last_progress = progress_list[-1]
1281
- progress_pct = last_progress.get("progress", 0) * 100
1282
- status_text = f"▶ {progress_pct:.0f}%"
1283
- else:
1284
- status_text = "▶"
1285
- else:
1286
- failure_reason = getattr(job, "failure_reason", None)
1287
- status_text = get_status_icon(status, failure_reason)
1288
-
1289
- # Format tags - show all tags on single line
1290
- tags = job.tags
1291
- if tags:
1292
- tags_text = Text()
1293
- for i, (k, v) in enumerate(tags.items()):
1294
- if i > 0:
1295
- tags_text.append(", ")
1296
- tags_text.append(f"{k}", style="bold")
1297
- tags_text.append(f"={v}")
1298
- else:
1299
- tags_text = Text("-")
1300
-
1301
- submitted = "-"
1302
- if job.submittime:
1303
- submitted = datetime.fromtimestamp(job.submittime).strftime(
1304
- "%Y-%m-%d %H:%M"
1305
- )
1306
-
1307
- # Calculate duration
1308
- start = job.starttime
1309
- end = job.endtime
1310
- duration = "-"
1311
- if start:
1312
- if end:
1313
- elapsed = end - start
1314
- else:
1315
- elapsed = time_module.time() - start
1316
- duration = self._format_duration(elapsed)
1317
-
1318
- job_id_short = job_id[:7]
1319
- rows_data[job_id] = (
1320
- job_id_short,
1321
- task_id,
1322
- status_text,
1323
- tags_text,
1324
- submitted,
1325
- duration,
1326
- )
1327
-
1328
- if needs_rebuild:
1329
- # Full rebuild needed - save selection, clear, rebuild
1330
- selected_key = None
1331
- if table.cursor_row is not None and table.row_count > 0:
1332
- try:
1333
- row_keys = list(table.rows.keys())
1334
- if table.cursor_row < len(row_keys):
1335
- selected_key = str(row_keys[table.cursor_row].value)
1336
- except (IndexError, KeyError):
1337
- pass
1338
-
1339
- table.clear()
1340
- new_cursor_row = None
1341
- for idx, job in enumerate(jobs):
1342
- job_id = job.identifier
1343
- table.add_row(*rows_data[job_id], key=job_id)
1344
- if selected_key == job_id:
1345
- new_cursor_row = idx
1346
-
1347
- if new_cursor_row is not None and table.row_count > 0:
1348
- table.move_cursor(row=new_cursor_row)
1349
- else:
1350
- # Just update cells in place - no reordering needed
1351
- for job_id, row_data in rows_data.items():
1352
- (
1353
- job_id_short,
1354
- task_id,
1355
- status_text,
1356
- tags_text,
1357
- submitted,
1358
- duration,
1359
- ) = row_data
1360
- table.update_cell(job_id, "job_id", job_id_short, update_width=True)
1361
- table.update_cell(job_id, "task", task_id, update_width=True)
1362
- table.update_cell(job_id, "status", status_text, update_width=True)
1363
- table.update_cell(job_id, "tags", tags_text, update_width=True)
1364
- table.update_cell(job_id, "submitted", submitted, update_width=True)
1365
- table.update_cell(job_id, "duration", duration, update_width=True)
1366
-
1367
- self.log.debug(
1368
- f"Jobs table now has {table.row_count} rows (rebuild={needs_rebuild})"
1369
- )
1370
-
1371
- def _format_duration(self, seconds: float) -> str:
1372
- """Format duration in seconds to human-readable string"""
1373
- if seconds < 0:
1374
- return "-"
1375
-
1376
- seconds = int(seconds)
1377
- if seconds < 60:
1378
- return f"{seconds}s"
1379
- elif seconds < 3600:
1380
- minutes = seconds // 60
1381
- secs = seconds % 60
1382
- return f"{minutes}m {secs}s"
1383
- elif seconds < 86400:
1384
- hours = seconds // 3600
1385
- minutes = (seconds % 3600) // 60
1386
- return f"{hours}h {minutes}m"
1387
- else:
1388
- days = seconds // 86400
1389
- hours = (seconds % 86400) // 3600
1390
- return f"{days}d {hours}h"
1391
-
1392
- def on_data_table_row_selected(self, event: DataTable.RowSelected) -> None:
1393
- """Handle job selection"""
1394
- if event.row_key and self.current_experiment:
1395
- job_id = str(event.row_key.value)
1396
- self.post_message(JobSelected(job_id, self.current_experiment))
1397
-
1398
-
1399
- class SizeCalculated(Message):
1400
- """Message sent when a folder size has been calculated"""
1401
-
1402
- def __init__(self, job_id: str, size: str, size_bytes: int) -> None:
1403
- super().__init__()
1404
- self.job_id = job_id
1405
- self.size = size
1406
- self.size_bytes = size_bytes
1407
-
1408
-
1409
- class OrphanJobsScreen(Screen):
1410
- """Screen for viewing and managing orphan jobs"""
1411
-
1412
- BINDINGS = [
1413
- Binding("d", "delete_selected", "Delete"),
1414
- Binding("D", "delete_all", "Delete All", key_display="D"),
1415
- Binding("escape", "go_back", "Back"),
1416
- Binding("q", "go_back", "Quit"),
1417
- Binding("r", "refresh", "Refresh"),
1418
- Binding("f", "copy_path", "Copy Path", show=False),
1419
- Binding("T", "sort_by_task", "Sort Task", show=False),
1420
- Binding("Z", "sort_by_size", "Sort Size", show=False),
1421
- ]
1422
-
1423
- _size_cache: dict = {} # Class-level cache (formatted strings)
1424
- _size_bytes_cache: dict = {} # Class-level cache (raw bytes for sorting)
1425
-
1426
- def __init__(self, state_provider: WorkspaceStateProvider) -> None:
1427
- super().__init__()
1428
- self.state_provider = state_provider
1429
- self.orphan_jobs = []
1430
- self._pending_jobs = [] # Jobs waiting for size calculation
1431
- self._sort_column: Optional[str] = None
1432
- self._sort_reverse: bool = False
1433
-
1434
- def compose(self) -> ComposeResult:
1435
- yield Header()
1436
- with Vertical(id="orphan-container"):
1437
- yield Static("Orphan Jobs", id="orphan-title")
1438
- yield Static("", id="orphan-stats")
1439
- yield DataTable(id="orphan-table", cursor_type="row")
1440
- yield Static("", id="orphan-job-info")
1441
- yield Footer()
1442
-
1443
- def on_mount(self) -> None:
1444
- """Initialize the orphan jobs table"""
1445
- table = self.query_one("#orphan-table", DataTable)
1446
- table.add_column("⚑", key="status", width=3)
1447
- table.add_column("Job ID", key="job_id", width=10)
1448
- table.add_column("Task", key="task")
1449
- table.add_column("Size", key="size", width=10)
1450
- self.refresh_orphans()
1451
-
1452
- def action_sort_by_task(self) -> None:
1453
- """Sort by task name"""
1454
- if self._sort_column == "task":
1455
- self._sort_reverse = not self._sort_reverse
1456
- else:
1457
- self._sort_column = "task"
1458
- self._sort_reverse = False
1459
- self._rebuild_table()
1460
- order = "desc" if self._sort_reverse else "asc"
1461
- self.notify(f"Sorted by task ({order})", severity="information")
1462
-
1463
- def action_sort_by_size(self) -> None:
1464
- """Sort by size"""
1465
- if self._sort_column == "size":
1466
- self._sort_reverse = not self._sort_reverse
1467
- else:
1468
- self._sort_column = "size"
1469
- self._sort_reverse = True # Default: largest first
1470
- self._rebuild_table()
1471
- order = "largest first" if self._sort_reverse else "smallest first"
1472
- self.notify(f"Sorted by size ({order})", severity="information")
1473
-
1474
- def _get_sorted_jobs(self):
1475
- """Return jobs sorted by current sort column"""
1476
- jobs = self.orphan_jobs[:]
1477
- if self._sort_column == "task":
1478
- jobs.sort(key=lambda j: j.task_id or "", reverse=self._sort_reverse)
1479
- elif self._sort_column == "size":
1480
- # Sort by raw bytes, jobs not in cache go to end
1481
- jobs.sort(
1482
- key=lambda j: self._size_bytes_cache.get(j.identifier, -1),
1483
- reverse=self._sort_reverse,
1484
- )
1485
- return jobs
1486
-
1487
- def _rebuild_table(self) -> None:
1488
- """Rebuild the table with current sort order"""
1489
- table = self.query_one("#orphan-table", DataTable)
1490
- table.clear()
1491
-
1492
- for job in self._get_sorted_jobs():
1493
- failure_reason = getattr(job, "failure_reason", None)
1494
- status_icon = get_status_icon(
1495
- job.state.name if job.state else "unknown", failure_reason
1496
- )
1497
- if job.identifier in self._size_cache:
1498
- size_text = self._size_cache[job.identifier]
1499
- else:
1500
- size_text = "waiting"
1501
- table.add_row(
1502
- status_icon,
1503
- job.identifier[:7],
1504
- job.task_id,
1505
- size_text,
1506
- key=job.identifier,
1507
- )
1508
-
1509
- def refresh_orphans(self) -> None:
1510
- """Refresh the orphan jobs list"""
1511
- # Only include orphan jobs that have an existing folder
1512
- all_orphans = self.state_provider.get_orphan_jobs()
1513
- self.orphan_jobs = [j for j in all_orphans if j.path and j.path.exists()]
1514
-
1515
- # Update stats
1516
- stats = self.query_one("#orphan-stats", Static)
1517
- stats.update(f"Found {len(self.orphan_jobs)} orphan jobs")
1518
-
1519
- # Collect jobs needing size calculation
1520
- self._pending_jobs = [
1521
- j for j in self.orphan_jobs if j.identifier not in self._size_cache
1522
- ]
1523
-
1524
- # Rebuild table
1525
- self._rebuild_table()
1526
-
1527
- # Start calculating sizes
1528
- if self._pending_jobs:
1529
- self._calculate_next_size()
1530
-
1531
- def _calculate_next_size(self) -> None:
1532
- """Calculate size for the next pending job using a worker"""
1533
- if not self._pending_jobs:
1534
- return
1535
-
1536
- job = self._pending_jobs.pop(0)
1537
- # Update to "calc..."
1538
- self._update_size_cell(job.identifier, "calc...")
1539
- # Run calculation in worker thread
1540
- self.run_worker(
1541
- self._calc_size_worker(job.identifier, job.path),
1542
- thread=True,
1543
- )
1544
-
1545
- async def _calc_size_worker(self, job_id: str, path):
1546
- """Worker to calculate folder size"""
1547
- size_bytes = await self._get_folder_size_async(path)
1548
- size_str = self._format_size(size_bytes)
1549
- self._size_cache[job_id] = size_str
1550
- self._size_bytes_cache[job_id] = size_bytes
1551
- self.post_message(SizeCalculated(job_id, size_str, size_bytes))
1552
-
1553
- def on_size_calculated(self, message: SizeCalculated) -> None:
1554
- """Handle size calculation completion"""
1555
- self._size_bytes_cache[message.job_id] = message.size_bytes
1556
- self._update_size_cell(message.job_id, message.size)
1557
- # Calculate next one
1558
- self._calculate_next_size()
1559
-
1560
@staticmethod
async def _get_folder_size_async(path) -> int:
    """Return the total size of a folder in bytes, preferring ``du``.

    When a ``du`` executable is found it is run with a 30 second timeout
    (``-sk`` on macOS, scaled from KiB to bytes; ``-sb`` elsewhere). If
    ``du`` is missing, fails, times out or prints something unparsable,
    the pure-Python walk is used instead.

    Args:
        path: Folder to measure

    Returns:
        Size in bytes
    """
    import asyncio
    import shutil
    import sys

    if shutil.which("du"):
        # macOS: du -sk reports KiB; otherwise du -sb reports bytes
        if sys.platform == "darwin":
            flag, multiplier = "-sk", 1024
        else:
            flag, multiplier = "-sb", 1

        proc = None
        try:
            proc = await asyncio.create_subprocess_exec(
                "du",
                flag,
                str(path),
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.DEVNULL,
            )
            stdout, _ = await asyncio.wait_for(proc.communicate(), timeout=30)
            if proc.returncode == 0 and stdout:
                # Output format: "SIZE\tPATH"
                return int(stdout.decode().split()[0]) * multiplier
        except (asyncio.TimeoutError, ValueError, IndexError, OSError):
            pass  # Fall back to Python implementation
        finally:
            # A timeout only cancels communicate(); the child keeps running
            # unless it is killed and reaped explicitly.
            if proc is not None and proc.returncode is None:
                try:
                    proc.kill()
                    await proc.wait()
                except OSError:
                    pass  # Process already gone

    # Fallback: Python implementation
    return OrphanJobsScreen._get_folder_size_sync(path)
1602
-
1603
- @staticmethod
1604
- def _get_folder_size_sync(path) -> int:
1605
- """Calculate total size of a folder using Python (fallback)"""
1606
- total = 0
1607
- try:
1608
- for entry in path.rglob("*"):
1609
- if entry.is_file():
1610
- total += entry.stat().st_size
1611
- except (OSError, PermissionError):
1612
- pass
1613
- return total
1614
-
1615
- @staticmethod
1616
- def _format_size(size: int) -> str:
1617
- """Format size in human-readable format"""
1618
- for unit in ["B", "KB", "MB", "GB"]:
1619
- if size < 1024:
1620
- return f"{size:.1f}{unit}" if unit != "B" else f"{size}{unit}"
1621
- size /= 1024
1622
- return f"{size:.1f}TB"
1623
-
1624
def _update_size_cell(self, job_id: str, value: str = None) -> None:
    """Write a size string into the ``size`` column of a job's row.

    Args:
        job_id: Row key of the job in the ``#orphan-table`` table
        value: Text to display; when None, the cached size for the job is
            used, or ``"-"`` if nothing is cached

    Any failure is ignored on purpose (best-effort update).
    """
    try:
        orphan_table = self.query_one("#orphan-table", DataTable)
        if value is None:
            size_text = self._size_cache.get(job_id, "-")
        else:
            size_text = value
        orphan_table.update_cell(job_id, "size", size_text)
    except Exception:
        pass  # Table may have changed
1634
-
1635
def on_data_table_row_selected(self, event: DataTable.RowSelected) -> None:
    """Refresh the job info line when a table row gets selected.

    The event payload is not used: the info is rebuilt from the table state.
    """
    self._update_job_info()
1638
-
1639
def on_data_table_row_highlighted(self, event: DataTable.RowHighlighted) -> None:
    """Refresh the job info line when the table cursor moves to another row.

    The event payload is not used: the info is rebuilt from the table state.
    """
    self._update_job_info()
1642
-
1643
def _update_job_info(self) -> None:
    """Update the job info display for the row under the cursor.

    Shows the job path and its cached size (or ``calculating...``). The
    display is cleared when no valid row is under the cursor or when the
    job has no path.
    """
    table = self.query_one("#orphan-table", DataTable)
    info = self.query_one("#orphan-job-info", Static)

    row_keys = list(table.rows.keys())
    cursor = table.cursor_row
    # Guard the index: the cursor row may not correspond to an existing row
    # (e.g. empty table), which would otherwise raise IndexError.
    if cursor is None or not 0 <= cursor < len(row_keys):
        info.update("")
        return

    row_key = row_keys[cursor]
    if row_key:
        job_id = str(row_key.value)
        job = next((j for j in self.orphan_jobs if j.identifier == job_id), None)
        if job and job.path:
            size = self._size_cache.get(job.identifier, "calculating...")
            info.update(f"Path: {job.path} | Size: {size}")
        else:
            info.update("")
1661
-
1662
def action_copy_path(self) -> None:
    """Copy the folder path of the job under the cursor to the clipboard.

    Does nothing when no valid row is under the cursor or the job has no
    path. Clipboard failures, including ``pyperclip`` not being
    importable, are reported through a notification instead of raising.
    """
    table = self.query_one("#orphan-table", DataTable)

    row_keys = list(table.rows.keys())
    cursor = table.cursor_row
    # Guard the index: an empty table would otherwise raise IndexError
    if cursor is None or not 0 <= cursor < len(row_keys):
        return

    row_key = row_keys[cursor]
    if not row_key:
        return

    job_id = str(row_key.value)
    job = next((j for j in self.orphan_jobs if j.identifier == job_id), None)
    if not (job and job.path):
        return

    try:
        # Imported here so that a missing clipboard backend is reported
        # like any other copy failure
        import pyperclip

        pyperclip.copy(str(job.path))
        self.notify("Path copied", severity="information")
    except Exception as e:
        self.notify(f"Failed to copy: {e}", severity="error")
1680
-
1681
def action_delete_selected(self) -> None:
    """Delete the orphan job under the table cursor (after confirmation).

    Does nothing when no valid row is under the cursor or when the row
    does not match a known orphan job.
    """
    table = self.query_one("#orphan-table", DataTable)

    row_keys = list(table.rows.keys())
    cursor = table.cursor_row
    # Guard the index: an empty table would otherwise raise IndexError
    if cursor is None or not 0 <= cursor < len(row_keys):
        return

    row_key = row_keys[cursor]
    if row_key:
        job_id = str(row_key.value)
        job = next((j for j in self.orphan_jobs if j.identifier == job_id), None)
        if job:
            self._delete_job(job)
1693
-
1694
def _delete_job(self, job) -> None:
    """Ask for confirmation, then delete one orphan job.

    Args:
        job: The orphan job to delete; its ``identifier`` is shown in the
            confirmation dialog

    On confirmation the job is removed through the state provider; the
    outcome message is shown as a notification and the list is refreshed
    on success.
    """

    def on_answer(confirmed: bool) -> None:
        if not confirmed:
            return
        success, msg = self.state_provider.delete_job_safely(job)
        if not success:
            self.notify(msg, severity="error")
            return
        self.notify(msg, severity="information")
        self.refresh_orphans()

    confirm_screen = DeleteConfirmScreen("orphan job", job.identifier)
    self.app.push_screen(confirm_screen, on_answer)
1710
-
1711
def action_delete_all(self) -> None:
    """Delete every non-running orphan job after a single confirmation.

    Warns and returns when there are no orphan jobs, or when all of them
    are running. Otherwise a confirmation dialog is shown; on confirmation
    each job is deleted without cascading, orphan partials are cleaned up
    once at the end, and the list is refreshed.
    """
    if not self.orphan_jobs:
        self.notify("No orphan jobs to delete", severity="warning")
        return

    # Running jobs are never deleted
    candidates = [job for job in self.orphan_jobs if not job.state.running()]
    if not candidates:
        self.notify("All orphan jobs are running", severity="warning")
        return

    def on_answer(confirmed: bool) -> None:
        if not confirmed:
            return

        deleted = 0
        for job in candidates:
            outcome = self.state_provider.delete_job_safely(
                job, cascade_orphans=False
            )
            success, _ = outcome
            if success:
                deleted += 1

        # Clean up orphan partials once at the end
        self.state_provider.cleanup_orphan_partials(perform=True)

        self.notify(f"Deleted {deleted} orphan jobs", severity="information")
        self.refresh_orphans()

    confirm_screen = DeleteConfirmScreen(
        "all orphan jobs",
        f"{len(candidates)} jobs",
        "This action cannot be undone",
    )
    self.app.push_screen(confirm_screen, on_answer)
1748
-
1749
def action_refresh(self) -> None:
    """Reload the list of orphan jobs (bound to the refresh action)."""
    self.refresh_orphans()
1752
-
1753
def action_go_back(self) -> None:
    """Close this screen and return to the previous one."""
    self.dismiss()
1756
-
1757
-
1758
- class HelpScreen(ModalScreen[None]):
1759
- """Modal screen showing keyboard shortcuts"""
1760
-
1761
- BINDINGS = [
1762
- Binding("escape", "close", "Close"),
1763
- Binding("?", "close", "Close"),
1764
- ]
1765
-
1766
- def compose(self) -> ComposeResult:
1767
- from textual.containers import VerticalScroll
1768
-
1769
- help_text = """
1770
- [bold]Keyboard Shortcuts[/bold]
1771
-
1772
- [bold cyan]Navigation[/bold cyan]
1773
- q Quit application
1774
- Esc Go back / Close dialog
1775
- r Refresh data
1776
- ? Show this help
1777
- j Switch to Jobs tab
1778
- s Switch to Services tab
1779
-
1780
- [bold cyan]Experiments[/bold cyan]
1781
- Enter Select experiment
1782
- d Delete experiment
1783
- k Kill all running jobs
1784
-
1785
- [bold cyan]Jobs[/bold cyan]
1786
- l View job logs
1787
- d Delete job
1788
- k Kill running job
1789
- / Open search filter
1790
- c Clear search filter
1791
- S Sort by status
1792
- T Sort by task
1793
- D Sort by date
1794
- f Copy folder path
1795
-
1796
- [bold cyan]Services[/bold cyan]
1797
- s Start service
1798
- x Stop service
1799
- u Copy URL
1800
-
1801
- [bold cyan]Search Filter[/bold cyan]
1802
- Enter Apply filter
1803
- Esc Close and clear filter
1804
-
1805
- [bold cyan]Orphan Jobs[/bold cyan]
1806
- o Show orphan jobs
1807
- T Sort by task
1808
- Z Sort by size
1809
- d Delete selected
1810
- D Delete all
1811
- f Copy folder path
1812
- """
1813
- with Vertical(id="help-dialog"):
1814
- yield Static("Experimaestro Help", id="help-title")
1815
- with VerticalScroll(id="help-scroll"):
1816
- yield Static(help_text, id="help-content")
1817
- yield Button("Close", id="help-close-btn")
1818
-
1819
- def on_button_pressed(self, event: Button.Pressed) -> None:
1820
- self.dismiss()
1821
-
1822
- def action_close(self) -> None:
1823
- self.dismiss()
29
+ from experimaestro.tui.utils import format_duration, get_status_icon # noqa: F401
30
+ from experimaestro.tui.messages import (
31
+ ExperimentSelected,
32
+ ExperimentDeselected,
33
+ JobSelected,
34
+ JobDeselected,
35
+ ViewJobLogs,
36
+ ViewJobLogsRequest,
37
+ DeleteJobRequest,
38
+ DeleteExperimentRequest,
39
+ KillJobRequest,
40
+ KillExperimentRequest,
41
+ FilterChanged, # noqa: F401
42
+ SearchApplied, # noqa: F401
43
+ SizeCalculated, # noqa: F401
44
+ ShowRunsRequest,
45
+ RunSelected,
46
+ )
47
+ from experimaestro.tui.dialogs import (
48
+ QuitConfirmScreen,
49
+ DeleteConfirmScreen,
50
+ KillConfirmScreen,
51
+ HelpScreen,
52
+ )
53
+ from experimaestro.tui.widgets import (
54
+ CaptureLog,
55
+ ExperimentsList,
56
+ ServicesList,
57
+ JobsTable,
58
+ JobDetailView,
59
+ RunsList,
60
+ GlobalServiceSyncs,
61
+ )
62
+ from experimaestro.tui.widgets.stray_jobs import OrphanJobsTab
1824
63
 
1825
64
 
1826
65
  class ExperimaestroUI(App):
@@ -1834,7 +73,6 @@ class ExperimaestroUI(App):
1834
73
  Binding("?", "show_help", "Help"),
1835
74
  Binding("escape", "go_back", "Back", show=False),
1836
75
  Binding("l", "view_logs", "Logs", show=False),
1837
- Binding("o", "show_orphans", "Orphans", show=False),
1838
76
  Binding("j", "focus_jobs", "Jobs", show=False),
1839
77
  Binding("s", "focus_services", "Services", show=False),
1840
78
  ]
@@ -1843,15 +81,19 @@ class ExperimaestroUI(App):
1843
81
  self,
1844
82
  workdir: Optional[Path] = None,
1845
83
  watch: bool = True,
1846
- state_provider: Optional[WorkspaceStateProvider] = None,
84
+ state_provider: Optional[StateProvider] = None,
1847
85
  show_logs: bool = False,
1848
86
  ):
1849
87
  """Initialize the TUI
1850
88
 
1851
89
  Args:
1852
- workdir: Workspace directory (required if state_provider not provided)
90
+ workdir: Workspace directory (required if state_provider not provided
91
+ and not using deferred mode)
1853
92
  watch: Enable filesystem watching for workspace mode
1854
- state_provider: Pre-initialized state provider (for active experiments)
93
+ state_provider: Pre-initialized state provider (for active experiments).
94
+ If None and workdir is provided, creates a WorkspaceStateProvider.
95
+ If None and workdir is None, starts in deferred mode (logs only)
96
+ and waits for set_state_provider() to be called.
1855
97
  show_logs: Whether to show the logs tab (for active experiments)
1856
98
  """
1857
99
  super().__init__()
@@ -1859,46 +101,73 @@ class ExperimaestroUI(App):
1859
101
  self.watch = watch
1860
102
  self.show_logs = show_logs
1861
103
  self._listener_registered = False
104
+ self._monitor_mounted = False
1862
105
 
1863
106
  # Initialize state provider before compose
1864
107
  if state_provider:
1865
108
  self.state_provider = state_provider
1866
- self.owns_provider = False # Don't close external provider
1867
- self._has_active_experiment = True # External provider = active experiment
1868
- else:
1869
- from experimaestro.scheduler.state_provider import WorkspaceStateProvider
109
+ elif workdir:
110
+ from experimaestro.scheduler.workspace_state_provider import (
111
+ WorkspaceStateProvider,
112
+ )
1870
113
 
1871
114
  # Get singleton provider instance for this workspace
1872
- self.state_provider = WorkspaceStateProvider.get_instance(
1873
- self.workdir,
1874
- read_only=False,
1875
- sync_on_start=True,
1876
- sync_interval_minutes=5,
1877
- )
1878
- self.owns_provider = False # Provider is singleton, don't close
1879
- self._has_active_experiment = False # Just viewing, no active experiment
115
+ self.state_provider = WorkspaceStateProvider.get_instance(self.workdir)
116
+ else:
117
+ # Deferred mode: no provider yet, will be set later via set_state_provider()
118
+ self.state_provider = None
119
+
120
+ # Set subtitle to show scheduler status
121
+ self._update_scheduler_status()
122
+
123
+ def _update_scheduler_status(self) -> None:
124
+ """Update the subtitle to reflect scheduler status"""
125
+ if self.state_provider is None:
126
+ self.sub_title = "○ Waiting for experiment..."
127
+ elif self.state_provider.is_live:
128
+ self.sub_title = "● Running experiment"
129
+ else:
130
+ self.sub_title = "○ Monitoring workspace"
1880
131
 
1881
132
  def compose(self) -> ComposeResult:
1882
133
  """Compose the TUI layout"""
1883
134
  yield Header()
1884
135
 
1885
- if self.show_logs:
1886
- # Tabbed layout with logs
136
+ if self.state_provider is None:
137
+ # Deferred mode: only show logs, monitor will be added later
138
+ with TabbedContent(id="main-tabs"):
139
+ with TabPane("Logs", id="logs-tab"):
140
+ yield CaptureLog(id="logs", auto_scroll=True, wrap=True)
141
+ elif self.show_logs:
142
+ # Tabbed layout with logs and services
1887
143
  with TabbedContent(id="main-tabs"):
1888
144
  with TabPane("Monitor", id="monitor-tab"):
1889
145
  yield from self._compose_monitor_view()
146
+ with TabPane("Services (0)", id="services-sync-tab"):
147
+ yield GlobalServiceSyncs(self.state_provider)
148
+ with TabPane("Orphans (0)", id="orphan-tab"):
149
+ yield OrphanJobsTab(self.state_provider)
1890
150
  with TabPane("Logs", id="logs-tab"):
1891
- yield CaptureLog(id="logs", auto_scroll=True)
151
+ yield CaptureLog(id="logs", auto_scroll=True, wrap=True)
152
+ self._monitor_mounted = True
1892
153
  else:
1893
- # Simple layout without logs
1894
- with Vertical(id="main-container"):
1895
- yield from self._compose_monitor_view()
154
+ # Simple layout without logs but with services
155
+ with TabbedContent(id="main-tabs"):
156
+ with TabPane("Monitor", id="monitor-tab"):
157
+ yield from self._compose_monitor_view()
158
+ with TabPane("Services (0)", id="services-sync-tab"):
159
+ yield GlobalServiceSyncs(self.state_provider)
160
+ with TabPane("Orphans (0)", id="orphan-tab"):
161
+ yield OrphanJobsTab(self.state_provider)
162
+ self._monitor_mounted = True
1896
163
 
1897
164
  yield Footer()
1898
165
 
1899
166
  def _compose_monitor_view(self):
1900
- """Compose the monitor view with experiments, jobs/services tabs, and job details"""
167
+ """Compose the monitor view with experiments, runs, jobs/services tabs, and job details"""
1901
168
  yield ExperimentsList(self.state_provider)
169
+ # Runs list (hidden initially, shown when 'd' pressed on experiment)
170
+ yield RunsList(self.state_provider)
1902
171
  # Tabbed view for jobs and services (hidden initially)
1903
172
  with TabbedContent(id="experiment-tabs", classes="hidden"):
1904
173
  with TabPane("Jobs", id="jobs-tab"):
@@ -1914,77 +183,324 @@ class ExperimaestroUI(App):
1914
183
  # Resets logging
1915
184
  logging.basicConfig(level=logging.INFO, force=True)
1916
185
 
1917
- # Get the widgets
1918
- experiments_list = self.query_one(ExperimentsList)
1919
- experiments_list.refresh_experiments()
186
+ # If monitor is mounted, refresh experiments
187
+ if self._monitor_mounted:
188
+ experiments_list = self.query_one(ExperimentsList)
189
+ experiments_list.refresh_experiments()
1920
190
 
1921
- # Register as listener for push notifications from state provider
191
+ # Register as listener for state change notifications
192
+ # The state provider handles its own notification strategy internally
1922
193
  if self.state_provider:
1923
194
  self.state_provider.add_listener(self._on_state_event)
1924
195
  self._listener_registered = True
1925
- self.log("Registered state listener for push notifications")
196
+ self.log("Registered state listener for notifications")
197
+
198
def set_state_provider(self, state_provider: StateProvider) -> None:
    """Set the state provider and mount monitor widgets (for deferred mode)

    May be called from a background thread after starting the experiment:
    in that case the work is re-dispatched to the UI thread with
    ``call_from_thread``, because it mounts widgets. The same main-thread
    check as in ``_on_state_event`` is used.

    The subtitle is refreshed, the monitor widgets are mounted if they are
    not already, and the state listener is registered once.

    Args:
        state_provider: The state provider (typically the Scheduler)
    """
    import threading

    if threading.current_thread() is not threading.main_thread():
        # Widget mounting must happen on the UI thread
        self.call_from_thread(self.set_state_provider, state_provider)
        return

    self.state_provider = state_provider
    self._update_scheduler_status()

    # Mount monitor widgets if not already done
    if not self._monitor_mounted:
        self._mount_monitor_widgets()

    # Register listener
    if not self._listener_registered:
        self.state_provider.add_listener(self._on_state_event)
        self._listener_registered = True
        self.log("Registered state listener for notifications")
219
+
220
def _mount_monitor_widgets(self) -> None:
    """Add the monitor-related tabs to ``#main-tabs`` at runtime.

    Used in deferred mode, once a state provider is available. Inserts,
    before the logs tab: the Monitor tab (experiments list, runs list,
    hidden jobs/services tabs, hidden job detail container), the Services
    tab, and the Orphans tab when the provider is not live. Finally marks
    the monitor as mounted and refreshes the experiments list.
    """
    provider = self.state_provider
    main_tabs = self.query_one("#main-tabs", TabbedContent)

    # Monitor pane goes in first so its children can be mounted on it
    monitor_pane = TabPane("Monitor", id="monitor-tab")
    main_tabs.add_pane(monitor_pane, before="logs-tab")

    experiments_list = ExperimentsList(provider)
    runs_list = RunsList(provider)
    jobs_table = JobsTable(provider)
    services_list = ServicesList(provider)
    job_detail_view = JobDetailView(provider)

    monitor_pane.mount(experiments_list)
    monitor_pane.mount(runs_list)

    # Per-experiment tabs (jobs / services), hidden until a selection is made
    experiment_tabs = TabbedContent(id="experiment-tabs", classes="hidden")
    for title, pane_id, child in (
        ("Jobs", "jobs-tab", jobs_table),
        ("Services", "services-tab", services_list),
    ):
        pane = TabPane(title, id=pane_id)
        pane.compose_add_child(child)
        experiment_tabs.compose_add_child(pane)
    monitor_pane.mount(experiment_tabs)

    # Job detail container, hidden until a job is selected
    detail_container = Vertical(id="job-detail-container", classes="hidden")
    detail_container.compose_add_child(job_detail_view)
    monitor_pane.mount(detail_container)

    # Global services tab
    services_sync_pane = TabPane("Services (0)", id="services-sync-tab")
    services_sync_pane.compose_add_child(GlobalServiceSyncs(provider))
    main_tabs.add_pane(services_sync_pane, before="logs-tab")

    # Orphans tab is only added when the provider is not live
    if not provider.is_live:
        orphan_pane = TabPane("Orphans (0)", id="orphan-tab")
        orphan_pane.compose_add_child(OrphanJobsTab(provider))
        main_tabs.add_pane(orphan_pane, before="logs-tab")

    self._monitor_mounted = True

    experiments_list.refresh_experiments()
1926
269
 
1927
- def _on_state_event(self, event: StateEvent) -> None:
270
def update_services_tab_title(self) -> None:
    """Refresh the Services tab label with the number of running services.

    Counts the services returned by the state provider whose ``state`` is
    ``ServiceState.RUNNING`` and writes ``Services (N)`` on the
    ``services-sync-tab`` tab. Any failure is silently ignored.
    """
    try:
        from experimaestro.scheduler.services import ServiceState

        running_count = 0
        for service in self.state_provider.get_services():
            if hasattr(service, "state") and service.state == ServiceState.RUNNING:
                running_count += 1

        main_tabs = self.query_one("#main-tabs", TabbedContent)
        tab = main_tabs.get_tab("services-sync-tab")
        if tab:
            tab.label = f"Services ({running_count})"
    except Exception:
        pass
290
+
291
def update_orphan_tab_title(self) -> None:
    """Refresh the Orphans tab label with the orphan job counts.

    The label reads ``Orphans (X/Y)`` where X is the orphan tab's
    ``running_count`` and Y its ``finished_count``. Any failure is
    silently ignored.
    """
    try:
        orphan_widget = self.query_one(OrphanJobsTab)
        counts = (orphan_widget.running_count, orphan_widget.finished_count)
        tab = self.query_one("#main-tabs", TabbedContent).get_tab("orphan-tab")
        if tab:
            running, finished = counts
            tab.label = f"Orphans ({running}/{finished})"
    except Exception:
        pass
307
+
308
def update_logs_tab_title(self) -> None:
    """Reflect the unread state of the log widget in the Logs tab label.

    Does nothing when ``show_logs`` is off. Otherwise the label becomes a
    bold ``Logs *`` while the log widget has unread content, and plain
    ``Logs`` when it has none. Any failure is silently ignored.
    """
    if self.show_logs:
        try:
            from rich.text import Text

            log_widget = self.query_one(CaptureLog)
            main_tabs = self.query_one("#main-tabs", TabbedContent)
            tab = main_tabs.get_tab("logs-tab")
            if tab:
                tab.label = (
                    Text("Logs *", style="bold") if log_widget.has_unread else "Logs"
                )
        except Exception:
            pass
325
+
326
def on_tabbed_content_tab_activated(
    self, event: TabbedContent.TabActivated
) -> None:
    """React to the user switching tabs.

    Activating the logs tab (when ``show_logs`` is on) marks the log
    widget as read; activating the services tab refreshes the global
    services widget. Failures in either step are silently ignored.
    """
    # event.pane is the TabPane, event.tab is the Tab widget (header)
    pane_id = event.pane.id
    if pane_id == "logs-tab" and self.show_logs:
        try:
            self.query_one(CaptureLog).mark_as_read()
        except Exception:
            pass
        return

    if pane_id == "services-sync-tab":
        try:
            self.query_one(GlobalServiceSyncs).refresh_services()
        except Exception:
            pass
344
+
345
+ def _on_state_event(self, event: EventBase) -> None:
1928
346
  """Handle state change events from the state provider
1929
347
 
1930
- This is called from the state provider's thread, so we use
1931
- call_from_thread to safely update the UI.
348
+ This may be called from the state provider's thread or the main thread,
349
+ so we check before using call_from_thread.
1932
350
  """
1933
- self.call_from_thread(self._handle_state_event, event)
1934
-
1935
- def _handle_state_event(self, event: StateEvent) -> None:
1936
- """Process state event on the main thread"""
1937
- # Use query() instead of query_one() to avoid NoMatches exception
1938
- # when widgets aren't visible yet
1939
- jobs_tables = self.query(JobsTable)
1940
- services_lists = self.query(ServicesList)
1941
-
1942
- self.log.debug(
1943
- f"State event {event.event_type.name}, "
1944
- f"JobsTable found: {len(jobs_tables)}, ServicesList found: {len(services_lists)}"
351
+ import threading
352
+
353
+ self.log.info(f"_on_state_event called with: {type(event).__name__}")
354
+
355
+ if threading.current_thread() is threading.main_thread():
356
+ # Already in main thread, call directly
357
+ self._handle_state_event(event)
358
+ else:
359
+ # From background thread, use call_from_thread
360
+ self.call_from_thread(self._handle_state_event, event)
361
+
362
+ def _handle_state_event(self, event: EventBase) -> None:
363
+ """Process state event on the main thread using handler dispatch"""
364
+ self.log.info(f"State event: {event}")
365
+
366
+ # Dispatch to handler if one exists for this event type
367
+ handler = self.STATE_EVENT_HANDLERS.get(type(event))
368
+ if handler:
369
+ self.log.info(f"Dispatching to handler: {handler.__name__}")
370
+ try:
371
+ handler(self, event)
372
+ except Exception as e:
373
+ self.log.error(f"Error in handler: {e}")
374
+ else:
375
+ self.log.warning(f"No handler for event type: {type(event).__name__}")
376
+
377
def _handle_experiment_updated(self, event: ExperimentUpdatedEvent) -> None:
    """Refresh the experiments lists and the jobs of the updated experiment.

    Every experiments list is refreshed; jobs tables are refreshed only
    when they currently display the experiment named by the event.
    """
    for experiments_widget in self.query(ExperimentsList):
        experiments_widget.refresh_experiments()

    # Covers the case where the experiment finished and its events were
    # deleted: the jobs view of that experiment must be reloaded too
    for table in self.query(JobsTable):
        if table.current_experiment != event.experiment_id:
            continue
        table.refresh_jobs()
387
+
388
def _handle_run_updated(self, event: RunUpdatedEvent) -> None:
    """Refresh every experiments list after a run update.

    The event payload itself is not inspected.
    """
    for experiments_widget in self.query(ExperimentsList):
        experiments_widget.refresh_experiments()
392
+
393
+ def _handle_service_added(self, event: ServiceAddedEvent) -> None:
394
+ """Handle ServiceAddedEvent - refresh services list and update tab title"""
395
+ event_exp_id = event.experiment_id
396
+ self.log.info(
397
+ f"ServiceAddedEvent received: exp={event_exp_id}, service={event.service_id}"
1945
398
  )
1946
399
 
1947
- if event.event_type == StateEventType.EXPERIMENT_UPDATED:
1948
- # Refresh experiments list
1949
- for exp_list in self.query(ExperimentsList):
1950
- exp_list.refresh_experiments()
400
+ # Refresh the global services widget
401
+ try:
402
+ global_services = self.query_one(GlobalServiceSyncs)
403
+ self.log.info("Calling GlobalServiceSyncs.refresh_services()")
404
+ global_services.refresh_services()
405
+ except Exception as e:
406
+ self.log.warning(f"Failed to refresh global services: {e}")
1951
407
 
1952
- elif event.event_type == StateEventType.JOB_UPDATED:
1953
- event_exp_id = event.data.get("experimentId")
408
+ # Refresh per-experiment services list
409
+ for services_list in self.query(ServicesList):
410
+ if services_list.current_experiment == event_exp_id:
411
+ services_list.refresh_services()
1954
412
 
1955
- # Refresh jobs table if we're viewing the affected experiment
1956
- for jobs_table in jobs_tables:
1957
- if jobs_table.current_experiment == event_exp_id:
1958
- jobs_table.refresh_jobs()
413
+ def _handle_service_state_changed(self, event: ServiceStateChangedEvent) -> None:
414
+ """Handle ServiceStateChangedEvent - update tab title when service state changes"""
415
+ # Update the Services tab title (running count may have changed)
416
+ self.update_services_tab_title()
1959
417
 
1960
- # Also refresh job detail if we're viewing the affected job
1961
- for job_detail_container in self.query("#job-detail-container"):
1962
- if not job_detail_container.has_class("hidden"):
1963
- for job_detail_view in self.query(JobDetailView):
1964
- event_job_id = event.data.get("jobId")
1965
- if job_detail_view.current_job_id == event_job_id:
1966
- job_detail_view.refresh_job_detail()
418
+ # Also refresh global services widget if visible
419
+ try:
420
+ global_services = self.query_one(GlobalServiceSyncs)
421
+ global_services.refresh_services()
422
+ except Exception:
423
+ pass
1967
424
 
1968
- # Also update the experiment stats in the experiments list
1969
- for exp_list in self.query(ExperimentsList):
1970
- exp_list.refresh_experiments()
425
+ # Refresh per-experiment services list
426
+ for services_list in self.query(ServicesList):
427
+ if services_list.current_experiment == event.experiment_id:
428
+ services_list.refresh_services()
429
+
430
def _handle_job_submitted(self, event: JobSubmittedEvent) -> None:
    """Register a newly submitted job in the matching jobs tables.

    For each jobs table showing the event's experiment, the job's tags and
    dependencies (when present) are stored in the table caches and the
    table is refreshed. All experiments lists are refreshed afterwards so
    their statistics stay current.
    """
    experiment_id = event.experiment_id

    for table in self.query(JobsTable):
        if table.current_experiment != experiment_id:
            continue
        if event.tags:
            # Cache tags as a key -> value mapping
            table.tags_map[event.job_id] = {
                tag.key: tag.value for tag in event.tags
            }
        if event.depends_on:
            table.dependencies_map[event.job_id] = event.depends_on
        # Reload so that the new job shows up
        table.refresh_jobs()

    for experiments_widget in self.query(ExperimentsList):
        experiments_widget.refresh_experiments()
451
+
452
+ def _handle_job_state_changed(self, event: JobStateChangedEvent) -> None:
453
+ """Handle JobStateChangedEvent - refresh job display
454
+
455
+ This event is dispatched once per job state change.
456
+ Used for progress updates and state changes from job processes.
457
+ """
458
+ # Refresh all jobs tables that might contain this job
459
+ for jobs_table in self.query(JobsTable):
460
+ jobs_table.refresh_jobs()
1971
461
 
1972
- elif event.event_type == StateEventType.RUN_UPDATED:
1973
- # Refresh experiments list to show updated run info
1974
- for exp_list in self.query(ExperimentsList):
1975
- exp_list.refresh_experiments()
462
+ # Also refresh job detail if we're viewing this job
463
+ for job_detail_container in self.query("#job-detail-container"):
464
+ if not job_detail_container.has_class("hidden"):
465
+ for job_detail_view in self.query(JobDetailView):
466
+ if job_detail_view.current_job_id == event.job_id:
467
+ job_detail_view.refresh_job_detail()
1976
468
 
1977
- elif event.event_type == StateEventType.SERVICE_UPDATED:
1978
- event_exp_id = event.data.get("experimentId")
469
+ # Also update the experiment stats in the experiments list
470
+ for exp_list in self.query(ExperimentsList):
471
+ exp_list.refresh_experiments()
1979
472
 
1980
- # Refresh services list if we're viewing the affected experiment
1981
- for services_list in services_lists:
1982
- if services_list.current_experiment == event_exp_id:
1983
- services_list.refresh_services()
473
def _handle_job_progress(self, event: JobProgressEvent) -> None:
    """Refresh the job views after a progress report.

    All jobs tables are refreshed. The job detail view is refreshed too,
    but only when its container is visible and it shows the job named by
    the event.
    """
    for table in self.query(JobsTable):
        table.refresh_jobs()

    for container in self.query("#job-detail-container"):
        if container.has_class("hidden"):
            continue
        for detail_view in self.query(JobDetailView):
            if detail_view.current_job_id == event.job_id:
                detail_view.refresh_job_detail()
488
+
489
+ STATE_EVENT_HANDLERS = {
490
+ ExperimentUpdatedEvent: _handle_experiment_updated,
491
+ JobStateChangedEvent: _handle_job_state_changed,
492
+ JobProgressEvent: _handle_job_progress,
493
+ RunUpdatedEvent: _handle_run_updated,
494
+ ServiceAddedEvent: _handle_service_added,
495
+ ServiceStateChangedEvent: _handle_service_state_changed,
496
+ JobSubmittedEvent: _handle_job_submitted,
497
+ }
1984
498
 
1985
499
  def on_experiment_selected(self, message: ExperimentSelected) -> None:
1986
500
  """Handle experiment selection - show jobs/services tabs"""
1987
- self.log(f"Experiment selected: {message.experiment_id}")
501
+ self.log(
502
+ f"Experiment selected: {message.experiment_id} (run: {message.run_id})"
503
+ )
1988
504
 
1989
505
  # Set up services list
1990
506
  services_list = self.query_one(ServicesList)
@@ -1992,7 +508,7 @@ class ExperimaestroUI(App):
1992
508
 
1993
509
  # Set up jobs table
1994
510
  jobs_table_widget = self.query_one(JobsTable)
1995
- jobs_table_widget.set_experiment(message.experiment_id)
511
+ jobs_table_widget.set_experiment(message.experiment_id, message.run_id)
1996
512
 
1997
513
  # Show the tabbed content
1998
514
  tabs = self.query_one("#experiment-tabs", TabbedContent)
@@ -2077,14 +593,31 @@ class ExperimaestroUI(App):
2077
593
  job_detail_view = self.query_one(JobDetailView)
2078
594
  job_detail_view.action_view_logs()
2079
595
 
2080
- def action_show_orphans(self) -> None:
2081
- """Show orphan jobs screen"""
2082
- self.push_screen(OrphanJobsScreen(self.state_provider))
2083
-
2084
596
  def on_view_job_logs(self, message: ViewJobLogs) -> None:
2085
- """Handle request to view job logs - push LogViewerScreen"""
597
+ """Handle request to view job logs - push LogViewerScreen
598
+
599
+ For remote monitoring, switches to log viewer immediately with loading state,
600
+ then starts adaptive sync in background.
601
+ """
2086
602
  job_path = Path(message.job_path)
2087
- # Log files are named after the last part of the task ID
603
+ job_id = job_path.name
604
+
605
+ # For remote monitoring, switch screen immediately with loading state
606
+ if self.state_provider.is_remote:
607
+ # Push screen immediately - it will handle sync and show loading state
608
+ self.push_screen(
609
+ LogViewerScreen(
610
+ log_files=[], # Will be populated after sync
611
+ job_id=job_id,
612
+ sync_func=self.state_provider.sync_path,
613
+ remote_path=str(job_path),
614
+ task_id=message.task_id,
615
+ job_state=message.job_state,
616
+ )
617
+ )
618
+ return
619
+
620
+ # Local monitoring - no sync needed
2088
621
  task_name = message.task_id.split(".")[-1]
2089
622
  stdout_path = job_path / f"{task_name}.out"
2090
623
  stderr_path = job_path / f"{task_name}.err"
@@ -2104,7 +637,6 @@ class ExperimaestroUI(App):
2104
637
  return
2105
638
 
2106
639
  # Push the log viewer screen
2107
- job_id = job_path.name
2108
640
  self.push_screen(LogViewerScreen(log_files, job_id))
2109
641
 
2110
642
  def on_view_job_logs_request(self, message: ViewJobLogsRequest) -> None:
@@ -2113,7 +645,7 @@ class ExperimaestroUI(App):
2113
645
  if not job or not job.path or not job.task_id:
2114
646
  self.notify("Cannot find job logs", severity="warning")
2115
647
  return
2116
- self.post_message(ViewJobLogs(str(job.path), job.task_id))
648
+ self.post_message(ViewJobLogs(str(job.path), job.task_id, job.state))
2117
649
 
2118
650
  def on_delete_job_request(self, message: DeleteJobRequest) -> None:
2119
651
  """Handle job deletion request"""
@@ -2238,6 +770,42 @@ class ExperimaestroUI(App):
2238
770
  handle_kill_response,
2239
771
  )
2240
772
 
773
+ def on_show_runs_request(self, message: ShowRunsRequest) -> None:
774
+ """Handle request to show experiment runs"""
775
+ runs_list = self.query_one(RunsList)
776
+ runs_list.set_experiment(message.experiment_id, message.current_run_id)
777
+
778
+ def on_run_selected(self, message: RunSelected) -> None:
779
+ """Handle run selection - show jobs for the selected run"""
780
+ self.log(
781
+ f"Run selected: {message.run_id} (current={message.is_current}) "
782
+ f"for {message.experiment_id}"
783
+ )
784
+
785
+ # Set up jobs table with the selected run
786
+ jobs_table_widget = self.query_one(JobsTable)
787
+ jobs_table_widget.set_experiment(
788
+ message.experiment_id,
789
+ message.run_id,
790
+ is_past_run=not message.is_current,
791
+ )
792
+
793
+ # Set up services list
794
+ services_list = self.query_one(ServicesList)
795
+ services_list.set_experiment(message.experiment_id)
796
+
797
+ # Show the tabbed content
798
+ tabs = self.query_one("#experiment-tabs", TabbedContent)
799
+ tabs.remove_class("hidden")
800
+
801
+ # Collapse experiments list
802
+ experiments_list = self.query_one(ExperimentsList)
803
+ experiments_list.collapse_to_experiment(message.experiment_id)
804
+
805
+ # Focus the jobs table
806
+ jobs_table = self.query_one("#jobs-table", DataTable)
807
+ jobs_table.focus()
808
+
2241
809
  def action_focus_jobs(self) -> None:
2242
810
  """Switch to the jobs tab"""
2243
811
  tabs = self.query_one("#experiment-tabs", TabbedContent)
@@ -2282,7 +850,7 @@ class ExperimaestroUI(App):
2282
850
  self.exit()
2283
851
 
2284
852
  self.push_screen(
2285
- QuitConfirmScreen(has_active_experiment=self._has_active_experiment),
853
+ QuitConfirmScreen(has_active_experiment=self.state_provider.is_live),
2286
854
  handle_quit_response,
2287
855
  )
2288
856
 
@@ -2297,7 +865,3 @@ class ExperimaestroUI(App):
2297
865
  self.state_provider.remove_listener(self._on_state_event)
2298
866
  self._listener_registered = False
2299
867
  self.log("Unregistered state listener")
2300
-
2301
- # Only close state provider if we own it (not external/active experiment)
2302
- if self.state_provider and self.owns_provider:
2303
- self.state_provider.close()