experimaestro 2.0.0b8__py3-none-any.whl → 2.0.0b17__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of experimaestro might be problematic. Click here for more details.
- experimaestro/__init__.py +12 -5
- experimaestro/cli/__init__.py +239 -126
- experimaestro/cli/filter.py +48 -23
- experimaestro/cli/jobs.py +253 -71
- experimaestro/cli/refactor.py +1 -2
- experimaestro/commandline.py +7 -4
- experimaestro/connectors/__init__.py +9 -1
- experimaestro/connectors/local.py +43 -3
- experimaestro/core/arguments.py +18 -18
- experimaestro/core/identifier.py +11 -11
- experimaestro/core/objects/config.py +96 -39
- experimaestro/core/objects/config_walk.py +3 -3
- experimaestro/core/{subparameters.py → partial.py} +16 -16
- experimaestro/core/partial_lock.py +394 -0
- experimaestro/core/types.py +12 -15
- experimaestro/dynamic.py +290 -0
- experimaestro/experiments/__init__.py +6 -2
- experimaestro/experiments/cli.py +217 -50
- experimaestro/experiments/configuration.py +24 -0
- experimaestro/generators.py +5 -5
- experimaestro/ipc.py +118 -1
- experimaestro/launcherfinder/__init__.py +2 -2
- experimaestro/launcherfinder/registry.py +6 -7
- experimaestro/launcherfinder/specs.py +2 -9
- experimaestro/launchers/slurm/__init__.py +2 -2
- experimaestro/launchers/slurm/base.py +62 -0
- experimaestro/locking.py +957 -1
- experimaestro/notifications.py +89 -201
- experimaestro/progress.py +63 -366
- experimaestro/rpyc.py +0 -2
- experimaestro/run.py +29 -2
- experimaestro/scheduler/__init__.py +8 -1
- experimaestro/scheduler/base.py +629 -53
- experimaestro/scheduler/dependencies.py +20 -16
- experimaestro/scheduler/experiment.py +732 -167
- experimaestro/scheduler/interfaces.py +316 -101
- experimaestro/scheduler/jobs.py +58 -20
- experimaestro/scheduler/remote/adaptive_sync.py +265 -0
- experimaestro/scheduler/remote/client.py +171 -117
- experimaestro/scheduler/remote/protocol.py +8 -193
- experimaestro/scheduler/remote/server.py +95 -71
- experimaestro/scheduler/services.py +53 -28
- experimaestro/scheduler/state_provider.py +663 -2430
- experimaestro/scheduler/state_status.py +1247 -0
- experimaestro/scheduler/transient.py +31 -0
- experimaestro/scheduler/workspace.py +1 -1
- experimaestro/scheduler/workspace_state_provider.py +1273 -0
- experimaestro/scriptbuilder.py +4 -4
- experimaestro/settings.py +36 -0
- experimaestro/tests/conftest.py +33 -5
- experimaestro/tests/connectors/bin/executable.py +1 -1
- experimaestro/tests/fixtures/pre_experiment/experiment_check_env.py +16 -0
- experimaestro/tests/fixtures/pre_experiment/experiment_check_mock.py +14 -0
- experimaestro/tests/fixtures/pre_experiment/experiment_simple.py +12 -0
- experimaestro/tests/fixtures/pre_experiment/pre_setup_env.py +5 -0
- experimaestro/tests/fixtures/pre_experiment/pre_setup_error.py +3 -0
- experimaestro/tests/fixtures/pre_experiment/pre_setup_mock.py +8 -0
- experimaestro/tests/launchers/bin/test.py +1 -0
- experimaestro/tests/launchers/test_slurm.py +9 -9
- experimaestro/tests/partial_reschedule.py +46 -0
- experimaestro/tests/restart.py +3 -3
- experimaestro/tests/restart_main.py +1 -0
- experimaestro/tests/scripts/notifyandwait.py +1 -0
- experimaestro/tests/task_partial.py +38 -0
- experimaestro/tests/task_tokens.py +2 -2
- experimaestro/tests/tasks/test_dynamic.py +6 -6
- experimaestro/tests/test_dependencies.py +3 -3
- experimaestro/tests/test_deprecated.py +15 -15
- experimaestro/tests/test_dynamic_locking.py +317 -0
- experimaestro/tests/test_environment.py +24 -14
- experimaestro/tests/test_experiment.py +171 -36
- experimaestro/tests/test_identifier.py +25 -25
- experimaestro/tests/test_identifier_stability.py +3 -5
- experimaestro/tests/test_multitoken.py +2 -4
- experimaestro/tests/{test_subparameters.py → test_partial.py} +25 -25
- experimaestro/tests/test_partial_paths.py +81 -138
- experimaestro/tests/test_pre_experiment.py +219 -0
- experimaestro/tests/test_progress.py +2 -8
- experimaestro/tests/test_remote_state.py +560 -99
- experimaestro/tests/test_stray_jobs.py +261 -0
- experimaestro/tests/test_tasks.py +1 -2
- experimaestro/tests/test_token_locking.py +52 -67
- experimaestro/tests/test_tokens.py +5 -6
- experimaestro/tests/test_transient.py +225 -0
- experimaestro/tests/test_workspace_state_provider.py +768 -0
- experimaestro/tests/token_reschedule.py +1 -3
- experimaestro/tests/utils.py +2 -7
- experimaestro/tokens.py +227 -372
- experimaestro/tools/diff.py +1 -0
- experimaestro/tools/documentation.py +4 -5
- experimaestro/tools/jobs.py +1 -2
- experimaestro/tui/app.py +438 -1966
- experimaestro/tui/app.tcss +162 -0
- experimaestro/tui/dialogs.py +172 -0
- experimaestro/tui/log_viewer.py +253 -3
- experimaestro/tui/messages.py +137 -0
- experimaestro/tui/utils.py +54 -0
- experimaestro/tui/widgets/__init__.py +23 -0
- experimaestro/tui/widgets/experiments.py +468 -0
- experimaestro/tui/widgets/global_services.py +238 -0
- experimaestro/tui/widgets/jobs.py +972 -0
- experimaestro/tui/widgets/log.py +156 -0
- experimaestro/tui/widgets/orphans.py +363 -0
- experimaestro/tui/widgets/runs.py +185 -0
- experimaestro/tui/widgets/services.py +314 -0
- experimaestro/tui/widgets/stray_jobs.py +528 -0
- experimaestro/utils/__init__.py +1 -1
- experimaestro/utils/environment.py +105 -22
- experimaestro/utils/fswatcher.py +124 -0
- experimaestro/utils/jobs.py +1 -2
- experimaestro/utils/jupyter.py +1 -2
- experimaestro/utils/logging.py +72 -0
- experimaestro/version.py +2 -2
- experimaestro/webui/__init__.py +9 -0
- experimaestro/webui/app.py +117 -0
- experimaestro/{server → webui}/data/index.css +66 -11
- experimaestro/webui/data/index.css.map +1 -0
- experimaestro/{server → webui}/data/index.js +82763 -87217
- experimaestro/webui/data/index.js.map +1 -0
- experimaestro/webui/routes/__init__.py +5 -0
- experimaestro/webui/routes/auth.py +53 -0
- experimaestro/webui/routes/proxy.py +117 -0
- experimaestro/webui/server.py +200 -0
- experimaestro/webui/state_bridge.py +152 -0
- experimaestro/webui/websocket.py +413 -0
- {experimaestro-2.0.0b8.dist-info → experimaestro-2.0.0b17.dist-info}/METADATA +5 -6
- experimaestro-2.0.0b17.dist-info/RECORD +219 -0
- experimaestro/cli/progress.py +0 -269
- experimaestro/scheduler/state.py +0 -75
- experimaestro/scheduler/state_db.py +0 -437
- experimaestro/scheduler/state_sync.py +0 -891
- experimaestro/server/__init__.py +0 -467
- experimaestro/server/data/index.css.map +0 -1
- experimaestro/server/data/index.js.map +0 -1
- experimaestro/tests/test_cli_jobs.py +0 -615
- experimaestro/tests/test_file_progress.py +0 -425
- experimaestro/tests/test_file_progress_integration.py +0 -477
- experimaestro/tests/test_state_db.py +0 -434
- experimaestro-2.0.0b8.dist-info/RECORD +0 -187
- /experimaestro/{server → webui}/data/1815e00441357e01619e.ttf +0 -0
- /experimaestro/{server → webui}/data/2463b90d9a316e4e5294.woff2 +0 -0
- /experimaestro/{server → webui}/data/2582b0e4bcf85eceead0.ttf +0 -0
- /experimaestro/{server → webui}/data/89999bdf5d835c012025.woff2 +0 -0
- /experimaestro/{server → webui}/data/914997e1bdfc990d0897.ttf +0 -0
- /experimaestro/{server → webui}/data/c210719e60948b211a12.woff2 +0 -0
- /experimaestro/{server → webui}/data/favicon.ico +0 -0
- /experimaestro/{server → webui}/data/index.html +0 -0
- /experimaestro/{server → webui}/data/login.html +0 -0
- /experimaestro/{server → webui}/data/manifest.json +0 -0
- {experimaestro-2.0.0b8.dist-info → experimaestro-2.0.0b17.dist-info}/WHEEL +0 -0
- {experimaestro-2.0.0b8.dist-info → experimaestro-2.0.0b17.dist-info}/entry_points.txt +0 -0
- {experimaestro-2.0.0b8.dist-info → experimaestro-2.0.0b17.dist-info}/licenses/LICENSE +0 -0
experimaestro/tui/app.py
CHANGED
|
@@ -4,1855 +4,62 @@ import logging
|
|
|
4
4
|
from pathlib import Path
|
|
5
5
|
from typing import Optional
|
|
6
6
|
from textual.app import App, ComposeResult
|
|
7
|
-
from textual import
|
|
8
|
-
from textual.containers import Container, Horizontal, Vertical
|
|
7
|
+
from textual.containers import Vertical
|
|
9
8
|
from textual.widgets import (
|
|
10
9
|
Header,
|
|
11
10
|
Footer,
|
|
12
11
|
DataTable,
|
|
13
|
-
Label,
|
|
14
12
|
TabbedContent,
|
|
15
13
|
TabPane,
|
|
16
|
-
RichLog,
|
|
17
|
-
Button,
|
|
18
|
-
Static,
|
|
19
|
-
Input,
|
|
20
14
|
)
|
|
21
|
-
from textual.widget import Widget
|
|
22
|
-
from textual.reactive import reactive
|
|
23
15
|
from textual.binding import Binding
|
|
24
|
-
|
|
25
|
-
from
|
|
26
|
-
from
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
16
|
+
|
|
17
|
+
from experimaestro.scheduler.state_provider import StateProvider
|
|
18
|
+
from experimaestro.scheduler.state_status import (
|
|
19
|
+
EventBase,
|
|
20
|
+
ExperimentUpdatedEvent,
|
|
21
|
+
RunUpdatedEvent,
|
|
22
|
+
JobStateChangedEvent,
|
|
23
|
+
JobProgressEvent,
|
|
24
|
+
JobSubmittedEvent,
|
|
25
|
+
ServiceAddedEvent,
|
|
26
|
+
ServiceStateChangedEvent,
|
|
32
27
|
)
|
|
33
28
|
from experimaestro.tui.log_viewer import LogViewerScreen
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
else:
|
|
69
|
-
yield Static("Are you sure you want to quit?", id="quit-message")
|
|
70
|
-
|
|
71
|
-
with Horizontal(id="quit-buttons"):
|
|
72
|
-
yield Button("Quit", variant="error", id="quit-yes")
|
|
73
|
-
yield Button("Cancel", variant="primary", id="quit-no")
|
|
74
|
-
|
|
75
|
-
def on_button_pressed(self, event: Button.Pressed) -> None:
|
|
76
|
-
if event.button.id == "quit-yes":
|
|
77
|
-
self.dismiss(True)
|
|
78
|
-
else:
|
|
79
|
-
self.dismiss(False)
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
class DeleteConfirmScreen(ModalScreen[bool]):
|
|
83
|
-
"""Modal screen for delete confirmation"""
|
|
84
|
-
|
|
85
|
-
def __init__(
|
|
86
|
-
self, item_type: str, item_name: str, warning: Optional[str] = None
|
|
87
|
-
) -> None:
|
|
88
|
-
super().__init__()
|
|
89
|
-
self.item_type = item_type
|
|
90
|
-
self.item_name = item_name
|
|
91
|
-
self.warning = warning
|
|
92
|
-
|
|
93
|
-
def compose(self) -> ComposeResult:
|
|
94
|
-
with Vertical(id="delete-dialog"):
|
|
95
|
-
yield Static(f"Delete {self.item_type}?", id="delete-title")
|
|
96
|
-
yield Static(
|
|
97
|
-
f"This will permanently delete: {self.item_name}", id="delete-message"
|
|
98
|
-
)
|
|
99
|
-
|
|
100
|
-
if self.warning:
|
|
101
|
-
yield Static(f"Warning: {self.warning}", id="delete-warning")
|
|
102
|
-
|
|
103
|
-
with Horizontal(id="delete-buttons"):
|
|
104
|
-
yield Button("Delete", variant="error", id="delete-yes")
|
|
105
|
-
yield Button("Cancel", variant="primary", id="delete-no")
|
|
106
|
-
|
|
107
|
-
def on_mount(self) -> None:
|
|
108
|
-
"""Focus cancel button by default"""
|
|
109
|
-
self.query_one("#delete-no", Button).focus()
|
|
110
|
-
|
|
111
|
-
def on_button_pressed(self, event: Button.Pressed) -> None:
|
|
112
|
-
if event.button.id == "delete-yes":
|
|
113
|
-
self.dismiss(True)
|
|
114
|
-
else:
|
|
115
|
-
self.dismiss(False)
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
class KillConfirmScreen(ModalScreen[bool]):
|
|
119
|
-
"""Modal screen for kill confirmation"""
|
|
120
|
-
|
|
121
|
-
def __init__(self, item_type: str, item_name: str) -> None:
|
|
122
|
-
super().__init__()
|
|
123
|
-
self.item_type = item_type
|
|
124
|
-
self.item_name = item_name
|
|
125
|
-
|
|
126
|
-
def compose(self) -> ComposeResult:
|
|
127
|
-
with Vertical(id="kill-dialog"):
|
|
128
|
-
yield Static(f"Kill {self.item_type}?", id="kill-title")
|
|
129
|
-
yield Static(f"This will terminate: {self.item_name}", id="kill-message")
|
|
130
|
-
|
|
131
|
-
with Horizontal(id="kill-buttons"):
|
|
132
|
-
yield Button("Kill", variant="warning", id="kill-yes")
|
|
133
|
-
yield Button("Cancel", variant="primary", id="kill-no")
|
|
134
|
-
|
|
135
|
-
def on_mount(self) -> None:
|
|
136
|
-
"""Focus cancel button by default"""
|
|
137
|
-
self.query_one("#kill-no", Button).focus()
|
|
138
|
-
|
|
139
|
-
def on_button_pressed(self, event: Button.Pressed) -> None:
|
|
140
|
-
if event.button.id == "kill-yes":
|
|
141
|
-
self.dismiss(True)
|
|
142
|
-
else:
|
|
143
|
-
self.dismiss(False)
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
def get_status_icon(status: str, failure_reason=None):
|
|
147
|
-
"""Get status icon for a job state.
|
|
148
|
-
|
|
149
|
-
Args:
|
|
150
|
-
status: Job state name (e.g., "done", "error", "running")
|
|
151
|
-
failure_reason: Optional JobFailureStatus enum for error states
|
|
152
|
-
|
|
153
|
-
Returns:
|
|
154
|
-
Status icon string
|
|
155
|
-
"""
|
|
156
|
-
if status == "done":
|
|
157
|
-
return "✓"
|
|
158
|
-
elif status == "error":
|
|
159
|
-
# Show different icons for different failure types
|
|
160
|
-
if failure_reason is not None:
|
|
161
|
-
from experimaestro.scheduler.interfaces import JobFailureStatus
|
|
162
|
-
|
|
163
|
-
if failure_reason == JobFailureStatus.DEPENDENCY:
|
|
164
|
-
return "🔗" # Dependency failed
|
|
165
|
-
elif failure_reason == JobFailureStatus.TIMEOUT:
|
|
166
|
-
return "⏱" # Timeout
|
|
167
|
-
elif failure_reason == JobFailureStatus.MEMORY:
|
|
168
|
-
return "💾" # Memory issue
|
|
169
|
-
# FAILED or unknown - use default error icon
|
|
170
|
-
return "❌"
|
|
171
|
-
elif status == "running":
|
|
172
|
-
return "▶"
|
|
173
|
-
elif status == "waiting":
|
|
174
|
-
return "⌛" # Waiting for dependencies
|
|
175
|
-
else:
|
|
176
|
-
# phantom, unscheduled or unknown
|
|
177
|
-
return "👻"
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
class CaptureLog(RichLog):
|
|
181
|
-
"""Custom RichLog widget that captures print statements with log highlighting"""
|
|
182
|
-
|
|
183
|
-
def on_mount(self) -> None:
|
|
184
|
-
"""Enable print capturing when widget is mounted"""
|
|
185
|
-
self.begin_capture_print()
|
|
186
|
-
|
|
187
|
-
def on_unmount(self) -> None:
|
|
188
|
-
"""Stop print capturing when widget is unmounted"""
|
|
189
|
-
self.end_capture_print()
|
|
190
|
-
|
|
191
|
-
def _format_log_line(self, text: str) -> Text:
|
|
192
|
-
"""Format a log line with appropriate styling based on log level"""
|
|
193
|
-
result = Text()
|
|
194
|
-
|
|
195
|
-
# Check for common log level patterns
|
|
196
|
-
if text.startswith("ERROR:") or ":ERROR:" in text:
|
|
197
|
-
result.append(text, style="bold red")
|
|
198
|
-
elif text.startswith("WARNING:") or ":WARNING:" in text:
|
|
199
|
-
result.append(text, style="yellow")
|
|
200
|
-
elif text.startswith("INFO:") or ":INFO:" in text:
|
|
201
|
-
result.append(text, style="green")
|
|
202
|
-
elif text.startswith("DEBUG:") or ":DEBUG:" in text:
|
|
203
|
-
result.append(text, style="dim")
|
|
204
|
-
elif text.startswith("CRITICAL:") or ":CRITICAL:" in text:
|
|
205
|
-
result.append(text, style="bold white on red")
|
|
206
|
-
else:
|
|
207
|
-
result.append(text)
|
|
208
|
-
|
|
209
|
-
return result
|
|
210
|
-
|
|
211
|
-
def on_print(self, event: events.Print) -> None:
|
|
212
|
-
"""Handle print events from captured stdout/stderr"""
|
|
213
|
-
if text := event.text.strip():
|
|
214
|
-
self.write(self._format_log_line(text))
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
class ExperimentsList(Widget):
|
|
218
|
-
"""Widget displaying list of experiments"""
|
|
219
|
-
|
|
220
|
-
BINDINGS = [
|
|
221
|
-
Binding("d", "delete_experiment", "Delete", show=False),
|
|
222
|
-
Binding("k", "kill_experiment", "Kill", show=False),
|
|
223
|
-
]
|
|
224
|
-
|
|
225
|
-
current_experiment: reactive[Optional[str]] = reactive(None)
|
|
226
|
-
collapsed: reactive[bool] = reactive(False)
|
|
227
|
-
|
|
228
|
-
def __init__(self, state_provider: WorkspaceStateProvider) -> None:
|
|
229
|
-
super().__init__()
|
|
230
|
-
self.state_provider = state_provider
|
|
231
|
-
self.experiments = []
|
|
232
|
-
|
|
233
|
-
def _get_selected_experiment_id(self) -> Optional[str]:
|
|
234
|
-
"""Get the experiment ID from the currently selected row"""
|
|
235
|
-
table = self.query_one("#experiments-table", DataTable)
|
|
236
|
-
if table.cursor_row is None:
|
|
237
|
-
return None
|
|
238
|
-
row_key = list(table.rows.keys())[table.cursor_row]
|
|
239
|
-
if row_key:
|
|
240
|
-
return str(row_key.value)
|
|
241
|
-
return None
|
|
242
|
-
|
|
243
|
-
def action_delete_experiment(self) -> None:
|
|
244
|
-
"""Request to delete the selected experiment"""
|
|
245
|
-
exp_id = self._get_selected_experiment_id()
|
|
246
|
-
if exp_id:
|
|
247
|
-
self.post_message(DeleteExperimentRequest(exp_id))
|
|
248
|
-
|
|
249
|
-
def action_kill_experiment(self) -> None:
|
|
250
|
-
"""Request to kill all running jobs in the selected experiment"""
|
|
251
|
-
exp_id = self._get_selected_experiment_id()
|
|
252
|
-
if exp_id:
|
|
253
|
-
self.post_message(KillExperimentRequest(exp_id))
|
|
254
|
-
|
|
255
|
-
def compose(self) -> ComposeResult:
|
|
256
|
-
# Collapsed header (hidden initially)
|
|
257
|
-
with Horizontal(id="collapsed-header", classes="hidden"):
|
|
258
|
-
yield Label("", id="collapsed-experiment-info")
|
|
259
|
-
|
|
260
|
-
# Full experiments table
|
|
261
|
-
with Container(id="experiments-table-container"):
|
|
262
|
-
yield Label("Experiments", classes="section-title")
|
|
263
|
-
yield DataTable(id="experiments-table", cursor_type="row")
|
|
264
|
-
|
|
265
|
-
def on_mount(self) -> None:
|
|
266
|
-
"""Initialize the experiments table"""
|
|
267
|
-
table = self.query_one("#experiments-table", DataTable)
|
|
268
|
-
table.add_column("ID", key="id")
|
|
269
|
-
table.add_column("Host", key="host")
|
|
270
|
-
table.add_column("Jobs", key="jobs")
|
|
271
|
-
table.add_column("Status", key="status")
|
|
272
|
-
table.add_column("Started", key="started")
|
|
273
|
-
table.add_column("Duration", key="duration")
|
|
274
|
-
self.refresh_experiments()
|
|
275
|
-
|
|
276
|
-
# If there's only one experiment, automatically select it
|
|
277
|
-
if len(self.experiments) == 1:
|
|
278
|
-
exp_id = self.experiments[0].experiment_id
|
|
279
|
-
self.current_experiment = exp_id
|
|
280
|
-
self.collapse_to_experiment(exp_id)
|
|
281
|
-
self.post_message(ExperimentSelected(exp_id))
|
|
282
|
-
|
|
283
|
-
def refresh_experiments(self) -> None:
|
|
284
|
-
"""Refresh the experiments list from state provider"""
|
|
285
|
-
table = self.query_one("#experiments-table", DataTable)
|
|
286
|
-
|
|
287
|
-
try:
|
|
288
|
-
self.experiments = self.state_provider.get_experiments()
|
|
289
|
-
self.log.debug(
|
|
290
|
-
f"Refreshing experiments: found {len(self.experiments)} experiments"
|
|
291
|
-
)
|
|
292
|
-
except Exception as e:
|
|
293
|
-
self.log.error(f"ERROR refreshing experiments: {e}")
|
|
294
|
-
import traceback
|
|
295
|
-
|
|
296
|
-
self.log.error(traceback.format_exc())
|
|
297
|
-
self.experiments = []
|
|
298
|
-
return
|
|
299
|
-
|
|
300
|
-
# Get existing row keys
|
|
301
|
-
existing_keys = set(table.rows.keys())
|
|
302
|
-
current_exp_ids = set()
|
|
303
|
-
|
|
304
|
-
from datetime import datetime
|
|
305
|
-
import time as time_module
|
|
306
|
-
|
|
307
|
-
for exp in self.experiments:
|
|
308
|
-
exp_id = exp.experiment_id
|
|
309
|
-
current_exp_ids.add(exp_id)
|
|
310
|
-
total = exp.total_jobs
|
|
311
|
-
finished = exp.finished_jobs
|
|
312
|
-
failed = exp.failed_jobs
|
|
313
|
-
|
|
314
|
-
# Determine status
|
|
315
|
-
if failed > 0:
|
|
316
|
-
status = f"❌ {failed} failed"
|
|
317
|
-
elif finished == total and total > 0:
|
|
318
|
-
status = "✓ Done"
|
|
319
|
-
elif finished < total:
|
|
320
|
-
status = f"▶ {finished}/{total}"
|
|
321
|
-
else:
|
|
322
|
-
status = "Empty"
|
|
323
|
-
|
|
324
|
-
jobs_text = f"{finished}/{total}"
|
|
325
|
-
|
|
326
|
-
# Format started time
|
|
327
|
-
if exp.started_at:
|
|
328
|
-
started = datetime.fromtimestamp(exp.started_at).strftime(
|
|
329
|
-
"%Y-%m-%d %H:%M"
|
|
330
|
-
)
|
|
331
|
-
else:
|
|
332
|
-
started = "-"
|
|
333
|
-
|
|
334
|
-
# Calculate duration
|
|
335
|
-
duration = "-"
|
|
336
|
-
if exp.started_at:
|
|
337
|
-
if exp.ended_at:
|
|
338
|
-
elapsed = exp.ended_at - exp.started_at
|
|
339
|
-
else:
|
|
340
|
-
# Still running - show elapsed time
|
|
341
|
-
elapsed = time_module.time() - exp.started_at
|
|
342
|
-
# Format duration
|
|
343
|
-
duration = format_duration(elapsed)
|
|
344
|
-
|
|
345
|
-
# Get hostname (may be None for older experiments)
|
|
346
|
-
hostname = getattr(exp, "hostname", None) or "-"
|
|
347
|
-
|
|
348
|
-
# Update existing row or add new one
|
|
349
|
-
if exp_id in existing_keys:
|
|
350
|
-
table.update_cell(exp_id, "id", exp_id, update_width=True)
|
|
351
|
-
table.update_cell(exp_id, "host", hostname, update_width=True)
|
|
352
|
-
table.update_cell(exp_id, "jobs", jobs_text, update_width=True)
|
|
353
|
-
table.update_cell(exp_id, "status", status, update_width=True)
|
|
354
|
-
table.update_cell(exp_id, "started", started, update_width=True)
|
|
355
|
-
table.update_cell(exp_id, "duration", duration, update_width=True)
|
|
356
|
-
else:
|
|
357
|
-
table.add_row(
|
|
358
|
-
exp_id, hostname, jobs_text, status, started, duration, key=exp_id
|
|
359
|
-
)
|
|
360
|
-
|
|
361
|
-
# Remove rows for experiments that no longer exist
|
|
362
|
-
for old_exp_id in existing_keys - current_exp_ids:
|
|
363
|
-
table.remove_row(old_exp_id)
|
|
364
|
-
|
|
365
|
-
# Update collapsed header if viewing an experiment
|
|
366
|
-
if self.collapsed and self.current_experiment:
|
|
367
|
-
self._update_collapsed_header(self.current_experiment)
|
|
368
|
-
|
|
369
|
-
def on_data_table_row_selected(self, event: DataTable.RowSelected) -> None:
|
|
370
|
-
"""Handle experiment selection"""
|
|
371
|
-
if event.row_key:
|
|
372
|
-
self.current_experiment = str(event.row_key.value)
|
|
373
|
-
self.collapse_to_experiment(self.current_experiment)
|
|
374
|
-
self.post_message(ExperimentSelected(str(event.row_key.value)))
|
|
375
|
-
|
|
376
|
-
def _update_collapsed_header(self, experiment_id: str) -> None:
|
|
377
|
-
"""Update the collapsed experiment header with current stats"""
|
|
378
|
-
exp_info = next(
|
|
379
|
-
(exp for exp in self.experiments if exp.experiment_id == experiment_id),
|
|
380
|
-
None,
|
|
381
|
-
)
|
|
382
|
-
if not exp_info:
|
|
383
|
-
return
|
|
384
|
-
|
|
385
|
-
total = exp_info.total_jobs
|
|
386
|
-
finished = exp_info.finished_jobs
|
|
387
|
-
failed = exp_info.failed_jobs
|
|
388
|
-
|
|
389
|
-
if failed > 0:
|
|
390
|
-
status = f"❌ {failed} failed"
|
|
391
|
-
elif finished == total and total > 0:
|
|
392
|
-
status = "✓ Done"
|
|
393
|
-
elif finished < total:
|
|
394
|
-
status = f"▶ {finished}/{total}"
|
|
395
|
-
else:
|
|
396
|
-
status = "Empty"
|
|
397
|
-
|
|
398
|
-
collapsed_label = self.query_one("#collapsed-experiment-info", Label)
|
|
399
|
-
collapsed_label.update(f"📊 {experiment_id} - {status} (click to go back)")
|
|
400
|
-
|
|
401
|
-
def collapse_to_experiment(self, experiment_id: str) -> None:
|
|
402
|
-
"""Collapse the experiments list to show only the selected experiment"""
|
|
403
|
-
self._update_collapsed_header(experiment_id)
|
|
404
|
-
|
|
405
|
-
# Hide table, show collapsed header
|
|
406
|
-
self.query_one("#experiments-table-container").add_class("hidden")
|
|
407
|
-
self.query_one("#collapsed-header").remove_class("hidden")
|
|
408
|
-
self.collapsed = True
|
|
409
|
-
|
|
410
|
-
def expand_experiments(self) -> None:
|
|
411
|
-
"""Expand back to full experiments list"""
|
|
412
|
-
# Show table, hide collapsed header
|
|
413
|
-
self.query_one("#collapsed-header").add_class("hidden")
|
|
414
|
-
self.query_one("#experiments-table-container").remove_class("hidden")
|
|
415
|
-
self.collapsed = False
|
|
416
|
-
self.current_experiment = None
|
|
417
|
-
|
|
418
|
-
# Focus the experiments table
|
|
419
|
-
table = self.query_one("#experiments-table", DataTable)
|
|
420
|
-
table.focus()
|
|
421
|
-
|
|
422
|
-
def on_click(self) -> None:
|
|
423
|
-
"""Handle clicks on the widget"""
|
|
424
|
-
if self.collapsed:
|
|
425
|
-
self.expand_experiments()
|
|
426
|
-
self.post_message(ExperimentDeselected())
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
class ExperimentSelected(Message):
|
|
430
|
-
"""Message sent when an experiment is selected"""
|
|
431
|
-
|
|
432
|
-
def __init__(self, experiment_id: str) -> None:
|
|
433
|
-
super().__init__()
|
|
434
|
-
self.experiment_id = experiment_id
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
class ExperimentDeselected(Message):
|
|
438
|
-
"""Message sent when an experiment is deselected"""
|
|
439
|
-
|
|
440
|
-
pass
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
class JobSelected(Message):
|
|
444
|
-
"""Message sent when a job is selected"""
|
|
445
|
-
|
|
446
|
-
def __init__(self, job_id: str, experiment_id: str) -> None:
|
|
447
|
-
super().__init__()
|
|
448
|
-
self.job_id = job_id
|
|
449
|
-
self.experiment_id = experiment_id
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
class JobDeselected(Message):
|
|
453
|
-
"""Message sent when returning from job detail view"""
|
|
454
|
-
|
|
455
|
-
pass
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
class ViewJobLogs(Message):
|
|
459
|
-
"""Message sent when user wants to view job logs"""
|
|
460
|
-
|
|
461
|
-
def __init__(self, job_path: str, task_id: str) -> None:
|
|
462
|
-
super().__init__()
|
|
463
|
-
self.job_path = job_path
|
|
464
|
-
self.task_id = task_id
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
class ViewJobLogsRequest(Message):
|
|
468
|
-
"""Message sent when user requests to view logs from jobs table"""
|
|
469
|
-
|
|
470
|
-
def __init__(self, job_id: str, experiment_id: str) -> None:
|
|
471
|
-
super().__init__()
|
|
472
|
-
self.job_id = job_id
|
|
473
|
-
self.experiment_id = experiment_id
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
class LogsSyncComplete(Message):
|
|
477
|
-
"""Message sent when remote log sync is complete"""
|
|
478
|
-
|
|
479
|
-
def __init__(self, log_files: list, job_id: str) -> None:
|
|
480
|
-
super().__init__()
|
|
481
|
-
self.log_files = log_files
|
|
482
|
-
self.job_id = job_id
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
class LogsSyncFailed(Message):
|
|
486
|
-
"""Message sent when remote log sync fails"""
|
|
487
|
-
|
|
488
|
-
def __init__(self, error: str) -> None:
|
|
489
|
-
super().__init__()
|
|
490
|
-
self.error = error
|
|
491
|
-
|
|
492
|
-
|
|
493
|
-
class DeleteJobRequest(Message):
|
|
494
|
-
"""Message sent when user requests to delete a job"""
|
|
495
|
-
|
|
496
|
-
def __init__(self, job_id: str, experiment_id: str) -> None:
|
|
497
|
-
super().__init__()
|
|
498
|
-
self.job_id = job_id
|
|
499
|
-
self.experiment_id = experiment_id
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
class DeleteExperimentRequest(Message):
|
|
503
|
-
"""Message sent when user requests to delete an experiment"""
|
|
504
|
-
|
|
505
|
-
def __init__(self, experiment_id: str) -> None:
|
|
506
|
-
super().__init__()
|
|
507
|
-
self.experiment_id = experiment_id
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
class KillJobRequest(Message):
|
|
511
|
-
"""Message sent when user requests to kill a running job"""
|
|
512
|
-
|
|
513
|
-
def __init__(self, job_id: str, experiment_id: str) -> None:
|
|
514
|
-
super().__init__()
|
|
515
|
-
self.job_id = job_id
|
|
516
|
-
self.experiment_id = experiment_id
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
class KillExperimentRequest(Message):
|
|
520
|
-
"""Message sent when user requests to kill all running jobs in an experiment"""
|
|
521
|
-
|
|
522
|
-
def __init__(self, experiment_id: str) -> None:
|
|
523
|
-
super().__init__()
|
|
524
|
-
self.experiment_id = experiment_id
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
class FilterChanged(Message):
|
|
528
|
-
"""Message sent when search filter changes"""
|
|
529
|
-
|
|
530
|
-
def __init__(self, filter_fn) -> None:
|
|
531
|
-
super().__init__()
|
|
532
|
-
self.filter_fn = filter_fn
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
class ServicesList(Vertical):
|
|
536
|
-
"""Widget displaying services for selected experiment
|
|
537
|
-
|
|
538
|
-
Services are retrieved from WorkspaceStateProvider.get_services() which
|
|
539
|
-
abstracts away whether services are live (from scheduler) or recreated
|
|
540
|
-
from database state_dict. The UI treats all services uniformly.
|
|
541
|
-
"""
|
|
542
|
-
|
|
543
|
-
BINDINGS = [
|
|
544
|
-
Binding("s", "start_service", "Start"),
|
|
545
|
-
Binding("x", "stop_service", "Stop"),
|
|
546
|
-
Binding("u", "copy_url", "Copy URL", show=False),
|
|
547
|
-
]
|
|
548
|
-
|
|
549
|
-
# State icons for display
|
|
550
|
-
STATE_ICONS = {
|
|
551
|
-
"STOPPED": "⏹",
|
|
552
|
-
"STARTING": "⏳",
|
|
553
|
-
"RUNNING": "▶",
|
|
554
|
-
"STOPPING": "⏳",
|
|
555
|
-
}
|
|
556
|
-
|
|
557
|
-
def __init__(self, state_provider: WorkspaceStateProvider) -> None:
|
|
558
|
-
super().__init__()
|
|
559
|
-
self.state_provider = state_provider
|
|
560
|
-
self.current_experiment: Optional[str] = None
|
|
561
|
-
self._services: dict = {} # service_id -> Service object
|
|
562
|
-
|
|
563
|
-
def compose(self) -> ComposeResult:
|
|
564
|
-
yield DataTable(id="services-table", cursor_type="row")
|
|
565
|
-
|
|
566
|
-
def on_mount(self) -> None:
|
|
567
|
-
"""Set up the services table"""
|
|
568
|
-
table = self.query_one("#services-table", DataTable)
|
|
569
|
-
table.add_columns("ID", "Description", "State", "URL")
|
|
570
|
-
table.cursor_type = "row"
|
|
571
|
-
|
|
572
|
-
def set_experiment(self, experiment_id: Optional[str]) -> None:
|
|
573
|
-
"""Set the current experiment and refresh services"""
|
|
574
|
-
self.current_experiment = experiment_id
|
|
575
|
-
self.refresh_services()
|
|
576
|
-
|
|
577
|
-
def refresh_services(self) -> None:
|
|
578
|
-
"""Refresh the services list from state provider"""
|
|
579
|
-
table = self.query_one("#services-table", DataTable)
|
|
580
|
-
table.clear()
|
|
581
|
-
self._services = {}
|
|
582
|
-
|
|
583
|
-
if not self.current_experiment:
|
|
584
|
-
return
|
|
585
|
-
|
|
586
|
-
# Get services from state provider (handles live vs DB automatically)
|
|
587
|
-
services = self.state_provider.get_services(self.current_experiment)
|
|
588
|
-
self.log.info(
|
|
589
|
-
f"refresh_services got {len(services)} services: "
|
|
590
|
-
f"{[(s.id, id(s), getattr(s, 'url', None)) for s in services]}"
|
|
591
|
-
)
|
|
592
|
-
|
|
593
|
-
for service in services:
|
|
594
|
-
service_id = service.id
|
|
595
|
-
self._services[service_id] = service
|
|
596
|
-
|
|
597
|
-
state_name = service.state.name if hasattr(service, "state") else "UNKNOWN"
|
|
598
|
-
state_icon = self.STATE_ICONS.get(state_name, "?")
|
|
599
|
-
url = getattr(service, "url", None) or "-"
|
|
600
|
-
description = (
|
|
601
|
-
service.description() if hasattr(service, "description") else ""
|
|
602
|
-
)
|
|
603
|
-
|
|
604
|
-
table.add_row(
|
|
605
|
-
service_id,
|
|
606
|
-
description,
|
|
607
|
-
f"{state_icon} {state_name}",
|
|
608
|
-
url,
|
|
609
|
-
key=service_id,
|
|
610
|
-
)
|
|
611
|
-
|
|
612
|
-
def _get_selected_service(self):
|
|
613
|
-
"""Get the currently selected Service object"""
|
|
614
|
-
table = self.query_one("#services-table", DataTable)
|
|
615
|
-
if table.cursor_row is not None and table.row_count > 0:
|
|
616
|
-
row_key = list(table.rows.keys())[table.cursor_row]
|
|
617
|
-
if row_key:
|
|
618
|
-
service_id = str(row_key.value)
|
|
619
|
-
return self._services.get(service_id)
|
|
620
|
-
return None
|
|
621
|
-
|
|
622
|
-
def action_start_service(self) -> None:
    """Start the selected service and report the outcome to the user.

    Does nothing when no service is selected. A service is started by
    calling its ``get_url()`` method (the code reports "started" right
    after that call); services without that method trigger a warning
    notification. The services table is refreshed afterwards, and any
    exception is turned into an error notification.
    """
    service = self._get_selected_service()
    if not service:
        return

    self.log.info(f"Starting service {service.id} (id={id(service)})")

    try:
        if hasattr(service, "get_url"):
            url = service.get_url()
            # ``url`` is an optional attribute everywhere else in this
            # widget; reading it directly here would raise AttributeError
            # and report a failure although the service did start.
            service_url = getattr(service, "url", None)
            self.log.info(f"Service started, url={url}, service.url={service_url}")
            self.notify(f"Service started: {url}", severity="information")
        else:
            self.notify("Service does not support starting", severity="warning")
        self.refresh_services()
    except Exception as e:
        self.notify(f"Failed to start service: {e}", severity="error")
|
|
640
|
-
|
|
641
|
-
def action_stop_service(self) -> None:
    """Stop the selected service and report the outcome to the user.

    Does nothing when no service is selected, and only warns when the
    service is already in the ``STOPPED`` state. Services without a
    ``stop()`` method trigger a warning. The services table is refreshed
    afterwards, and any exception becomes an error notification.
    """
    selected = self._get_selected_service()
    if not selected:
        return

    # Imported here, as in the original code, rather than at module level
    from experimaestro.scheduler.services import ServiceState

    if selected.state == ServiceState.STOPPED:
        self.notify("Service is not running", severity="warning")
        return

    try:
        if not hasattr(selected, "stop"):
            self.notify("Service does not support stopping", severity="warning")
        else:
            selected.stop()
            self.notify(f"Service stopped: {selected.id}", severity="information")
        self.refresh_services()
    except Exception as exc:
        self.notify(f"Failed to stop service: {exc}", severity="error")
|
|
662
|
-
|
|
663
|
-
def action_copy_url(self) -> None:
    """Copy the selected service's URL to the clipboard.

    Does nothing when no service is selected. When the service has no URL
    (attribute missing or empty), the user is told to start it first.
    Clipboard failures, including a missing ``pyperclip`` module, are
    reported as error notifications.
    """
    selected = self._get_selected_service()
    if not selected:
        return

    url = getattr(selected, "url", None)
    if not url:
        self.notify("Start the service first to get URL", severity="warning")
        return

    try:
        # Imported inside the try so an ImportError is reported, not raised
        import pyperclip

        pyperclip.copy(url)
        self.notify(f"URL copied: {url}", severity="information")
    except Exception as exc:
        self.notify(f"Failed to copy: {exc}", severity="error")
|
|
680
|
-
|
|
681
|
-
|
|
682
|
-
class JobDetailView(Widget):
    """Widget showing the details of a single job as a stack of labels"""

    BINDINGS = [
        Binding("l", "view_logs", "View Logs", priority=True),
    ]

    def __init__(self, state_provider: WorkspaceStateProvider) -> None:
        """Store the state provider; no job is selected initially."""
        super().__init__()
        self.state_provider = state_provider
        self.job_data: Optional[dict] = None
        self.current_experiment_id: Optional[str] = None
        self.current_job_id: Optional[str] = None

    def compose(self) -> ComposeResult:
        """Yield the title and the (initially empty) detail labels."""
        yield Label("Job Details", classes="section-title")
        with Vertical(id="job-detail-content"):
            for label_id in (
                "job-id-label",
                "job-task-label",
                "job-status-label",
                "job-path-label",
                "job-times-label",
            ):
                yield Label("", id=label_id)
            yield Label("Tags:", classes="subsection-title")
            yield Label("", id="job-tags-label")
            yield Label("Progress:", classes="subsection-title")
            yield Label("", id="job-progress-label")
            yield Label("", id="job-logs-hint")

    def action_view_logs(self) -> None:
        """Post a ViewJobLogs message for the displayed job.

        Nothing is posted unless a job is loaded and it has both a path
        and a task ID.
        """
        job = self.job_data
        if not (job and job.path and job.task_id):
            return
        self.post_message(ViewJobLogs(str(job.path), job.task_id))

    def set_job(self, job_id: str, experiment_id: str) -> None:
        """Select the job to display and refresh the labels."""
        self.current_job_id = job_id
        self.current_experiment_id = experiment_id
        self.refresh_job_detail()

    def refresh_job_detail(self) -> None:
        """Reload the selected job from the state provider and update labels.

        Returns early (leaving the labels untouched) when no job/experiment
        is selected or when the provider does not return the job.
        """
        if not (self.current_job_id and self.current_experiment_id):
            return

        job = self.state_provider.get_job(
            self.current_job_id, self.current_experiment_id
        )
        if not job:
            self.log(f"Job not found: {self.current_job_id}")
            return

        self.job_data = job

        def show(selector: str, text: str) -> None:
            # Replace the content of the label matching ``selector``
            self.query_one(selector, Label).update(text)

        show("#job-id-label", f"Job ID: {job.identifier}")
        show("#job-task-label", f"Task: {job.task_id}")

        # Status: icon + state name, plus the failure reason when present
        status_name = job.state.name if job.state else "unknown"
        failure_reason = getattr(job, "failure_reason", None)
        status_text = f"{get_status_icon(status_name, failure_reason)} {status_name}"
        if failure_reason:
            status_text += f" ({failure_reason.name})"
        show("#job-status-label", f"Status: {status_text}")

        # The "path" label actually displays the job locator
        show("#job-path-label", f"Locator: {job.locator or '-'}")

        # Timestamps are rendered in local time; missing ones become "-"
        from datetime import datetime
        import time as time_module

        def format_time(ts):
            if not ts:
                return "-"
            return datetime.fromtimestamp(ts).strftime("%Y-%m-%d %H:%M:%S")

        submitted = format_time(job.submittime)
        start = format_time(job.starttime)
        end = format_time(job.endtime)

        # Duration: finished jobs use end - start, started ones use now - start
        if not job.starttime:
            duration = "-"
        elif job.endtime:
            duration = format_duration(job.endtime - job.starttime)
        else:
            duration = (
                format_duration(time_module.time() - job.starttime) + " (running)"
            )

        show(
            "#job-times-label",
            f"Submitted: {submitted} | Start: {start} | End: {end} | Duration: {duration}",
        )

        # Tags are a mapping rendered as "key=value" pairs
        tags = job.tags
        tags_text = (
            ", ".join(f"{k}={v}" for k, v in tags.items()) if tags else "(no tags)"
        )
        show("#job-tags-label", tags_text)

        # Progress: one line per entry, indented by its nesting level
        progress_lines = []
        for entry in job.progress or []:
            level = entry.get("level", 0)
            pct = entry.get("progress", 0) * 100
            desc = entry.get("desc", "")
            progress_lines.append(f"{' ' * level}{pct:.1f}% {desc}")
        show(
            "#job-progress-label",
            "\n".join(progress_lines) if progress_lines else "-",
        )

        # Log hint: log files are named after the last part of the task ID
        # (e.g., "evaluate" from "mnist_xp.learn.evaluate")
        job_path = job.path
        task_id = job.task_id
        if job_path and task_id:
            task_name = task_id.split(".")[-1]
            has_logs = (job_path / f"{task_name}.out").exists() or (
                job_path / f"{task_name}.err"
            ).exists()
            if has_logs:
                hint = "[bold cyan]Press 'l' to view logs[/bold cyan]"
            else:
                hint = "(no log files found)"
        else:
            hint = ""
        show("#job-logs-hint", hint)
|
|
821
|
-
|
|
822
|
-
|
|
823
|
-
class SearchBar(Widget):
    """Job filter bar: an input with syntax hints plus an active-filter line"""

    visible: reactive[bool] = reactive(False)
    _keep_filter: bool = False  # Set before hiding to preserve the filter
    _query_valid: bool = False  # Whether the current input parsed correctly

    def __init__(self) -> None:
        """Start without any filter."""
        super().__init__()
        self.filter_fn = None
        self.active_query = ""  # Text of the last successfully parsed query

    def compose(self) -> ComposeResult:
        """Yield the active-filter indicator and the search input container."""
        # Indicator shown when a filter is active but the input is hidden
        yield Static("", id="active-filter")
        # Input, syntax hints and error line
        with Vertical(id="search-container"):
            yield Input(
                placeholder="Filter: @state = 'done', @name ~ 'pattern', tag = 'value'",
                id="search-input",
            )
            yield Static(
                "Syntax: @state = 'done' | @name ~ 'regex' | tag = 'value' | and/or",
                id="search-hints",
            )
            yield Static("", id="search-error")

    def on_mount(self) -> None:
        """Hide the widget and all of its parts on startup."""
        self.display = False
        for selector in ("#search-container", "#active-filter", "#search-error"):
            self.query_one(selector).display = False

    def watch_visible(self, visible: bool) -> None:
        """React to ``visible`` changes by showing or hiding the input.

        When hidden, the filter is cleared unless ``_keep_filter`` was set;
        if a filter remains, only the active-filter indicator stays shown.
        """
        container = self.query_one("#search-container")
        indicator = self.query_one("#active-filter")
        error_line = self.query_one("#search-error")

        if visible:
            self.display = True
            container.display = True
            indicator.display = False
            self.query_one("#search-input", Input).focus()
            return

        if not self._keep_filter:
            # Hidden without applying: drop the query and the filter
            self.query_one("#search-input", Input).value = ""
            self.filter_fn = None
            self.active_query = ""
            self._query_valid = False
        self._keep_filter = False

        # With an active filter the widget stays visible as an indicator;
        # otherwise everything, including this widget, is hidden
        filter_active = self.filter_fn is not None
        self.display = filter_active
        container.display = False
        error_line.display = False
        if filter_active:
            indicator.update(
                f"Filter: {self.active_query} (/ to edit, c to clear)"
            )
        indicator.display = filter_active

    def on_input_changed(self, event: Input.Changed) -> None:
        """Re-parse the filter expression each time the input changes.

        Posts ``FilterChanged`` with the new filter function, or with
        ``None`` when the input is empty or cannot be parsed.
        """
        text = event.value.strip()
        field = self.query_one("#search-input", Input)
        error_line = self.query_one("#search-error", Static)

        if not text:
            self.filter_fn = None
            self._query_valid = False
            self.post_message(FilterChanged(None))
            field.remove_class("error")
            field.remove_class("valid")
            error_line.display = False
            return

        try:
            from experimaestro.cli.filter import createFilter

            self.filter_fn = createFilter(text)
            self._query_valid = True
            self.active_query = text
            self.post_message(FilterChanged(self.filter_fn))
            field.remove_class("error")
            field.add_class("valid")
            error_line.display = False
        except Exception as exc:
            self.filter_fn = None
            self._query_valid = False
            self.post_message(FilterChanged(None))
            field.remove_class("valid")
            field.add_class("error")
            # Only the first 50 characters of the error are displayed
            error_line.update(f"Invalid query: {str(exc)[:50]}")
            error_line.display = True

    def on_input_submitted(self, event: Input.Submitted) -> None:
        """Apply the filter and hide the input, if the query is valid.

        With an invalid query nothing happens, so the input keeps focus
        and can be corrected.
        """
        if not self._query_valid or self.filter_fn is None:
            return
        # Tell watch_visible to keep the filter while hiding the input
        self._keep_filter = True
        self.visible = False
        # Lets the parent move focus back to the jobs table
        self.post_message(SearchApplied())
|
|
937
|
-
|
|
938
|
-
|
|
939
|
-
class SearchApplied(Message):
    """Message posted by the search bar when Enter applies a valid filter"""
|
|
943
|
-
|
|
944
|
-
|
|
945
|
-
class JobsTable(Vertical):
|
|
946
|
-
"""Widget displaying jobs for selected experiment"""
|
|
947
|
-
|
|
948
|
-
BINDINGS = [
|
|
949
|
-
Binding("d", "delete_job", "Delete", show=False),
|
|
950
|
-
Binding("k", "kill_job", "Kill", show=False),
|
|
951
|
-
Binding("l", "view_logs", "Logs"),
|
|
952
|
-
Binding("f", "copy_path", "Copy Path", show=False),
|
|
953
|
-
Binding("/", "toggle_search", "Search"),
|
|
954
|
-
Binding("c", "clear_filter", "Clear", show=False),
|
|
955
|
-
Binding("r", "refresh_live", "Refresh"),
|
|
956
|
-
Binding("S", "sort_by_status", "Sort ⚑", show=False),
|
|
957
|
-
Binding("T", "sort_by_task", "Sort Task", show=False),
|
|
958
|
-
Binding("D", "sort_by_submitted", "Sort Date", show=False),
|
|
959
|
-
Binding("escape", "clear_search", show=False, priority=True),
|
|
960
|
-
]
|
|
961
|
-
|
|
962
|
-
# Track current sort state
|
|
963
|
-
_sort_column: Optional[str] = None
|
|
964
|
-
_sort_reverse: bool = False
|
|
965
|
-
_needs_rebuild: bool = True # Start with rebuild needed
|
|
966
|
-
|
|
967
|
-
def __init__(self, state_provider: WorkspaceStateProvider) -> None:
|
|
968
|
-
super().__init__()
|
|
969
|
-
self.state_provider = state_provider
|
|
970
|
-
self.filter_fn = None
|
|
971
|
-
self.current_experiment: Optional[str] = None
|
|
972
|
-
|
|
973
|
-
def compose(self) -> ComposeResult:
|
|
974
|
-
yield SearchBar()
|
|
975
|
-
yield DataTable(id="jobs-table", cursor_type="row")
|
|
976
|
-
|
|
977
|
-
def action_toggle_search(self) -> None:
|
|
978
|
-
"""Toggle search bar visibility"""
|
|
979
|
-
search_bar = self.query_one(SearchBar)
|
|
980
|
-
search_bar.visible = not search_bar.visible
|
|
981
|
-
|
|
982
|
-
def action_clear_filter(self) -> None:
|
|
983
|
-
"""Clear the active filter"""
|
|
984
|
-
if self.filter_fn is not None:
|
|
985
|
-
search_bar = self.query_one(SearchBar)
|
|
986
|
-
search_bar.query_one("#search-input", Input).value = ""
|
|
987
|
-
search_bar.filter_fn = None
|
|
988
|
-
search_bar.active_query = ""
|
|
989
|
-
search_bar._query_valid = False
|
|
990
|
-
# Hide the SearchBar completely
|
|
991
|
-
search_bar.display = False
|
|
992
|
-
search_bar.query_one("#search-container").display = False
|
|
993
|
-
search_bar.query_one("#active-filter").display = False
|
|
994
|
-
search_bar.query_one("#search-error").display = False
|
|
995
|
-
self.filter_fn = None
|
|
996
|
-
self.refresh_jobs()
|
|
997
|
-
self.notify("Filter cleared", severity="information")
|
|
998
|
-
|
|
999
|
-
def action_sort_by_status(self) -> None:
|
|
1000
|
-
"""Sort jobs by status"""
|
|
1001
|
-
if self._sort_column == "status":
|
|
1002
|
-
self._sort_reverse = not self._sort_reverse
|
|
1003
|
-
else:
|
|
1004
|
-
self._sort_column = "status"
|
|
1005
|
-
self._sort_reverse = False
|
|
1006
|
-
self._needs_rebuild = True
|
|
1007
|
-
self._update_column_headers()
|
|
1008
|
-
self.refresh_jobs()
|
|
1009
|
-
order = "desc" if self._sort_reverse else "asc"
|
|
1010
|
-
self.notify(f"Sorted by status ({order})", severity="information")
|
|
1011
|
-
|
|
1012
|
-
def action_sort_by_task(self) -> None:
|
|
1013
|
-
"""Sort jobs by task"""
|
|
1014
|
-
if self._sort_column == "task":
|
|
1015
|
-
self._sort_reverse = not self._sort_reverse
|
|
1016
|
-
else:
|
|
1017
|
-
self._sort_column = "task"
|
|
1018
|
-
self._sort_reverse = False
|
|
1019
|
-
self._needs_rebuild = True
|
|
1020
|
-
self._update_column_headers()
|
|
1021
|
-
self.refresh_jobs()
|
|
1022
|
-
order = "desc" if self._sort_reverse else "asc"
|
|
1023
|
-
self.notify(f"Sorted by task ({order})", severity="information")
|
|
1024
|
-
|
|
1025
|
-
def action_sort_by_submitted(self) -> None:
|
|
1026
|
-
"""Sort jobs by submission time"""
|
|
1027
|
-
if self._sort_column == "submitted":
|
|
1028
|
-
self._sort_reverse = not self._sort_reverse
|
|
1029
|
-
else:
|
|
1030
|
-
self._sort_column = "submitted"
|
|
1031
|
-
self._sort_reverse = False
|
|
1032
|
-
self._needs_rebuild = True
|
|
1033
|
-
self._update_column_headers()
|
|
1034
|
-
self.refresh_jobs()
|
|
1035
|
-
order = "newest first" if self._sort_reverse else "oldest first"
|
|
1036
|
-
self.notify(f"Sorted by date ({order})", severity="information")
|
|
1037
|
-
|
|
1038
|
-
def action_clear_search(self) -> None:
|
|
1039
|
-
"""Handle escape: hide search bar if visible, or go back"""
|
|
1040
|
-
search_bar = self.query_one(SearchBar)
|
|
1041
|
-
if search_bar.visible:
|
|
1042
|
-
# Search bar visible - hide it and clear filter
|
|
1043
|
-
search_bar.visible = False
|
|
1044
|
-
self.filter_fn = None
|
|
1045
|
-
self.refresh_jobs()
|
|
1046
|
-
# Focus the jobs table
|
|
1047
|
-
self.query_one("#jobs-table", DataTable).focus()
|
|
1048
|
-
else:
|
|
1049
|
-
# Search bar hidden - go back (keep filter)
|
|
1050
|
-
self.app.action_go_back()
|
|
1051
|
-
|
|
1052
|
-
def action_refresh_live(self) -> None:
|
|
1053
|
-
"""Refresh the jobs table"""
|
|
1054
|
-
self.refresh_jobs()
|
|
1055
|
-
self.notify("Jobs refreshed", severity="information")
|
|
1056
|
-
|
|
1057
|
-
def on_filter_changed(self, message: FilterChanged) -> None:
|
|
1058
|
-
"""Apply new filter"""
|
|
1059
|
-
self.filter_fn = message.filter_fn
|
|
1060
|
-
self.refresh_jobs()
|
|
1061
|
-
|
|
1062
|
-
def on_search_applied(self, message: SearchApplied) -> None:
|
|
1063
|
-
"""Focus jobs table when search is applied"""
|
|
1064
|
-
self.query_one("#jobs-table", DataTable).focus()
|
|
1065
|
-
|
|
1066
|
-
def _get_selected_job_id(self) -> Optional[str]:
|
|
1067
|
-
"""Get the job ID from the currently selected row"""
|
|
1068
|
-
table = self.query_one("#jobs-table", DataTable)
|
|
1069
|
-
if table.cursor_row is None:
|
|
1070
|
-
return None
|
|
1071
|
-
row_key = table.get_row_at(table.cursor_row)
|
|
1072
|
-
if row_key:
|
|
1073
|
-
# The first column is job_id
|
|
1074
|
-
return str(table.get_row_at(table.cursor_row)[0])
|
|
1075
|
-
return None
|
|
1076
|
-
|
|
1077
|
-
def action_delete_job(self) -> None:
|
|
1078
|
-
"""Request to delete the selected job"""
|
|
1079
|
-
table = self.query_one("#jobs-table", DataTable)
|
|
1080
|
-
if table.cursor_row is None or not self.current_experiment:
|
|
1081
|
-
return
|
|
1082
|
-
|
|
1083
|
-
# Get job ID from the row key
|
|
1084
|
-
row_key = list(table.rows.keys())[table.cursor_row]
|
|
1085
|
-
if row_key:
|
|
1086
|
-
job_id = str(row_key.value)
|
|
1087
|
-
self.post_message(DeleteJobRequest(job_id, self.current_experiment))
|
|
1088
|
-
|
|
1089
|
-
def action_kill_job(self) -> None:
|
|
1090
|
-
"""Request to kill the selected job"""
|
|
1091
|
-
table = self.query_one("#jobs-table", DataTable)
|
|
1092
|
-
if table.cursor_row is None or not self.current_experiment:
|
|
1093
|
-
return
|
|
1094
|
-
|
|
1095
|
-
row_key = list(table.rows.keys())[table.cursor_row]
|
|
1096
|
-
if row_key:
|
|
1097
|
-
job_id = str(row_key.value)
|
|
1098
|
-
self.post_message(KillJobRequest(job_id, self.current_experiment))
|
|
1099
|
-
|
|
1100
|
-
def action_view_logs(self) -> None:
|
|
1101
|
-
"""Request to view logs for the selected job"""
|
|
1102
|
-
table = self.query_one("#jobs-table", DataTable)
|
|
1103
|
-
if table.cursor_row is None or not self.current_experiment:
|
|
1104
|
-
return
|
|
1105
|
-
|
|
1106
|
-
row_key = list(table.rows.keys())[table.cursor_row]
|
|
1107
|
-
if row_key:
|
|
1108
|
-
job_id = str(row_key.value)
|
|
1109
|
-
self.post_message(ViewJobLogsRequest(job_id, self.current_experiment))
|
|
1110
|
-
|
|
1111
|
-
def action_copy_path(self) -> None:
|
|
1112
|
-
"""Copy the job folder path to clipboard"""
|
|
1113
|
-
import pyperclip
|
|
1114
|
-
|
|
1115
|
-
table = self.query_one("#jobs-table", DataTable)
|
|
1116
|
-
if table.cursor_row is None or not self.current_experiment:
|
|
1117
|
-
return
|
|
1118
|
-
|
|
1119
|
-
row_key = list(table.rows.keys())[table.cursor_row]
|
|
1120
|
-
if row_key:
|
|
1121
|
-
job_id = str(row_key.value)
|
|
1122
|
-
job = self.state_provider.get_job(job_id, self.current_experiment)
|
|
1123
|
-
if job and job.path:
|
|
1124
|
-
try:
|
|
1125
|
-
pyperclip.copy(str(job.path))
|
|
1126
|
-
self.notify(f"Path copied: {job.path}", severity="information")
|
|
1127
|
-
except Exception as e:
|
|
1128
|
-
self.notify(f"Failed to copy: {e}", severity="error")
|
|
1129
|
-
else:
|
|
1130
|
-
self.notify("No path available for this job", severity="warning")
|
|
1131
|
-
|
|
1132
|
-
# Status sort order (for sorting by status)
|
|
1133
|
-
STATUS_ORDER = {
|
|
1134
|
-
"running": 0,
|
|
1135
|
-
"waiting": 1,
|
|
1136
|
-
"error": 2,
|
|
1137
|
-
"done": 3,
|
|
1138
|
-
"unscheduled": 4,
|
|
1139
|
-
"phantom": 5,
|
|
1140
|
-
}
|
|
1141
|
-
|
|
1142
|
-
# Failure reason sort order (within error status)
|
|
1143
|
-
# More actionable failures first
|
|
1144
|
-
FAILURE_ORDER = {
|
|
1145
|
-
"TIMEOUT": 0, # Might just need retry
|
|
1146
|
-
"MEMORY": 1, # Might need resource adjustment
|
|
1147
|
-
"DEPENDENCY": 2, # Need to fix upstream job first
|
|
1148
|
-
"FAILED": 3, # Generic failure
|
|
1149
|
-
}
|
|
1150
|
-
|
|
1151
|
-
@classmethod
|
|
1152
|
-
def _get_status_sort_key(cls, job):
|
|
1153
|
-
"""Get sort key for a job based on status and failure reason.
|
|
1154
|
-
|
|
1155
|
-
Returns tuple (status_order, failure_order) for proper sorting.
|
|
1156
|
-
"""
|
|
1157
|
-
state_name = job.state.name if job.state else "unknown"
|
|
1158
|
-
status_order = cls.STATUS_ORDER.get(state_name, 99)
|
|
1159
|
-
|
|
1160
|
-
# For error jobs, also sort by failure reason
|
|
1161
|
-
if state_name == "error":
|
|
1162
|
-
failure_reason = getattr(job, "failure_reason", None)
|
|
1163
|
-
if failure_reason:
|
|
1164
|
-
failure_order = cls.FAILURE_ORDER.get(failure_reason.name, 99)
|
|
1165
|
-
else:
|
|
1166
|
-
failure_order = 99 # Unknown failure at end
|
|
1167
|
-
else:
|
|
1168
|
-
failure_order = 0
|
|
1169
|
-
|
|
1170
|
-
return (status_order, failure_order)
|
|
1171
|
-
|
|
1172
|
-
# Column key to display name mapping
|
|
1173
|
-
COLUMN_LABELS = {
|
|
1174
|
-
"job_id": "ID",
|
|
1175
|
-
"task": "Task",
|
|
1176
|
-
"status": "⚑",
|
|
1177
|
-
"tags": "Tags",
|
|
1178
|
-
"submitted": "Submitted",
|
|
1179
|
-
"duration": "Duration",
|
|
1180
|
-
}
|
|
1181
|
-
|
|
1182
|
-
# Columns that support sorting (column key -> sort column name)
|
|
1183
|
-
SORTABLE_COLUMNS = {
|
|
1184
|
-
"status": "status",
|
|
1185
|
-
"task": "task",
|
|
1186
|
-
"submitted": "submitted",
|
|
1187
|
-
}
|
|
1188
|
-
|
|
1189
|
-
def on_mount(self) -> None:
|
|
1190
|
-
"""Initialize the jobs table"""
|
|
1191
|
-
table = self.query_one("#jobs-table", DataTable)
|
|
1192
|
-
table.add_column("ID", key="job_id")
|
|
1193
|
-
table.add_column("Task", key="task")
|
|
1194
|
-
table.add_column("⚑", key="status", width=6)
|
|
1195
|
-
table.add_column("Tags", key="tags")
|
|
1196
|
-
table.add_column("Submitted", key="submitted")
|
|
1197
|
-
table.add_column("Duration", key="duration")
|
|
1198
|
-
table.cursor_type = "row"
|
|
1199
|
-
table.zebra_stripes = True
|
|
1200
|
-
|
|
1201
|
-
def _update_column_headers(self) -> None:
|
|
1202
|
-
"""Update column headers with sort indicators"""
|
|
1203
|
-
table = self.query_one("#jobs-table", DataTable)
|
|
1204
|
-
for column in table.columns.values():
|
|
1205
|
-
col_key = str(column.key.value) if column.key else None
|
|
1206
|
-
if col_key and col_key in self.COLUMN_LABELS:
|
|
1207
|
-
label = self.COLUMN_LABELS[col_key]
|
|
1208
|
-
sort_col = self.SORTABLE_COLUMNS.get(col_key)
|
|
1209
|
-
if sort_col and self._sort_column == sort_col:
|
|
1210
|
-
# Add sort indicator
|
|
1211
|
-
indicator = "▼" if self._sort_reverse else "▲"
|
|
1212
|
-
new_label = f"{label} {indicator}"
|
|
1213
|
-
else:
|
|
1214
|
-
new_label = label
|
|
1215
|
-
column.label = new_label
|
|
1216
|
-
|
|
1217
|
-
def on_data_table_header_selected(self, event: DataTable.HeaderSelected) -> None:
|
|
1218
|
-
"""Handle column header click for sorting"""
|
|
1219
|
-
col_key = str(event.column_key.value) if event.column_key else None
|
|
1220
|
-
if col_key and col_key in self.SORTABLE_COLUMNS:
|
|
1221
|
-
sort_col = self.SORTABLE_COLUMNS[col_key]
|
|
1222
|
-
if self._sort_column == sort_col:
|
|
1223
|
-
self._sort_reverse = not self._sort_reverse
|
|
1224
|
-
else:
|
|
1225
|
-
self._sort_column = sort_col
|
|
1226
|
-
self._sort_reverse = False
|
|
1227
|
-
self._needs_rebuild = True
|
|
1228
|
-
self._update_column_headers()
|
|
1229
|
-
self.refresh_jobs()
|
|
1230
|
-
|
|
1231
|
-
def set_experiment(self, experiment_id: Optional[str]) -> None:
|
|
1232
|
-
"""Set the current experiment and refresh jobs"""
|
|
1233
|
-
self.current_experiment = experiment_id
|
|
1234
|
-
self.refresh_jobs()
|
|
1235
|
-
|
|
1236
|
-
def refresh_jobs(self) -> None: # noqa: C901
|
|
1237
|
-
"""Refresh the jobs list from state provider"""
|
|
1238
|
-
table = self.query_one("#jobs-table", DataTable)
|
|
1239
|
-
|
|
1240
|
-
if not self.current_experiment:
|
|
1241
|
-
return
|
|
1242
|
-
|
|
1243
|
-
jobs = self.state_provider.get_jobs(self.current_experiment)
|
|
1244
|
-
self.log.debug(
|
|
1245
|
-
f"Refreshing jobs for {self.current_experiment}: {len(jobs)} jobs"
|
|
1246
|
-
)
|
|
1247
|
-
|
|
1248
|
-
# Apply filter if set
|
|
1249
|
-
if self.filter_fn:
|
|
1250
|
-
jobs = [j for j in jobs if self.filter_fn(j)]
|
|
1251
|
-
self.log.debug(f"After filter: {len(jobs)} jobs")
|
|
1252
|
-
|
|
1253
|
-
# Sort jobs based on selected column
|
|
1254
|
-
if self._sort_column == "status":
|
|
1255
|
-
# Sort by status priority, then by failure reason for errors
|
|
1256
|
-
jobs.sort(
|
|
1257
|
-
key=self._get_status_sort_key,
|
|
1258
|
-
reverse=self._sort_reverse,
|
|
1259
|
-
)
|
|
1260
|
-
elif self._sort_column == "task":
|
|
1261
|
-
# Sort by task name
|
|
1262
|
-
jobs.sort(
|
|
1263
|
-
key=lambda j: j.task_id or "",
|
|
1264
|
-
reverse=self._sort_reverse,
|
|
1265
|
-
)
|
|
1266
|
-
else:
|
|
1267
|
-
# Default: sort by submission time (oldest first by default)
|
|
1268
|
-
# Jobs without submittime go to the end
|
|
1269
|
-
jobs.sort(
|
|
1270
|
-
key=lambda j: j.submittime or float("inf"),
|
|
1271
|
-
reverse=self._sort_reverse,
|
|
1272
|
-
)
|
|
1273
|
-
|
|
1274
|
-
# Check if we need to rebuild (new/removed jobs, or status changed when sorting by status)
|
|
1275
|
-
from datetime import datetime
|
|
1276
|
-
import time as time_module
|
|
1277
|
-
|
|
1278
|
-
existing_keys = {str(k.value) for k in table.rows.keys()}
|
|
1279
|
-
current_job_ids = {job.identifier for job in jobs}
|
|
1280
|
-
|
|
1281
|
-
# Check if job set changed
|
|
1282
|
-
jobs_changed = existing_keys != current_job_ids
|
|
1283
|
-
|
|
1284
|
-
# Check if status changed when sorting by status
|
|
1285
|
-
status_changed = False
|
|
1286
|
-
if self._sort_column == "status" and not jobs_changed:
|
|
1287
|
-
current_statuses = {
|
|
1288
|
-
job.identifier: (job.state.name if job.state else "unknown")
|
|
1289
|
-
for job in jobs
|
|
1290
|
-
}
|
|
1291
|
-
if (
|
|
1292
|
-
hasattr(self, "_last_statuses")
|
|
1293
|
-
and self._last_statuses != current_statuses
|
|
1294
|
-
):
|
|
1295
|
-
status_changed = True
|
|
1296
|
-
self._last_statuses = current_statuses
|
|
1297
|
-
|
|
1298
|
-
needs_rebuild = self._needs_rebuild or jobs_changed or status_changed
|
|
1299
|
-
self._needs_rebuild = False
|
|
1300
|
-
|
|
1301
|
-
# Build row data for all jobs
|
|
1302
|
-
rows_data = {}
|
|
1303
|
-
for job in jobs:
|
|
1304
|
-
job_id = job.identifier
|
|
1305
|
-
task_id = job.task_id
|
|
1306
|
-
status = job.state.name if job.state else "unknown"
|
|
1307
|
-
|
|
1308
|
-
# Format status with icon (and progress % if running)
|
|
1309
|
-
if status == "running":
|
|
1310
|
-
progress_list = job.progress or []
|
|
1311
|
-
if progress_list:
|
|
1312
|
-
last_progress = progress_list[-1]
|
|
1313
|
-
progress_pct = last_progress.get("progress", 0) * 100
|
|
1314
|
-
status_text = f"▶ {progress_pct:.0f}%"
|
|
1315
|
-
else:
|
|
1316
|
-
status_text = "▶"
|
|
1317
|
-
else:
|
|
1318
|
-
failure_reason = getattr(job, "failure_reason", None)
|
|
1319
|
-
status_text = get_status_icon(status, failure_reason)
|
|
1320
|
-
|
|
1321
|
-
# Format tags - show all tags on single line
|
|
1322
|
-
tags = job.tags
|
|
1323
|
-
if tags:
|
|
1324
|
-
tags_text = Text()
|
|
1325
|
-
for i, (k, v) in enumerate(tags.items()):
|
|
1326
|
-
if i > 0:
|
|
1327
|
-
tags_text.append(", ")
|
|
1328
|
-
tags_text.append(f"{k}", style="bold")
|
|
1329
|
-
tags_text.append(f"={v}")
|
|
1330
|
-
else:
|
|
1331
|
-
tags_text = Text("-")
|
|
1332
|
-
|
|
1333
|
-
submitted = "-"
|
|
1334
|
-
if job.submittime:
|
|
1335
|
-
submitted = datetime.fromtimestamp(job.submittime).strftime(
|
|
1336
|
-
"%Y-%m-%d %H:%M"
|
|
1337
|
-
)
|
|
1338
|
-
|
|
1339
|
-
# Calculate duration
|
|
1340
|
-
start = job.starttime
|
|
1341
|
-
end = job.endtime
|
|
1342
|
-
duration = "-"
|
|
1343
|
-
if start:
|
|
1344
|
-
if end:
|
|
1345
|
-
elapsed = end - start
|
|
1346
|
-
else:
|
|
1347
|
-
elapsed = time_module.time() - start
|
|
1348
|
-
duration = self._format_duration(elapsed)
|
|
1349
|
-
|
|
1350
|
-
job_id_short = job_id[:7]
|
|
1351
|
-
rows_data[job_id] = (
|
|
1352
|
-
job_id_short,
|
|
1353
|
-
task_id,
|
|
1354
|
-
status_text,
|
|
1355
|
-
tags_text,
|
|
1356
|
-
submitted,
|
|
1357
|
-
duration,
|
|
1358
|
-
)
|
|
1359
|
-
|
|
1360
|
-
if needs_rebuild:
|
|
1361
|
-
# Full rebuild needed - save selection, clear, rebuild
|
|
1362
|
-
selected_key = None
|
|
1363
|
-
if table.cursor_row is not None and table.row_count > 0:
|
|
1364
|
-
try:
|
|
1365
|
-
row_keys = list(table.rows.keys())
|
|
1366
|
-
if table.cursor_row < len(row_keys):
|
|
1367
|
-
selected_key = str(row_keys[table.cursor_row].value)
|
|
1368
|
-
except (IndexError, KeyError):
|
|
1369
|
-
pass
|
|
1370
|
-
|
|
1371
|
-
table.clear()
|
|
1372
|
-
new_cursor_row = None
|
|
1373
|
-
for idx, job in enumerate(jobs):
|
|
1374
|
-
job_id = job.identifier
|
|
1375
|
-
table.add_row(*rows_data[job_id], key=job_id)
|
|
1376
|
-
if selected_key == job_id:
|
|
1377
|
-
new_cursor_row = idx
|
|
1378
|
-
|
|
1379
|
-
if new_cursor_row is not None and table.row_count > 0:
|
|
1380
|
-
table.move_cursor(row=new_cursor_row)
|
|
1381
|
-
else:
|
|
1382
|
-
# Just update cells in place - no reordering needed
|
|
1383
|
-
for job_id, row_data in rows_data.items():
|
|
1384
|
-
(
|
|
1385
|
-
job_id_short,
|
|
1386
|
-
task_id,
|
|
1387
|
-
status_text,
|
|
1388
|
-
tags_text,
|
|
1389
|
-
submitted,
|
|
1390
|
-
duration,
|
|
1391
|
-
) = row_data
|
|
1392
|
-
table.update_cell(job_id, "job_id", job_id_short, update_width=True)
|
|
1393
|
-
table.update_cell(job_id, "task", task_id, update_width=True)
|
|
1394
|
-
table.update_cell(job_id, "status", status_text, update_width=True)
|
|
1395
|
-
table.update_cell(job_id, "tags", tags_text, update_width=True)
|
|
1396
|
-
table.update_cell(job_id, "submitted", submitted, update_width=True)
|
|
1397
|
-
table.update_cell(job_id, "duration", duration, update_width=True)
|
|
1398
|
-
|
|
1399
|
-
self.log.debug(
|
|
1400
|
-
f"Jobs table now has {table.row_count} rows (rebuild={needs_rebuild})"
|
|
1401
|
-
)
|
|
1402
|
-
|
|
1403
|
-
def _format_duration(self, seconds: float) -> str:
|
|
1404
|
-
"""Format duration in seconds to human-readable string"""
|
|
1405
|
-
if seconds < 0:
|
|
1406
|
-
return "-"
|
|
1407
|
-
|
|
1408
|
-
seconds = int(seconds)
|
|
1409
|
-
if seconds < 60:
|
|
1410
|
-
return f"{seconds}s"
|
|
1411
|
-
elif seconds < 3600:
|
|
1412
|
-
minutes = seconds // 60
|
|
1413
|
-
secs = seconds % 60
|
|
1414
|
-
return f"{minutes}m {secs}s"
|
|
1415
|
-
elif seconds < 86400:
|
|
1416
|
-
hours = seconds // 3600
|
|
1417
|
-
minutes = (seconds % 3600) // 60
|
|
1418
|
-
return f"{hours}h {minutes}m"
|
|
1419
|
-
else:
|
|
1420
|
-
days = seconds // 86400
|
|
1421
|
-
hours = (seconds % 86400) // 3600
|
|
1422
|
-
return f"{days}d {hours}h"
|
|
1423
|
-
|
|
1424
|
-
def on_data_table_row_selected(self, event: DataTable.RowSelected) -> None:
    """Announce the job of the selected row by posting a JobSelected message

    Nothing is posted when the event carries no row key or when no
    experiment is currently set on this widget.
    """
    if not (event.row_key and self.current_experiment):
        return

    selected_job_id = str(event.row_key.value)
    self.post_message(JobSelected(selected_job_id, self.current_experiment))
|
|
1429
|
-
|
|
1430
|
-
|
|
1431
|
-
class SizeCalculated(Message):
    """Message carrying the computed folder size of one job"""

    def __init__(self, job_id: str, size: str, size_bytes: int) -> None:
        """Store the result of a size computation

        Args:
            job_id: Identifier of the job whose folder was measured.
            size: Size formatted for display.
            size_bytes: Raw size in bytes.
        """
        super().__init__()
        self.job_id, self.size, self.size_bytes = job_id, size, size_bytes
|
|
1439
|
-
|
|
1440
|
-
|
|
1441
|
-
class OrphanJobsScreen(Screen):
    """Screen for viewing and managing orphan jobs

    Shows the orphan jobs reported by the state provider whose folder still
    exists, computes the size of each folder in the background (one job at a
    time), and lets the user sort the list, copy a job path, or delete jobs.
    """

    BINDINGS = [
        Binding("d", "delete_selected", "Delete"),
        Binding("D", "delete_all", "Delete All", key_display="D"),
        Binding("escape", "go_back", "Back"),
        Binding("q", "go_back", "Quit"),
        Binding("r", "refresh", "Refresh"),
        Binding("f", "copy_path", "Copy Path", show=False),
        Binding("T", "sort_by_task", "Sort Task", show=False),
        Binding("Z", "sort_by_size", "Sort Size", show=False),
    ]

    # Both caches are deliberately class-level: sizes computed once are
    # reused by every instance of the screen.
    _size_cache: dict = {}  # Class-level cache (formatted strings)
    _size_bytes_cache: dict = {}  # Class-level cache (raw bytes for sorting)

    def __init__(self, state_provider: WorkspaceStateProvider) -> None:
        """Create the screen

        Args:
            state_provider: Provider used to list and delete orphan jobs.
        """
        super().__init__()
        self.state_provider = state_provider
        self.orphan_jobs = []
        self._pending_jobs = []  # Jobs waiting for size calculation
        self._sort_column: Optional[str] = None
        self._sort_reverse: bool = False

    def compose(self) -> ComposeResult:
        """Build the layout: title, stats line, jobs table and info line"""
        yield Header()
        with Vertical(id="orphan-container"):
            yield Static("Orphan Jobs", id="orphan-title")
            yield Static("", id="orphan-stats")
            yield DataTable(id="orphan-table", cursor_type="row")
            yield Static("", id="orphan-job-info")
        yield Footer()

    def on_mount(self) -> None:
        """Initialize the orphan jobs table"""
        table = self.query_one("#orphan-table", DataTable)
        table.add_column("⚑", key="status", width=3)
        table.add_column("Job ID", key="job_id", width=10)
        table.add_column("Task", key="task")
        table.add_column("Size", key="size", width=10)
        self.refresh_orphans()

    def action_sort_by_task(self) -> None:
        """Sort by task name (pressing again toggles the direction)"""
        if self._sort_column == "task":
            self._sort_reverse = not self._sort_reverse
        else:
            self._sort_column = "task"
            self._sort_reverse = False
        self._rebuild_table()
        order = "desc" if self._sort_reverse else "asc"
        self.notify(f"Sorted by task ({order})", severity="information")

    def action_sort_by_size(self) -> None:
        """Sort by size (pressing again toggles the direction)"""
        if self._sort_column == "size":
            self._sort_reverse = not self._sort_reverse
        else:
            self._sort_column = "size"
            self._sort_reverse = True  # Default: largest first
        self._rebuild_table()
        order = "largest first" if self._sort_reverse else "smallest first"
        self.notify(f"Sorted by size ({order})", severity="information")

    def _get_sorted_jobs(self):
        """Return a copy of the orphan jobs ordered by the current sort column

        When sorting by size, jobs whose size has not been computed yet are
        always placed after the sized ones, whatever the sort direction.
        """
        jobs = self.orphan_jobs[:]
        if self._sort_column == "task":
            jobs.sort(key=lambda j: j.task_id or "", reverse=self._sort_reverse)
        elif self._sort_column == "size":
            sizes = self._size_bytes_cache
            sized = [j for j in jobs if j.identifier in sizes]
            unsized = [j for j in jobs if j.identifier not in sizes]
            # Sort by raw bytes; a sentinel key would float unsized jobs to
            # the top in ascending order, so they are appended separately.
            sized.sort(key=lambda j: sizes[j.identifier], reverse=self._sort_reverse)
            jobs = sized + unsized
        return jobs

    def _rebuild_table(self) -> None:
        """Rebuild the table with current sort order"""
        table = self.query_one("#orphan-table", DataTable)
        table.clear()

        for job in self._get_sorted_jobs():
            failure_reason = getattr(job, "failure_reason", None)
            status_icon = get_status_icon(
                job.state.name if job.state else "unknown", failure_reason
            )
            # Jobs without a cached size show a placeholder until measured
            size_text = self._size_cache.get(job.identifier, "waiting")
            table.add_row(
                status_icon,
                job.identifier[:7],
                job.task_id,
                size_text,
                key=job.identifier,
            )

    def refresh_orphans(self) -> None:
        """Refresh the orphan jobs list and start missing size computations"""
        # Only include orphan jobs that have an existing folder
        all_orphans = self.state_provider.get_orphan_jobs()
        self.orphan_jobs = [j for j in all_orphans if j.path and j.path.exists()]

        # Update stats
        stats = self.query_one("#orphan-stats", Static)
        stats.update(f"Found {len(self.orphan_jobs)} orphan jobs")

        # Collect jobs needing size calculation
        self._pending_jobs = [
            j for j in self.orphan_jobs if j.identifier not in self._size_cache
        ]

        # Rebuild table
        self._rebuild_table()

        # Start calculating sizes
        if self._pending_jobs:
            self._calculate_next_size()

    def _calculate_next_size(self) -> None:
        """Calculate size for the next pending job using a worker"""
        if not self._pending_jobs:
            return

        job = self._pending_jobs.pop(0)
        # Update to "calc..."
        self._update_size_cell(job.identifier, "calc...")
        # Run calculation in worker thread
        self.run_worker(
            self._calc_size_worker(job.identifier, job.path),
            thread=True,
        )

    async def _calc_size_worker(self, job_id: str, path):
        """Worker to calculate folder size, cache it and post SizeCalculated"""
        size_bytes = await self._get_folder_size_async(path)
        size_str = self._format_size(size_bytes)
        self._size_cache[job_id] = size_str
        self._size_bytes_cache[job_id] = size_bytes
        self.post_message(SizeCalculated(job_id, size_str, size_bytes))

    def on_size_calculated(self, message: SizeCalculated) -> None:
        """Handle size calculation completion and start the next one"""
        self._size_bytes_cache[message.job_id] = message.size_bytes
        self._update_size_cell(message.job_id, message.size)
        # Calculate next one
        self._calculate_next_size()

    @staticmethod
    async def _du_reported_size(flag: str, path) -> Optional[int]:
        """Run ``du <flag> <path>`` and return the number it reports

        Returns None when du exits with a non-zero status or prints nothing.
        Raises asyncio.TimeoutError if du does not finish within 30 seconds;
        in that case the child process is killed and reaped first.
        """
        import asyncio

        proc = await asyncio.create_subprocess_exec(
            "du",
            flag,
            str(path),
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.DEVNULL,
        )
        try:
            stdout, _ = await asyncio.wait_for(proc.communicate(), timeout=30)
        except asyncio.TimeoutError:
            # wait_for only cancels communicate(); without this the du
            # process would keep running in the background.
            try:
                proc.kill()
            except ProcessLookupError:
                pass  # Already exited
            await proc.wait()
            raise

        if proc.returncode == 0 and stdout:
            # Output format: "SIZE\tPATH"
            return int(stdout.decode().split()[0])
        return None

    @staticmethod
    async def _get_folder_size_async(path) -> int:
        """Calculate total size of a folder using du command if available

        Falls back to the pure-Python implementation when du is missing,
        fails, times out, or produces output that cannot be parsed.
        """
        import asyncio
        import shutil
        import sys

        # Try using du command for better performance
        if shutil.which("du"):
            if sys.platform == "darwin":
                # macOS: du -sk gives size in KB
                flag, unit = "-sk", 1024
            else:
                # Linux: du -sb gives size in bytes
                flag, unit = "-sb", 1
            try:
                reported = await OrphanJobsScreen._du_reported_size(flag, path)
                if reported is not None:
                    return reported * unit
            except (asyncio.TimeoutError, ValueError, IndexError, OSError):
                pass  # Fall back to Python implementation

        # Fallback: Python implementation
        return OrphanJobsScreen._get_folder_size_sync(path)

    @staticmethod
    def _get_folder_size_sync(path) -> int:
        """Calculate total size of a folder using Python (fallback)

        Best effort: if walking the tree fails, the total accumulated so far
        is returned.
        """
        total = 0
        try:
            for entry in path.rglob("*"):
                if entry.is_file():
                    total += entry.stat().st_size
        except (OSError, PermissionError):
            pass
        return total

    @staticmethod
    def _format_size(size: int) -> str:
        """Format size in human-readable format (B, KB, MB, GB, TB)"""
        for unit in ["B", "KB", "MB", "GB"]:
            if size < 1024:
                return f"{size:.1f}{unit}" if unit != "B" else f"{size}{unit}"
            size /= 1024
        return f"{size:.1f}TB"

    def _update_size_cell(self, job_id: str, value: Optional[str] = None) -> None:
        """Update the size cell for a job

        Args:
            job_id: Row key of the job.
            value: Text to display; when None, the cached size (or "-") is used.
        """
        try:
            table = self.query_one("#orphan-table", DataTable)
            size_text = (
                value if value is not None else self._size_cache.get(job_id, "-")
            )
            table.update_cell(job_id, "size", size_text)
        except Exception:
            pass  # Table may have changed

    def _get_selected_job(self):
        """Return the job under the table cursor, or None

        None is returned when there is no cursor row, when the cursor does
        not point at an existing row (e.g. the table is empty), or when the
        row's job is no longer in the orphan list.
        """
        table = self.query_one("#orphan-table", DataTable)
        cursor_row = table.cursor_row
        if cursor_row is None:
            return None

        row_keys = list(table.rows.keys())
        if not 0 <= cursor_row < len(row_keys):
            return None

        job_id = str(row_keys[cursor_row].value)
        return next((j for j in self.orphan_jobs if j.identifier == job_id), None)

    def on_data_table_row_selected(self, event: DataTable.RowSelected) -> None:
        """Show job details when a row is selected"""
        self._update_job_info()

    def on_data_table_row_highlighted(self, event: DataTable.RowHighlighted) -> None:
        """Show job details when cursor moves"""
        self._update_job_info()

    def _update_job_info(self) -> None:
        """Update the job info display with the selected job's path and size"""
        info = self.query_one("#orphan-job-info", Static)
        job = self._get_selected_job()
        if job and job.path:
            size = self._size_cache.get(job.identifier, "calculating...")
            info.update(f"Path: {job.path} | Size: {size}")
        else:
            info.update("")

    def action_copy_path(self) -> None:
        """Copy the job folder path to clipboard"""
        import pyperclip

        job = self._get_selected_job()
        if not (job and job.path):
            return

        try:
            pyperclip.copy(str(job.path))
            self.notify("Path copied", severity="information")
        except Exception as e:
            self.notify(f"Failed to copy: {e}", severity="error")

    def action_delete_selected(self) -> None:
        """Delete the selected orphan job"""
        job = self._get_selected_job()
        if job:
            self._delete_job(job)

    def _delete_job(self, job) -> None:
        """Delete a single orphan job with confirmation"""

        def handle_delete(confirmed: bool) -> None:
            if confirmed:
                success, msg = self.state_provider.delete_job_safely(job)
                if success:
                    self.notify(msg, severity="information")
                    self.refresh_orphans()
                else:
                    self.notify(msg, severity="error")

        self.app.push_screen(
            DeleteConfirmScreen("orphan job", job.identifier),
            handle_delete,
        )

    def action_delete_all(self) -> None:
        """Delete all orphan jobs that are not running, after confirmation"""
        if not self.orphan_jobs:
            self.notify("No orphan jobs to delete", severity="warning")
            return

        # Filter out running jobs
        deletable_jobs = [j for j in self.orphan_jobs if not j.state.running()]

        if not deletable_jobs:
            self.notify("All orphan jobs are running", severity="warning")
            return

        def handle_delete_all(confirmed: bool) -> None:
            if confirmed:
                deleted = 0
                for job in deletable_jobs:
                    success, _ = self.state_provider.delete_job_safely(
                        job, cascade_orphans=False
                    )
                    if success:
                        deleted += 1

                # Clean up orphan partials once at the end
                self.state_provider.cleanup_orphan_partials(perform=True)

                self.notify(f"Deleted {deleted} orphan jobs", severity="information")
                self.refresh_orphans()

        self.app.push_screen(
            DeleteConfirmScreen(
                "all orphan jobs",
                f"{len(deletable_jobs)} jobs",
                "This action cannot be undone",
            ),
            handle_delete_all,
        )

    def action_refresh(self) -> None:
        """Refresh the orphan jobs list"""
        self.refresh_orphans()

    def action_go_back(self) -> None:
        """Go back to main screen"""
        self.dismiss()
|
|
1788
|
-
|
|
1789
|
-
|
|
1790
|
-
class HelpScreen(ModalScreen[None]):
    """Modal dialog listing the keyboard shortcuts of the application"""

    BINDINGS = [
        Binding("escape", "close", "Close"),
        Binding("?", "close", "Close"),
    ]

    def compose(self) -> ComposeResult:
        """Build the dialog: a title, the scrollable help text, a Close button"""
        from textual.containers import VerticalScroll

        # Rich markup; each section lists the keys that apply in that context
        help_text = """
[bold]Keyboard Shortcuts[/bold]

[bold cyan]Navigation[/bold cyan]
q Quit application
Esc Go back / Close dialog
r Refresh data
? Show this help
j Switch to Jobs tab
s Switch to Services tab

[bold cyan]Experiments[/bold cyan]
Enter Select experiment
d Delete experiment
k Kill all running jobs

[bold cyan]Jobs[/bold cyan]
l View job logs
d Delete job
k Kill running job
/ Open search filter
c Clear search filter
S Sort by status
T Sort by task
D Sort by date
f Copy folder path

[bold cyan]Services[/bold cyan]
s Start service
x Stop service
u Copy URL

[bold cyan]Search Filter[/bold cyan]
Enter Apply filter
Esc Close and clear filter

[bold cyan]Orphan Jobs[/bold cyan]
o Show orphan jobs
T Sort by task
Z Sort by size
d Delete selected
D Delete all
f Copy folder path
"""
        dialog = Vertical(id="help-dialog")
        scroll_area = VerticalScroll(id="help-scroll")
        with dialog:
            yield Static("Experimaestro Help", id="help-title")
            with scroll_area:
                yield Static(help_text, id="help-content")
            yield Button("Close", id="help-close-btn")

    def on_button_pressed(self, event: Button.Pressed) -> None:
        """Dismiss the dialog when a button of the dialog is pressed"""
        self.dismiss()

    def action_close(self) -> None:
        """Dismiss the dialog (bound to Escape and "?")"""
        self.dismiss()
|
|
29
|
+
from experimaestro.tui.utils import format_duration, get_status_icon # noqa: F401
|
|
30
|
+
from experimaestro.tui.messages import (
|
|
31
|
+
ExperimentSelected,
|
|
32
|
+
ExperimentDeselected,
|
|
33
|
+
JobSelected,
|
|
34
|
+
JobDeselected,
|
|
35
|
+
ViewJobLogs,
|
|
36
|
+
ViewJobLogsRequest,
|
|
37
|
+
DeleteJobRequest,
|
|
38
|
+
DeleteExperimentRequest,
|
|
39
|
+
KillJobRequest,
|
|
40
|
+
KillExperimentRequest,
|
|
41
|
+
FilterChanged, # noqa: F401
|
|
42
|
+
SearchApplied, # noqa: F401
|
|
43
|
+
SizeCalculated, # noqa: F401
|
|
44
|
+
ShowRunsRequest,
|
|
45
|
+
RunSelected,
|
|
46
|
+
)
|
|
47
|
+
from experimaestro.tui.dialogs import (
|
|
48
|
+
QuitConfirmScreen,
|
|
49
|
+
DeleteConfirmScreen,
|
|
50
|
+
KillConfirmScreen,
|
|
51
|
+
HelpScreen,
|
|
52
|
+
)
|
|
53
|
+
from experimaestro.tui.widgets import (
|
|
54
|
+
CaptureLog,
|
|
55
|
+
ExperimentsList,
|
|
56
|
+
ServicesList,
|
|
57
|
+
JobsTable,
|
|
58
|
+
JobDetailView,
|
|
59
|
+
RunsList,
|
|
60
|
+
GlobalServiceSyncs,
|
|
61
|
+
)
|
|
62
|
+
from experimaestro.tui.widgets.stray_jobs import OrphanJobsTab
|
|
1856
63
|
|
|
1857
64
|
|
|
1858
65
|
class ExperimaestroUI(App):
|
|
@@ -1866,7 +73,6 @@ class ExperimaestroUI(App):
|
|
|
1866
73
|
Binding("?", "show_help", "Help"),
|
|
1867
74
|
Binding("escape", "go_back", "Back", show=False),
|
|
1868
75
|
Binding("l", "view_logs", "Logs", show=False),
|
|
1869
|
-
Binding("o", "show_orphans", "Orphans", show=False),
|
|
1870
76
|
Binding("j", "focus_jobs", "Jobs", show=False),
|
|
1871
77
|
Binding("s", "focus_services", "Services", show=False),
|
|
1872
78
|
]
|
|
@@ -1875,15 +81,19 @@ class ExperimaestroUI(App):
|
|
|
1875
81
|
self,
|
|
1876
82
|
workdir: Optional[Path] = None,
|
|
1877
83
|
watch: bool = True,
|
|
1878
|
-
state_provider: Optional[
|
|
84
|
+
state_provider: Optional[StateProvider] = None,
|
|
1879
85
|
show_logs: bool = False,
|
|
1880
86
|
):
|
|
1881
87
|
"""Initialize the TUI
|
|
1882
88
|
|
|
1883
89
|
Args:
|
|
1884
|
-
workdir: Workspace directory (required if state_provider not provided
|
|
90
|
+
workdir: Workspace directory (required if state_provider not provided
|
|
91
|
+
and not using deferred mode)
|
|
1885
92
|
watch: Enable filesystem watching for workspace mode
|
|
1886
|
-
state_provider: Pre-initialized state provider (for active experiments)
|
|
93
|
+
state_provider: Pre-initialized state provider (for active experiments).
|
|
94
|
+
If None and workdir is provided, creates a WorkspaceStateProvider.
|
|
95
|
+
If None and workdir is None, starts in deferred mode (logs only)
|
|
96
|
+
and waits for set_state_provider() to be called.
|
|
1887
97
|
show_logs: Whether to show the logs tab (for active experiments)
|
|
1888
98
|
"""
|
|
1889
99
|
super().__init__()
|
|
@@ -1891,46 +101,73 @@ class ExperimaestroUI(App):
|
|
|
1891
101
|
self.watch = watch
|
|
1892
102
|
self.show_logs = show_logs
|
|
1893
103
|
self._listener_registered = False
|
|
104
|
+
self._monitor_mounted = False
|
|
1894
105
|
|
|
1895
106
|
# Initialize state provider before compose
|
|
1896
107
|
if state_provider:
|
|
1897
108
|
self.state_provider = state_provider
|
|
1898
|
-
|
|
1899
|
-
|
|
1900
|
-
|
|
1901
|
-
|
|
109
|
+
elif workdir:
|
|
110
|
+
from experimaestro.scheduler.workspace_state_provider import (
|
|
111
|
+
WorkspaceStateProvider,
|
|
112
|
+
)
|
|
1902
113
|
|
|
1903
114
|
# Get singleton provider instance for this workspace
|
|
1904
|
-
self.state_provider = WorkspaceStateProvider.get_instance(
|
|
1905
|
-
|
|
1906
|
-
|
|
1907
|
-
|
|
1908
|
-
|
|
1909
|
-
|
|
1910
|
-
|
|
1911
|
-
|
|
115
|
+
self.state_provider = WorkspaceStateProvider.get_instance(self.workdir)
|
|
116
|
+
else:
|
|
117
|
+
# Deferred mode: no provider yet, will be set later via set_state_provider()
|
|
118
|
+
self.state_provider = None
|
|
119
|
+
|
|
120
|
+
# Set subtitle to show scheduler status
|
|
121
|
+
self._update_scheduler_status()
|
|
122
|
+
|
|
123
|
+
def _update_scheduler_status(self) -> None:
|
|
124
|
+
"""Update the subtitle to reflect scheduler status"""
|
|
125
|
+
if self.state_provider is None:
|
|
126
|
+
self.sub_title = "○ Waiting for experiment..."
|
|
127
|
+
elif self.state_provider.is_live:
|
|
128
|
+
self.sub_title = "● Running experiment"
|
|
129
|
+
else:
|
|
130
|
+
self.sub_title = "○ Monitoring workspace"
|
|
1912
131
|
|
|
1913
132
|
def compose(self) -> ComposeResult:
    """Compose the TUI layout

    Without a state provider (deferred mode) only the Logs tab is created.
    With a provider, the Monitor, Services and Orphans tabs are created,
    followed by the Logs tab when ``show_logs`` is set, and the monitor is
    flagged as mounted.
    """
    yield Header()

    deferred = self.state_provider is None
    with TabbedContent(id="main-tabs"):
        if not deferred:
            with TabPane("Monitor", id="monitor-tab"):
                yield from self._compose_monitor_view()
            with TabPane("Services (0)", id="services-sync-tab"):
                yield GlobalServiceSyncs(self.state_provider)
            with TabPane("Orphans (0)", id="orphan-tab"):
                yield OrphanJobsTab(self.state_provider)
        # Deferred mode: only show logs, monitor will be added later
        if deferred or self.show_logs:
            with TabPane("Logs", id="logs-tab"):
                yield CaptureLog(id="logs", auto_scroll=True, wrap=True)
    if not deferred:
        self._monitor_mounted = True

    yield Footer()
|
|
1930
165
|
|
|
1931
166
|
def _compose_monitor_view(self):
|
|
1932
|
-
"""Compose the monitor view with experiments, jobs/services tabs, and job details"""
|
|
167
|
+
"""Compose the monitor view with experiments, runs, jobs/services tabs, and job details"""
|
|
1933
168
|
yield ExperimentsList(self.state_provider)
|
|
169
|
+
# Runs list (hidden initially, shown when 'd' pressed on experiment)
|
|
170
|
+
yield RunsList(self.state_provider)
|
|
1934
171
|
# Tabbed view for jobs and services (hidden initially)
|
|
1935
172
|
with TabbedContent(id="experiment-tabs", classes="hidden"):
|
|
1936
173
|
with TabPane("Jobs", id="jobs-tab"):
|
|
@@ -1946,9 +183,10 @@ class ExperimaestroUI(App):
|
|
|
1946
183
|
# Resets logging
|
|
1947
184
|
logging.basicConfig(level=logging.INFO, force=True)
|
|
1948
185
|
|
|
1949
|
-
#
|
|
1950
|
-
|
|
1951
|
-
|
|
186
|
+
# If monitor is mounted, refresh experiments
|
|
187
|
+
if self._monitor_mounted:
|
|
188
|
+
experiments_list = self.query_one(ExperimentsList)
|
|
189
|
+
experiments_list.refresh_experiments()
|
|
1952
190
|
|
|
1953
191
|
# Register as listener for state change notifications
|
|
1954
192
|
# The state provider handles its own notification strategy internally
|
|
@@ -1957,7 +195,154 @@ class ExperimaestroUI(App):
|
|
|
1957
195
|
self._listener_registered = True
|
|
1958
196
|
self.log("Registered state listener for notifications")
|
|
1959
197
|
|
|
1960
|
-
def
|
|
198
|
+
def set_state_provider(self, state_provider: StateProvider) -> None:
    """Attach a state provider to a TUI started in deferred mode

    Stores the provider, refreshes the subtitle, mounts the monitor widgets
    if they are not mounted yet, and registers the state listener if it is
    not registered yet.

    NOTE(review): the widgets are mounted directly from the calling thread;
    confirm that callers running in a background thread marshal this call
    to the application thread.

    Args:
        state_provider: The state provider (typically the Scheduler)
    """
    self.state_provider = state_provider
    self._update_scheduler_status()

    # Mount monitor widgets if not already done
    if not self._monitor_mounted:
        self._mount_monitor_widgets()

    # Register listener (only once)
    if self._listener_registered:
        return
    self.state_provider.add_listener(self._on_state_event)
    self._listener_registered = True
    self.log("Registered state listener for notifications")
|
|
219
|
+
|
|
220
|
+
def _mount_monitor_widgets(self) -> None:
    """Add the Monitor, Services and (when not live) Orphans tabs at runtime

    Used in deferred mode, where the main tabs initially contain only the
    Logs tab. Every new tab is inserted before the Logs tab.
    """
    provider = self.state_provider
    main_tabs = self.query_one("#main-tabs", TabbedContent)

    # The monitor pane is added first so that children can be mounted on it
    monitor_pane = TabPane("Monitor", id="monitor-tab")
    main_tabs.add_pane(monitor_pane, before="logs-tab")

    # Create widgets
    experiments_list = ExperimentsList(provider)
    runs_list = RunsList(provider)
    jobs_table = JobsTable(provider)
    services_list = ServicesList(provider)
    job_detail_view = JobDetailView(provider)

    # Mount experiments and runs lists
    for top_level_list in (experiments_list, runs_list):
        monitor_pane.mount(top_level_list)

    # Jobs/services tabs of the selected experiment (hidden initially)
    experiment_tabs = TabbedContent(id="experiment-tabs", classes="hidden")
    jobs_pane = TabPane("Jobs", id="jobs-tab")
    services_pane = TabPane("Services", id="services-tab")
    for pane, content in ((jobs_pane, jobs_table), (services_pane, services_list)):
        pane.compose_add_child(content)
    for pane in (jobs_pane, services_pane):
        experiment_tabs.compose_add_child(pane)
    monitor_pane.mount(experiment_tabs)

    # Job detail container (hidden initially)
    job_detail_container = Vertical(id="job-detail-container", classes="hidden")
    job_detail_container.compose_add_child(job_detail_view)
    monitor_pane.mount(job_detail_container)

    # Services sync tab
    services_sync_pane = TabPane("Services (0)", id="services-sync-tab")
    services_sync_pane.compose_add_child(GlobalServiceSyncs(provider))
    main_tabs.add_pane(services_sync_pane, before="logs-tab")

    # Orphans tab (only if not live)
    if not provider.is_live:
        orphan_pane = TabPane("Orphans (0)", id="orphan-tab")
        orphan_pane.compose_add_child(OrphanJobsTab(provider))
        main_tabs.add_pane(orphan_pane, before="logs-tab")

    self._monitor_mounted = True

    # Refresh experiments list
    experiments_list.refresh_experiments()
|
|
269
|
+
|
|
270
|
+
def update_services_tab_title(self) -> None:
    """Show the number of running services in the Services tab title

    Best effort: any error while counting services or locating the tab is
    silently ignored and the title is left unchanged.
    """
    try:
        from experimaestro.scheduler.services import ServiceState

        # Count running services from state provider
        running_count = 0
        for service in self.state_provider.get_services():
            if hasattr(service, "state") and service.state == ServiceState.RUNNING:
                running_count += 1

        # Find and update the tab pane title
        tab = self.query_one("#main-tabs", TabbedContent).get_tab("services-sync-tab")
        if tab:
            tab.label = f"Services ({running_count})"
    except Exception:
        pass
|
|
290
|
+
|
|
291
|
+
def update_orphan_tab_title(self) -> None:
    """Show the orphan job counts in the Orphans tab title

    Format: Orphans (X/Y) where X=running (stray), Y=non-running (finished).
    Best effort: any error while reading the counts or locating the tab is
    silently ignored and the title is left unchanged.
    """
    try:
        orphan_tab = self.query_one(OrphanJobsTab)
        running, finished = orphan_tab.running_count, orphan_tab.finished_count

        # Find and update the tab pane title
        tab = self.query_one("#main-tabs", TabbedContent).get_tab("orphan-tab")
        if tab:
            tab.label = f"Orphans ({running}/{finished})"
    except Exception:
        pass
|
|
307
|
+
|
|
308
|
+
def update_logs_tab_title(self) -> None:
    """Mark the Logs tab title as unread (bold "Logs *") or read ("Logs")

    Does nothing when ``show_logs`` is off. Best effort: any error while
    locating the log widget or the tab is silently ignored.
    """
    if self.show_logs:
        try:
            from rich.text import Text

            log_widget = self.query_one(CaptureLog)
            tab = self.query_one("#main-tabs", TabbedContent).get_tab("logs-tab")
            if tab:
                tab.label = (
                    Text("Logs *", style="bold") if log_widget.has_unread else "Logs"
                )
        except Exception:
            pass
|
|
325
|
+
|
|
326
|
+
def on_tabbed_content_tab_activated(
    self, event: TabbedContent.TabActivated
) -> None:
    """React to a tab switch

    Activating the Logs tab (when ``show_logs`` is on) marks the log widget
    as read; activating the Services tab refreshes the global services.
    Errors raised while doing so are silently ignored.
    """
    # event.pane is the TabPane, event.tab is the Tab widget (header)
    if event.pane.id == "logs-tab" and self.show_logs:
        try:
            self.query_one(CaptureLog).mark_as_read()
        except Exception:
            pass
        return

    if event.pane.id == "services-sync-tab":
        # Refresh global services when switching to Services tab
        try:
            self.query_one(GlobalServiceSyncs).refresh_services()
        except Exception:
            pass
|
|
344
|
+
|
|
345
|
+
def _on_state_event(self, event: EventBase) -> None:
|
|
1961
346
|
"""Handle state change events from the state provider
|
|
1962
347
|
|
|
1963
348
|
This may be called from the state provider's thread or the main thread,
|
|
@@ -1965,6 +350,8 @@ class ExperimaestroUI(App):
|
|
|
1965
350
|
"""
|
|
1966
351
|
import threading
|
|
1967
352
|
|
|
353
|
+
self.log.info(f"_on_state_event called with: {type(event).__name__}")
|
|
354
|
+
|
|
1968
355
|
if threading.current_thread() is threading.main_thread():
|
|
1969
356
|
# Already in main thread, call directly
|
|
1970
357
|
self._handle_state_event(event)
|
|
@@ -1972,59 +359,148 @@ class ExperimaestroUI(App):
|
|
|
1972
359
|
# From background thread, use call_from_thread
|
|
1973
360
|
self.call_from_thread(self._handle_state_event, event)
|
|
1974
361
|
|
|
1975
|
-
def _handle_state_event(self, event:
|
|
1976
|
-
"""Process state event on the main thread"""
|
|
1977
|
-
|
|
1978
|
-
# when widgets aren't visible yet
|
|
1979
|
-
jobs_tables = self.query(JobsTable)
|
|
1980
|
-
services_lists = self.query(ServicesList)
|
|
362
|
+
def _handle_state_event(self, event: EventBase) -> None:
|
|
363
|
+
"""Process state event on the main thread using handler dispatch"""
|
|
364
|
+
self.log.info(f"State event: {event}")
|
|
1981
365
|
|
|
1982
|
-
|
|
1983
|
-
|
|
1984
|
-
|
|
366
|
+
# Dispatch to handler if one exists for this event type
|
|
367
|
+
handler = self.STATE_EVENT_HANDLERS.get(type(event))
|
|
368
|
+
if handler:
|
|
369
|
+
self.log.info(f"Dispatching to handler: {handler.__name__}")
|
|
370
|
+
try:
|
|
371
|
+
handler(self, event)
|
|
372
|
+
except Exception as e:
|
|
373
|
+
self.log.error(f"Error in handler: {e}")
|
|
374
|
+
else:
|
|
375
|
+
self.log.warning(f"No handler for event type: {type(event).__name__}")
|
|
376
|
+
|
|
377
|
+
def _handle_experiment_updated(self, event: ExperimentUpdatedEvent) -> None:
|
|
378
|
+
"""Handle ExperimentUpdatedEvent - refresh experiments list and jobs"""
|
|
379
|
+
for exp_list in self.query(ExperimentsList):
|
|
380
|
+
exp_list.refresh_experiments()
|
|
381
|
+
|
|
382
|
+
# Also refresh jobs table if we're viewing the affected experiment
|
|
383
|
+
# (this handles the case when experiment finishes and events are deleted)
|
|
384
|
+
for jobs_table in self.query(JobsTable):
|
|
385
|
+
if jobs_table.current_experiment == event.experiment_id:
|
|
386
|
+
jobs_table.refresh_jobs()
|
|
387
|
+
|
|
388
|
+
def _handle_run_updated(self, event: RunUpdatedEvent) -> None:
|
|
389
|
+
"""Handle RunUpdatedEvent - refresh experiments list"""
|
|
390
|
+
for exp_list in self.query(ExperimentsList):
|
|
391
|
+
exp_list.refresh_experiments()
|
|
392
|
+
|
|
393
|
+
def _handle_service_added(self, event: ServiceAddedEvent) -> None:
|
|
394
|
+
"""Handle ServiceAddedEvent - refresh services list and update tab title"""
|
|
395
|
+
event_exp_id = event.experiment_id
|
|
396
|
+
self.log.info(
|
|
397
|
+
f"ServiceAddedEvent received: exp={event_exp_id}, service={event.service_id}"
|
|
1985
398
|
)
|
|
1986
399
|
|
|
1987
|
-
|
|
1988
|
-
|
|
1989
|
-
|
|
1990
|
-
|
|
400
|
+
# Refresh the global services widget
|
|
401
|
+
try:
|
|
402
|
+
global_services = self.query_one(GlobalServiceSyncs)
|
|
403
|
+
self.log.info("Calling GlobalServiceSyncs.refresh_services()")
|
|
404
|
+
global_services.refresh_services()
|
|
405
|
+
except Exception as e:
|
|
406
|
+
self.log.warning(f"Failed to refresh global services: {e}")
|
|
1991
407
|
|
|
1992
|
-
|
|
1993
|
-
|
|
408
|
+
# Refresh per-experiment services list
|
|
409
|
+
for services_list in self.query(ServicesList):
|
|
410
|
+
if services_list.current_experiment == event_exp_id:
|
|
411
|
+
services_list.refresh_services()
|
|
1994
412
|
|
|
1995
|
-
|
|
1996
|
-
|
|
1997
|
-
|
|
1998
|
-
|
|
413
|
+
def _handle_service_state_changed(self, event: ServiceStateChangedEvent) -> None:
|
|
414
|
+
"""Handle ServiceStateChangedEvent - update tab title when service state changes"""
|
|
415
|
+
# Update the Services tab title (running count may have changed)
|
|
416
|
+
self.update_services_tab_title()
|
|
1999
417
|
|
|
2000
|
-
|
|
2001
|
-
|
|
2002
|
-
|
|
2003
|
-
|
|
2004
|
-
|
|
2005
|
-
|
|
2006
|
-
job_detail_view.refresh_job_detail()
|
|
418
|
+
# Also refresh global services widget if visible
|
|
419
|
+
try:
|
|
420
|
+
global_services = self.query_one(GlobalServiceSyncs)
|
|
421
|
+
global_services.refresh_services()
|
|
422
|
+
except Exception:
|
|
423
|
+
pass
|
|
2007
424
|
|
|
2008
|
-
|
|
2009
|
-
|
|
2010
|
-
|
|
425
|
+
# Refresh per-experiment services list
|
|
426
|
+
for services_list in self.query(ServicesList):
|
|
427
|
+
if services_list.current_experiment == event.experiment_id:
|
|
428
|
+
services_list.refresh_services()
|
|
429
|
+
|
|
430
|
+
def _handle_job_submitted(self, event: JobSubmittedEvent) -> None:
|
|
431
|
+
"""Handle JobSubmittedEvent - update tags, dependencies, and refresh job list"""
|
|
432
|
+
event_exp_id = event.experiment_id
|
|
433
|
+
|
|
434
|
+
# Update tags_map, dependencies_map, and refresh jobs for the affected experiment
|
|
435
|
+
for jobs_table in self.query(JobsTable):
|
|
436
|
+
if jobs_table.current_experiment == event_exp_id:
|
|
437
|
+
# Add the new job's tags to the cache
|
|
438
|
+
if event.tags:
|
|
439
|
+
jobs_table.tags_map[event.job_id] = {
|
|
440
|
+
tag.key: tag.value for tag in event.tags
|
|
441
|
+
}
|
|
442
|
+
# Add the new job's dependencies to the cache
|
|
443
|
+
if event.depends_on:
|
|
444
|
+
jobs_table.dependencies_map[event.job_id] = event.depends_on
|
|
445
|
+
# Refresh to show the new job
|
|
446
|
+
jobs_table.refresh_jobs()
|
|
447
|
+
|
|
448
|
+
# Also update experiment stats
|
|
449
|
+
for exp_list in self.query(ExperimentsList):
|
|
450
|
+
exp_list.refresh_experiments()
|
|
451
|
+
|
|
452
|
+
def _handle_job_state_changed(self, event: JobStateChangedEvent) -> None:
|
|
453
|
+
"""Handle JobStateChangedEvent - refresh job display
|
|
454
|
+
|
|
455
|
+
This event is dispatched once per job state change.
|
|
456
|
+
Used for progress updates and state changes from job processes.
|
|
457
|
+
"""
|
|
458
|
+
# Refresh all jobs tables that might contain this job
|
|
459
|
+
for jobs_table in self.query(JobsTable):
|
|
460
|
+
jobs_table.refresh_jobs()
|
|
2011
461
|
|
|
2012
|
-
|
|
2013
|
-
|
|
2014
|
-
|
|
2015
|
-
|
|
462
|
+
# Also refresh job detail if we're viewing this job
|
|
463
|
+
for job_detail_container in self.query("#job-detail-container"):
|
|
464
|
+
if not job_detail_container.has_class("hidden"):
|
|
465
|
+
for job_detail_view in self.query(JobDetailView):
|
|
466
|
+
if job_detail_view.current_job_id == event.job_id:
|
|
467
|
+
job_detail_view.refresh_job_detail()
|
|
2016
468
|
|
|
2017
|
-
|
|
2018
|
-
|
|
469
|
+
# Also update the experiment stats in the experiments list
|
|
470
|
+
for exp_list in self.query(ExperimentsList):
|
|
471
|
+
exp_list.refresh_experiments()
|
|
2019
472
|
|
|
2020
|
-
|
|
2021
|
-
|
|
2022
|
-
|
|
2023
|
-
|
|
473
|
+
def _handle_job_progress(self, event: JobProgressEvent) -> None:
|
|
474
|
+
"""Handle JobProgressEvent - refresh job progress display
|
|
475
|
+
|
|
476
|
+
This event is dispatched when a job reports progress updates.
|
|
477
|
+
"""
|
|
478
|
+
# Refresh all jobs tables that might contain this job
|
|
479
|
+
for jobs_table in self.query(JobsTable):
|
|
480
|
+
jobs_table.refresh_jobs()
|
|
481
|
+
|
|
482
|
+
# Also refresh job detail if we're viewing this job
|
|
483
|
+
for job_detail_container in self.query("#job-detail-container"):
|
|
484
|
+
if not job_detail_container.has_class("hidden"):
|
|
485
|
+
for job_detail_view in self.query(JobDetailView):
|
|
486
|
+
if job_detail_view.current_job_id == event.job_id:
|
|
487
|
+
job_detail_view.refresh_job_detail()
|
|
488
|
+
|
|
489
|
+
STATE_EVENT_HANDLERS = {
|
|
490
|
+
ExperimentUpdatedEvent: _handle_experiment_updated,
|
|
491
|
+
JobStateChangedEvent: _handle_job_state_changed,
|
|
492
|
+
JobProgressEvent: _handle_job_progress,
|
|
493
|
+
RunUpdatedEvent: _handle_run_updated,
|
|
494
|
+
ServiceAddedEvent: _handle_service_added,
|
|
495
|
+
ServiceStateChangedEvent: _handle_service_state_changed,
|
|
496
|
+
JobSubmittedEvent: _handle_job_submitted,
|
|
497
|
+
}
|
|
2024
498
|
|
|
2025
499
|
def on_experiment_selected(self, message: ExperimentSelected) -> None:
|
|
2026
500
|
"""Handle experiment selection - show jobs/services tabs"""
|
|
2027
|
-
self.log(
|
|
501
|
+
self.log(
|
|
502
|
+
f"Experiment selected: {message.experiment_id} (run: {message.run_id})"
|
|
503
|
+
)
|
|
2028
504
|
|
|
2029
505
|
# Set up services list
|
|
2030
506
|
services_list = self.query_one(ServicesList)
|
|
@@ -2032,7 +508,7 @@ class ExperimaestroUI(App):
|
|
|
2032
508
|
|
|
2033
509
|
# Set up jobs table
|
|
2034
510
|
jobs_table_widget = self.query_one(JobsTable)
|
|
2035
|
-
jobs_table_widget.set_experiment(message.experiment_id)
|
|
511
|
+
jobs_table_widget.set_experiment(message.experiment_id, message.run_id)
|
|
2036
512
|
|
|
2037
513
|
# Show the tabbed content
|
|
2038
514
|
tabs = self.query_one("#experiment-tabs", TabbedContent)
|
|
@@ -2117,63 +593,28 @@ class ExperimaestroUI(App):
|
|
|
2117
593
|
job_detail_view = self.query_one(JobDetailView)
|
|
2118
594
|
job_detail_view.action_view_logs()
|
|
2119
595
|
|
|
2120
|
-
def action_show_orphans(self) -> None:
|
|
2121
|
-
"""Show orphan jobs screen"""
|
|
2122
|
-
self.push_screen(OrphanJobsScreen(self.state_provider))
|
|
2123
|
-
|
|
2124
|
-
@work(thread=True, exclusive=True)
|
|
2125
|
-
def _sync_and_view_logs(self, job_path: Path, task_id: str) -> None:
|
|
2126
|
-
"""Sync logs from remote and then view them (runs in worker thread)"""
|
|
2127
|
-
try:
|
|
2128
|
-
# Sync the job directory
|
|
2129
|
-
local_path = self.state_provider.sync_path(str(job_path))
|
|
2130
|
-
if not local_path:
|
|
2131
|
-
self.post_message(LogsSyncFailed("Failed to sync logs from remote"))
|
|
2132
|
-
return
|
|
2133
|
-
|
|
2134
|
-
job_path = local_path
|
|
2135
|
-
|
|
2136
|
-
# Log files are named after the last part of the task ID
|
|
2137
|
-
task_name = task_id.split(".")[-1]
|
|
2138
|
-
stdout_path = job_path / f"{task_name}.out"
|
|
2139
|
-
stderr_path = job_path / f"{task_name}.err"
|
|
2140
|
-
|
|
2141
|
-
# Collect existing log files
|
|
2142
|
-
log_files = []
|
|
2143
|
-
if stdout_path.exists():
|
|
2144
|
-
log_files.append(str(stdout_path))
|
|
2145
|
-
if stderr_path.exists():
|
|
2146
|
-
log_files.append(str(stderr_path))
|
|
2147
|
-
|
|
2148
|
-
if not log_files:
|
|
2149
|
-
self.post_message(
|
|
2150
|
-
LogsSyncFailed(f"No log files found: {task_name}.out/.err")
|
|
2151
|
-
)
|
|
2152
|
-
return
|
|
2153
|
-
|
|
2154
|
-
# Signal completion via message
|
|
2155
|
-
job_id = job_path.name
|
|
2156
|
-
self.post_message(LogsSyncComplete(log_files, job_id))
|
|
2157
|
-
|
|
2158
|
-
except Exception as e:
|
|
2159
|
-
self.post_message(LogsSyncFailed(str(e)))
|
|
2160
|
-
|
|
2161
|
-
def on_logs_sync_complete(self, message: LogsSyncComplete) -> None:
|
|
2162
|
-
"""Handle successful log sync - show log viewer"""
|
|
2163
|
-
self.push_screen(LogViewerScreen(message.log_files, message.job_id))
|
|
2164
|
-
|
|
2165
|
-
def on_logs_sync_failed(self, message: LogsSyncFailed) -> None:
|
|
2166
|
-
"""Handle failed log sync"""
|
|
2167
|
-
self.notify(message.error, severity="warning")
|
|
2168
|
-
|
|
2169
596
|
def on_view_job_logs(self, message: ViewJobLogs) -> None:
|
|
2170
|
-
"""Handle request to view job logs - push LogViewerScreen
|
|
597
|
+
"""Handle request to view job logs - push LogViewerScreen
|
|
598
|
+
|
|
599
|
+
For remote monitoring, switches to log viewer immediately with loading state,
|
|
600
|
+
then starts adaptive sync in background.
|
|
601
|
+
"""
|
|
2171
602
|
job_path = Path(message.job_path)
|
|
603
|
+
job_id = job_path.name
|
|
2172
604
|
|
|
2173
|
-
# For remote monitoring,
|
|
605
|
+
# For remote monitoring, switch screen immediately with loading state
|
|
2174
606
|
if self.state_provider.is_remote:
|
|
2175
|
-
|
|
2176
|
-
self.
|
|
607
|
+
# Push screen immediately - it will handle sync and show loading state
|
|
608
|
+
self.push_screen(
|
|
609
|
+
LogViewerScreen(
|
|
610
|
+
log_files=[], # Will be populated after sync
|
|
611
|
+
job_id=job_id,
|
|
612
|
+
sync_func=self.state_provider.sync_path,
|
|
613
|
+
remote_path=str(job_path),
|
|
614
|
+
task_id=message.task_id,
|
|
615
|
+
job_state=message.job_state,
|
|
616
|
+
)
|
|
617
|
+
)
|
|
2177
618
|
return
|
|
2178
619
|
|
|
2179
620
|
# Local monitoring - no sync needed
|
|
@@ -2196,7 +637,6 @@ class ExperimaestroUI(App):
|
|
|
2196
637
|
return
|
|
2197
638
|
|
|
2198
639
|
# Push the log viewer screen
|
|
2199
|
-
job_id = job_path.name
|
|
2200
640
|
self.push_screen(LogViewerScreen(log_files, job_id))
|
|
2201
641
|
|
|
2202
642
|
def on_view_job_logs_request(self, message: ViewJobLogsRequest) -> None:
|
|
@@ -2205,7 +645,7 @@ class ExperimaestroUI(App):
|
|
|
2205
645
|
if not job or not job.path or not job.task_id:
|
|
2206
646
|
self.notify("Cannot find job logs", severity="warning")
|
|
2207
647
|
return
|
|
2208
|
-
self.post_message(ViewJobLogs(str(job.path), job.task_id))
|
|
648
|
+
self.post_message(ViewJobLogs(str(job.path), job.task_id, job.state))
|
|
2209
649
|
|
|
2210
650
|
def on_delete_job_request(self, message: DeleteJobRequest) -> None:
|
|
2211
651
|
"""Handle job deletion request"""
|
|
@@ -2330,6 +770,42 @@ class ExperimaestroUI(App):
|
|
|
2330
770
|
handle_kill_response,
|
|
2331
771
|
)
|
|
2332
772
|
|
|
773
|
+
def on_show_runs_request(self, message: ShowRunsRequest) -> None:
|
|
774
|
+
"""Handle request to show experiment runs"""
|
|
775
|
+
runs_list = self.query_one(RunsList)
|
|
776
|
+
runs_list.set_experiment(message.experiment_id, message.current_run_id)
|
|
777
|
+
|
|
778
|
+
def on_run_selected(self, message: RunSelected) -> None:
|
|
779
|
+
"""Handle run selection - show jobs for the selected run"""
|
|
780
|
+
self.log(
|
|
781
|
+
f"Run selected: {message.run_id} (current={message.is_current}) "
|
|
782
|
+
f"for {message.experiment_id}"
|
|
783
|
+
)
|
|
784
|
+
|
|
785
|
+
# Set up jobs table with the selected run
|
|
786
|
+
jobs_table_widget = self.query_one(JobsTable)
|
|
787
|
+
jobs_table_widget.set_experiment(
|
|
788
|
+
message.experiment_id,
|
|
789
|
+
message.run_id,
|
|
790
|
+
is_past_run=not message.is_current,
|
|
791
|
+
)
|
|
792
|
+
|
|
793
|
+
# Set up services list
|
|
794
|
+
services_list = self.query_one(ServicesList)
|
|
795
|
+
services_list.set_experiment(message.experiment_id)
|
|
796
|
+
|
|
797
|
+
# Show the tabbed content
|
|
798
|
+
tabs = self.query_one("#experiment-tabs", TabbedContent)
|
|
799
|
+
tabs.remove_class("hidden")
|
|
800
|
+
|
|
801
|
+
# Collapse experiments list
|
|
802
|
+
experiments_list = self.query_one(ExperimentsList)
|
|
803
|
+
experiments_list.collapse_to_experiment(message.experiment_id)
|
|
804
|
+
|
|
805
|
+
# Focus the jobs table
|
|
806
|
+
jobs_table = self.query_one("#jobs-table", DataTable)
|
|
807
|
+
jobs_table.focus()
|
|
808
|
+
|
|
2333
809
|
def action_focus_jobs(self) -> None:
|
|
2334
810
|
"""Switch to the jobs tab"""
|
|
2335
811
|
tabs = self.query_one("#experiment-tabs", TabbedContent)
|
|
@@ -2374,7 +850,7 @@ class ExperimaestroUI(App):
|
|
|
2374
850
|
self.exit()
|
|
2375
851
|
|
|
2376
852
|
self.push_screen(
|
|
2377
|
-
QuitConfirmScreen(has_active_experiment=self.
|
|
853
|
+
QuitConfirmScreen(has_active_experiment=self.state_provider.is_live),
|
|
2378
854
|
handle_quit_response,
|
|
2379
855
|
)
|
|
2380
856
|
|
|
@@ -2389,7 +865,3 @@ class ExperimaestroUI(App):
|
|
|
2389
865
|
self.state_provider.remove_listener(self._on_state_event)
|
|
2390
866
|
self._listener_registered = False
|
|
2391
867
|
self.log("Unregistered state listener")
|
|
2392
|
-
|
|
2393
|
-
# Only close state provider if we own it (not external/active experiment)
|
|
2394
|
-
if self.state_provider and self.owns_provider:
|
|
2395
|
-
self.state_provider.close()
|