experimaestro 2.0.0b8__py3-none-any.whl → 2.0.0b17__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of experimaestro might be problematic. Click here for more details.
- experimaestro/__init__.py +12 -5
- experimaestro/cli/__init__.py +239 -126
- experimaestro/cli/filter.py +48 -23
- experimaestro/cli/jobs.py +253 -71
- experimaestro/cli/refactor.py +1 -2
- experimaestro/commandline.py +7 -4
- experimaestro/connectors/__init__.py +9 -1
- experimaestro/connectors/local.py +43 -3
- experimaestro/core/arguments.py +18 -18
- experimaestro/core/identifier.py +11 -11
- experimaestro/core/objects/config.py +96 -39
- experimaestro/core/objects/config_walk.py +3 -3
- experimaestro/core/{subparameters.py → partial.py} +16 -16
- experimaestro/core/partial_lock.py +394 -0
- experimaestro/core/types.py +12 -15
- experimaestro/dynamic.py +290 -0
- experimaestro/experiments/__init__.py +6 -2
- experimaestro/experiments/cli.py +217 -50
- experimaestro/experiments/configuration.py +24 -0
- experimaestro/generators.py +5 -5
- experimaestro/ipc.py +118 -1
- experimaestro/launcherfinder/__init__.py +2 -2
- experimaestro/launcherfinder/registry.py +6 -7
- experimaestro/launcherfinder/specs.py +2 -9
- experimaestro/launchers/slurm/__init__.py +2 -2
- experimaestro/launchers/slurm/base.py +62 -0
- experimaestro/locking.py +957 -1
- experimaestro/notifications.py +89 -201
- experimaestro/progress.py +63 -366
- experimaestro/rpyc.py +0 -2
- experimaestro/run.py +29 -2
- experimaestro/scheduler/__init__.py +8 -1
- experimaestro/scheduler/base.py +629 -53
- experimaestro/scheduler/dependencies.py +20 -16
- experimaestro/scheduler/experiment.py +732 -167
- experimaestro/scheduler/interfaces.py +316 -101
- experimaestro/scheduler/jobs.py +58 -20
- experimaestro/scheduler/remote/adaptive_sync.py +265 -0
- experimaestro/scheduler/remote/client.py +171 -117
- experimaestro/scheduler/remote/protocol.py +8 -193
- experimaestro/scheduler/remote/server.py +95 -71
- experimaestro/scheduler/services.py +53 -28
- experimaestro/scheduler/state_provider.py +663 -2430
- experimaestro/scheduler/state_status.py +1247 -0
- experimaestro/scheduler/transient.py +31 -0
- experimaestro/scheduler/workspace.py +1 -1
- experimaestro/scheduler/workspace_state_provider.py +1273 -0
- experimaestro/scriptbuilder.py +4 -4
- experimaestro/settings.py +36 -0
- experimaestro/tests/conftest.py +33 -5
- experimaestro/tests/connectors/bin/executable.py +1 -1
- experimaestro/tests/fixtures/pre_experiment/experiment_check_env.py +16 -0
- experimaestro/tests/fixtures/pre_experiment/experiment_check_mock.py +14 -0
- experimaestro/tests/fixtures/pre_experiment/experiment_simple.py +12 -0
- experimaestro/tests/fixtures/pre_experiment/pre_setup_env.py +5 -0
- experimaestro/tests/fixtures/pre_experiment/pre_setup_error.py +3 -0
- experimaestro/tests/fixtures/pre_experiment/pre_setup_mock.py +8 -0
- experimaestro/tests/launchers/bin/test.py +1 -0
- experimaestro/tests/launchers/test_slurm.py +9 -9
- experimaestro/tests/partial_reschedule.py +46 -0
- experimaestro/tests/restart.py +3 -3
- experimaestro/tests/restart_main.py +1 -0
- experimaestro/tests/scripts/notifyandwait.py +1 -0
- experimaestro/tests/task_partial.py +38 -0
- experimaestro/tests/task_tokens.py +2 -2
- experimaestro/tests/tasks/test_dynamic.py +6 -6
- experimaestro/tests/test_dependencies.py +3 -3
- experimaestro/tests/test_deprecated.py +15 -15
- experimaestro/tests/test_dynamic_locking.py +317 -0
- experimaestro/tests/test_environment.py +24 -14
- experimaestro/tests/test_experiment.py +171 -36
- experimaestro/tests/test_identifier.py +25 -25
- experimaestro/tests/test_identifier_stability.py +3 -5
- experimaestro/tests/test_multitoken.py +2 -4
- experimaestro/tests/{test_subparameters.py → test_partial.py} +25 -25
- experimaestro/tests/test_partial_paths.py +81 -138
- experimaestro/tests/test_pre_experiment.py +219 -0
- experimaestro/tests/test_progress.py +2 -8
- experimaestro/tests/test_remote_state.py +560 -99
- experimaestro/tests/test_stray_jobs.py +261 -0
- experimaestro/tests/test_tasks.py +1 -2
- experimaestro/tests/test_token_locking.py +52 -67
- experimaestro/tests/test_tokens.py +5 -6
- experimaestro/tests/test_transient.py +225 -0
- experimaestro/tests/test_workspace_state_provider.py +768 -0
- experimaestro/tests/token_reschedule.py +1 -3
- experimaestro/tests/utils.py +2 -7
- experimaestro/tokens.py +227 -372
- experimaestro/tools/diff.py +1 -0
- experimaestro/tools/documentation.py +4 -5
- experimaestro/tools/jobs.py +1 -2
- experimaestro/tui/app.py +438 -1966
- experimaestro/tui/app.tcss +162 -0
- experimaestro/tui/dialogs.py +172 -0
- experimaestro/tui/log_viewer.py +253 -3
- experimaestro/tui/messages.py +137 -0
- experimaestro/tui/utils.py +54 -0
- experimaestro/tui/widgets/__init__.py +23 -0
- experimaestro/tui/widgets/experiments.py +468 -0
- experimaestro/tui/widgets/global_services.py +238 -0
- experimaestro/tui/widgets/jobs.py +972 -0
- experimaestro/tui/widgets/log.py +156 -0
- experimaestro/tui/widgets/orphans.py +363 -0
- experimaestro/tui/widgets/runs.py +185 -0
- experimaestro/tui/widgets/services.py +314 -0
- experimaestro/tui/widgets/stray_jobs.py +528 -0
- experimaestro/utils/__init__.py +1 -1
- experimaestro/utils/environment.py +105 -22
- experimaestro/utils/fswatcher.py +124 -0
- experimaestro/utils/jobs.py +1 -2
- experimaestro/utils/jupyter.py +1 -2
- experimaestro/utils/logging.py +72 -0
- experimaestro/version.py +2 -2
- experimaestro/webui/__init__.py +9 -0
- experimaestro/webui/app.py +117 -0
- experimaestro/{server → webui}/data/index.css +66 -11
- experimaestro/webui/data/index.css.map +1 -0
- experimaestro/{server → webui}/data/index.js +82763 -87217
- experimaestro/webui/data/index.js.map +1 -0
- experimaestro/webui/routes/__init__.py +5 -0
- experimaestro/webui/routes/auth.py +53 -0
- experimaestro/webui/routes/proxy.py +117 -0
- experimaestro/webui/server.py +200 -0
- experimaestro/webui/state_bridge.py +152 -0
- experimaestro/webui/websocket.py +413 -0
- {experimaestro-2.0.0b8.dist-info → experimaestro-2.0.0b17.dist-info}/METADATA +5 -6
- experimaestro-2.0.0b17.dist-info/RECORD +219 -0
- experimaestro/cli/progress.py +0 -269
- experimaestro/scheduler/state.py +0 -75
- experimaestro/scheduler/state_db.py +0 -437
- experimaestro/scheduler/state_sync.py +0 -891
- experimaestro/server/__init__.py +0 -467
- experimaestro/server/data/index.css.map +0 -1
- experimaestro/server/data/index.js.map +0 -1
- experimaestro/tests/test_cli_jobs.py +0 -615
- experimaestro/tests/test_file_progress.py +0 -425
- experimaestro/tests/test_file_progress_integration.py +0 -477
- experimaestro/tests/test_state_db.py +0 -434
- experimaestro-2.0.0b8.dist-info/RECORD +0 -187
- /experimaestro/{server → webui}/data/1815e00441357e01619e.ttf +0 -0
- /experimaestro/{server → webui}/data/2463b90d9a316e4e5294.woff2 +0 -0
- /experimaestro/{server → webui}/data/2582b0e4bcf85eceead0.ttf +0 -0
- /experimaestro/{server → webui}/data/89999bdf5d835c012025.woff2 +0 -0
- /experimaestro/{server → webui}/data/914997e1bdfc990d0897.ttf +0 -0
- /experimaestro/{server → webui}/data/c210719e60948b211a12.woff2 +0 -0
- /experimaestro/{server → webui}/data/favicon.ico +0 -0
- /experimaestro/{server → webui}/data/index.html +0 -0
- /experimaestro/{server → webui}/data/login.html +0 -0
- /experimaestro/{server → webui}/data/manifest.json +0 -0
- {experimaestro-2.0.0b8.dist-info → experimaestro-2.0.0b17.dist-info}/WHEEL +0 -0
- {experimaestro-2.0.0b8.dist-info → experimaestro-2.0.0b17.dist-info}/entry_points.txt +0 -0
- {experimaestro-2.0.0b8.dist-info → experimaestro-2.0.0b17.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,972 @@
|
|
|
1
|
+
"""Jobs-related widgets for the TUI"""
|
|
2
|
+
|
|
3
|
+
from datetime import datetime
|
|
4
|
+
import time as time_module
|
|
5
|
+
from typing import Optional
|
|
6
|
+
from textual import work
|
|
7
|
+
from textual.app import ComposeResult
|
|
8
|
+
from textual.containers import Vertical, VerticalScroll
|
|
9
|
+
from textual.widgets import DataTable, Label, Input, Static
|
|
10
|
+
from textual.widget import Widget
|
|
11
|
+
from textual.reactive import reactive
|
|
12
|
+
from textual.binding import Binding
|
|
13
|
+
from rich.text import Text
|
|
14
|
+
|
|
15
|
+
from experimaestro.scheduler.state_provider import StateProvider
|
|
16
|
+
from experimaestro.tui.utils import format_duration, get_status_icon
|
|
17
|
+
from experimaestro.tui.messages import (
|
|
18
|
+
JobSelected,
|
|
19
|
+
ViewJobLogs,
|
|
20
|
+
ViewJobLogsRequest,
|
|
21
|
+
DeleteJobRequest,
|
|
22
|
+
KillJobRequest,
|
|
23
|
+
FilterChanged,
|
|
24
|
+
SearchApplied,
|
|
25
|
+
)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class SearchBar(Widget):
    """Search bar widget with filter hints for filtering jobs.

    Visual state is driven by the ``visible`` reactive together with
    ``filter_fn``:

    * hidden, no active filter: the whole widget has ``display = False``;
    * editing: the input + syntax hints are shown;
    * applied: the bar is hidden again but a one-line "active filter"
      indicator stays visible.
    """

    # Reactive flag; assigning it triggers watch_visible() below.
    visible: reactive[bool] = reactive(False)
    _keep_filter: bool = False  # Flag to keep filter when hiding
    _query_valid: bool = False  # Track if current query is valid

    def __init__(self) -> None:
        super().__init__()
        # Compiled filter predicate (from createFilter), or None when inactive
        self.filter_fn = None
        self.active_query = ""  # Store the active query text

    def compose(self) -> ComposeResult:
        """Build the sub-widgets: indicator, input + hints, error line."""
        # Active filter indicator (shown when filter active but bar hidden)
        yield Static("", id="active-filter")
        # Search input container
        with Vertical(id="search-container"):
            yield Input(
                placeholder="Filter: @state = 'done', @name ~ 'pattern', tag = 'value'",
                id="search-input",
            )
            yield Static(
                "Syntax: @state = 'done' | @name ~ 'regex' | tag = 'value' | and/or",
                id="search-hints",
            )
        yield Static("", id="search-error")

    def on_mount(self) -> None:
        """Initialize visibility state"""
        # Start with everything hidden
        self.display = False
        self.query_one("#search-container").display = False
        self.query_one("#active-filter").display = False
        self.query_one("#search-error").display = False

    def watch_visible(self, visible: bool) -> None:
        """Show/hide search bar.

        On hide, the pending query is discarded unless ``_keep_filter`` was
        set by on_input_submitted(); the flag is one-shot and reset here.
        """
        search_container = self.query_one("#search-container")
        active_filter = self.query_one("#active-filter")
        error_widget = self.query_one("#search-error")

        if visible:
            self.display = True
            search_container.display = True
            active_filter.display = False
            self.query_one("#search-input", Input).focus()
        else:
            if not self._keep_filter:
                # Dropping the filter: wipe input text and parsed state
                self.query_one("#search-input", Input).value = ""
                self.filter_fn = None
                self.active_query = ""
                self._query_valid = False
            self._keep_filter = False

            # Show/hide based on whether filter is active
            if self.filter_fn is not None:
                # Filter active - show indicator, hide input
                self.display = True
                search_container.display = False
                error_widget.display = False
                active_filter.update(
                    f"Filter: {self.active_query} (/ to edit, c to clear)"
                )
                active_filter.display = True
            else:
                # No filter - hide everything including this widget
                self.display = False
                search_container.display = False
                active_filter.display = False
                error_widget.display = False

    def on_input_changed(self, event: Input.Changed) -> None:
        """Parse filter expression when input changes.

        Posts FilterChanged with the compiled predicate (or None) and marks
        the input widget with a "valid"/"error" CSS class as feedback.
        """
        query = event.value.strip()
        input_widget = self.query_one("#search-input", Input)
        error_widget = self.query_one("#search-error", Static)

        if not query:
            # Empty query clears the filter immediately
            self.filter_fn = None
            self._query_valid = False
            self.post_message(FilterChanged(None))
            input_widget.remove_class("error")
            input_widget.remove_class("valid")
            error_widget.display = False
            return

        try:
            # NOTE(review): imported lazily, presumably to keep widget import
            # cheap — confirm before moving to module level
            from experimaestro.cli.filter import createFilter

            self.filter_fn = createFilter(query)
            self._query_valid = True
            self.active_query = query
            self.post_message(FilterChanged(self.filter_fn))
            input_widget.remove_class("error")
            input_widget.add_class("valid")
            error_widget.display = False
        except Exception as e:
            # Parse failure: clear the filter and surface a truncated message
            self.filter_fn = None
            self._query_valid = False
            self.post_message(FilterChanged(None))
            input_widget.remove_class("valid")
            input_widget.add_class("error")
            error_widget.update(f"Invalid query: {str(e)[:50]}")
            error_widget.display = True

    def on_input_submitted(self, event: Input.Submitted) -> None:
        """Apply filter and hide search bar (only if query is valid)"""
        if self._query_valid and self.filter_fn is not None:
            # Set flag to keep filter when hiding
            self._keep_filter = True
            self.visible = False
            # Post message to focus jobs table
            self.post_message(SearchApplied())
        # If invalid, do nothing (keep input focused for correction)
|
143
|
+
|
|
144
|
+
class JobDetailView(Widget):
    """Widget displaying detailed job information.

    Details are fetched through the StateProvider; for remote providers the
    fetch runs in a background worker thread and the UI is updated back on
    the main thread.
    """

    BINDINGS = [
        Binding("l", "view_logs", "View Logs", priority=True),
    ]

    def __init__(self, state_provider: StateProvider) -> None:
        super().__init__()
        self.state_provider = state_provider
        self.current_job_id: Optional[str] = None
        self.current_experiment_id: Optional[str] = None
        # NOTE(review): annotated as dict but _update_job_display stores a job
        # object here (.path/.task_id/.state are accessed) — confirm the type
        self.job_data: Optional[dict] = None
        self.tags_map: dict[str, dict[str, str]] = {}  # job_id -> {tag_key: tag_value}
        self.dependencies_map: dict[
            str, list[str]
        ] = {}  # job_id -> [depends_on_job_ids]

    def compose(self) -> ComposeResult:
        """Build the static layout; labels are filled in by _update_job_display."""
        yield Label("Job Details", classes="section-title")
        yield Static(
            "Loading job details...", id="job-detail-loading", classes="hidden"
        )
        with VerticalScroll(id="job-detail-content"):
            yield Label("", id="job-id-label")
            yield Label("", id="job-task-label")
            yield Label("", id="job-status-label")
            yield Label("", id="job-path-label")
            yield Label("", id="job-times-label")
            yield Label("Process:", classes="subsection-title")
            yield Label("", id="job-process-label")
            yield Label("Tags:", classes="subsection-title")
            yield Label("", id="job-tags-label")
            yield Label("Dependencies:", classes="subsection-title")
            yield Label("", id="job-dependencies-label")
            yield Label("Progress:", classes="subsection-title")
            yield Label("", id="job-progress-label")
            yield Label("", id="job-logs-hint")

    def action_view_logs(self) -> None:
        """View job logs with toolong"""
        # Only meaningful once a job with a path and task id has been loaded
        if self.job_data and self.job_data.path and self.job_data.task_id:
            self.post_message(
                ViewJobLogs(
                    str(self.job_data.path), self.job_data.task_id, self.job_data.state
                )
            )

    def _get_process_info(self, job) -> str:
        """Get process information for a job using the state provider.

        Returns a Rich-markup string; placeholder text when no process
        information is available.
        """
        pinfo = self.state_provider.get_process_info(job)

        if pinfo is None:
            if job.state and job.state.finished():
                return "(process completed)"
            return "(no process info)"

        # Build process info string
        parts = [f"PID: [bold]{pinfo.pid}[/bold]", f"Type: {pinfo.type}"]

        if pinfo.running:
            # Live process: append whichever metrics the provider reported
            if pinfo.cpu_percent is not None:
                parts.append(f"CPU: {pinfo.cpu_percent:.1f}%")
            if pinfo.memory_mb is not None:
                parts.append(f"Mem: {pinfo.memory_mb:.1f}MB")
            if pinfo.num_threads is not None:
                parts.append(f"Threads: {pinfo.num_threads}")
        elif job.state and job.state.running():
            # Scheduler thinks it runs, but the OS process is gone
            parts.append("[dim](process not found)[/dim]")

        return " | ".join(parts)

    def set_job(self, job_id: str, experiment_id: str) -> None:
        """Set the job to display"""
        self.current_job_id = job_id
        self.current_experiment_id = experiment_id

        # Show loading for remote
        if self.state_provider.is_remote:
            self.query_one("#job-detail-loading", Static).remove_class("hidden")

        # Load in background
        self._load_job_detail(job_id, experiment_id)

    @work(thread=True, exclusive=True, group="job_detail_load")
    def _load_job_detail(self, job_id: str, experiment_id: str) -> None:
        """Load job details in background thread"""
        # Load tags and dependencies if needed
        tags_map = self.state_provider.get_tags_map(experiment_id)
        deps_map = self.state_provider.get_dependencies_map(experiment_id)
        job = self.state_provider.get_job(job_id, experiment_id)

        # Hand results back to the UI thread
        self.app.call_from_thread(self._on_job_loaded, job, tags_map, deps_map)

    def _on_job_loaded(self, job, tags_map: dict, deps_map: dict) -> None:
        """Handle loaded job on main thread"""
        self.query_one("#job-detail-loading", Static).add_class("hidden")
        self.tags_map = tags_map
        self.dependencies_map = deps_map

        if not job:
            self.log(f"Job not found: {self.current_job_id}")
            return

        self._update_job_display(job)

    def refresh_job_detail(self) -> None:
        """Refresh job details from state provider"""
        if not self.current_job_id or not self.current_experiment_id:
            return

        if self.state_provider.is_remote:
            # Remote: re-fetch in the background worker
            self._load_job_detail(self.current_job_id, self.current_experiment_id)
        else:
            # Local: fetch synchronously and update in place
            job = self.state_provider.get_job(
                self.current_job_id, self.current_experiment_id
            )
            if job:
                self._update_job_display(job)

    def _update_job_display(self, job) -> None:
        """Update the display with job data"""
        self.job_data = job

        # Update labels
        self.query_one("#job-id-label", Label).update(f"Job ID: {job.identifier}")
        self.query_one("#job-task-label", Label).update(f"Task: {job.task_id}")

        # Format status with icon and name
        status_name = job.state.name if job.state else "unknown"
        failure_reason = getattr(job, "failure_reason", None)
        transient = getattr(job, "transient", None)
        status_icon = get_status_icon(status_name, failure_reason, transient)
        status_text = f"{status_icon} {status_name}"
        if failure_reason:
            status_text += f" ({failure_reason.name})"

        self.query_one("#job-status-label", Label).update(f"Status: {status_text}")

        # Path (from locator)
        locator = job.locator or "-"
        self.query_one("#job-path-label", Label).update(f"Locator: {locator}")

        # Times - format timestamps
        def format_time(ts):
            # NOTE(review): naive local-time rendering — confirm timestamps
            # are meant to be shown in the local timezone
            if ts:
                return datetime.fromtimestamp(ts).strftime("%Y-%m-%d %H:%M:%S")
            return "-"

        submitted = format_time(job.submittime)
        start = format_time(job.starttime)
        end = format_time(job.endtime)

        # Calculate duration
        duration = "-"
        if job.starttime:
            if job.endtime:
                duration = format_duration(job.endtime - job.starttime)
            else:
                # Still running: measure against the current wall clock
                duration = (
                    format_duration(time_module.time() - job.starttime) + " (running)"
                )

        times_text = f"Submitted: {submitted} | Start: {start} | End: {end} | Duration: {duration}"
        self.query_one("#job-times-label", Label).update(times_text)

        # Process information
        process_text = self._get_process_info(job)
        self.query_one("#job-process-label", Label).update(process_text)

        # Tags are stored in JobTagModel, accessed via tags_map
        tags = self.tags_map.get(job.identifier, {})
        if tags:
            tags_text = ", ".join(f"{k}={v}" for k, v in tags.items())
        else:
            tags_text = "(no tags)"
        self.query_one("#job-tags-label", Label).update(tags_text)

        # Dependencies are stored in JobDependenciesModel, accessed via dependencies_map
        depends_on = self.dependencies_map.get(job.identifier, [])
        if depends_on:
            # Try to get task IDs for the dependency jobs
            dep_texts = []
            for dep_job_id in depends_on:
                dep_job = self.state_provider.get_job(
                    dep_job_id, self.current_experiment_id
                )
                if dep_job:
                    dep_task_name = dep_job.task_id.split(".")[-1]
                    dep_texts.append(f"{dep_task_name} ({dep_job_id[:8]}...)")
                else:
                    # Unknown dependency: show a truncated id only
                    dep_texts.append(f"{dep_job_id[:8]}...")
            dependencies_text = ", ".join(dep_texts)
        else:
            dependencies_text = "(no dependencies)"
        self.query_one("#job-dependencies-label", Label).update(dependencies_text)

        # Progress
        progress_list = job.progress or []
        if progress_list:
            progress_lines = []
            for p in progress_list:
                level = p.level
                pct = p.progress * 100
                desc = p.desc or ""
                # Indent nested progress entries by their level
                indent = " " * level

                # Create visual progress bar (20 chars wide)
                bar_width = 20
                filled = int(p.progress * bar_width)
                remaining = bar_width - filled

                # Use Unicode block characters with colors
                filled_bar = "█" * filled
                remaining_bar = "░" * remaining

                # Color based on progress level
                if pct >= 100:
                    bar_color = "green"
                elif pct >= 50:
                    bar_color = "cyan"
                else:
                    bar_color = "yellow"

                # Format: [bar] percentage description
                bar_text = f"[{bar_color}]{filled_bar}[/][dim]{remaining_bar}[/]"
                pct_text = f"[bold]{pct:5.1f}%[/bold]"
                desc_text = f" [italic]{desc}[/]" if desc else ""

                progress_lines.append(f"{indent}{bar_text} {pct_text}{desc_text}")
            progress_text = "\n".join(progress_lines) if progress_lines else "-"
        else:
            progress_text = "-"
        self.query_one("#job-progress-label", Label).update(progress_text)

        # Log files hint - log files are named after the last part of the task ID
        job_path = job.path
        task_id = job.task_id
        if job_path and task_id:
            # Extract the last component of the task ID (e.g., "evaluate" from "mnist_xp.learn.evaluate")
            task_name = task_id.split(".")[-1]
            stdout_path = job_path / f"{task_name}.out"
            stderr_path = job_path / f"{task_name}.err"
            logs_exist = stdout_path.exists() or stderr_path.exists()
            if logs_exist:
                self.query_one("#job-logs-hint", Label).update(
                    "[bold cyan]Press 'l' to view logs[/bold cyan]"
                )
            else:
                self.query_one("#job-logs-hint", Label).update("(no log files found)")
        else:
            self.query_one("#job-logs-hint", Label).update("")
|
398
|
+
class JobsTable(Vertical):
|
|
399
|
+
"""Widget displaying jobs for selected experiment"""
|
|
400
|
+
|
|
401
|
+
BINDINGS = [
|
|
402
|
+
Binding("ctrl+d", "delete_job", "Delete", show=False),
|
|
403
|
+
Binding("k", "kill_job", "Kill", show=False),
|
|
404
|
+
Binding("l", "view_logs", "Logs", key_display="l"),
|
|
405
|
+
Binding("f", "copy_path", "Copy Path", show=False),
|
|
406
|
+
Binding("/", "toggle_search", "Search"),
|
|
407
|
+
Binding("c", "clear_filter", "Clear", show=False),
|
|
408
|
+
Binding("r", "refresh_live", "Refresh"),
|
|
409
|
+
Binding("S", "sort_by_status", "Sort ⚑", show=False),
|
|
410
|
+
Binding("T", "sort_by_task", "Sort Task", show=False),
|
|
411
|
+
Binding("D", "sort_by_submitted", "Sort Date", show=False),
|
|
412
|
+
Binding("escape", "clear_search", show=False, priority=True),
|
|
413
|
+
]
|
|
414
|
+
|
|
415
|
+
# Track current sort state
|
|
416
|
+
_sort_column: Optional[str] = None
|
|
417
|
+
_sort_reverse: bool = False
|
|
418
|
+
_needs_rebuild: bool = True # Start with rebuild needed
|
|
419
|
+
|
|
420
|
+
def __init__(self, state_provider: StateProvider) -> None:
    """Create a jobs table backed by *state_provider*."""
    super().__init__()
    self.state_provider = state_provider
    # No filter and no selected experiment/run until the user picks one
    self.filter_fn = None
    self.current_experiment: Optional[str] = None
    self.current_run_id: Optional[str] = None
    self.is_past_run: bool = False
    # Per-job metadata caches: job_id -> {tag_key: tag_value}
    self.tags_map: dict[str, dict[str, str]] = {}
    # job_id -> [depends_on_job_ids]
    self.dependencies_map: dict[str, list[str]] = {}
431
|
+
|
|
432
|
+
def compose(self) -> ComposeResult:
    """Lay out the widget: banner, loading indicator, search bar, data table."""
    # Banner shown when browsing a past (read-only) run
    yield Static("", id="past-run-banner", classes="hidden")
    yield Static("Loading jobs...", id="jobs-loading", classes="hidden")
    yield SearchBar()
    # Row cursor so whole jobs are selected, not individual cells
    yield DataTable(id="jobs-table", cursor_type="row")
437
|
+
|
|
438
|
+
def action_toggle_search(self) -> None:
    """Show the search bar if it is hidden, hide it otherwise."""
    bar = self.query_one(SearchBar)
    if bar.visible:
        bar.visible = False
    else:
        bar.visible = True
442
|
+
|
|
443
|
+
def action_clear_filter(self) -> None:
    """Clear the active filter.

    Resets the SearchBar's internal state directly (input text, compiled
    filter, query text, validity flag), hides all of its sub-widgets, then
    refreshes the table without a filter.
    """
    if self.filter_fn is not None:
        search_bar = self.query_one(SearchBar)
        search_bar.query_one("#search-input", Input).value = ""
        search_bar.filter_fn = None
        search_bar.active_query = ""
        search_bar._query_valid = False
        # Hide the SearchBar completely
        search_bar.display = False
        search_bar.query_one("#search-container").display = False
        search_bar.query_one("#active-filter").display = False
        search_bar.query_one("#search-error").display = False
        self.filter_fn = None
        self.refresh_jobs()
        self.notify("Filter cleared", severity="information")
459
|
+
|
|
460
|
+
def _apply_sort(self, column: str) -> None:
    """Select *column* as the sort key (toggling direction on repeat) and refresh.

    Shared by the three Sort actions below; previously this toggle/rebuild
    sequence was duplicated verbatim in each of them.
    """
    if self._sort_column == column:
        # Same column again: flip the direction
        self._sort_reverse = not self._sort_reverse
    else:
        # New column: start ascending
        self._sort_column = column
        self._sort_reverse = False
    self._needs_rebuild = True
    self._update_column_headers()
    self.refresh_jobs()

def action_sort_by_status(self) -> None:
    """Sort jobs by status"""
    self._apply_sort("status")
    order = "desc" if self._sort_reverse else "asc"
    self.notify(f"Sorted by status ({order})", severity="information")

def action_sort_by_task(self) -> None:
    """Sort jobs by task"""
    self._apply_sort("task")
    order = "desc" if self._sort_reverse else "asc"
    self.notify(f"Sorted by task ({order})", severity="information")

def action_sort_by_submitted(self) -> None:
    """Sort jobs by submission time"""
    self._apply_sort("submitted")
    order = "newest first" if self._sort_reverse else "oldest first"
    self.notify(f"Sorted by date ({order})", severity="information")
498
|
+
|
|
499
|
+
def action_clear_search(self) -> None:
    """Handle escape: hide search bar if visible, or go back.

    Escape is context-sensitive: while editing a search it cancels the
    search (filter dropped); otherwise it navigates back and any applied
    filter is kept.
    """
    search_bar = self.query_one(SearchBar)
    if search_bar.visible:
        # Search bar visible - hide it and clear filter
        search_bar.visible = False
        self.filter_fn = None
        self.refresh_jobs()
        # Focus the jobs table
        self.query_one("#jobs-table", DataTable).focus()
    else:
        # Search bar hidden - go back (keep filter)
        self.app.action_go_back()
512
|
+
|
|
513
|
+
def action_refresh_live(self) -> None:
    """Refresh the jobs table"""
    # Re-query the state provider, then confirm to the user
    self.refresh_jobs()
    self.notify("Jobs refreshed", severity="information")
517
|
+
|
|
518
|
+
def on_filter_changed(self, message: FilterChanged) -> None:
    """Adopt the filter posted by the search bar and redisplay the jobs."""
    new_filter = message.filter_fn
    self.filter_fn = new_filter
    self.refresh_jobs()
522
|
+
|
|
523
|
+
def on_search_applied(self, message: SearchApplied) -> None:
    """Return keyboard focus to the jobs table after a search is applied."""
    jobs_table = self.query_one("#jobs-table", DataTable)
    jobs_table.focus()
526
|
+
|
|
527
|
+
def _get_selected_job_id(self) -> Optional[str]:
    """Get the job ID from the currently selected row.

    Returns None when no row is selected or the table is empty.
    """
    table = self.query_one("#jobs-table", DataTable)
    # Guard the empty table too: cursor_row can be 0 with no rows, and
    # get_row_at would then raise instead of returning a row.
    if table.cursor_row is None or table.row_count == 0:
        return None
    # Fetch the row once (the original called get_row_at twice)
    row = table.get_row_at(table.cursor_row)
    if row:
        # The first column is job_id
        return str(row[0])
    return None
537
|
+
|
|
538
|
+
def action_delete_job(self) -> None:
    """Request to delete the selected job.

    Posts a DeleteJobRequest for the job under the cursor; no-op when
    nothing is selected or no experiment is active.
    """
    table = self.query_one("#jobs-table", DataTable)
    if table.cursor_row is None or not self.current_experiment:
        return
    # cursor_row can be 0 even when the table has no rows; without this
    # guard the indexing below raises IndexError on an empty table.
    if table.row_count == 0:
        return

    # Get job ID from the row key
    row_key = list(table.rows.keys())[table.cursor_row]
    if row_key:
        job_id = str(row_key.value)
        self.post_message(DeleteJobRequest(job_id, self.current_experiment))
549
|
+
|
|
550
|
+
def action_kill_job(self) -> None:
    """Ask the application to kill the job under the cursor."""
    table = self.query_one("#jobs-table", DataTable)
    # Both a cursor position and an active experiment are required.
    if table.cursor_row is None or not self.current_experiment:
        return

    key = list(table.rows.keys())[table.cursor_row]
    if not key:
        return
    self.post_message(KillJobRequest(str(key.value), self.current_experiment))
|
|
560
|
+
|
|
561
|
+
def action_view_logs(self) -> None:
    """Ask the application to show logs for the job under the cursor."""
    table = self.query_one("#jobs-table", DataTable)
    # Both a cursor position and an active experiment are required.
    if table.cursor_row is None or not self.current_experiment:
        return

    key = list(table.rows.keys())[table.cursor_row]
    if not key:
        return
    self.post_message(ViewJobLogsRequest(str(key.value), self.current_experiment))
|
|
571
|
+
|
|
572
|
+
def action_copy_path(self) -> None:
    """Copy the selected job's folder path to the system clipboard.

    Notifies the user on success, on copy failure, and when no path is
    available. Does nothing when no row is selected or no experiment is
    active.
    """
    table = self.query_one("#jobs-table", DataTable)
    if table.cursor_row is None or not self.current_experiment:
        return

    row_key = list(table.rows.keys())[table.cursor_row]
    if not row_key:
        return
    job_id = str(row_key.value)
    job = self.state_provider.get_job(job_id, self.current_experiment)
    # Guard against a missing job as well as a job without a path
    # (previously a missing job raised AttributeError on job.path).
    if job is None or not job.path:
        self.notify("No path available for this job", severity="warning")
        return
    try:
        # Imported lazily: pyperclip is only needed when actually copying,
        # and a failed import is reported like any other copy failure
        # instead of crashing before the selection guard.
        import pyperclip

        pyperclip.copy(str(job.path))
        self.notify(f"Path copied: {job.path}", severity="information")
    except Exception as e:
        self.notify(f"Failed to copy: {e}", severity="error")
|
|
592
|
+
|
|
593
|
+
# Status sort order (for sorting by status)
# Lower value sorts first: active work (running/waiting) before finished
# states, with unscheduled/phantom entries at the end.
STATUS_ORDER = {
    "running": 0,
    "waiting": 1,
    "error": 2,
    "done": 3,
    "unscheduled": 4,
    "phantom": 5,
}

# Failure reason sort order (within error status)
# More actionable failures first
FAILURE_ORDER = {
    "TIMEOUT": 0,  # Might just need retry
    "MEMORY": 1,  # Might need resource adjustment
    "DEPENDENCY": 2,  # Need to fix upstream job first
    "FAILED": 3,  # Generic failure
}
|
|
611
|
+
|
|
612
|
+
@classmethod
def _get_status_sort_key(cls, job):
    """Return the sort key (status priority, failure priority) for a job.

    Unknown statuses and unknown failure reasons sort to the end (99).
    """
    name = job.state.name if job.state else "unknown"
    primary = cls.STATUS_ORDER.get(name, 99)

    # Only error jobs get a meaningful secondary ordering, by how
    # actionable the failure reason is.
    secondary = 0
    if name == "error":
        reason = getattr(job, "failure_reason", None)
        secondary = cls.FAILURE_ORDER.get(reason.name, 99) if reason else 99

    return (primary, secondary)
|
|
632
|
+
|
|
633
|
+
# Column key to display name mapping
# (shared by on_mount and _update_column_headers so header labels
# stay consistent; insertion order is the column display order)
COLUMN_LABELS = {
    "job_id": "ID",
    "task": "Task",
    "status": "⚑",
    "tags": "Tags",
    "submitted": "Submitted",
    "duration": "Duration",
}

# Columns that support sorting (column key -> sort column name)
SORTABLE_COLUMNS = {
    "status": "status",
    "task": "task",
    "submitted": "submitted",
}
|
|
649
|
+
|
|
650
|
+
def on_mount(self) -> None:
    """Initialize the jobs table columns and display options."""
    table = self.query_one("#jobs-table", DataTable)
    # Build columns from COLUMN_LABELS so header text and order stay
    # consistent with _update_column_headers, which reads the same map
    # (previously the labels were duplicated here as literals).
    for key, label in self.COLUMN_LABELS.items():
        if key == "status":
            # The status column shows a small icon: keep it narrow.
            table.add_column(label, key=key, width=6)
        else:
            table.add_column(label, key=key)
    table.cursor_type = "row"
    table.zebra_stripes = True
|
|
661
|
+
|
|
662
|
+
def _update_column_headers(self) -> None:
    """Refresh header labels, appending a sort arrow on the active column."""
    table = self.query_one("#jobs-table", DataTable)
    for column in table.columns.values():
        key_name = str(column.key.value) if column.key else None
        # Skip columns we do not manage.
        if not key_name or key_name not in self.COLUMN_LABELS:
            continue
        base_label = self.COLUMN_LABELS[key_name]
        sortable_as = self.SORTABLE_COLUMNS.get(key_name)
        if sortable_as and self._sort_column == sortable_as:
            # Arrow shows the current sort direction.
            arrow = "▼" if self._sort_reverse else "▲"
            column.label = f"{base_label} {arrow}"
        else:
            column.label = base_label
|
|
677
|
+
|
|
678
|
+
def on_data_table_header_selected(self, event: DataTable.HeaderSelected) -> None:
    """Switch or toggle the sort column when a header is clicked."""
    key_name = str(event.column_key.value) if event.column_key else None
    if not key_name or key_name not in self.SORTABLE_COLUMNS:
        return
    target = self.SORTABLE_COLUMNS[key_name]
    if self._sort_column == target:
        # Clicking the active column again flips the direction.
        self._sort_reverse = not self._sort_reverse
    else:
        # Switching columns starts with ascending order.
        self._sort_column = target
        self._sort_reverse = False
    # Sorting changes row order, so the table must be rebuilt.
    self._needs_rebuild = True
    self._update_column_headers()
    self.refresh_jobs()
|
|
691
|
+
|
|
692
|
+
def set_experiment(
    self,
    experiment_id: Optional[str],
    run_id: Optional[str] = None,
    is_past_run: bool = False,
) -> None:
    """Switch the panel to another experiment/run and refresh its jobs.

    Args:
        experiment_id: The experiment ID to show jobs for
        run_id: The specific run ID (optional)
        is_past_run: Whether this is a past (non-current) run
    """
    self.current_experiment = experiment_id
    self.current_run_id = run_id
    self.is_past_run = is_past_run

    # Show or hide the "viewing a past run" banner.
    run_banner = self.query_one("#past-run-banner", Static)
    if is_past_run and run_id:
        run_banner.update(f"[bold yellow]Viewing past run: {run_id}[/bold yellow]")
        run_banner.remove_class("hidden")
    else:
        run_banner.add_class("hidden")

    # Remote providers are slow: clear the table and show the loading
    # indicator while data is fetched in the background.
    if self.state_provider.is_remote:
        jobs_table = self.query_one("#jobs-table", DataTable)
        jobs_table.clear()
        self.query_one("#jobs-loading", Static).remove_class("hidden")

    # Fetch the experiment data via the background worker.
    self._load_experiment_data(experiment_id, run_id)
|
|
725
|
+
|
|
726
|
+
@work(thread=True, exclusive=True, group="jobs_load")
def _load_experiment_data(
    self, experiment_id: Optional[str], run_id: Optional[str]
) -> None:
    """Fetch jobs, tags and dependencies in a background thread."""
    if not experiment_id:
        # No experiment selected: reset caches and show an empty table.
        self.tags_map = {}
        self.dependencies_map = {}
        self.app.call_from_thread(self._on_data_loaded, [])
        return

    # These provider calls are the slow part for remote providers.
    fetched_tags = self.state_provider.get_tags_map(experiment_id, run_id)
    fetched_deps = self.state_provider.get_dependencies_map(
        experiment_id, run_id
    )
    fetched_jobs = self.state_provider.get_jobs(experiment_id, run_id=run_id)

    # UI updates must happen on the main thread.
    self.app.call_from_thread(
        self._on_data_loaded, fetched_jobs, fetched_tags, fetched_deps
    )
|
|
748
|
+
|
|
749
|
+
def _on_data_loaded(
    self,
    jobs: list,
    tags_map: Optional[dict] = None,
    dependencies_map: Optional[dict] = None,
) -> None:
    """Handle loaded data on main thread.

    Called (via call_from_thread) by _load_experiment_data once the
    provider data is available; None maps mean "leave the cache as is".
    """
    # Hide loading indicator
    self.query_one("#jobs-loading", Static).add_class("hidden")

    # Update maps (only when the worker actually fetched them)
    if tags_map is not None:
        self.tags_map = tags_map
    if dependencies_map is not None:
        self.dependencies_map = dependencies_map

    # Refresh display with loaded jobs
    self._refresh_jobs_with_data(jobs)
|
|
767
|
+
|
|
768
|
+
def refresh_jobs(self) -> None:
    """Reload the jobs list from the state provider.

    For remote providers, this runs in background. For local, it's synchronous.
    """
    if not self.current_experiment:
        return

    if self.state_provider.is_remote:
        # Remote fetches are slow: delegate to the background worker.
        self._load_experiment_data(self.current_experiment, self.current_run_id)
        return

    # Local providers are fast enough to query synchronously.
    fetched = self.state_provider.get_jobs(
        self.current_experiment, run_id=self.current_run_id
    )
    self._refresh_jobs_with_data(fetched)
|
|
785
|
+
|
|
786
|
+
def _refresh_jobs_with_data(self, jobs: list) -> None:  # noqa: C901
    """Refresh the jobs display with provided job data.

    Filters and sorts the jobs, then either rebuilds the table (when the
    job set, the status ordering, or an explicit request requires it) or
    updates the existing cells in place to avoid flicker and preserve the
    cursor position.
    """
    table = self.query_one("#jobs-table", DataTable)

    self.log.debug(
        f"Refreshing jobs for {self.current_experiment}/{self.current_run_id}: {len(jobs)} jobs"
    )

    # Apply filter if set
    if self.filter_fn:
        jobs = [j for j in jobs if self.filter_fn(j)]
        self.log.debug(f"After filter: {len(jobs)} jobs")

    # Sort jobs based on selected column
    if self._sort_column == "status":
        # Sort by status priority, then by failure reason for errors
        jobs.sort(
            key=self._get_status_sort_key,
            reverse=self._sort_reverse,
        )
    elif self._sort_column == "task":
        # Sort by task name
        jobs.sort(
            key=lambda j: j.task_id or "",
            reverse=self._sort_reverse,
        )
    else:
        # Default: sort by submission time (oldest first by default)
        # Jobs without submittime go to the end
        jobs.sort(
            key=lambda j: j.submittime or float("inf"),
            reverse=self._sort_reverse,
        )

    # Check if we need to rebuild (new/removed jobs, or status changed when sorting by status)
    existing_keys = {str(k.value) for k in table.rows.keys()}
    current_job_ids = {job.identifier for job in jobs}

    # Check if job set changed
    jobs_changed = existing_keys != current_job_ids

    # Check if status changed when sorting by status
    # (a status change reorders rows, which in-place updates cannot do)
    status_changed = False
    if self._sort_column == "status" and not jobs_changed:
        current_statuses = {
            job.identifier: (job.state.name if job.state else "unknown")
            for job in jobs
        }
        # hasattr guard: _last_statuses only exists after the first pass
        if (
            hasattr(self, "_last_statuses")
            and self._last_statuses != current_statuses
        ):
            status_changed = True
        self._last_statuses = current_statuses

    needs_rebuild = self._needs_rebuild or jobs_changed or status_changed
    # Reset the one-shot rebuild request (set e.g. by header-click sorting)
    self._needs_rebuild = False

    # Build row data for all jobs
    rows_data = {}
    for job in jobs:
        job_id = job.identifier
        task_id = job.task_id
        status = job.state.name if job.state else "unknown"

        # Format status with icon (and progress % if running)
        if status == "running":
            progress_list = job.progress or []
            if progress_list:
                # We only report main progress here (level 0)
                last_progress = progress_list[0]
                progress_pct = last_progress.progress * 100
                status_text = f"▶ {progress_pct:.0f}%"
            else:
                status_text = "▶"
        else:
            failure_reason = getattr(job, "failure_reason", None)
            transient = getattr(job, "transient", None)
            status_text = get_status_icon(status, failure_reason, transient)

        # Tags are stored in JobTagModel, accessed via tags_map
        job_tags = self.tags_map.get(job.identifier, {})
        if job_tags:
            # Render "key=value" pairs, bolding the key name
            tags_text = Text()
            for i, (k, v) in enumerate(job_tags.items()):
                if i > 0:
                    tags_text.append(", ")
                tags_text.append(f"{k}", style="bold")
                tags_text.append(f"={v}")
        else:
            tags_text = Text("-")

        submitted = "-"
        if job.submittime:
            submitted = datetime.fromtimestamp(job.submittime).strftime(
                "%Y-%m-%d %H:%M"
            )

        # Calculate duration (still-running jobs measure against "now")
        start = job.starttime
        end = job.endtime
        duration = "-"
        if start:
            if end:
                elapsed = end - start
            else:
                elapsed = time_module.time() - start
            duration = self._format_duration(elapsed)

        # Display a shortened id; the full id remains the row key
        job_id_short = job_id[:7]
        rows_data[job_id] = (
            job_id_short,
            task_id,
            status_text,
            tags_text,
            submitted,
            duration,
        )

    if needs_rebuild:
        # Full rebuild needed - save selection, clear, rebuild
        selected_key = None
        if table.cursor_row is not None and table.row_count > 0:
            try:
                row_keys = list(table.rows.keys())
                if table.cursor_row < len(row_keys):
                    selected_key = str(row_keys[table.cursor_row].value)
            except (IndexError, KeyError):
                pass

        table.clear()
        new_cursor_row = None
        for idx, (job_id, row_data) in enumerate(rows_data.items()):
            table.add_row(*row_data, key=job_id)
            if selected_key == job_id:
                new_cursor_row = idx

        # Restore the cursor onto the previously selected job, if still shown
        if new_cursor_row is not None and table.row_count > 0:
            table.move_cursor(row=new_cursor_row)
    else:
        # Just update cells in place - no reordering needed
        for job_id, row_data in rows_data.items():
            (
                job_id_short,
                task_id,
                status_text,
                tags_text,
                submitted,
                duration,
            ) = row_data
            table.update_cell(job_id, "job_id", job_id_short, update_width=True)
            table.update_cell(job_id, "task", task_id, update_width=True)
            table.update_cell(job_id, "status", status_text, update_width=True)
            table.update_cell(job_id, "tags", tags_text, update_width=True)
            table.update_cell(job_id, "submitted", submitted, update_width=True)
            table.update_cell(job_id, "duration", duration, update_width=True)

    self.log.debug(
        f"Jobs table now has {table.row_count} rows (rebuild={needs_rebuild})"
    )
|
|
946
|
+
|
|
947
|
+
def _format_duration(self, seconds: float) -> str:
|
|
948
|
+
"""Format duration in seconds to human-readable string"""
|
|
949
|
+
if seconds < 0:
|
|
950
|
+
return "-"
|
|
951
|
+
|
|
952
|
+
seconds = int(seconds)
|
|
953
|
+
if seconds < 60:
|
|
954
|
+
return f"{seconds}s"
|
|
955
|
+
elif seconds < 3600:
|
|
956
|
+
minutes = seconds // 60
|
|
957
|
+
secs = seconds % 60
|
|
958
|
+
return f"{minutes}m {secs}s"
|
|
959
|
+
elif seconds < 86400:
|
|
960
|
+
hours = seconds // 3600
|
|
961
|
+
minutes = (seconds % 3600) // 60
|
|
962
|
+
return f"{hours}h {minutes}m"
|
|
963
|
+
else:
|
|
964
|
+
days = seconds // 86400
|
|
965
|
+
hours = (seconds % 86400) // 3600
|
|
966
|
+
return f"{days}d {hours}h"
|
|
967
|
+
|
|
968
|
+
def on_data_table_row_selected(self, event: DataTable.RowSelected) -> None:
    """Announce the selected job to the rest of the application."""
    # Nothing to announce without a row key or an active experiment.
    if not event.row_key or not self.current_experiment:
        return
    selected = str(event.row_key.value)
    self.post_message(JobSelected(selected, self.current_experiment))
|