experimaestro 1.11.1__py3-none-any.whl → 2.0.0b4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of experimaestro might be problematic. Click here for more details.
- experimaestro/__init__.py +10 -11
- experimaestro/annotations.py +167 -206
- experimaestro/cli/__init__.py +140 -16
- experimaestro/cli/filter.py +42 -74
- experimaestro/cli/jobs.py +157 -106
- experimaestro/cli/progress.py +269 -0
- experimaestro/cli/refactor.py +249 -0
- experimaestro/click.py +0 -1
- experimaestro/commandline.py +19 -3
- experimaestro/connectors/__init__.py +22 -3
- experimaestro/connectors/local.py +12 -0
- experimaestro/core/arguments.py +192 -37
- experimaestro/core/identifier.py +127 -12
- experimaestro/core/objects/__init__.py +6 -0
- experimaestro/core/objects/config.py +702 -285
- experimaestro/core/objects/config_walk.py +24 -6
- experimaestro/core/serialization.py +91 -34
- experimaestro/core/serializers.py +1 -8
- experimaestro/core/subparameters.py +164 -0
- experimaestro/core/types.py +198 -83
- experimaestro/exceptions.py +26 -0
- experimaestro/experiments/cli.py +107 -25
- experimaestro/generators.py +50 -9
- experimaestro/huggingface.py +3 -1
- experimaestro/launcherfinder/parser.py +29 -0
- experimaestro/launcherfinder/registry.py +3 -3
- experimaestro/launchers/__init__.py +26 -1
- experimaestro/launchers/direct.py +12 -0
- experimaestro/launchers/slurm/base.py +154 -2
- experimaestro/mkdocs/base.py +6 -8
- experimaestro/mkdocs/metaloader.py +0 -1
- experimaestro/mypy.py +452 -7
- experimaestro/notifications.py +75 -16
- experimaestro/progress.py +404 -0
- experimaestro/rpyc.py +0 -1
- experimaestro/run.py +19 -6
- experimaestro/scheduler/__init__.py +18 -1
- experimaestro/scheduler/base.py +504 -959
- experimaestro/scheduler/dependencies.py +43 -28
- experimaestro/scheduler/dynamic_outputs.py +259 -130
- experimaestro/scheduler/experiment.py +582 -0
- experimaestro/scheduler/interfaces.py +474 -0
- experimaestro/scheduler/jobs.py +485 -0
- experimaestro/scheduler/services.py +186 -12
- experimaestro/scheduler/signal_handler.py +32 -0
- experimaestro/scheduler/state.py +1 -1
- experimaestro/scheduler/state_db.py +388 -0
- experimaestro/scheduler/state_provider.py +2345 -0
- experimaestro/scheduler/state_sync.py +834 -0
- experimaestro/scheduler/workspace.py +52 -10
- experimaestro/scriptbuilder.py +7 -0
- experimaestro/server/__init__.py +153 -32
- experimaestro/server/data/index.css +0 -125
- experimaestro/server/data/index.css.map +1 -1
- experimaestro/server/data/index.js +194 -58
- experimaestro/server/data/index.js.map +1 -1
- experimaestro/settings.py +47 -6
- experimaestro/sphinx/__init__.py +3 -3
- experimaestro/taskglobals.py +20 -0
- experimaestro/tests/conftest.py +80 -0
- experimaestro/tests/core/test_generics.py +2 -2
- experimaestro/tests/identifier_stability.json +45 -0
- experimaestro/tests/launchers/bin/sacct +6 -2
- experimaestro/tests/launchers/bin/sbatch +4 -2
- experimaestro/tests/launchers/common.py +2 -2
- experimaestro/tests/launchers/test_slurm.py +80 -0
- experimaestro/tests/restart.py +1 -1
- experimaestro/tests/tasks/all.py +7 -0
- experimaestro/tests/tasks/test_dynamic.py +231 -0
- experimaestro/tests/test_checkers.py +2 -2
- experimaestro/tests/test_cli_jobs.py +615 -0
- experimaestro/tests/test_dependencies.py +11 -17
- experimaestro/tests/test_deprecated.py +630 -0
- experimaestro/tests/test_environment.py +200 -0
- experimaestro/tests/test_experiment.py +3 -3
- experimaestro/tests/test_file_progress.py +425 -0
- experimaestro/tests/test_file_progress_integration.py +477 -0
- experimaestro/tests/test_forward.py +3 -3
- experimaestro/tests/test_generators.py +93 -0
- experimaestro/tests/test_identifier.py +520 -169
- experimaestro/tests/test_identifier_stability.py +458 -0
- experimaestro/tests/test_instance.py +16 -21
- experimaestro/tests/test_multitoken.py +442 -0
- experimaestro/tests/test_mypy.py +433 -0
- experimaestro/tests/test_objects.py +314 -30
- experimaestro/tests/test_outputs.py +8 -8
- experimaestro/tests/test_param.py +22 -26
- experimaestro/tests/test_partial_paths.py +231 -0
- experimaestro/tests/test_progress.py +2 -50
- experimaestro/tests/test_resumable_task.py +480 -0
- experimaestro/tests/test_serializers.py +141 -60
- experimaestro/tests/test_state_db.py +434 -0
- experimaestro/tests/test_subparameters.py +160 -0
- experimaestro/tests/test_tags.py +151 -15
- experimaestro/tests/test_tasks.py +137 -160
- experimaestro/tests/test_token_locking.py +252 -0
- experimaestro/tests/test_tokens.py +25 -19
- experimaestro/tests/test_types.py +133 -11
- experimaestro/tests/test_validation.py +19 -19
- experimaestro/tests/test_workspace_triggers.py +158 -0
- experimaestro/tests/token_reschedule.py +5 -3
- experimaestro/tests/utils.py +2 -2
- experimaestro/tokens.py +154 -57
- experimaestro/tools/diff.py +8 -1
- experimaestro/tui/__init__.py +8 -0
- experimaestro/tui/app.py +2303 -0
- experimaestro/tui/app.tcss +353 -0
- experimaestro/tui/log_viewer.py +228 -0
- experimaestro/typingutils.py +11 -2
- experimaestro/utils/__init__.py +23 -0
- experimaestro/utils/environment.py +148 -0
- experimaestro/utils/git.py +129 -0
- experimaestro/utils/resources.py +1 -1
- experimaestro/version.py +34 -0
- {experimaestro-1.11.1.dist-info → experimaestro-2.0.0b4.dist-info}/METADATA +70 -39
- experimaestro-2.0.0b4.dist-info/RECORD +181 -0
- {experimaestro-1.11.1.dist-info → experimaestro-2.0.0b4.dist-info}/WHEEL +1 -1
- experimaestro-2.0.0b4.dist-info/entry_points.txt +16 -0
- experimaestro/compat.py +0 -6
- experimaestro/core/objects.pyi +0 -225
- experimaestro/server/data/0c35d18bf06992036b69.woff2 +0 -0
- experimaestro/server/data/219aa9140e099e6c72ed.woff2 +0 -0
- experimaestro/server/data/3a4004a46a653d4b2166.woff +0 -0
- experimaestro/server/data/3baa5b8f3469222b822d.woff +0 -0
- experimaestro/server/data/4d73cb90e394b34b7670.woff +0 -0
- experimaestro/server/data/4ef4218c522f1eb6b5b1.woff2 +0 -0
- experimaestro/server/data/5d681e2edae8c60630db.woff +0 -0
- experimaestro/server/data/6f420cf17cc0d7676fad.woff2 +0 -0
- experimaestro/server/data/c380809fd3677d7d6903.woff2 +0 -0
- experimaestro/server/data/f882956fd323fd322f31.woff +0 -0
- experimaestro-1.11.1.dist-info/RECORD +0 -158
- experimaestro-1.11.1.dist-info/entry_points.txt +0 -17
- {experimaestro-1.11.1.dist-info → experimaestro-2.0.0b4.dist-info/licenses}/LICENSE +0 -0
experimaestro/cli/jobs.py
CHANGED
|
@@ -1,8 +1,6 @@
|
|
|
1
1
|
# flake8: noqa: T201
|
|
2
|
-
import asyncio
|
|
3
2
|
import subprocess
|
|
4
3
|
from typing import Optional
|
|
5
|
-
from shutil import rmtree
|
|
6
4
|
import click
|
|
7
5
|
from pathlib import Path
|
|
8
6
|
from termcolor import colored, cprint
|
|
@@ -34,6 +32,9 @@ def jobs(
|
|
|
34
32
|
selects jobs where the tag model is "bm25", the tag mode is either
|
|
35
33
|
"a" or "b", and the state is running.
|
|
36
34
|
|
|
35
|
+
Note: Jobs are read from the workspace database. If jobs are missing,
|
|
36
|
+
run 'experimaestro experiments sync' to synchronize the database
|
|
37
|
+
with the filesystem.
|
|
37
38
|
"""
|
|
38
39
|
ws = ctx.obj.workspace = find_workspace(workdir=workdir, workspace=workspace)
|
|
39
40
|
check_xp_path(ctx, None, ws.path)
|
|
@@ -44,117 +45,117 @@ def process(
|
|
|
44
45
|
*,
|
|
45
46
|
experiment="",
|
|
46
47
|
tags="",
|
|
47
|
-
ready=False,
|
|
48
48
|
clean=False,
|
|
49
49
|
kill=False,
|
|
50
50
|
filter="",
|
|
51
51
|
perform=False,
|
|
52
52
|
fullpath=False,
|
|
53
|
-
|
|
53
|
+
count=0,
|
|
54
54
|
):
|
|
55
|
-
from
|
|
55
|
+
"""Process jobs from the workspace database
|
|
56
|
+
|
|
57
|
+
Args:
|
|
58
|
+
workspace: Workspace settings
|
|
59
|
+
experiment: Filter by experiment ID
|
|
60
|
+
tags: Show tags in output
|
|
61
|
+
clean: Clean finished jobs
|
|
62
|
+
kill: Kill running jobs
|
|
63
|
+
filter: Filter expression
|
|
64
|
+
perform: Actually perform kill/clean (dry run if False)
|
|
65
|
+
fullpath: Show full paths instead of short names
|
|
66
|
+
count: Limit output to N most recent jobs (0 = no limit)
|
|
67
|
+
"""
|
|
68
|
+
from .filter import createFilter
|
|
69
|
+
from experimaestro.scheduler.state_provider import WorkspaceStateProvider
|
|
56
70
|
from experimaestro.scheduler import JobState
|
|
57
71
|
|
|
58
|
-
_filter = createFilter(filter) if filter else
|
|
59
|
-
|
|
60
|
-
# Get
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
job_str = (
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
if
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
if info.state is None:
|
|
101
|
-
print(colored(f"NODIR {job_str}", "red"), end="")
|
|
102
|
-
elif info.state.running():
|
|
72
|
+
_filter = createFilter(filter) if filter else None
|
|
73
|
+
|
|
74
|
+
# Get state provider (write mode for kill/clean operations)
|
|
75
|
+
read_only = not (kill or clean)
|
|
76
|
+
provider = WorkspaceStateProvider.get_instance(workspace.path, read_only=read_only)
|
|
77
|
+
|
|
78
|
+
try:
|
|
79
|
+
# Get all jobs from the database
|
|
80
|
+
all_jobs = provider.get_all_jobs()
|
|
81
|
+
|
|
82
|
+
# Filter by experiment if specified
|
|
83
|
+
if experiment:
|
|
84
|
+
all_jobs = [j for j in all_jobs if j.experiment_id == experiment]
|
|
85
|
+
|
|
86
|
+
# Apply filter expression
|
|
87
|
+
if _filter:
|
|
88
|
+
all_jobs = [j for j in all_jobs if _filter(j)]
|
|
89
|
+
|
|
90
|
+
# Sort by submission time (most recent first)
|
|
91
|
+
# Jobs without submittime go to the end
|
|
92
|
+
all_jobs.sort(key=lambda j: j.submittime or 0, reverse=True)
|
|
93
|
+
|
|
94
|
+
# Limit to N most recent jobs if count is specified
|
|
95
|
+
if count > 0:
|
|
96
|
+
all_jobs = all_jobs[:count]
|
|
97
|
+
|
|
98
|
+
if not all_jobs:
|
|
99
|
+
cprint("No jobs found.", "yellow")
|
|
100
|
+
return
|
|
101
|
+
|
|
102
|
+
# Process each job
|
|
103
|
+
for job in all_jobs:
|
|
104
|
+
job_str = str(job.path) if fullpath else f"{job.task_id}/{job.identifier}"
|
|
105
|
+
|
|
106
|
+
# Add experiment info
|
|
107
|
+
if job.experiment_id:
|
|
108
|
+
job_str += f" [{job.experiment_id}]"
|
|
109
|
+
|
|
110
|
+
if job.state is None or job.state == JobState.UNSCHEDULED:
|
|
111
|
+
print(colored(f"UNSCHED {job_str}", "red"), end="")
|
|
112
|
+
elif job.state.running():
|
|
103
113
|
if kill:
|
|
104
114
|
if perform:
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
cprint(
|
|
108
|
-
"internal error – no process could be retrieved",
|
|
109
|
-
"red",
|
|
110
|
-
)
|
|
115
|
+
if provider.kill_job(job, perform=True):
|
|
116
|
+
cprint(f"KILLED {job_str}", "light_red")
|
|
111
117
|
else:
|
|
112
|
-
cprint(f"
|
|
113
|
-
process.kill()
|
|
118
|
+
cprint(f"KILL FAILED {job_str}", "red")
|
|
114
119
|
else:
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
colored(f"{
|
|
118
|
-
|
|
119
|
-
)
|
|
120
|
-
elif
|
|
121
|
-
print(
|
|
122
|
-
colored(f"DONE {job_str}", "green"),
|
|
123
|
-
end="",
|
|
124
|
-
)
|
|
125
|
-
elif info.state == JobState.ERROR:
|
|
120
|
+
cprint(f"KILLING {job_str} (dry run)", "yellow")
|
|
121
|
+
else:
|
|
122
|
+
print(colored(f"{job.state.name:8}{job_str}", "yellow"), end="")
|
|
123
|
+
elif job.state == JobState.DONE:
|
|
124
|
+
print(colored(f"DONE {job_str}", "green"), end="")
|
|
125
|
+
elif job.state == JobState.ERROR:
|
|
126
126
|
print(colored(f"FAIL {job_str}", "red"), end="")
|
|
127
127
|
else:
|
|
128
|
-
print(
|
|
129
|
-
colored(f"{info.state.name:8}{job_str}", "red"),
|
|
130
|
-
end="",
|
|
131
|
-
)
|
|
128
|
+
print(colored(f"{job.state.name:8}{job_str}", "red"), end="")
|
|
132
129
|
|
|
133
|
-
|
|
134
|
-
if
|
|
135
|
-
|
|
136
|
-
|
|
130
|
+
# Show tags if requested
|
|
131
|
+
if tags and job.tags:
|
|
132
|
+
print(f""" {" ".join(f"{k}={v}" for k, v in job.tags.items())}""")
|
|
133
|
+
elif not (kill and perform):
|
|
134
|
+
print()
|
|
137
135
|
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
136
|
+
# Clean finished jobs
|
|
137
|
+
if clean and job.state and job.state.finished():
|
|
138
|
+
if perform:
|
|
139
|
+
if provider.clean_job(job, perform=True):
|
|
140
|
+
cprint(" Cleaned", "red")
|
|
141
|
+
else:
|
|
142
|
+
cprint(" Clean failed", "red")
|
|
143
|
+
else:
|
|
144
|
+
cprint(" Would clean (dry run)", "yellow")
|
|
142
145
|
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
print()
|
|
146
|
+
print()
|
|
147
|
+
|
|
148
|
+
finally:
|
|
149
|
+
# Close provider if we created it for write mode
|
|
150
|
+
if not read_only:
|
|
151
|
+
provider.close()
|
|
150
152
|
|
|
151
153
|
|
|
152
154
|
@click.option("--experiment", default=None, help="Restrict to this experiment")
|
|
153
155
|
@click.option("--tags", is_flag=True, help="Show tags")
|
|
154
|
-
@click.option("--ready", is_flag=True, help="Include tasks which are not yet scheduled")
|
|
155
156
|
@click.option("--filter", default="", help="Filter expression")
|
|
156
157
|
@click.option("--fullpath", is_flag=True, help="Prints full paths")
|
|
157
|
-
@click.option("--
|
|
158
|
+
@click.option("--count", "-c", default=0, type=int, help="Limit to N most recent jobs")
|
|
158
159
|
@jobs.command()
|
|
159
160
|
@click.pass_context
|
|
160
161
|
def list(
|
|
@@ -162,24 +163,22 @@ def list(
|
|
|
162
163
|
experiment: str,
|
|
163
164
|
filter: str,
|
|
164
165
|
tags: bool,
|
|
165
|
-
ready: bool,
|
|
166
166
|
fullpath: bool,
|
|
167
|
-
|
|
167
|
+
count: int,
|
|
168
168
|
):
|
|
169
|
+
"""List all jobs in the workspace (sorted by submission date, most recent first)"""
|
|
169
170
|
process(
|
|
170
171
|
ctx.obj.workspace,
|
|
171
172
|
experiment=experiment,
|
|
172
173
|
filter=filter,
|
|
173
174
|
tags=tags,
|
|
174
|
-
ready=ready,
|
|
175
175
|
fullpath=fullpath,
|
|
176
|
-
|
|
176
|
+
count=count,
|
|
177
177
|
)
|
|
178
178
|
|
|
179
179
|
|
|
180
180
|
@click.option("--experiment", default=None, help="Restrict to this experiment")
|
|
181
181
|
@click.option("--tags", is_flag=True, help="Show tags")
|
|
182
|
-
@click.option("--ready", is_flag=True, help="Include tasks which are not yet scheduled")
|
|
183
182
|
@click.option("--filter", default="", help="Filter expression")
|
|
184
183
|
@click.option("--perform", is_flag=True, help="Really perform the killing")
|
|
185
184
|
@click.option("--fullpath", is_flag=True, help="Prints full paths")
|
|
@@ -190,17 +189,15 @@ def kill(
|
|
|
190
189
|
experiment: str,
|
|
191
190
|
filter: str,
|
|
192
191
|
tags: bool,
|
|
193
|
-
ready: bool,
|
|
194
192
|
fullpath: bool,
|
|
195
193
|
perform: bool,
|
|
196
|
-
check: bool,
|
|
197
194
|
):
|
|
195
|
+
"""Kill running jobs"""
|
|
198
196
|
process(
|
|
199
197
|
ctx.obj.workspace,
|
|
200
198
|
experiment=experiment,
|
|
201
199
|
filter=filter,
|
|
202
200
|
tags=tags,
|
|
203
|
-
ready=ready,
|
|
204
201
|
kill=True,
|
|
205
202
|
perform=perform,
|
|
206
203
|
fullpath=fullpath,
|
|
@@ -209,7 +206,6 @@ def kill(
|
|
|
209
206
|
|
|
210
207
|
@click.option("--experiment", default=None, help="Restrict to this experiment")
|
|
211
208
|
@click.option("--tags", is_flag=True, help="Show tags")
|
|
212
|
-
@click.option("--ready", is_flag=True, help="Include tasks which are not yet scheduled")
|
|
213
209
|
@click.option("--filter", default="", help="Filter expression")
|
|
214
210
|
@click.option("--perform", is_flag=True, help="Really perform the cleaning")
|
|
215
211
|
@click.option("--fullpath", is_flag=True, help="Prints full paths")
|
|
@@ -220,16 +216,15 @@ def clean(
|
|
|
220
216
|
experiment: str,
|
|
221
217
|
filter: str,
|
|
222
218
|
tags: bool,
|
|
223
|
-
ready: bool,
|
|
224
219
|
fullpath: bool,
|
|
225
220
|
perform: bool,
|
|
226
221
|
):
|
|
222
|
+
"""Clean finished jobs (delete directories and DB entries)"""
|
|
227
223
|
process(
|
|
228
224
|
ctx.obj.workspace,
|
|
229
225
|
experiment=experiment,
|
|
230
226
|
filter=filter,
|
|
231
227
|
tags=tags,
|
|
232
|
-
ready=ready,
|
|
233
228
|
clean=True,
|
|
234
229
|
perform=perform,
|
|
235
230
|
fullpath=fullpath,
|
|
@@ -244,25 +239,81 @@ def clean(
|
|
|
244
239
|
@jobs.command()
|
|
245
240
|
@click.pass_context
|
|
246
241
|
def log(ctx, jobid: str, follow: bool, std: bool):
    """View job log (stderr by default, stdout with --std)

    JOBID format: task.name/hash (e.g., mymodule.MyTask/abc123)
    """
    # Validate the identifier up front: tuple-unpacking a malformed JOBID
    # would otherwise surface as a raw ValueError traceback instead of a
    # readable CLI error.
    parts = jobid.split("/")
    if len(parts) != 2 or not all(parts):
        raise click.ClickException(
            "JOBID must be in format task.name/hash (e.g., mymodule.MyTask/abc123)"
        )
    task_name, task_hash = parts

    # Log files are named after the last dotted component of the task
    # identifier; an identifier without a dot is used as-is.
    name = task_name.rsplit(".", 1)[-1]

    extension = "out" if std else "err"
    log_path = (
        ctx.obj.workspace.path / "jobs" / task_name / task_hash / f"{name}.{extension}"
    )
    if not log_path.exists():
        cprint(f"Log file not found: {log_path}", "red")
        return

    # Delegate display to standard pagers: `tail -f` streams new output,
    # `less -r` passes raw control characters through.
    if follow:
        subprocess.run(["tail", "-f", log_path])
    else:
        subprocess.run(["less", "-r", log_path])
|
|
260
262
|
|
|
261
263
|
|
|
262
264
|
@click.argument("jobid", type=str)
@jobs.command()
@click.pass_context
def path(ctx, jobid: str):
    """Print the path to a job directory

    JOBID format: task.name/hash (e.g., mymodule.MyTask/abc123)
    """
    # A JOBID without exactly one "/" (or with an empty component) cannot be
    # mapped to a job directory; report it as a CLI error rather than letting
    # tuple unpacking raise a bare ValueError.
    parts = jobid.split("/")
    if len(parts) != 2 or not all(parts):
        raise click.ClickException(
            "JOBID must be in format task.name/hash (e.g., mymodule.MyTask/abc123)"
        )
    task_name, task_hash = parts

    job_path = ctx.obj.workspace.path / "jobs" / task_name / task_hash
    if not job_path.exists():
        cprint(f"Job directory not found: {job_path}", "red")
        return
    print(job_path)
|
|
278
|
+
|
|
279
|
+
|
|
280
|
+
@click.option("--perform", is_flag=True, help="Actually delete orphan partials")
@jobs.command("cleanup-partials")
@click.pass_context
def cleanup_partials(ctx, perform: bool):
    """Clean up orphan partial directories

    Partial directories are shared checkpoint locations created by
    subparameters. When all jobs using a partial are deleted, the
    partial becomes orphaned and can be cleaned up.

    This command finds all orphan partials and deletes them (or shows
    what would be deleted in dry-run mode).
    """
    from experimaestro.scheduler.state_provider import WorkspaceStateProvider

    # The provider is opened read-only for a dry run and writable otherwise.
    read_only = not perform
    provider = WorkspaceStateProvider.get_instance(
        ctx.obj.workspace.path, read_only=read_only
    )

    try:
        orphans = provider.cleanup_orphan_partials(perform=perform)
        if not orphans:
            cprint("No orphan partials found.", "green")
            return

        # Pick the wording and colours once, then report every path.
        total = len(orphans)
        if perform:
            header, header_color = f"Cleaned {total} orphan partial(s):", "green"
            verb, item_color = "Deleted", "red"
        else:
            header, header_color = (
                f"Found {total} orphan partial(s) (dry run):",
                "yellow",
            )
            verb, item_color = "Would delete", "yellow"

        cprint(header, header_color)
        for orphan in orphans:
            print(colored(f" {verb}: {orphan}", item_color))
    finally:
        # Only the writable provider is closed here, as in the dry-run case
        # nothing was opened for writing.
        if not read_only:
            provider.close()
|
|
@@ -0,0 +1,269 @@
|
|
|
1
|
+
"""Simplified CLI commands for managing and viewing progress files"""
|
|
2
|
+
|
|
3
|
+
import time
|
|
4
|
+
from datetime import datetime
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Optional, Dict
|
|
7
|
+
|
|
8
|
+
import click
|
|
9
|
+
from termcolor import colored
|
|
10
|
+
|
|
11
|
+
try:
|
|
12
|
+
from tqdm import tqdm
|
|
13
|
+
|
|
14
|
+
TQDM_AVAILABLE = True
|
|
15
|
+
except ImportError:
|
|
16
|
+
TQDM_AVAILABLE = False
|
|
17
|
+
|
|
18
|
+
from experimaestro.progress import ProgressEntry, ProgressFileReader
|
|
19
|
+
from experimaestro.settings import find_workspace
|
|
20
|
+
from . import cli
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
@click.option("--workspace", default="", help="Experimaestro workspace")
@click.option("--workdir", type=Path, default=None)
@cli.group()
@click.pass_context
def progress(ctx, workdir: Optional[Path], workspace: Optional[str]):
    """Progress tracking commands"""
    # Resolve the workspace once and store it on the shared context object
    # so that the sub-commands can read it from ctx.obj.workspace.
    resolved = find_workspace(workdir=workdir, workspace=workspace)
    ctx.obj.workspace = resolved
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def format_timestamp(timestamp: float) -> str:
    """Render a POSIX timestamp as ``YYYY-MM-DD HH:MM:SS``.

    The conversion uses ``datetime.fromtimestamp`` without a timezone
    argument, so the result is expressed in the machine's local time.
    """
    moment = datetime.fromtimestamp(timestamp)
    return f"{moment:%Y-%m-%d %H:%M:%S}"
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
@click.argument("jobid", type=str)
@progress.command()
@click.pass_context
def show(ctx, jobid: str):
    """Show current progress state (default command)

    JOBID format: task_name/task_hash
    """
    try:
        task_name, task_hash = jobid.split("/")
    except ValueError:
        raise click.ClickException("JOBID must be in format task_name/task_hash")

    task_path = ctx.obj.workspace.path / "jobs" / task_name / task_hash
    if not task_path.exists():
        raise click.ClickException(f"Job directory not found: {task_path}")

    raw_progress = ProgressFileReader(task_path).get_current_progress()

    # Level -1 carries the EOJ marker rather than a real progress level, so
    # it is dropped; a missing/empty result and an "EOJ only" result are
    # reported the same way.
    levels = {
        level: entry for level, entry in (raw_progress or {}).items() if level != -1
    }
    if not levels:
        click.echo("No progress information available")
        return

    click.echo(f"Progress for job {jobid}")
    click.echo("=" * 80)

    # One two-line text entry per level, outermost level first.
    for level in sorted(levels):
        entry = levels[level]
        indent = " " * level
        desc = entry.desc or f"Level {level}"
        color = "green" if entry.progress >= 1.0 else "yellow"
        progress_pct = f"{entry.progress * 100:5.1f}%"

        click.echo(colored(f"{indent}L{level}: {progress_pct} - {desc}", color))
        click.echo(
            colored(
                f"{indent} Last updated: {format_timestamp(entry.timestamp)}", "cyan"
            )
        )
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def create_progress_bar(
    level: int,
    desc: str,
    progress: float = 0.0,
) -> "tqdm":
    """Create a properly aligned progress bar like dashboard style

    The return annotation is a string on purpose: this module guards the
    ``tqdm`` import with a try/except, and an unquoted annotation would be
    evaluated when the function is defined, raising ``NameError`` at import
    time whenever tqdm is missing.

    Args:
        level: Nesting level of the bar; it selects the screen row
            (``position``), the indentation of the label and the colour.
        desc: Description shown next to the bar (truncated to 50
            characters); falls back to ``"Level <level>"`` when empty.
        progress: Initial progress; it is multiplied by 100 against a bar
            total of 100, so a fraction between 0 and 1 is expected.

    Returns:
        The newly created tqdm bar.
    """
    indent = (" " * (level - 1) + "└─ ") if level > 0 else ""
    label = f"{indent}L{level}"

    # Cycle through a fixed palette so neighbouring levels differ in colour.
    colors = ["blue", "yellow", "magenta", "cyan", "white"]
    bar_color = colors[level % len(colors)]

    # The description is carried in tqdm's "unit" slot so that it can be
    # rendered after the bar by the custom bar_format below.
    unit = desc[:50] if desc else f"Level {level}"
    ncols = 100
    wbar = 50

    return tqdm(
        total=100,
        desc=label,
        position=level,
        leave=True,
        # The bar itself shrinks by the label indentation so that the right
        # edges of nested bars line up.
        bar_format=f"{{desc}}: {{percentage:3.0f}}%|{{bar:{wbar - len(indent)}}}| {{unit}}",  # noqa: F541
        ncols=ncols,  # Fixed overall width, identical for every level
        unit=unit,
        colour=bar_color,
        initial=progress * 100,
    )
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
def _update_progress_display(
    reader: "ProgressFileReader", progress_bars: "Dict[int, tqdm]"
) -> bool:
    """Update the tqdm progress bars in dashboard style

    Annotations are written as strings so that defining this helper never
    requires ``tqdm`` to be importable (the module treats it as optional).

    Args:
        reader: Source of the current progress state; only its
            ``get_current_state()`` method is used here.
        progress_bars: Mapping from level to live bar. It is mutated in
            place: missing bars are created, existing ones refreshed, and
            bars whose level is no longer reported are closed and removed.

    Returns:
        ``False`` when the reader reports no level other than the ``-1``
        marker (nothing is drawn or printed; the caller decides what to
        tell the user), ``True`` otherwise.
    """
    # Level -1 is a marker entry, not a real progress level.
    current_state = {
        k: v for k, v in reader.get_current_state().items() if k != -1
    }

    if not current_state:
        # Deliberately silent: this function is polled at the refresh rate,
        # so printing here would repeat the message on every tick.
        return False

    # Update existing bars and create new ones
    for entry in current_state.values():
        percent = entry.progress * 100
        desc = entry.desc or f"Level {entry.level}"

        if entry.level not in progress_bars:
            # create_progress_bar scales its `progress` argument by 100
            # itself, so it must receive the raw fraction, not the percent.
            progress_bars[entry.level] = create_progress_bar(
                entry.level, desc, entry.progress
            )

        bar = progress_bars[entry.level]
        bar.unit = desc[:50]
        bar.n = percent
        bar.refresh()

    # Remove bars for levels that no longer exist
    for level in set(progress_bars) - set(current_state):
        progress_bars[level].close()
        del progress_bars[level]

    return True
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
@click.argument("jobid", type=str)
@click.option("--refresh-rate", "-r", default=0.5, help="Refresh rate in seconds")
@progress.command()
@click.pass_context
def live(ctx, jobid: str, refresh_rate: float):
    """Show live progress with tqdm-style bars

    JOBID format: task_name/task_hash
    """
    # Without tqdm there is nothing to animate: explain, then show the
    # static view once instead.
    if not TQDM_AVAILABLE:
        for message in (
            "tqdm is not available. Install with: pip install tqdm",
            "Falling back to basic display...",
        ):
            click.echo(message)
        ctx.invoke(show, jobid=jobid)
        return

    try:
        task_name, task_hash = jobid.split("/")
    except ValueError:
        raise click.ClickException("JOBID must be in format task_name/task_hash")

    task_path = ctx.obj.workspace.path / "jobs" / task_name / task_hash
    if not task_path.exists():
        raise click.ClickException(f"Job directory not found: {task_path}")

    reader = ProgressFileReader(task_path)
    bars: Dict[int, tqdm] = {}

    def close_all_bars():
        """Close every open bar and forget it (safe to call repeatedly)."""
        for open_bar in bars.values():
            open_bar.close()
        bars.clear()

    for banner in (f"Live progress for job {jobid}", "Press Ctrl+C to stop", "=" * 80):
        click.echo(banner)

    try:
        if not _update_progress_display(reader, bars):
            click.echo("No progress information available yet...")

        # Poll until the job reports completion, every bar is full, or the
        # user interrupts with Ctrl+C.
        while True:
            time.sleep(refresh_rate)

            has_state = _update_progress_display(reader, bars)
            # The done flag is only consulted when no progress level is
            # currently reported.
            if not has_state and reader.is_done():
                click.echo("\nJob completed!")
                break

            if bars and all(open_bar.n >= 100 for open_bar in bars.values()):
                close_all_bars()
                click.echo("\nAll progress completed!")
                break

    except KeyboardInterrupt:
        click.echo("\nStopped monitoring progress")
    finally:
        close_all_bars()
|
|
222
|
+
|
|
223
|
+
|
|
224
|
+
@progress.command(name="list")
@click.pass_context
def list_jobs(ctx):
    """List all jobs with progress information"""
    jobs_root = ctx.obj.workspace.path / "jobs"
    if not jobs_root.exists():
        click.echo("No jobs directory found")
        return

    # Layout walked here: <workspace>/jobs/<task>/<job>/.experimaestro/
    for task_dir in jobs_root.iterdir():
        if not task_dir.is_dir():
            continue

        for job_dir in task_dir.iterdir():
            if not job_dir.is_dir():
                continue

            progress_dir = job_dir / ".experimaestro"
            if not progress_dir.exists():
                continue

            # Jobs that never wrote a progress file are not listed at all.
            if not any(progress_dir.glob("progress-*.jsonl")):
                continue

            job_id = f"{task_dir.name}/{job_dir.name}"
            state = ProgressFileReader(job_dir).get_current_state()
            if not state:
                click.echo(f"{job_id:50} No progress data")
                continue

            # Level 0 is used as the overall progress of the job.
            overall = state.get(0)
            if not overall:
                click.echo(f"{job_id:50} No level 0 progress")
                continue

            color = "green" if overall.progress >= 1.0 else "yellow"
            desc = f"{overall.desc}" if overall.desc else ""
            progress_pct = f"{overall.progress * 100:5.1f}%"
            click.echo(colored(f"{job_id:50} - {progress_pct} - {desc}", color))
|