experimaestro 2.0.0a8__py3-none-any.whl → 2.0.0b4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of experimaestro might be problematic. Click here for more details.
- experimaestro/__init__.py +10 -11
- experimaestro/annotations.py +167 -206
- experimaestro/cli/__init__.py +130 -5
- experimaestro/cli/filter.py +42 -74
- experimaestro/cli/jobs.py +157 -106
- experimaestro/cli/refactor.py +249 -0
- experimaestro/click.py +0 -1
- experimaestro/commandline.py +19 -3
- experimaestro/connectors/__init__.py +20 -1
- experimaestro/connectors/local.py +12 -0
- experimaestro/core/arguments.py +182 -46
- experimaestro/core/identifier.py +107 -6
- experimaestro/core/objects/__init__.py +6 -0
- experimaestro/core/objects/config.py +542 -25
- experimaestro/core/objects/config_walk.py +20 -0
- experimaestro/core/serialization.py +91 -34
- experimaestro/core/subparameters.py +164 -0
- experimaestro/core/types.py +175 -38
- experimaestro/exceptions.py +26 -0
- experimaestro/experiments/cli.py +107 -25
- experimaestro/generators.py +50 -9
- experimaestro/huggingface.py +3 -1
- experimaestro/launcherfinder/parser.py +29 -0
- experimaestro/launchers/__init__.py +26 -1
- experimaestro/launchers/direct.py +12 -0
- experimaestro/launchers/slurm/base.py +154 -2
- experimaestro/mkdocs/metaloader.py +0 -1
- experimaestro/mypy.py +452 -7
- experimaestro/notifications.py +63 -13
- experimaestro/progress.py +0 -2
- experimaestro/rpyc.py +0 -1
- experimaestro/run.py +19 -6
- experimaestro/scheduler/base.py +489 -125
- experimaestro/scheduler/dependencies.py +43 -28
- experimaestro/scheduler/dynamic_outputs.py +259 -130
- experimaestro/scheduler/experiment.py +225 -30
- experimaestro/scheduler/interfaces.py +474 -0
- experimaestro/scheduler/jobs.py +216 -206
- experimaestro/scheduler/services.py +186 -12
- experimaestro/scheduler/state_db.py +388 -0
- experimaestro/scheduler/state_provider.py +2345 -0
- experimaestro/scheduler/state_sync.py +834 -0
- experimaestro/scheduler/workspace.py +52 -10
- experimaestro/scriptbuilder.py +7 -0
- experimaestro/server/__init__.py +147 -57
- experimaestro/server/data/index.css +0 -125
- experimaestro/server/data/index.css.map +1 -1
- experimaestro/server/data/index.js +194 -58
- experimaestro/server/data/index.js.map +1 -1
- experimaestro/settings.py +44 -5
- experimaestro/sphinx/__init__.py +3 -3
- experimaestro/taskglobals.py +20 -0
- experimaestro/tests/conftest.py +80 -0
- experimaestro/tests/core/test_generics.py +2 -2
- experimaestro/tests/identifier_stability.json +45 -0
- experimaestro/tests/launchers/bin/sacct +6 -2
- experimaestro/tests/launchers/bin/sbatch +4 -2
- experimaestro/tests/launchers/test_slurm.py +80 -0
- experimaestro/tests/tasks/test_dynamic.py +231 -0
- experimaestro/tests/test_cli_jobs.py +615 -0
- experimaestro/tests/test_deprecated.py +630 -0
- experimaestro/tests/test_environment.py +200 -0
- experimaestro/tests/test_file_progress_integration.py +1 -1
- experimaestro/tests/test_forward.py +3 -3
- experimaestro/tests/test_identifier.py +372 -41
- experimaestro/tests/test_identifier_stability.py +458 -0
- experimaestro/tests/test_instance.py +3 -3
- experimaestro/tests/test_multitoken.py +442 -0
- experimaestro/tests/test_mypy.py +433 -0
- experimaestro/tests/test_objects.py +312 -5
- experimaestro/tests/test_outputs.py +2 -2
- experimaestro/tests/test_param.py +8 -12
- experimaestro/tests/test_partial_paths.py +231 -0
- experimaestro/tests/test_progress.py +0 -48
- experimaestro/tests/test_resumable_task.py +480 -0
- experimaestro/tests/test_serializers.py +141 -1
- experimaestro/tests/test_state_db.py +434 -0
- experimaestro/tests/test_subparameters.py +160 -0
- experimaestro/tests/test_tags.py +136 -0
- experimaestro/tests/test_tasks.py +107 -121
- experimaestro/tests/test_token_locking.py +252 -0
- experimaestro/tests/test_tokens.py +17 -13
- experimaestro/tests/test_types.py +123 -1
- experimaestro/tests/test_workspace_triggers.py +158 -0
- experimaestro/tests/token_reschedule.py +4 -2
- experimaestro/tests/utils.py +2 -2
- experimaestro/tokens.py +154 -57
- experimaestro/tools/diff.py +1 -1
- experimaestro/tui/__init__.py +8 -0
- experimaestro/tui/app.py +2303 -0
- experimaestro/tui/app.tcss +353 -0
- experimaestro/tui/log_viewer.py +228 -0
- experimaestro/utils/__init__.py +23 -0
- experimaestro/utils/environment.py +148 -0
- experimaestro/utils/git.py +129 -0
- experimaestro/utils/resources.py +1 -1
- experimaestro/version.py +34 -0
- {experimaestro-2.0.0a8.dist-info → experimaestro-2.0.0b4.dist-info}/METADATA +68 -38
- experimaestro-2.0.0b4.dist-info/RECORD +181 -0
- {experimaestro-2.0.0a8.dist-info → experimaestro-2.0.0b4.dist-info}/WHEEL +1 -1
- experimaestro-2.0.0b4.dist-info/entry_points.txt +16 -0
- experimaestro/compat.py +0 -6
- experimaestro/core/objects.pyi +0 -221
- experimaestro/server/data/0c35d18bf06992036b69.woff2 +0 -0
- experimaestro/server/data/219aa9140e099e6c72ed.woff2 +0 -0
- experimaestro/server/data/3a4004a46a653d4b2166.woff +0 -0
- experimaestro/server/data/3baa5b8f3469222b822d.woff +0 -0
- experimaestro/server/data/4d73cb90e394b34b7670.woff +0 -0
- experimaestro/server/data/4ef4218c522f1eb6b5b1.woff2 +0 -0
- experimaestro/server/data/5d681e2edae8c60630db.woff +0 -0
- experimaestro/server/data/6f420cf17cc0d7676fad.woff2 +0 -0
- experimaestro/server/data/c380809fd3677d7d6903.woff2 +0 -0
- experimaestro/server/data/f882956fd323fd322f31.woff +0 -0
- experimaestro-2.0.0a8.dist-info/RECORD +0 -166
- experimaestro-2.0.0a8.dist-info/entry_points.txt +0 -17
- {experimaestro-2.0.0a8.dist-info → experimaestro-2.0.0b4.dist-info}/licenses/LICENSE +0 -0
experimaestro/cli/filter.py
CHANGED
|
@@ -1,57 +1,15 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
from experimaestro.compat import cached_property
|
|
1
|
+
"""Filter expressions for job queries
|
|
2
|
+
|
|
3
|
+
This module provides a filter expression parser for querying jobs by state,
|
|
4
|
+
tags, and other attributes.
|
|
5
|
+
"""
|
|
6
|
+
|
|
8
7
|
import re
|
|
9
|
-
from
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
self.path = path
|
|
15
|
-
self.scriptname = scriptname
|
|
16
|
-
self.check = check
|
|
17
|
-
|
|
18
|
-
@cached_property
|
|
19
|
-
def params(self):
|
|
20
|
-
try:
|
|
21
|
-
return json.loads((self.path / "params.json").read_text())
|
|
22
|
-
except Exception:
|
|
23
|
-
logging.warning("Could not load params.json in %s", self.path)
|
|
24
|
-
return {"tags": {}}
|
|
25
|
-
|
|
26
|
-
@cached_property
|
|
27
|
-
def tags(self) -> List[str]:
|
|
28
|
-
return self.params["tags"]
|
|
29
|
-
|
|
30
|
-
@cached_property
|
|
31
|
-
def state(self) -> Optional[JobState]:
|
|
32
|
-
if (self.path / f"{self.scriptname}.done").is_file():
|
|
33
|
-
return JobState.DONE
|
|
34
|
-
if (self.path / f"{self.scriptname}.failed").is_file():
|
|
35
|
-
return JobState.ERROR
|
|
36
|
-
if (self.path / f"{self.scriptname}.pid").is_file():
|
|
37
|
-
if self.check:
|
|
38
|
-
if process := self.getprocess():
|
|
39
|
-
state = asyncio.run(process.aio_state(0))
|
|
40
|
-
if state is None or state.finished:
|
|
41
|
-
return JobState.ERROR
|
|
42
|
-
else:
|
|
43
|
-
return JobState.ERROR
|
|
44
|
-
return JobState.RUNNING
|
|
45
|
-
else:
|
|
46
|
-
return None
|
|
47
|
-
|
|
48
|
-
def getprocess(self):
|
|
49
|
-
from experimaestro.connectors import Process
|
|
50
|
-
from experimaestro.connectors.local import LocalConnector
|
|
51
|
-
|
|
52
|
-
connector = LocalConnector.instance()
|
|
53
|
-
pinfo = json.loads((self.path / f"{self.scriptname}.pid").read_text())
|
|
54
|
-
return Process.fromDefinition(connector, pinfo)
|
|
8
|
+
from typing import Callable, TYPE_CHECKING
|
|
9
|
+
import pyparsing as pp
|
|
10
|
+
|
|
11
|
+
if TYPE_CHECKING:
|
|
12
|
+
from experimaestro.scheduler.state_provider import MockJob
|
|
55
13
|
|
|
56
14
|
|
|
57
15
|
# --- classes for processing
|
|
@@ -61,14 +19,14 @@ class VarExpr:
|
|
|
61
19
|
def __init__(self, values):
|
|
62
20
|
(self.varname,) = values
|
|
63
21
|
|
|
64
|
-
def get(self,
|
|
22
|
+
def get(self, job: "MockJob"):
|
|
65
23
|
if self.varname == "@state":
|
|
66
|
-
return
|
|
24
|
+
return job.state.name if job.state else None
|
|
67
25
|
|
|
68
26
|
if self.varname == "@name":
|
|
69
|
-
return str(
|
|
27
|
+
return str(job.path.parent.name)
|
|
70
28
|
|
|
71
|
-
return
|
|
29
|
+
return job.tags.get(self.varname, None)
|
|
72
30
|
|
|
73
31
|
def __repr__(self):
|
|
74
32
|
return f"""VAR<{self.varname}>"""
|
|
@@ -81,8 +39,8 @@ class BaseInExpr:
|
|
|
81
39
|
|
|
82
40
|
|
|
83
41
|
class InExpr(BaseInExpr):
|
|
84
|
-
def filter(self,
|
|
85
|
-
value = self.var.get(
|
|
42
|
+
def filter(self, job: "MockJob"):
|
|
43
|
+
value = self.var.get(job)
|
|
86
44
|
return value in self.values
|
|
87
45
|
|
|
88
46
|
def __repr__(self):
|
|
@@ -90,8 +48,8 @@ class InExpr(BaseInExpr):
|
|
|
90
48
|
|
|
91
49
|
|
|
92
50
|
class NotInExpr(BaseInExpr):
|
|
93
|
-
def filter(self,
|
|
94
|
-
value = self.var.get(
|
|
51
|
+
def filter(self, job: "MockJob"):
|
|
52
|
+
value = self.var.get(job)
|
|
95
53
|
return value not in self.values
|
|
96
54
|
|
|
97
55
|
def __repr__(self):
|
|
@@ -106,25 +64,25 @@ class RegexExpr:
|
|
|
106
64
|
def __repr__(self):
|
|
107
65
|
return f"""REGEX[{self.varname}, {self.value}]"""
|
|
108
66
|
|
|
109
|
-
def matches(self,
|
|
67
|
+
def matches(self, _manager, publication):
|
|
110
68
|
if self.varname == "tag":
|
|
111
69
|
return self.value in publication.tags
|
|
112
70
|
|
|
113
71
|
raise AssertionError()
|
|
114
72
|
|
|
115
|
-
def filter(self,
|
|
116
|
-
value = self.var.get(
|
|
73
|
+
def filter(self, job: "MockJob"):
|
|
74
|
+
value = self.var.get(job)
|
|
117
75
|
if not value:
|
|
118
76
|
return False
|
|
119
77
|
|
|
120
|
-
return self.
|
|
78
|
+
return self.regex.match(value)
|
|
121
79
|
|
|
122
80
|
|
|
123
81
|
class ConstantString:
|
|
124
82
|
def __init__(self, tokens):
|
|
125
83
|
(self.value,) = tokens
|
|
126
84
|
|
|
127
|
-
def get(self,
|
|
85
|
+
def get(self, _job: "MockJob"):
|
|
128
86
|
return self.value
|
|
129
87
|
|
|
130
88
|
def __repr__(self):
|
|
@@ -138,8 +96,8 @@ class EqExpr:
|
|
|
138
96
|
def __repr__(self):
|
|
139
97
|
return f"""EQ[{self.var1}, {self.var2}]"""
|
|
140
98
|
|
|
141
|
-
def filter(self,
|
|
142
|
-
return self.var1.get(
|
|
99
|
+
def filter(self, job: "MockJob"):
|
|
100
|
+
return self.var1.get(job) == self.var2.get(job)
|
|
143
101
|
|
|
144
102
|
|
|
145
103
|
class LogicExpr:
|
|
@@ -149,11 +107,11 @@ class LogicExpr:
|
|
|
149
107
|
self.operator, self.y = tokens
|
|
150
108
|
self.x = None
|
|
151
109
|
|
|
152
|
-
def filter(self,
|
|
110
|
+
def filter(self, job: "MockJob"):
|
|
153
111
|
if self.operator == "and":
|
|
154
|
-
return self.y.filter(
|
|
112
|
+
return self.y.filter(job) and self.x.filter(job)
|
|
155
113
|
|
|
156
|
-
return self.y.filter(
|
|
114
|
+
return self.y.filter(job) or self.x.filter(job)
|
|
157
115
|
|
|
158
116
|
@staticmethod
|
|
159
117
|
def summary(tokens):
|
|
@@ -187,7 +145,10 @@ quotedString = pp.QuotedString('"', unquoteResults=True) | pp.QuotedString(
|
|
|
187
145
|
"'", unquoteResults=True
|
|
188
146
|
)
|
|
189
147
|
|
|
190
|
-
|
|
148
|
+
# Tag names can contain letters, digits, underscores, and hyphens
|
|
149
|
+
# First character must be a letter, rest can include digits, underscores, hyphens
|
|
150
|
+
tag_name = pp.Word(pp.alphas, pp.alphanums + "_-")
|
|
151
|
+
var = l("@state") | l("@name") | tag_name
|
|
191
152
|
var.setParseAction(VarExpr)
|
|
192
153
|
|
|
193
154
|
regexExpr = var + tilde + quotedString
|
|
@@ -220,7 +181,14 @@ filterExpr = (
|
|
|
220
181
|
expr = (matchExpr + pp.Optional(pipe + filterExpr)).setParseAction(LogicExpr.generator)
|
|
221
182
|
|
|
222
183
|
|
|
223
|
-
def createFilter(query: str) -> Callable[[
|
|
224
|
-
"""Returns a filter
|
|
184
|
+
def createFilter(query: str) -> Callable[["MockJob"], bool]:
|
|
185
|
+
"""Returns a filter function given a query string
|
|
186
|
+
|
|
187
|
+
Args:
|
|
188
|
+
query: Filter expression (e.g., '@state = "DONE" and model = "bm25"')
|
|
189
|
+
|
|
190
|
+
Returns:
|
|
191
|
+
A callable that takes a MockJob and returns True if it matches
|
|
192
|
+
"""
|
|
225
193
|
(r,) = logicExpr.parseString(query, parseAll=True)
|
|
226
194
|
return r.filter
|
experimaestro/cli/jobs.py
CHANGED
|
@@ -1,8 +1,6 @@
|
|
|
1
1
|
# flake8: noqa: T201
|
|
2
|
-
import asyncio
|
|
3
2
|
import subprocess
|
|
4
3
|
from typing import Optional
|
|
5
|
-
from shutil import rmtree
|
|
6
4
|
import click
|
|
7
5
|
from pathlib import Path
|
|
8
6
|
from termcolor import colored, cprint
|
|
@@ -34,6 +32,9 @@ def jobs(
|
|
|
34
32
|
selects jobs where the tag model is "bm25", the tag mode is either
|
|
35
33
|
"a" or "b", and the state is running.
|
|
36
34
|
|
|
35
|
+
Note: Jobs are read from the workspace database. If jobs are missing,
|
|
36
|
+
run 'experimaestro experiments sync' to synchronize the database
|
|
37
|
+
with the filesystem.
|
|
37
38
|
"""
|
|
38
39
|
ws = ctx.obj.workspace = find_workspace(workdir=workdir, workspace=workspace)
|
|
39
40
|
check_xp_path(ctx, None, ws.path)
|
|
@@ -44,117 +45,117 @@ def process(
|
|
|
44
45
|
*,
|
|
45
46
|
experiment="",
|
|
46
47
|
tags="",
|
|
47
|
-
ready=False,
|
|
48
48
|
clean=False,
|
|
49
49
|
kill=False,
|
|
50
50
|
filter="",
|
|
51
51
|
perform=False,
|
|
52
52
|
fullpath=False,
|
|
53
|
-
|
|
53
|
+
count=0,
|
|
54
54
|
):
|
|
55
|
-
from
|
|
55
|
+
"""Process jobs from the workspace database
|
|
56
|
+
|
|
57
|
+
Args:
|
|
58
|
+
workspace: Workspace settings
|
|
59
|
+
experiment: Filter by experiment ID
|
|
60
|
+
tags: Show tags in output
|
|
61
|
+
clean: Clean finished jobs
|
|
62
|
+
kill: Kill running jobs
|
|
63
|
+
filter: Filter expression
|
|
64
|
+
perform: Actually perform kill/clean (dry run if False)
|
|
65
|
+
fullpath: Show full paths instead of short names
|
|
66
|
+
count: Limit output to N most recent jobs (0 = no limit)
|
|
67
|
+
"""
|
|
68
|
+
from .filter import createFilter
|
|
69
|
+
from experimaestro.scheduler.state_provider import WorkspaceStateProvider
|
|
56
70
|
from experimaestro.scheduler import JobState
|
|
57
71
|
|
|
58
|
-
_filter = createFilter(filter) if filter else
|
|
59
|
-
|
|
60
|
-
# Get
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
job_str = (
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
if
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
if info.state is None:
|
|
101
|
-
print(colored(f"NODIR {job_str}", "red"), end="")
|
|
102
|
-
elif info.state.running():
|
|
72
|
+
_filter = createFilter(filter) if filter else None
|
|
73
|
+
|
|
74
|
+
# Get state provider (write mode for kill/clean operations)
|
|
75
|
+
read_only = not (kill or clean)
|
|
76
|
+
provider = WorkspaceStateProvider.get_instance(workspace.path, read_only=read_only)
|
|
77
|
+
|
|
78
|
+
try:
|
|
79
|
+
# Get all jobs from the database
|
|
80
|
+
all_jobs = provider.get_all_jobs()
|
|
81
|
+
|
|
82
|
+
# Filter by experiment if specified
|
|
83
|
+
if experiment:
|
|
84
|
+
all_jobs = [j for j in all_jobs if j.experiment_id == experiment]
|
|
85
|
+
|
|
86
|
+
# Apply filter expression
|
|
87
|
+
if _filter:
|
|
88
|
+
all_jobs = [j for j in all_jobs if _filter(j)]
|
|
89
|
+
|
|
90
|
+
# Sort by submission time (most recent first)
|
|
91
|
+
# Jobs without submittime go to the end
|
|
92
|
+
all_jobs.sort(key=lambda j: j.submittime or 0, reverse=True)
|
|
93
|
+
|
|
94
|
+
# Limit to N most recent jobs if count is specified
|
|
95
|
+
if count > 0:
|
|
96
|
+
all_jobs = all_jobs[:count]
|
|
97
|
+
|
|
98
|
+
if not all_jobs:
|
|
99
|
+
cprint("No jobs found.", "yellow")
|
|
100
|
+
return
|
|
101
|
+
|
|
102
|
+
# Process each job
|
|
103
|
+
for job in all_jobs:
|
|
104
|
+
job_str = str(job.path) if fullpath else f"{job.task_id}/{job.identifier}"
|
|
105
|
+
|
|
106
|
+
# Add experiment info
|
|
107
|
+
if job.experiment_id:
|
|
108
|
+
job_str += f" [{job.experiment_id}]"
|
|
109
|
+
|
|
110
|
+
if job.state is None or job.state == JobState.UNSCHEDULED:
|
|
111
|
+
print(colored(f"UNSCHED {job_str}", "red"), end="")
|
|
112
|
+
elif job.state.running():
|
|
103
113
|
if kill:
|
|
104
114
|
if perform:
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
cprint(
|
|
108
|
-
"internal error – no process could be retrieved",
|
|
109
|
-
"red",
|
|
110
|
-
)
|
|
115
|
+
if provider.kill_job(job, perform=True):
|
|
116
|
+
cprint(f"KILLED {job_str}", "light_red")
|
|
111
117
|
else:
|
|
112
|
-
cprint(f"
|
|
113
|
-
process.kill()
|
|
118
|
+
cprint(f"KILL FAILED {job_str}", "red")
|
|
114
119
|
else:
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
colored(f"{
|
|
118
|
-
|
|
119
|
-
)
|
|
120
|
-
elif
|
|
121
|
-
print(
|
|
122
|
-
colored(f"DONE {job_str}", "green"),
|
|
123
|
-
end="",
|
|
124
|
-
)
|
|
125
|
-
elif info.state == JobState.ERROR:
|
|
120
|
+
cprint(f"KILLING {job_str} (dry run)", "yellow")
|
|
121
|
+
else:
|
|
122
|
+
print(colored(f"{job.state.name:8}{job_str}", "yellow"), end="")
|
|
123
|
+
elif job.state == JobState.DONE:
|
|
124
|
+
print(colored(f"DONE {job_str}", "green"), end="")
|
|
125
|
+
elif job.state == JobState.ERROR:
|
|
126
126
|
print(colored(f"FAIL {job_str}", "red"), end="")
|
|
127
127
|
else:
|
|
128
|
-
print(
|
|
129
|
-
colored(f"{info.state.name:8}{job_str}", "red"),
|
|
130
|
-
end="",
|
|
131
|
-
)
|
|
128
|
+
print(colored(f"{job.state.name:8}{job_str}", "red"), end="")
|
|
132
129
|
|
|
133
|
-
|
|
134
|
-
if
|
|
135
|
-
|
|
136
|
-
|
|
130
|
+
# Show tags if requested
|
|
131
|
+
if tags and job.tags:
|
|
132
|
+
print(f""" {" ".join(f"{k}={v}" for k, v in job.tags.items())}""")
|
|
133
|
+
elif not (kill and perform):
|
|
134
|
+
print()
|
|
137
135
|
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
136
|
+
# Clean finished jobs
|
|
137
|
+
if clean and job.state and job.state.finished():
|
|
138
|
+
if perform:
|
|
139
|
+
if provider.clean_job(job, perform=True):
|
|
140
|
+
cprint(" Cleaned", "red")
|
|
141
|
+
else:
|
|
142
|
+
cprint(" Clean failed", "red")
|
|
143
|
+
else:
|
|
144
|
+
cprint(" Would clean (dry run)", "yellow")
|
|
142
145
|
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
print()
|
|
146
|
+
print()
|
|
147
|
+
|
|
148
|
+
finally:
|
|
149
|
+
# Close provider if we created it for write mode
|
|
150
|
+
if not read_only:
|
|
151
|
+
provider.close()
|
|
150
152
|
|
|
151
153
|
|
|
152
154
|
@click.option("--experiment", default=None, help="Restrict to this experiment")
|
|
153
155
|
@click.option("--tags", is_flag=True, help="Show tags")
|
|
154
|
-
@click.option("--ready", is_flag=True, help="Include tasks which are not yet scheduled")
|
|
155
156
|
@click.option("--filter", default="", help="Filter expression")
|
|
156
157
|
@click.option("--fullpath", is_flag=True, help="Prints full paths")
|
|
157
|
-
@click.option("--
|
|
158
|
+
@click.option("--count", "-c", default=0, type=int, help="Limit to N most recent jobs")
|
|
158
159
|
@jobs.command()
|
|
159
160
|
@click.pass_context
|
|
160
161
|
def list(
|
|
@@ -162,24 +163,22 @@ def list(
|
|
|
162
163
|
experiment: str,
|
|
163
164
|
filter: str,
|
|
164
165
|
tags: bool,
|
|
165
|
-
ready: bool,
|
|
166
166
|
fullpath: bool,
|
|
167
|
-
|
|
167
|
+
count: int,
|
|
168
168
|
):
|
|
169
|
+
"""List all jobs in the workspace (sorted by submission date, most recent first)"""
|
|
169
170
|
process(
|
|
170
171
|
ctx.obj.workspace,
|
|
171
172
|
experiment=experiment,
|
|
172
173
|
filter=filter,
|
|
173
174
|
tags=tags,
|
|
174
|
-
ready=ready,
|
|
175
175
|
fullpath=fullpath,
|
|
176
|
-
|
|
176
|
+
count=count,
|
|
177
177
|
)
|
|
178
178
|
|
|
179
179
|
|
|
180
180
|
@click.option("--experiment", default=None, help="Restrict to this experiment")
|
|
181
181
|
@click.option("--tags", is_flag=True, help="Show tags")
|
|
182
|
-
@click.option("--ready", is_flag=True, help="Include tasks which are not yet scheduled")
|
|
183
182
|
@click.option("--filter", default="", help="Filter expression")
|
|
184
183
|
@click.option("--perform", is_flag=True, help="Really perform the killing")
|
|
185
184
|
@click.option("--fullpath", is_flag=True, help="Prints full paths")
|
|
@@ -190,17 +189,15 @@ def kill(
|
|
|
190
189
|
experiment: str,
|
|
191
190
|
filter: str,
|
|
192
191
|
tags: bool,
|
|
193
|
-
ready: bool,
|
|
194
192
|
fullpath: bool,
|
|
195
193
|
perform: bool,
|
|
196
|
-
check: bool,
|
|
197
194
|
):
|
|
195
|
+
"""Kill running jobs"""
|
|
198
196
|
process(
|
|
199
197
|
ctx.obj.workspace,
|
|
200
198
|
experiment=experiment,
|
|
201
199
|
filter=filter,
|
|
202
200
|
tags=tags,
|
|
203
|
-
ready=ready,
|
|
204
201
|
kill=True,
|
|
205
202
|
perform=perform,
|
|
206
203
|
fullpath=fullpath,
|
|
@@ -209,7 +206,6 @@ def kill(
|
|
|
209
206
|
|
|
210
207
|
@click.option("--experiment", default=None, help="Restrict to this experiment")
|
|
211
208
|
@click.option("--tags", is_flag=True, help="Show tags")
|
|
212
|
-
@click.option("--ready", is_flag=True, help="Include tasks which are not yet scheduled")
|
|
213
209
|
@click.option("--filter", default="", help="Filter expression")
|
|
214
210
|
@click.option("--perform", is_flag=True, help="Really perform the cleaning")
|
|
215
211
|
@click.option("--fullpath", is_flag=True, help="Prints full paths")
|
|
@@ -220,16 +216,15 @@ def clean(
|
|
|
220
216
|
experiment: str,
|
|
221
217
|
filter: str,
|
|
222
218
|
tags: bool,
|
|
223
|
-
ready: bool,
|
|
224
219
|
fullpath: bool,
|
|
225
220
|
perform: bool,
|
|
226
221
|
):
|
|
222
|
+
"""Clean finished jobs (delete directories and DB entries)"""
|
|
227
223
|
process(
|
|
228
224
|
ctx.obj.workspace,
|
|
229
225
|
experiment=experiment,
|
|
230
226
|
filter=filter,
|
|
231
227
|
tags=tags,
|
|
232
|
-
ready=ready,
|
|
233
228
|
clean=True,
|
|
234
229
|
perform=perform,
|
|
235
230
|
fullpath=fullpath,
|
|
@@ -244,25 +239,81 @@ def clean(
|
|
|
244
239
|
@jobs.command()
|
|
245
240
|
@click.pass_context
|
|
246
241
|
def log(ctx, jobid: str, follow: bool, std: bool):
|
|
242
|
+
"""View job log (stderr by default, stdout with --std)
|
|
243
|
+
|
|
244
|
+
JOBID format: task.name/hash (e.g., mymodule.MyTask/abc123)
|
|
245
|
+
"""
|
|
247
246
|
task_name, task_hash = jobid.split("/")
|
|
248
247
|
_, name = task_name.rsplit(".", 1)
|
|
249
|
-
|
|
248
|
+
log_path = (
|
|
250
249
|
ctx.obj.workspace.path
|
|
251
250
|
/ "jobs"
|
|
252
251
|
/ task_name
|
|
253
252
|
/ task_hash
|
|
254
253
|
/ f"""{name}.{'out' if std else 'err'}"""
|
|
255
254
|
)
|
|
255
|
+
if not log_path.exists():
|
|
256
|
+
cprint(f"Log file not found: {log_path}", "red")
|
|
257
|
+
return
|
|
256
258
|
if follow:
|
|
257
|
-
subprocess.run(["tail", "-f",
|
|
259
|
+
subprocess.run(["tail", "-f", log_path])
|
|
258
260
|
else:
|
|
259
|
-
subprocess.run(["less", "-r",
|
|
261
|
+
subprocess.run(["less", "-r", log_path])
|
|
260
262
|
|
|
261
263
|
|
|
262
264
|
@click.argument("jobid", type=str)
|
|
263
265
|
@jobs.command()
|
|
264
266
|
@click.pass_context
|
|
265
267
|
def path(ctx, jobid: str):
|
|
268
|
+
"""Print the path to a job directory
|
|
269
|
+
|
|
270
|
+
JOBID format: task.name/hash (e.g., mymodule.MyTask/abc123)
|
|
271
|
+
"""
|
|
266
272
|
task_name, task_hash = jobid.split("/")
|
|
267
|
-
|
|
268
|
-
|
|
273
|
+
job_path = ctx.obj.workspace.path / "jobs" / task_name / task_hash
|
|
274
|
+
if not job_path.exists():
|
|
275
|
+
cprint(f"Job directory not found: {job_path}", "red")
|
|
276
|
+
return
|
|
277
|
+
print(job_path)
|
|
278
|
+
|
|
279
|
+
|
|
280
|
+
@click.option("--perform", is_flag=True, help="Actually delete orphan partials")
|
|
281
|
+
@jobs.command("cleanup-partials")
|
|
282
|
+
@click.pass_context
|
|
283
|
+
def cleanup_partials(ctx, perform: bool):
|
|
284
|
+
"""Clean up orphan partial directories
|
|
285
|
+
|
|
286
|
+
Partial directories are shared checkpoint locations created by
|
|
287
|
+
subparameters. When all jobs using a partial are deleted, the
|
|
288
|
+
partial becomes orphaned and can be cleaned up.
|
|
289
|
+
|
|
290
|
+
This command finds all orphan partials and deletes them (or shows
|
|
291
|
+
what would be deleted in dry-run mode).
|
|
292
|
+
"""
|
|
293
|
+
from experimaestro.scheduler.state_provider import WorkspaceStateProvider
|
|
294
|
+
|
|
295
|
+
provider = WorkspaceStateProvider.get_instance(
|
|
296
|
+
ctx.obj.workspace.path, read_only=not perform
|
|
297
|
+
)
|
|
298
|
+
|
|
299
|
+
try:
|
|
300
|
+
orphan_paths = provider.cleanup_orphan_partials(perform=perform)
|
|
301
|
+
|
|
302
|
+
if not orphan_paths:
|
|
303
|
+
cprint("No orphan partials found.", "green")
|
|
304
|
+
return
|
|
305
|
+
|
|
306
|
+
if perform:
|
|
307
|
+
cprint(f"Cleaned {len(orphan_paths)} orphan partial(s):", "green")
|
|
308
|
+
else:
|
|
309
|
+
cprint(f"Found {len(orphan_paths)} orphan partial(s) (dry run):", "yellow")
|
|
310
|
+
|
|
311
|
+
for path in orphan_paths:
|
|
312
|
+
if perform:
|
|
313
|
+
print(colored(f" Deleted: {path}", "red"))
|
|
314
|
+
else:
|
|
315
|
+
print(colored(f" Would delete: {path}", "yellow"))
|
|
316
|
+
|
|
317
|
+
finally:
|
|
318
|
+
if perform:
|
|
319
|
+
provider.close()
|