experimaestro 1.11.1__py3-none-any.whl → 2.0.0b4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of experimaestro might be problematic. Click here for more details.
- experimaestro/__init__.py +10 -11
- experimaestro/annotations.py +167 -206
- experimaestro/cli/__init__.py +140 -16
- experimaestro/cli/filter.py +42 -74
- experimaestro/cli/jobs.py +157 -106
- experimaestro/cli/progress.py +269 -0
- experimaestro/cli/refactor.py +249 -0
- experimaestro/click.py +0 -1
- experimaestro/commandline.py +19 -3
- experimaestro/connectors/__init__.py +22 -3
- experimaestro/connectors/local.py +12 -0
- experimaestro/core/arguments.py +192 -37
- experimaestro/core/identifier.py +127 -12
- experimaestro/core/objects/__init__.py +6 -0
- experimaestro/core/objects/config.py +702 -285
- experimaestro/core/objects/config_walk.py +24 -6
- experimaestro/core/serialization.py +91 -34
- experimaestro/core/serializers.py +1 -8
- experimaestro/core/subparameters.py +164 -0
- experimaestro/core/types.py +198 -83
- experimaestro/exceptions.py +26 -0
- experimaestro/experiments/cli.py +107 -25
- experimaestro/generators.py +50 -9
- experimaestro/huggingface.py +3 -1
- experimaestro/launcherfinder/parser.py +29 -0
- experimaestro/launcherfinder/registry.py +3 -3
- experimaestro/launchers/__init__.py +26 -1
- experimaestro/launchers/direct.py +12 -0
- experimaestro/launchers/slurm/base.py +154 -2
- experimaestro/mkdocs/base.py +6 -8
- experimaestro/mkdocs/metaloader.py +0 -1
- experimaestro/mypy.py +452 -7
- experimaestro/notifications.py +75 -16
- experimaestro/progress.py +404 -0
- experimaestro/rpyc.py +0 -1
- experimaestro/run.py +19 -6
- experimaestro/scheduler/__init__.py +18 -1
- experimaestro/scheduler/base.py +504 -959
- experimaestro/scheduler/dependencies.py +43 -28
- experimaestro/scheduler/dynamic_outputs.py +259 -130
- experimaestro/scheduler/experiment.py +582 -0
- experimaestro/scheduler/interfaces.py +474 -0
- experimaestro/scheduler/jobs.py +485 -0
- experimaestro/scheduler/services.py +186 -12
- experimaestro/scheduler/signal_handler.py +32 -0
- experimaestro/scheduler/state.py +1 -1
- experimaestro/scheduler/state_db.py +388 -0
- experimaestro/scheduler/state_provider.py +2345 -0
- experimaestro/scheduler/state_sync.py +834 -0
- experimaestro/scheduler/workspace.py +52 -10
- experimaestro/scriptbuilder.py +7 -0
- experimaestro/server/__init__.py +153 -32
- experimaestro/server/data/index.css +0 -125
- experimaestro/server/data/index.css.map +1 -1
- experimaestro/server/data/index.js +194 -58
- experimaestro/server/data/index.js.map +1 -1
- experimaestro/settings.py +47 -6
- experimaestro/sphinx/__init__.py +3 -3
- experimaestro/taskglobals.py +20 -0
- experimaestro/tests/conftest.py +80 -0
- experimaestro/tests/core/test_generics.py +2 -2
- experimaestro/tests/identifier_stability.json +45 -0
- experimaestro/tests/launchers/bin/sacct +6 -2
- experimaestro/tests/launchers/bin/sbatch +4 -2
- experimaestro/tests/launchers/common.py +2 -2
- experimaestro/tests/launchers/test_slurm.py +80 -0
- experimaestro/tests/restart.py +1 -1
- experimaestro/tests/tasks/all.py +7 -0
- experimaestro/tests/tasks/test_dynamic.py +231 -0
- experimaestro/tests/test_checkers.py +2 -2
- experimaestro/tests/test_cli_jobs.py +615 -0
- experimaestro/tests/test_dependencies.py +11 -17
- experimaestro/tests/test_deprecated.py +630 -0
- experimaestro/tests/test_environment.py +200 -0
- experimaestro/tests/test_experiment.py +3 -3
- experimaestro/tests/test_file_progress.py +425 -0
- experimaestro/tests/test_file_progress_integration.py +477 -0
- experimaestro/tests/test_forward.py +3 -3
- experimaestro/tests/test_generators.py +93 -0
- experimaestro/tests/test_identifier.py +520 -169
- experimaestro/tests/test_identifier_stability.py +458 -0
- experimaestro/tests/test_instance.py +16 -21
- experimaestro/tests/test_multitoken.py +442 -0
- experimaestro/tests/test_mypy.py +433 -0
- experimaestro/tests/test_objects.py +314 -30
- experimaestro/tests/test_outputs.py +8 -8
- experimaestro/tests/test_param.py +22 -26
- experimaestro/tests/test_partial_paths.py +231 -0
- experimaestro/tests/test_progress.py +2 -50
- experimaestro/tests/test_resumable_task.py +480 -0
- experimaestro/tests/test_serializers.py +141 -60
- experimaestro/tests/test_state_db.py +434 -0
- experimaestro/tests/test_subparameters.py +160 -0
- experimaestro/tests/test_tags.py +151 -15
- experimaestro/tests/test_tasks.py +137 -160
- experimaestro/tests/test_token_locking.py +252 -0
- experimaestro/tests/test_tokens.py +25 -19
- experimaestro/tests/test_types.py +133 -11
- experimaestro/tests/test_validation.py +19 -19
- experimaestro/tests/test_workspace_triggers.py +158 -0
- experimaestro/tests/token_reschedule.py +5 -3
- experimaestro/tests/utils.py +2 -2
- experimaestro/tokens.py +154 -57
- experimaestro/tools/diff.py +8 -1
- experimaestro/tui/__init__.py +8 -0
- experimaestro/tui/app.py +2303 -0
- experimaestro/tui/app.tcss +353 -0
- experimaestro/tui/log_viewer.py +228 -0
- experimaestro/typingutils.py +11 -2
- experimaestro/utils/__init__.py +23 -0
- experimaestro/utils/environment.py +148 -0
- experimaestro/utils/git.py +129 -0
- experimaestro/utils/resources.py +1 -1
- experimaestro/version.py +34 -0
- {experimaestro-1.11.1.dist-info → experimaestro-2.0.0b4.dist-info}/METADATA +70 -39
- experimaestro-2.0.0b4.dist-info/RECORD +181 -0
- {experimaestro-1.11.1.dist-info → experimaestro-2.0.0b4.dist-info}/WHEEL +1 -1
- experimaestro-2.0.0b4.dist-info/entry_points.txt +16 -0
- experimaestro/compat.py +0 -6
- experimaestro/core/objects.pyi +0 -225
- experimaestro/server/data/0c35d18bf06992036b69.woff2 +0 -0
- experimaestro/server/data/219aa9140e099e6c72ed.woff2 +0 -0
- experimaestro/server/data/3a4004a46a653d4b2166.woff +0 -0
- experimaestro/server/data/3baa5b8f3469222b822d.woff +0 -0
- experimaestro/server/data/4d73cb90e394b34b7670.woff +0 -0
- experimaestro/server/data/4ef4218c522f1eb6b5b1.woff2 +0 -0
- experimaestro/server/data/5d681e2edae8c60630db.woff +0 -0
- experimaestro/server/data/6f420cf17cc0d7676fad.woff2 +0 -0
- experimaestro/server/data/c380809fd3677d7d6903.woff2 +0 -0
- experimaestro/server/data/f882956fd323fd322f31.woff +0 -0
- experimaestro-1.11.1.dist-info/RECORD +0 -158
- experimaestro-1.11.1.dist-info/entry_points.txt +0 -17
- {experimaestro-1.11.1.dist-info → experimaestro-2.0.0b4.dist-info/licenses}/LICENSE +0 -0
experimaestro/generators.py
CHANGED
|
@@ -1,10 +1,13 @@
|
|
|
1
1
|
import inspect
|
|
2
2
|
from pathlib import Path
|
|
3
3
|
from abc import ABC, abstractmethod
|
|
4
|
-
from typing import Callable, Union
|
|
4
|
+
from typing import Callable, Union, TYPE_CHECKING
|
|
5
5
|
from experimaestro.core.arguments import ArgumentOptions, TypeAnnotation
|
|
6
6
|
from experimaestro.core.objects import ConfigWalkContext, Config
|
|
7
7
|
|
|
8
|
+
if TYPE_CHECKING:
|
|
9
|
+
from experimaestro.core.subparameters import Subparameters
|
|
10
|
+
|
|
8
11
|
|
|
9
12
|
class Generator(ABC):
|
|
10
13
|
"""Base class for all generators"""
|
|
@@ -15,25 +18,63 @@ class Generator(ABC):
|
|
|
15
18
|
return False
|
|
16
19
|
|
|
17
20
|
@abstractmethod
|
|
18
|
-
def __call__(self, context: ConfigWalkContext, config: Config):
|
|
19
|
-
...
|
|
21
|
+
def __call__(self, context: ConfigWalkContext, config: Config): ...
|
|
20
22
|
|
|
21
23
|
|
|
22
24
|
class PathGenerator(Generator):
    """Generate paths within the task directory.

    Use ``PathGenerator`` with ``field(default_factory=...)`` to create
    paths relative to the task's working directory.

    Example::

        class MyTask(Task):
            output: Meta[Path] = field(default_factory=PathGenerator("results.json"))
            model: Meta[Path] = field(default_factory=PathGenerator("model.pt"))

    For shared directories across related tasks, use with subparameters::

        training_group = param_group("training")

        class Train(Task):
            epochs: Param[int] = field(groups=[training_group])
            checkpoint: Meta[Path] = field(
                default_factory=PathGenerator(
                    "model.pt",
                    partial=subparameters(exclude=[training_group])
                )
            )

    :param path: Relative path within the task directory. Can be a string,
        Path, or any callable that takes (context, config) and returns a Path.
        An empty value (the default) yields the base directory itself.
    :param partial: Optional subparameters for partial directory sharing
        (keyword-only). When provided, the base directory is obtained from
        ``context.partial_path(partial, config)`` instead of
        ``context.currentpath()``.
    """

    def __init__(
        self,
        path: Union[str, Path, Callable[[ConfigWalkContext, Config], Path]] = "",
        *,
        partial: Union["Subparameters", None] = None,
    ):
        self.path = path
        self.partial = partial

    def __call__(self, context: ConfigWalkContext, config: Config):
        """Compute the generated path for ``config`` within ``context``."""
        # Determine base path: partial directory or job directory
        if self.partial is not None:
            base_path = context.partial_path(self.partial, config)
        else:
            base_path = context.currentpath()

        # Generate the final path. ``callable`` (rather than
        # ``inspect.isfunction``) also accepts bound methods,
        # ``functools.partial`` objects and callable instances; ``str`` and
        # ``Path`` values are never callable, so they are unaffected.
        if callable(self.path):
            return base_path / self.path(context, config)
        if self.path:
            return base_path / Path(self.path)
        return base_path

    def isoutput(self):
        """Paths produced by this generator are flagged as outputs."""
        return True
|
experimaestro/huggingface.py
CHANGED
|
@@ -105,6 +105,35 @@ class Visitor(PTNodeVisitor):
|
|
|
105
105
|
|
|
106
106
|
|
|
107
107
|
def parse(expr: str):
    """Turn a requirement specification string into a HostRequirement.

    A specification describes the hardware needed to run a task.
    Alternatives are separated by ``|`` (OR); inside one alternative,
    individual requirements are joined with ``&`` (AND).

    **Syntax elements:**

    - ``duration=<N><unit>``: Job duration (units: h/hours, d/days, m/mins)
    - ``cpu(mem=<size>, cores=<N>)``: CPU requirements
    - ``cuda(mem=<size>) * <N>``: GPU requirements (memory and count)
    - Memory sizes: ``<N>G``, ``<N>GiB``, ``<N>M``, ``<N>MiB``

    :param expr: The requirement specification string
    :return: A :class:`~experimaestro.launcherfinder.specs.HostRequirement` object

    **Example:**

    .. code-block:: python

        from experimaestro.launcherfinder.parser import parse

        # Request 2 GPUs with 32GB each, 700GB RAM, for 40 hours
        # OR 4 GPUs with 32GB each for 50 hours
        req = parse(
            "duration=40h & cpu(mem=700GiB) & cuda(mem=32GiB) * 2"
            " | duration=50h & cpu(mem=700GiB) & cuda(mem=32GiB) * 4"
        )
    """
    # Build the grammar parser and produce the parse tree in one step
    tree = ParserPython(
        grammar, syntax_classes={"StrMatch": SuppressStrMatch}
    ).parse(expr)

    # Walk the tree with a non-debugging visitor to build the result
    visitor = Visitor(debug=False)
    return visit_parse_tree(tree, visitor)
|
|
@@ -6,7 +6,7 @@ from typing import ClassVar, Dict, Optional, Set, Type, Union
|
|
|
6
6
|
from pathlib import Path
|
|
7
7
|
import typing
|
|
8
8
|
from omegaconf import DictConfig, OmegaConf, SCMode
|
|
9
|
-
import
|
|
9
|
+
from importlib.metadata import entry_points
|
|
10
10
|
from experimaestro.utils import logger
|
|
11
11
|
from .base import ConnectorConfiguration, TokenConfiguration
|
|
12
12
|
from .specs import HostRequirement, RequirementUnion
|
|
@@ -75,10 +75,10 @@ class LauncherRegistry:
|
|
|
75
75
|
self.find_launcher_fn = None
|
|
76
76
|
|
|
77
77
|
# Use entry points for connectors and launchers
|
|
78
|
-
for entry_point in
|
|
78
|
+
for entry_point in entry_points(group="experimaestro.connectors"):
|
|
79
79
|
entry_point.load().init_registry(self)
|
|
80
80
|
|
|
81
|
-
for entry_point in
|
|
81
|
+
for entry_point in entry_points(group="experimaestro.tokens"):
|
|
82
82
|
entry_point.load().init_registry(self)
|
|
83
83
|
|
|
84
84
|
# Register the find launcher function if it exists
|
|
@@ -31,7 +31,19 @@ SubmitListener = Callable[[Job], None]
|
|
|
31
31
|
|
|
32
32
|
|
|
33
33
|
class Launcher(ABC):
|
|
34
|
-
"""
|
|
34
|
+
"""Base class for task launchers.
|
|
35
|
+
|
|
36
|
+
Launchers are responsible for executing tasks on a compute resource.
|
|
37
|
+
They work with a :class:`~experimaestro.connectors.Connector` to
|
|
38
|
+
access the target system and manage process execution.
|
|
39
|
+
|
|
40
|
+
Subclasses include:
|
|
41
|
+
|
|
42
|
+
- :class:`~experimaestro.launchers.direct.DirectLauncher`: Local execution
|
|
43
|
+
- :class:`~experimaestro.launchers.slurm.SlurmLauncher`: SLURM cluster
|
|
44
|
+
|
|
45
|
+
:param connector: The connector to use for accessing the compute resource
|
|
46
|
+
"""
|
|
35
47
|
|
|
36
48
|
submit_listeners: List[SubmitListener]
|
|
37
49
|
|
|
@@ -69,6 +81,19 @@ class Launcher(ABC):
|
|
|
69
81
|
By default, returns the associated connector builder"""
|
|
70
82
|
return self.connector.processbuilder()
|
|
71
83
|
|
|
84
|
+
@abstractmethod
def launcher_info_code(self) -> str:
    """Return the Python snippet that installs launcher info in a task script.

    The snippet is inserted into the generated task script so that
    launcher-specific information (like LauncherInformation for querying
    remaining time) is available during task execution.

    Returns:
        Python code as a string, or empty string if no setup needed.
    """
|
|
96
|
+
|
|
72
97
|
@staticmethod
|
|
73
98
|
def get(path: Path):
|
|
74
99
|
"""Get a default launcher for a given path"""
|
|
@@ -3,8 +3,20 @@ from . import Launcher
|
|
|
3
3
|
|
|
4
4
|
|
|
5
5
|
class DirectLauncher(Launcher):
    """Launcher that runs tasks directly as local processes.

    Default launcher: tasks are executed on the local machine, as Python
    subprocesses, without going through any job scheduler.

    :param connector: The connector to use (defaults to LocalConnector)
    """

    def __str__(self):
        connector = self.connector
        return f"DirectLauncher({connector})"

    def launcher_info_code(self) -> str:
        """Returns empty string as local launcher has no time limits."""
        no_setup_needed = ""
        return no_setup_needed

    def scriptbuilder(self):
        """Return a fresh ``PythonScriptBuilder`` for building the task script."""
        builder = PythonScriptBuilder()
        return builder
|
|
@@ -7,6 +7,7 @@ from typing import (
|
|
|
7
7
|
List,
|
|
8
8
|
Optional,
|
|
9
9
|
Tuple,
|
|
10
|
+
TYPE_CHECKING,
|
|
10
11
|
get_type_hints,
|
|
11
12
|
)
|
|
12
13
|
from experimaestro.connectors.local import LocalConnector
|
|
@@ -20,7 +21,7 @@ from experimaestro.launcherfinder.registry import (
|
|
|
20
21
|
from experimaestro.utils import ThreadingCondition
|
|
21
22
|
from experimaestro.tests.connectors.utils import OutputCaptureHandler
|
|
22
23
|
from experimaestro.utils.asyncio import asyncThreadcheck
|
|
23
|
-
from
|
|
24
|
+
from functools import cached_property
|
|
24
25
|
from experimaestro.launchers import Launcher
|
|
25
26
|
from experimaestro.scriptbuilder import PythonScriptBuilder
|
|
26
27
|
from experimaestro.connectors import (
|
|
@@ -32,8 +33,131 @@ from experimaestro.connectors import (
|
|
|
32
33
|
RedirectType,
|
|
33
34
|
)
|
|
34
35
|
|
|
36
|
+
if TYPE_CHECKING:
|
|
37
|
+
from experimaestro.scheduler.jobs import JobState
|
|
38
|
+
|
|
35
39
|
logger = logging.getLogger("xpm.slurm")
|
|
36
40
|
|
|
41
|
+
# Cached job end time (absolute timestamp).
|
|
42
|
+
# Only used when a task is running within a SLURM job.
|
|
43
|
+
_slurm_job_end_time: Optional[float] = None
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
class SlurmLauncherInformation:
    """Launcher information for SLURM jobs, used during task execution."""

    def __init__(self, binpath: str = "/usr/bin"):
        """
        :param binpath: Directory containing the SLURM binaries (``squeue``)
        """
        self.binpath = Path(binpath)

    def remaining_time(self) -> Optional[float]:
        """Returns the remaining time in seconds before the SLURM job times out.

        Uses the SLURM_JOB_ID environment variable to query squeue for the
        remaining time. The job end time is cached (module-wide) on first
        successful call; later calls are computed from that cached value.

        Returns:
            The remaining time in seconds, or None if no time limit (or if
            the remaining time could not be determined).
        """
        import os
        import time

        global _slurm_job_end_time

        # Use cached end time if available
        if _slurm_job_end_time is not None:
            remaining = _slurm_job_end_time - time.time()
            return max(0.0, remaining)

        # Query SLURM for remaining time and compute end time
        job_id = os.environ.get("SLURM_JOB_ID")
        if not job_id:
            logger.debug("No SLURM_JOB_ID in environment, cannot get remaining time")
            return None

        remaining_seconds = self._query_remaining_time(job_id)
        if remaining_seconds is None:
            return None

        # Cache the absolute end time
        _slurm_job_end_time = time.time() + remaining_seconds
        return remaining_seconds

    def _query_remaining_time(self, job_id: str) -> Optional[float]:
        """Query SLURM for remaining time of a job.

        Best-effort: any failure (non-zero exit code, timeout, missing
        binary, unparsable output) is logged and reported as None.
        """
        import subprocess

        try:
            result = subprocess.run(
                [
                    f"{self.binpath}/squeue",
                    "--job",
                    job_id,
                    "--format=%L",
                    "--noheader",
                ],
                capture_output=True,
                text=True,
                timeout=30,
            )

            if result.returncode != 0:
                logger.warning(
                    "squeue returned error code %d: %s",
                    result.returncode,
                    result.stderr,
                )
                return None

            return self._parse_squeue_output(result.stdout)
        except subprocess.TimeoutExpired:
            logger.warning("Timeout querying squeue for remaining time")
            return None
        except Exception as e:
            logger.warning("Error querying SLURM remaining time: %s", e)
            return None

    @staticmethod
    def _parse_squeue_output(output: str) -> Optional[float]:
        """Extract the remaining time (seconds) from raw ``squeue`` output.

        squeue may print more than one row for a single job id; the first
        non-empty row is used so that the embedded newlines do not make the
        whole output unparsable.
        NOTE(review): assumes the first row is the relevant one when several
        rows are printed -- confirm for heterogeneous jobs.

        Returns:
            Time in seconds, or None for empty output, ``UNLIMITED`` or an
            unparsable value.
        """
        rows = [row.strip() for row in output.splitlines()]
        time_str = next((row for row in rows if row), "")
        if not time_str or time_str == "UNLIMITED":
            return None
        return SlurmLauncherInformation._parse_slurm_time(time_str)

    @staticmethod
    def _parse_slurm_time(time_str: str) -> Optional[float]:
        """Parse SLURM time format to seconds.

        SLURM time format can be:
        - D-HH:MM:SS (days-hours:minutes:seconds)
        - HH:MM:SS (hours:minutes:seconds)
        - MM:SS (minutes:seconds)
        - SS (seconds)

        Returns:
            Time in seconds, or None if parsing fails
        """
        try:
            days = 0
            if "-" in time_str:
                days_str, time_str = time_str.split("-", 1)
                days = int(days_str)

            parts = time_str.split(":")
            if len(parts) == 3:
                hours, minutes, seconds = int(parts[0]), int(parts[1]), int(parts[2])
            elif len(parts) == 2:
                hours = 0
                minutes, seconds = int(parts[0]), int(parts[1])
            elif len(parts) == 1:
                hours = 0
                minutes = 0
                seconds = int(parts[0])
            else:
                logger.warning("Could not parse SLURM time: %s", time_str)
                return None

            return float(days * 86400 + hours * 3600 + minutes * 60 + seconds)
        except (ValueError, IndexError) as e:
            logger.warning("Could not parse SLURM time '%s': %s", time_str, e)
            return None
|
|
160
|
+
|
|
37
161
|
|
|
38
162
|
class SlurmJobState:
|
|
39
163
|
start: str
|
|
@@ -176,14 +300,34 @@ class BatchSlurmProcess(Process):
|
|
|
176
300
|
def __init__(self, launcher: "SlurmLauncher", jobid: str):
    """Bind this process handle to a SLURM job.

    :param launcher: The launcher the job was submitted with
    :param jobid: The SLURM job identifier
    """
    # Filled in by wait() once the job is seen as finished
    self._last_state: Optional[SlurmJobState] = None
    self.launcher, self.jobid = launcher, jobid
|
|
179
304
|
|
|
180
305
|
def wait(self):
    """Wait until the SLURM job is reported as finished.

    Repeatedly asks the process watcher for the job; once a finished state
    is seen it is stored in ``self._last_state``.

    :return: 0 when the final SLURM state is ``COMPLETED``, 1 otherwise
    """
    with SlurmProcessWatcher.get(self.launcher) as watcher:
        while True:
            job = watcher.getjob(self.jobid)
            # No information yet, or job still running: ask again
            if not job or not job.finished():
                continue

            self._last_state = job
            if job.slurm_state == "COMPLETED":
                return 0
            return 1
|
|
186
312
|
|
|
313
|
+
def get_job_state(self, code: int) -> "JobState":
    """Map the exit code returned by ``wait`` to a job state.

    A zero code means success. A non-zero code is reported as a timeout
    failure when the last recorded SLURM state is ``TIMEOUT``, and as a
    generic error otherwise.

    :param code: The exit code (0 for success)
    :return: The corresponding job state
    """
    # Imported here (the module-level import is under TYPE_CHECKING only)
    from experimaestro.scheduler.jobs import (
        JobFailureStatus,
        JobState,
        JobStateError,
    )

    if code == 0:
        return JobState.DONE

    last = self._last_state
    if not (last and last.slurm_state == "TIMEOUT"):
        return JobState.ERROR

    # The job was killed by SLURM because it exceeded its time limit
    logger.info("SLURM job %s timed out", self.jobid)
    return JobStateError(JobFailureStatus.TIMEOUT)
|
|
330
|
+
|
|
187
331
|
async def aio_state(self, timeout: float | None = None) -> ProcessState:
|
|
188
332
|
def check():
|
|
189
333
|
with SlurmProcessWatcher.get(self.launcher) as watcher:
|
|
@@ -432,7 +576,7 @@ class SlurmLauncher(Launcher):
|
|
|
432
576
|
def scriptbuilder(self):
    """Return the script builder for this launcher.

    A ``SlurmScriptBuilder`` bound to this launcher is created on each call.

    NOTE(review): the original note reads "We assume Unix, but should be
    changed to PythonScriptBuilder when working" and stops mid-sentence;
    the intended condition should be confirmed and completed.
    """
    builder = SlurmScriptBuilder(self)
    return builder
|
|
438
582
|
|
|
@@ -442,6 +586,14 @@ class SlurmLauncher(Launcher):
|
|
|
442
586
|
By default, returns the associated connector builder"""
|
|
443
587
|
return SlurmProcessBuilder(self)
|
|
444
588
|
|
|
589
|
+
def launcher_info_code(self) -> str:
    """Returns Python code to set up launcher info during task execution.

    The returned snippet (three lines, each indented by four spaces and
    newline-terminated) imports ``SlurmLauncherInformation`` and
    ``taskglobals`` and stores a ``SlurmLauncherInformation`` configured
    with this launcher's ``binpath`` in the task environment.

    Returns:
        Python source code as a string.
    """
    # Emit the path through repr() so the generated code stays a valid
    # string literal even when the path contains quotes or backslashes.
    binpath_literal = repr(str(self.binpath))
    return (
        "    from experimaestro.launchers.slurm import SlurmLauncherInformation\n"
        "    from experimaestro import taskglobals\n"
        "    taskglobals.Env.instance().launcher_info = "
        f"SlurmLauncherInformation(binpath={binpath_literal})\n"
    )
|
|
596
|
+
|
|
445
597
|
|
|
446
598
|
class SlurmScriptBuilder(PythonScriptBuilder):
|
|
447
599
|
def __init__(self, launcher: SlurmLauncher, pythonpath=None):
|
experimaestro/mkdocs/base.py
CHANGED
|
@@ -4,12 +4,11 @@ See https://www.mkdocs.org/user-guide/plugins/ for plugin API documentation
|
|
|
4
4
|
"""
|
|
5
5
|
|
|
6
6
|
from collections import defaultdict
|
|
7
|
-
import functools
|
|
8
7
|
import re
|
|
9
8
|
from experimaestro.mkdocs.annotations import shoulddocument
|
|
10
9
|
import requests
|
|
11
10
|
from urllib.parse import urljoin
|
|
12
|
-
from experimaestro.core.types import ObjectType
|
|
11
|
+
from experimaestro.core.types import ObjectType
|
|
13
12
|
import mkdocs
|
|
14
13
|
from pathlib import Path
|
|
15
14
|
from typing import Dict, Iterator, List, Optional, Set, Tuple, Type as TypingType
|
|
@@ -76,7 +75,7 @@ class ObjectLatticeNode:
|
|
|
76
75
|
return f"node({self.objecttype.identifier})"
|
|
77
76
|
|
|
78
77
|
def isAncestor(self, other):
    """Tell whether this node's config type derives from ``other``'s.

    :return: True when ``self.objecttype.config_type`` is a subclass of
        (or the same class as) ``other.objecttype.config_type``
    """
    own_type = self.objecttype.config_type
    other_type = other.objecttype.config_type
    return issubclass(own_type, other_type)
|
|
80
79
|
|
|
81
80
|
def _addChild(self, child: "ObjectLatticeNode"):
|
|
82
81
|
child.parents.add(self)
|
|
@@ -321,7 +320,7 @@ class Documentation(mkdocs.plugins.BasePlugin):
|
|
|
321
320
|
|
|
322
321
|
for node in self.lattice.iter_all():
|
|
323
322
|
if node.objecttype is not None:
|
|
324
|
-
member = node.objecttype.
|
|
323
|
+
member = node.objecttype.value_type
|
|
325
324
|
qname = f"{member.__module__}.{member.__qualname__}"
|
|
326
325
|
path = self.type2path[qname]
|
|
327
326
|
|
|
@@ -354,7 +353,7 @@ class Documentation(mkdocs.plugins.BasePlugin):
|
|
|
354
353
|
# Now, sort according to descendant/ascendant relationship or name
|
|
355
354
|
nodes = set()
|
|
356
355
|
for _node in cfgs:
|
|
357
|
-
if issubclass(_node.objecttype.
|
|
356
|
+
if issubclass(_node.objecttype.config_type, xpmtype.config_type):
|
|
358
357
|
nodes.add(_node)
|
|
359
358
|
|
|
360
359
|
# Removes so they are not generated twice
|
|
@@ -443,11 +442,10 @@ class Documentation(mkdocs.plugins.BasePlugin):
|
|
|
443
442
|
lines.append("\n\n")
|
|
444
443
|
|
|
445
444
|
for name, argument in xpminfo.arguments.items():
|
|
446
|
-
|
|
447
445
|
if isinstance(argument.type, ObjectType):
|
|
448
|
-
|
|
446
|
+
value_type = argument.type.value_type
|
|
449
447
|
typestr = self.getlink(
|
|
450
|
-
page.url, f"{
|
|
448
|
+
page.url, f"{value_type.__module__}.{value_type.__qualname__}"
|
|
451
449
|
)
|
|
452
450
|
else:
|
|
453
451
|
typestr = argument.type.name()
|