experimaestro 2.0.0b4__py3-none-any.whl → 2.0.0b17__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of experimaestro might be problematic. Click here for more details.
- experimaestro/__init__.py +12 -5
- experimaestro/cli/__init__.py +393 -134
- experimaestro/cli/filter.py +48 -23
- experimaestro/cli/jobs.py +253 -71
- experimaestro/cli/refactor.py +1 -2
- experimaestro/commandline.py +7 -4
- experimaestro/connectors/__init__.py +9 -1
- experimaestro/connectors/local.py +43 -3
- experimaestro/core/arguments.py +18 -18
- experimaestro/core/identifier.py +11 -11
- experimaestro/core/objects/config.py +96 -39
- experimaestro/core/objects/config_walk.py +3 -3
- experimaestro/core/{subparameters.py → partial.py} +16 -16
- experimaestro/core/partial_lock.py +394 -0
- experimaestro/core/types.py +12 -15
- experimaestro/dynamic.py +290 -0
- experimaestro/experiments/__init__.py +6 -2
- experimaestro/experiments/cli.py +223 -52
- experimaestro/experiments/configuration.py +24 -0
- experimaestro/generators.py +5 -5
- experimaestro/ipc.py +118 -1
- experimaestro/launcherfinder/__init__.py +2 -2
- experimaestro/launcherfinder/registry.py +6 -7
- experimaestro/launcherfinder/specs.py +2 -9
- experimaestro/launchers/slurm/__init__.py +2 -2
- experimaestro/launchers/slurm/base.py +62 -0
- experimaestro/locking.py +957 -1
- experimaestro/notifications.py +89 -201
- experimaestro/progress.py +63 -366
- experimaestro/rpyc.py +0 -2
- experimaestro/run.py +29 -2
- experimaestro/scheduler/__init__.py +8 -1
- experimaestro/scheduler/base.py +650 -53
- experimaestro/scheduler/dependencies.py +20 -16
- experimaestro/scheduler/experiment.py +764 -169
- experimaestro/scheduler/interfaces.py +338 -96
- experimaestro/scheduler/jobs.py +58 -20
- experimaestro/scheduler/remote/__init__.py +31 -0
- experimaestro/scheduler/remote/adaptive_sync.py +265 -0
- experimaestro/scheduler/remote/client.py +928 -0
- experimaestro/scheduler/remote/protocol.py +282 -0
- experimaestro/scheduler/remote/server.py +447 -0
- experimaestro/scheduler/remote/sync.py +144 -0
- experimaestro/scheduler/services.py +186 -35
- experimaestro/scheduler/state_provider.py +811 -2157
- experimaestro/scheduler/state_status.py +1247 -0
- experimaestro/scheduler/transient.py +31 -0
- experimaestro/scheduler/workspace.py +1 -1
- experimaestro/scheduler/workspace_state_provider.py +1273 -0
- experimaestro/scriptbuilder.py +4 -4
- experimaestro/settings.py +36 -0
- experimaestro/tests/conftest.py +33 -5
- experimaestro/tests/connectors/bin/executable.py +1 -1
- experimaestro/tests/fixtures/pre_experiment/experiment_check_env.py +16 -0
- experimaestro/tests/fixtures/pre_experiment/experiment_check_mock.py +14 -0
- experimaestro/tests/fixtures/pre_experiment/experiment_simple.py +12 -0
- experimaestro/tests/fixtures/pre_experiment/pre_setup_env.py +5 -0
- experimaestro/tests/fixtures/pre_experiment/pre_setup_error.py +3 -0
- experimaestro/tests/fixtures/pre_experiment/pre_setup_mock.py +8 -0
- experimaestro/tests/launchers/bin/test.py +1 -0
- experimaestro/tests/launchers/test_slurm.py +9 -9
- experimaestro/tests/partial_reschedule.py +46 -0
- experimaestro/tests/restart.py +3 -3
- experimaestro/tests/restart_main.py +1 -0
- experimaestro/tests/scripts/notifyandwait.py +1 -0
- experimaestro/tests/task_partial.py +38 -0
- experimaestro/tests/task_tokens.py +2 -2
- experimaestro/tests/tasks/test_dynamic.py +6 -6
- experimaestro/tests/test_dependencies.py +3 -3
- experimaestro/tests/test_deprecated.py +15 -15
- experimaestro/tests/test_dynamic_locking.py +317 -0
- experimaestro/tests/test_environment.py +24 -14
- experimaestro/tests/test_experiment.py +171 -36
- experimaestro/tests/test_identifier.py +25 -25
- experimaestro/tests/test_identifier_stability.py +3 -5
- experimaestro/tests/test_multitoken.py +2 -4
- experimaestro/tests/{test_subparameters.py → test_partial.py} +25 -25
- experimaestro/tests/test_partial_paths.py +81 -138
- experimaestro/tests/test_pre_experiment.py +219 -0
- experimaestro/tests/test_progress.py +2 -8
- experimaestro/tests/test_remote_state.py +1132 -0
- experimaestro/tests/test_stray_jobs.py +261 -0
- experimaestro/tests/test_tasks.py +1 -2
- experimaestro/tests/test_token_locking.py +52 -67
- experimaestro/tests/test_tokens.py +5 -6
- experimaestro/tests/test_transient.py +225 -0
- experimaestro/tests/test_workspace_state_provider.py +768 -0
- experimaestro/tests/token_reschedule.py +1 -3
- experimaestro/tests/utils.py +2 -7
- experimaestro/tokens.py +227 -372
- experimaestro/tools/diff.py +1 -0
- experimaestro/tools/documentation.py +4 -5
- experimaestro/tools/jobs.py +1 -2
- experimaestro/tui/app.py +459 -1895
- experimaestro/tui/app.tcss +162 -0
- experimaestro/tui/dialogs.py +172 -0
- experimaestro/tui/log_viewer.py +253 -3
- experimaestro/tui/messages.py +137 -0
- experimaestro/tui/utils.py +54 -0
- experimaestro/tui/widgets/__init__.py +23 -0
- experimaestro/tui/widgets/experiments.py +468 -0
- experimaestro/tui/widgets/global_services.py +238 -0
- experimaestro/tui/widgets/jobs.py +972 -0
- experimaestro/tui/widgets/log.py +156 -0
- experimaestro/tui/widgets/orphans.py +363 -0
- experimaestro/tui/widgets/runs.py +185 -0
- experimaestro/tui/widgets/services.py +314 -0
- experimaestro/tui/widgets/stray_jobs.py +528 -0
- experimaestro/utils/__init__.py +1 -1
- experimaestro/utils/environment.py +105 -22
- experimaestro/utils/fswatcher.py +124 -0
- experimaestro/utils/jobs.py +1 -2
- experimaestro/utils/jupyter.py +1 -2
- experimaestro/utils/logging.py +72 -0
- experimaestro/version.py +2 -2
- experimaestro/webui/__init__.py +9 -0
- experimaestro/webui/app.py +117 -0
- experimaestro/{server → webui}/data/index.css +66 -11
- experimaestro/webui/data/index.css.map +1 -0
- experimaestro/{server → webui}/data/index.js +82763 -87217
- experimaestro/webui/data/index.js.map +1 -0
- experimaestro/webui/routes/__init__.py +5 -0
- experimaestro/webui/routes/auth.py +53 -0
- experimaestro/webui/routes/proxy.py +117 -0
- experimaestro/webui/server.py +200 -0
- experimaestro/webui/state_bridge.py +152 -0
- experimaestro/webui/websocket.py +413 -0
- {experimaestro-2.0.0b4.dist-info → experimaestro-2.0.0b17.dist-info}/METADATA +8 -9
- experimaestro-2.0.0b17.dist-info/RECORD +219 -0
- experimaestro/cli/progress.py +0 -269
- experimaestro/scheduler/state.py +0 -75
- experimaestro/scheduler/state_db.py +0 -388
- experimaestro/scheduler/state_sync.py +0 -834
- experimaestro/server/__init__.py +0 -467
- experimaestro/server/data/index.css.map +0 -1
- experimaestro/server/data/index.js.map +0 -1
- experimaestro/tests/test_cli_jobs.py +0 -615
- experimaestro/tests/test_file_progress.py +0 -425
- experimaestro/tests/test_file_progress_integration.py +0 -477
- experimaestro/tests/test_state_db.py +0 -434
- experimaestro-2.0.0b4.dist-info/RECORD +0 -181
- /experimaestro/{server → webui}/data/1815e00441357e01619e.ttf +0 -0
- /experimaestro/{server → webui}/data/2463b90d9a316e4e5294.woff2 +0 -0
- /experimaestro/{server → webui}/data/2582b0e4bcf85eceead0.ttf +0 -0
- /experimaestro/{server → webui}/data/89999bdf5d835c012025.woff2 +0 -0
- /experimaestro/{server → webui}/data/914997e1bdfc990d0897.ttf +0 -0
- /experimaestro/{server → webui}/data/c210719e60948b211a12.woff2 +0 -0
- /experimaestro/{server → webui}/data/favicon.ico +0 -0
- /experimaestro/{server → webui}/data/index.html +0 -0
- /experimaestro/{server → webui}/data/login.html +0 -0
- /experimaestro/{server → webui}/data/manifest.json +0 -0
- {experimaestro-2.0.0b4.dist-info → experimaestro-2.0.0b17.dist-info}/WHEEL +0 -0
- {experimaestro-2.0.0b4.dist-info → experimaestro-2.0.0b17.dist-info}/entry_points.txt +0 -0
- {experimaestro-2.0.0b4.dist-info → experimaestro-2.0.0b17.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
"""All classes related to localhost management"""
|
|
2
2
|
|
|
3
|
+
import asyncio
|
|
3
4
|
import subprocess
|
|
4
5
|
from typing import Optional
|
|
5
6
|
from pathlib import Path, WindowsPath, PosixPath
|
|
@@ -39,6 +40,26 @@ class PsutilProcess(Process):
|
|
|
39
40
|
)
|
|
40
41
|
return code
|
|
41
42
|
|
|
43
|
+
async def aio_wait(self) -> int:
|
|
44
|
+
"""Asynchronously wait for process to finish"""
|
|
45
|
+
logger.debug(
|
|
46
|
+
"Async waiting (psutil) for process with PID %s", self._process.pid
|
|
47
|
+
)
|
|
48
|
+
poll_interval = 0.01 # start at 0.01 seconds, max 10 seconds
|
|
49
|
+
|
|
50
|
+
while self._process.is_running():
|
|
51
|
+
await asyncio.sleep(poll_interval)
|
|
52
|
+
poll_interval = min(poll_interval * 1.5, 10.0)
|
|
53
|
+
|
|
54
|
+
# Process has finished, wait() returns immediately
|
|
55
|
+
code = self._process.wait()
|
|
56
|
+
logger.debug(
|
|
57
|
+
"Finished async wait (psutil) for process with PID %s: code %s",
|
|
58
|
+
self._process.pid,
|
|
59
|
+
code,
|
|
60
|
+
)
|
|
61
|
+
return code
|
|
62
|
+
|
|
42
63
|
async def aio_state(self, timeout: float | None = None) -> ProcessState:
|
|
43
64
|
if self._process.is_running():
|
|
44
65
|
return ProcessState.RUNNING
|
|
@@ -65,6 +86,25 @@ class LocalProcess(Process):
|
|
|
65
86
|
)
|
|
66
87
|
return code
|
|
67
88
|
|
|
89
|
+
async def aio_wait(self) -> int:
|
|
90
|
+
"""Asynchronously wait for process to finish"""
|
|
91
|
+
logger.debug(
|
|
92
|
+
"Async waiting (python) for process with PID %s", self._process.pid
|
|
93
|
+
)
|
|
94
|
+
poll_interval = 0.01 # start at 0.01 seconds, max 10 seconds
|
|
95
|
+
|
|
96
|
+
while self._process.poll() is None:
|
|
97
|
+
await asyncio.sleep(poll_interval)
|
|
98
|
+
poll_interval = min(poll_interval * 1.5, 10.0)
|
|
99
|
+
|
|
100
|
+
code = self._process.returncode
|
|
101
|
+
logger.debug(
|
|
102
|
+
"Finished async wait (python) for process with PID %s: %s",
|
|
103
|
+
self._process.pid,
|
|
104
|
+
code,
|
|
105
|
+
)
|
|
106
|
+
return code
|
|
107
|
+
|
|
68
108
|
async def aio_state(self, timeout: float | None = None) -> ProcessState:
|
|
69
109
|
code = self._process.poll()
|
|
70
110
|
if code is None:
|
|
@@ -213,9 +253,9 @@ class LocalConnector(Connector):
|
|
|
213
253
|
return LocalProcessBuilder()
|
|
214
254
|
|
|
215
255
|
def resolve(self, path: Path, basepath: Path = None) -> str:
|
|
216
|
-
assert isinstance(path, PosixPath) or isinstance(
|
|
217
|
-
path
|
|
218
|
-
)
|
|
256
|
+
assert isinstance(path, PosixPath) or isinstance(path, WindowsPath), (
|
|
257
|
+
f"Unrecognized path {type(path)}"
|
|
258
|
+
)
|
|
219
259
|
if not basepath:
|
|
220
260
|
return str(path.absolute())
|
|
221
261
|
try:
|
experimaestro/core/arguments.py
CHANGED
|
@@ -8,7 +8,7 @@ from typing import Annotated
|
|
|
8
8
|
|
|
9
9
|
if TYPE_CHECKING:
|
|
10
10
|
import experimaestro.core.types
|
|
11
|
-
from experimaestro.core.
|
|
11
|
+
from experimaestro.core.partial import ParameterGroup
|
|
12
12
|
|
|
13
13
|
# Track deprecation warnings per module (max 10 per module)
|
|
14
14
|
_deprecation_warning_counts: dict[str, int] = {}
|
|
@@ -70,7 +70,7 @@ class Argument:
|
|
|
70
70
|
otherwise be issued. Defaults to False.
|
|
71
71
|
|
|
72
72
|
groups (set[ParameterGroup], optional): Set of groups this parameter
|
|
73
|
-
belongs to. Used with
|
|
73
|
+
belongs to. Used with partial to compute partial identifiers.
|
|
74
74
|
Defaults to None (empty set).
|
|
75
75
|
"""
|
|
76
76
|
required = (field_or_default is None) if required is None else required
|
|
@@ -97,9 +97,9 @@ class Argument:
|
|
|
97
97
|
self.groups = groups if groups else set()
|
|
98
98
|
|
|
99
99
|
if field_or_default is not None:
|
|
100
|
-
assert (
|
|
101
|
-
|
|
102
|
-
)
|
|
100
|
+
assert self.generator is None, (
|
|
101
|
+
"generator and field_or_default are exclusive options"
|
|
102
|
+
)
|
|
103
103
|
if isinstance(field_or_default, field):
|
|
104
104
|
self.ignore_generated = field_or_default.ignore_generated
|
|
105
105
|
# Allow field to override the overrides flag
|
|
@@ -125,9 +125,9 @@ class Argument:
|
|
|
125
125
|
self.default = field_or_default
|
|
126
126
|
self.ignore_default_in_identifier = True
|
|
127
127
|
|
|
128
|
-
assert (
|
|
129
|
-
|
|
130
|
-
)
|
|
128
|
+
assert not self.constant or self.default is not None, (
|
|
129
|
+
"Cannot be constant without default"
|
|
130
|
+
)
|
|
131
131
|
|
|
132
132
|
def __repr__(self):
|
|
133
133
|
return "Param[{name}:{type}]".format(**self.__dict__)
|
|
@@ -327,19 +327,19 @@ class field:
|
|
|
327
327
|
Useful for adding a field that changes the identifier but won't be used.
|
|
328
328
|
:param overrides: If True, suppress warning when overriding parent parameter
|
|
329
329
|
:param groups: List of ParameterGroup objects for partial identifiers.
|
|
330
|
-
Used with
|
|
330
|
+
Used with partial to compute identifiers that exclude certain groups.
|
|
331
331
|
"""
|
|
332
|
-
assert not (
|
|
333
|
-
|
|
334
|
-
)
|
|
332
|
+
assert not ((default is not None) and (default_factory is not None)), (
|
|
333
|
+
"default and default_factory are mutually exclusive options"
|
|
334
|
+
)
|
|
335
335
|
|
|
336
|
-
assert not (
|
|
337
|
-
|
|
338
|
-
)
|
|
336
|
+
assert not ((default is not None) and (ignore_default is not None)), (
|
|
337
|
+
"default and ignore_default are mutually exclusive options"
|
|
338
|
+
)
|
|
339
339
|
|
|
340
|
-
assert not (
|
|
341
|
-
|
|
342
|
-
)
|
|
340
|
+
assert not ((ignore_default is not None) and (default_factory is not None)), (
|
|
341
|
+
"ignore_default and default_factory are mutually exclusive options"
|
|
342
|
+
)
|
|
343
343
|
|
|
344
344
|
self.default_factory = default_factory
|
|
345
345
|
self.default = default
|
experimaestro/core/identifier.py
CHANGED
|
@@ -9,7 +9,7 @@ from typing import Optional, TYPE_CHECKING
|
|
|
9
9
|
from experimaestro.core.objects import Config, ConfigMixin
|
|
10
10
|
|
|
11
11
|
if TYPE_CHECKING:
|
|
12
|
-
from experimaestro.core.
|
|
12
|
+
from experimaestro.core.partial import Partial
|
|
13
13
|
|
|
14
14
|
|
|
15
15
|
class ConfigPath:
|
|
@@ -112,8 +112,8 @@ class IdentifierComputer:
|
|
|
112
112
|
config: The configuration to compute the identifier for
|
|
113
113
|
config_path: Used to track cycles when computing identifiers
|
|
114
114
|
version: Hash computation version (defaults to XPM_HASH_COMPUTER env var or 2)
|
|
115
|
-
|
|
116
|
-
by this
|
|
115
|
+
partial: If provided, only include parameters that are not excluded
|
|
116
|
+
by this Partial instance (for partial identifier computation)
|
|
117
117
|
"""
|
|
118
118
|
|
|
119
119
|
OBJECT_ID = b"\x00"
|
|
@@ -137,14 +137,14 @@ class IdentifierComputer:
|
|
|
137
137
|
config_path: ConfigPath,
|
|
138
138
|
*,
|
|
139
139
|
version=None,
|
|
140
|
-
|
|
140
|
+
partial: "Partial" = None,
|
|
141
141
|
):
|
|
142
142
|
# Hasher for parameters
|
|
143
143
|
self._hasher = hashlib.sha256()
|
|
144
144
|
self.config = config
|
|
145
145
|
self.config_path = config_path
|
|
146
146
|
self.version = version or int(os.environ.get("XPM_HASH_COMPUTER", 2))
|
|
147
|
-
self.
|
|
147
|
+
self.partial = partial
|
|
148
148
|
if hash_logger.isEnabledFor(logging.DEBUG):
|
|
149
149
|
hash_logger.debug(
|
|
150
150
|
"starting hash (%s): %s", hash(str(self.config)), self.config
|
|
@@ -279,9 +279,9 @@ class IdentifierComputer:
|
|
|
279
279
|
# Process arguments (sort by name to ensure uniqueness)
|
|
280
280
|
arguments = sorted(xpmtype.arguments.values(), key=lambda a: a.name)
|
|
281
281
|
for argument in arguments:
|
|
282
|
-
# Skip arguments excluded by
|
|
283
|
-
if self.
|
|
284
|
-
if self.
|
|
282
|
+
# Skip arguments excluded by partial (for partial identifiers)
|
|
283
|
+
if self.partial is not None:
|
|
284
|
+
if self.partial.is_excluded(argument.groups):
|
|
285
285
|
continue
|
|
286
286
|
|
|
287
287
|
# Ignored argument
|
|
@@ -382,7 +382,7 @@ class IdentifierComputer:
|
|
|
382
382
|
@staticmethod
|
|
383
383
|
def compute_partial(
|
|
384
384
|
config: "ConfigMixin",
|
|
385
|
-
|
|
385
|
+
partial: "Partial",
|
|
386
386
|
config_path: ConfigPath | None = None,
|
|
387
387
|
version=None,
|
|
388
388
|
) -> Identifier:
|
|
@@ -393,7 +393,7 @@ class IdentifierComputer:
|
|
|
393
393
|
partial identifier (and thus the same partial directory).
|
|
394
394
|
|
|
395
395
|
:param config: the configuration for which we compute the identifier
|
|
396
|
-
:param
|
|
396
|
+
:param partial: the Partial instance defining which groups
|
|
397
397
|
to include/exclude
|
|
398
398
|
:param config_path: used to track down cycles between configurations
|
|
399
399
|
:param version: version for the hash computation (None for the last one)
|
|
@@ -402,7 +402,7 @@ class IdentifierComputer:
|
|
|
402
402
|
|
|
403
403
|
with config_path.push(config):
|
|
404
404
|
computer = IdentifierComputer(
|
|
405
|
-
config, config_path, version=version,
|
|
405
|
+
config, config_path, version=version, partial=partial
|
|
406
406
|
)
|
|
407
407
|
computer.update(config, myself=True)
|
|
408
408
|
identifier = computer.identifier()
|
|
@@ -37,9 +37,10 @@ from ..context import SerializationContext, SerializedPath, SerializedPathLoader
|
|
|
37
37
|
if TYPE_CHECKING:
|
|
38
38
|
from ..callbacks import TaskEventListener
|
|
39
39
|
from ..identifier import Identifier
|
|
40
|
-
from ..
|
|
40
|
+
from ..partial import Partial
|
|
41
41
|
from experimaestro.scheduler.base import Job
|
|
42
42
|
from experimaestro.scheduler.workspace import RunMode
|
|
43
|
+
from experimaestro.scheduler.transient import TransientMode
|
|
43
44
|
from experimaestro.launchers import Launcher
|
|
44
45
|
from experimaestro.scheduler import Workspace
|
|
45
46
|
|
|
@@ -232,7 +233,7 @@ class ConfigInformation:
|
|
|
232
233
|
"""The configuration identifier (cached when sealed)"""
|
|
233
234
|
|
|
234
235
|
self._partial_identifiers: Dict[str, "Identifier"] = {}
|
|
235
|
-
"""Cached partial identifiers (keyed by
|
|
236
|
+
"""Cached partial identifiers (keyed by partial name)"""
|
|
236
237
|
|
|
237
238
|
self._validated = False
|
|
238
239
|
self._sealed = False
|
|
@@ -308,7 +309,7 @@ class ConfigInformation:
|
|
|
308
309
|
raise AttributeError(
|
|
309
310
|
f"Cannot set {k} to a configuration with generated values. "
|
|
310
311
|
"Here is the list of paths to help you: "
|
|
311
|
-
f"""{
|
|
312
|
+
f"""{", ".join(get_generated_paths(v, [k]))}"""
|
|
312
313
|
)
|
|
313
314
|
|
|
314
315
|
if not bypass and (
|
|
@@ -499,9 +500,9 @@ class ConfigInformation:
|
|
|
499
500
|
logging.warning("Ignoring %s", k)
|
|
500
501
|
value = argument.generator(self.context, config)
|
|
501
502
|
else:
|
|
502
|
-
assert (
|
|
503
|
-
|
|
504
|
-
)
|
|
503
|
+
assert False, (
|
|
504
|
+
"generator has either two parameters (context and config), or none"
|
|
505
|
+
)
|
|
505
506
|
config.__xpm__.set(k, value, bypass=True)
|
|
506
507
|
else:
|
|
507
508
|
value = config.__xpm__.values.get(k)
|
|
@@ -558,15 +559,15 @@ class ConfigInformation:
|
|
|
558
559
|
self._identifier = identifier
|
|
559
560
|
return identifier
|
|
560
561
|
|
|
561
|
-
def get_partial_identifier(self,
|
|
562
|
-
"""Get the partial identifier for a given
|
|
562
|
+
def get_partial_identifier(self, partial: "Partial") -> "Identifier":
|
|
563
|
+
"""Get the partial identifier for a given partial instance.
|
|
563
564
|
|
|
564
565
|
Partial identifiers exclude certain parameter groups, allowing
|
|
565
566
|
configurations that differ only in those groups to share the same
|
|
566
567
|
partial identifier (and thus the same partial directory).
|
|
567
568
|
|
|
568
569
|
Args:
|
|
569
|
-
|
|
570
|
+
partial: The Partial instance defining which groups
|
|
570
571
|
to include/exclude.
|
|
571
572
|
|
|
572
573
|
Returns:
|
|
@@ -574,11 +575,11 @@ class ConfigInformation:
|
|
|
574
575
|
"""
|
|
575
576
|
from ..identifier import IdentifierComputer
|
|
576
577
|
|
|
577
|
-
name =
|
|
578
|
+
name = partial.name
|
|
578
579
|
if name in self._partial_identifiers:
|
|
579
580
|
return self._partial_identifiers[name]
|
|
580
581
|
|
|
581
|
-
identifier = IdentifierComputer.compute_partial(self.pyobject,
|
|
582
|
+
identifier = IdentifierComputer.compute_partial(self.pyobject, partial)
|
|
582
583
|
|
|
583
584
|
if self._sealed:
|
|
584
585
|
self._partial_identifiers[name] = identifier
|
|
@@ -694,11 +695,17 @@ class ConfigInformation:
|
|
|
694
695
|
run_mode=None,
|
|
695
696
|
init_tasks: List["LightweightTask"] = [],
|
|
696
697
|
max_retries: Optional[int] = None,
|
|
698
|
+
transient: "TransientMode" = None,
|
|
697
699
|
):
|
|
698
700
|
from experimaestro.scheduler import experiment, JobContext
|
|
699
701
|
from experimaestro.scheduler.workspace import RunMode
|
|
702
|
+
from experimaestro.scheduler.transient import TransientMode
|
|
700
703
|
from ..callbacks import TaskEventListener
|
|
701
704
|
|
|
705
|
+
# Use default transient mode if not specified
|
|
706
|
+
if transient is None:
|
|
707
|
+
transient = TransientMode.NONE
|
|
708
|
+
|
|
702
709
|
# --- Prepare the object
|
|
703
710
|
|
|
704
711
|
if self.job:
|
|
@@ -718,6 +725,7 @@ class ConfigInformation:
|
|
|
718
725
|
workspace=workspace,
|
|
719
726
|
run_mode=run_mode,
|
|
720
727
|
max_retries=max_retries,
|
|
728
|
+
transient=transient,
|
|
721
729
|
)
|
|
722
730
|
|
|
723
731
|
# Validate the object
|
|
@@ -750,6 +758,15 @@ class ConfigInformation:
|
|
|
750
758
|
# Add predefined dependencies
|
|
751
759
|
self.job.dependencies.update(self.dependencies)
|
|
752
760
|
|
|
761
|
+
# Add partial dependencies for scheduler-level locking
|
|
762
|
+
from experimaestro.core.partial_lock import PartialJobResource
|
|
763
|
+
|
|
764
|
+
for name, partial_spec in self.xpmtype._partials.items():
|
|
765
|
+
partial_path = job_context.partial_path(partial_spec, self.pyobject)
|
|
766
|
+
resource = PartialJobResource.create(partial_path)
|
|
767
|
+
partial_dep = resource.dependency(name)
|
|
768
|
+
self.job.dependencies.add(partial_dep)
|
|
769
|
+
|
|
753
770
|
run_mode = (
|
|
754
771
|
workspace.run_mode if run_mode is None else run_mode
|
|
755
772
|
) or RunMode.NORMAL
|
|
@@ -807,9 +824,9 @@ class ConfigInformation:
|
|
|
807
824
|
def mark_output(self, config: "Config"):
|
|
808
825
|
"""Sets a dependency on the job"""
|
|
809
826
|
assert not isinstance(config, Task), "Cannot set a dependency on a task"
|
|
810
|
-
assert isinstance(
|
|
811
|
-
|
|
812
|
-
)
|
|
827
|
+
assert isinstance(config, ConfigMixin), (
|
|
828
|
+
"Only configurations can be marked as dependent on a task"
|
|
829
|
+
)
|
|
813
830
|
config.__xpm__.task = self.pyobject
|
|
814
831
|
return config
|
|
815
832
|
|
|
@@ -915,9 +932,9 @@ class ConfigInformation:
|
|
|
915
932
|
for argument, value in self.xpmvalues():
|
|
916
933
|
with context.push(argument.name) as var_path:
|
|
917
934
|
if argument.is_data and value is not None:
|
|
918
|
-
assert isinstance(
|
|
919
|
-
value
|
|
920
|
-
)
|
|
935
|
+
assert isinstance(value, Path), (
|
|
936
|
+
f"Data arguments should be paths (type is {type(value)})"
|
|
937
|
+
)
|
|
921
938
|
value = context.serialize(var_path, value)
|
|
922
939
|
|
|
923
940
|
jsonfields[argument.name] = ConfigInformation._outputjsonvalue(
|
|
@@ -975,7 +992,7 @@ class ConfigInformation:
|
|
|
975
992
|
|
|
976
993
|
Note: Tags are no longer stored in params.json. They are managed by the
|
|
977
994
|
experiment state provider (scoped to job_id, experiment_id, run_id) and
|
|
978
|
-
also stored in experiment
|
|
995
|
+
also stored in experiment status.json.
|
|
979
996
|
|
|
980
997
|
Arguments:
|
|
981
998
|
out {io.TextIOBase} -- The output stream
|
|
@@ -1023,9 +1040,9 @@ class ConfigInformation:
|
|
|
1023
1040
|
:return: a Config object, its instance or a tuple (instance, init_tasks) is return_tasks is True
|
|
1024
1041
|
"""
|
|
1025
1042
|
# Load
|
|
1026
|
-
assert not (
|
|
1027
|
-
as_instance and return_tasks
|
|
1028
|
-
)
|
|
1043
|
+
assert not (as_instance and return_tasks), (
|
|
1044
|
+
"Cannot set as_instance and return_tasks to True"
|
|
1045
|
+
)
|
|
1029
1046
|
if callable(path):
|
|
1030
1047
|
data_loader = path
|
|
1031
1048
|
else:
|
|
@@ -1087,7 +1104,7 @@ class ConfigInformation:
|
|
|
1087
1104
|
|
|
1088
1105
|
@overload
|
|
1089
1106
|
@staticmethod
|
|
1090
|
-
def fromParameters(
|
|
1107
|
+
def fromParameters(
|
|
1091
1108
|
definitions: List[Dict],
|
|
1092
1109
|
as_instance=True,
|
|
1093
1110
|
save_directory: Optional[Path] = None,
|
|
@@ -1097,7 +1114,7 @@ class ConfigInformation:
|
|
|
1097
1114
|
|
|
1098
1115
|
@overload
|
|
1099
1116
|
@staticmethod
|
|
1100
|
-
def fromParameters(
|
|
1117
|
+
def fromParameters(
|
|
1101
1118
|
definitions: List[Dict],
|
|
1102
1119
|
as_instance=False,
|
|
1103
1120
|
return_tasks=True,
|
|
@@ -1108,7 +1125,7 @@ class ConfigInformation:
|
|
|
1108
1125
|
|
|
1109
1126
|
@overload
|
|
1110
1127
|
@staticmethod
|
|
1111
|
-
def fromParameters(
|
|
1128
|
+
def fromParameters(
|
|
1112
1129
|
definitions: List[Dict],
|
|
1113
1130
|
as_instance=False,
|
|
1114
1131
|
save_directory: Optional[Path] = None,
|
|
@@ -1325,9 +1342,9 @@ class ConfigInformation:
|
|
|
1325
1342
|
# Unwrap the value if needed
|
|
1326
1343
|
setattr(o, name, v)
|
|
1327
1344
|
|
|
1328
|
-
assert (
|
|
1329
|
-
|
|
1330
|
-
)
|
|
1345
|
+
assert getattr(o, name) is v, (
|
|
1346
|
+
f"Problem with deserialization {name} of {o.__class__}"
|
|
1347
|
+
)
|
|
1331
1348
|
else:
|
|
1332
1349
|
o.__xpm__.set(name, v, bypass=True)
|
|
1333
1350
|
|
|
@@ -1619,13 +1636,51 @@ class ConfigMixin:
|
|
|
1619
1636
|
return f"Config[{self.__xpmtype__.identifier}]"
|
|
1620
1637
|
|
|
1621
1638
|
def __str__(self):
|
|
1622
|
-
|
|
1623
|
-
|
|
1624
|
-
|
|
1625
|
-
|
|
1626
|
-
|
|
1627
|
-
|
|
1628
|
-
|
|
1639
|
+
return self.to_str()
|
|
1640
|
+
|
|
1641
|
+
def to_str(self, max_depth: int = 3, _visited: set = None) -> str:
|
|
1642
|
+
"""Convert config to string with cycle detection and depth limiting.
|
|
1643
|
+
|
|
1644
|
+
Args:
|
|
1645
|
+
max_depth: Maximum depth for nested configs (default: 3)
|
|
1646
|
+
_visited: Internal set to track visited objects for cycle detection
|
|
1647
|
+
"""
|
|
1648
|
+
if _visited is None:
|
|
1649
|
+
_visited = set()
|
|
1650
|
+
|
|
1651
|
+
obj_id = id(self)
|
|
1652
|
+
if obj_id in _visited:
|
|
1653
|
+
return "..."
|
|
1654
|
+
|
|
1655
|
+
_visited.add(obj_id)
|
|
1656
|
+
try:
|
|
1657
|
+
if max_depth <= 0:
|
|
1658
|
+
return f"{self.__xpmtype__.value_type.__qualname__}(...)"
|
|
1659
|
+
|
|
1660
|
+
def format_value(value):
|
|
1661
|
+
if isinstance(value, Config):
|
|
1662
|
+
return value.to_str(max_depth - 1, _visited)
|
|
1663
|
+
elif isinstance(value, (list, tuple)):
|
|
1664
|
+
formatted = [format_value(v) for v in value]
|
|
1665
|
+
return (
|
|
1666
|
+
f"[{', '.join(formatted)}]"
|
|
1667
|
+
if isinstance(value, list)
|
|
1668
|
+
else f"({', '.join(formatted)})"
|
|
1669
|
+
)
|
|
1670
|
+
elif isinstance(value, dict):
|
|
1671
|
+
items = [f"{k}: {format_value(v)}" for k, v in value.items()]
|
|
1672
|
+
return "{" + ", ".join(items) + "}"
|
|
1673
|
+
return str(value)
|
|
1674
|
+
|
|
1675
|
+
params = ", ".join(
|
|
1676
|
+
[
|
|
1677
|
+
f"{key}={format_value(value)}"
|
|
1678
|
+
for key, value in self.__xpm__.values.items()
|
|
1679
|
+
]
|
|
1680
|
+
)
|
|
1681
|
+
return f"{self.__xpmtype__.value_type.__module__}.{self.__xpmtype__.value_type.__qualname__}({params})"
|
|
1682
|
+
finally:
|
|
1683
|
+
_visited.discard(obj_id)
|
|
1629
1684
|
|
|
1630
1685
|
def tag(self, name, value):
|
|
1631
1686
|
# Capture caller's location and pass to addtag
|
|
@@ -1674,9 +1729,9 @@ class ConfigMixin:
|
|
|
1674
1729
|
|
|
1675
1730
|
context = EmptyContext()
|
|
1676
1731
|
else:
|
|
1677
|
-
assert isinstance(
|
|
1678
|
-
context
|
|
1679
|
-
)
|
|
1732
|
+
assert isinstance(context, ConfigWalkContext), (
|
|
1733
|
+
f"{context.__class__} is not an instance of ConfigWalkContext"
|
|
1734
|
+
)
|
|
1680
1735
|
|
|
1681
1736
|
instance = self.__xpm__.fromConfig(context, objects=objects) # type: ignore
|
|
1682
1737
|
if keep:
|
|
@@ -1691,6 +1746,7 @@ class ConfigMixin:
|
|
|
1691
1746
|
run_mode: "RunMode" = None,
|
|
1692
1747
|
init_tasks: List["LightweightTask"] = [],
|
|
1693
1748
|
max_retries: Optional[int] = None,
|
|
1749
|
+
transient: "TransientMode" = None,
|
|
1694
1750
|
):
|
|
1695
1751
|
"""Submit this task
|
|
1696
1752
|
|
|
@@ -1698,6 +1754,7 @@ class ConfigMixin:
|
|
|
1698
1754
|
:param launcher: The launcher, defaults to None
|
|
1699
1755
|
:param run_mode: Run mode (if None, uses the workspace default)
|
|
1700
1756
|
:param max_retries: Maximum number of retries for resumable tasks that timeout (default: from workspace settings or 3)
|
|
1757
|
+
:param transient: Transient mode for intermediary tasks (see TransientMode)
|
|
1701
1758
|
:return: an object object
|
|
1702
1759
|
"""
|
|
1703
1760
|
return self.__xpm__.submit(
|
|
@@ -1706,6 +1763,7 @@ class ConfigMixin:
|
|
|
1706
1763
|
run_mode=run_mode,
|
|
1707
1764
|
init_tasks=init_tasks,
|
|
1708
1765
|
max_retries=max_retries,
|
|
1766
|
+
transient=transient,
|
|
1709
1767
|
)
|
|
1710
1768
|
|
|
1711
1769
|
def stdout(self):
|
|
@@ -1857,8 +1915,7 @@ class Config:
|
|
|
1857
1915
|
# Check that value class is a subclass of the config class
|
|
1858
1916
|
if not issubclass(value_class, cls):
|
|
1859
1917
|
raise TypeError(
|
|
1860
|
-
f"Value class {value_class.__name__} must be a subclass of "
|
|
1861
|
-
f"{cls.__name__}"
|
|
1918
|
+
f"Value class {value_class.__name__} must be a subclass of {cls.__name__}"
|
|
1862
1919
|
)
|
|
1863
1920
|
|
|
1864
1921
|
# Check that value class inherits from parent value classes
|
|
@@ -29,14 +29,14 @@ class ConfigWalkContext:
|
|
|
29
29
|
return self.path / self._configpath
|
|
30
30
|
return self.path
|
|
31
31
|
|
|
32
|
-
def partial_path(self,
|
|
33
|
-
"""Returns the partial directory path for a given
|
|
32
|
+
def partial_path(self, partial, config) -> Path:
|
|
33
|
+
"""Returns the partial directory path for a given partial instance.
|
|
34
34
|
|
|
35
35
|
This method should be overridden in subclasses that have access to
|
|
36
36
|
workspace information (like JobContext).
|
|
37
37
|
|
|
38
38
|
Args:
|
|
39
|
-
|
|
39
|
+
partial: The Partial instance defining which groups to exclude
|
|
40
40
|
config: The configuration to compute the partial identifier for
|
|
41
41
|
|
|
42
42
|
Returns:
|
|
@@ -1,15 +1,15 @@
|
|
|
1
|
-
"""
|
|
1
|
+
"""Partial identifier computation.
|
|
2
2
|
|
|
3
|
-
This module provides the `
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
3
|
+
This module provides the `partial` function and `Partial` class for defining
|
|
4
|
+
parameter subsets that compute partial identifiers. This enables sharing
|
|
5
|
+
directories (like checkpoints) across tasks that differ only in excluded
|
|
6
|
+
parameter groups.
|
|
7
7
|
|
|
8
8
|
Example:
|
|
9
9
|
iter_group = param_group("iter")
|
|
10
10
|
|
|
11
11
|
class Learn(Task):
|
|
12
|
-
checkpoints =
|
|
12
|
+
checkpoints = partial(exclude_groups=[iter_group])
|
|
13
13
|
|
|
14
14
|
max_iter: Param[int] = field(groups=[iter_group])
|
|
15
15
|
learning_rate: Param[float]
|
|
@@ -36,7 +36,7 @@ class ParameterGroup:
|
|
|
36
36
|
|
|
37
37
|
|
|
38
38
|
def param_group(name: str) -> ParameterGroup:
|
|
39
|
-
"""Create a parameter group for use with
|
|
39
|
+
"""Create a parameter group for use with partial identifiers.
|
|
40
40
|
|
|
41
41
|
Parameter groups allow computing partial identifiers that exclude
|
|
42
42
|
certain parameters, enabling shared directories across related tasks.
|
|
@@ -56,10 +56,10 @@ def param_group(name: str) -> ParameterGroup:
|
|
|
56
56
|
|
|
57
57
|
|
|
58
58
|
@dataclass
|
|
59
|
-
class
|
|
59
|
+
class Partial:
|
|
60
60
|
"""Defines a subset of parameters for partial identifier computation.
|
|
61
61
|
|
|
62
|
-
A
|
|
62
|
+
A Partial instance defines which parameter groups to include or exclude
|
|
63
63
|
when computing a partial identifier. This enables sharing directories
|
|
64
64
|
(like checkpoints) across experiments that only differ in excluded groups.
|
|
65
65
|
|
|
@@ -123,16 +123,16 @@ class Subparameters:
|
|
|
123
123
|
return False
|
|
124
124
|
|
|
125
125
|
|
|
126
|
-
def
|
|
126
|
+
def partial(
|
|
127
127
|
*,
|
|
128
128
|
exclude_groups: list[ParameterGroup] | None = None,
|
|
129
129
|
include_groups: list[ParameterGroup] | None = None,
|
|
130
130
|
exclude_no_group: bool = False,
|
|
131
131
|
exclude_all: bool = False,
|
|
132
|
-
) ->
|
|
133
|
-
"""Create a
|
|
132
|
+
) -> Partial:
|
|
133
|
+
"""Create a partial specification for partial identifier computation.
|
|
134
134
|
|
|
135
|
-
|
|
135
|
+
Partials allow tasks to share directories when they differ only
|
|
136
136
|
in certain parameter groups (e.g., training hyperparameters).
|
|
137
137
|
|
|
138
138
|
Example::
|
|
@@ -146,7 +146,7 @@ def subparameters(
|
|
|
146
146
|
checkpoint: Meta[Path] = field(
|
|
147
147
|
default_factory=PathGenerator(
|
|
148
148
|
"model.pt",
|
|
149
|
-
|
|
149
|
+
partial=partial(exclude_groups=[training_group])
|
|
150
150
|
)
|
|
151
151
|
)
|
|
152
152
|
|
|
@@ -154,9 +154,9 @@ def subparameters(
|
|
|
154
154
|
:param include_groups: Parameter groups to always include (overrides exclusion)
|
|
155
155
|
:param exclude_no_group: If True, exclude parameters with no group assigned
|
|
156
156
|
:param exclude_all: If True, exclude all parameters by default
|
|
157
|
-
:return: A
|
|
157
|
+
:return: A Partial object
|
|
158
158
|
"""
|
|
159
|
-
return
|
|
159
|
+
return Partial(
|
|
160
160
|
exclude_groups=set(exclude_groups or []),
|
|
161
161
|
include_groups=set(include_groups or []),
|
|
162
162
|
exclude_no_group=exclude_no_group,
|