experimaestro 1.5.7__py3-none-any.whl → 1.5.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of experimaestro might be problematic. Click here for more details.
- experimaestro/cli/jobs.py +8 -2
- experimaestro/connectors/ssh.py +2 -2
- experimaestro/core/types.py +8 -3
- experimaestro/experiments/cli.py +17 -20
- experimaestro/launcherfinder/__init__.py +1 -1
- experimaestro/launcherfinder/base.py +2 -18
- experimaestro/launcherfinder/registry.py +22 -129
- experimaestro/launchers/direct.py +0 -47
- experimaestro/scheduler/base.py +1 -1
- experimaestro/tests/launchers/config_slurm/launchers.py +25 -0
- experimaestro/tests/test_findlauncher.py +1 -1
- experimaestro/tokens.py +8 -8
- experimaestro/utils/resources.py +5 -1
- {experimaestro-1.5.7.dist-info → experimaestro-1.5.9.dist-info}/METADATA +1 -1
- {experimaestro-1.5.7.dist-info → experimaestro-1.5.9.dist-info}/RECORD +18 -21
- {experimaestro-1.5.7.dist-info → experimaestro-1.5.9.dist-info}/entry_points.txt +0 -4
- experimaestro/launchers/slurm/cli.py +0 -29
- experimaestro/launchers/slurm/configuration.py +0 -597
- experimaestro/tests/launchers/config_slurm/launchers.yaml +0 -134
- experimaestro/utils/yaml.py +0 -202
- {experimaestro-1.5.7.dist-info → experimaestro-1.5.9.dist-info}/LICENSE +0 -0
- {experimaestro-1.5.7.dist-info → experimaestro-1.5.9.dist-info}/WHEEL +0 -0
experimaestro/cli/jobs.py
CHANGED
|
@@ -90,8 +90,14 @@ def process(
|
|
|
90
90
|
if kill:
|
|
91
91
|
if perform:
|
|
92
92
|
process = info.getprocess()
|
|
93
|
-
|
|
94
|
-
|
|
93
|
+
if process is None:
|
|
94
|
+
cprint(
|
|
95
|
+
"internal error – no process could be retrieved",
|
|
96
|
+
"red",
|
|
97
|
+
)
|
|
98
|
+
else:
|
|
99
|
+
cprint(f"KILLING {process}", "light_red")
|
|
100
|
+
process.kill()
|
|
95
101
|
else:
|
|
96
102
|
print("KILLING (not performing)", process)
|
|
97
103
|
print(
|
experimaestro/connectors/ssh.py
CHANGED
|
@@ -3,7 +3,7 @@ from pathlib import Path, _posix_flavour
|
|
|
3
3
|
import io
|
|
4
4
|
import os
|
|
5
5
|
import re
|
|
6
|
-
from experimaestro.launcherfinder import LauncherRegistry
|
|
6
|
+
from experimaestro.launcherfinder import LauncherRegistry
|
|
7
7
|
from fabric import Connection
|
|
8
8
|
from invoke import Promise
|
|
9
9
|
import invoke.exceptions
|
|
@@ -132,7 +132,7 @@ class SshPath(Path):
|
|
|
132
132
|
|
|
133
133
|
|
|
134
134
|
@dataclass
|
|
135
|
-
class SshConfiguration
|
|
135
|
+
class SshConfiguration:
|
|
136
136
|
hostname: str
|
|
137
137
|
|
|
138
138
|
def create(self, registry: LauncherRegistry):
|
experimaestro/core/types.py
CHANGED
|
@@ -325,12 +325,17 @@ class ObjectType(Type):
|
|
|
325
325
|
|
|
326
326
|
# Get the module
|
|
327
327
|
module = inspect.getmodule(self.originaltype)
|
|
328
|
-
|
|
328
|
+
self._module = module.__name__
|
|
329
|
+
self._package = module.__package__
|
|
330
|
+
|
|
331
|
+
if self._module and self._package:
|
|
329
332
|
self._file = None
|
|
330
333
|
else:
|
|
331
334
|
self._file = Path(inspect.getfile(self.originaltype)).absolute()
|
|
332
|
-
|
|
333
|
-
|
|
335
|
+
|
|
336
|
+
assert (
|
|
337
|
+
self._module and self._package
|
|
338
|
+
) or self._file, f"Could not detect module/file for {self.originaltype}"
|
|
334
339
|
|
|
335
340
|
# The class of the object
|
|
336
341
|
|
experimaestro/experiments/cli.py
CHANGED
|
@@ -1,26 +1,28 @@
|
|
|
1
|
+
import imp
|
|
1
2
|
import inspect
|
|
2
3
|
import json
|
|
3
4
|
import logging
|
|
4
5
|
import sys
|
|
5
6
|
from pathlib import Path
|
|
6
|
-
from typing import Any, List, Optional, Protocol, Tuple
|
|
7
|
+
from typing import Any, List, Optional, Protocol, Tuple
|
|
7
8
|
|
|
8
9
|
import click
|
|
9
10
|
import omegaconf
|
|
10
11
|
import yaml
|
|
12
|
+
from omegaconf import OmegaConf, SCMode
|
|
13
|
+
from termcolor import cprint
|
|
14
|
+
|
|
11
15
|
from experimaestro import LauncherRegistry, RunMode, experiment
|
|
12
|
-
from experimaestro.experiments.configuration import ConfigurationBase
|
|
13
16
|
from experimaestro.exceptions import HandledException
|
|
17
|
+
from experimaestro.experiments.configuration import ConfigurationBase
|
|
14
18
|
from experimaestro.settings import find_workspace
|
|
15
|
-
from omegaconf import OmegaConf, SCMode
|
|
16
|
-
from termcolor import cprint
|
|
17
19
|
|
|
18
20
|
|
|
19
21
|
class ExperimentHelper:
|
|
20
22
|
"""Helper for experiments"""
|
|
21
23
|
|
|
22
|
-
# The experiment
|
|
23
24
|
xp: experiment
|
|
25
|
+
"""The experiment object"""
|
|
24
26
|
|
|
25
27
|
#: Run function
|
|
26
28
|
callable: "ExperimentCallable"
|
|
@@ -175,28 +177,23 @@ def experiments_cli( # noqa: C901
|
|
|
175
177
|
xp_file = Path(xp_file)
|
|
176
178
|
if not xp_file.exists() and xp_file.suffix != ".py":
|
|
177
179
|
xp_file = xp_file.with_suffix(".py")
|
|
178
|
-
xp_file = Path(yaml_file).parent / xp_file
|
|
179
|
-
|
|
180
|
-
with open(xp_file, "r") as f:
|
|
181
|
-
source = f.read()
|
|
182
|
-
if sys.version_info < (3, 9):
|
|
183
|
-
the__file__ = str(xp_file)
|
|
184
|
-
else:
|
|
185
|
-
the__file__ = str(xp_file.absolute())
|
|
186
|
-
|
|
187
|
-
code = compile(source, filename=the__file__, mode="exec")
|
|
188
|
-
_locals: Dict[str, Any] = {}
|
|
180
|
+
xp_file: Path = Path(yaml_file).parent / xp_file
|
|
189
181
|
|
|
190
|
-
|
|
182
|
+
# --- Finds the "run" function
|
|
191
183
|
try:
|
|
192
|
-
|
|
184
|
+
sys.path.append(str(xp_file.parent.absolute()))
|
|
185
|
+
with open(xp_file) as src:
|
|
186
|
+
module_name = xp_file.with_suffix("").name
|
|
187
|
+
mod = imp.load_module(
|
|
188
|
+
module_name, src, str(xp_file.absolute()), (".py", "r", imp.PY_SOURCE)
|
|
189
|
+
)
|
|
190
|
+
helper = getattr(mod, "run", None)
|
|
193
191
|
finally:
|
|
194
192
|
sys.path.pop()
|
|
195
193
|
|
|
196
194
|
# --- ... and runs it
|
|
197
|
-
helper = _locals.get("run", None)
|
|
198
195
|
if helper is None:
|
|
199
|
-
raise ValueError(f"Could not find run function in {
|
|
196
|
+
raise ValueError(f"Could not find run function in {xp_file}")
|
|
200
197
|
|
|
201
198
|
if not isinstance(helper, ExperimentHelper):
|
|
202
199
|
helper = ExperimentHelper(helper)
|
|
@@ -1,33 +1,17 @@
|
|
|
1
|
-
from
|
|
2
|
-
from typing import TYPE_CHECKING, List, Optional
|
|
1
|
+
from typing import TYPE_CHECKING
|
|
3
2
|
|
|
4
|
-
from experimaestro.utils.yaml import YAMLDataClass
|
|
5
|
-
from .specs import HostRequirement
|
|
6
3
|
|
|
7
4
|
if TYPE_CHECKING:
|
|
8
|
-
from experimaestro.launchers import Launcher
|
|
9
5
|
from experimaestro.connectors import Connector
|
|
10
6
|
from experimaestro.tokens import Token
|
|
11
7
|
from .registry import LauncherRegistry
|
|
12
8
|
|
|
13
9
|
|
|
14
|
-
class LauncherConfiguration:
|
|
15
|
-
tags: List[str]
|
|
16
|
-
weight: int
|
|
17
|
-
|
|
18
|
-
"""Generic class for a launcher configuration"""
|
|
19
|
-
|
|
20
|
-
def get(
|
|
21
|
-
self, registry: "LauncherRegistry", requirement: HostRequirement
|
|
22
|
-
) -> Optional["Launcher"]:
|
|
23
|
-
raise NotImplementedError(f"For {self.__class__}")
|
|
24
|
-
|
|
25
|
-
|
|
26
10
|
class ConnectorConfiguration:
|
|
27
11
|
def create(self, registry: "LauncherRegistry") -> "Connector":
|
|
28
12
|
raise NotImplementedError(f"For {self.__class__}")
|
|
29
13
|
|
|
30
14
|
|
|
31
|
-
class TokenConfiguration
|
|
15
|
+
class TokenConfiguration:
|
|
32
16
|
def create(self, registry: "LauncherRegistry", identifier: str) -> "Token":
|
|
33
17
|
raise NotImplementedError(f"For {self.__class__}")
|
|
@@ -1,27 +1,15 @@
|
|
|
1
|
-
#
|
|
1
|
+
# Configuration registers
|
|
2
|
+
|
|
3
|
+
from typing import ClassVar, Dict, Optional, Set, Type, Union
|
|
2
4
|
|
|
3
|
-
from dataclasses import dataclass
|
|
4
|
-
import itertools
|
|
5
|
-
from types import new_class
|
|
6
|
-
from typing import ClassVar, Dict, List, Optional, Set, Type, Union
|
|
7
|
-
from experimaestro import Annotated
|
|
8
5
|
from pathlib import Path
|
|
9
6
|
import typing
|
|
7
|
+
from omegaconf import DictConfig, OmegaConf, SCMode
|
|
10
8
|
import pkg_resources
|
|
11
|
-
import humanfriendly
|
|
12
|
-
import yaml
|
|
13
|
-
from yaml import Loader, Dumper
|
|
14
9
|
from experimaestro.utils import logger
|
|
15
|
-
from experimaestro.utils.yaml import (
|
|
16
|
-
Initialize,
|
|
17
|
-
YAMLDataClass,
|
|
18
|
-
YAMLException,
|
|
19
|
-
YAMLList,
|
|
20
|
-
add_path_resolvers,
|
|
21
|
-
)
|
|
22
10
|
|
|
23
|
-
from .base import
|
|
24
|
-
from .specs import
|
|
11
|
+
from .base import ConnectorConfiguration, TokenConfiguration
|
|
12
|
+
from .specs import HostRequirement
|
|
25
13
|
|
|
26
14
|
if typing.TYPE_CHECKING:
|
|
27
15
|
from experimaestro.launchers import Launcher
|
|
@@ -32,80 +20,20 @@ class LauncherNotFoundError(Exception):
|
|
|
32
20
|
pass
|
|
33
21
|
|
|
34
22
|
|
|
35
|
-
@dataclass
|
|
36
|
-
class GPU(YAMLDataClass):
|
|
37
|
-
"""Represents a GPU"""
|
|
38
|
-
|
|
39
|
-
model: str
|
|
40
|
-
count: int
|
|
41
|
-
memory: Annotated[int, Initialize(humanfriendly.parse_size)]
|
|
42
|
-
|
|
43
|
-
def to_spec(self):
|
|
44
|
-
return [CudaSpecification(self.memory, self.model) for _ in range(self.count)]
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
class GPUList(YAMLList[GPU]):
|
|
48
|
-
"""Represents a list of GPUs"""
|
|
49
|
-
|
|
50
|
-
def __repr__(self):
|
|
51
|
-
return f"GPUs({super().__repr__()})"
|
|
52
|
-
|
|
53
|
-
def to_spec(self) -> List[CudaSpecification]:
|
|
54
|
-
return list(itertools.chain(*[gpu.to_spec() for gpu in self]))
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
@dataclass
|
|
58
|
-
class CPU(YAMLDataClass):
|
|
59
|
-
"""Represents a CPU"""
|
|
60
|
-
|
|
61
|
-
memory: Annotated[int, Initialize(humanfriendly.parse_size)] = 0
|
|
62
|
-
cores: int = 1
|
|
63
|
-
|
|
64
|
-
def to_spec(self):
|
|
65
|
-
return CPUSpecification(self.memory, self.cores)
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
@dataclass
|
|
69
|
-
class Host(YAMLDataClass):
|
|
70
|
-
name: str
|
|
71
|
-
gpus: List[GPU]
|
|
72
|
-
launchers: List[str]
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
Launchers = Dict[str, List[LauncherConfiguration]]
|
|
76
23
|
Connectors = Dict[str, Dict[str, ConnectorConfiguration]]
|
|
77
24
|
Tokens = Dict[str, Dict[str, TokenConfiguration]]
|
|
78
25
|
|
|
79
26
|
|
|
80
|
-
def
|
|
81
|
-
return new_class("LauncherLoader", (yaml.FullLoader,)) # type: ignore
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
def load_yaml(loader_cls: Type[Loader], path: Path):
|
|
27
|
+
def load_yaml(schema, path: Path):
|
|
85
28
|
if not path.is_file():
|
|
86
|
-
return
|
|
29
|
+
return {}
|
|
87
30
|
|
|
88
|
-
logger.warning(
|
|
89
|
-
"Using YAML file to configure launchers is deprecated. Please remove %s using launchers.py",
|
|
90
|
-
path,
|
|
91
|
-
)
|
|
92
31
|
logger.debug("Loading %s", path)
|
|
93
32
|
with path.open("rt") as fp:
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
loader.dispose()
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
def unknown_error(loader: Loader, node):
|
|
102
|
-
raise YAMLException(
|
|
103
|
-
"",
|
|
104
|
-
node.start_mark.name,
|
|
105
|
-
node.start_mark.line,
|
|
106
|
-
node.start_mark.column,
|
|
107
|
-
f"No handler defined for key {node}",
|
|
108
|
-
)
|
|
33
|
+
cfg = OmegaConf.load(fp)
|
|
34
|
+
return OmegaConf.to_container(
|
|
35
|
+
OmegaConf.merge(cfg, schema), structured_config_mode=SCMode.INSTANTIATE
|
|
36
|
+
)
|
|
109
37
|
|
|
110
38
|
|
|
111
39
|
class LauncherRegistry:
|
|
@@ -132,27 +60,14 @@ class LauncherRegistry:
|
|
|
132
60
|
LauncherRegistry.CURRENT_CONFIG_DIR = config_dir
|
|
133
61
|
|
|
134
62
|
def __init__(self, basepath: Path):
|
|
135
|
-
self.
|
|
136
|
-
self.
|
|
137
|
-
self.TokenLoader: Type[Loader] = new_loader("TokenLoader")
|
|
138
|
-
self.Dumper: Type[Dumper] = new_class("CustomDumper", (Dumper,), {})
|
|
63
|
+
self.connectors_schema = DictConfig({})
|
|
64
|
+
self.tokens_schema = DictConfig({})
|
|
139
65
|
self.find_launcher_fn = None
|
|
140
66
|
|
|
141
|
-
# Add safeguards
|
|
142
|
-
add_path_resolvers(
|
|
143
|
-
self.LauncherLoader,
|
|
144
|
-
[],
|
|
145
|
-
Dict[str, LauncherConfiguration],
|
|
146
|
-
dumper=self.Dumper,
|
|
147
|
-
)
|
|
148
|
-
|
|
149
67
|
# Use entry points for connectors and launchers
|
|
150
68
|
for entry_point in pkg_resources.iter_entry_points("experimaestro.connectors"):
|
|
151
69
|
entry_point.load().init_registry(self)
|
|
152
70
|
|
|
153
|
-
for entry_point in pkg_resources.iter_entry_points("experimaestro.launchers"):
|
|
154
|
-
entry_point.load().init_registry(self)
|
|
155
|
-
|
|
156
71
|
for entry_point in pkg_resources.iter_entry_points("experimaestro.tokens"):
|
|
157
72
|
entry_point.load().init_registry(self)
|
|
158
73
|
|
|
@@ -172,32 +87,16 @@ class LauncherRegistry:
|
|
|
172
87
|
logger.warn("No find_launcher() function was found in %s", launchers_py)
|
|
173
88
|
|
|
174
89
|
# Read the configuration file
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
)
|
|
178
|
-
self.launchers = sorted(
|
|
179
|
-
itertools.chain(*launchers.values()), key=lambda launcher: -launcher.weight
|
|
180
|
-
)
|
|
181
|
-
|
|
182
|
-
self.connectors: Connectors = (
|
|
183
|
-
load_yaml(self.ConnectorLoader, basepath / "connectors.yaml") or {}
|
|
184
|
-
)
|
|
185
|
-
self.tokens: Tokens = (
|
|
186
|
-
load_yaml(self.TokenLoader, basepath / "tokens.yaml") or {}
|
|
187
|
-
)
|
|
188
|
-
|
|
189
|
-
def register_launcher(self, identifier: str, cls: Type[YAMLDataClass]):
|
|
190
|
-
add_path_resolvers(
|
|
191
|
-
self.LauncherLoader, [identifier, None], cls, dumper=self.Dumper
|
|
90
|
+
self.connectors = load_yaml(
|
|
91
|
+
self.connectors_schema, basepath / "connectors.yaml"
|
|
192
92
|
)
|
|
93
|
+
self.tokens = load_yaml(self.tokens_schema, basepath / "tokens.yaml")
|
|
193
94
|
|
|
194
|
-
def register_connector(self, identifier: str, cls: Type
|
|
195
|
-
|
|
196
|
-
self.ConnectorLoader, [identifier, None], cls, dumper=self.Dumper
|
|
197
|
-
)
|
|
95
|
+
def register_connector(self, identifier: str, cls: Type):
|
|
96
|
+
self.connectors_schema.merge_with({identifier: cls})
|
|
198
97
|
|
|
199
|
-
def register_token(self, identifier: str, cls: Type
|
|
200
|
-
|
|
98
|
+
def register_token(self, identifier: str, cls: Type):
|
|
99
|
+
self.tokens_schema.merge_with({identifier: cls})
|
|
201
100
|
|
|
202
101
|
def getToken(self, identifier: str) -> "Token":
|
|
203
102
|
for tokens in self.tokens.values():
|
|
@@ -227,7 +126,7 @@ class LauncherRegistry:
|
|
|
227
126
|
tags: Restrict the launchers to those containing one of the specified tags
|
|
228
127
|
"""
|
|
229
128
|
|
|
230
|
-
if
|
|
129
|
+
if self.find_launcher_fn is None:
|
|
231
130
|
logger.info("No launchers.yaml file: using local host ")
|
|
232
131
|
from experimaestro.launchers.direct import DirectLauncher
|
|
233
132
|
from experimaestro.connectors.local import LocalConnector
|
|
@@ -250,12 +149,6 @@ class LauncherRegistry:
|
|
|
250
149
|
if launcher := self.find_launcher_fn(spec, tags):
|
|
251
150
|
return launcher
|
|
252
151
|
|
|
253
|
-
# We have registered launchers
|
|
254
|
-
for spec in specs:
|
|
255
|
-
for handler in self.launchers:
|
|
256
|
-
if (not tags) or any((tag in tags) for tag in handler.tags):
|
|
257
|
-
if launcher := handler.get(self, spec):
|
|
258
|
-
return launcher
|
|
259
152
|
return None
|
|
260
153
|
|
|
261
154
|
|
|
@@ -1,15 +1,3 @@
|
|
|
1
|
-
from dataclasses import dataclass, field
|
|
2
|
-
from functools import cached_property
|
|
3
|
-
from typing import Dict, List, Optional
|
|
4
|
-
from experimaestro.launcherfinder import (
|
|
5
|
-
LauncherConfiguration,
|
|
6
|
-
LauncherRegistry,
|
|
7
|
-
HostRequirement,
|
|
8
|
-
)
|
|
9
|
-
from experimaestro.launcherfinder.registry import CPU, GPUList, YAMLDataClass
|
|
10
|
-
from experimaestro.launcherfinder.specs import (
|
|
11
|
-
HostSpecification,
|
|
12
|
-
)
|
|
13
1
|
from experimaestro.scriptbuilder import PythonScriptBuilder
|
|
14
2
|
from . import Launcher
|
|
15
3
|
|
|
@@ -18,40 +6,5 @@ class DirectLauncher(Launcher):
|
|
|
18
6
|
def scriptbuilder(self):
|
|
19
7
|
return PythonScriptBuilder()
|
|
20
8
|
|
|
21
|
-
@staticmethod
|
|
22
|
-
def init_registry(registry: LauncherRegistry):
|
|
23
|
-
registry.register_launcher("local", DirectLauncherConfiguration)
|
|
24
|
-
|
|
25
9
|
def __str__(self):
|
|
26
10
|
return f"DirectLauncher({self.connector})"
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
@dataclass
|
|
30
|
-
class DirectLauncherConfiguration(YAMLDataClass, LauncherConfiguration):
|
|
31
|
-
connector: str = "connector"
|
|
32
|
-
cpu: CPU = field(default_factory=CPU)
|
|
33
|
-
gpus: GPUList = field(default_factory=GPUList)
|
|
34
|
-
tokens: Optional[Dict[str, int]] = None
|
|
35
|
-
tags: List[str] = field(default_factory=lambda: [])
|
|
36
|
-
weight: int = 0
|
|
37
|
-
disable: bool = False
|
|
38
|
-
|
|
39
|
-
@cached_property
|
|
40
|
-
def spec(self) -> HostSpecification:
|
|
41
|
-
return HostSpecification(cpu=self.cpu.to_spec(), cuda=self.gpus.to_spec())
|
|
42
|
-
|
|
43
|
-
def get(
|
|
44
|
-
self, registry: LauncherRegistry, requirement: "HostRequirement"
|
|
45
|
-
) -> Optional[Launcher]:
|
|
46
|
-
if requirement.match(self.spec):
|
|
47
|
-
launcher = DirectLauncher(connector=registry.getConnector(self.connector))
|
|
48
|
-
if self.tokens:
|
|
49
|
-
for token_identifier, count in self.tokens.items():
|
|
50
|
-
token = registry.getToken(token_identifier)
|
|
51
|
-
# TODO: handle the case where this is not a CounterToken
|
|
52
|
-
launcher.addListener(
|
|
53
|
-
lambda job: job.dependencies.add(token.dependency(count))
|
|
54
|
-
)
|
|
55
|
-
return launcher
|
|
56
|
-
|
|
57
|
-
return None
|
experimaestro/scheduler/base.py
CHANGED
|
@@ -889,7 +889,7 @@ class experiment:
|
|
|
889
889
|
"""Shortcut to set the environment value"""
|
|
890
890
|
if override or name not in self.workspace.env:
|
|
891
891
|
logging.info("Setting environment: %s=%s", name, value)
|
|
892
|
-
self.
|
|
892
|
+
self.workspace.env[name] = value
|
|
893
893
|
|
|
894
894
|
def token(self, name: str, count: int):
|
|
895
895
|
"""Returns a token for this experiment
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
from experimaestro.launcherfinder.specs import (
|
|
2
|
+
CPUSpecification,
|
|
3
|
+
CudaSpecification,
|
|
4
|
+
HostRequirement,
|
|
5
|
+
HostSpecification,
|
|
6
|
+
)
|
|
7
|
+
from experimaestro.launchers.slurm.base import SlurmLauncher, SlurmOptions
|
|
8
|
+
|
|
9
|
+
GIGA = 1024**3
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def find_launcher(requirements: HostRequirement, tags: set[str] = set()):
|
|
13
|
+
host = HostSpecification(
|
|
14
|
+
cpu=CPUSpecification(cores=16, memory=32 * GIGA),
|
|
15
|
+
max_duration=3600 * 24 * 10,
|
|
16
|
+
cuda=[CudaSpecification(memory=32 * GIGA) for _ in range(4)],
|
|
17
|
+
)
|
|
18
|
+
if match := requirements.match(host):
|
|
19
|
+
return SlurmLauncher(
|
|
20
|
+
options=SlurmOptions(
|
|
21
|
+
gpus_per_node=len(match.requirement.cuda_gpus),
|
|
22
|
+
partition="hard,electronic",
|
|
23
|
+
constraint="(A6000&GPU2&GPUM48G)|(A6000&GPU3&GPUM48G)|(RTX&GPU4&GPUM48G)",
|
|
24
|
+
)
|
|
25
|
+
)
|
|
@@ -81,7 +81,7 @@ def slurm_constraint_split(constraint: str):
|
|
|
81
81
|
def test_findlauncher_slurm():
|
|
82
82
|
path = ResourcePathWrapper.create(f"{__package__ }.launchers", "config_slurm")
|
|
83
83
|
|
|
84
|
-
assert (path / "launchers.
|
|
84
|
+
assert (path / "launchers.py").is_file()
|
|
85
85
|
|
|
86
86
|
registry = LauncherRegistry(path)
|
|
87
87
|
launcher = registry.find("""duration=4 days & cuda(mem=24G) * 2""")
|
experimaestro/tokens.py
CHANGED
|
@@ -1,10 +1,12 @@
|
|
|
1
|
-
"""Tokens are special types of dependency controlling the access to
|
|
1
|
+
"""Tokens are special types of dependency controlling the access to
|
|
2
2
|
a computational resource (e.g. number of launched jobs, etc.)
|
|
3
3
|
"""
|
|
4
4
|
|
|
5
5
|
from dataclasses import dataclass
|
|
6
6
|
import sys
|
|
7
7
|
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
from omegaconf import DictConfig
|
|
8
10
|
from experimaestro.core.objects import Config
|
|
9
11
|
import fasteners
|
|
10
12
|
import threading
|
|
@@ -14,7 +16,6 @@ from typing import Dict
|
|
|
14
16
|
from experimaestro.launcherfinder.base import TokenConfiguration
|
|
15
17
|
|
|
16
18
|
from experimaestro.launcherfinder.registry import LauncherRegistry
|
|
17
|
-
from experimaestro.utils.yaml import YAMLDict
|
|
18
19
|
|
|
19
20
|
from .ipc import ipcom
|
|
20
21
|
from .locking import Lock, LockError
|
|
@@ -87,7 +88,7 @@ class TokenFile:
|
|
|
87
88
|
try:
|
|
88
89
|
self.path = path
|
|
89
90
|
with path.open("rt") as fp:
|
|
90
|
-
count, self.uri = [
|
|
91
|
+
count, self.uri = [line.strip() for line in fp.readlines()]
|
|
91
92
|
self.count = int(count)
|
|
92
93
|
except Exception:
|
|
93
94
|
logging.exception("Error while reading %s", self.path)
|
|
@@ -183,7 +184,10 @@ class CounterToken(Token, FileSystemEventHandler):
|
|
|
183
184
|
|
|
184
185
|
@staticmethod
|
|
185
186
|
def init_registry(registry: LauncherRegistry):
|
|
186
|
-
registry.register_token(
|
|
187
|
+
registry.register_token(
|
|
188
|
+
"countertoken",
|
|
189
|
+
DictConfig({}, key_type=str, element_type=CounterConfiguration),
|
|
190
|
+
)
|
|
187
191
|
|
|
188
192
|
def __init__(self, name: str, path: Path, count: int, force=True):
|
|
189
193
|
"""[summary]
|
|
@@ -456,7 +460,3 @@ class CounterConfiguration(TokenConfiguration):
|
|
|
456
460
|
from experimaestro.connectors.local import LocalConnector
|
|
457
461
|
|
|
458
462
|
return LocalConnector.instance().createtoken(identifier, self.tokens)
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
class CounterTokenConfiguration(YAMLDict[CounterConfiguration]):
|
|
462
|
-
pass
|
experimaestro/utils/resources.py
CHANGED
|
@@ -1,11 +1,12 @@
|
|
|
1
1
|
from contextlib import contextmanager
|
|
2
|
+
from os import PathLike
|
|
2
3
|
from pathlib import Path
|
|
3
4
|
from typing import Union
|
|
4
5
|
from importlib import resources
|
|
5
6
|
from experimaestro.compat import cached_property
|
|
6
7
|
|
|
7
8
|
|
|
8
|
-
class ResourcePathWrapper:
|
|
9
|
+
class ResourcePathWrapper(PathLike):
|
|
9
10
|
"""Simple wrapper for resource path"""
|
|
10
11
|
|
|
11
12
|
def __init__(self, path: Path):
|
|
@@ -30,6 +31,9 @@ class ResourcePathWrapper:
|
|
|
30
31
|
def is_file(self):
|
|
31
32
|
return resources.is_resource(self.package, self.name)
|
|
32
33
|
|
|
34
|
+
def __fspath__(self):
|
|
35
|
+
return resources.path(self.package, self.name).__fspath__()
|
|
36
|
+
|
|
33
37
|
@contextmanager
|
|
34
38
|
def open(self, *args, **kwargs):
|
|
35
39
|
with resources.path(self.package, self.name) as path:
|