experimaestro 1.5.4__py3-none-any.whl → 1.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of experimaestro might be problematic. Click here for more details.
- experimaestro/__init__.py +0 -1
- experimaestro/__main__.py +3 -423
- experimaestro/cli/__init__.py +312 -0
- experimaestro/{filter.py → cli/filter.py} +4 -4
- experimaestro/cli/jobs.py +261 -0
- experimaestro/click.py +0 -35
- experimaestro/connectors/ssh.py +26 -7
- experimaestro/core/objects.py +13 -6
- experimaestro/core/types.py +8 -3
- experimaestro/experiments/cli.py +97 -63
- experimaestro/experiments/configuration.py +7 -1
- experimaestro/launcherfinder/__init__.py +1 -1
- experimaestro/launcherfinder/base.py +2 -18
- experimaestro/launcherfinder/registry.py +22 -129
- experimaestro/launchers/direct.py +0 -47
- experimaestro/launchers/slurm/base.py +3 -1
- experimaestro/notifications.py +24 -8
- experimaestro/run.py +21 -3
- experimaestro/scheduler/base.py +26 -15
- experimaestro/scheduler/workspace.py +26 -8
- experimaestro/scriptbuilder.py +5 -1
- experimaestro/settings.py +43 -5
- experimaestro/tests/launchers/config_slurm/launchers.py +25 -0
- experimaestro/tests/test_findlauncher.py +1 -1
- experimaestro/tests/test_ssh.py +7 -0
- experimaestro/tests/test_tags.py +35 -0
- experimaestro/tokens.py +8 -8
- experimaestro/utils/resources.py +5 -1
- {experimaestro-1.5.4.dist-info → experimaestro-1.6.0.dist-info}/METADATA +4 -5
- {experimaestro-1.5.4.dist-info → experimaestro-1.6.0.dist-info}/RECORD +33 -59
- {experimaestro-1.5.4.dist-info → experimaestro-1.6.0.dist-info}/WHEEL +1 -1
- {experimaestro-1.5.4.dist-info → experimaestro-1.6.0.dist-info}/entry_points.txt +0 -4
- experimaestro/launchers/slurm/cli.py +0 -29
- experimaestro/launchers/slurm/configuration.py +0 -597
- experimaestro/scheduler/environment.py +0 -94
- experimaestro/server/data/016b4a6cdced82ab3aa1.ttf +0 -0
- experimaestro/server/data/0c35d18bf06992036b69.woff2 +0 -0
- experimaestro/server/data/219aa9140e099e6c72ed.woff2 +0 -0
- experimaestro/server/data/3a4004a46a653d4b2166.woff +0 -0
- experimaestro/server/data/3baa5b8f3469222b822d.woff +0 -0
- experimaestro/server/data/4d73cb90e394b34b7670.woff +0 -0
- experimaestro/server/data/4ef4218c522f1eb6b5b1.woff2 +0 -0
- experimaestro/server/data/50701fbb8177c2dde530.ttf +0 -0
- experimaestro/server/data/5d681e2edae8c60630db.woff +0 -0
- experimaestro/server/data/6f420cf17cc0d7676fad.woff2 +0 -0
- experimaestro/server/data/878f31251d960bd6266f.woff2 +0 -0
- experimaestro/server/data/b041b1fa4fe241b23445.woff2 +0 -0
- experimaestro/server/data/b6879d41b0852f01ed5b.woff2 +0 -0
- experimaestro/server/data/c380809fd3677d7d6903.woff2 +0 -0
- experimaestro/server/data/d75e3fd1eb12e9bd6655.ttf +0 -0
- experimaestro/server/data/f882956fd323fd322f31.woff +0 -0
- experimaestro/server/data/favicon.ico +0 -0
- experimaestro/server/data/index.css +0 -22844
- experimaestro/server/data/index.css.map +0 -1
- experimaestro/server/data/index.html +0 -27
- experimaestro/server/data/index.js +0 -100947
- experimaestro/server/data/index.js.map +0 -1
- experimaestro/server/data/login.html +0 -22
- experimaestro/server/data/manifest.json +0 -15
- experimaestro/tests/launchers/config_slurm/launchers.yaml +0 -134
- experimaestro/utils/yaml.py +0 -202
- {experimaestro-1.5.4.dist-info → experimaestro-1.6.0.dist-info}/LICENSE +0 -0
|
@@ -262,7 +262,9 @@ class SlurmProcessBuilder(ProcessBuilder):
|
|
|
262
262
|
addstream(builder.command, "-i", self.stdin)
|
|
263
263
|
|
|
264
264
|
builder.command.extend(self.command)
|
|
265
|
-
logger.info(
|
|
265
|
+
logger.info(
|
|
266
|
+
"slurm sbatch command: %s", " ".join(f'"{s}"' for s in builder.command)
|
|
267
|
+
)
|
|
266
268
|
handler = OutputCaptureHandler()
|
|
267
269
|
builder.stdout = Redirect.pipe(handler)
|
|
268
270
|
builder.stderr = Redirect.inherit()
|
experimaestro/notifications.py
CHANGED
|
@@ -44,6 +44,12 @@ class LevelInformation:
|
|
|
44
44
|
return f"[{self.level}] {self.desc} {int(self.progress*1000)/10}%"
|
|
45
45
|
|
|
46
46
|
|
|
47
|
+
class ListenerInformation:
|
|
48
|
+
def __init__(self, url: str):
|
|
49
|
+
self.url = url
|
|
50
|
+
self.error_count = 0
|
|
51
|
+
|
|
52
|
+
|
|
47
53
|
class Reporter(threading.Thread):
|
|
48
54
|
NOTIFICATION_FOLDER = ".notifications"
|
|
49
55
|
|
|
@@ -59,7 +65,7 @@ class Reporter(threading.Thread):
|
|
|
59
65
|
super().__init__(daemon=True)
|
|
60
66
|
self.path = path / Reporter.NOTIFICATION_FOLDER
|
|
61
67
|
self.path.mkdir(exist_ok=True)
|
|
62
|
-
self.urls: Dict[str, str] = {}
|
|
68
|
+
self.urls: Dict[str, ListenerInformation] = {}
|
|
63
69
|
|
|
64
70
|
# Last check of notification URLs
|
|
65
71
|
self.lastcheck = 0
|
|
@@ -80,7 +86,7 @@ class Reporter(threading.Thread):
|
|
|
80
86
|
self.cv.notifyAll()
|
|
81
87
|
|
|
82
88
|
@staticmethod
|
|
83
|
-
def isfatal_httperror(e: Exception) -> bool:
|
|
89
|
+
def isfatal_httperror(e: Exception, info: ListenerInformation) -> bool:
|
|
84
90
|
"""Returns True if this HTTP error indicates that the server won't recover"""
|
|
85
91
|
if isinstance(e, HTTPError):
|
|
86
92
|
if e.code >= 400 and e.code < 500:
|
|
@@ -90,6 +96,13 @@ class Reporter(threading.Thread):
|
|
|
90
96
|
return True
|
|
91
97
|
if isinstance(e.reason, socket.gaierror) and e.reason.errno == -2:
|
|
92
98
|
return True
|
|
99
|
+
if isinstance(e.reason, TimeoutError):
|
|
100
|
+
info.error_count += 1
|
|
101
|
+
|
|
102
|
+
# Too many errors
|
|
103
|
+
if info.error_count > 3:
|
|
104
|
+
logger.info("Too many errors with %s", info.error_count)
|
|
105
|
+
return True
|
|
93
106
|
|
|
94
107
|
return False
|
|
95
108
|
|
|
@@ -100,8 +113,8 @@ class Reporter(threading.Thread):
|
|
|
100
113
|
mtime = os.path.getmtime(self.path)
|
|
101
114
|
if mtime > self.lastcheck:
|
|
102
115
|
for f in self.path.iterdir():
|
|
103
|
-
self.urls[f.name] = f.read_text().strip()
|
|
104
|
-
logger.info("Added new notification URL: %s", self.urls[f.name])
|
|
116
|
+
self.urls[f.name] = ListenerInformation(f.read_text().strip())
|
|
117
|
+
logger.info("Added new notification URL: %s", self.urls[f.name].url)
|
|
105
118
|
f.unlink()
|
|
106
119
|
|
|
107
120
|
self.lastcheck = os.path.getmtime(self.path)
|
|
@@ -128,7 +141,9 @@ class Reporter(threading.Thread):
|
|
|
128
141
|
params = level.report()
|
|
129
142
|
|
|
130
143
|
# Go over all URLs
|
|
131
|
-
for key, baseurl in self.urls.items():
|
|
144
|
+
for key, info in self.urls.items():
|
|
145
|
+
baseurl = info.url
|
|
146
|
+
|
|
132
147
|
url = "{}/progress?{}".format(
|
|
133
148
|
baseurl, urllib.parse.urlencode(params)
|
|
134
149
|
)
|
|
@@ -147,7 +162,7 @@ class Reporter(threading.Thread):
|
|
|
147
162
|
url,
|
|
148
163
|
e,
|
|
149
164
|
)
|
|
150
|
-
if Reporter.isfatal_httperror(e):
|
|
165
|
+
if Reporter.isfatal_httperror(e, info):
|
|
151
166
|
toremove.append(key)
|
|
152
167
|
|
|
153
168
|
# Removes unvalid URLs
|
|
@@ -165,7 +180,8 @@ class Reporter(threading.Thread):
|
|
|
165
180
|
self.check_urls()
|
|
166
181
|
if self.urls:
|
|
167
182
|
# Go over all URLs
|
|
168
|
-
for key, baseurl in self.urls.items():
|
|
183
|
+
for key, info in self.urls.items():
|
|
184
|
+
baseurl = info.url
|
|
169
185
|
url = "{}?status=eoj".format(baseurl)
|
|
170
186
|
try:
|
|
171
187
|
with urlopen(url) as _:
|
|
@@ -243,7 +259,7 @@ class xpm_tqdm(std_tqdm):
|
|
|
243
259
|
|
|
244
260
|
def update(self, n=1):
|
|
245
261
|
result = super().update(n)
|
|
246
|
-
if self.total is not None:
|
|
262
|
+
if self.total is not None and self.total > 0:
|
|
247
263
|
progress(self.n / self.total, level=self.pos, console=False)
|
|
248
264
|
return result
|
|
249
265
|
|
experimaestro/run.py
CHANGED
|
@@ -92,7 +92,7 @@ class TaskRunner:
|
|
|
92
92
|
logger.info("Finished cleanup")
|
|
93
93
|
|
|
94
94
|
def handle_error(self, code, frame_type):
|
|
95
|
-
logger.info("
|
|
95
|
+
logger.info("Error handler: finished with code %d", code)
|
|
96
96
|
self.failedpath.write_text(str(code))
|
|
97
97
|
self.cleanup()
|
|
98
98
|
logger.info("Exiting")
|
|
@@ -100,8 +100,18 @@ class TaskRunner:
|
|
|
100
100
|
|
|
101
101
|
def run(self):
|
|
102
102
|
atexit.register(self.cleanup)
|
|
103
|
-
signal.signal(signal.SIGTERM, self.handle_error)
|
|
104
|
-
signal.signal(signal.SIGINT, self.handle_error)
|
|
103
|
+
sigterm_handler = signal.signal(signal.SIGTERM, self.handle_error)
|
|
104
|
+
sigint_handler = signal.signal(signal.SIGINT, self.handle_error)
|
|
105
|
+
|
|
106
|
+
def remove_signal_handlers(remove_cleanup=True):
|
|
107
|
+
"""Removes cleanup in forked processes"""
|
|
108
|
+
signal.signal(signal.SIGTERM, sigterm_handler)
|
|
109
|
+
signal.signal(signal.SIGINT, sigint_handler)
|
|
110
|
+
atexit.unregister(self.cleanup)
|
|
111
|
+
|
|
112
|
+
if sys.platform != "win32":
|
|
113
|
+
os.register_at_fork(after_in_child=remove_signal_handlers)
|
|
114
|
+
|
|
105
115
|
try:
|
|
106
116
|
workdir = self.scriptpath.parent
|
|
107
117
|
os.chdir(workdir)
|
|
@@ -129,6 +139,10 @@ class TaskRunner:
|
|
|
129
139
|
self.started = True
|
|
130
140
|
run(workdir / "params.json")
|
|
131
141
|
|
|
142
|
+
# ... remove the handlers
|
|
143
|
+
logger.info("Task ended successfully")
|
|
144
|
+
remove_signal_handlers(remove_cleanup=False)
|
|
145
|
+
|
|
132
146
|
# Everything went OK
|
|
133
147
|
sys.exit(0)
|
|
134
148
|
except Exception:
|
|
@@ -137,6 +151,10 @@ class TaskRunner:
|
|
|
137
151
|
|
|
138
152
|
except SystemExit as e:
|
|
139
153
|
if e.code == 0:
|
|
154
|
+
# Normal exit, just create the ".done" file
|
|
140
155
|
self.donepath.touch()
|
|
156
|
+
|
|
157
|
+
# ... and finish the exit process
|
|
158
|
+
raise
|
|
141
159
|
else:
|
|
142
160
|
self.handle_error(e.code, None)
|
experimaestro/scheduler/base.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
from collections import ChainMap
|
|
2
2
|
from functools import cached_property
|
|
3
|
+
import logging
|
|
3
4
|
import os
|
|
4
5
|
from pathlib import Path
|
|
5
6
|
from shutil import rmtree
|
|
@@ -13,13 +14,12 @@ from experimaestro.exceptions import HandledException
|
|
|
13
14
|
from experimaestro.notifications import LevelInformation, Reporter
|
|
14
15
|
from typing import Dict
|
|
15
16
|
from experimaestro.scheduler.services import Service
|
|
16
|
-
from experimaestro.settings import get_settings
|
|
17
|
+
from experimaestro.settings import WorkspaceSettings, get_settings
|
|
17
18
|
|
|
18
19
|
|
|
19
20
|
from experimaestro.core.objects import Config, ConfigWalkContext
|
|
20
21
|
from experimaestro.utils import logger
|
|
21
22
|
from experimaestro.locking import Locks, LockError, Lock
|
|
22
|
-
from .environment import Environment
|
|
23
23
|
from .workspace import RunMode, Workspace
|
|
24
24
|
from .dependencies import Dependency, DependencyStatus, Resource
|
|
25
25
|
import concurrent.futures
|
|
@@ -180,7 +180,7 @@ class Job(Resource):
|
|
|
180
180
|
return ChainMap(
|
|
181
181
|
{},
|
|
182
182
|
self.launcher.environ if self.launcher else {},
|
|
183
|
-
self.workspace.
|
|
183
|
+
self.workspace.env if self.workspace else {},
|
|
184
184
|
)
|
|
185
185
|
|
|
186
186
|
@property
|
|
@@ -508,6 +508,12 @@ class Scheduler:
|
|
|
508
508
|
job.scheduler = self
|
|
509
509
|
self.waitingjobs.add(job)
|
|
510
510
|
|
|
511
|
+
# Check that we don't have a completed job in
|
|
512
|
+
# alternate directories
|
|
513
|
+
for jobspath in experiment.current().alt_jobspaths:
|
|
514
|
+
# FIXME: check if done
|
|
515
|
+
pass
|
|
516
|
+
|
|
511
517
|
# Creates a link into the experiment folder
|
|
512
518
|
path = experiment.current().jobspath / job.relpath
|
|
513
519
|
path.parent.mkdir(parents=True, exist_ok=True)
|
|
@@ -719,7 +725,7 @@ class experiment:
|
|
|
719
725
|
```
|
|
720
726
|
"""
|
|
721
727
|
|
|
722
|
-
|
|
728
|
+
#: Current experiment
|
|
723
729
|
CURRENT: Optional["experiment"] = None
|
|
724
730
|
|
|
725
731
|
@staticmethod
|
|
@@ -733,7 +739,7 @@ class experiment:
|
|
|
733
739
|
|
|
734
740
|
def __init__(
|
|
735
741
|
self,
|
|
736
|
-
env: Union[Path, str,
|
|
742
|
+
env: Union[Path, str, WorkspaceSettings],
|
|
737
743
|
name: str,
|
|
738
744
|
*,
|
|
739
745
|
host: Optional[str] = None,
|
|
@@ -761,16 +767,13 @@ class experiment:
|
|
|
761
767
|
|
|
762
768
|
from experimaestro.server import Server
|
|
763
769
|
|
|
764
|
-
|
|
765
|
-
|
|
766
|
-
|
|
767
|
-
self.environment = Environment(workdir=env)
|
|
770
|
+
settings = get_settings()
|
|
771
|
+
if not isinstance(env, WorkspaceSettings):
|
|
772
|
+
env = WorkspaceSettings(id=None, path=Path(env))
|
|
768
773
|
|
|
769
774
|
# Creates the workspace
|
|
770
775
|
run_mode = run_mode or RunMode.NORMAL
|
|
771
|
-
self.workspace = Workspace(
|
|
772
|
-
self.environment, launcher=launcher, run_mode=run_mode
|
|
773
|
-
)
|
|
776
|
+
self.workspace = Workspace(settings, env, launcher=launcher, run_mode=run_mode)
|
|
774
777
|
|
|
775
778
|
# Mark the directory has an experimaestro folder
|
|
776
779
|
self.workdir = self.workspace.experimentspath / name
|
|
@@ -780,7 +783,7 @@ class experiment:
|
|
|
780
783
|
self.old_experiment = None
|
|
781
784
|
self.services: Dict[str, Service] = {}
|
|
782
785
|
|
|
783
|
-
|
|
786
|
+
# Get configuration settings
|
|
784
787
|
|
|
785
788
|
if host is not None:
|
|
786
789
|
settings.server.host = host
|
|
@@ -832,6 +835,12 @@ class experiment:
|
|
|
832
835
|
"""Return the directory in which results can be stored for this experiment"""
|
|
833
836
|
return self.workdir / "jobs"
|
|
834
837
|
|
|
838
|
+
@property
|
|
839
|
+
def alt_jobspaths(self):
|
|
840
|
+
"""Return potential other directories"""
|
|
841
|
+
for alt_workdir in self.workspace.alt_workdirs:
|
|
842
|
+
yield alt_workdir / "jobs"
|
|
843
|
+
|
|
835
844
|
@property
|
|
836
845
|
def jobsbakpath(self):
|
|
837
846
|
"""Return the directory in which results can be stored for this experiment"""
|
|
@@ -876,9 +885,11 @@ class experiment:
|
|
|
876
885
|
future = asyncio.run_coroutine_threadsafe(awaitcompletion(), self.loop)
|
|
877
886
|
return future.result()
|
|
878
887
|
|
|
879
|
-
def setenv(self, name, value):
|
|
888
|
+
def setenv(self, name, value, override=True):
|
|
880
889
|
"""Shortcut to set the environment value"""
|
|
881
|
-
self.
|
|
890
|
+
if override or name not in self.workspace.env:
|
|
891
|
+
logging.info("Setting environment: %s=%s", name, value)
|
|
892
|
+
self.workspace.env[name] = value
|
|
882
893
|
|
|
883
894
|
def token(self, name: str, count: int):
|
|
884
895
|
"""Returns a token for this experiment
|
|
@@ -1,9 +1,9 @@
|
|
|
1
|
+
from collections import ChainMap
|
|
1
2
|
from enum import Enum
|
|
3
|
+
from functools import cached_property
|
|
2
4
|
from pathlib import Path
|
|
3
|
-
from typing import Optional
|
|
4
|
-
|
|
5
|
-
if TYPE_CHECKING:
|
|
6
|
-
from experimaestro.scheduler.environment import Environment
|
|
5
|
+
from typing import Optional
|
|
6
|
+
from experimaestro.settings import WorkspaceSettings, Settings
|
|
7
7
|
|
|
8
8
|
|
|
9
9
|
class RunMode(str, Enum):
|
|
@@ -25,15 +25,22 @@ class Workspace:
|
|
|
25
25
|
"""
|
|
26
26
|
|
|
27
27
|
CURRENT = None
|
|
28
|
-
|
|
28
|
+
settings: "Settings"
|
|
29
|
+
worspace: "WorkspaceSettings"
|
|
29
30
|
|
|
30
31
|
"""Creates a workspace for experiments"""
|
|
31
32
|
|
|
32
33
|
def __init__(
|
|
33
|
-
self,
|
|
34
|
+
self,
|
|
35
|
+
settings: "Settings",
|
|
36
|
+
workspace_settings: "WorkspaceSettings",
|
|
37
|
+
launcher=None,
|
|
38
|
+
run_mode: RunMode = None,
|
|
34
39
|
):
|
|
35
|
-
self.
|
|
36
|
-
|
|
40
|
+
self.settings = settings
|
|
41
|
+
self.workspace_settings = workspace_settings
|
|
42
|
+
|
|
43
|
+
path = self.workspace_settings.path
|
|
37
44
|
self.notificationURL: Optional[str] = None
|
|
38
45
|
if isinstance(path, Path):
|
|
39
46
|
path = path.absolute()
|
|
@@ -43,6 +50,8 @@ class Workspace:
|
|
|
43
50
|
|
|
44
51
|
self.launcher = launcher or Launcher.get(path)
|
|
45
52
|
|
|
53
|
+
self.env = ChainMap({}, workspace_settings.env, settings.env)
|
|
54
|
+
|
|
46
55
|
def __enter__(self):
|
|
47
56
|
self.old_workspace = Workspace.CURRENT
|
|
48
57
|
Workspace.CURRENT = self
|
|
@@ -50,6 +59,15 @@ class Workspace:
|
|
|
50
59
|
def __exit__(self, *args):
|
|
51
60
|
Workspace.CURRENT = self.old_workspace
|
|
52
61
|
|
|
62
|
+
@cached_property
|
|
63
|
+
def alt_workspaces(self):
|
|
64
|
+
for ws_id in self.workspace_settings.alt_workspaces:
|
|
65
|
+
yield self.settings.workspaces[ws_id]
|
|
66
|
+
|
|
67
|
+
@property
|
|
68
|
+
def alt_workdirs(self):
|
|
69
|
+
yield from map(lambda ws: ws.path, self.workspace_settings.alt_workspaces)
|
|
70
|
+
|
|
53
71
|
@property
|
|
54
72
|
def connector(self):
|
|
55
73
|
"""Returns the default connector"""
|
experimaestro/scriptbuilder.py
CHANGED
|
@@ -92,7 +92,11 @@ class PythonScriptBuilder:
|
|
|
92
92
|
with scriptpath.open("wt") as out:
|
|
93
93
|
out.write("#!{}\n".format(self.pythonpath))
|
|
94
94
|
out.write("# Experimaestro generated task\n\n")
|
|
95
|
-
out.write(
|
|
95
|
+
out.write(
|
|
96
|
+
"""import logging\n"""
|
|
97
|
+
"""logging.basicConfig(level=logging.INFO, """
|
|
98
|
+
"""format='%(levelname)s:%(process)d:%(asctime)s [%(name)s] %(message)s', datefmt='%y-%m-%d %H:%M:%S')\n\n"""
|
|
99
|
+
)
|
|
96
100
|
|
|
97
101
|
out.write("\nif __name__ == '__main__':\n\n" "")
|
|
98
102
|
|
experimaestro/settings.py
CHANGED
|
@@ -1,9 +1,10 @@
|
|
|
1
1
|
import os
|
|
2
|
-
from omegaconf import OmegaConf
|
|
2
|
+
from omegaconf import OmegaConf, SCMode
|
|
3
3
|
from dataclasses import field, dataclass
|
|
4
4
|
from functools import lru_cache
|
|
5
5
|
from pathlib import Path
|
|
6
6
|
from typing import Dict, Optional, List
|
|
7
|
+
import logging
|
|
7
8
|
|
|
8
9
|
|
|
9
10
|
@dataclass
|
|
@@ -23,15 +24,23 @@ class ServerSettings:
|
|
|
23
24
|
|
|
24
25
|
@dataclass
|
|
25
26
|
class WorkspaceSettings:
|
|
27
|
+
"""Defines the workspace"""
|
|
28
|
+
|
|
26
29
|
id: str
|
|
27
30
|
"""The workspace identifier"""
|
|
28
31
|
|
|
29
|
-
path: Path
|
|
32
|
+
path: Path = field()
|
|
30
33
|
"""The workspace path"""
|
|
31
34
|
|
|
32
35
|
env: Dict[str, str] = field(default_factory=dict)
|
|
33
36
|
"""Workspace specific environment variables"""
|
|
34
37
|
|
|
38
|
+
alt_workspaces: List[str] = field(default_factory=list)
|
|
39
|
+
"""Alternative workspaces to find jobs or experiments"""
|
|
40
|
+
|
|
41
|
+
def __post_init__(self):
|
|
42
|
+
self.path = self.path.expanduser().resolve()
|
|
43
|
+
|
|
35
44
|
|
|
36
45
|
@dataclass
|
|
37
46
|
class Settings:
|
|
@@ -51,13 +60,17 @@ def get_settings(path: Optional[Path] = None) -> Settings:
|
|
|
51
60
|
|
|
52
61
|
path = path or Path("~/.config/experimaestro/settings.yaml").expanduser()
|
|
53
62
|
if not path.is_file():
|
|
54
|
-
return
|
|
63
|
+
return OmegaConf.to_container(
|
|
64
|
+
schema, structured_config_mode=SCMode.INSTANTIATE
|
|
65
|
+
)
|
|
55
66
|
|
|
56
67
|
conf = OmegaConf.load(path)
|
|
57
|
-
return OmegaConf.
|
|
68
|
+
return OmegaConf.to_container(
|
|
69
|
+
OmegaConf.merge(schema, conf), structured_config_mode=SCMode.INSTANTIATE
|
|
70
|
+
)
|
|
58
71
|
|
|
59
72
|
|
|
60
|
-
def get_workspace(id: Optional[str]) -> WorkspaceSettings:
|
|
73
|
+
def get_workspace(id: Optional[str] = None) -> Optional[WorkspaceSettings]:
|
|
61
74
|
"""Return the workspace settings given an id (or None for the default one)"""
|
|
62
75
|
workspaces = get_settings().workspaces
|
|
63
76
|
if workspaces:
|
|
@@ -68,3 +81,28 @@ def get_workspace(id: Optional[str]) -> WorkspaceSettings:
|
|
|
68
81
|
return workspace
|
|
69
82
|
|
|
70
83
|
return None
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def find_workspace(*, workspace: Optional[str] = None, workdir: Optional[Path] = None):
|
|
87
|
+
"""Find workspace"""
|
|
88
|
+
workdir = Path(workdir) if workdir else None
|
|
89
|
+
|
|
90
|
+
if workspace:
|
|
91
|
+
ws_env = get_workspace(workspace)
|
|
92
|
+
if ws_env is None:
|
|
93
|
+
raise RuntimeError("No workspace named %s", workspace)
|
|
94
|
+
|
|
95
|
+
logging.info("Using workspace %s", ws_env.id)
|
|
96
|
+
if workdir:
|
|
97
|
+
# Overrides working directory
|
|
98
|
+
logging.info(" override working directory: %s", workdir)
|
|
99
|
+
ws_env.path = workdir
|
|
100
|
+
elif workdir:
|
|
101
|
+
logging.info("Using workdir %s", workdir)
|
|
102
|
+
ws_env = WorkspaceSettings("", workdir)
|
|
103
|
+
else:
|
|
104
|
+
ws_env = get_workspace()
|
|
105
|
+
assert ws_env is not None, "No workdir or workspace defined, and no default"
|
|
106
|
+
logging.info("Using default workspace %s", ws_env.id)
|
|
107
|
+
|
|
108
|
+
return ws_env
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
from experimaestro.launcherfinder.specs import (
|
|
2
|
+
CPUSpecification,
|
|
3
|
+
CudaSpecification,
|
|
4
|
+
HostRequirement,
|
|
5
|
+
HostSpecification,
|
|
6
|
+
)
|
|
7
|
+
from experimaestro.launchers.slurm.base import SlurmLauncher, SlurmOptions
|
|
8
|
+
|
|
9
|
+
GIGA = 1024**3
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def find_launcher(requirements: HostRequirement, tags: set[str] = set()):
|
|
13
|
+
host = HostSpecification(
|
|
14
|
+
cpu=CPUSpecification(cores=16, memory=32 * GIGA),
|
|
15
|
+
max_duration=3600 * 24 * 10,
|
|
16
|
+
cuda=[CudaSpecification(memory=32 * GIGA) for _ in range(4)],
|
|
17
|
+
)
|
|
18
|
+
if match := requirements.match(host):
|
|
19
|
+
return SlurmLauncher(
|
|
20
|
+
options=SlurmOptions(
|
|
21
|
+
gpus_per_node=len(match.requirement.cuda_gpus),
|
|
22
|
+
partition="hard,electronic",
|
|
23
|
+
constraint="(A6000&GPU2&GPUM48G)|(A6000&GPU3&GPUM48G)|(RTX&GPU4&GPUM48G)",
|
|
24
|
+
)
|
|
25
|
+
)
|
|
@@ -81,7 +81,7 @@ def slurm_constraint_split(constraint: str):
|
|
|
81
81
|
def test_findlauncher_slurm():
|
|
82
82
|
path = ResourcePathWrapper.create(f"{__package__ }.launchers", "config_slurm")
|
|
83
83
|
|
|
84
|
-
assert (path / "launchers.yaml").is_file()
|
|
84
|
+
assert (path / "launchers.py").is_file()
|
|
85
85
|
|
|
86
86
|
registry = LauncherRegistry(path)
|
|
87
87
|
launcher = registry.find("""duration=4 days & cuda(mem=24G) * 2""")
|
experimaestro/tests/test_ssh.py
CHANGED
|
@@ -1,14 +1,18 @@
|
|
|
1
|
+
import sys
|
|
2
|
+
import pytest
|
|
1
3
|
from experimaestro.connectors.ssh import SshPath
|
|
2
4
|
|
|
3
5
|
# --- Test SSH path and SSH path manipulation
|
|
4
6
|
|
|
5
7
|
|
|
8
|
+
@pytest.mark.skipif(sys.version_info >= (3, 12), reason="requires python3.10 or higher")
|
|
6
9
|
def test_absolute():
|
|
7
10
|
path = SshPath("ssh://host//a/path")
|
|
8
11
|
assert path.host == "host"
|
|
9
12
|
assert path.is_absolute()
|
|
10
13
|
|
|
11
14
|
|
|
15
|
+
@pytest.mark.skipif(sys.version_info >= (3, 12), reason="requires python3.10 or higher")
|
|
12
16
|
def test_relative():
|
|
13
17
|
path = SshPath("ssh://host")
|
|
14
18
|
assert path.host == "host"
|
|
@@ -17,6 +21,7 @@ def test_relative():
|
|
|
17
21
|
assert not path.is_absolute()
|
|
18
22
|
|
|
19
23
|
|
|
24
|
+
@pytest.mark.skipif(sys.version_info >= (3, 12), reason="requires python3.10 or higher")
|
|
20
25
|
def test_relative_withpath():
|
|
21
26
|
path = SshPath("ssh://host/relative/path")
|
|
22
27
|
assert path.host == "host"
|
|
@@ -24,6 +29,7 @@ def test_relative_withpath():
|
|
|
24
29
|
assert not path.is_absolute()
|
|
25
30
|
|
|
26
31
|
|
|
32
|
+
@pytest.mark.skipif(sys.version_info >= (3, 12), reason="requires python3.10 or higher")
|
|
27
33
|
def test_relative_absolute():
|
|
28
34
|
path = SshPath("ssh://host") / "/absolute/path"
|
|
29
35
|
assert path.host == "host"
|
|
@@ -31,6 +37,7 @@ def test_relative_absolute():
|
|
|
31
37
|
assert path.is_absolute()
|
|
32
38
|
|
|
33
39
|
|
|
40
|
+
@pytest.mark.skipif(sys.version_info >= (3, 12), reason="requires python3.10 or higher")
|
|
34
41
|
def test_relative_compose():
|
|
35
42
|
path = SshPath("ssh://host/abc") / "relative/path"
|
|
36
43
|
assert path.host == "host"
|
experimaestro/tests/test_tags.py
CHANGED
|
@@ -2,6 +2,7 @@ from typing import Dict
|
|
|
2
2
|
from pathlib import Path
|
|
3
3
|
from experimaestro import (
|
|
4
4
|
tag,
|
|
5
|
+
LightweightTask,
|
|
5
6
|
config,
|
|
6
7
|
argument,
|
|
7
8
|
Config,
|
|
@@ -69,6 +70,40 @@ def test_inneroutput():
|
|
|
69
70
|
assert evaluate.__xpm__.tags() == {"hello": "world"}
|
|
70
71
|
|
|
71
72
|
|
|
73
|
+
def test_tags_init_tasks():
|
|
74
|
+
"""Test tags within init tasks"""
|
|
75
|
+
|
|
76
|
+
class MyTask(Task):
|
|
77
|
+
pass
|
|
78
|
+
|
|
79
|
+
class InitTask(LightweightTask):
|
|
80
|
+
pass
|
|
81
|
+
|
|
82
|
+
class MyConfig(Config):
|
|
83
|
+
pass
|
|
84
|
+
|
|
85
|
+
class TaskWithOutput(Task):
|
|
86
|
+
x: Param[MyConfig]
|
|
87
|
+
|
|
88
|
+
def task_outputs(self, dep) -> MyConfig:
|
|
89
|
+
return dep(MyConfig())
|
|
90
|
+
|
|
91
|
+
init_task = InitTask().tag("hello", "world")
|
|
92
|
+
task = MyTask()
|
|
93
|
+
result = task.submit(run_mode=RunMode.DRY_RUN, init_tasks=[init_task])
|
|
94
|
+
assert result.tags() == {"hello": "world"}
|
|
95
|
+
|
|
96
|
+
other_task = TaskWithOutput(x=MyConfig().tag("hello", "world"))
|
|
97
|
+
assert other_task.tags() == {"hello": "world"}
|
|
98
|
+
|
|
99
|
+
result = other_task.submit(run_mode=RunMode.DRY_RUN)
|
|
100
|
+
assert isinstance(result, MyConfig)
|
|
101
|
+
assert result.tags() == {"hello": "world"}
|
|
102
|
+
|
|
103
|
+
result = MyTask().submit(run_mode=RunMode.DRY_RUN, init_tasks=[result])
|
|
104
|
+
assert result.tags() == {"hello": "world"}
|
|
105
|
+
|
|
106
|
+
|
|
72
107
|
class TaskDirectoryContext(DirectoryContext):
|
|
73
108
|
def __init__(self, task, path):
|
|
74
109
|
super().__init__(path)
|
experimaestro/tokens.py
CHANGED
|
@@ -1,10 +1,12 @@
|
|
|
1
|
-
"""Tokens are special types of dependency controlling the access to
|
|
1
|
+
"""Tokens are special types of dependency controlling the access to
|
|
2
2
|
a computational resource (e.g. number of launched jobs, etc.)
|
|
3
3
|
"""
|
|
4
4
|
|
|
5
5
|
from dataclasses import dataclass
|
|
6
6
|
import sys
|
|
7
7
|
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
from omegaconf import DictConfig
|
|
8
10
|
from experimaestro.core.objects import Config
|
|
9
11
|
import fasteners
|
|
10
12
|
import threading
|
|
@@ -14,7 +16,6 @@ from typing import Dict
|
|
|
14
16
|
from experimaestro.launcherfinder.base import TokenConfiguration
|
|
15
17
|
|
|
16
18
|
from experimaestro.launcherfinder.registry import LauncherRegistry
|
|
17
|
-
from experimaestro.utils.yaml import YAMLDict
|
|
18
19
|
|
|
19
20
|
from .ipc import ipcom
|
|
20
21
|
from .locking import Lock, LockError
|
|
@@ -87,7 +88,7 @@ class TokenFile:
|
|
|
87
88
|
try:
|
|
88
89
|
self.path = path
|
|
89
90
|
with path.open("rt") as fp:
|
|
90
|
-
count, self.uri = [
|
|
91
|
+
count, self.uri = [line.strip() for line in fp.readlines()]
|
|
91
92
|
self.count = int(count)
|
|
92
93
|
except Exception:
|
|
93
94
|
logging.exception("Error while reading %s", self.path)
|
|
@@ -183,7 +184,10 @@ class CounterToken(Token, FileSystemEventHandler):
|
|
|
183
184
|
|
|
184
185
|
@staticmethod
|
|
185
186
|
def init_registry(registry: LauncherRegistry):
|
|
186
|
-
registry.register_token(
|
|
187
|
+
registry.register_token(
|
|
188
|
+
"countertoken",
|
|
189
|
+
DictConfig({}, key_type=str, element_type=CounterConfiguration),
|
|
190
|
+
)
|
|
187
191
|
|
|
188
192
|
def __init__(self, name: str, path: Path, count: int, force=True):
|
|
189
193
|
"""[summary]
|
|
@@ -456,7 +460,3 @@ class CounterConfiguration(TokenConfiguration):
|
|
|
456
460
|
from experimaestro.connectors.local import LocalConnector
|
|
457
461
|
|
|
458
462
|
return LocalConnector.instance().createtoken(identifier, self.tokens)
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
class CounterTokenConfiguration(YAMLDict[CounterConfiguration]):
|
|
462
|
-
pass
|
experimaestro/utils/resources.py
CHANGED
|
@@ -1,11 +1,12 @@
|
|
|
1
1
|
from contextlib import contextmanager
|
|
2
|
+
from os import PathLike
|
|
2
3
|
from pathlib import Path
|
|
3
4
|
from typing import Union
|
|
4
5
|
from importlib import resources
|
|
5
6
|
from experimaestro.compat import cached_property
|
|
6
7
|
|
|
7
8
|
|
|
8
|
-
class ResourcePathWrapper:
|
|
9
|
+
class ResourcePathWrapper(PathLike):
|
|
9
10
|
"""Simple wrapper for resource path"""
|
|
10
11
|
|
|
11
12
|
def __init__(self, path: Path):
|
|
@@ -30,6 +31,9 @@ class ResourcePathWrapper:
|
|
|
30
31
|
def is_file(self):
|
|
31
32
|
return resources.is_resource(self.package, self.name)
|
|
32
33
|
|
|
34
|
+
def __fspath__(self):
|
|
35
|
+
return resources.path(self.package, self.name).__fspath__()
|
|
36
|
+
|
|
33
37
|
@contextmanager
|
|
34
38
|
def open(self, *args, **kwargs):
|
|
35
39
|
with resources.path(self.package, self.name) as path:
|
|
@@ -1,8 +1,7 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
2
|
Name: experimaestro
|
|
3
|
-
Version: 1.5.4
|
|
3
|
+
Version: 1.6.0
|
|
4
4
|
Summary: "Experimaestro is a computer science experiment manager"
|
|
5
|
-
Home-page: https://github.com/experimaestro/experimaestro-python
|
|
6
5
|
License: GPL-3
|
|
7
6
|
Keywords: experiment manager
|
|
8
7
|
Author: Benjamin Piwowarski
|
|
@@ -20,13 +19,13 @@ Classifier: Programming Language :: Python :: 3.9
|
|
|
20
19
|
Classifier: Programming Language :: Python :: 3.10
|
|
21
20
|
Classifier: Programming Language :: Python :: 3.11
|
|
22
21
|
Classifier: Programming Language :: Python :: 3.12
|
|
22
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
23
23
|
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
24
24
|
Requires-Dist: arpeggio (>=2,<3)
|
|
25
25
|
Requires-Dist: attrs (>=23.1.0,<24.0.0)
|
|
26
26
|
Requires-Dist: click (>=8)
|
|
27
27
|
Requires-Dist: decorator (>=5,<6)
|
|
28
28
|
Requires-Dist: docstring-parser (>=0.15,<0.16)
|
|
29
|
-
Requires-Dist: fabric (>=3,<4)
|
|
30
29
|
Requires-Dist: fasteners (>=0.19,<0.20)
|
|
31
30
|
Requires-Dist: flask (>=2.3,<3.0)
|
|
32
31
|
Requires-Dist: flask-socketio (>=5.3,<6.0)
|
|
@@ -41,7 +40,7 @@ Requires-Dist: pyparsing (>=3.1,<4.0)
|
|
|
41
40
|
Requires-Dist: pytools (>=2023.1.1,<2024.0.0)
|
|
42
41
|
Requires-Dist: pyyaml (>=6.0.1,<7.0.0)
|
|
43
42
|
Requires-Dist: requests (>=2.31,<3.0)
|
|
44
|
-
Requires-Dist: rpyc (>=5,<
|
|
43
|
+
Requires-Dist: rpyc (>=5,<7)
|
|
45
44
|
Requires-Dist: sortedcontainers (>=2.4,<3.0)
|
|
46
45
|
Requires-Dist: termcolor (>=2.3)
|
|
47
46
|
Requires-Dist: tqdm (>=4.66.1,<5.0.0)
|