fractal-server 2.12.0a1__py3-none-any.whl → 2.13.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fractal_server/__init__.py +1 -1
- fractal_server/__main__.py +17 -63
- fractal_server/app/models/security.py +9 -12
- fractal_server/app/models/v2/dataset.py +2 -2
- fractal_server/app/models/v2/job.py +11 -9
- fractal_server/app/models/v2/task.py +2 -3
- fractal_server/app/models/v2/task_group.py +6 -2
- fractal_server/app/models/v2/workflowtask.py +15 -8
- fractal_server/app/routes/admin/v2/task.py +1 -1
- fractal_server/app/routes/admin/v2/task_group.py +1 -1
- fractal_server/app/routes/api/v2/dataset.py +4 -4
- fractal_server/app/routes/api/v2/images.py +11 -23
- fractal_server/app/routes/api/v2/project.py +2 -2
- fractal_server/app/routes/api/v2/status.py +1 -1
- fractal_server/app/routes/api/v2/submit.py +8 -6
- fractal_server/app/routes/api/v2/task.py +4 -2
- fractal_server/app/routes/api/v2/task_collection.py +3 -2
- fractal_server/app/routes/api/v2/task_group.py +2 -2
- fractal_server/app/routes/api/v2/workflow.py +3 -3
- fractal_server/app/routes/api/v2/workflow_import.py +3 -3
- fractal_server/app/routes/api/v2/workflowtask.py +3 -1
- fractal_server/app/routes/auth/_aux_auth.py +4 -1
- fractal_server/app/routes/auth/current_user.py +3 -5
- fractal_server/app/routes/auth/group.py +1 -1
- fractal_server/app/routes/auth/users.py +2 -4
- fractal_server/app/routes/aux/_runner.py +1 -1
- fractal_server/app/routes/aux/validate_user_settings.py +1 -2
- fractal_server/app/runner/executors/_job_states.py +13 -0
- fractal_server/app/runner/executors/slurm/_slurm_config.py +26 -18
- fractal_server/app/runner/executors/slurm/ssh/__init__.py +0 -3
- fractal_server/app/runner/executors/slurm/ssh/_executor_wait_thread.py +31 -22
- fractal_server/app/runner/executors/slurm/ssh/_slurm_job.py +2 -6
- fractal_server/app/runner/executors/slurm/ssh/executor.py +35 -50
- fractal_server/app/runner/executors/slurm/sudo/__init__.py +0 -3
- fractal_server/app/runner/executors/slurm/sudo/_check_jobs_status.py +1 -2
- fractal_server/app/runner/executors/slurm/sudo/_executor_wait_thread.py +37 -47
- fractal_server/app/runner/executors/slurm/sudo/executor.py +77 -41
- fractal_server/app/runner/v2/__init__.py +0 -9
- fractal_server/app/runner/v2/_local/_local_config.py +5 -4
- fractal_server/app/runner/v2/_slurm_common/get_slurm_config.py +4 -4
- fractal_server/app/runner/v2/_slurm_sudo/__init__.py +2 -2
- fractal_server/app/runner/v2/deduplicate_list.py +1 -1
- fractal_server/app/runner/v2/runner.py +9 -4
- fractal_server/app/runner/v2/task_interface.py +15 -7
- fractal_server/app/schemas/_filter_validators.py +6 -3
- fractal_server/app/schemas/_validators.py +7 -5
- fractal_server/app/schemas/user.py +23 -18
- fractal_server/app/schemas/user_group.py +25 -11
- fractal_server/app/schemas/user_settings.py +31 -24
- fractal_server/app/schemas/v2/dataset.py +48 -35
- fractal_server/app/schemas/v2/dumps.py +16 -14
- fractal_server/app/schemas/v2/job.py +49 -29
- fractal_server/app/schemas/v2/manifest.py +32 -28
- fractal_server/app/schemas/v2/project.py +18 -8
- fractal_server/app/schemas/v2/task.py +86 -75
- fractal_server/app/schemas/v2/task_collection.py +41 -30
- fractal_server/app/schemas/v2/task_group.py +39 -20
- fractal_server/app/schemas/v2/workflow.py +24 -12
- fractal_server/app/schemas/v2/workflowtask.py +63 -61
- fractal_server/app/security/__init__.py +7 -4
- fractal_server/app/security/signup_email.py +21 -12
- fractal_server/config.py +123 -75
- fractal_server/images/models.py +18 -12
- fractal_server/main.py +13 -10
- fractal_server/migrations/env.py +16 -63
- fractal_server/tasks/v2/local/collect.py +9 -8
- fractal_server/tasks/v2/local/deactivate.py +3 -0
- fractal_server/tasks/v2/local/reactivate.py +3 -0
- fractal_server/tasks/v2/ssh/collect.py +8 -8
- fractal_server/tasks/v2/ssh/deactivate.py +3 -0
- fractal_server/tasks/v2/ssh/reactivate.py +9 -6
- fractal_server/tasks/v2/utils_background.py +1 -1
- fractal_server/tasks/v2/utils_database.py +1 -1
- {fractal_server-2.12.0a1.dist-info → fractal_server-2.13.0.dist-info}/METADATA +10 -11
- {fractal_server-2.12.0a1.dist-info → fractal_server-2.13.0.dist-info}/RECORD +78 -81
- fractal_server/app/runner/v2/_local_experimental/__init__.py +0 -121
- fractal_server/app/runner/v2/_local_experimental/_local_config.py +0 -108
- fractal_server/app/runner/v2/_local_experimental/_submit_setup.py +0 -42
- fractal_server/app/runner/v2/_local_experimental/executor.py +0 -157
- {fractal_server-2.12.0a1.dist-info → fractal_server-2.13.0.dist-info}/LICENSE +0 -0
- {fractal_server-2.12.0a1.dist-info → fractal_server-2.13.0.dist-info}/WHEEL +0 -0
- {fractal_server-2.12.0a1.dist-info → fractal_server-2.13.0.dist-info}/entry_points.txt +0 -0

**fractal_server/app/runner/executors/slurm/sudo/_executor_wait_thread.py**

```diff
@@ -1,12 +1,10 @@
 import os
+import threading
 import time
 import traceback
 from itertools import count
-from typing import Callable
 from typing import Optional
 
-from cfut import FileWaitThread
-
 from ......logger import set_logger
 from ._check_jobs_status import _jobs_finished
 from fractal_server.app.runner.exceptions import JobExecutionError
@@ -14,33 +12,43 @@ from fractal_server.app.runner.exceptions import JobExecutionError
 logger = set_logger(__name__)
 
 
-class FractalFileWaitThread(FileWaitThread):
+class FractalSlurmSudoWaitThread(threading.Thread):
     """
-    [17 lines of the original docstring, not captured in this diff view]
+    Thread that monitors a pool of SLURM jobs
+
+    This class is a custom re-implementation of the waiting thread class from:
+
+    > clusterfutures <https://github.com/sampsyo/clusterfutures>
+    > Original Copyright
+    > Copyright 2021 Adrian Sampson <asampson@cs.washington.edu>
+    > License: MIT
+
+    Attributes:
+        slurm_user:
+        shutdown_file:
+        shutdown_callback:
+        slurm_poll_interval:
+        waiting:
+        shutdown:
+        lock:
     """
 
     slurm_user: str
     shutdown_file: Optional[str] = None
-    shutdown_callback: Callable
-    [3 lines not captured in this diff view]
+    shutdown_callback: callable
+    slurm_poll_interval: int = 30
+    waiting: dict[tuple[str, ...], str]
+    shutdown: bool
+    _lock: threading.Lock
+
+    def __init__(self, callback: callable, interval=1):
+        threading.Thread.__init__(self, daemon=True)
+        self.callback = callback
+        self.interval = interval
+        self.waiting = {}
+        self._lock = threading.Lock()  # To protect the .waiting dict
+        self.shutdown = False
+        self.active_job_ids = []
 
     def wait(
         self,
@@ -61,10 +69,10 @@ class FractalFileWaitThread(FileWaitThread):
             error_msg = "Cannot call `wait` method after executor shutdown."
             logger.warning(error_msg)
             raise JobExecutionError(info=error_msg)
-        with self.lock:
+        with self._lock:
             self.waiting[filenames] = jobid
 
-    def check(self, i):
+    def check_shutdown(self, i):
         """
         Do one shutdown-file-existence check.
 
@@ -99,30 +107,12 @@ class FractalFileWaitThread(FileWaitThread):
             if self.shutdown:
                 self.shutdown_callback()
                 return
-            with self.lock:
+            with self._lock:
                 self.check(i)
             time.sleep(self.interval)
 
-
-class FractalSlurmWaitThread(FractalFileWaitThread):
-    """
-    Replaces the original clusterfutures.SlurmWaitThread, to inherit from
-    FractalFileWaitThread instead of FileWaitThread.
-
-    The function is copied from clusterfutures 0.5. Original Copyright: 2022
-    Adrian Sampson, released under the MIT licence
-
-    **Note**: if `self.interval != 1` then this should be modified, but for
-    `clusterfutures` v0.5 `self.interval` is indeed equal to `1`.
-
-    Changed from clusterfutures:
-    * Rename `id_to_filename` to `id_to_filenames`
-    """
-
-    slurm_poll_interval = 30
-
     def check(self, i):
-        super().check(i)
+        self.check_shutdown(i)
         if i % (self.slurm_poll_interval // self.interval) == 0:
             try:
                 finished_jobs = _jobs_finished(self.waiting.values())
```
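
The new `FractalSlurmSudoWaitThread` merges the two old `cfut.FileWaitThread` subclasses into one class built directly on `threading.Thread`. Below is a minimal sketch of the same pattern (daemon thread, lock-protected `waiting` collection, periodic poll, plain `shutdown` flag); the `PollingWaitThread` name and the injected `poll_finished` callable are illustrative, not fractal-server API:

```python
import threading
import time
from itertools import count
from typing import Callable


class PollingWaitThread(threading.Thread):
    """Daemon thread that polls a set of pending job IDs."""

    def __init__(
        self,
        poll_finished: Callable[[set[str]], set[str]],
        callback: Callable[[str], None],
        interval: float = 1.0,
    ):
        super().__init__(daemon=True)
        self.poll_finished = poll_finished  # e.g. a wrapper around `squeue`
        self.callback = callback  # invoked once per finished job
        self.interval = interval
        self.waiting: set[str] = set()
        self._lock = threading.Lock()  # protects self.waiting
        self.shutdown = False

    def wait(self, jobid: str) -> None:
        # Register one more job to be watched.
        with self._lock:
            self.waiting.add(jobid)

    def run(self) -> None:
        for _ in count():
            if self.shutdown:
                return
            with self._lock:
                for jobid in self.poll_finished(self.waiting):
                    self.waiting.discard(jobid)
                    self.callback(jobid)
            time.sleep(self.interval)
```

Shutting such a thread down is just `thread.shutdown = True; thread.join()`, which is exactly what the executor's `_stop_and_join_wait_thread` does in the diff below.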

**fractal_server/app/runner/executors/slurm/sudo/executor.py**

```diff
@@ -1,20 +1,12 @@
-[header line not captured in this diff view]
-# Original Copyright
-# Copyright 2021 Adrian Sampson <asampson@cs.washington.edu>
-# License: MIT
-#
-# Modified by:
-# Jacopo Nespolo <jacopo.nespolo@exact-lab.it>
-# Tommaso Comparin <tommaso.comparin@exact-lab.it>
-# Marco Franzon <marco.franzon@exact-lab.it>
-#
-# Copyright 2022 (C) Friedrich Miescher Institute for Biomedical Research and
-# University of Zurich
+import json
 import math
 import shlex
 import subprocess  # nosec
 import sys
+import threading
 import time
+import uuid
+from concurrent.futures import Executor
 from concurrent.futures import Future
 from concurrent.futures import InvalidStateError
 from copy import copy
@@ -26,8 +18,6 @@ from typing import Optional
 from typing import Sequence
 
 import cloudpickle
-from cfut import SlurmExecutor
-from cfut.util import random_string
 
 from ......config import get_settings
 from ......logger import set_logger
@@ -42,7 +32,7 @@ from .._batching import heuristics
 from ..utils_executors import get_pickle_file_path
 from ..utils_executors import get_slurm_file_path
 from ..utils_executors import get_slurm_script_file_path
-from ._executor_wait_thread import FractalSlurmWaitThread
+from ._executor_wait_thread import FractalSlurmSudoWaitThread
 from ._subprocess_run_as_user import _glob_as_user
 from ._subprocess_run_as_user import _glob_as_user_strict
 from ._subprocess_run_as_user import _path_exists_as_user
@@ -161,7 +151,6 @@ class SlurmJob:
         self,
         num_tasks_tot: int,
         slurm_config: SlurmConfig,
-        workflow_task_file_prefix: Optional[str] = None,
         slurm_file_prefix: Optional[str] = None,
         wftask_file_prefixes: Optional[tuple[str, ...]] = None,
         single_task_submission: bool = False,
@@ -180,9 +169,7 @@ class SlurmJob:
             )
         else:
             self.wftask_file_prefixes = wftask_file_prefixes
-        self.workerids = tuple(
-            random_string() for i in range(self.num_tasks_tot)
-        )
+        self.workerids = tuple(uuid.uuid4() for i in range(self.num_tasks_tot))
         self.slurm_config = slurm_config
 
     def get_clean_output_pickle_files(self) -> tuple[str, ...]:
@@ -193,9 +180,17 @@ class SlurmJob:
         return tuple(str(f.as_posix()) for f in self.output_pickle_files)
 
 
-class FractalSlurmExecutor(SlurmExecutor):
+class FractalSlurmSudoExecutor(Executor):
     """
-    [docstring line not captured in this diff view]
+    Executor to submit SLURM jobs as a different user, via `sudo -u`
+
+    This class is a custom re-implementation of the SLURM executor from
+
+    > clusterfutures <https://github.com/sampsyo/clusterfutures>
+    > Original Copyright
+    > Copyright 2021 Adrian Sampson <asampson@cs.washington.edu>
+    > License: MIT
+
 
     Attributes:
         slurm_user:
@@ -211,7 +206,7 @@ class FractalSlurmExecutor(SlurmExecutor):
             Dictionary with paths of slurm-related files for active jobs
     """
 
-    wait_thread_cls = FractalSlurmWaitThread
+    wait_thread_cls = FractalSlurmSudoWaitThread
     slurm_user: str
     shutdown_file: str
     common_script_lines: list[str]
@@ -219,8 +214,7 @@ class FractalSlurmExecutor(SlurmExecutor):
     workflow_dir_local: Path
     workflow_dir_remote: Path
     map_jobid_to_slurm_files: dict[str, tuple[str, str, str]]
-
-    slurm_account: Optional[str]
+    slurm_account: Optional[str] = None
     jobs: dict[str, tuple[Future, SlurmJob]]
 
     def __init__(
@@ -232,7 +226,6 @@ class FractalSlurmExecutor(SlurmExecutor):
         user_cache_dir: Optional[str] = None,
         common_script_lines: Optional[list[str]] = None,
         slurm_poll_interval: Optional[int] = None,
-        keep_pickle_files: bool = False,
         slurm_account: Optional[str] = None,
         *args,
         **kwargs,
@@ -246,18 +239,31 @@ class FractalSlurmExecutor(SlurmExecutor):
                 "Missing attribute FractalSlurmExecutor.slurm_user"
             )
 
-        [line not captured in this diff view]
+        self.jobs = {}
+        self.job_outfiles = {}
+        self.jobs_lock = threading.Lock()
+        self.jobs_empty_cond = threading.Condition(self.jobs_lock)
+
+        self.wait_thread = self.wait_thread_cls(self._completion)
+        self.wait_thread.start()
 
         # Assign `wait_thread.shutdown_callback` early, since it may be called
        # from within `_stop_and_join_wait_thread` (e.g. if an exception is
         # raised within `__init__`).
         self.wait_thread.shutdown_callback = self.shutdown
 
-        self.keep_pickle_files = keep_pickle_files
         self.slurm_user = slurm_user
         self.slurm_account = slurm_account
 
         self.common_script_lines = common_script_lines or []
+        settings = Inject(get_settings)
+
+        if settings.FRACTAL_SLURM_WORKER_PYTHON is not None:
+            try:
+                self.check_remote_python_interpreter()
+            except Exception as e:
+                self._stop_and_join_wait_thread()
+                raise RuntimeError(f"Original error {str(e)}")
 
         # Check that SLURM account is not set here
         try:
@@ -289,7 +295,6 @@ class FractalSlurmExecutor(SlurmExecutor):
         # Set the attribute slurm_poll_interval for self.wait_thread (see
         # cfut.SlurmWaitThread)
         if not slurm_poll_interval:
-            settings = Inject(get_settings)
             slurm_poll_interval = settings.FRACTAL_SLURM_POLL_INTERVAL
         self.wait_thread.slurm_poll_interval = slurm_poll_interval
         self.wait_thread.slurm_user = self.slurm_user
@@ -608,7 +613,14 @@ class FractalSlurmExecutor(SlurmExecutor):
         _prefixes = []
         _subfolder_names = []
         for component in components:
-            [line not captured in this diff view]
+            # In Fractal, `component` is a `dict` by construction (e.g.
+            # `component = {"zarr_url": "/something", "param": 1}``). The
+            # try/except covers the case of e.g. `executor.map([1, 2])`,
+            # which is useful for testing.
+            try:
+                actual_component = component.get(_COMPONENT_KEY_, None)
+            except AttributeError:
+                actual_component = str(component)
             _task_file_paths = get_task_file_paths(
                 workflow_dir_local=task_files.workflow_dir_local,
                 workflow_dir_remote=task_files.workflow_dir_remote,
@@ -860,8 +872,7 @@ class FractalSlurmExecutor(SlurmExecutor):
                     " cancelled, exit from"
                     " FractalSlurmExecutor._completion."
                 )
-
-                in_path.unlink()
+                in_path.unlink()
                 self._cleanup(jobid)
                 return
 
@@ -903,23 +914,20 @@ class FractalSlurmExecutor(SlurmExecutor):
                         exc = TaskExecutionError(proxy.tb, **kwargs)
                         fut.set_exception(exc)
                         return
-
-                    out_path.unlink()
+                    out_path.unlink()
                 except InvalidStateError:
                     logger.warning(
                         f"Future {fut} (SLURM job ID: {jobid}) was already"
                         " cancelled, exit from"
                         " FractalSlurmExecutor._completion."
                     )
-
-
-                    in_path.unlink()
+                    out_path.unlink()
+                    in_path.unlink()
                     self._cleanup(jobid)
                     return
 
                 # Clean up input pickle file
-
-                in_path.unlink()
+                in_path.unlink()
                 self._cleanup(jobid)
                 if job.single_task_submission:
                     fut.set_result(outputs[0])
@@ -1155,8 +1163,10 @@ class FractalSlurmExecutor(SlurmExecutor):
         script_lines = slurm_config.sort_script_lines(script_lines)
         logger.debug(script_lines)
 
-        # Always print output of `pwd`
-        script_lines.append(…)
+        # Always print output of `uname -n` and `pwd`
+        script_lines.append(
+            '"Hostname: `uname -n`; current directory: `pwd`"\n'
+        )
 
         # Complete script preamble
         script_lines.append("\n")
@@ -1230,7 +1240,7 @@ class FractalSlurmExecutor(SlurmExecutor):
         logger.debug("Executor shutdown: end")
 
     def _stop_and_join_wait_thread(self):
-        self.wait_thread.stop()
+        self.wait_thread.shutdown = True
         self.wait_thread.join()
 
     def __exit__(self, *args, **kwargs):
@@ -1243,3 +1253,29 @@ class FractalSlurmExecutor(SlurmExecutor):
         )
         self._stop_and_join_wait_thread()
         logger.debug("[FractalSlurmExecutor.__exit__] End")
+
+    def check_remote_python_interpreter(self):
+        """
+        Check fractal-server version on the _remote_ Python interpreter.
+        """
+        settings = Inject(get_settings)
+        output = _subprocess_run_or_raise(
+            (
+                f"{settings.FRACTAL_SLURM_WORKER_PYTHON} "
+                "-m fractal_server.app.runner.versions"
+            )
+        )
+        runner_version = json.loads(output.stdout.strip("\n"))[
+            "fractal_server"
+        ]
+
+        if runner_version != __VERSION__:
+            error_msg = (
+                "Fractal-server version mismatch.\n"
+                "Local interpreter: "
+                f"({sys.executable}): {__VERSION__}.\n"
+                "Remote interpreter: "
+                f"({settings.FRACTAL_SLURM_WORKER_PYTHON}): {runner_version}."
+            )
+            logger.error(error_msg)
+            raise ValueError(error_msg)
```
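
Besides dropping the `cfut.SlurmExecutor` base class and switching worker IDs from `cfut.util.random_string()` to `uuid.uuid4()`, the executor gains `check_remote_python_interpreter`, which fails fast on version skew. Here is a standalone sketch of the same check, using plain `subprocess` in place of fractal-server's `_subprocess_run_or_raise` helper; the `-m fractal_server.app.runner.versions` module path and the JSON shape follow the diff above, while `LOCAL_VERSION` stands in for `fractal_server.__VERSION__`:

```python
import json
import shlex
import subprocess
import sys

LOCAL_VERSION = "2.13.0"  # stand-in for fractal_server.__VERSION__


def check_remote_python_interpreter(remote_python: str) -> None:
    """Raise if the remote interpreter runs a different fractal-server."""
    cmd = f"{remote_python} -m fractal_server.app.runner.versions"
    # The module prints a JSON dict that includes a "fractal_server" key.
    res = subprocess.run(  # nosec
        shlex.split(cmd), capture_output=True, text=True, check=True
    )
    remote_version = json.loads(res.stdout.strip("\n"))["fractal_server"]
    if remote_version != LOCAL_VERSION:
        raise ValueError(
            "Fractal-server version mismatch.\n"
            f"Local interpreter ({sys.executable}): {LOCAL_VERSION}.\n"
            f"Remote interpreter ({remote_python}): {remote_version}."
        )
```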

**fractal_server/app/runner/v2/__init__.py**

```diff
@@ -31,9 +31,6 @@ from ..executors.slurm.sudo._subprocess_run_as_user import _mkdir_as_user
 from ..filenames import WORKFLOW_LOG_FILENAME
 from ..task_files import task_subfolder_name
 from ._local import process_workflow as local_process_workflow
-from ._local_experimental import (
-    process_workflow as local_experimental_process_workflow,
-)
 from ._slurm_ssh import process_workflow as slurm_ssh_process_workflow
 from ._slurm_sudo import process_workflow as slurm_sudo_process_workflow
 from .handle_failed_job import mark_last_wftask_as_failed
@@ -45,7 +42,6 @@ _backends = {}
 _backends["local"] = local_process_workflow
 _backends["slurm"] = slurm_sudo_process_workflow
 _backends["slurm_ssh"] = slurm_ssh_process_workflow
-_backends["local_experimental"] = local_experimental_process_workflow
 
 
 def fail_job(
@@ -184,8 +180,6 @@ def submit_workflow(
     # Define and create WORKFLOW_DIR_REMOTE
     if FRACTAL_RUNNER_BACKEND == "local":
         WORKFLOW_DIR_REMOTE = WORKFLOW_DIR_LOCAL
-    elif FRACTAL_RUNNER_BACKEND == "local_experimental":
-        WORKFLOW_DIR_REMOTE = WORKFLOW_DIR_LOCAL
     elif FRACTAL_RUNNER_BACKEND == "slurm":
         WORKFLOW_DIR_REMOTE = (
             Path(user_cache_dir) / WORKFLOW_DIR_LOCAL.name
@@ -287,9 +281,6 @@ def submit_workflow(
     if FRACTAL_RUNNER_BACKEND == "local":
         process_workflow = local_process_workflow
         backend_specific_kwargs = {}
-    elif FRACTAL_RUNNER_BACKEND == "local_experimental":
-        process_workflow = local_experimental_process_workflow
-        backend_specific_kwargs = {}
     elif FRACTAL_RUNNER_BACKEND == "slurm":
         process_workflow = slurm_sudo_process_workflow
         backend_specific_kwargs = dict(
```
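
With the `local_experimental` backend removed, dispatch reduces to three entries in the `_backends` dict. A small sketch of that dispatch-table pattern, with an explicit error for unknown backends (the lambdas are placeholders for the real `process_workflow` callables):

```python
from typing import Any, Callable

# Placeholder callables standing in for the three process_workflow functions.
_backends: dict[str, Callable[..., Any]] = {
    "local": lambda **kwargs: "local run",
    "slurm": lambda **kwargs: "sudo-SLURM run",
    "slurm_ssh": lambda **kwargs: "SSH-SLURM run",
}


def get_process_workflow(backend: str) -> Callable[..., Any]:
    try:
        return _backends[backend]
    except KeyError:
        raise ValueError(
            f"Invalid backend {backend!r}, expected one of {sorted(_backends)}."
        )


print(get_process_workflow("slurm")())  # sudo-SLURM run
```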

**fractal_server/app/runner/v2/_local/_local_config.py**

```diff
@@ -17,8 +17,8 @@ from typing import Literal
 from typing import Optional
 
 from pydantic import BaseModel
-from pydantic import Extra
-from pydantic.error_wrappers import ValidationError
+from pydantic import ConfigDict
+from pydantic import ValidationError
 
 from .....config import get_settings
 from .....syringe import Inject
@@ -33,7 +33,7 @@ class LocalBackendConfigError(ValueError):
     pass
 
 
-class LocalBackendConfig(BaseModel, extra=Extra.forbid):
+class LocalBackendConfig(BaseModel):
     """
     Specifications of the local-backend configuration
 
@@ -44,7 +44,8 @@ class LocalBackendConfig(BaseModel, extra=Extra.forbid):
         start at the same time.
     """
 
-    parallel_tasks_per_job: Optional[int] = None
+    model_config = ConfigDict(extra="forbid")
+    parallel_tasks_per_job: Optional[int] = None
 
 
 def get_default_local_backend_config():
```
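
This hunk shows the Pydantic v1-to-v2 move that recurs throughout this release: configuration passed as class keywords (`BaseModel, extra=Extra.forbid`) becomes a `model_config = ConfigDict(...)` attribute. A minimal before/after sketch:

```python
from typing import Optional

from pydantic import BaseModel, ConfigDict, ValidationError

# Pydantic v1 (pre-2.13):
#     class LocalBackendConfig(BaseModel, extra=Extra.forbid): ...


class LocalBackendConfig(BaseModel):
    """Pydantic v2 style, as in the hunk above."""

    model_config = ConfigDict(extra="forbid")
    parallel_tasks_per_job: Optional[int] = None


try:
    LocalBackendConfig(parallel_tasks_per_job=2, typo_field=1)
except ValidationError as e:
    # extra="forbid" rejects unknown fields, like Extra.forbid did in v1.
    print(e.errors()[0]["type"])  # extra_forbidden
```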

**fractal_server/app/runner/v2/_slurm_common/get_slurm_config.py**

```diff
@@ -67,14 +67,14 @@ def get_slurm_config(
 
     # Incorporate slurm_env.default_slurm_config
     slurm_env = load_slurm_config_file(config_path=config_path)
-    slurm_dict = slurm_env.default_slurm_config.dict(
+    slurm_dict = slurm_env.default_slurm_config.model_dump(
         exclude_unset=True, exclude={"mem"}
     )
     if slurm_env.default_slurm_config.mem:
         slurm_dict["mem_per_task_MB"] = slurm_env.default_slurm_config.mem
 
     # Incorporate slurm_env.batching_config
-    for key, value in slurm_env.batching_config.dict().items():
+    for key, value in slurm_env.batching_config.model_dump().items():
         slurm_dict[key] = value
 
     # Incorporate slurm_env.user_local_exports
@@ -82,7 +82,7 @@ def get_slurm_config(
 
     logger.debug(
         "[get_slurm_config] Fractal SLURM configuration file: "
-        f"{slurm_env.dict()=}"
+        f"{slurm_env.model_dump()=}"
     )
 
     # GPU-related options
@@ -97,7 +97,7 @@ def get_slurm_config(
         needs_gpu = False
     logger.debug(f"[get_slurm_config] {needs_gpu=}")
     if needs_gpu:
-        for key, value in slurm_env.gpu_slurm_config.dict(
+        for key, value in slurm_env.gpu_slurm_config.model_dump(
             exclude_unset=True, exclude={"mem"}
         ).items():
             slurm_dict[key] = value
```
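
Here the v1 `.dict()` calls become `.model_dump()`, keeping the same `exclude_unset`/`exclude` keywords. A short sketch of why `exclude_unset=True` matters when layering configuration sources (`SlurmDefaults` is a toy stand-in for the real config model):

```python
from typing import Optional

from pydantic import BaseModel


class SlurmDefaults(BaseModel):
    partition: Optional[str] = None
    mem: Optional[str] = None
    cpus_per_task: int = 1


defaults = SlurmDefaults(partition="main", mem="8G")

# Keep only explicitly-set fields, and drop "mem" (it is remapped to
# "mem_per_task_MB" separately in the function above).
slurm_dict = defaults.model_dump(exclude_unset=True, exclude={"mem"})
print(slurm_dict)  # {'partition': 'main'}
```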

**fractal_server/app/runner/v2/_slurm_sudo/__init__.py**

```diff
@@ -21,7 +21,7 @@ from typing import Optional
 
 from ....models.v2 import DatasetV2
 from ....models.v2 import WorkflowV2
-from ...executors.slurm.sudo.executor import FractalSlurmExecutor
+from ...executors.slurm.sudo.executor import FractalSlurmSudoExecutor
 from ...set_start_and_last_task_index import set_start_and_last_task_index
 from ..runner import execute_tasks_v2
 from ._submit_setup import _slurm_submit_setup
@@ -64,7 +64,7 @@ def process_workflow(
     if isinstance(worker_init, str):
         worker_init = worker_init.split("\n")
 
-    with FractalSlurmExecutor(
+    with FractalSlurmSudoExecutor(
         debug=True,
         keep_logs=True,
         slurm_user=slurm_user,
```

**fractal_server/app/runner/v2/deduplicate_list.py**

```diff
@@ -16,7 +16,7 @@ def deduplicate_list(
     new_list_dict = []
     new_list_objs = []
     for this_obj in this_list:
-        this_dict = this_obj.dict()
+        this_dict = this_obj.model_dump()
         if this_dict not in new_list_dict:
             new_list_dict.append(this_dict)
             new_list_objs.append(this_obj)
```
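
`deduplicate_list` compares `model_dump()` dictionaries instead of object identity, so distinct instances with equal fields collapse to one. A self-contained sketch, specialized to a toy `Image` model (the real function is generic over Pydantic models):

```python
from pydantic import BaseModel


class Image(BaseModel):
    zarr_url: str


def deduplicate_list(this_list: list[Image]) -> list[Image]:
    # Compare dumped dicts, keep the first occurrence of each.
    new_list_dict: list[dict] = []
    new_list_objs: list[Image] = []
    for this_obj in this_list:
        this_dict = this_obj.model_dump()
        if this_dict not in new_list_dict:
            new_list_dict.append(this_dict)
            new_list_objs.append(this_obj)
    return new_list_objs


images = [Image(zarr_url="/a"), Image(zarr_url="/a"), Image(zarr_url="/b")]
print(len(deduplicate_list(images)))  # 2
```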

**fractal_server/app/runner/v2/runner.py**

```diff
@@ -75,10 +75,13 @@ def execute_tasks_v2(
     with next(get_sync_db()) as db:
         db_dataset = db.get(DatasetV2, dataset.id)
         new_history_item = _DatasetHistoryItemV2(
-            workflowtask=…
+            workflowtask=dict(
+                **wftask.model_dump(exclude={"task"}),
+                task=wftask.task.model_dump(),
+            ),
             status=WorkflowTaskStatusTypeV2.SUBMITTED,
             parallelization=dict(),  # FIXME: re-include parallelization
-        ).dict()
+        ).model_dump()
         db_dataset.history.append(new_history_item)
         flag_modified(db_dataset, "history")
         db.merge(db_dataset)
@@ -132,7 +135,9 @@ def execute_tasks_v2(
             and current_task_output.image_list_removals == []
         ):
             current_task_output = TaskOutput(
-                **current_task_output.dict(exclude={"image_list_updates"}),
+                **current_task_output.model_dump(
+                    exclude={"image_list_updates"}
+                ),
                 image_list_updates=[
                     dict(zarr_url=img["zarr_url"]) for img in filtered_images
                 ],
@@ -141,7 +146,7 @@ def execute_tasks_v2(
         # Update image list
         current_task_output.check_zarr_urls_are_unique()
         for image_obj in current_task_output.image_list_updates:
-            image = image_obj.dict()
+            image = image_obj.model_dump()
             # Edit existing image
             tmp_image_paths = [img["zarr_url"] for img in tmp_images]
             if image["zarr_url"] in tmp_image_paths:
```
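
The `TaskOutput` rewrite above rebuilds a model from `model_dump(exclude=...)` plus one overridden field. Pydantic v2 also offers `model_copy(update=...)` for the same effect without re-validation; a sketch of both on a toy model:

```python
from pydantic import BaseModel, Field


class TaskOutput(BaseModel):
    image_list_updates: list[dict] = Field(default_factory=list)
    image_list_removals: list[str] = Field(default_factory=list)


out = TaskOutput(image_list_removals=["/old"])
updates = [{"zarr_url": "/a"}, {"zarr_url": "/b"}]

# Pattern from the hunk: dump everything except the replaced field, then
# re-validate a fresh instance.
rebuilt = TaskOutput(
    **out.model_dump(exclude={"image_list_updates"}),
    image_list_updates=updates,
)

# Equivalent shortcut (skips re-validation of the copied fields):
copied = out.model_copy(update={"image_list_updates": updates})
assert rebuilt == copied
```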

**fractal_server/app/runner/v2/task_interface.py**

```diff
@@ -1,15 +1,17 @@
 from typing import Any
 
 from pydantic import BaseModel
-from pydantic import Extra
+from pydantic import ConfigDict
 from pydantic import Field
-from pydantic import validator
+from pydantic import field_validator
 
 from ....images import SingleImageTaskOutput
 from fractal_server.urls import normalize_url
 
 
-class TaskOutput(BaseModel, extra=Extra.forbid):
+class TaskOutput(BaseModel):
+
+    model_config = ConfigDict(extra="forbid")
 
     image_list_updates: list[SingleImageTaskOutput] = Field(
         default_factory=list
@@ -35,21 +37,27 @@ class TaskOutput(BaseModel, extra=Extra.forbid):
             msg = f"{msg}\n{duplicate}"
             raise ValueError(msg)
 
-    @validator("image_list_removals")
+    @field_validator("image_list_removals")
+    @classmethod
     def normalize_paths(cls, v: list[str]) -> list[str]:
         return [normalize_url(zarr_url) for zarr_url in v]
 
 
-class InitArgsModel(BaseModel, extra=Extra.forbid):
+class InitArgsModel(BaseModel):
+
+    model_config = ConfigDict(extra="forbid")
 
     zarr_url: str
     init_args: dict[str, Any] = Field(default_factory=dict)
 
-    @validator("zarr_url")
+    @field_validator("zarr_url")
+    @classmethod
     def normalize_path(cls, v: str) -> str:
         return normalize_url(v)
 
 
-class InitTaskOutput(BaseModel, extra=Extra.forbid):
+class InitTaskOutput(BaseModel):
+
+    model_config = ConfigDict(extra="forbid")
 
     parallelization_list: list[InitArgsModel] = Field(default_factory=list)
```
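
Decorator migration: v1 `@validator` becomes `@field_validator` stacked with `@classmethod`. A before/after sketch using the URL-normalization idea from this file (`normalize_url` below is a stand-in for `fractal_server.urls.normalize_url`):

```python
from pydantic import BaseModel, field_validator


def normalize_url(url: str) -> str:
    # Stand-in for fractal_server.urls.normalize_url
    return url.rstrip("/")


class InitArgsModel(BaseModel):
    zarr_url: str

    # Pydantic v1 spelled this:
    #     @validator("zarr_url")
    #     def normalize_path(cls, v): ...
    @field_validator("zarr_url")
    @classmethod
    def normalize_path(cls, v: str) -> str:
        return normalize_url(v)


print(InitArgsModel(zarr_url="/plate.zarr/").zarr_url)  # /plate.zarr
```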

**fractal_server/app/schemas/_filter_validators.py**

```diff
@@ -5,22 +5,25 @@ from fractal_server.images.models import AttributeFiltersType
 
 
 def validate_type_filters(
-    type_filters: Optional[dict[str, bool]]
+    cls, type_filters: Optional[dict[str, bool]]
 ) -> dict[str, bool]:
     if type_filters is None:
         raise ValueError("'type_filters' cannot be 'None'.")
 
-    type_filters = valdict_keys("type_filters")(type_filters)
+    type_filters = valdict_keys("type_filters")(cls, type_filters)
     return type_filters
 
 
 def validate_attribute_filters(
+    cls,
     attribute_filters: Optional[AttributeFiltersType],
 ) -> AttributeFiltersType:
     if attribute_filters is None:
         raise ValueError("'attribute_filters' cannot be 'None'.")
 
-    attribute_filters = valdict_keys("attribute_filters")(attribute_filters)
+    attribute_filters = valdict_keys("attribute_filters")(
+        cls, attribute_filters
+    )
     for key, values in attribute_filters.items():
         if values == []:
             raise ValueError(
```
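
These shared validators are plain functions attached to many schema classes; since v2 field validators are classmethods, they now take `cls` explicitly and pass it along to `valdict_keys(...)`. A sketch of how such a shared validator can plug into a model, assuming the `field_validator(...)(classmethod(...))` attachment style implied by the new `cls` parameter:

```python
from typing import Optional

from pydantic import BaseModel, field_validator


def validate_type_filters(
    cls, type_filters: Optional[dict[str, bool]]
) -> dict[str, bool]:
    # Shared, model-agnostic validator; `cls` matches the classmethod
    # signature that `field_validator` expects.
    if type_filters is None:
        raise ValueError("'type_filters' cannot be 'None'.")
    return type_filters


class DatasetCreate(BaseModel):
    # Hypothetical schema, for illustration only.
    type_filters: dict[str, bool] = {}

    _type_filters = field_validator("type_filters")(
        classmethod(validate_type_filters)
    )


print(DatasetCreate(type_filters={"is_3D": True}).type_filters)
```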

**fractal_server/app/schemas/_validators.py**

```diff
@@ -11,7 +11,7 @@ def valstr(attribute: str, accept_none: bool = False):
     If `accept_none`, the validator also accepts `None`.
     """
 
-    def val(string: Optional[str]) -> Optional[str]:
+    def val(cls, string: Optional[str]) -> Optional[str]:
         if string is None:
             if accept_none:
                 return string
@@ -28,14 +28,16 @@ def valstr(attribute: str, accept_none: bool = False):
 
 
 def valdict_keys(attribute: str):
-    def val(d: Optional[dict[str, Any]]) -> Optional[dict[str, Any]]:
+    def val(cls, d: Optional[dict[str, Any]]) -> Optional[dict[str, Any]]:
         """
         Apply valstr to every key of the dictionary, and fail if there are
         identical keys.
         """
         if d is not None:
             old_keys = list(d.keys())
-            new_keys = [valstr(f"{attribute}[{key}]")(key) for key in old_keys]
+            new_keys = [
+                valstr(f"{attribute}[{key}]")(cls, key) for key in old_keys
+            ]
             if len(new_keys) != len(set(new_keys)):
                 raise ValueError(
                     f"Dictionary contains multiple identical keys: '{d}'."
@@ -53,7 +55,7 @@ def val_absolute_path(attribute: str, accept_none: bool = False):
     Check that a string attribute is an absolute path
     """
 
-    def val(string: Optional[str]) -> Optional[str]:
+    def val(cls, string: Optional[str]) -> Optional[str]:
         if string is None:
             if accept_none:
                 return string
@@ -75,7 +77,7 @@ def val_absolute_path(attribute: str, accept_none: bool = False):
 
 
 def val_unique_list(attribute: str):
-    def val(must_be_unique: Optional[list]) -> Optional[list]:
+    def val(cls, must_be_unique: Optional[list]) -> Optional[list]:
         if must_be_unique is not None:
             if len(set(must_be_unique)) != len(must_be_unique):
                 raise ValueError(f"`{attribute}` list has repetitions")
```
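
`valstr` and friends are validator factories: `valstr("name")` returns a `val(cls, value)` function ready to hand to `field_validator`. A sketch of the factory plus its attachment to a hypothetical `UserCreate` model; the strip/empty-check body is an assumption based on the docstring shown above, not a verbatim copy:

```python
from typing import Optional

from pydantic import BaseModel, field_validator


def valstr(attribute: str, accept_none: bool = False):
    """Factory returning a reusable `val(cls, value)` field validator."""

    def val(cls, string: Optional[str]) -> Optional[str]:
        if string is None:
            if accept_none:
                return string
            raise ValueError(f"String attribute '{attribute}' cannot be None")
        stripped = string.strip()
        if not stripped:
            raise ValueError(f"String attribute '{attribute}' cannot be empty")
        return stripped

    return val


class UserCreate(BaseModel):
    # Hypothetical model; fractal-server attaches the factories the same way.
    slurm_user: Optional[str] = None

    _slurm_user = field_validator("slurm_user")(
        classmethod(valstr("slurm_user", accept_none=True))
    )


print(UserCreate(slurm_user="  alice  ").slurm_user)  # alice
```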