fractal-server 2.12.1__py3-none-any.whl → 2.13.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fractal_server/__init__.py +1 -1
- fractal_server/app/models/security.py +9 -12
- fractal_server/app/models/v2/__init__.py +4 -0
- fractal_server/app/models/v2/accounting.py +35 -0
- fractal_server/app/models/v2/dataset.py +2 -2
- fractal_server/app/models/v2/job.py +11 -9
- fractal_server/app/models/v2/task.py +2 -3
- fractal_server/app/models/v2/task_group.py +6 -2
- fractal_server/app/models/v2/workflowtask.py +15 -8
- fractal_server/app/routes/admin/v2/__init__.py +4 -0
- fractal_server/app/routes/admin/v2/accounting.py +108 -0
- fractal_server/app/routes/admin/v2/impersonate.py +35 -0
- fractal_server/app/routes/admin/v2/job.py +5 -13
- fractal_server/app/routes/admin/v2/task.py +1 -1
- fractal_server/app/routes/admin/v2/task_group.py +5 -13
- fractal_server/app/routes/api/v2/_aux_functions_task_lifecycle.py +3 -3
- fractal_server/app/routes/api/v2/dataset.py +4 -4
- fractal_server/app/routes/api/v2/images.py +11 -11
- fractal_server/app/routes/api/v2/project.py +2 -2
- fractal_server/app/routes/api/v2/status.py +1 -1
- fractal_server/app/routes/api/v2/submit.py +9 -6
- fractal_server/app/routes/api/v2/task.py +4 -2
- fractal_server/app/routes/api/v2/task_collection.py +3 -2
- fractal_server/app/routes/api/v2/task_group.py +4 -7
- fractal_server/app/routes/api/v2/workflow.py +3 -3
- fractal_server/app/routes/api/v2/workflow_import.py +3 -3
- fractal_server/app/routes/api/v2/workflowtask.py +3 -1
- fractal_server/app/routes/auth/_aux_auth.py +4 -1
- fractal_server/app/routes/auth/current_user.py +3 -5
- fractal_server/app/routes/auth/group.py +1 -1
- fractal_server/app/routes/auth/users.py +2 -4
- fractal_server/app/routes/aux/__init__.py +0 -20
- fractal_server/app/routes/aux/_runner.py +1 -1
- fractal_server/app/routes/aux/validate_user_settings.py +1 -2
- fractal_server/app/runner/executors/_job_states.py +13 -0
- fractal_server/app/runner/executors/slurm/_slurm_config.py +26 -18
- fractal_server/app/runner/executors/slurm/ssh/__init__.py +0 -3
- fractal_server/app/runner/executors/slurm/ssh/_executor_wait_thread.py +31 -22
- fractal_server/app/runner/executors/slurm/ssh/_slurm_job.py +2 -5
- fractal_server/app/runner/executors/slurm/ssh/executor.py +21 -27
- fractal_server/app/runner/executors/slurm/sudo/__init__.py +0 -3
- fractal_server/app/runner/executors/slurm/sudo/_check_jobs_status.py +1 -2
- fractal_server/app/runner/executors/slurm/sudo/_executor_wait_thread.py +37 -47
- fractal_server/app/runner/executors/slurm/sudo/executor.py +25 -24
- fractal_server/app/runner/v2/__init__.py +4 -9
- fractal_server/app/runner/v2/_local/__init__.py +3 -0
- fractal_server/app/runner/v2/_local/_local_config.py +5 -4
- fractal_server/app/runner/v2/_slurm_common/get_slurm_config.py +4 -4
- fractal_server/app/runner/v2/_slurm_ssh/__init__.py +2 -0
- fractal_server/app/runner/v2/_slurm_sudo/__init__.py +4 -2
- fractal_server/app/runner/v2/deduplicate_list.py +1 -1
- fractal_server/app/runner/v2/runner.py +25 -10
- fractal_server/app/runner/v2/runner_functions.py +12 -11
- fractal_server/app/runner/v2/task_interface.py +15 -7
- fractal_server/app/schemas/_filter_validators.py +6 -3
- fractal_server/app/schemas/_validators.py +7 -5
- fractal_server/app/schemas/user.py +23 -18
- fractal_server/app/schemas/user_group.py +25 -11
- fractal_server/app/schemas/user_settings.py +31 -24
- fractal_server/app/schemas/v2/__init__.py +1 -0
- fractal_server/app/schemas/v2/accounting.py +18 -0
- fractal_server/app/schemas/v2/dataset.py +48 -35
- fractal_server/app/schemas/v2/dumps.py +16 -14
- fractal_server/app/schemas/v2/job.py +49 -29
- fractal_server/app/schemas/v2/manifest.py +32 -28
- fractal_server/app/schemas/v2/project.py +18 -8
- fractal_server/app/schemas/v2/task.py +86 -75
- fractal_server/app/schemas/v2/task_collection.py +41 -30
- fractal_server/app/schemas/v2/task_group.py +39 -20
- fractal_server/app/schemas/v2/workflow.py +24 -12
- fractal_server/app/schemas/v2/workflowtask.py +63 -61
- fractal_server/app/security/__init__.py +1 -1
- fractal_server/config.py +86 -73
- fractal_server/images/models.py +18 -12
- fractal_server/main.py +1 -1
- fractal_server/migrations/versions/af1ef1c83c9b_add_accounting_tables.py +57 -0
- fractal_server/tasks/v2/utils_background.py +2 -2
- fractal_server/tasks/v2/utils_database.py +1 -1
- {fractal_server-2.12.1.dist-info → fractal_server-2.13.1.dist-info}/METADATA +9 -10
- {fractal_server-2.12.1.dist-info → fractal_server-2.13.1.dist-info}/RECORD +83 -81
- fractal_server/app/runner/v2/_local_experimental/__init__.py +0 -121
- fractal_server/app/runner/v2/_local_experimental/_local_config.py +0 -108
- fractal_server/app/runner/v2/_local_experimental/_submit_setup.py +0 -42
- fractal_server/app/runner/v2/_local_experimental/executor.py +0 -157
- {fractal_server-2.12.1.dist-info → fractal_server-2.13.1.dist-info}/LICENSE +0 -0
- {fractal_server-2.12.1.dist-info → fractal_server-2.13.1.dist-info}/WHEEL +0 -0
- {fractal_server-2.12.1.dist-info → fractal_server-2.13.1.dist-info}/entry_points.txt +0 -0
@@ -1,20 +1,9 @@
|
|
1
|
-
# This adapts clusterfutures <https://github.com/sampsyo/clusterfutures>
|
2
|
-
# Original Copyright
|
3
|
-
# Copyright 2021 Adrian Sampson <asampson@cs.washington.edu>
|
4
|
-
# License: MIT
|
5
|
-
#
|
6
|
-
# Modified by:
|
7
|
-
# Jacopo Nespolo <jacopo.nespolo@exact-lab.it>
|
8
|
-
# Tommaso Comparin <tommaso.comparin@exact-lab.it>
|
9
|
-
# Marco Franzon <marco.franzon@exact-lab.it>
|
10
|
-
#
|
11
|
-
# Copyright 2022 (C) Friedrich Miescher Institute for Biomedical Research and
|
12
|
-
# University of Zurich
|
13
1
|
import json
|
14
2
|
import math
|
15
3
|
import sys
|
16
4
|
import threading
|
17
5
|
import time
|
6
|
+
from concurrent.futures import Executor
|
18
7
|
from concurrent.futures import Future
|
19
8
|
from concurrent.futures import InvalidStateError
|
20
9
|
from copy import copy
|
@@ -25,18 +14,18 @@ from typing import Optional
|
|
25
14
|
from typing import Sequence
|
26
15
|
|
27
16
|
import cloudpickle
|
28
|
-
from cfut import SlurmExecutor
|
29
17
|
|
30
18
|
from ....filenames import SHUTDOWN_FILENAME
|
31
19
|
from ....task_files import get_task_file_paths
|
32
20
|
from ....task_files import TaskFiles
|
33
21
|
from ....versions import get_versions
|
22
|
+
from ..._job_states import STATES_FINISHED
|
34
23
|
from ...slurm._slurm_config import SlurmConfig
|
35
24
|
from .._batching import heuristics
|
36
25
|
from ..utils_executors import get_pickle_file_path
|
37
26
|
from ..utils_executors import get_slurm_file_path
|
38
27
|
from ..utils_executors import get_slurm_script_file_path
|
39
|
-
from ._executor_wait_thread import
|
28
|
+
from ._executor_wait_thread import FractalSlurmSSHWaitThread
|
40
29
|
from fractal_server.app.runner.components import _COMPONENT_KEY_
|
41
30
|
from fractal_server.app.runner.compress_folder import compress_folder
|
42
31
|
from fractal_server.app.runner.exceptions import JobExecutionError
|
@@ -48,24 +37,31 @@ from fractal_server.logger import set_logger
|
|
48
37
|
from fractal_server.ssh._fabric import FractalSSH
|
49
38
|
from fractal_server.syringe import Inject
|
50
39
|
|
40
|
+
|
51
41
|
logger = set_logger(__name__)
|
52
42
|
|
53
43
|
|
54
|
-
class FractalSlurmSSHExecutor(
|
44
|
+
class FractalSlurmSSHExecutor(Executor):
|
55
45
|
"""
|
56
|
-
|
46
|
+
Executor to submit SLURM jobs via SSH
|
47
|
+
|
48
|
+
This class is a custom re-implementation of the SLURM executor from
|
49
|
+
|
50
|
+
> clusterfutures <https://github.com/sampsyo/clusterfutures>
|
51
|
+
> Original Copyright
|
52
|
+
> Copyright 2021 Adrian Sampson <asampson@cs.washington.edu>
|
53
|
+
> License: MIT
|
57
54
|
|
58
|
-
FIXME: docstring
|
59
55
|
|
60
56
|
Attributes:
|
61
57
|
fractal_ssh: FractalSSH connection with custom lock
|
62
|
-
shutdown_file:
|
63
|
-
python_remote: Equal to `settings.FRACTAL_SLURM_WORKER_PYTHON`
|
64
|
-
wait_thread_cls: Class for waiting thread
|
65
58
|
workflow_dir_local:
|
66
59
|
Directory for both the cfut/SLURM and fractal-server files and logs
|
67
60
|
workflow_dir_remote:
|
68
61
|
Directory for both the cfut/SLURM and fractal-server files and logs
|
62
|
+
shutdown_file:
|
63
|
+
python_remote: Equal to `settings.FRACTAL_SLURM_WORKER_PYTHON`
|
64
|
+
wait_thread_cls: Class for waiting thread
|
69
65
|
common_script_lines:
|
70
66
|
Arbitrary script lines that will always be included in the
|
71
67
|
sbatch script
|
@@ -82,10 +78,10 @@ class FractalSlurmSSHExecutor(SlurmExecutor):
|
|
82
78
|
shutdown_file: str
|
83
79
|
python_remote: str
|
84
80
|
|
85
|
-
wait_thread_cls =
|
81
|
+
wait_thread_cls = FractalSlurmSSHWaitThread
|
86
82
|
|
87
83
|
common_script_lines: list[str]
|
88
|
-
slurm_account: Optional[str]
|
84
|
+
slurm_account: Optional[str] = None
|
89
85
|
|
90
86
|
jobs: dict[str, tuple[Future, SlurmJob]]
|
91
87
|
map_jobid_to_slurm_files_local: dict[str, tuple[str, str, str]]
|
@@ -1159,7 +1155,7 @@ class FractalSlurmSSHExecutor(SlurmExecutor):
|
|
1159
1155
|
Path(tarfile_path_local).unlink()
|
1160
1156
|
|
1161
1157
|
t_1 = time.perf_counter()
|
1162
|
-
logger.info("[_get_subfolder_sftp] End -
|
1158
|
+
logger.info(f"[_get_subfolder_sftp] End - elapsed: {t_1 - t_0:.3f} s")
|
1163
1159
|
|
1164
1160
|
def _prepare_sbatch_script(
|
1165
1161
|
self,
|
@@ -1258,7 +1254,7 @@ class FractalSlurmSSHExecutor(SlurmExecutor):
|
|
1258
1254
|
logger.debug("Executor shutdown: end")
|
1259
1255
|
|
1260
1256
|
def _stop_and_join_wait_thread(self):
|
1261
|
-
self.wait_thread.
|
1257
|
+
self.wait_thread.shutdown = True
|
1262
1258
|
self.wait_thread.join()
|
1263
1259
|
|
1264
1260
|
def __exit__(self, *args, **kwargs):
|
@@ -1295,8 +1291,6 @@ class FractalSlurmSSHExecutor(SlurmExecutor):
|
|
1295
1291
|
(released under the MIT licence)
|
1296
1292
|
"""
|
1297
1293
|
|
1298
|
-
from cfut.slurm import STATES_FINISHED
|
1299
|
-
|
1300
1294
|
logger.debug(
|
1301
1295
|
f"[FractalSlurmSSHExecutor._jobs_finished] START ({job_ids=})"
|
1302
1296
|
)
|
@@ -1387,6 +1381,6 @@ class FractalSlurmSSHExecutor(SlurmExecutor):
|
|
1387
1381
|
t_end_handshake = time.perf_counter()
|
1388
1382
|
logger.info(
|
1389
1383
|
"[FractalSlurmSSHExecutor.ssh_handshake] END"
|
1390
|
-
f" - elapsed: {t_end_handshake-t_start_handshake:.3f} s"
|
1384
|
+
f" - elapsed: {t_end_handshake - t_start_handshake:.3f} s"
|
1391
1385
|
)
|
1392
1386
|
return remote_versions
|
@@ -1,12 +1,10 @@
|
|
1
1
|
import os
|
2
|
+
import threading
|
2
3
|
import time
|
3
4
|
import traceback
|
4
5
|
from itertools import count
|
5
|
-
from typing import Callable
|
6
6
|
from typing import Optional
|
7
7
|
|
8
|
-
from cfut import FileWaitThread
|
9
|
-
|
10
8
|
from ......logger import set_logger
|
11
9
|
from ._check_jobs_status import _jobs_finished
|
12
10
|
from fractal_server.app.runner.exceptions import JobExecutionError
|
@@ -14,33 +12,43 @@ from fractal_server.app.runner.exceptions import JobExecutionError
|
|
14
12
|
logger = set_logger(__name__)
|
15
13
|
|
16
14
|
|
17
|
-
class
|
15
|
+
class FractalSlurmSudoWaitThread(threading.Thread):
|
18
16
|
"""
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
17
|
+
Thread that monitors a pool of SLURM jobs
|
18
|
+
|
19
|
+
This class is a custom re-implementation of the waiting thread class from:
|
20
|
+
|
21
|
+
> clusterfutures <https://github.com/sampsyo/clusterfutures>
|
22
|
+
> Original Copyright
|
23
|
+
> Copyright 2021 Adrian Sampson <asampson@cs.washington.edu>
|
24
|
+
> License: MIT
|
25
|
+
|
26
|
+
Attributes:
|
27
|
+
slurm_user:
|
28
|
+
shutdown_file:
|
29
|
+
shutdown_callback:
|
30
|
+
slurm_poll_interval:
|
31
|
+
waiting:
|
32
|
+
shutdown:
|
33
|
+
lock:
|
36
34
|
"""
|
37
35
|
|
38
36
|
slurm_user: str
|
39
37
|
shutdown_file: Optional[str] = None
|
40
|
-
shutdown_callback:
|
41
|
-
|
42
|
-
|
43
|
-
|
38
|
+
shutdown_callback: callable
|
39
|
+
slurm_poll_interval: int = 30
|
40
|
+
waiting: dict[tuple[str, ...], str]
|
41
|
+
shutdown: bool
|
42
|
+
_lock: threading.Lock
|
43
|
+
|
44
|
+
def __init__(self, callback: callable, interval=1):
|
45
|
+
threading.Thread.__init__(self, daemon=True)
|
46
|
+
self.callback = callback
|
47
|
+
self.interval = interval
|
48
|
+
self.waiting = {}
|
49
|
+
self._lock = threading.Lock() # To protect the .waiting dict
|
50
|
+
self.shutdown = False
|
51
|
+
self.active_job_ids = []
|
44
52
|
|
45
53
|
def wait(
|
46
54
|
self,
|
@@ -61,10 +69,10 @@ class FractalFileWaitThread(FileWaitThread):
|
|
61
69
|
error_msg = "Cannot call `wait` method after executor shutdown."
|
62
70
|
logger.warning(error_msg)
|
63
71
|
raise JobExecutionError(info=error_msg)
|
64
|
-
with self.
|
72
|
+
with self._lock:
|
65
73
|
self.waiting[filenames] = jobid
|
66
74
|
|
67
|
-
def
|
75
|
+
def check_shutdown(self, i):
|
68
76
|
"""
|
69
77
|
Do one shutdown-file-existence check.
|
70
78
|
|
@@ -99,30 +107,12 @@ class FractalFileWaitThread(FileWaitThread):
|
|
99
107
|
if self.shutdown:
|
100
108
|
self.shutdown_callback()
|
101
109
|
return
|
102
|
-
with self.
|
110
|
+
with self._lock:
|
103
111
|
self.check(i)
|
104
112
|
time.sleep(self.interval)
|
105
113
|
|
106
|
-
|
107
|
-
class FractalSlurmWaitThread(FractalFileWaitThread):
|
108
|
-
"""
|
109
|
-
Replaces the original clusterfutures.SlurmWaitThread, to inherit from
|
110
|
-
FractalFileWaitThread instead of FileWaitThread.
|
111
|
-
|
112
|
-
The function is copied from clusterfutures 0.5. Original Copyright: 2022
|
113
|
-
Adrian Sampson, released under the MIT licence
|
114
|
-
|
115
|
-
**Note**: if `self.interval != 1` then this should be modified, but for
|
116
|
-
`clusterfutures` v0.5 `self.interval` is indeed equal to `1`.
|
117
|
-
|
118
|
-
Changed from clusterfutures:
|
119
|
-
* Rename `id_to_filename` to `id_to_filenames`
|
120
|
-
"""
|
121
|
-
|
122
|
-
slurm_poll_interval = 30
|
123
|
-
|
124
114
|
def check(self, i):
|
125
|
-
|
115
|
+
self.check_shutdown(i)
|
126
116
|
if i % (self.slurm_poll_interval // self.interval) == 0:
|
127
117
|
try:
|
128
118
|
finished_jobs = _jobs_finished(self.waiting.values())
|
@@ -1,21 +1,12 @@
|
|
1
|
-
# This adapts clusterfutures <https://github.com/sampsyo/clusterfutures>
|
2
|
-
# Original Copyright
|
3
|
-
# Copyright 2021 Adrian Sampson <asampson@cs.washington.edu>
|
4
|
-
# License: MIT
|
5
|
-
#
|
6
|
-
# Modified by:
|
7
|
-
# Jacopo Nespolo <jacopo.nespolo@exact-lab.it>
|
8
|
-
# Tommaso Comparin <tommaso.comparin@exact-lab.it>
|
9
|
-
# Marco Franzon <marco.franzon@exact-lab.it>
|
10
|
-
#
|
11
|
-
# Copyright 2022 (C) Friedrich Miescher Institute for Biomedical Research and
|
12
|
-
# University of Zurich
|
13
1
|
import json
|
14
2
|
import math
|
15
3
|
import shlex
|
16
4
|
import subprocess # nosec
|
17
5
|
import sys
|
6
|
+
import threading
|
18
7
|
import time
|
8
|
+
import uuid
|
9
|
+
from concurrent.futures import Executor
|
19
10
|
from concurrent.futures import Future
|
20
11
|
from concurrent.futures import InvalidStateError
|
21
12
|
from copy import copy
|
@@ -27,8 +18,6 @@ from typing import Optional
|
|
27
18
|
from typing import Sequence
|
28
19
|
|
29
20
|
import cloudpickle
|
30
|
-
from cfut import SlurmExecutor
|
31
|
-
from cfut.util import random_string
|
32
21
|
|
33
22
|
from ......config import get_settings
|
34
23
|
from ......logger import set_logger
|
@@ -43,7 +32,7 @@ from .._batching import heuristics
|
|
43
32
|
from ..utils_executors import get_pickle_file_path
|
44
33
|
from ..utils_executors import get_slurm_file_path
|
45
34
|
from ..utils_executors import get_slurm_script_file_path
|
46
|
-
from ._executor_wait_thread import
|
35
|
+
from ._executor_wait_thread import FractalSlurmSudoWaitThread
|
47
36
|
from ._subprocess_run_as_user import _glob_as_user
|
48
37
|
from ._subprocess_run_as_user import _glob_as_user_strict
|
49
38
|
from ._subprocess_run_as_user import _path_exists_as_user
|
@@ -180,9 +169,7 @@ class SlurmJob:
|
|
180
169
|
)
|
181
170
|
else:
|
182
171
|
self.wftask_file_prefixes = wftask_file_prefixes
|
183
|
-
self.workerids = tuple(
|
184
|
-
random_string() for i in range(self.num_tasks_tot)
|
185
|
-
)
|
172
|
+
self.workerids = tuple(uuid.uuid4() for i in range(self.num_tasks_tot))
|
186
173
|
self.slurm_config = slurm_config
|
187
174
|
|
188
175
|
def get_clean_output_pickle_files(self) -> tuple[str, ...]:
|
@@ -193,9 +180,17 @@ class SlurmJob:
|
|
193
180
|
return tuple(str(f.as_posix()) for f in self.output_pickle_files)
|
194
181
|
|
195
182
|
|
196
|
-
class
|
183
|
+
class FractalSlurmSudoExecutor(Executor):
|
197
184
|
"""
|
198
|
-
|
185
|
+
Executor to submit SLURM jobs as a different user, via `sudo -u`
|
186
|
+
|
187
|
+
This class is a custom re-implementation of the SLURM executor from
|
188
|
+
|
189
|
+
> clusterfutures <https://github.com/sampsyo/clusterfutures>
|
190
|
+
> Original Copyright
|
191
|
+
> Copyright 2021 Adrian Sampson <asampson@cs.washington.edu>
|
192
|
+
> License: MIT
|
193
|
+
|
199
194
|
|
200
195
|
Attributes:
|
201
196
|
slurm_user:
|
@@ -211,7 +206,7 @@ class FractalSlurmExecutor(SlurmExecutor):
|
|
211
206
|
Dictionary with paths of slurm-related files for active jobs
|
212
207
|
"""
|
213
208
|
|
214
|
-
wait_thread_cls =
|
209
|
+
wait_thread_cls = FractalSlurmSudoWaitThread
|
215
210
|
slurm_user: str
|
216
211
|
shutdown_file: str
|
217
212
|
common_script_lines: list[str]
|
@@ -219,7 +214,7 @@ class FractalSlurmExecutor(SlurmExecutor):
|
|
219
214
|
workflow_dir_local: Path
|
220
215
|
workflow_dir_remote: Path
|
221
216
|
map_jobid_to_slurm_files: dict[str, tuple[str, str, str]]
|
222
|
-
slurm_account: Optional[str]
|
217
|
+
slurm_account: Optional[str] = None
|
223
218
|
jobs: dict[str, tuple[Future, SlurmJob]]
|
224
219
|
|
225
220
|
def __init__(
|
@@ -244,7 +239,13 @@ class FractalSlurmExecutor(SlurmExecutor):
|
|
244
239
|
"Missing attribute FractalSlurmExecutor.slurm_user"
|
245
240
|
)
|
246
241
|
|
247
|
-
|
242
|
+
self.jobs = {}
|
243
|
+
self.job_outfiles = {}
|
244
|
+
self.jobs_lock = threading.Lock()
|
245
|
+
self.jobs_empty_cond = threading.Condition(self.jobs_lock)
|
246
|
+
|
247
|
+
self.wait_thread = self.wait_thread_cls(self._completion)
|
248
|
+
self.wait_thread.start()
|
248
249
|
|
249
250
|
# Assign `wait_thread.shutdown_callback` early, since it may be called
|
250
251
|
# from within `_stop_and_join_wait_thread` (e.g. if an exception is
|
@@ -1239,7 +1240,7 @@ class FractalSlurmExecutor(SlurmExecutor):
|
|
1239
1240
|
logger.debug("Executor shutdown: end")
|
1240
1241
|
|
1241
1242
|
def _stop_and_join_wait_thread(self):
|
1242
|
-
self.wait_thread.
|
1243
|
+
self.wait_thread.shutdown = True
|
1243
1244
|
self.wait_thread.join()
|
1244
1245
|
|
1245
1246
|
def __exit__(self, *args, **kwargs):
|
@@ -31,9 +31,6 @@ from ..executors.slurm.sudo._subprocess_run_as_user import _mkdir_as_user
|
|
31
31
|
from ..filenames import WORKFLOW_LOG_FILENAME
|
32
32
|
from ..task_files import task_subfolder_name
|
33
33
|
from ._local import process_workflow as local_process_workflow
|
34
|
-
from ._local_experimental import (
|
35
|
-
process_workflow as local_experimental_process_workflow,
|
36
|
-
)
|
37
34
|
from ._slurm_ssh import process_workflow as slurm_ssh_process_workflow
|
38
35
|
from ._slurm_sudo import process_workflow as slurm_sudo_process_workflow
|
39
36
|
from .handle_failed_job import mark_last_wftask_as_failed
|
@@ -45,7 +42,6 @@ _backends = {}
|
|
45
42
|
_backends["local"] = local_process_workflow
|
46
43
|
_backends["slurm"] = slurm_sudo_process_workflow
|
47
44
|
_backends["slurm_ssh"] = slurm_ssh_process_workflow
|
48
|
-
_backends["local_experimental"] = local_experimental_process_workflow
|
49
45
|
|
50
46
|
|
51
47
|
def fail_job(
|
@@ -74,6 +70,7 @@ def submit_workflow(
|
|
74
70
|
workflow_id: int,
|
75
71
|
dataset_id: int,
|
76
72
|
job_id: int,
|
73
|
+
user_id: int,
|
77
74
|
user_settings: UserSettings,
|
78
75
|
worker_init: Optional[str] = None,
|
79
76
|
slurm_user: Optional[str] = None,
|
@@ -94,6 +91,8 @@ def submit_workflow(
|
|
94
91
|
job_id:
|
95
92
|
Id of the job record which stores the state for the current
|
96
93
|
workflow application.
|
94
|
+
user_id:
|
95
|
+
User ID.
|
97
96
|
worker_init:
|
98
97
|
Custom executor parameters that get parsed before the execution of
|
99
98
|
each task.
|
@@ -184,8 +183,6 @@ def submit_workflow(
|
|
184
183
|
# Define and create WORKFLOW_DIR_REMOTE
|
185
184
|
if FRACTAL_RUNNER_BACKEND == "local":
|
186
185
|
WORKFLOW_DIR_REMOTE = WORKFLOW_DIR_LOCAL
|
187
|
-
elif FRACTAL_RUNNER_BACKEND == "local_experimental":
|
188
|
-
WORKFLOW_DIR_REMOTE = WORKFLOW_DIR_LOCAL
|
189
186
|
elif FRACTAL_RUNNER_BACKEND == "slurm":
|
190
187
|
WORKFLOW_DIR_REMOTE = (
|
191
188
|
Path(user_cache_dir) / WORKFLOW_DIR_LOCAL.name
|
@@ -287,9 +284,6 @@ def submit_workflow(
|
|
287
284
|
if FRACTAL_RUNNER_BACKEND == "local":
|
288
285
|
process_workflow = local_process_workflow
|
289
286
|
backend_specific_kwargs = {}
|
290
|
-
elif FRACTAL_RUNNER_BACKEND == "local_experimental":
|
291
|
-
process_workflow = local_experimental_process_workflow
|
292
|
-
backend_specific_kwargs = {}
|
293
287
|
elif FRACTAL_RUNNER_BACKEND == "slurm":
|
294
288
|
process_workflow = slurm_sudo_process_workflow
|
295
289
|
backend_specific_kwargs = dict(
|
@@ -321,6 +315,7 @@ def submit_workflow(
|
|
321
315
|
process_workflow(
|
322
316
|
workflow=workflow,
|
323
317
|
dataset=dataset,
|
318
|
+
user_id=user_id,
|
324
319
|
workflow_dir_local=WORKFLOW_DIR_LOCAL,
|
325
320
|
workflow_dir_remote=WORKFLOW_DIR_REMOTE,
|
326
321
|
logger_name=logger_name,
|
@@ -41,6 +41,7 @@ def process_workflow(
|
|
41
41
|
last_task_index: Optional[int] = None,
|
42
42
|
logger_name: str,
|
43
43
|
job_attribute_filters: AttributeFiltersType,
|
44
|
+
user_id: int,
|
44
45
|
# Slurm-specific
|
45
46
|
user_cache_dir: Optional[str] = None,
|
46
47
|
slurm_user: Optional[str] = None,
|
@@ -75,6 +76,7 @@ def process_workflow(
|
|
75
76
|
Positional index of the last task to execute; if `None`, proceed
|
76
77
|
until the last task.
|
77
78
|
logger_name: Logger name
|
79
|
+
user_id:
|
78
80
|
slurm_user:
|
79
81
|
Username to impersonate to run the workflow. This argument is
|
80
82
|
present for compatibility with the standard backend interface, but
|
@@ -126,4 +128,5 @@ def process_workflow(
|
|
126
128
|
logger_name=logger_name,
|
127
129
|
submit_setup_call=_local_submit_setup,
|
128
130
|
job_attribute_filters=job_attribute_filters,
|
131
|
+
user_id=user_id,
|
129
132
|
)
|
@@ -17,8 +17,8 @@ from typing import Literal
|
|
17
17
|
from typing import Optional
|
18
18
|
|
19
19
|
from pydantic import BaseModel
|
20
|
-
from pydantic import
|
21
|
-
from pydantic
|
20
|
+
from pydantic import ConfigDict
|
21
|
+
from pydantic import ValidationError
|
22
22
|
|
23
23
|
from .....config import get_settings
|
24
24
|
from .....syringe import Inject
|
@@ -33,7 +33,7 @@ class LocalBackendConfigError(ValueError):
|
|
33
33
|
pass
|
34
34
|
|
35
35
|
|
36
|
-
class LocalBackendConfig(BaseModel
|
36
|
+
class LocalBackendConfig(BaseModel):
|
37
37
|
"""
|
38
38
|
Specifications of the local-backend configuration
|
39
39
|
|
@@ -44,7 +44,8 @@ class LocalBackendConfig(BaseModel, extra=Extra.forbid):
|
|
44
44
|
start at the same time.
|
45
45
|
"""
|
46
46
|
|
47
|
-
|
47
|
+
model_config = ConfigDict(extra="forbid")
|
48
|
+
parallel_tasks_per_job: Optional[int] = None
|
48
49
|
|
49
50
|
|
50
51
|
def get_default_local_backend_config():
|
@@ -67,14 +67,14 @@ def get_slurm_config(
|
|
67
67
|
|
68
68
|
# Incorporate slurm_env.default_slurm_config
|
69
69
|
slurm_env = load_slurm_config_file(config_path=config_path)
|
70
|
-
slurm_dict = slurm_env.default_slurm_config.
|
70
|
+
slurm_dict = slurm_env.default_slurm_config.model_dump(
|
71
71
|
exclude_unset=True, exclude={"mem"}
|
72
72
|
)
|
73
73
|
if slurm_env.default_slurm_config.mem:
|
74
74
|
slurm_dict["mem_per_task_MB"] = slurm_env.default_slurm_config.mem
|
75
75
|
|
76
76
|
# Incorporate slurm_env.batching_config
|
77
|
-
for key, value in slurm_env.batching_config.
|
77
|
+
for key, value in slurm_env.batching_config.model_dump().items():
|
78
78
|
slurm_dict[key] = value
|
79
79
|
|
80
80
|
# Incorporate slurm_env.user_local_exports
|
@@ -82,7 +82,7 @@ def get_slurm_config(
|
|
82
82
|
|
83
83
|
logger.debug(
|
84
84
|
"[get_slurm_config] Fractal SLURM configuration file: "
|
85
|
-
f"{slurm_env.
|
85
|
+
f"{slurm_env.model_dump()=}"
|
86
86
|
)
|
87
87
|
|
88
88
|
# GPU-related options
|
@@ -97,7 +97,7 @@ def get_slurm_config(
|
|
97
97
|
needs_gpu = False
|
98
98
|
logger.debug(f"[get_slurm_config] {needs_gpu=}")
|
99
99
|
if needs_gpu:
|
100
|
-
for key, value in slurm_env.gpu_slurm_config.
|
100
|
+
for key, value in slurm_env.gpu_slurm_config.model_dump(
|
101
101
|
exclude_unset=True, exclude={"mem"}
|
102
102
|
).items():
|
103
103
|
slurm_dict[key] = value
|
@@ -45,6 +45,7 @@ def process_workflow(
|
|
45
45
|
job_attribute_filters: AttributeFiltersType,
|
46
46
|
fractal_ssh: FractalSSH,
|
47
47
|
worker_init: Optional[str] = None,
|
48
|
+
user_id: int,
|
48
49
|
# Not used
|
49
50
|
user_cache_dir: Optional[str] = None,
|
50
51
|
slurm_user: Optional[str] = None,
|
@@ -94,4 +95,5 @@ def process_workflow(
|
|
94
95
|
logger_name=logger_name,
|
95
96
|
submit_setup_call=_slurm_submit_setup,
|
96
97
|
job_attribute_filters=job_attribute_filters,
|
98
|
+
user_id=user_id,
|
97
99
|
)
|
@@ -21,7 +21,7 @@ from typing import Optional
|
|
21
21
|
|
22
22
|
from ....models.v2 import DatasetV2
|
23
23
|
from ....models.v2 import WorkflowV2
|
24
|
-
from ...executors.slurm.sudo.executor import
|
24
|
+
from ...executors.slurm.sudo.executor import FractalSlurmSudoExecutor
|
25
25
|
from ...set_start_and_last_task_index import set_start_and_last_task_index
|
26
26
|
from ..runner import execute_tasks_v2
|
27
27
|
from ._submit_setup import _slurm_submit_setup
|
@@ -38,6 +38,7 @@ def process_workflow(
|
|
38
38
|
last_task_index: Optional[int] = None,
|
39
39
|
logger_name: str,
|
40
40
|
job_attribute_filters: AttributeFiltersType,
|
41
|
+
user_id: int,
|
41
42
|
# Slurm-specific
|
42
43
|
user_cache_dir: Optional[str] = None,
|
43
44
|
slurm_user: Optional[str] = None,
|
@@ -64,7 +65,7 @@ def process_workflow(
|
|
64
65
|
if isinstance(worker_init, str):
|
65
66
|
worker_init = worker_init.split("\n")
|
66
67
|
|
67
|
-
with
|
68
|
+
with FractalSlurmSudoExecutor(
|
68
69
|
debug=True,
|
69
70
|
keep_logs=True,
|
70
71
|
slurm_user=slurm_user,
|
@@ -85,4 +86,5 @@ def process_workflow(
|
|
85
86
|
logger_name=logger_name,
|
86
87
|
submit_setup_call=_slurm_submit_setup,
|
87
88
|
job_attribute_filters=job_attribute_filters,
|
89
|
+
user_id=user_id,
|
88
90
|
)
|
@@ -16,7 +16,7 @@ def deduplicate_list(
|
|
16
16
|
new_list_dict = []
|
17
17
|
new_list_objs = []
|
18
18
|
for this_obj in this_list:
|
19
|
-
this_dict = this_obj.
|
19
|
+
this_dict = this_obj.model_dump()
|
20
20
|
if this_dict not in new_list_dict:
|
21
21
|
new_list_dict.append(this_dict)
|
22
22
|
new_list_objs.append(this_obj)
|