fractal-server 2.3.3__py3-none-any.whl → 2.3.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fractal_server/__init__.py +1 -1
- fractal_server/app/routes/api/__init__.py +9 -0
- fractal_server/app/routes/api/v1/project.py +10 -1
- fractal_server/app/runner/compress_folder.py +111 -99
- fractal_server/app/runner/executors/slurm/_slurm_config.py +16 -1
- fractal_server/app/runner/executors/slurm/ssh/executor.py +27 -16
- fractal_server/app/runner/executors/slurm/sudo/executor.py +6 -0
- fractal_server/app/runner/extract_archive.py +66 -19
- fractal_server/app/runner/run_subprocess.py +27 -0
- fractal_server/app/runner/v2/__init__.py +1 -1
- fractal_server/app/runner/v2/_slurm_common/__init__.py +0 -0
- fractal_server/app/runner/v2/{_slurm_ssh → _slurm_common}/get_slurm_config.py +4 -13
- fractal_server/app/runner/v2/_slurm_ssh/_submit_setup.py +3 -3
- fractal_server/app/runner/v2/{_slurm → _slurm_sudo}/_submit_setup.py +3 -3
- fractal_server/config.py +22 -1
- fractal_server/main.py +1 -1
- fractal_server/ssh/_fabric.py +22 -0
- {fractal_server-2.3.3.dist-info → fractal_server-2.3.5.dist-info}/METADATA +1 -1
- {fractal_server-2.3.3.dist-info → fractal_server-2.3.5.dist-info}/RECORD +23 -22
- fractal_server/app/runner/v2/_slurm/get_slurm_config.py +0 -182
- /fractal_server/app/runner/v2/{_slurm → _slurm_sudo}/__init__.py +0 -0
- {fractal_server-2.3.3.dist-info → fractal_server-2.3.5.dist-info}/LICENSE +0 -0
- {fractal_server-2.3.3.dist-info → fractal_server-2.3.5.dist-info}/WHEEL +0 -0
- {fractal_server-2.3.3.dist-info → fractal_server-2.3.5.dist-info}/entry_points.txt +0 -0
fractal_server/__init__.py
CHANGED
@@ -1 +1 @@
-__VERSION__ = "2.3.3"
+__VERSION__ = "2.3.5"
fractal_server/app/routes/api/__init__.py
CHANGED
@@ -2,9 +2,12 @@
 `api` module
 """
 from fastapi import APIRouter
+from fastapi import Depends

 from ....config import get_settings
 from ....syringe import Inject
+from ...models.security import UserOAuth
+from ...security import current_active_superuser


 router_api = APIRouter()
@@ -17,3 +20,9 @@ async def alive():
         alive=True,
         version=settings.PROJECT_VERSION,
     )
+
+
+@router_api.get("/settings/")
+async def view_settings(user: UserOAuth = Depends(current_active_superuser)):
+    settings = Inject(get_settings)
+    return settings.get_sanitized()
fractal_server/app/routes/api/v1/project.py
CHANGED
@@ -250,9 +250,18 @@ async def apply_workflow(
     db: AsyncSession = Depends(get_async_db),
 ) -> Optional[ApplyWorkflowReadV1]:

+    settings = Inject(get_settings)
+    if settings.FRACTAL_API_V1_MODE == "include_without_submission":
+        raise HTTPException(
+            status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
+            detail=(
+                "Legacy API is still accessible, "
+                "but the submission of legacy jobs is not available."
+            ),
+        )
+
     # Remove non-submitted V1 jobs from the app state when the list grows
     # beyond a threshold
-    settings = Inject(get_settings)
     if (
         len(request.app.state.jobsV1)
         > settings.FRACTAL_API_MAX_JOB_LIST_LENGTH
fractal_server/app/runner/compress_folder.py
CHANGED
@@ -1,120 +1,132 @@
+"""
+Wrap `tar` compression command.
+
+This module is used both locally (in the environment where `fractal-server`
+is running) and remotely (as a standalon Python module, executed over SSH).
+
+This is a twin-module of `extract_archive.py`.
+
+The reason for using the `tar` command via `subprocess` rather than Python
+built-in `tarfile` library has to do with performance issues we observed
+when handling files which were just created within a SLURM job, and in the
+context of a CephFS filesystem.
+"""
+import shutil
 import sys
-import tarfile
-import time
 from pathlib import Path
-from typing import Optional

+from fractal_server.app.runner.run_subprocess import run_subprocess
+from fractal_server.logger import get_logger
+from fractal_server.logger import set_logger
+
+
+def copy_subfolder(src: Path, dest: Path, logger_name: str):
+    cmd_cp = f"cp -r {src.as_posix()} {dest.as_posix()}"
+    logger = get_logger(logger_name=logger_name)
+    logger.debug(f"{cmd_cp=}")
+    res = run_subprocess(cmd=cmd_cp, logger_name=logger_name)
+    return res
+
+
+def create_tar_archive(
+    tarfile_path: Path,
+    subfolder_path_tmp_copy: Path,
+    logger_name: str,
+    remote_to_local: bool,
+):
+    logger = get_logger(logger_name)
+
+    if remote_to_local:
+        exclude_options = "--exclude *sbatch --exclude *_in_*.pickle "
+    else:
+        exclude_options = ""
+
+    cmd_tar = (
+        f"tar czf {tarfile_path} "
+        f"{exclude_options} "
+        f"--directory={subfolder_path_tmp_copy.as_posix()} "
+        "."
+    )
+    logger.debug(f"cmd tar:\n{cmd_tar}")
+    run_subprocess(cmd=cmd_tar, logger_name=logger_name)

-# COMPRESS_FOLDER_MODALITY = "python"
-COMPRESS_FOLDER_MODALITY = "cp-tar-rmtree"

+def remove_temp_subfolder(subfolder_path_tmp_copy: Path, logger_name: str):
+    logger = get_logger(logger_name)
+    try:
+        logger.debug(f"Now remove {subfolder_path_tmp_copy}")
+        shutil.rmtree(subfolder_path_tmp_copy)
+    except Exception as e:
+        logger.debug(f"ERROR during shutil.rmtree: {e}")

-def _filter(info: tarfile.TarInfo) -> Optional[tarfile.TarInfo]:
-    if info.name.endswith(".pickle"):
-        filename = info.name.split("/")[-1]
-        parts = filename.split("_")
-        if len(parts) == 3 and parts[1] == "in":
-            return None
-        elif len(parts) == 5 and parts[3] == "in":
-            return None
-    elif info.name.endswith("slurm_submit.sbatch"):
-        return None
-    return info

+def compress_folder(
+    subfolder_path: Path, remote_to_local: bool = False
+) -> str:
+    """
+    Compress e.g. `/path/archive` into `/path/archive.tar.gz`

-        "Expected use:\n"
-        "python -m fractal_server.app.runner.compress_folder "
-        "path/to/folder"
-    )
+    Note that `/path/archive.tar.gz` may already exist. In this case, it will
+    be overwritten.

+    Args:
+        subfolder_path: Absolute path to the folder to compress.
+        remote_to_local: If `True`, exclude some files from the tar.gz archive.
+
+    Returns:
+        Absolute path to the tar.gz archive.
+    """

-    print("[compress_folder.py] START")
-    print(f"[compress_folder.py] {COMPRESS_FOLDER_MODALITY=}")
-    print(f"[compress_folder.py] {subfolder_path=}")
+    logger_name = "compress_folder"
+    logger = set_logger(logger_name)

+    logger.debug("START")
+    logger.debug(f"{subfolder_path=}")
+    parent_dir = subfolder_path.parent
     subfolder_name = subfolder_path.name
-    tarfile_path = (
-    if COMPRESS_FOLDER_MODALITY == "python":
-        raise NotImplementedError()
-        with tarfile.open(tarfile_path, "w:gz") as tar:
-            tar.add(
-                subfolder_path,
-                arcname=".",  # ????
-                recursive=True,
-                filter=_filter,
-            )
-    elif COMPRESS_FOLDER_MODALITY == "cp-tar-rmtree":
-        import shutil
-        import time
-
-        subfolder_path_tmp_copy = (
-            subfolder_path.parent / f"{subfolder_path.name}_copy"
-        )
+    tarfile_path = (parent_dir / f"{subfolder_name}.tar.gz").as_posix()
+    logger.debug(f"{tarfile_path=}")

-        )
-        res = subprocess.run(  # nosec
-            shlex.split(cmd_cp),
-            check=True,
-            capture_output=True,
-            encoding="utf-8",
+    subfolder_path_tmp_copy = (
+        subfolder_path.parent / f"{subfolder_path.name}_copy"
+    )
+    try:
+        copy_subfolder(
+            subfolder_path, subfolder_path_tmp_copy, logger_name=logger_name
         )
-            f"{tarfile_path} "
-            "--exclude *sbatch --exclude *_in_*.pickle "
-            f"--directory={subfolder_path_tmp_copy.as_posix()} "
-            "."
+        create_tar_archive(
+            tarfile_path,
+            subfolder_path_tmp_copy,
+            logger_name=logger_name,
+            remote_to_local=remote_to_local,
         )
+        return tarfile_path

-            shlex.split(cmd_tar),
-            capture_output=True,
-            encoding="utf-8",
-        )
-        t1 = time.perf_counter()
-        t_1 = time.perf_counter()
-        print(f"[compress_folder.py] tar END - elapsed: {t1-t0:.3f} s")
+    except Exception as e:
+        logger.debug(f"ERROR: {e}")
+        sys.exit(1)

+    finally:
+        remove_temp_subfolder(subfolder_path_tmp_copy, logger_name=logger_name)

-    if res.returncode != 0:
-        print("[compress_folder.py] ERROR in tar")
-        print(f"[compress_folder.py] tar stdout:\n{res.stdout}")
-        print(f"[compress_folder.py] tar stderr:\n{res.stderr}")

+def main(sys_argv: list[str]):
+
+    help_msg = (
+        "Expected use:\n"
+        "python -m fractal_server.app.runner.compress_folder "
+        "path/to/folder [--remote-to-local]\n"
+    )
+    num_args = len(sys_argv[1:])
+    if num_args == 0:
+        sys.exit(f"Invalid argument.\n{help_msg}\nProvided: {sys_argv[1:]=}")
+    elif num_args == 1:
+        compress_folder(subfolder_path=Path(sys_argv[1]))
+    elif num_args == 2 and sys_argv[2] == "--remote-to-local":
+        compress_folder(subfolder_path=Path(sys_argv[1]), remote_to_local=True)
+    else:
+        sys.exit(f"Invalid argument.\n{help_msg}\nProvided: {sys_argv[1:]=}")

-        t0 = time.perf_counter()
-        shutil.rmtree(subfolder_path_tmp_copy)
-        t1 = time.perf_counter()
-        print(
-            f"[compress_folder.py] shutil.rmtree END - elapsed: {t1-t0:.3f} s"
-        )

+if __name__ == "__main__":
+    main(sys.argv)
fractal_server/app/runner/executors/slurm/_slurm_config.py
CHANGED
@@ -62,6 +62,8 @@ class _SlurmConfigSet(BaseModel, extra=Extra.forbid):
     time: Optional[str]
     account: Optional[str]
     extra_lines: Optional[list[str]]
+    pre_submission_commands: Optional[list[str]]
+    gpus: Optional[str]


 class _BatchingConfigSet(BaseModel, extra=Extra.forbid):
@@ -219,6 +221,7 @@ class SlurmConfig(BaseModel, extra=Extra.forbid):
         constraint: Corresponds to SLURM option.
         gres: Corresponds to SLURM option.
         account: Corresponds to SLURM option.
+        gpus: Corresponds to SLURM option.
         time: Corresponds to SLURM option (WARNING: not fully supported).
         prefix: Prefix of configuration lines in SLURM submission scripts.
         shebang_line: Shebang line for SLURM submission scripts.
@@ -240,6 +243,8 @@ class SlurmConfig(BaseModel, extra=Extra.forbid):
             Key-value pairs to be included as `export`-ed variables in SLURM
             submission script, after prepending values with the user's cache
             directory.
+        pre_submission_commands: List of commands to be prepended to the sbatch
+            command.
     """

     # Required SLURM parameters (note that the integer attributes are those
@@ -254,6 +259,7 @@ class SlurmConfig(BaseModel, extra=Extra.forbid):
     job_name: Optional[str] = None
     constraint: Optional[str] = None
     gres: Optional[str] = None
+    gpus: Optional[str] = None
     time: Optional[str] = None
     account: Optional[str] = None

@@ -274,6 +280,8 @@ class SlurmConfig(BaseModel, extra=Extra.forbid):
     target_num_jobs: int
     max_num_jobs: int

+    pre_submission_commands: list[str] = Field(default_factory=list)
+
     def _sorted_extra_lines(self) -> list[str]:
         """
         Return a copy of `self.extra_lines`, where lines starting with
@@ -340,7 +348,14 @@ class SlurmConfig(BaseModel, extra=Extra.forbid):
             f"{self.prefix} --cpus-per-task={self.cpus_per_task}",
             f"{self.prefix} --mem={mem_per_job_MB}M",
         ]
-        for key in [
+        for key in [
+            "job_name",
+            "constraint",
+            "gres",
+            "gpus",
+            "time",
+            "account",
+        ]:
             value = getattr(self, key)
             if value is not None:
                 # Handle the `time` parameter
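
The two new `_SlurmConfigSet` fields surface in different ways: `gpus` becomes an additional `#SBATCH --gpus=...` line (via the key loop shown above), while `pre_submission_commands` is carried on `SlurmConfig` and consumed by the SSH executor (see `executor.py` below). A hedged sketch of where they could sit in a Fractal SLURM configuration file; the surrounding keys and values are placeholders inferred from `_SlurmConfigSet`, not taken from this diff:

import json

slurm_config = {
    "default_slurm_config": {
        "partition": "main",
        "mem": "4G",
    },
    "gpu_slurm_config": {
        "partition": "gpu",
        "gpus": "1",
        "pre_submission_commands": ["module load cuda"],
    },
}

# Write the hypothetical configuration file that load_slurm_config_file() would read.
with open("fractal_slurm_config.json", "w") as f:
    json.dump(slurm_config, f, indent=2)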
fractal_server/app/runner/executors/slurm/ssh/executor.py
CHANGED
@@ -13,7 +13,6 @@
 import json
 import math
 import sys
-import tarfile
 import threading
 import time
 from concurrent.futures import Future
@@ -38,9 +37,11 @@ from ...slurm._slurm_config import SlurmConfig
 from .._batching import heuristics
 from ._executor_wait_thread import FractalSlurmWaitThread
 from fractal_server.app.runner.components import _COMPONENT_KEY_
+from fractal_server.app.runner.compress_folder import compress_folder
 from fractal_server.app.runner.exceptions import JobExecutionError
 from fractal_server.app.runner.exceptions import TaskExecutionError
 from fractal_server.app.runner.executors.slurm.ssh._slurm_job import SlurmJob
+from fractal_server.app.runner.extract_archive import extract_archive
 from fractal_server.config import get_settings
 from fractal_server.logger import set_logger
 from fractal_server.ssh._fabric import FractalSSH
@@ -822,17 +823,12 @@ class FractalSlurmSSHExecutor(SlurmExecutor):

         # Create compressed subfolder archive (locally)
         local_subfolder = self.workflow_dir_local / subfolder_name
-        ).as_posix()
+        tarfile_path_local = compress_folder(local_subfolder)
+        tarfile_name = Path(tarfile_path_local).name
+        logger.info(f"Subfolder archive created at {tarfile_path_local}")
         tarfile_path_remote = (
             self.workflow_dir_remote / tarfile_name
         ).as_posix()
-        with tarfile.open(tarfile_path_local, "w:gz") as tar:
-            for this_file in local_subfolder.glob("*"):
-                tar.add(this_file, arcname=this_file.name)
-        logger.info(f"Subfolder archive created at {tarfile_path_local}")

         # Transfer archive
         t_0_put = time.perf_counter()
@@ -873,9 +869,22 @@ class FractalSlurmSSHExecutor(SlurmExecutor):

         # Submit job to SLURM, and get jobid
         sbatch_command = f"sbatch --parsable {job.slurm_script_remote}"
+        pre_submission_cmds = job.slurm_config.pre_submission_commands
+        if len(pre_submission_cmds) == 0:
+            sbatch_stdout = self.fractal_ssh.run_command(cmd=sbatch_command)
+        else:
+            logger.debug(f"Now using {pre_submission_cmds=}")
+            script_lines = pre_submission_cmds + [sbatch_command]
+            script_content = "\n".join(script_lines)
+            script_content = f"{script_content}\n"
+            script_path_remote = (
+                f"{job.slurm_script_remote.as_posix()}_wrapper.sh"
+            )
+            self.fractal_ssh.write_remote_file(
+                path=script_path_remote, content=script_content
+            )
+            cmd = f"bash {script_path_remote}"
+            sbatch_stdout = self.fractal_ssh.run_command(cmd=cmd)

         # Extract SLURM job ID from stdout
         try:
@@ -885,7 +894,9 @@ class FractalSlurmSSHExecutor(SlurmExecutor):
             error_msg = (
                 f"Submit command `{sbatch_command}` returned "
                 f"`{stdout=}` which cannot be cast to an integer "
-                f"SLURM-job ID
+                f"SLURM-job ID.\n"
+                f"Note that {pre_submission_cmds=}.\n"
+                f"Original error:\n{str(e)}"
             )
             logger.error(error_msg)
             raise JobExecutionError(info=error_msg)
@@ -1222,7 +1233,8 @@ class FractalSlurmSSHExecutor(SlurmExecutor):
         tar_command = (
             f"{self.python_remote} "
             "-m fractal_server.app.runner.compress_folder "
-            f"{(self.workflow_dir_remote / subfolder_name).as_posix()}"
+            f"{(self.workflow_dir_remote / subfolder_name).as_posix()} "
+            "--remote-to-local"
         )
         stdout = self.fractal_ssh.run_command(cmd=tar_command)
         print(stdout)
@@ -1240,8 +1252,7 @@ class FractalSlurmSSHExecutor(SlurmExecutor):
         )

         # Extract tarfile locally
-            tar.extractall(path=(self.workflow_dir_local / subfolder_name))
+        extract_archive(Path(tarfile_path_local))

         t_1 = time.perf_counter()
         logger.info("[_get_subfolder_sftp] End - " f"elapsed: {t_1-t_0:.3f} s")
fractal_server/app/runner/executors/slurm/sudo/executor.py
CHANGED
@@ -1121,6 +1121,12 @@ class FractalSlurmExecutor(SlurmExecutor):
             slurm_err_path=str(job.slurm_stderr),
         )

+        # Print warning for ignored parameter
+        if len(job.slurm_config.pre_submission_commands) > 0:
+            logger.warning(
+                f"Ignoring {job.slurm_config.pre_submission_commands=}."
+            )
+
         # Submit job via sbatch, and retrieve jobid

         # Write script content to a job.slurm_script
fractal_server/app/runner/extract_archive.py
CHANGED
@@ -1,7 +1,22 @@
+"""
+Wrap `tar` extraction command.
+
+This module is used both locally (in the environment where `fractal-server`
+is running) and remotely (as a standalon Python module, executed over SSH).
+
+This is a twin-module of `compress_folder.py`.
+
+The reason for using the `tar` command via `subprocess` rather than Python
+built-in `tarfile` library has to do with performance issues we observed
+when handling files which were just created within a SLURM job, and in the
+context of a CephFS filesystem.
+"""
 import sys
-import tarfile
 from pathlib import Path

+from .run_subprocess import run_subprocess
+from fractal_server.logger import set_logger
+

 def _remove_suffix(*, string: str, suffix: str) -> str:
     if string.endswith(suffix):
@@ -10,29 +25,61 @@ def _remove_suffix(*, string: str, suffix: str) -> str:
         raise ValueError(f"Cannot remove {suffix=} from {string=}.")


+def extract_archive(archive_path: Path):
+    """
+    Extract e.g. `/path/archive.tar.gz` archive into `/path/archive` folder
+
+    Note that `/path/archive` may already exist. In this case, files with
+    the same name are overwritten and new files are added.
+
+    Arguments:
+        archive_path: Absolute path to the archive file.
+    """
+
+    logger_name = "extract_archive"
+    logger = set_logger(logger_name)
+
+    logger.debug("START")
+    logger.debug(f"{archive_path.as_posix()=}")
+
+    # Check archive_path is valid
+    if not archive_path.exists():
+        sys.exit(f"Missing file {archive_path.as_posix()}.")
+
+    # Prepare subfolder path
+    parent_dir = archive_path.parent
+    subfolder_name = _remove_suffix(string=archive_path.name, suffix=".tar.gz")
+    subfolder_path = parent_dir / subfolder_name
+    logger.debug(f"{subfolder_path.as_posix()=}")
+
+    # Create subfolder
+    subfolder_path.mkdir(exist_ok=True)
+
+    # Run tar command
+    cmd_tar = (
+        f"tar -xzvf {archive_path} "
+        f"--directory={subfolder_path.as_posix()} "
+        "."
+    )
+    logger.debug(f"{cmd_tar=}")
+    run_subprocess(cmd=cmd_tar, logger_name=logger_name)
+
+    logger.debug("END")
+
+
+def main(sys_argv: list[str]):
     help_msg = (
         "Expected use:\n"
         "python -m fractal_server.app.runner.extract_archive "
         "path/to/archive.tar.gz"
     )

-    if len(
-    )
-        raise ValueError(
-            f"Invalid argument.\n{help_msg}\nProvided: {sys.argv=}"
-        )
-
-    tarfile_path = Path(sys.argv[1])
-
-    print(f"[extract_archive.py] {tarfile_path=}")
+    if len(sys_argv[1:]) != 1 or not sys_argv[1].endswith(".tar.gz"):
+        sys.exit(f"Invalid argument.\n{help_msg}\nProvided: {sys_argv[1:]=}")
+    else:
+        tarfile_path = Path(sys_argv[1])
+        extract_archive(tarfile_path)

-    job_folder = tarfile_path.parent
-    subfolder_name = _remove_suffix(string=tarfile_path.name, suffix=".tar.gz")
-    with tarfile.open(tarfile_path) as tar:
-        tar.extractall(path=Path(job_folder, subfolder_name).as_posix())

-
+if __name__ == "__main__":
+    main(sys.argv)
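
Together with `compress_folder`, this gives the round trip used when the executor fetches job subfolders; a minimal local sketch (paths are made up):

from pathlib import Path

from fractal_server.app.runner.compress_folder import compress_folder
from fractal_server.app.runner.extract_archive import extract_archive

# /tmp/job/0_task -> /tmp/job/0_task.tar.gz -> extracted back into /tmp/job/0_task
tarfile_path = compress_folder(Path("/tmp/job/0_task"))
extract_archive(Path(tarfile_path))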
fractal_server/app/runner/run_subprocess.py
ADDED
@@ -0,0 +1,27 @@
+import shlex
+import subprocess  # nosec
+from typing import Optional
+
+from fractal_server.logger import get_logger
+
+
+def run_subprocess(
+    cmd: str, logger_name: Optional[str] = None
+) -> subprocess.CompletedProcess:
+    logger = get_logger(logger_name)
+    try:
+        res = subprocess.run(  # nosec
+            shlex.split(cmd), check=True, capture_output=True, encoding="utf-8"
+        )
+        return res
+    except subprocess.CalledProcessError as e:
+        logger.debug(
+            f"Command '{e.cmd}' returned non-zero exit status {e.returncode}."
+        )
+        logger.debug(f"stdout: {e.stdout}")
+        logger.debug(f"stderr: {e.stderr}")
+        raise e
+    except Exception as e:
+        logger.debug(f"An error occurred while running command: {cmd}")
+        logger.debug(str(e))
+        raise e
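
A minimal usage sketch of the new helper (the commands are arbitrary examples):

from fractal_server.app.runner.run_subprocess import run_subprocess

# Successful command: the returned CompletedProcess carries decoded stdout/stderr.
res = run_subprocess(cmd="tar --version", logger_name="compress_folder")
print(res.stdout.splitlines()[0])

# Failing command: the error is logged (command, return code, stdout, stderr)
# and re-raised, so callers such as compress_folder() decide how to react.
try:
    run_subprocess(cmd="tar czf /nonexistent/out.tar.gz missing-dir")
except Exception as e:
    print(f"command failed: {e}")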
fractal_server/app/runner/v2/__init__.py
CHANGED
@@ -36,8 +36,8 @@ from ._local import process_workflow as local_process_workflow
 from ._local_experimental import (
     process_workflow as local_experimental_process_workflow,
 )
-from ._slurm import process_workflow as slurm_sudo_process_workflow
 from ._slurm_ssh import process_workflow as slurm_ssh_process_workflow
+from ._slurm_sudo import process_workflow as slurm_sudo_process_workflow
 from .handle_failed_job import assemble_filters_failed_job
 from .handle_failed_job import assemble_history_failed_job
 from .handle_failed_job import assemble_images_failed_job
fractal_server/app/runner/v2/_slurm_common/__init__.py
File without changes
fractal_server/app/runner/v2/{_slurm_ssh → _slurm_common}/get_slurm_config.py
CHANGED
@@ -18,8 +18,6 @@ from fractal_server.app.runner.executors.slurm._slurm_config import (

 def get_slurm_config(
     wftask: WorkflowTaskV2,
-    workflow_dir_local: Path,
-    workflow_dir_remote: Path,
     which_type: Literal["non_parallel", "parallel"],
     config_path: Optional[Path] = None,
 ) -> SlurmConfig:
@@ -43,13 +41,6 @@ def get_slurm_config(
         wftask:
             WorkflowTask for which the SLURM configuration is is to be
             prepared.
-        workflow_dir_local:
-            Server-owned directory to store all task-execution-related relevant
-            files (inputs, outputs, errors, and all meta files related to the
-            job execution). Note: users cannot write directly to this folder.
-        workflow_dir_remote:
-            User-side directory with the same scope as `workflow_dir_local`,
-            and where a user can write.
         config_path:
             Path of a Fractal SLURM configuration file; if `None`, use
             `FRACTAL_SLURM_CONFIG_FILE` variable from settings.
@@ -99,13 +90,13 @@ def get_slurm_config(
     # 1. This block of definitions takes priority over other definitions from
     #    slurm_env which are not under the `needs_gpu` subgroup
     # 2. This block of definitions has lower priority than whatever comes next
-    #    (i.e. from WorkflowTask.
+    #    (i.e. from WorkflowTask.meta_parallel).
     if wftask_meta is not None:
         needs_gpu = wftask_meta.get("needs_gpu", False)
     else:
         needs_gpu = False
     logger.debug(f"[get_slurm_config] {needs_gpu=}")
-    if needs_gpu
+    if needs_gpu:
         for key, value in slurm_env.gpu_slurm_config.dict(
             exclude_unset=True, exclude={"mem"}
         ).items():
@@ -143,9 +134,9 @@ def get_slurm_config(
             )
             logger.error(error_msg)
             raise SlurmConfigError(error_msg)
-        for key in ["time", "gres", "constraint"]:
+        for key in ["time", "gres", "gpus", "constraint"]:
             value = wftask_meta.get(key, None)
-            if value:
+            if value is not None:
                 slurm_dict[key] = value
     if wftask_meta is not None:
         extra_lines = wftask_meta.get("extra_lines", [])
fractal_server/app/runner/v2/_slurm_ssh/_submit_setup.py
CHANGED
@@ -17,8 +17,10 @@ from pathlib import Path
 from typing import Literal

 from ...task_files import get_task_file_paths
-from .get_slurm_config import get_slurm_config
 from fractal_server.app.models.v2 import WorkflowTaskV2
+from fractal_server.app.runner.v2._slurm_common.get_slurm_config import (
+    get_slurm_config,
+)


 def _slurm_submit_setup(
@@ -62,8 +64,6 @@ def _slurm_submit_setup(
     # Get SlurmConfig object
     slurm_config = get_slurm_config(
         wftask=wftask,
-        workflow_dir_local=workflow_dir_local,
-        workflow_dir_remote=workflow_dir_remote,
         which_type=which_type,
     )

fractal_server/app/runner/v2/{_slurm → _slurm_sudo}/_submit_setup.py
CHANGED
@@ -17,8 +17,10 @@ from pathlib import Path
 from typing import Literal

 from ...task_files import get_task_file_paths
-from .get_slurm_config import get_slurm_config
 from fractal_server.app.models.v2 import WorkflowTaskV2
+from fractal_server.app.runner.v2._slurm_common.get_slurm_config import (
+    get_slurm_config,
+)


 def _slurm_submit_setup(
@@ -62,8 +64,6 @@ def _slurm_submit_setup(
     # Get SlurmConfig object
     slurm_config = get_slurm_config(
         wftask=wftask,
-        workflow_dir_local=workflow_dir_local,
-        workflow_dir_remote=workflow_dir_remote,
         which_type=which_type,
     )

fractal_server/config.py
CHANGED
@@ -546,7 +546,9 @@ class Settings(BaseSettings):
     attribute in their input-arguments JSON file.
     """

-    FRACTAL_API_V1_MODE: Literal[
+    FRACTAL_API_V1_MODE: Literal[
+        "include", "include_without_submission", "exclude"
+    ] = "include"
     """
     Whether to include the v1 API.
     """
@@ -685,6 +687,25 @@ class Settings(BaseSettings):
         self.check_db()
         self.check_runner()

+    def get_sanitized(self) -> dict:
+        def _must_be_sanitized(string) -> bool:
+            if not string.upper().startswith("FRACTAL") or any(
+                s in string.upper()
+                for s in ["PASSWORD", "SECRET", "PWD", "TOKEN"]
+            ):
+                return True
+            else:
+                return False
+
+        sanitized_settings = {}
+        for k, v in self.dict().items():
+            if _must_be_sanitized(k):
+                sanitized_settings[k] = "***"
+            else:
+                sanitized_settings[k] = v
+
+        return sanitized_settings
+

 def get_settings(settings=Settings()) -> Settings:
     return settings
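
The masking rule in `get_sanitized` keeps a value only if its key starts with `FRACTAL` and contains none of `PASSWORD`/`SECRET`/`PWD`/`TOKEN`; everything else is replaced by "***". A small self-contained check of that predicate (example keys only, not actual Settings values):

def _must_be_sanitized(string) -> bool:
    # Copy of the nested helper defined in Settings.get_sanitized()
    if not string.upper().startswith("FRACTAL") or any(
        s in string.upper() for s in ["PASSWORD", "SECRET", "PWD", "TOKEN"]
    ):
        return True
    else:
        return False


assert _must_be_sanitized("JWT_SECRET_KEY") is True
assert _must_be_sanitized("POSTGRES_PASSWORD") is True
assert _must_be_sanitized("FRACTAL_API_V1_MODE") is False
assert _must_be_sanitized("SQLITE_PATH") is True  # not FRACTAL_*, so masked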
fractal_server/main.py
CHANGED
@@ -49,7 +49,7 @@ def collect_routers(app: FastAPI) -> None:
     settings = Inject(get_settings)

     app.include_router(router_api, prefix="/api")
-    if settings.FRACTAL_API_V1_MODE
+    if settings.FRACTAL_API_V1_MODE.startswith("include"):
         app.include_router(router_api_v1, prefix="/api/v1")
         app.include_router(
             router_admin_v1, prefix="/admin/v1", tags=["V1 Admin area"]
fractal_server/ssh/_fabric.py
CHANGED
@@ -306,6 +306,28 @@ class FractalSSH(object):
         cmd = f"rm -r {folder}"
         self.run_command(cmd=cmd)

+    def write_remote_file(
+        self,
+        *,
+        path: str,
+        content: str,
+        lock_timeout: Optional[float] = None,
+    ) -> None:
+        """
+        Open a remote file via SFTP and write it.
+
+        Args:
+            path: Absolute path
+            contents: File contents
+            lock_timeout:
+        """
+        actual_lock_timeout = self.default_lock_timeout
+        if lock_timeout is not None:
+            actual_lock_timeout = lock_timeout
+        with self.acquire_timeout(timeout=actual_lock_timeout):
+            with self.sftp().open(filename=path, mode="w") as f:
+                f.write(content)
+

 def get_ssh_connection(
     *,
{fractal_server-2.3.3.dist-info → fractal_server-2.3.5.dist-info}/RECORD
CHANGED
@@ -1,4 +1,4 @@
-fractal_server/__init__.py,sha256=
+fractal_server/__init__.py,sha256=RqyrrNV86fgXA3ZGY013Ddl0XCyYA-YaiOneKISOYXc,22
 fractal_server/__main__.py,sha256=CocbzZooX1UtGqPi55GcHGNxnrJXFg5tUU5b3wyFCyo,4958
 fractal_server/alembic.ini,sha256=MWwi7GzjzawI9cCAK1LW7NxIBQDUqD12-ptJoq5JpP0,3153
 fractal_server/app/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -25,12 +25,12 @@ fractal_server/app/routes/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJW
 fractal_server/app/routes/admin/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 fractal_server/app/routes/admin/v1.py,sha256=ShmsUFtfyRqP84QScqmRDVrJFpbR4p-8baLxAkI3n1U,13926
 fractal_server/app/routes/admin/v2.py,sha256=JuG1qqVeQIgVJgPyqrB1053il22mGPGpKBiJi6zVsqQ,13687
-fractal_server/app/routes/api/__init__.py,sha256=
+fractal_server/app/routes/api/__init__.py,sha256=XlJUFd-0FossfyKyJti4dmwY6SMysQn1yiisMrNzgBE,615
 fractal_server/app/routes/api/v1/__init__.py,sha256=Y2HQdG197J0a7DyQEE2jn53IfxD0EHGhzK1I2JZuEck,958
 fractal_server/app/routes/api/v1/_aux_functions.py,sha256=CeaVrNVYs_lEbiJbu4uaTeeiajljeXfdq1iLkt5RoRo,12636
 fractal_server/app/routes/api/v1/dataset.py,sha256=HRE-8vPmVkeXf7WFYkI19mDtbY-iJZeJ7PmMiV0LMgY,16923
 fractal_server/app/routes/api/v1/job.py,sha256=217fGh7U37esib1JG8INpLhE0W88t9X0fFwCNVt2r_M,5313
-fractal_server/app/routes/api/v1/project.py,sha256=
+fractal_server/app/routes/api/v1/project.py,sha256=0DavnACBDr8-BHWGQ0YPfxVNJLsYmbuo-TeKJk1s3Hw,16436
 fractal_server/app/routes/api/v1/task.py,sha256=udbKnenzc-Q10elYCVB9JmOPWATraa9tZi0AaByvWo0,6129
 fractal_server/app/routes/api/v1/task_collection.py,sha256=82XBsJHlPiDPCbpLa-16ojKDpj2LYj9_jFSZt0t58bQ,8911
 fractal_server/app/routes/api/v1/workflow.py,sha256=7r9IoIevg_rvYCrerMOsIsUabSOQatxdPCfLdkP0dRs,10942
@@ -57,24 +57,25 @@ fractal_server/app/runner/.gitignore,sha256=ytzN_oyHWXrGU7iFAtoHSTUbM6Rn6kG0Zkdd
 fractal_server/app/runner/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 fractal_server/app/runner/async_wrap.py,sha256=_O6f8jftKYXG_DozkmlrDBhoiK9QhE9MablOyECq2_M,829
 fractal_server/app/runner/components.py,sha256=ZF8ct_Ky5k8IAcrmpYOZ-bc6OBgdELEighYVqFDEbZg,119
-fractal_server/app/runner/compress_folder.py,sha256=
+fractal_server/app/runner/compress_folder.py,sha256=zmxo2EFkSaO4h3GnMRi9DYaf62bxy4zznZZGfmq-n68,3975
 fractal_server/app/runner/exceptions.py,sha256=_qZ_t8O4umAdJ1ikockiF5rDJuxnEskrGrLjZcnQl7A,4159
 fractal_server/app/runner/executors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 fractal_server/app/runner/executors/slurm/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 fractal_server/app/runner/executors/slurm/_batching.py,sha256=3mfeFuYm3UA4EXh4VWuqZTF-dcINECZgTHoPOaOszDo,8840
-fractal_server/app/runner/executors/slurm/_slurm_config.py,sha256=
+fractal_server/app/runner/executors/slurm/_slurm_config.py,sha256=iyhtDi1qveqq7I4S1tycVKsp3VfyocvBgGugYDpOzAs,16069
 fractal_server/app/runner/executors/slurm/remote.py,sha256=wLziIsGdSMiO-jIXM8x77JRK82g_2hx0iBKTiMghuIo,5852
 fractal_server/app/runner/executors/slurm/ssh/__init__.py,sha256=Cjn1rYvljddi96tAwS-qqGkNfOcfPzjChdaEZEObCcM,65
 fractal_server/app/runner/executors/slurm/ssh/_executor_wait_thread.py,sha256=jM4G-wiHynZhNERusVGLtDTepJDiYjCDloWZyflaMV0,3482
 fractal_server/app/runner/executors/slurm/ssh/_slurm_job.py,sha256=rwlqZzoGo4SAb4nSlFjsQJdaCgfM1J6YGcjb8yYxlqc,4506
-fractal_server/app/runner/executors/slurm/ssh/executor.py,sha256=
+fractal_server/app/runner/executors/slurm/ssh/executor.py,sha256=rfLEO6mN3sZvZYHqs3lmYvPYFGLmXyMPWl1Bg0mq-6k,56109
 fractal_server/app/runner/executors/slurm/sudo/__init__.py,sha256=Cjn1rYvljddi96tAwS-qqGkNfOcfPzjChdaEZEObCcM,65
 fractal_server/app/runner/executors/slurm/sudo/_check_jobs_status.py,sha256=wAgwpVcr6JIslKHOuS0FhRa_6T1KCManyRJqA-fifzw,1909
 fractal_server/app/runner/executors/slurm/sudo/_executor_wait_thread.py,sha256=z5LlhaiqAb8pHsF1WwdzXN39C5anQmwjo1rSQgtRAYE,4422
 fractal_server/app/runner/executors/slurm/sudo/_subprocess_run_as_user.py,sha256=uZgmxP0ZneGpzTVt-GT-6EgNKUh1sW2-QH7LFYc1tNI,5132
-fractal_server/app/runner/executors/slurm/sudo/executor.py,sha256=
-fractal_server/app/runner/extract_archive.py,sha256=
+fractal_server/app/runner/executors/slurm/sudo/executor.py,sha256=74jfNauDgQOdcILHcCLJM4Awm_SThsZYgc0Vwx0hnB8,48460
+fractal_server/app/runner/extract_archive.py,sha256=tLpjDrX47OjTNhhoWvm6iNukg8KoieWyTb7ZfvE9eWU,2483
 fractal_server/app/runner/filenames.py,sha256=9lwu3yB4C67yiijYw8XIKaLFn3mJUt6_TCyVFM_aZUQ,206
+fractal_server/app/runner/run_subprocess.py,sha256=KTkJnWLrLQdR2WRJ3jGu0RBu4330L3mtCAE_B0wDx3M,818
 fractal_server/app/runner/set_start_and_last_task_index.py,sha256=-q4zVybAj8ek2XlbENKlfOAJ39hT_zoJoZkqzDqiAMY,1254
 fractal_server/app/runner/shutdown.py,sha256=I_o2iYKJwzku0L3E85ETjrve3QPECygR5xhhsAo5huM,2910
 fractal_server/app/runner/task_files.py,sha256=sd_MpJ01C8c9QTO8GzGMidFGdlq_hXX_ARDRhd_YMnI,3762
@@ -89,7 +90,7 @@ fractal_server/app/runner/v1/_slurm/_submit_setup.py,sha256=KO9c694d318adoPQh9UG
 fractal_server/app/runner/v1/_slurm/get_slurm_config.py,sha256=6pQNNx997bLIfLp0guF09t_O0ZYRXnbEGLktSAcKnic,5999
 fractal_server/app/runner/v1/common.py,sha256=_L-vjLnWato80VdlB_BFN4G8P4jSM07u-5cnl1T3S34,3294
 fractal_server/app/runner/v1/handle_failed_job.py,sha256=bHzScC_aIlU3q-bQxGW6rfWV4xbZ2tho_sktjsAs1no,4684
-fractal_server/app/runner/v2/__init__.py,sha256=
+fractal_server/app/runner/v2/__init__.py,sha256=nD4uFi-RGsN6JAmJNpV2dS603u8KqFuGwXZS8jIrf50,16917
 fractal_server/app/runner/v2/_local/__init__.py,sha256=KTj14K6jH8fXGUi5P7u5_RqEE1zF4aXtgPxCKzw46iw,5971
 fractal_server/app/runner/v2/_local/_local_config.py,sha256=9oi209Dlp35ANfxb_DISqmMKKc6DPaMsmYVWbZLseME,3630
 fractal_server/app/runner/v2/_local/_submit_setup.py,sha256=MucNOo8Er0F5ZIwH7CnTeXgnFMc6d3pKPkv563QNVi0,1630
@@ -98,12 +99,12 @@ fractal_server/app/runner/v2/_local_experimental/__init__.py,sha256=53yS8a-l0dMT
 fractal_server/app/runner/v2/_local_experimental/_local_config.py,sha256=QiS5ODe-iGmUQdIT8QgpbyMc7-ZpIRv1V_f2q3qfPQ8,3211
 fractal_server/app/runner/v2/_local_experimental/_submit_setup.py,sha256=we7r-sQf0CJ9gxbfbgHcYdC6pKjx8eXweljIjthxkv8,1212
 fractal_server/app/runner/v2/_local_experimental/executor.py,sha256=vcBKjireIIyF5WgIQLatD6ojlWEydbTwyIG0bcpIjys,5438
-fractal_server/app/runner/v2/
-fractal_server/app/runner/v2/
-fractal_server/app/runner/v2/_slurm/get_slurm_config.py,sha256=btGmbZB0fO6bg2WujFxbGEV2iWzaMKbHgV1r2hm_4a0,6748
+fractal_server/app/runner/v2/_slurm_common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+fractal_server/app/runner/v2/_slurm_common/get_slurm_config.py,sha256=V47uckqA4Vp-7m5esDnTitekc-yabLhaZSlPj4jN_D8,6307
 fractal_server/app/runner/v2/_slurm_ssh/__init__.py,sha256=1p6d_ppXBqRNPXPGxM8cmIOffEsfkEPEfvDeT-_90dE,3990
-fractal_server/app/runner/v2/_slurm_ssh/_submit_setup.py,sha256=
-fractal_server/app/runner/v2/
+fractal_server/app/runner/v2/_slurm_ssh/_submit_setup.py,sha256=a5_FDPH_yxYmrjAjMRLgh_Y4DSG3mRslCLQodGM3-t4,2838
+fractal_server/app/runner/v2/_slurm_sudo/__init__.py,sha256=q2fwiKqtNpXtfs5wUFQjwJxdYqKPPTbCy1ieBhhi-Bw,4316
+fractal_server/app/runner/v2/_slurm_sudo/_submit_setup.py,sha256=a5_FDPH_yxYmrjAjMRLgh_Y4DSG3mRslCLQodGM3-t4,2838
 fractal_server/app/runner/v2/deduplicate_list.py,sha256=-imwO7OB7ATADEnqVbTElUwoY0YIJCTf_SbWJNN9OZg,639
 fractal_server/app/runner/v2/handle_failed_job.py,sha256=M1r3dnrbUMo_AI2qjaVuGhieMAyLh5gcvB10YOBpjvI,5415
 fractal_server/app/runner/v2/merge_outputs.py,sha256=IHuHqbKmk97K35BFvTrKVBs60z3e_--OzXTnsvmA02c,1281
@@ -138,14 +139,14 @@ fractal_server/app/schemas/v2/task_collection.py,sha256=8PG1bOqkfQqORMN0brWf6mHD
 fractal_server/app/schemas/v2/workflow.py,sha256=Zzx3e-qgkH8le0FUmAx9UrV5PWd7bj14PPXUh_zgZXM,1827
 fractal_server/app/schemas/v2/workflowtask.py,sha256=atVuVN4aXsVEOmSd-vyg-8_8OnPmqx-gT75rXcn_AlQ,6552
 fractal_server/app/security/__init__.py,sha256=2-QbwuR-nsuHM_uwKS_WzYvkhnuhO5jUv8UVROetyVk,11169
-fractal_server/config.py,sha256=
+fractal_server/config.py,sha256=KOa2jrsbx0H6zG2ItNZkLiKqbuOkV3aUYKFuIN3FIyE,24921
 fractal_server/data_migrations/README.md,sha256=_3AEFvDg9YkybDqCLlFPdDmGJvr6Tw7HRI14aZ3LOIw,398
 fractal_server/gunicorn_fractal.py,sha256=2AOkgxu-oQ-XB578_voT0VuhmAXFTmb0c-nYn1XLy_Q,1231
 fractal_server/images/__init__.py,sha256=xO6jTLE4EZKO6cTDdJsBmK9cdeh9hFTaSbSuWgQg7y4,196
 fractal_server/images/models.py,sha256=9ipU5h4N6ogBChoB-2vHoqtL0TXOHCv6kRR-fER3mkM,4167
 fractal_server/images/tools.py,sha256=gxeniYy4Z-cp_ToK2LHPJUTVVUUrdpogYdcBUvBuLiY,2209
 fractal_server/logger.py,sha256=56wfka6fHaa3Rx5qO009nEs_y8gx5wZ2NUNZZ1I-uvc,5130
-fractal_server/main.py,sha256=
+fractal_server/main.py,sha256=Kmty1C9jPfH101nP3b82u9H9QvT-5Z-8Dd60wf9S5h0,5298
 fractal_server/migrations/README,sha256=4rQvyDfqodGhpJw74VYijRmgFP49ji5chyEemWGHsuw,59
 fractal_server/migrations/env.py,sha256=Bvg-FJzRJZIH_wqS_ZyZNXANIaathjo22_IY7c3fCjo,2636
 fractal_server/migrations/script.py.mako,sha256=oMXw9LC3zRbinWWPPDgeZ4z9FJrV2zhRWiYdS5YgNbI,526
@@ -167,7 +168,7 @@ fractal_server/migrations/versions/efa89c30e0a4_add_project_timestamp_created.py
 fractal_server/migrations/versions/f384e1c0cf5d_drop_task_default_args_columns.py,sha256=9BwqUS9Gf7UW_KjrzHbtViC880qhD452KAytkHWWZyk,746
 fractal_server/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 fractal_server/ssh/__init__.py,sha256=sVUmzxf7_DuXG1xoLQ1_00fo5NPhi2LJipSmU5EAkPs,124
-fractal_server/ssh/_fabric.py,sha256=
+fractal_server/ssh/_fabric.py,sha256=9xcsOEwbCgbJtupkIeG8OOtT8ct8c7_ruIehhNmD4wc,11379
 fractal_server/string_tools.py,sha256=KThgTLn_FHNSuEUGLabryJAP6DaFd7bpi-hF5FgkBjw,1268
 fractal_server/syringe.py,sha256=3qSMW3YaMKKnLdgnooAINOPxnCOxP7y2jeAQYB21Gdo,2786
 fractal_server/tasks/__init__.py,sha256=kadmVUoIghl8s190_Tt-8f-WBqMi8u8oU4Pvw39NHE8,23
@@ -192,8 +193,8 @@ fractal_server/tasks/v2/templates/_5_pip_show.sh,sha256=GrJ19uHYQxANEy9JaeNJZVTq
 fractal_server/tasks/v2/utils.py,sha256=JOyCacb6MNvrwfLNTyLwcz8y79J29YuJeJ2MK5kqXRM,1657
 fractal_server/urls.py,sha256=5o_qq7PzKKbwq12NHSQZDmDitn5RAOeQ4xufu-2v9Zk,448
 fractal_server/utils.py,sha256=b7WwFdcFZ8unyT65mloFToYuEDXpQoHRcmRNqrhd_dQ,2115
-fractal_server-2.3.
-fractal_server-2.3.
-fractal_server-2.3.
-fractal_server-2.3.
-fractal_server-2.3.
+fractal_server-2.3.5.dist-info/LICENSE,sha256=QKAharUuhxL58kSoLizKJeZE3mTCBnX6ucmz8W0lxlk,1576
+fractal_server-2.3.5.dist-info/METADATA,sha256=dElt7KKeyH2IIRToMdsTiVKeIEww0ZsbFzlQcCskrKE,4425
+fractal_server-2.3.5.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
+fractal_server-2.3.5.dist-info/entry_points.txt,sha256=8tV2kynvFkjnhbtDnxAqImL6HMVKsopgGfew0DOp5UY,58
+fractal_server-2.3.5.dist-info/RECORD,,
fractal_server/app/runner/v2/_slurm/get_slurm_config.py
DELETED
@@ -1,182 +0,0 @@
-from pathlib import Path
-from typing import Literal
-from typing import Optional
-
-from fractal_server.app.models.v2 import WorkflowTaskV2
-from fractal_server.app.runner.executors.slurm._slurm_config import (
-    _parse_mem_value,
-)
-from fractal_server.app.runner.executors.slurm._slurm_config import (
-    load_slurm_config_file,
-)
-from fractal_server.app.runner.executors.slurm._slurm_config import logger
-from fractal_server.app.runner.executors.slurm._slurm_config import SlurmConfig
-from fractal_server.app.runner.executors.slurm._slurm_config import (
-    SlurmConfigError,
-)
-
-
-def get_slurm_config(
-    wftask: WorkflowTaskV2,
-    workflow_dir_local: Path,
-    workflow_dir_remote: Path,
-    which_type: Literal["non_parallel", "parallel"],
-    config_path: Optional[Path] = None,
-) -> SlurmConfig:
-    """
-    Prepare a `SlurmConfig` configuration object
-
-    The argument `which_type` determines whether we use `wftask.meta_parallel`
-    or `wftask.meta_non_parallel`. In the following descritpion, let us assume
-    that `which_type="parallel"`.
-
-    The sources for `SlurmConfig` attributes, in increasing priority order, are
-
-    1. The general content of the Fractal SLURM configuration file.
-    2. The GPU-specific content of the Fractal SLURM configuration file, if
-       appropriate.
-    3. Properties in `wftask.meta_parallel` (which typically include those in
-       `wftask.task.meta_parallel`). Note that `wftask.meta_parallel` may be
-       `None`.
-
-    Arguments:
-        wftask:
-            WorkflowTask for which the SLURM configuration is is to be
-            prepared.
-        workflow_dir_local:
-            Server-owned directory to store all task-execution-related relevant
-            files (inputs, outputs, errors, and all meta files related to the
-            job execution). Note: users cannot write directly to this folder.
-        workflow_dir_remote:
-            User-side directory with the same scope as `workflow_dir_local`,
-            and where a user can write.
-        config_path:
-            Path of a Fractal SLURM configuration file; if `None`, use
-            `FRACTAL_SLURM_CONFIG_FILE` variable from settings.
-        which_type:
-            Determines whether to use `meta_parallel` or `meta_non_parallel`.
-
-    Returns:
-        slurm_config:
-            The SlurmConfig object
-    """
-
-    if which_type == "non_parallel":
-        wftask_meta = wftask.meta_non_parallel
-    elif which_type == "parallel":
-        wftask_meta = wftask.meta_parallel
-    else:
-        raise ValueError(
-            f"get_slurm_config received invalid argument {which_type=}."
-        )
-
-    logger.debug(
-        "[get_slurm_config] WorkflowTask meta attribute: {wftask_meta=}"
-    )
-
-    # Incorporate slurm_env.default_slurm_config
-    slurm_env = load_slurm_config_file(config_path=config_path)
-    slurm_dict = slurm_env.default_slurm_config.dict(
-        exclude_unset=True, exclude={"mem"}
-    )
-    if slurm_env.default_slurm_config.mem:
-        slurm_dict["mem_per_task_MB"] = slurm_env.default_slurm_config.mem
-
-    # Incorporate slurm_env.batching_config
-    for key, value in slurm_env.batching_config.dict().items():
-        slurm_dict[key] = value
-
-    # Incorporate slurm_env.user_local_exports
-    slurm_dict["user_local_exports"] = slurm_env.user_local_exports
-
-    logger.debug(
-        "[get_slurm_config] Fractal SLURM configuration file: "
-        f"{slurm_env.dict()=}"
-    )
-
-    # GPU-related options
-    # Notes about priority:
-    # 1. This block of definitions takes priority over other definitions from
-    #    slurm_env which are not under the `needs_gpu` subgroup
-    # 2. This block of definitions has lower priority than whatever comes next
-    #    (i.e. from WorkflowTask.meta).
-    if wftask_meta is not None:
-        needs_gpu = wftask_meta.get("needs_gpu", False)
-    else:
-        needs_gpu = False
-    logger.debug(f"[get_slurm_config] {needs_gpu=}")
-    if needs_gpu:
-        for key, value in slurm_env.gpu_slurm_config.dict(
-            exclude_unset=True, exclude={"mem"}
-        ).items():
-            slurm_dict[key] = value
-        if slurm_env.gpu_slurm_config.mem:
-            slurm_dict["mem_per_task_MB"] = slurm_env.gpu_slurm_config.mem
-
-    # Number of CPUs per task, for multithreading
-    if wftask_meta is not None and "cpus_per_task" in wftask_meta:
-        cpus_per_task = int(wftask_meta["cpus_per_task"])
-        slurm_dict["cpus_per_task"] = cpus_per_task
-
-    # Required memory per task, in MB
-    if wftask_meta is not None and "mem" in wftask_meta:
-        raw_mem = wftask_meta["mem"]
-        mem_per_task_MB = _parse_mem_value(raw_mem)
-        slurm_dict["mem_per_task_MB"] = mem_per_task_MB
-
-    # Job name
-    if wftask.is_legacy_task:
-        job_name = wftask.task_legacy.name.replace(" ", "_")
-    else:
-        job_name = wftask.task.name.replace(" ", "_")
-    slurm_dict["job_name"] = job_name
-
-    # Optional SLURM arguments and extra lines
-    if wftask_meta is not None:
-        account = wftask_meta.get("account", None)
-        if account is not None:
-            error_msg = (
-                f"Invalid {account=} property in WorkflowTask `meta` "
-                "attribute.\n"
-                "SLURM account must be set in the request body of the "
-                "apply-workflow endpoint, or by modifying the user properties."
-            )
-            logger.error(error_msg)
-            raise SlurmConfigError(error_msg)
-        for key in ["time", "gres", "constraint"]:
-            value = wftask_meta.get(key, None)
-            if value:
-                slurm_dict[key] = value
-    if wftask_meta is not None:
-        extra_lines = wftask_meta.get("extra_lines", [])
-    else:
-        extra_lines = []
-    extra_lines = slurm_dict.get("extra_lines", []) + extra_lines
-    if len(set(extra_lines)) != len(extra_lines):
-        logger.debug(
-            "[get_slurm_config] Removing repeated elements "
-            f"from {extra_lines=}."
-        )
-        extra_lines = list(set(extra_lines))
-    slurm_dict["extra_lines"] = extra_lines
-
-    # Job-batching parameters (if None, they will be determined heuristically)
-    if wftask_meta is not None:
-        tasks_per_job = wftask_meta.get("tasks_per_job", None)
-        parallel_tasks_per_job = wftask_meta.get(
-            "parallel_tasks_per_job", None
-        )
-    else:
-        tasks_per_job = None
-        parallel_tasks_per_job = None
-    slurm_dict["tasks_per_job"] = tasks_per_job
-    slurm_dict["parallel_tasks_per_job"] = parallel_tasks_per_job
-
-    # Put everything together
-    logger.debug(
-        "[get_slurm_config] Now create a SlurmConfig object based "
-        f"on {slurm_dict=}"
-    )
-    slurm_config = SlurmConfig(**slurm_dict)
-
-    return slurm_config
fractal_server/app/runner/v2/{_slurm → _slurm_sudo}/__init__.py
File without changes
{fractal_server-2.3.3.dist-info → fractal_server-2.3.5.dist-info}/LICENSE
File without changes
{fractal_server-2.3.3.dist-info → fractal_server-2.3.5.dist-info}/WHEEL
File without changes
{fractal_server-2.3.3.dist-info → fractal_server-2.3.5.dist-info}/entry_points.txt
File without changes