fractal-server 2.2.0a1__py3-none-any.whl → 2.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fractal_server/__init__.py +1 -1
- fractal_server/app/models/v1/state.py +1 -2
- fractal_server/app/routes/admin/v1.py +2 -2
- fractal_server/app/routes/admin/v2.py +2 -2
- fractal_server/app/routes/api/v1/job.py +2 -2
- fractal_server/app/routes/api/v1/task_collection.py +4 -4
- fractal_server/app/routes/api/v2/__init__.py +23 -3
- fractal_server/app/routes/api/v2/job.py +2 -2
- fractal_server/app/routes/api/v2/submit.py +6 -0
- fractal_server/app/routes/api/v2/task_collection.py +74 -34
- fractal_server/app/routes/api/v2/task_collection_custom.py +170 -0
- fractal_server/app/routes/api/v2/task_collection_ssh.py +125 -0
- fractal_server/app/routes/aux/_runner.py +10 -2
- fractal_server/app/runner/compress_folder.py +120 -0
- fractal_server/app/runner/executors/slurm/__init__.py +0 -3
- fractal_server/app/runner/executors/slurm/_batching.py +0 -1
- fractal_server/app/runner/executors/slurm/_slurm_config.py +9 -9
- fractal_server/app/runner/executors/slurm/ssh/__init__.py +3 -0
- fractal_server/app/runner/executors/slurm/ssh/_executor_wait_thread.py +112 -0
- fractal_server/app/runner/executors/slurm/ssh/_slurm_job.py +120 -0
- fractal_server/app/runner/executors/slurm/ssh/executor.py +1488 -0
- fractal_server/app/runner/executors/slurm/sudo/__init__.py +3 -0
- fractal_server/app/runner/executors/slurm/{_check_jobs_status.py → sudo/_check_jobs_status.py} +1 -1
- fractal_server/app/runner/executors/slurm/{_executor_wait_thread.py → sudo/_executor_wait_thread.py} +1 -1
- fractal_server/app/runner/executors/slurm/{_subprocess_run_as_user.py → sudo/_subprocess_run_as_user.py} +1 -1
- fractal_server/app/runner/executors/slurm/{executor.py → sudo/executor.py} +12 -12
- fractal_server/app/runner/extract_archive.py +38 -0
- fractal_server/app/runner/v1/__init__.py +78 -40
- fractal_server/app/runner/v1/_slurm/__init__.py +1 -1
- fractal_server/app/runner/v2/__init__.py +147 -62
- fractal_server/app/runner/v2/_local_experimental/__init__.py +22 -12
- fractal_server/app/runner/v2/_local_experimental/executor.py +12 -8
- fractal_server/app/runner/v2/_slurm/__init__.py +1 -6
- fractal_server/app/runner/v2/_slurm_ssh/__init__.py +125 -0
- fractal_server/app/runner/v2/_slurm_ssh/_submit_setup.py +83 -0
- fractal_server/app/runner/v2/_slurm_ssh/get_slurm_config.py +182 -0
- fractal_server/app/runner/v2/runner_functions_low_level.py +9 -11
- fractal_server/app/runner/versions.py +30 -0
- fractal_server/app/schemas/v1/__init__.py +1 -0
- fractal_server/app/schemas/{state.py → v1/state.py} +4 -21
- fractal_server/app/schemas/v2/__init__.py +4 -1
- fractal_server/app/schemas/v2/task_collection.py +101 -30
- fractal_server/config.py +184 -3
- fractal_server/main.py +27 -1
- fractal_server/ssh/__init__.py +4 -0
- fractal_server/ssh/_fabric.py +245 -0
- fractal_server/tasks/utils.py +12 -64
- fractal_server/tasks/v1/background_operations.py +2 -2
- fractal_server/tasks/{endpoint_operations.py → v1/endpoint_operations.py} +7 -12
- fractal_server/tasks/v1/utils.py +67 -0
- fractal_server/tasks/v2/_TaskCollectPip.py +61 -32
- fractal_server/tasks/v2/_venv_pip.py +195 -0
- fractal_server/tasks/v2/background_operations.py +257 -295
- fractal_server/tasks/v2/background_operations_ssh.py +317 -0
- fractal_server/tasks/v2/endpoint_operations.py +136 -0
- fractal_server/tasks/v2/templates/_1_create_venv.sh +46 -0
- fractal_server/tasks/v2/templates/_2_upgrade_pip.sh +30 -0
- fractal_server/tasks/v2/templates/_3_pip_install.sh +32 -0
- fractal_server/tasks/v2/templates/_4_pip_freeze.sh +21 -0
- fractal_server/tasks/v2/templates/_5_pip_show.sh +59 -0
- fractal_server/tasks/v2/utils.py +54 -0
- {fractal_server-2.2.0a1.dist-info → fractal_server-2.3.0.dist-info}/METADATA +4 -2
- {fractal_server-2.2.0a1.dist-info → fractal_server-2.3.0.dist-info}/RECORD +66 -42
- fractal_server/tasks/v2/get_collection_data.py +0 -14
- {fractal_server-2.2.0a1.dist-info → fractal_server-2.3.0.dist-info}/LICENSE +0 -0
- {fractal_server-2.2.0a1.dist-info → fractal_server-2.3.0.dist-info}/WHEEL +0 -0
- {fractal_server-2.2.0a1.dist-info → fractal_server-2.3.0.dist-info}/entry_points.txt +0 -0
@@ -1,9 +1,11 @@
|
|
1
|
+
from concurrent.futures.process import BrokenProcessPool
|
1
2
|
from pathlib import Path
|
2
3
|
from typing import Optional
|
3
4
|
|
4
5
|
from ....models.v2 import DatasetV2
|
5
6
|
from ....models.v2 import WorkflowV2
|
6
7
|
from ...async_wrap import async_wrap
|
8
|
+
from ...exceptions import JobExecutionError
|
7
9
|
from ...filenames import SHUTDOWN_FILENAME
|
8
10
|
from ...set_start_and_last_task_index import set_start_and_last_task_index
|
9
11
|
from ..runner import execute_tasks_v2
|
@@ -29,21 +31,29 @@ def _process_workflow(
|
|
29
31
|
[process_workflow][fractal_server.app.runner.v2._local_experimental.process_workflow]
|
30
32
|
for the call signature.
|
31
33
|
"""
|
32
|
-
|
33
34
|
with FractalProcessPoolExecutor(
|
34
35
|
shutdown_file=workflow_dir_local / SHUTDOWN_FILENAME
|
35
36
|
) as executor:
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
37
|
+
try:
|
38
|
+
new_dataset_attributes = execute_tasks_v2(
|
39
|
+
wf_task_list=workflow.task_list[
|
40
|
+
first_task_index : (last_task_index + 1) # noqa
|
41
|
+
],
|
42
|
+
dataset=dataset,
|
43
|
+
executor=executor,
|
44
|
+
workflow_dir_local=workflow_dir_local,
|
45
|
+
workflow_dir_remote=workflow_dir_local,
|
46
|
+
logger_name=logger_name,
|
47
|
+
submit_setup_call=_local_submit_setup,
|
48
|
+
)
|
49
|
+
except BrokenProcessPool as e:
|
50
|
+
raise JobExecutionError(
|
51
|
+
info=(
|
52
|
+
"Job failed with BrokenProcessPool error, likely due to "
|
53
|
+
f"an executor shutdown.\nOriginal error:\n{e.args[0]}"
|
54
|
+
)
|
55
|
+
)
|
56
|
+
|
47
57
|
return new_dataset_attributes
|
48
58
|
|
49
59
|
|
@@ -2,8 +2,6 @@
|
|
2
2
|
Custom version of Python
|
3
3
|
[ProcessPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#concurrent.futures.ProcessPoolExecutor).
|
4
4
|
"""
|
5
|
-
import os
|
6
|
-
import signal
|
7
5
|
import threading
|
8
6
|
import time
|
9
7
|
from concurrent.futures import ProcessPoolExecutor
|
@@ -14,13 +12,14 @@ from typing import Iterable
|
|
14
12
|
from typing import Optional
|
15
13
|
from typing import Sequence
|
16
14
|
|
15
|
+
import psutil
|
16
|
+
|
17
17
|
from ._local_config import get_default_local_backend_config
|
18
18
|
from ._local_config import LocalBackendConfig
|
19
19
|
from fractal_server.app.runner.exceptions import JobExecutionError
|
20
|
-
from fractal_server.logger import
|
21
|
-
|
20
|
+
from fractal_server.logger import set_logger
|
22
21
|
|
23
|
-
logger =
|
22
|
+
logger = set_logger("FractalProcessPoolExecutor")
|
24
23
|
|
25
24
|
|
26
25
|
class FractalProcessPoolExecutor(ProcessPoolExecutor):
|
@@ -66,12 +65,17 @@ class FractalProcessPoolExecutor(ProcessPoolExecutor):
|
|
66
65
|
"""
|
67
66
|
Running on '_shutdown_file_thread'.
|
68
67
|
"""
|
68
|
+
|
69
69
|
logger.info("Start terminating FractalProcessPoolExecutor processes.")
|
70
|
+
# We use 'psutil' in order to easily access the PIDs of the children.
|
70
71
|
if self._processes is not None:
|
71
72
|
for pid in self._processes.keys():
|
72
|
-
|
73
|
-
|
74
|
-
|
73
|
+
parent = psutil.Process(pid)
|
74
|
+
children = parent.children(recursive=True)
|
75
|
+
for child in children:
|
76
|
+
child.kill()
|
77
|
+
parent.kill()
|
78
|
+
logger.info(f"Process {pid} and its children terminated.")
|
75
79
|
logger.info("FractalProcessPoolExecutor processes terminated.")
|
76
80
|
|
77
81
|
def shutdown(self, *args, **kwargs) -> None:
|
@@ -24,16 +24,11 @@ from typing import Union
|
|
24
24
|
from ....models.v2 import DatasetV2
|
25
25
|
from ....models.v2 import WorkflowV2
|
26
26
|
from ...async_wrap import async_wrap
|
27
|
-
from ...executors.slurm.executor import FractalSlurmExecutor
|
27
|
+
from ...executors.slurm.sudo.executor import FractalSlurmExecutor
|
28
28
|
from ...set_start_and_last_task_index import set_start_and_last_task_index
|
29
29
|
from ..runner import execute_tasks_v2
|
30
30
|
from ._submit_setup import _slurm_submit_setup
|
31
31
|
|
32
|
-
# from .._common import execute_tasks
|
33
|
-
# from ..common import async_wrap
|
34
|
-
# from ..common import set_start_and_last_task_index
|
35
|
-
# from ..common import TaskParameters
|
36
|
-
|
37
32
|
|
38
33
|
def _process_workflow(
|
39
34
|
*,
|
@@ -0,0 +1,125 @@
|
|
1
|
+
# Copyright 2022 (C) Friedrich Miescher Institute for Biomedical Research and
|
2
|
+
# University of Zurich
|
3
|
+
#
|
4
|
+
# Original authors:
|
5
|
+
# Jacopo Nespolo <jacopo.nespolo@exact-lab.it>
|
6
|
+
# Tommaso Comparin <tommaso.comparin@exact-lab.it>
|
7
|
+
# Marco Franzon <marco.franzon@exact-lab.it>
|
8
|
+
#
|
9
|
+
# This file is part of Fractal and was originally developed by eXact lab S.r.l.
|
10
|
+
# <exact-lab.it> under contract with Liberali Lab from the Friedrich Miescher
|
11
|
+
# Institute for Biomedical Research and Pelkmans Lab from the University of
|
12
|
+
# Zurich.
|
13
|
+
"""
|
14
|
+
Slurm Backend
|
15
|
+
|
16
|
+
This backend runs fractal workflows in a SLURM cluster using Clusterfutures
|
17
|
+
Executor objects.
|
18
|
+
"""
|
19
|
+
from pathlib import Path
|
20
|
+
from typing import Any
|
21
|
+
from typing import Optional
|
22
|
+
from typing import Union
|
23
|
+
|
24
|
+
from .....ssh._fabric import FractalSSH
|
25
|
+
from ....models.v2 import DatasetV2
|
26
|
+
from ....models.v2 import WorkflowV2
|
27
|
+
from ...async_wrap import async_wrap
|
28
|
+
from ...executors.slurm.ssh.executor import FractalSlurmSSHExecutor
|
29
|
+
from ...set_start_and_last_task_index import set_start_and_last_task_index
|
30
|
+
from ..runner import execute_tasks_v2
|
31
|
+
from ._submit_setup import _slurm_submit_setup
|
32
|
+
|
33
|
+
|
34
|
+
def _process_workflow(
|
35
|
+
*,
|
36
|
+
workflow: WorkflowV2,
|
37
|
+
dataset: DatasetV2,
|
38
|
+
logger_name: str,
|
39
|
+
workflow_dir_local: Path,
|
40
|
+
workflow_dir_remote: Path,
|
41
|
+
first_task_index: int,
|
42
|
+
last_task_index: int,
|
43
|
+
fractal_ssh: FractalSSH,
|
44
|
+
worker_init: Optional[Union[str, list[str]]] = None,
|
45
|
+
) -> dict[str, Any]:
|
46
|
+
"""
|
47
|
+
Internal processing routine for the SLURM backend
|
48
|
+
|
49
|
+
This function initialises a FractalSlurmSSHExecutor, setting logging,
|
50
|
+
workflow working dir and user to impersonate. It then schedules the
|
51
|
+
workflow tasks and returns the new dataset attributes
|
52
|
+
|
53
|
+
Cf.
|
54
|
+
[process_workflow][fractal_server.app.runner.v2._local.process_workflow]
|
55
|
+
|
56
|
+
Returns:
|
57
|
+
new_dataset_attributes:
|
58
|
+
"""
|
59
|
+
|
60
|
+
if isinstance(worker_init, str):
|
61
|
+
worker_init = worker_init.split("\n")
|
62
|
+
|
63
|
+
with FractalSlurmSSHExecutor(
|
64
|
+
fractal_ssh=fractal_ssh,
|
65
|
+
workflow_dir_local=workflow_dir_local,
|
66
|
+
workflow_dir_remote=workflow_dir_remote,
|
67
|
+
common_script_lines=worker_init,
|
68
|
+
) as executor:
|
69
|
+
new_dataset_attributes = execute_tasks_v2(
|
70
|
+
wf_task_list=workflow.task_list[
|
71
|
+
first_task_index : (last_task_index + 1) # noqa
|
72
|
+
], # noqa
|
73
|
+
dataset=dataset,
|
74
|
+
executor=executor,
|
75
|
+
workflow_dir_local=workflow_dir_local,
|
76
|
+
workflow_dir_remote=workflow_dir_remote,
|
77
|
+
logger_name=logger_name,
|
78
|
+
submit_setup_call=_slurm_submit_setup,
|
79
|
+
)
|
80
|
+
return new_dataset_attributes
|
81
|
+
|
82
|
+
|
83
|
+
async def process_workflow(
|
84
|
+
*,
|
85
|
+
workflow: WorkflowV2,
|
86
|
+
dataset: DatasetV2,
|
87
|
+
workflow_dir_local: Path,
|
88
|
+
workflow_dir_remote: Optional[Path] = None,
|
89
|
+
first_task_index: Optional[int] = None,
|
90
|
+
last_task_index: Optional[int] = None,
|
91
|
+
logger_name: str,
|
92
|
+
# Note: `user_cache_dir`, `slurm_user` and `slurm_account` are not used
|
93
|
+
fractal_ssh: FractalSSH,
|
94
|
+
user_cache_dir: Optional[str] = None,
|
95
|
+
slurm_user: Optional[str] = None,
|
96
|
+
slurm_account: Optional[str] = None,
|
97
|
+
worker_init: Optional[str] = None,
|
98
|
+
) -> dict:
|
99
|
+
"""
|
100
|
+
Process workflow (SLURM backend public interface)
|
101
|
+
|
102
|
+
Cf.
|
103
|
+
[process_workflow][fractal_server.app.runner.v2._local.process_workflow]
|
104
|
+
"""
|
105
|
+
|
106
|
+
# Set values of first_task_index and last_task_index
|
107
|
+
num_tasks = len(workflow.task_list)
|
108
|
+
first_task_index, last_task_index = set_start_and_last_task_index(
|
109
|
+
num_tasks,
|
110
|
+
first_task_index=first_task_index,
|
111
|
+
last_task_index=last_task_index,
|
112
|
+
)
|
113
|
+
|
114
|
+
new_dataset_attributes = await async_wrap(_process_workflow)(
|
115
|
+
workflow=workflow,
|
116
|
+
dataset=dataset,
|
117
|
+
logger_name=logger_name,
|
118
|
+
workflow_dir_local=workflow_dir_local,
|
119
|
+
workflow_dir_remote=workflow_dir_remote,
|
120
|
+
first_task_index=first_task_index,
|
121
|
+
last_task_index=last_task_index,
|
122
|
+
worker_init=worker_init,
|
123
|
+
fractal_ssh=fractal_ssh,
|
124
|
+
)
|
125
|
+
return new_dataset_attributes
|
@@ -0,0 +1,83 @@
|
|
1
|
+
# Copyright 2022 (C) Friedrich Miescher Institute for Biomedical Research and
|
2
|
+
# University of Zurich
|
3
|
+
#
|
4
|
+
# Original authors:
|
5
|
+
# Jacopo Nespolo <jacopo.nespolo@exact-lab.it>
|
6
|
+
# Tommaso Comparin <tommaso.comparin@exact-lab.it>
|
7
|
+
#
|
8
|
+
# This file is part of Fractal and was originally developed by eXact lab S.r.l.
|
9
|
+
# <exact-lab.it> under contract with Liberali Lab from the Friedrich Miescher
|
10
|
+
# Institute for Biomedical Research and Pelkmans Lab from the University of
|
11
|
+
# Zurich.
|
12
|
+
"""
|
13
|
+
Submodule to define _slurm_submit_setup, which is also the reference
|
14
|
+
implementation of `submit_setup_call`.
|
15
|
+
"""
|
16
|
+
from pathlib import Path
|
17
|
+
from typing import Literal
|
18
|
+
|
19
|
+
from ...task_files import get_task_file_paths
|
20
|
+
from .get_slurm_config import get_slurm_config
|
21
|
+
from fractal_server.app.models.v2 import WorkflowTaskV2
|
22
|
+
|
23
|
+
|
24
|
+
def _slurm_submit_setup(
|
25
|
+
*,
|
26
|
+
wftask: WorkflowTaskV2,
|
27
|
+
workflow_dir_local: Path,
|
28
|
+
workflow_dir_remote: Path,
|
29
|
+
which_type: Literal["non_parallel", "parallel"],
|
30
|
+
) -> dict[str, object]:
|
31
|
+
"""
|
32
|
+
Collect WorkflowTask-specific configuration parameters from different
|
33
|
+
sources, and inject them for execution.
|
34
|
+
|
35
|
+
Here goes all the logic for reading attributes from the appropriate sources
|
36
|
+
and transforming them into an appropriate `SlurmConfig` object (encoding
|
37
|
+
SLURM configuration) and `TaskFiles` object (with details e.g. about file
|
38
|
+
paths or filename prefixes).
|
39
|
+
|
40
|
+
For now, this is the reference implementation for the argument
|
41
|
+
`submit_setup_call` of
|
42
|
+
[fractal_server.app.runner.v2.runner][].
|
43
|
+
|
44
|
+
Arguments:
|
45
|
+
wftask:
|
46
|
+
WorkflowTask for which the configuration is to be assembled
|
47
|
+
workflow_dir_local:
|
48
|
+
Server-owned directory to store all task-execution-related relevant
|
49
|
+
files (inputs, outputs, errors, and all meta files related to the
|
50
|
+
job execution). Note: users cannot write directly to this folder.
|
51
|
+
workflow_dir_remote:
|
52
|
+
User-side directory with the same scope as `workflow_dir_local`,
|
53
|
+
and where a user can write.
|
54
|
+
|
55
|
+
Returns:
|
56
|
+
submit_setup_dict:
|
57
|
+
A dictionary that will be passed on to
|
58
|
+
`FractalSlurmExecutor.submit` and `FractalSlurmExecutor.map`, so
|
59
|
+
as to set extra options.
|
60
|
+
"""
|
61
|
+
|
62
|
+
# Get SlurmConfig object
|
63
|
+
slurm_config = get_slurm_config(
|
64
|
+
wftask=wftask,
|
65
|
+
workflow_dir_local=workflow_dir_local,
|
66
|
+
workflow_dir_remote=workflow_dir_remote,
|
67
|
+
which_type=which_type,
|
68
|
+
)
|
69
|
+
|
70
|
+
# Get TaskFiles object
|
71
|
+
task_files = get_task_file_paths(
|
72
|
+
workflow_dir_local=workflow_dir_local,
|
73
|
+
workflow_dir_remote=workflow_dir_remote,
|
74
|
+
task_order=wftask.order,
|
75
|
+
task_name=wftask.task.name,
|
76
|
+
)
|
77
|
+
|
78
|
+
# Prepare and return output dictionary
|
79
|
+
submit_setup_dict = dict(
|
80
|
+
slurm_config=slurm_config,
|
81
|
+
task_files=task_files,
|
82
|
+
)
|
83
|
+
return submit_setup_dict
|
@@ -0,0 +1,182 @@
|
|
1
|
+
from pathlib import Path
|
2
|
+
from typing import Literal
|
3
|
+
from typing import Optional
|
4
|
+
|
5
|
+
from fractal_server.app.models.v2 import WorkflowTaskV2
|
6
|
+
from fractal_server.app.runner.executors.slurm._slurm_config import (
|
7
|
+
_parse_mem_value,
|
8
|
+
)
|
9
|
+
from fractal_server.app.runner.executors.slurm._slurm_config import (
|
10
|
+
load_slurm_config_file,
|
11
|
+
)
|
12
|
+
from fractal_server.app.runner.executors.slurm._slurm_config import logger
|
13
|
+
from fractal_server.app.runner.executors.slurm._slurm_config import SlurmConfig
|
14
|
+
from fractal_server.app.runner.executors.slurm._slurm_config import (
|
15
|
+
SlurmConfigError,
|
16
|
+
)
|
17
|
+
|
18
|
+
|
19
|
+
def get_slurm_config(
|
20
|
+
wftask: WorkflowTaskV2,
|
21
|
+
workflow_dir_local: Path,
|
22
|
+
workflow_dir_remote: Path,
|
23
|
+
which_type: Literal["non_parallel", "parallel"],
|
24
|
+
config_path: Optional[Path] = None,
|
25
|
+
) -> SlurmConfig:
|
26
|
+
"""
|
27
|
+
Prepare a `SlurmConfig` configuration object
|
28
|
+
|
29
|
+
The argument `which_type` determines whether we use `wftask.meta_parallel`
|
30
|
+
or `wftask.meta_non_parallel`. In the following description, let us assume
|
31
|
+
that `which_type="parallel"`.
|
32
|
+
|
33
|
+
The sources for `SlurmConfig` attributes, in increasing priority order, are
|
34
|
+
|
35
|
+
1. The general content of the Fractal SLURM configuration file.
|
36
|
+
2. The GPU-specific content of the Fractal SLURM configuration file, if
|
37
|
+
appropriate.
|
38
|
+
3. Properties in `wftask.meta_parallel` (which typically include those in
|
39
|
+
`wftask.task.meta_parallel`). Note that `wftask.meta_parallel` may be
|
40
|
+
`None`.
|
41
|
+
|
42
|
+
Arguments:
|
43
|
+
wftask:
|
44
|
+
WorkflowTask for which the SLURM configuration is to be
|
45
|
+
prepared.
|
46
|
+
workflow_dir_local:
|
47
|
+
Server-owned directory to store all task-execution-related relevant
|
48
|
+
files (inputs, outputs, errors, and all meta files related to the
|
49
|
+
job execution). Note: users cannot write directly to this folder.
|
50
|
+
workflow_dir_remote:
|
51
|
+
User-side directory with the same scope as `workflow_dir_local`,
|
52
|
+
and where a user can write.
|
53
|
+
config_path:
|
54
|
+
Path of a Fractal SLURM configuration file; if `None`, use
|
55
|
+
`FRACTAL_SLURM_CONFIG_FILE` variable from settings.
|
56
|
+
which_type:
|
57
|
+
Determines whether to use `meta_parallel` or `meta_non_parallel`.
|
58
|
+
|
59
|
+
Returns:
|
60
|
+
slurm_config:
|
61
|
+
The SlurmConfig object
|
62
|
+
"""
|
63
|
+
|
64
|
+
if which_type == "non_parallel":
|
65
|
+
wftask_meta = wftask.meta_non_parallel
|
66
|
+
elif which_type == "parallel":
|
67
|
+
wftask_meta = wftask.meta_parallel
|
68
|
+
else:
|
69
|
+
raise ValueError(
|
70
|
+
f"get_slurm_config received invalid argument {which_type=}."
|
71
|
+
)
|
72
|
+
|
73
|
+
logger.debug(
|
74
|
+
f"[get_slurm_config] WorkflowTask meta attribute: {wftask_meta=}"
|
75
|
+
)
|
76
|
+
|
77
|
+
# Incorporate slurm_env.default_slurm_config
|
78
|
+
slurm_env = load_slurm_config_file(config_path=config_path)
|
79
|
+
slurm_dict = slurm_env.default_slurm_config.dict(
|
80
|
+
exclude_unset=True, exclude={"mem"}
|
81
|
+
)
|
82
|
+
if slurm_env.default_slurm_config.mem:
|
83
|
+
slurm_dict["mem_per_task_MB"] = slurm_env.default_slurm_config.mem
|
84
|
+
|
85
|
+
# Incorporate slurm_env.batching_config
|
86
|
+
for key, value in slurm_env.batching_config.dict().items():
|
87
|
+
slurm_dict[key] = value
|
88
|
+
|
89
|
+
# Incorporate slurm_env.user_local_exports
|
90
|
+
slurm_dict["user_local_exports"] = slurm_env.user_local_exports
|
91
|
+
|
92
|
+
logger.debug(
|
93
|
+
"[get_slurm_config] Fractal SLURM configuration file: "
|
94
|
+
f"{slurm_env.dict()=}"
|
95
|
+
)
|
96
|
+
|
97
|
+
# GPU-related options
|
98
|
+
# Notes about priority:
|
99
|
+
# 1. This block of definitions takes priority over other definitions from
|
100
|
+
# slurm_env which are not under the `needs_gpu` subgroup
|
101
|
+
# 2. This block of definitions has lower priority than whatever comes next
|
102
|
+
# (i.e. from WorkflowTask.meta).
|
103
|
+
if wftask_meta is not None:
|
104
|
+
needs_gpu = wftask_meta.get("needs_gpu", False)
|
105
|
+
else:
|
106
|
+
needs_gpu = False
|
107
|
+
logger.debug(f"[get_slurm_config] {needs_gpu=}")
|
108
|
+
if needs_gpu and slurm_env.gpu_slurm_config is not None: # FIXME
|
109
|
+
for key, value in slurm_env.gpu_slurm_config.dict(
|
110
|
+
exclude_unset=True, exclude={"mem"}
|
111
|
+
).items():
|
112
|
+
slurm_dict[key] = value
|
113
|
+
if slurm_env.gpu_slurm_config.mem:
|
114
|
+
slurm_dict["mem_per_task_MB"] = slurm_env.gpu_slurm_config.mem
|
115
|
+
|
116
|
+
# Number of CPUs per task, for multithreading
|
117
|
+
if wftask_meta is not None and "cpus_per_task" in wftask_meta:
|
118
|
+
cpus_per_task = int(wftask_meta["cpus_per_task"])
|
119
|
+
slurm_dict["cpus_per_task"] = cpus_per_task
|
120
|
+
|
121
|
+
# Required memory per task, in MB
|
122
|
+
if wftask_meta is not None and "mem" in wftask_meta:
|
123
|
+
raw_mem = wftask_meta["mem"]
|
124
|
+
mem_per_task_MB = _parse_mem_value(raw_mem)
|
125
|
+
slurm_dict["mem_per_task_MB"] = mem_per_task_MB
|
126
|
+
|
127
|
+
# Job name
|
128
|
+
if wftask.is_legacy_task:
|
129
|
+
job_name = wftask.task_legacy.name.replace(" ", "_")
|
130
|
+
else:
|
131
|
+
job_name = wftask.task.name.replace(" ", "_")
|
132
|
+
slurm_dict["job_name"] = job_name
|
133
|
+
|
134
|
+
# Optional SLURM arguments and extra lines
|
135
|
+
if wftask_meta is not None:
|
136
|
+
account = wftask_meta.get("account", None)
|
137
|
+
if account is not None:
|
138
|
+
error_msg = (
|
139
|
+
f"Invalid {account=} property in WorkflowTask `meta` "
|
140
|
+
"attribute.\n"
|
141
|
+
"SLURM account must be set in the request body of the "
|
142
|
+
"apply-workflow endpoint, or by modifying the user properties."
|
143
|
+
)
|
144
|
+
logger.error(error_msg)
|
145
|
+
raise SlurmConfigError(error_msg)
|
146
|
+
for key in ["time", "gres", "constraint"]:
|
147
|
+
value = wftask_meta.get(key, None)
|
148
|
+
if value:
|
149
|
+
slurm_dict[key] = value
|
150
|
+
if wftask_meta is not None:
|
151
|
+
extra_lines = wftask_meta.get("extra_lines", [])
|
152
|
+
else:
|
153
|
+
extra_lines = []
|
154
|
+
extra_lines = slurm_dict.get("extra_lines", []) + extra_lines
|
155
|
+
if len(set(extra_lines)) != len(extra_lines):
|
156
|
+
logger.debug(
|
157
|
+
"[get_slurm_config] Removing repeated elements "
|
158
|
+
f"from {extra_lines=}."
|
159
|
+
)
|
160
|
+
extra_lines = list(set(extra_lines))
|
161
|
+
slurm_dict["extra_lines"] = extra_lines
|
162
|
+
|
163
|
+
# Job-batching parameters (if None, they will be determined heuristically)
|
164
|
+
if wftask_meta is not None:
|
165
|
+
tasks_per_job = wftask_meta.get("tasks_per_job", None)
|
166
|
+
parallel_tasks_per_job = wftask_meta.get(
|
167
|
+
"parallel_tasks_per_job", None
|
168
|
+
)
|
169
|
+
else:
|
170
|
+
tasks_per_job = None
|
171
|
+
parallel_tasks_per_job = None
|
172
|
+
slurm_dict["tasks_per_job"] = tasks_per_job
|
173
|
+
slurm_dict["parallel_tasks_per_job"] = parallel_tasks_per_job
|
174
|
+
|
175
|
+
# Put everything together
|
176
|
+
logger.debug(
|
177
|
+
"[get_slurm_config] Now create a SlurmConfig object based "
|
178
|
+
f"on {slurm_dict=}"
|
179
|
+
)
|
180
|
+
slurm_config = SlurmConfig(**slurm_dict)
|
181
|
+
|
182
|
+
return slurm_config
|
@@ -34,17 +34,15 @@ def _call_command_wrapper(cmd: str, log_path: Path) -> None:
|
|
34
34
|
)
|
35
35
|
raise TaskExecutionError(msg)
|
36
36
|
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
finally:
|
47
|
-
fp_log.close()
|
37
|
+
with open(log_path, "w") as fp_log:
|
38
|
+
try:
|
39
|
+
result = subprocess.run( # nosec
|
40
|
+
shlex_split(cmd),
|
41
|
+
stderr=fp_log,
|
42
|
+
stdout=fp_log,
|
43
|
+
)
|
44
|
+
except Exception as e:
|
45
|
+
raise e
|
48
46
|
|
49
47
|
if result.returncode > 0:
|
50
48
|
with log_path.open("r") as fp_stderr:
|
@@ -0,0 +1,30 @@
|
|
1
|
+
import json
|
2
|
+
import sys
|
3
|
+
from typing import Union
|
4
|
+
|
5
|
+
import cloudpickle
|
6
|
+
|
7
|
+
import fractal_server
|
8
|
+
|
9
|
+
|
10
|
+
def get_versions() -> dict[str, Union[list[int], str]]:
|
11
|
+
"""
|
12
|
+
Extract versions of Python, fractal-server and cloudpickle.
|
13
|
+
|
14
|
+
This information is useful to check compatibility of two Python
|
15
|
+
interpreters when running tasks: the current interpreter (which prepares
|
16
|
+
the input pickles and orchestrates workflow execution) and a remote
|
17
|
+
interpreter (e.g. the one defined in the `FRACTAL_SLURM_WORKER_PYTHON`
|
18
|
+
configuration variable) that executes the tasks.
|
19
|
+
"""
|
20
|
+
versions = dict(
|
21
|
+
python=list(sys.version_info[:3]),
|
22
|
+
cloudpickle=cloudpickle.__version__,
|
23
|
+
fractal_server=fractal_server.__VERSION__,
|
24
|
+
)
|
25
|
+
return versions
|
26
|
+
|
27
|
+
|
28
|
+
if __name__ == "__main__":
|
29
|
+
versions = get_versions()
|
30
|
+
print(json.dumps(versions))
|
@@ -17,6 +17,7 @@ from .manifest import TaskManifestV1 # noqa: F401
|
|
17
17
|
from .project import ProjectCreateV1 # noqa: F401
|
18
18
|
from .project import ProjectReadV1 # noqa: F401
|
19
19
|
from .project import ProjectUpdateV1 # noqa: F401
|
20
|
+
from .state import StateRead # noqa: F401
|
20
21
|
from .task import TaskCreateV1 # noqa: F401
|
21
22
|
from .task import TaskImportV1 # noqa: F401
|
22
23
|
from .task import TaskReadV1 # noqa: F401
|
@@ -5,29 +5,10 @@ from typing import Optional
|
|
5
5
|
from pydantic import BaseModel
|
6
6
|
from pydantic import validator
|
7
7
|
|
8
|
-
from ._validators import valutc
|
8
|
+
from fractal_server.app.schemas._validators import valutc
|
9
9
|
|
10
|
-
__all__ = (
|
11
|
-
"_StateBase",
|
12
|
-
"StateRead",
|
13
|
-
)
|
14
10
|
|
15
|
-
|
16
|
-
class _StateBase(BaseModel):
|
17
|
-
"""
|
18
|
-
Base class for `State`.
|
19
|
-
|
20
|
-
Attributes:
|
21
|
-
id: Primary key
|
22
|
-
data: Content of the state
|
23
|
-
timestamp: Time stamp of the state
|
24
|
-
"""
|
25
|
-
|
26
|
-
data: dict[str, Any]
|
27
|
-
timestamp: datetime
|
28
|
-
|
29
|
-
|
30
|
-
class StateRead(_StateBase):
|
11
|
+
class StateRead(BaseModel):
|
31
12
|
"""
|
32
13
|
Class for `State` read from database.
|
33
14
|
|
@@ -36,5 +17,7 @@ class StateRead(_StateBase):
|
|
36
17
|
"""
|
37
18
|
|
38
19
|
id: Optional[int]
|
20
|
+
data: dict[str, Any]
|
21
|
+
timestamp: datetime
|
39
22
|
|
40
23
|
_timestamp = validator("timestamp", allow_reuse=True)(valutc("timestamp"))
|
@@ -13,6 +13,7 @@ from .job import JobReadV2 # noqa F401
|
|
13
13
|
from .job import JobStatusTypeV2 # noqa F401
|
14
14
|
from .job import JobUpdateV2 # noqa F401
|
15
15
|
from .manifest import ManifestV2 # noqa F401
|
16
|
+
from .manifest import TaskManifestV2 # noqa F401
|
16
17
|
from .project import ProjectCreateV2 # noqa F401
|
17
18
|
from .project import ProjectReadV2 # noqa F401
|
18
19
|
from .project import ProjectUpdateV2 # noqa F401
|
@@ -22,8 +23,10 @@ from .task import TaskImportV2 # noqa F401
|
|
22
23
|
from .task import TaskLegacyReadV2 # noqa F401
|
23
24
|
from .task import TaskReadV2 # noqa F401
|
24
25
|
from .task import TaskUpdateV2 # noqa F401
|
26
|
+
from .task_collection import CollectionStateReadV2 # noqa F401
|
27
|
+
from .task_collection import CollectionStatusV2 # noqa F401
|
28
|
+
from .task_collection import TaskCollectCustomV2 # noqa F401
|
25
29
|
from .task_collection import TaskCollectPipV2 # noqa F401
|
26
|
-
from .task_collection import TaskCollectStatusV2 # noqa F401
|
27
30
|
from .workflow import WorkflowCreateV2 # noqa F401
|
28
31
|
from .workflow import WorkflowExportV2 # noqa F401
|
29
32
|
from .workflow import WorkflowImportV2 # noqa F401
|