fractal-server 1.4.10__py3-none-any.whl → 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fractal_server/__init__.py +1 -1
- fractal_server/app/models/__init__.py +6 -8
- fractal_server/app/models/linkuserproject.py +9 -0
- fractal_server/app/models/security.py +6 -0
- fractal_server/app/models/v1/__init__.py +12 -0
- fractal_server/app/models/{dataset.py → v1/dataset.py} +5 -5
- fractal_server/app/models/{job.py → v1/job.py} +5 -5
- fractal_server/app/models/{project.py → v1/project.py} +5 -5
- fractal_server/app/models/{state.py → v1/state.py} +2 -2
- fractal_server/app/models/{task.py → v1/task.py} +7 -2
- fractal_server/app/models/{workflow.py → v1/workflow.py} +5 -5
- fractal_server/app/models/v2/__init__.py +22 -0
- fractal_server/app/models/v2/collection_state.py +21 -0
- fractal_server/app/models/v2/dataset.py +54 -0
- fractal_server/app/models/v2/job.py +51 -0
- fractal_server/app/models/v2/project.py +30 -0
- fractal_server/app/models/v2/task.py +93 -0
- fractal_server/app/models/v2/workflow.py +35 -0
- fractal_server/app/models/v2/workflowtask.py +49 -0
- fractal_server/app/routes/admin/__init__.py +0 -0
- fractal_server/app/routes/{admin.py → admin/v1.py} +42 -42
- fractal_server/app/routes/admin/v2.py +309 -0
- fractal_server/app/routes/api/v1/__init__.py +7 -7
- fractal_server/app/routes/api/v1/_aux_functions.py +8 -8
- fractal_server/app/routes/api/v1/dataset.py +41 -41
- fractal_server/app/routes/api/v1/job.py +14 -14
- fractal_server/app/routes/api/v1/project.py +27 -25
- fractal_server/app/routes/api/v1/task.py +26 -16
- fractal_server/app/routes/api/v1/task_collection.py +28 -16
- fractal_server/app/routes/api/v1/workflow.py +28 -28
- fractal_server/app/routes/api/v1/workflowtask.py +11 -11
- fractal_server/app/routes/api/v2/__init__.py +34 -0
- fractal_server/app/routes/api/v2/_aux_functions.py +502 -0
- fractal_server/app/routes/api/v2/dataset.py +293 -0
- fractal_server/app/routes/api/v2/images.py +279 -0
- fractal_server/app/routes/api/v2/job.py +200 -0
- fractal_server/app/routes/api/v2/project.py +186 -0
- fractal_server/app/routes/api/v2/status.py +150 -0
- fractal_server/app/routes/api/v2/submit.py +210 -0
- fractal_server/app/routes/api/v2/task.py +222 -0
- fractal_server/app/routes/api/v2/task_collection.py +239 -0
- fractal_server/app/routes/api/v2/task_legacy.py +59 -0
- fractal_server/app/routes/api/v2/workflow.py +380 -0
- fractal_server/app/routes/api/v2/workflowtask.py +265 -0
- fractal_server/app/routes/aux/_job.py +2 -2
- fractal_server/app/runner/__init__.py +0 -364
- fractal_server/app/runner/async_wrap.py +27 -0
- fractal_server/app/runner/components.py +5 -0
- fractal_server/app/runner/exceptions.py +129 -0
- fractal_server/app/runner/executors/__init__.py +0 -0
- fractal_server/app/runner/executors/slurm/__init__.py +3 -0
- fractal_server/app/runner/{_slurm → executors/slurm}/_batching.py +1 -1
- fractal_server/app/runner/{_slurm → executors/slurm}/_check_jobs_status.py +1 -1
- fractal_server/app/runner/{_slurm → executors/slurm}/_executor_wait_thread.py +1 -1
- fractal_server/app/runner/{_slurm → executors/slurm}/_slurm_config.py +3 -152
- fractal_server/app/runner/{_slurm → executors/slurm}/_subprocess_run_as_user.py +1 -1
- fractal_server/app/runner/{_slurm → executors/slurm}/executor.py +32 -21
- fractal_server/app/runner/filenames.py +6 -0
- fractal_server/app/runner/set_start_and_last_task_index.py +39 -0
- fractal_server/app/runner/task_files.py +103 -0
- fractal_server/app/runner/v1/__init__.py +366 -0
- fractal_server/app/runner/{_common.py → v1/_common.py} +14 -121
- fractal_server/app/runner/{_local → v1/_local}/__init__.py +5 -4
- fractal_server/app/runner/{_local → v1/_local}/_local_config.py +6 -7
- fractal_server/app/runner/{_local → v1/_local}/_submit_setup.py +1 -5
- fractal_server/app/runner/v1/_slurm/__init__.py +312 -0
- fractal_server/app/runner/{_slurm → v1/_slurm}/_submit_setup.py +5 -11
- fractal_server/app/runner/v1/_slurm/get_slurm_config.py +163 -0
- fractal_server/app/runner/v1/common.py +117 -0
- fractal_server/app/runner/{handle_failed_job.py → v1/handle_failed_job.py} +8 -8
- fractal_server/app/runner/v2/__init__.py +336 -0
- fractal_server/app/runner/v2/_local/__init__.py +162 -0
- fractal_server/app/runner/v2/_local/_local_config.py +118 -0
- fractal_server/app/runner/v2/_local/_submit_setup.py +52 -0
- fractal_server/app/runner/v2/_local/executor.py +100 -0
- fractal_server/app/runner/{_slurm → v2/_slurm}/__init__.py +38 -47
- fractal_server/app/runner/v2/_slurm/_submit_setup.py +82 -0
- fractal_server/app/runner/v2/_slurm/get_slurm_config.py +182 -0
- fractal_server/app/runner/v2/deduplicate_list.py +23 -0
- fractal_server/app/runner/v2/handle_failed_job.py +165 -0
- fractal_server/app/runner/v2/merge_outputs.py +38 -0
- fractal_server/app/runner/v2/runner.py +343 -0
- fractal_server/app/runner/v2/runner_functions.py +374 -0
- fractal_server/app/runner/v2/runner_functions_low_level.py +130 -0
- fractal_server/app/runner/v2/task_interface.py +62 -0
- fractal_server/app/runner/v2/v1_compat.py +31 -0
- fractal_server/app/schemas/__init__.py +1 -42
- fractal_server/app/schemas/_validators.py +28 -5
- fractal_server/app/schemas/v1/__init__.py +36 -0
- fractal_server/app/schemas/{applyworkflow.py → v1/applyworkflow.py} +18 -18
- fractal_server/app/schemas/{dataset.py → v1/dataset.py} +30 -30
- fractal_server/app/schemas/{dumps.py → v1/dumps.py} +8 -8
- fractal_server/app/schemas/{manifest.py → v1/manifest.py} +5 -5
- fractal_server/app/schemas/{project.py → v1/project.py} +9 -9
- fractal_server/app/schemas/{task.py → v1/task.py} +12 -12
- fractal_server/app/schemas/{task_collection.py → v1/task_collection.py} +7 -7
- fractal_server/app/schemas/{workflow.py → v1/workflow.py} +38 -38
- fractal_server/app/schemas/v2/__init__.py +37 -0
- fractal_server/app/schemas/v2/dataset.py +126 -0
- fractal_server/app/schemas/v2/dumps.py +87 -0
- fractal_server/app/schemas/v2/job.py +114 -0
- fractal_server/app/schemas/v2/manifest.py +159 -0
- fractal_server/app/schemas/v2/project.py +34 -0
- fractal_server/app/schemas/v2/status.py +16 -0
- fractal_server/app/schemas/v2/task.py +151 -0
- fractal_server/app/schemas/v2/task_collection.py +109 -0
- fractal_server/app/schemas/v2/workflow.py +79 -0
- fractal_server/app/schemas/v2/workflowtask.py +208 -0
- fractal_server/config.py +5 -4
- fractal_server/images/__init__.py +4 -0
- fractal_server/images/models.py +136 -0
- fractal_server/images/tools.py +84 -0
- fractal_server/main.py +11 -3
- fractal_server/migrations/env.py +0 -2
- fractal_server/migrations/versions/5bf02391cfef_v2.py +245 -0
- fractal_server/tasks/__init__.py +0 -5
- fractal_server/tasks/endpoint_operations.py +13 -19
- fractal_server/tasks/utils.py +35 -0
- fractal_server/tasks/{_TaskCollectPip.py → v1/_TaskCollectPip.py} +3 -3
- fractal_server/tasks/v1/__init__.py +0 -0
- fractal_server/tasks/{background_operations.py → v1/background_operations.py} +20 -52
- fractal_server/tasks/v1/get_collection_data.py +14 -0
- fractal_server/tasks/v2/_TaskCollectPip.py +103 -0
- fractal_server/tasks/v2/__init__.py +0 -0
- fractal_server/tasks/v2/background_operations.py +381 -0
- fractal_server/tasks/v2/get_collection_data.py +14 -0
- fractal_server/urls.py +13 -0
- {fractal_server-1.4.10.dist-info → fractal_server-2.0.0.dist-info}/METADATA +10 -10
- fractal_server-2.0.0.dist-info/RECORD +169 -0
- fractal_server/app/runner/_slurm/.gitignore +0 -2
- fractal_server/app/runner/common.py +0 -311
- fractal_server/app/schemas/json_schemas/manifest.json +0 -81
- fractal_server-1.4.10.dist-info/RECORD +0 -98
- /fractal_server/app/runner/{_slurm → executors/slurm}/remote.py +0 -0
- /fractal_server/app/runner/{_local → v1/_local}/executor.py +0 -0
- {fractal_server-1.4.10.dist-info → fractal_server-2.0.0.dist-info}/LICENSE +0 -0
- {fractal_server-1.4.10.dist-info → fractal_server-2.0.0.dist-info}/WHEEL +0 -0
- {fractal_server-1.4.10.dist-info → fractal_server-2.0.0.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,100 @@
|
|
1
|
+
# Copyright 2022 (C) Friedrich Miescher Institute for Biomedical Research and
|
2
|
+
# University of Zurich
|
3
|
+
#
|
4
|
+
# Original authors:
|
5
|
+
# Tommaso Comparin <tommaso.comparin@exact-lab.it>
|
6
|
+
#
|
7
|
+
# This file is part of Fractal and was originally developed by eXact lab S.r.l.
|
8
|
+
# <exact-lab.it> under contract with Liberali Lab from the Friedrich Miescher
|
9
|
+
# Institute for Biomedical Research and Pelkmans Lab from the University of
|
10
|
+
# Zurich.
|
11
|
+
"""
|
12
|
+
Custom version of Python
|
13
|
+
[ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#concurrent.futures.ThreadPoolExecutor).
|
14
|
+
"""
|
15
|
+
from concurrent.futures import ThreadPoolExecutor
|
16
|
+
from typing import Callable
|
17
|
+
from typing import Iterable
|
18
|
+
from typing import Optional
|
19
|
+
from typing import Sequence
|
20
|
+
|
21
|
+
from ._local_config import get_default_local_backend_config
|
22
|
+
from ._local_config import LocalBackendConfig
|
23
|
+
|
24
|
+
|
25
|
+
class FractalThreadPoolExecutor(ThreadPoolExecutor):
    """
    Custom version of
    [ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#concurrent.futures.ThreadPoolExecutor)
    that overrides the `submit` and `map` methods.

    Both methods accept an extra `local_backend_config` keyword argument:
    `submit` ignores it, while `map` uses its `parallel_tasks_per_job`
    attribute to limit how many calls are handed to the pool at once.
    """

    def submit(
        self,
        *args,
        # Quoted, so that the annotation is a forward reference which is not
        # evaluated when the method is defined.
        local_backend_config: Optional["LocalBackendConfig"] = None,
        **kwargs,
    ):
        """
        Compared to the `ThreadPoolExecutor` method, here we accept an
        additional keyword argument (`local_backend_config`), which is then
        simply ignored.

        Args:
            args: Positional arguments forwarded to
                `ThreadPoolExecutor.submit`.
            local_backend_config: Accepted for interface uniformity, unused.
            kwargs: Keyword arguments forwarded to
                `ThreadPoolExecutor.submit`.

        Returns:
            The future returned by `ThreadPoolExecutor.submit`.
        """
        return super().submit(*args, **kwargs)

    def map(
        self,
        fn: Callable,
        *iterables: Sequence,
        local_backend_config: Optional["LocalBackendConfig"] = None,
    ):
        """
        Custom version of the `Executor.map` method

        The main change with respect to the original `map` method is that
        the list of tasks to be executed is split into chunks, and then
        `super().map` is called (sequentially) on each chunk. The goal of this
        change is to limit parallelism, e.g. due to limited computational
        resources.

        Other changes from the `concurrent.futures` `map` method:

        1. Removed `timeout` argument;
        2. Removed `chunksize`;
        3. All iterators (both inputs and output ones) are transformed into
            lists.

        Args:
            fn: A callable function.
            iterables: The argument iterables (one iterable per argument of
                `fn`). They must support `len()` and slicing, and must all
                have the same length.
            local_backend_config: The backend configuration, needed to extract
                `parallel_tasks_per_job`. If `None`, the default configuration
                is loaded via `get_default_local_backend_config`.

        Returns:
            An iterator over the results, in the same order as the inputs.
            All calls have already completed when it is returned.

        Raises:
            ValueError: If no iterable is provided, or if the iterables have
                different lengths.
        """

        # Preliminary check (this also rejects the case of zero iterables,
        # for which the set of lengths is empty)
        iterable_lengths = [len(it) for it in iterables]
        if len(set(iterable_lengths)) != 1:
            raise ValueError("Iterables have different lengths.")

        # Set total number of arguments
        n_elements = iterable_lengths[0]

        # Nothing to execute. Returning early also avoids a zero chunk size
        # below (`range` raises `ValueError` when its step is zero), which
        # used to happen when `parallel_tasks_per_job` was unset.
        if n_elements == 0:
            return iter([])

        # Set parallel_tasks_per_job
        if local_backend_config is None:
            local_backend_config = get_default_local_backend_config()
        parallel_tasks_per_job = local_backend_config.parallel_tasks_per_job
        if parallel_tasks_per_job is None:
            # No limit requested: run everything as a single chunk
            parallel_tasks_per_job = n_elements

        # Execute tasks, in chunks of size parallel_tasks_per_job
        results = []
        for ind_chunk in range(0, n_elements, parallel_tasks_per_job):
            chunk_iterables = [
                it[ind_chunk : ind_chunk + parallel_tasks_per_job]  # noqa
                for it in iterables
            ]
            # Consuming the iterator here waits for the whole chunk to
            # complete before the next chunk is submitted.
            results.extend(super().map(fn, *chunk_iterables))

        return iter(results)
|
@@ -21,22 +21,24 @@ from typing import Any
|
|
21
21
|
from typing import Optional
|
22
22
|
from typing import Union
|
23
23
|
|
24
|
-
from
|
25
|
-
from
|
26
|
-
from
|
27
|
-
from
|
28
|
-
from
|
24
|
+
from ....models.v2 import DatasetV2
|
25
|
+
from ....models.v2 import WorkflowV2
|
26
|
+
from ...async_wrap import async_wrap
|
27
|
+
from ...executors.slurm.executor import FractalSlurmExecutor
|
28
|
+
from ...set_start_and_last_task_index import set_start_and_last_task_index
|
29
|
+
from ..runner import execute_tasks_v2
|
29
30
|
from ._submit_setup import _slurm_submit_setup
|
30
|
-
|
31
|
+
|
32
|
+
# from .._common import execute_tasks
|
33
|
+
# from ..common import async_wrap
|
34
|
+
# from ..common import set_start_and_last_task_index
|
35
|
+
# from ..common import TaskParameters
|
31
36
|
|
32
37
|
|
33
38
|
def _process_workflow(
|
34
39
|
*,
|
35
|
-
workflow:
|
36
|
-
|
37
|
-
output_path: Path,
|
38
|
-
input_metadata: dict[str, Any],
|
39
|
-
input_history: list[dict[str, Any]],
|
40
|
+
workflow: WorkflowV2,
|
41
|
+
dataset: DatasetV2,
|
40
42
|
logger_name: str,
|
41
43
|
workflow_dir: Path,
|
42
44
|
workflow_dir_user: Path,
|
@@ -52,12 +54,13 @@ def _process_workflow(
|
|
52
54
|
|
53
55
|
This function initialises the a FractalSlurmExecutor, setting logging,
|
54
56
|
workflow working dir and user to impersonate. It then schedules the
|
55
|
-
workflow tasks and returns the
|
57
|
+
workflow tasks and returns the new dataset attributes
|
56
58
|
|
57
|
-
Cf.
|
59
|
+
Cf.
|
60
|
+
[process_workflow][fractal_server.app.runner.v2._local.process_workflow]
|
58
61
|
|
59
62
|
Returns:
|
60
|
-
|
63
|
+
new_dataset_attributes:
|
61
64
|
"""
|
62
65
|
|
63
66
|
if not slurm_user:
|
@@ -78,49 +81,40 @@ def _process_workflow(
|
|
78
81
|
common_script_lines=worker_init,
|
79
82
|
slurm_account=slurm_account,
|
80
83
|
) as executor:
|
81
|
-
|
82
|
-
|
83
|
-
task_list=workflow.task_list[
|
84
|
+
new_dataset_attributes = execute_tasks_v2(
|
85
|
+
wf_task_list=workflow.task_list[
|
84
86
|
first_task_index : (last_task_index + 1) # noqa
|
85
87
|
], # noqa
|
86
|
-
|
87
|
-
|
88
|
-
output_path=output_path,
|
89
|
-
metadata=input_metadata,
|
90
|
-
history=input_history,
|
91
|
-
),
|
88
|
+
dataset=dataset,
|
89
|
+
executor=executor,
|
92
90
|
workflow_dir=workflow_dir,
|
93
91
|
workflow_dir_user=workflow_dir_user,
|
94
|
-
submit_setup_call=_slurm_submit_setup,
|
95
92
|
logger_name=logger_name,
|
93
|
+
submit_setup_call=_slurm_submit_setup,
|
96
94
|
)
|
97
|
-
|
98
|
-
metadata=output_task_pars.metadata, history=output_task_pars.history
|
99
|
-
)
|
100
|
-
return output_dataset_metadata_history
|
95
|
+
return new_dataset_attributes
|
101
96
|
|
102
97
|
|
103
98
|
async def process_workflow(
|
104
99
|
*,
|
105
|
-
workflow:
|
106
|
-
|
107
|
-
output_path: Path,
|
108
|
-
input_metadata: dict[str, Any],
|
109
|
-
input_history: list[dict[str, Any]],
|
110
|
-
logger_name: str,
|
100
|
+
workflow: WorkflowV2,
|
101
|
+
dataset: DatasetV2,
|
111
102
|
workflow_dir: Path,
|
112
103
|
workflow_dir_user: Optional[Path] = None,
|
104
|
+
first_task_index: Optional[int] = None,
|
105
|
+
last_task_index: Optional[int] = None,
|
106
|
+
logger_name: str,
|
107
|
+
# Slurm-specific
|
113
108
|
user_cache_dir: Optional[str] = None,
|
114
109
|
slurm_user: Optional[str] = None,
|
115
110
|
slurm_account: Optional[str] = None,
|
116
111
|
worker_init: Optional[str] = None,
|
117
|
-
|
118
|
-
last_task_index: Optional[int] = None,
|
119
|
-
) -> dict[str, Any]:
|
112
|
+
) -> dict:
|
120
113
|
"""
|
121
114
|
Process workflow (SLURM backend public interface)
|
122
115
|
|
123
|
-
Cf.
|
116
|
+
Cf.
|
117
|
+
[process_workflow][fractal_server.app.runner.v2._local.process_workflow]
|
124
118
|
"""
|
125
119
|
|
126
120
|
# Set values of first_task_index and last_task_index
|
@@ -131,20 +125,17 @@ async def process_workflow(
|
|
131
125
|
last_task_index=last_task_index,
|
132
126
|
)
|
133
127
|
|
134
|
-
|
128
|
+
new_dataset_attributes = await async_wrap(_process_workflow)(
|
135
129
|
workflow=workflow,
|
136
|
-
|
137
|
-
output_path=output_path,
|
138
|
-
input_metadata=input_metadata,
|
139
|
-
input_history=input_history,
|
130
|
+
dataset=dataset,
|
140
131
|
logger_name=logger_name,
|
141
132
|
workflow_dir=workflow_dir,
|
142
133
|
workflow_dir_user=workflow_dir_user,
|
134
|
+
first_task_index=first_task_index,
|
135
|
+
last_task_index=last_task_index,
|
136
|
+
user_cache_dir=user_cache_dir,
|
143
137
|
slurm_user=slurm_user,
|
144
138
|
slurm_account=slurm_account,
|
145
|
-
user_cache_dir=user_cache_dir,
|
146
139
|
worker_init=worker_init,
|
147
|
-
first_task_index=first_task_index,
|
148
|
-
last_task_index=last_task_index,
|
149
140
|
)
|
150
|
-
return
|
141
|
+
return new_dataset_attributes
|
@@ -0,0 +1,82 @@
|
|
1
|
+
# Copyright 2022 (C) Friedrich Miescher Institute for Biomedical Research and
|
2
|
+
# University of Zurich
|
3
|
+
#
|
4
|
+
# Original authors:
|
5
|
+
# Jacopo Nespolo <jacopo.nespolo@exact-lab.it>
|
6
|
+
# Tommaso Comparin <tommaso.comparin@exact-lab.it>
|
7
|
+
#
|
8
|
+
# This file is part of Fractal and was originally developed by eXact lab S.r.l.
|
9
|
+
# <exact-lab.it> under contract with Liberali Lab from the Friedrich Miescher
|
10
|
+
# Institute for Biomedical Research and Pelkmans Lab from the University of
|
11
|
+
# Zurich.
|
12
|
+
"""
|
13
|
+
Submodule to define _slurm_submit_setup, which is also the reference
|
14
|
+
implementation of `submit_setup_call`.
|
15
|
+
"""
|
16
|
+
from pathlib import Path
|
17
|
+
from typing import Literal
|
18
|
+
|
19
|
+
from ...task_files import get_task_file_paths
|
20
|
+
from .get_slurm_config import get_slurm_config
|
21
|
+
from fractal_server.app.models.v2 import WorkflowTaskV2
|
22
|
+
|
23
|
+
|
24
|
+
def _slurm_submit_setup(
    *,
    wftask: WorkflowTaskV2,
    workflow_dir: Path,
    workflow_dir_user: Path,
    which_type: Literal["non_parallel", "parallel"],
) -> dict[str, object]:
    """
    Assemble the WorkflowTask-specific options used when submitting a task.

    This function builds a `SlurmConfig` object (via `get_slurm_config`) and
    a `TaskFiles` object (via `get_task_file_paths`, with details e.g. about
    file paths or filename prefixes), and returns them together.

    For now, this is the reference implementation for the argument
    `submit_setup_call` of
    [fractal_server.app.runner.v2.runner][].

    Arguments:
        wftask:
            WorkflowTask for which the configuration is to be assembled
        workflow_dir:
            Server-owned directory to store all task-execution-related relevant
            files (inputs, outputs, errors, and all meta files related to the
            job execution). Note: users cannot write directly to this folder.
        workflow_dir_user:
            User-side directory with the same scope as `workflow_dir`, and
            where a user can write.
        which_type:
            Either `"non_parallel"` or `"parallel"`; forwarded unchanged to
            `get_slurm_config`.

    Returns:
        submit_setup_dict:
            A dictionary with keys `slurm_config` and `task_files`, that will
            be passed on to `FractalSlurmExecutor.submit` and
            `FractalSlurmExecutor.map`, so as to set extra options.
    """

    # Both helpers take the same pair of working directories
    folders = dict(
        workflow_dir=workflow_dir,
        workflow_dir_user=workflow_dir_user,
    )

    return {
        # SlurmConfig object
        "slurm_config": get_slurm_config(
            wftask=wftask,
            which_type=which_type,
            **folders,
        ),
        # TaskFiles object
        "task_files": get_task_file_paths(
            task_order=wftask.order,
            **folders,
        ),
    }
|
@@ -0,0 +1,182 @@
|
|
1
|
+
from pathlib import Path
|
2
|
+
from typing import Literal
|
3
|
+
from typing import Optional
|
4
|
+
|
5
|
+
from fractal_server.app.models.v2 import WorkflowTaskV2
|
6
|
+
from fractal_server.app.runner.executors.slurm._slurm_config import (
|
7
|
+
_parse_mem_value,
|
8
|
+
)
|
9
|
+
from fractal_server.app.runner.executors.slurm._slurm_config import (
|
10
|
+
load_slurm_config_file,
|
11
|
+
)
|
12
|
+
from fractal_server.app.runner.executors.slurm._slurm_config import logger
|
13
|
+
from fractal_server.app.runner.executors.slurm._slurm_config import SlurmConfig
|
14
|
+
from fractal_server.app.runner.executors.slurm._slurm_config import (
|
15
|
+
SlurmConfigError,
|
16
|
+
)
|
17
|
+
|
18
|
+
|
19
|
+
def get_slurm_config(
    wftask: WorkflowTaskV2,
    workflow_dir: Path,
    workflow_dir_user: Path,
    which_type: Literal["non_parallel", "parallel"],
    config_path: Optional[Path] = None,
) -> SlurmConfig:
    """
    Prepare a `SlurmConfig` configuration object

    The argument `which_type` determines whether we use `wftask.meta_parallel`
    or `wftask.meta_non_parallel`. In the following description, let us assume
    that `which_type="parallel"`.

    The sources for `SlurmConfig` attributes, in increasing priority order, are

    1. The general content of the Fractal SLURM configuration file.
    2. The GPU-specific content of the Fractal SLURM configuration file, if
        appropriate.
    3. Properties in `wftask.meta_parallel` (which typically include those in
       `wftask.task.meta_parallel`). Note that `wftask.meta_parallel` may be
       `None`.

    Arguments:
        wftask:
            WorkflowTask for which the SLURM configuration is to be
            prepared.
        workflow_dir:
            Server-owned directory to store all task-execution-related relevant
            files (inputs, outputs, errors, and all meta files related to the
            job execution). Note: users cannot write directly to this folder.
            Currently not used in the body of this function.
        workflow_dir_user:
            User-side directory with the same scope as `workflow_dir`, and
            where a user can write. Currently not used in the body of this
            function.
        config_path:
            Path of a Fractal SLURM configuration file; if `None`, use
            `FRACTAL_SLURM_CONFIG_FILE` variable from settings.
        which_type:
            Determines whether to use `meta_parallel` or `meta_non_parallel`.

    Returns:
        slurm_config:
            The SlurmConfig object

    Raises:
        ValueError: If `which_type` is neither `"non_parallel"` nor
            `"parallel"`.
        SlurmConfigError: If the WorkflowTask meta attribute sets `account`.
    """

    if which_type == "non_parallel":
        wftask_meta = wftask.meta_non_parallel
    elif which_type == "parallel":
        wftask_meta = wftask.meta_parallel
    else:
        raise ValueError(
            f"get_slurm_config received invalid argument {which_type=}."
        )

    # This must be an f-string, otherwise the literal text `{wftask_meta=}`
    # is logged instead of the actual value.
    logger.debug(
        f"[get_slurm_config] WorkflowTask meta attribute: {wftask_meta=}"
    )

    # Incorporate slurm_env.default_slurm_config
    slurm_env = load_slurm_config_file(config_path=config_path)
    slurm_dict = slurm_env.default_slurm_config.dict(
        exclude_unset=True, exclude={"mem"}
    )
    if slurm_env.default_slurm_config.mem:
        slurm_dict["mem_per_task_MB"] = slurm_env.default_slurm_config.mem

    # Incorporate slurm_env.batching_config
    for key, value in slurm_env.batching_config.dict().items():
        slurm_dict[key] = value

    # Incorporate slurm_env.user_local_exports
    slurm_dict["user_local_exports"] = slurm_env.user_local_exports

    logger.debug(
        "[get_slurm_config] Fractal SLURM configuration file: "
        f"{slurm_env.dict()=}"
    )

    # GPU-related options
    # Notes about priority:
    # 1. This block of definitions takes priority over other definitions from
    #    slurm_env which are not under the `needs_gpu` subgroup
    # 2. This block of definitions has lower priority than whatever comes next
    #    (i.e. from WorkflowTask.meta).
    if wftask_meta is not None:
        needs_gpu = wftask_meta.get("needs_gpu", False)
    else:
        needs_gpu = False
    logger.debug(f"[get_slurm_config] {needs_gpu=}")
    if needs_gpu:
        for key, value in slurm_env.gpu_slurm_config.dict(
            exclude_unset=True, exclude={"mem"}
        ).items():
            slurm_dict[key] = value
        if slurm_env.gpu_slurm_config.mem:
            slurm_dict["mem_per_task_MB"] = slurm_env.gpu_slurm_config.mem

    # Number of CPUs per task, for multithreading
    if wftask_meta is not None and "cpus_per_task" in wftask_meta:
        cpus_per_task = int(wftask_meta["cpus_per_task"])
        slurm_dict["cpus_per_task"] = cpus_per_task

    # Required memory per task, in MB
    if wftask_meta is not None and "mem" in wftask_meta:
        raw_mem = wftask_meta["mem"]
        mem_per_task_MB = _parse_mem_value(raw_mem)
        slurm_dict["mem_per_task_MB"] = mem_per_task_MB

    # Job name
    if wftask.is_legacy_task:
        job_name = wftask.task_legacy.name.replace(" ", "_")
    else:
        job_name = wftask.task.name.replace(" ", "_")
    slurm_dict["job_name"] = job_name

    # Optional SLURM arguments and extra lines
    if wftask_meta is not None:
        # The SLURM account cannot be set at the WorkflowTask level
        account = wftask_meta.get("account", None)
        if account is not None:
            error_msg = (
                f"Invalid {account=} property in WorkflowTask `meta` "
                "attribute.\n"
                "SLURM account must be set in the request body of the "
                "apply-workflow endpoint, or by modifying the user properties."
            )
            logger.error(error_msg)
            raise SlurmConfigError(error_msg)
        for key in ["time", "gres", "constraint"]:
            value = wftask_meta.get(key, None)
            if value:
                slurm_dict[key] = value
    if wftask_meta is not None:
        extra_lines = wftask_meta.get("extra_lines", [])
    else:
        extra_lines = []
    extra_lines = slurm_dict.get("extra_lines", []) + extra_lines
    if len(set(extra_lines)) != len(extra_lines):
        logger.debug(
            "[get_slurm_config] Removing repeated elements "
            f"from {extra_lines=}."
        )
        # De-duplicate while keeping the first occurrence of each line, so
        # that the order of the extra lines is deterministic (going through
        # `set` would scramble it).
        extra_lines = list(dict.fromkeys(extra_lines))
    slurm_dict["extra_lines"] = extra_lines

    # Job-batching parameters (if None, they will be determined heuristically)
    if wftask_meta is not None:
        tasks_per_job = wftask_meta.get("tasks_per_job", None)
        parallel_tasks_per_job = wftask_meta.get(
            "parallel_tasks_per_job", None
        )
    else:
        tasks_per_job = None
        parallel_tasks_per_job = None
    slurm_dict["tasks_per_job"] = tasks_per_job
    slurm_dict["parallel_tasks_per_job"] = parallel_tasks_per_job

    # Put everything together
    logger.debug(
        "[get_slurm_config] Now create a SlurmConfig object based "
        f"on {slurm_dict=}"
    )
    slurm_config = SlurmConfig(**slurm_dict)

    return slurm_config
|
@@ -0,0 +1,23 @@
|
|
1
|
+
from typing import TypeVar
|
2
|
+
|
3
|
+
from ....images import SingleImage
|
4
|
+
from ....images import SingleImageTaskOutput
|
5
|
+
from .task_interface import InitArgsModel
|
6
|
+
|
7
|
+
# Constrained type variable: `T` can only be one of the three listed classes.
# It is used below to state that `deduplicate_list` returns a list with the
# same item type as its input.
T = TypeVar("T", SingleImage, SingleImageTaskOutput, InitArgsModel)
|
8
|
+
|
9
|
+
|
10
|
+
def deduplicate_list(
    this_list: list[T],
) -> list[T]:
    """
    Return the items of `this_list` without duplicates, preserving order.

    This is a replacement for `set(this_list)` for non-hashable items: two
    items are considered equal when their `.dict()` representations are
    equal, and only the first occurrence of each item is kept.

    Args:
        this_list: The items to de-duplicate; each one must expose a
            `.dict()` method.

    Returns:
        A new list with the first occurrence of each distinct item.
    """
    seen_dumps = []
    unique_items = []
    for item in this_list:
        dump = item.dict()
        if dump in seen_dumps:
            # Same content as an item that was already kept
            continue
        seen_dumps.append(dump)
        unique_items.append(item)
    return unique_items
|
@@ -0,0 +1,165 @@
|
|
1
|
+
# Copyright 2022 (C) Friedrich Miescher Institute for Biomedical Research and
|
2
|
+
# University of Zurich
|
3
|
+
#
|
4
|
+
# Original authors:
|
5
|
+
# Tommaso Comparin <tommaso.comparin@exact-lab.it>
|
6
|
+
# Marco Franzon <marco.franzon@exact-lab.it>
|
7
|
+
#
|
8
|
+
# This file is part of Fractal and was originally developed by eXact lab S.r.l.
|
9
|
+
# <exact-lab.it> under contract with Liberali Lab from the Friedrich Miescher
|
10
|
+
# Institute for Biomedical Research and Pelkmans Lab from the University of
|
11
|
+
# Zurich.
|
12
|
+
"""
|
13
|
+
Helper functions to handle Dataset history.
|
14
|
+
"""
|
15
|
+
import json
|
16
|
+
import logging
|
17
|
+
from pathlib import Path
|
18
|
+
from typing import Any
|
19
|
+
from typing import Optional
|
20
|
+
|
21
|
+
from ...models.v2 import DatasetV2
|
22
|
+
from ...models.v2 import JobV2
|
23
|
+
from ...models.v2 import WorkflowTaskV2
|
24
|
+
from ...models.v2 import WorkflowV2
|
25
|
+
from ...schemas.v2 import WorkflowTaskStatusTypeV2
|
26
|
+
from ..filenames import FILTERS_FILENAME
|
27
|
+
from ..filenames import HISTORY_FILENAME
|
28
|
+
from ..filenames import IMAGES_FILENAME
|
29
|
+
|
30
|
+
|
31
|
+
def assemble_history_failed_job(
    job: JobV2,
    dataset: DatasetV2,
    workflow: WorkflowV2,
    logger_name: Optional[str] = None,
    failed_wftask: Optional[WorkflowTaskV2] = None,
) -> list[dict[str, Any]]:
    """
    Assemble `history` after a workflow-execution job fails.

    The input `dataset.history` list is not modified: the new history is
    built on a copy of it.

    Args:
        job:
            The failed `JobV2` object.
        dataset:
            The `DatasetV2` object associated to `job`.
        workflow:
            The `WorkflowV2` object associated to `job`.
        logger_name: A logger name.
        failed_wftask:
            If set, append it to `history` during step 3; if `None`, infer
            it by comparing the job task list and the one in
            `HISTORY_FILENAME`.

    Returns:
        The new value of `history`, to be merged into
        `dataset.meta`.
    """

    logger = logging.getLogger(logger_name)

    # The final value of the history attribute should include up to three
    # parts, coming from: the database, the temporary file, the failed-task
    # information.

    # Part 1: Read existing history from DB. Work on a (shallow) copy, so
    # that the list owned by `dataset` is not mutated as a side effect.
    new_history = list(dataset.history)

    # Part 2: Extend history based on temporary-file contents
    tmp_history_file = Path(job.working_dir) / HISTORY_FILENAME
    try:
        with tmp_history_file.open("r") as f:
            tmp_file_history = json.load(f)
    except FileNotFoundError:
        # No temporary history file is available
        tmp_file_history = []
    else:
        new_history.extend(tmp_file_history)

    # Part 3/A: Identify failed task, if needed
    if failed_wftask is None:
        job_wftasks = workflow.task_list[
            job.first_task_index : (job.last_task_index + 1)  # noqa
        ]
        tmp_file_wftasks = [
            history_item["workflowtask"] for history_item in tmp_file_history
        ]
        if len(job_wftasks) <= len(tmp_file_wftasks):
            # The temporary file already lists at least as many tasks as the
            # job had, so there is no "next task" to mark as failed.
            n_tasks_job = len(job_wftasks)
            n_tasks_tmp = len(tmp_file_wftasks)
            logger.error(
                "Cannot identify the failed task based on job task list "
                f"(length {n_tasks_job}) and temporary-file task list "
                f"(length {n_tasks_tmp})."
            )
            logger.error("Failed task not appended to history.")
        else:
            # The failed task is the first job task that is not listed in
            # the temporary history file.
            failed_wftask = job_wftasks[len(tmp_file_wftasks)]

    # Part 3/B: Append failed task to history
    if failed_wftask is not None:
        failed_wftask_dump = failed_wftask.model_dump(
            exclude={"task", "task_legacy"}
        )
        if failed_wftask.is_legacy_task:
            failed_wftask_dump[
                "task_legacy"
            ] = failed_wftask.task_legacy.model_dump()
        else:
            failed_wftask_dump["task"] = failed_wftask.task.model_dump()
        new_history_item = dict(
            workflowtask=failed_wftask_dump,
            status=WorkflowTaskStatusTypeV2.FAILED,
            parallelization=dict(),  # FIXME: re-include parallelization
        )
        new_history.append(new_history_item)

    return new_history
|
116
|
+
|
117
|
+
|
118
|
+
def assemble_images_failed_job(job: JobV2) -> Optional[dict[str, Any]]:
    """
    Assemble `DatasetV2.images` for a failed workflow-execution.

    The new value of `images` is the JSON content of the temporary
    `IMAGES_FILENAME` file found in the job working directory, i.e. it is
    based on the last successful task.

    Arguments:
        job:
            The failed `JobV2` object.

    Returns:
        The new value of `dataset.images`, or `None` if `IMAGES_FILENAME`
        is missing.
    """
    images_path = Path(job.working_dir) / IMAGES_FILENAME
    try:
        images_file = images_path.open("r")
    except FileNotFoundError:
        # No temporary file is available
        return None
    with images_file:
        return json.load(images_file)
|
141
|
+
|
142
|
+
|
143
|
+
def assemble_filters_failed_job(job: JobV2) -> Optional[dict[str, Any]]:
    """
    Assemble `DatasetV2.filters` for a failed workflow-execution.

    The new value of `filters` is the JSON content of the temporary
    `FILTERS_FILENAME` file found in the job working directory, i.e. it is
    based on the last successful task.

    Arguments:
        job:
            The failed `JobV2` object.

    Returns:
        The new value of `dataset.filters`, or `None` if `FILTERS_FILENAME`
        is missing.
    """
    filters_path = Path(job.working_dir) / FILTERS_FILENAME
    try:
        filters_file = filters_path.open("r")
    except FileNotFoundError:
        # No temporary file is available
        return None
    with filters_file:
        return json.load(filters_file)
|