fractal-server 2.13.1__py3-none-any.whl → 2.14.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fractal_server/__init__.py +1 -1
- fractal_server/__main__.py +3 -1
- fractal_server/app/models/linkusergroup.py +6 -2
- fractal_server/app/models/v2/__init__.py +7 -1
- fractal_server/app/models/v2/dataset.py +1 -11
- fractal_server/app/models/v2/history.py +78 -0
- fractal_server/app/models/v2/job.py +10 -3
- fractal_server/app/models/v2/task_group.py +2 -2
- fractal_server/app/models/v2/workflow.py +1 -1
- fractal_server/app/models/v2/workflowtask.py +1 -1
- fractal_server/app/routes/admin/v2/accounting.py +18 -28
- fractal_server/app/routes/admin/v2/task.py +1 -1
- fractal_server/app/routes/admin/v2/task_group.py +0 -17
- fractal_server/app/routes/api/__init__.py +1 -1
- fractal_server/app/routes/api/v2/__init__.py +8 -2
- fractal_server/app/routes/api/v2/_aux_functions.py +66 -0
- fractal_server/app/routes/api/v2/_aux_functions_history.py +166 -0
- fractal_server/app/routes/api/v2/dataset.py +0 -17
- fractal_server/app/routes/api/v2/history.py +544 -0
- fractal_server/app/routes/api/v2/images.py +31 -43
- fractal_server/app/routes/api/v2/job.py +30 -0
- fractal_server/app/routes/api/v2/project.py +1 -53
- fractal_server/app/routes/api/v2/{status.py → status_legacy.py} +6 -6
- fractal_server/app/routes/api/v2/submit.py +16 -14
- fractal_server/app/routes/api/v2/task.py +3 -10
- fractal_server/app/routes/api/v2/task_collection_custom.py +4 -9
- fractal_server/app/routes/api/v2/task_group.py +0 -17
- fractal_server/app/routes/api/v2/verify_image_types.py +61 -0
- fractal_server/app/routes/api/v2/workflow.py +28 -69
- fractal_server/app/routes/api/v2/workflowtask.py +53 -50
- fractal_server/app/routes/auth/group.py +0 -16
- fractal_server/app/routes/auth/oauth.py +5 -3
- fractal_server/app/routes/pagination.py +47 -0
- fractal_server/app/runner/components.py +0 -3
- fractal_server/app/runner/compress_folder.py +57 -29
- fractal_server/app/runner/exceptions.py +4 -0
- fractal_server/app/runner/executors/base_runner.py +157 -0
- fractal_server/app/runner/{v2/_local/_local_config.py → executors/local/get_local_config.py} +7 -9
- fractal_server/app/runner/executors/local/runner.py +248 -0
- fractal_server/app/runner/executors/{slurm → slurm_common}/_batching.py +1 -1
- fractal_server/app/runner/executors/{slurm → slurm_common}/_slurm_config.py +9 -7
- fractal_server/app/runner/executors/slurm_common/base_slurm_runner.py +868 -0
- fractal_server/app/runner/{v2/_slurm_common → executors/slurm_common}/get_slurm_config.py +48 -17
- fractal_server/app/runner/executors/{slurm → slurm_common}/remote.py +36 -47
- fractal_server/app/runner/executors/slurm_common/slurm_job_task_models.py +134 -0
- fractal_server/app/runner/executors/slurm_ssh/runner.py +268 -0
- fractal_server/app/runner/executors/slurm_sudo/__init__.py +0 -0
- fractal_server/app/runner/executors/{slurm/sudo → slurm_sudo}/_subprocess_run_as_user.py +2 -83
- fractal_server/app/runner/executors/slurm_sudo/runner.py +193 -0
- fractal_server/app/runner/extract_archive.py +1 -3
- fractal_server/app/runner/task_files.py +134 -87
- fractal_server/app/runner/v2/__init__.py +0 -399
- fractal_server/app/runner/v2/_local.py +88 -0
- fractal_server/app/runner/v2/{_slurm_ssh/__init__.py → _slurm_ssh.py} +20 -19
- fractal_server/app/runner/v2/{_slurm_sudo/__init__.py → _slurm_sudo.py} +17 -15
- fractal_server/app/runner/v2/db_tools.py +119 -0
- fractal_server/app/runner/v2/runner.py +206 -95
- fractal_server/app/runner/v2/runner_functions.py +488 -187
- fractal_server/app/runner/v2/runner_functions_low_level.py +40 -43
- fractal_server/app/runner/v2/submit_workflow.py +358 -0
- fractal_server/app/runner/v2/task_interface.py +31 -0
- fractal_server/app/schemas/_validators.py +13 -24
- fractal_server/app/schemas/user.py +10 -7
- fractal_server/app/schemas/user_settings.py +9 -21
- fractal_server/app/schemas/v2/__init__.py +9 -1
- fractal_server/app/schemas/v2/dataset.py +12 -94
- fractal_server/app/schemas/v2/dumps.py +26 -9
- fractal_server/app/schemas/v2/history.py +80 -0
- fractal_server/app/schemas/v2/job.py +15 -8
- fractal_server/app/schemas/v2/manifest.py +14 -7
- fractal_server/app/schemas/v2/project.py +9 -7
- fractal_server/app/schemas/v2/status_legacy.py +35 -0
- fractal_server/app/schemas/v2/task.py +72 -77
- fractal_server/app/schemas/v2/task_collection.py +14 -32
- fractal_server/app/schemas/v2/task_group.py +10 -9
- fractal_server/app/schemas/v2/workflow.py +10 -11
- fractal_server/app/schemas/v2/workflowtask.py +2 -21
- fractal_server/app/security/__init__.py +3 -3
- fractal_server/app/security/signup_email.py +2 -2
- fractal_server/config.py +41 -46
- fractal_server/images/tools.py +23 -0
- fractal_server/migrations/versions/47351f8c7ebc_drop_dataset_filters.py +50 -0
- fractal_server/migrations/versions/9db60297b8b2_set_ondelete.py +250 -0
- fractal_server/migrations/versions/c90a7c76e996_job_id_in_history_run.py +41 -0
- fractal_server/migrations/versions/e81103413827_add_job_type_filters.py +36 -0
- fractal_server/migrations/versions/f37aceb45062_make_historyunit_logfile_required.py +39 -0
- fractal_server/migrations/versions/fbce16ff4e47_new_history_items.py +120 -0
- fractal_server/ssh/_fabric.py +28 -14
- fractal_server/tasks/v2/local/collect.py +2 -2
- fractal_server/tasks/v2/ssh/collect.py +2 -2
- fractal_server/tasks/v2/templates/2_pip_install.sh +1 -1
- fractal_server/tasks/v2/templates/4_pip_show.sh +1 -1
- fractal_server/tasks/v2/utils_background.py +0 -19
- fractal_server/tasks/v2/utils_database.py +30 -17
- fractal_server/tasks/v2/utils_templates.py +6 -0
- {fractal_server-2.13.1.dist-info → fractal_server-2.14.0.dist-info}/METADATA +4 -4
- {fractal_server-2.13.1.dist-info → fractal_server-2.14.0.dist-info}/RECORD +106 -96
- {fractal_server-2.13.1.dist-info → fractal_server-2.14.0.dist-info}/WHEEL +1 -1
- fractal_server/app/runner/executors/slurm/ssh/_executor_wait_thread.py +0 -126
- fractal_server/app/runner/executors/slurm/ssh/_slurm_job.py +0 -116
- fractal_server/app/runner/executors/slurm/ssh/executor.py +0 -1386
- fractal_server/app/runner/executors/slurm/sudo/_check_jobs_status.py +0 -71
- fractal_server/app/runner/executors/slurm/sudo/_executor_wait_thread.py +0 -130
- fractal_server/app/runner/executors/slurm/sudo/executor.py +0 -1281
- fractal_server/app/runner/v2/_local/__init__.py +0 -132
- fractal_server/app/runner/v2/_local/_submit_setup.py +0 -52
- fractal_server/app/runner/v2/_local/executor.py +0 -100
- fractal_server/app/runner/v2/_slurm_ssh/_submit_setup.py +0 -83
- fractal_server/app/runner/v2/_slurm_sudo/_submit_setup.py +0 -83
- fractal_server/app/runner/v2/handle_failed_job.py +0 -59
- fractal_server/app/schemas/v2/status.py +0 -16
- /fractal_server/app/{runner/executors/slurm → history}/__init__.py +0 -0
- /fractal_server/app/runner/executors/{slurm/ssh → local}/__init__.py +0 -0
- /fractal_server/app/runner/executors/{slurm/sudo → slurm_common}/__init__.py +0 -0
- /fractal_server/app/runner/executors/{_job_states.py → slurm_common/_job_states.py} +0 -0
- /fractal_server/app/runner/executors/{slurm → slurm_common}/utils_executors.py +0 -0
- /fractal_server/app/runner/{v2/_slurm_common → executors/slurm_ssh}/__init__.py +0 -0
- {fractal_server-2.13.1.dist-info → fractal_server-2.14.0.dist-info}/LICENSE +0 -0
- {fractal_server-2.13.1.dist-info → fractal_server-2.14.0.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,248 @@
|
|
1
|
+
from concurrent.futures import Future
|
2
|
+
from concurrent.futures import ThreadPoolExecutor
|
3
|
+
from pathlib import Path
|
4
|
+
from typing import Any
|
5
|
+
from typing import Literal
|
6
|
+
|
7
|
+
from .get_local_config import LocalBackendConfig
|
8
|
+
from fractal_server.app.db import get_sync_db
|
9
|
+
from fractal_server.app.runner.exceptions import TaskExecutionError
|
10
|
+
from fractal_server.app.runner.executors.base_runner import BaseRunner
|
11
|
+
from fractal_server.app.runner.task_files import TaskFiles
|
12
|
+
from fractal_server.app.runner.v2.db_tools import (
|
13
|
+
bulk_update_status_of_history_unit,
|
14
|
+
)
|
15
|
+
from fractal_server.app.runner.v2.db_tools import update_status_of_history_unit
|
16
|
+
from fractal_server.app.schemas.v2 import HistoryUnitStatus
|
17
|
+
from fractal_server.logger import set_logger
|
18
|
+
|
19
|
+
|
20
|
+
logger = set_logger(__name__)
|
21
|
+
|
22
|
+
|
23
|
+
class LocalRunner(BaseRunner):
    """
    Runner executing tasks on the local host via a `ThreadPoolExecutor`.

    Attributes:
        executor: Thread pool used by both `submit` and `multisubmit`.
        root_dir_local: Base folder for job files (created if missing).
    """

    executor: ThreadPoolExecutor
    root_dir_local: Path

    def __init__(
        self,
        root_dir_local: Path,
    ):
        """
        Create the base folder and the thread pool.

        Args:
            root_dir_local: Base folder for job files.
        """
        self.root_dir_local = root_dir_local
        self.root_dir_local.mkdir(parents=True, exist_ok=True)
        self.executor = ThreadPoolExecutor()
        logger.debug("Create LocalRunner")

    def __enter__(self):
        logger.debug("Enter LocalRunner")
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Cancel still-queued futures first, then delegate to the executor's
        # own `__exit__` (which performs a blocking `shutdown(wait=True)`).
        logger.debug("Exit LocalRunner")
        self.executor.shutdown(
            wait=False,
            cancel_futures=True,
        )
        return self.executor.__exit__(exc_type, exc_val, exc_tb)

    def submit(
        self,
        func: callable,
        parameters: dict[str, Any],
        history_unit_id: int,
        task_files: TaskFiles,
        task_type: Literal[
            "non_parallel",
            "converter_non_parallel",
            "compound",
            "converter_compound",
        ],
        config: LocalBackendConfig,
        user_id: int,
    ) -> tuple[Any, Exception]:
        """
        Run a single callable and record the outcome on its `HistoryUnit`.

        Args:
            func: Callable to run; called with keyword arguments
                `parameters` and `remote_files`.
            parameters: Keyword parameters forwarded to `func`.
            history_unit_id: ID of the `HistoryUnit` to update.
            task_files: Provides the working subfolder and remote-files dict.
            task_type: Kind of task; for compound types the unit status is
                not set to DONE here (presumably finalized later — see the
                `multisubmit` phase of compound tasks).
            config: Local-backend configuration (unused in `submit`).
            user_id: ID of the submitting user (unused here).

        Returns:
            `(result, None)` on success, `(None, TaskExecutionError)` on
            failure.
        """
        logger.debug("[submit] START")

        try:
            self.validate_submit_parameters(parameters, task_type=task_type)
            workdir_local = task_files.wftask_subfolder_local
            # No `parents`/`exist_ok`: the subfolder must not pre-exist.
            workdir_local.mkdir()

            # SUBMISSION PHASE
            future = self.executor.submit(
                func,
                parameters=parameters,
                remote_files=task_files.remote_files_dict,
            )
        except Exception as e:
            logger.error(
                "[submit] Unexpected exception during submission. "
                f"Original error {str(e)}"
            )
            exception = TaskExecutionError(str(e))
            with next(get_sync_db()) as db:
                update_status_of_history_unit(
                    history_unit_id=history_unit_id,
                    status=HistoryUnitStatus.FAILED,
                    db_sync=db,
                )
            return None, exception

        # RETRIEVAL PHASE
        with next(get_sync_db()) as db:
            try:
                result = future.result()
                logger.debug("[submit] END with result")
                if task_type not in ["compound", "converter_compound"]:
                    update_status_of_history_unit(
                        history_unit_id=history_unit_id,
                        status=HistoryUnitStatus.DONE,
                        db_sync=db,
                    )
                return result, None
            except Exception as e:
                logger.debug("[submit] END with exception")
                update_status_of_history_unit(
                    history_unit_id=history_unit_id,
                    status=HistoryUnitStatus.FAILED,
                    db_sync=db,
                )
                return None, TaskExecutionError(str(e))

    def multisubmit(
        self,
        func: callable,
        list_parameters: list[dict],
        history_unit_ids: list[int],
        list_task_files: list[TaskFiles],
        task_type: Literal["parallel", "compound", "converter_compound"],
        config: LocalBackendConfig,
        user_id: int,
    ) -> tuple[dict[int, Any], dict[int, BaseException]]:
        """
        Run many callables and record per-unit outcomes.

        Note: `list_parameters`, `list_task_files` and `history_unit_ids`
        have the same size. For parallel tasks, this is also the number of
        input images, while for compound tasks these can differ.

        Returns:
            Two dicts keyed by positional index: successful results and
            `TaskExecutionError`s.
        """
        # Local import to avoid touching the module import block.
        from concurrent.futures import FIRST_COMPLETED
        from concurrent.futures import wait

        logger.debug(f"[multisubmit] START, {len(list_parameters)=}")
        results: dict[int, Any] = {}
        exceptions: dict[int, BaseException] = {}

        try:
            self.validate_multisubmit_parameters(
                list_parameters=list_parameters,
                task_type=task_type,
                list_task_files=list_task_files,
                history_unit_ids=history_unit_ids,
            )

            workdir_local = list_task_files[0].wftask_subfolder_local
            # For compound tasks the subfolder was already created in
            # `submit`; only parallel tasks create it here.
            if task_type == "parallel":
                workdir_local.mkdir()

            # Set `n_elements` and `parallel_tasks_per_job`
            n_elements = len(list_parameters)
            parallel_tasks_per_job = config.parallel_tasks_per_job
            if parallel_tasks_per_job is None:
                parallel_tasks_per_job = n_elements

        except Exception as e:
            logger.error(
                "[multisubmit] Unexpected exception during preliminary phase. "
                f"Original error {str(e)}"
            )
            exception = TaskExecutionError(str(e))
            exceptions = {
                ind: exception for ind in range(len(list_parameters))
            }
            # Only parallel tasks own per-unit statuses at this stage.
            if task_type == "parallel":
                with next(get_sync_db()) as db:
                    bulk_update_status_of_history_unit(
                        history_unit_ids=history_unit_ids,
                        status=HistoryUnitStatus.FAILED,
                        db_sync=db,
                    )
            return results, exceptions

        # Execute tasks, in chunks of size `parallel_tasks_per_job`
        for ind_chunk in range(0, n_elements, parallel_tasks_per_job):

            list_parameters_chunk = list_parameters[
                ind_chunk : ind_chunk + parallel_tasks_per_job
            ]

            active_futures: dict[int, Future] = {}
            for ind_within_chunk, kwargs in enumerate(list_parameters_chunk):
                positional_index = ind_chunk + ind_within_chunk
                try:
                    future = self.executor.submit(
                        func,
                        parameters=kwargs,
                        remote_files=list_task_files[
                            positional_index
                        ].remote_files_dict,
                    )
                    active_futures[positional_index] = future
                except Exception as e:
                    logger.error(
                        "[multisubmit] Unexpected exception during submission."
                        f" Original error {str(e)}"
                    )
                    current_history_unit_id = history_unit_ids[
                        positional_index
                    ]
                    exceptions[positional_index] = TaskExecutionError(str(e))
                    if task_type == "parallel":
                        with next(get_sync_db()) as db:
                            update_status_of_history_unit(
                                history_unit_id=current_history_unit_id,
                                status=HistoryUnitStatus.FAILED,
                                db_sync=db,
                            )
            while active_futures:
                # FIX: block until at least one future has completed,
                # instead of busy-spinning on `not future.running()` (which
                # burned CPU and also matched *pending* futures, so that
                # `fut.result()` could block while the DB session was open).
                done, _ = wait(
                    active_futures.values(),
                    return_when=FIRST_COMPLETED,
                )
                finished_futures = [
                    index_and_future
                    for index_and_future in active_futures.items()
                    if index_and_future[1] in done
                ]

                with next(get_sync_db()) as db:
                    for positional_index, fut in finished_futures:
                        active_futures.pop(positional_index)
                        if task_type == "parallel":
                            current_history_unit_id = history_unit_ids[
                                positional_index
                            ]

                        try:
                            results[positional_index] = fut.result()
                            if task_type == "parallel":
                                update_status_of_history_unit(
                                    history_unit_id=current_history_unit_id,
                                    status=HistoryUnitStatus.DONE,
                                    db_sync=db,
                                )

                        except Exception as e:
                            logger.debug(
                                "Multisubmit failed in retrieval "
                                "phase with the following error "
                                f"{str(e)}"
                            )
                            exceptions[positional_index] = TaskExecutionError(
                                str(e)
                            )
                            if task_type == "parallel":
                                update_status_of_history_unit(
                                    history_unit_id=current_history_unit_id,
                                    status=HistoryUnitStatus.FAILED,
                                    db_sync=db,
                                )

        logger.debug(f"[multisubmit] END, {len(results)=}, {len(exceptions)=}")

        return results, exceptions
|
@@ -22,9 +22,9 @@ from pydantic import ConfigDict
|
|
22
22
|
from pydantic import Field
|
23
23
|
from pydantic import ValidationError
|
24
24
|
|
25
|
-
from
|
26
|
-
from
|
27
|
-
from
|
25
|
+
from fractal_server.config import get_settings
|
26
|
+
from fractal_server.logger import set_logger
|
27
|
+
from fractal_server.syringe import Inject
|
28
28
|
|
29
29
|
logger = set_logger(__name__)
|
30
30
|
|
@@ -213,7 +213,7 @@ class SlurmConfig(BaseModel):
|
|
213
213
|
expected file content are defined in
|
214
214
|
[`SlurmConfigFile`](./#fractal_server.app.runner._slurm._slurm_config.SlurmConfigFile)).
|
215
215
|
|
216
|
-
Part of the attributes map directly to some of the SLURM
|
216
|
+
Part of the attributes map directly to some of the SLURM attributes (see
|
217
217
|
https://slurm.schedmd.com/sbatch.html), e.g. `partition`. Other attributes
|
218
218
|
are metaparameters which are needed in fractal-server to combine multiple
|
219
219
|
tasks in the same SLURM job (e.g. `parallel_tasks_per_job` or
|
@@ -368,9 +368,7 @@ class SlurmConfig(BaseModel):
|
|
368
368
|
if value is not None:
|
369
369
|
# Handle the `time` parameter
|
370
370
|
if key == "time" and self.parallel_tasks_per_job > 1:
|
371
|
-
#
|
372
|
-
# we simply propagate `time`, but this is not enough when
|
373
|
-
# several `srun` are combined in a single script.
|
371
|
+
# NOTE: see issue #1632
|
374
372
|
logger.warning(
|
375
373
|
f"`time` SLURM parameter is set to {self.time}, "
|
376
374
|
"but this does not take into account the number of "
|
@@ -407,6 +405,10 @@ class SlurmConfig(BaseModel):
|
|
407
405
|
|
408
406
|
return lines
|
409
407
|
|
408
|
+
@property
def batch_size(self) -> int:
    """Alias of ``tasks_per_job``."""
    return self.tasks_per_job
|
411
|
+
|
410
412
|
|
411
413
|
def _parse_mem_value(raw_mem: Union[str, int]) -> int:
|
412
414
|
"""
|