fractal-server 2.13.1__py3-none-any.whl → 2.14.0a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fractal_server/__init__.py +1 -1
- fractal_server/app/history/__init__.py +4 -0
- fractal_server/app/history/image_updates.py +142 -0
- fractal_server/app/history/status_enum.py +16 -0
- fractal_server/app/models/v2/__init__.py +5 -1
- fractal_server/app/models/v2/history.py +53 -0
- fractal_server/app/routes/api/v2/__init__.py +2 -2
- fractal_server/app/routes/api/v2/_aux_functions.py +78 -0
- fractal_server/app/routes/api/v2/dataset.py +12 -9
- fractal_server/app/routes/api/v2/history.py +247 -0
- fractal_server/app/routes/api/v2/project.py +25 -0
- fractal_server/app/routes/api/v2/workflow.py +18 -3
- fractal_server/app/routes/api/v2/workflowtask.py +22 -0
- fractal_server/app/runner/executors/base_runner.py +114 -0
- fractal_server/app/runner/{v2/_local → executors/local}/_local_config.py +3 -3
- fractal_server/app/runner/executors/local/_submit_setup.py +54 -0
- fractal_server/app/runner/executors/local/runner.py +200 -0
- fractal_server/app/runner/executors/{slurm → slurm_common}/_batching.py +1 -1
- fractal_server/app/runner/executors/{slurm → slurm_common}/_slurm_config.py +3 -3
- fractal_server/app/runner/{v2/_slurm_ssh → executors/slurm_common}/_submit_setup.py +13 -12
- fractal_server/app/runner/{v2/_slurm_common → executors/slurm_common}/get_slurm_config.py +9 -15
- fractal_server/app/runner/executors/{slurm/ssh → slurm_ssh}/_executor_wait_thread.py +1 -1
- fractal_server/app/runner/executors/{slurm/ssh → slurm_ssh}/_slurm_job.py +1 -1
- fractal_server/app/runner/executors/{slurm/ssh → slurm_ssh}/executor.py +13 -14
- fractal_server/app/runner/executors/{slurm/sudo → slurm_sudo}/_check_jobs_status.py +11 -9
- fractal_server/app/runner/executors/{slurm/sudo → slurm_sudo}/_executor_wait_thread.py +3 -3
- fractal_server/app/runner/executors/{slurm/sudo → slurm_sudo}/_subprocess_run_as_user.py +2 -68
- fractal_server/app/runner/executors/slurm_sudo/runner.py +632 -0
- fractal_server/app/runner/task_files.py +70 -96
- fractal_server/app/runner/v2/__init__.py +5 -19
- fractal_server/app/runner/v2/_local.py +84 -0
- fractal_server/app/runner/v2/{_slurm_ssh/__init__.py → _slurm_ssh.py} +10 -13
- fractal_server/app/runner/v2/{_slurm_sudo/__init__.py → _slurm_sudo.py} +10 -12
- fractal_server/app/runner/v2/runner.py +93 -28
- fractal_server/app/runner/v2/runner_functions.py +85 -62
- fractal_server/app/runner/v2/runner_functions_low_level.py +20 -20
- fractal_server/app/schemas/v2/dataset.py +0 -17
- fractal_server/app/schemas/v2/history.py +23 -0
- fractal_server/config.py +2 -2
- fractal_server/migrations/versions/8223fcef886c_image_status.py +63 -0
- fractal_server/migrations/versions/87cd72a537a2_add_historyitem_table.py +68 -0
- {fractal_server-2.13.1.dist-info → fractal_server-2.14.0a1.dist-info}/METADATA +1 -1
- {fractal_server-2.13.1.dist-info → fractal_server-2.14.0a1.dist-info}/RECORD +53 -47
- fractal_server/app/routes/api/v2/status.py +0 -168
- fractal_server/app/runner/executors/slurm/sudo/executor.py +0 -1281
- fractal_server/app/runner/v2/_local/__init__.py +0 -132
- fractal_server/app/runner/v2/_local/_submit_setup.py +0 -52
- fractal_server/app/runner/v2/_local/executor.py +0 -100
- fractal_server/app/runner/v2/_slurm_sudo/_submit_setup.py +0 -83
- fractal_server/app/runner/v2/handle_failed_job.py +0 -59
- /fractal_server/app/runner/executors/{slurm → local}/__init__.py +0 -0
- /fractal_server/app/runner/executors/{slurm/ssh → slurm_common}/__init__.py +0 -0
- /fractal_server/app/runner/executors/{_job_states.py → slurm_common/_job_states.py} +0 -0
- /fractal_server/app/runner/executors/{slurm → slurm_common}/remote.py +0 -0
- /fractal_server/app/runner/executors/{slurm → slurm_common}/utils_executors.py +0 -0
- /fractal_server/app/runner/executors/{slurm/sudo → slurm_ssh}/__init__.py +0 -0
- /fractal_server/app/runner/{v2/_slurm_common → executors/slurm_sudo}/__init__.py +0 -0
- {fractal_server-2.13.1.dist-info → fractal_server-2.14.0a1.dist-info}/LICENSE +0 -0
- {fractal_server-2.13.1.dist-info → fractal_server-2.14.0a1.dist-info}/WHEEL +0 -0
- {fractal_server-2.13.1.dist-info → fractal_server-2.14.0a1.dist-info}/entry_points.txt +0 -0
fractal_server/app/routes/api/v2/project.py
@@ -5,6 +5,7 @@ from fastapi import Depends
 from fastapi import HTTPException
 from fastapi import Response
 from fastapi import status
+from sqlmodel import delete
 from sqlmodel import select

 from .....logger import reset_logger_handlers
@@ -12,6 +13,8 @@ from .....logger import set_logger
 from ....db import AsyncSession
 from ....db import get_async_db
 from ....models.v2 import DatasetV2
+from ....models.v2 import HistoryItemV2
+from ....models.v2 import ImageStatus
 from ....models.v2 import JobV2
 from ....models.v2 import LinkUserProjectV2
 from ....models.v2 import ProjectV2
@@ -161,6 +164,22 @@ async def delete_project(
         for job in jobs:
             logger.info(f"Setting Job[{job.id}].workflow_id to None.")
             job.workflow_id = None
+        # Cascade operations: set foreign-keys to null for history items
+        # which are in relationship with the current workflow
+        wft_ids = [wft.id for wft in wf.task_list]
+        stm = select(HistoryItemV2).where(
+            HistoryItemV2.workflowtask_id.in_(wft_ids)
+        )
+        res = await db.execute(stm)
+        history_items = res.scalars().all()
+        for history_item in history_items:
+            history_item.workflowtask_id = None
+        # Cascade operations: delete all image statuses which are in
+        # relationship with the current workflow
+        stm = delete(ImageStatus).where(
+            ImageStatus.workflowtask_id.in_(wft_ids)
+        )
+        await db.execute(stm)
         # Delete workflow
         logger.info(f"Adding Workflow[{wf.id}] to deletion.")
         await db.delete(wf)
@@ -180,6 +199,12 @@ async def delete_project(
         for job in jobs:
             logger.info(f"Setting Job[{job.id}].dataset_id to None.")
             job.dataset_id = None
+        # Cascade operations: delete history items and image statuses which are
+        # in relationship with the current dataset
+        stm = delete(HistoryItemV2).where(HistoryItemV2.dataset_id == ds.id)
+        await db.execute(stm)
+        stm = delete(ImageStatus).where(ImageStatus.dataset_id == ds.id)
+        await db.execute(stm)
         # Delete dataset
         logger.info(f"Adding Dataset[{ds.id}] to deletion.")
         await db.delete(ds)
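Note: the hunks above combine two cascade idioms that recur throughout this release: nulling out foreign keys row by row (`select` plus attribute assignment, so the rows survive) and bulk-deleting dependent rows (`delete`). Below is a minimal standalone sketch of the pattern, assuming fractal-server 2.14.0a1 is installed; the `cascade_cleanup` function and its arguments are invented for illustration.

    # Hedged sketch of the cascade idioms above; the session and ID values
    # are illustrative inputs, not values taken from this diff.
    from sqlmodel import delete
    from sqlmodel import select
    from sqlmodel.ext.asyncio.session import AsyncSession

    from fractal_server.app.models.v2 import HistoryItemV2
    from fractal_server.app.models.v2 import ImageStatus


    async def cascade_cleanup(
        db: AsyncSession,
        wft_ids: list[int],
        dataset_id: int,
    ) -> None:
        # Idiom 1: keep the rows, but null out the foreign key one ORM
        # object at a time (changes are flushed on commit).
        res = await db.execute(
            select(HistoryItemV2).where(
                HistoryItemV2.workflowtask_id.in_(wft_ids)
            )
        )
        for history_item in res.scalars().all():
            history_item.workflowtask_id = None

        # Idiom 2: remove dependent rows with a single bulk DELETE.
        await db.execute(
            delete(ImageStatus).where(ImageStatus.dataset_id == dataset_id)
        )
        await db.commit()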
fractal_server/app/routes/api/v2/workflow.py
@@ -7,10 +7,13 @@ from fastapi import HTTPException
 from fastapi import Response
 from fastapi import status
 from pydantic import BaseModel
+from sqlmodel import delete
 from sqlmodel import select

 from ....db import AsyncSession
 from ....db import get_async_db
+from ....models.v2 import HistoryItemV2
+from ....models.v2 import ImageStatus
 from ....models.v2 import JobV2
 from ....models.v2 import ProjectV2
 from ....models.v2 import WorkflowV2
@@ -225,14 +228,26 @@ async def delete_workflow(
         ),
     )

-    # Cascade
-    # relationship with the current workflow
+    # Cascade operation: set foreign-keys to null for jobs and history items
+    # which are in relationship with the current workflow.
     stm = select(JobV2).where(JobV2.workflow_id == workflow_id)
     res = await db.execute(stm)
     jobs = res.scalars().all()
     for job in jobs:
         job.workflow_id = None

+    wft_ids = [wft.id for wft in workflow.task_list]
+    stm = select(HistoryItemV2).where(
+        HistoryItemV2.workflowtask_id.in_(wft_ids)
+    )
+    res = await db.execute(stm)
+    history_items = res.scalars().all()
+    for history_item in history_items:
+        history_item.workflowtask_id = None
+
+    stm = delete(ImageStatus).where(ImageStatus.workflowtask_id.in_(wft_ids))
+    await db.execute(stm)
+
     # Delete workflow
     await db.delete(workflow)
     await db.commit()
@@ -244,7 +259,7 @@ async def delete_workflow(
     "/project/{project_id}/workflow/{workflow_id}/export/",
     response_model=WorkflowExportV2,
 )
-async def
+async def export_workflow(
     project_id: int,
     workflow_id: int,
     user: UserOAuth = Depends(current_active_user),
fractal_server/app/routes/api/v2/workflowtask.py
@@ -6,6 +6,8 @@ from fastapi import Depends
 from fastapi import HTTPException
 from fastapi import Response
 from fastapi import status
+from sqlmodel import delete
+from sqlmodel import select

 from ....db import AsyncSession
 from ....db import get_async_db
@@ -15,6 +17,8 @@ from ._aux_functions import _workflow_insert_task
 from ._aux_functions_tasks import _check_type_filters_compatibility
 from ._aux_functions_tasks import _get_task_read_access
 from fractal_server.app.models import UserOAuth
+from fractal_server.app.models.v2 import HistoryItemV2
+from fractal_server.app.models.v2 import ImageStatus
 from fractal_server.app.models.v2 import WorkflowTaskV2
 from fractal_server.app.routes.auth import current_active_user
 from fractal_server.app.schemas.v2 import WorkflowTaskCreateV2
@@ -333,6 +337,24 @@ async def delete_workflowtask(
         db=db,
     )

+    # Cascade operations:
+    # * set foreign-keys to null for history items which are in relationship
+    #   with the current workflowtask;
+    # * delete ImageStatus in relationship with the current workflowtask.
+    stm = select(HistoryItemV2).where(
+        HistoryItemV2.workflowtask_id == db_workflow_task.id
+    )
+    res = await db.execute(stm)
+    history_items = res.scalars().all()
+    for history_item in history_items:
+        history_item.workflowtask_id = None
+
+    stm = delete(ImageStatus).where(
+        ImageStatus.workflowtask_id == db_workflow_task.id
+    )
+    await db.execute(stm)
+
+    # Delete WorkflowTask
     await db.delete(db_workflow_task)
     await db.commit()

fractal_server/app/runner/executors/base_runner.py (new file)
@@ -0,0 +1,114 @@
+from typing import Any
+
+from fractal_server.app.runner.components import _COMPONENT_KEY_
+
+
+class BaseRunner(object):
+    """
+    Base class for Fractal runners.
+    """
+
+    def shutdown(self, *args, **kwargs):
+        raise NotImplementedError()
+
+    def submit(
+        self,
+        func: callable,
+        parameters: dict[str, Any],
+        history_item_id: int,
+        in_compound_task: bool,
+        **kwargs,
+    ) -> tuple[Any, BaseException]:
+        """
+        Run a single fractal task.
+
+        # FIXME: Describe more in detail
+
+        Args:
+            func:
+                Function to be executed.
+            parameters:
+                Dictionary of parameters. Must include `zarr_urls` key.
+            history_item_id:
+                Database ID of the corresponding `HistoryItemV2` entry.
+            in_compound_task:
+                Whether this is the init part of a compound task.
+            kwargs:
+                Runner-specific parameters.
+        """
+        raise NotImplementedError()
+
+    def multisubmit(
+        self,
+        func: callable,
+        list_parameters: list[dict[str, Any]],
+        history_item_id: int,
+        in_compound_task: bool,
+        **kwargs,
+    ) -> tuple[dict[int, Any], dict[int, BaseException]]:
+        """
+        Run a parallel fractal task.
+
+        # FIXME: Describe more in detail
+
+        Args:
+            func:
+                Function to be executed.
+            list_parameters:
+                List of dictionaries of parameters. Each one must include a
+                `zarr_url` key.
+            history_item_id:
+                Database ID of the corresponding `HistoryItemV2` entry.
+            in_compound_task:
+                Whether this is the compute part of a compound task.
+            kwargs:
+                Runner-specific parameters.
+        """
+        raise NotImplementedError()
+
+    def validate_submit_parameters(self, parameters: dict[str, Any]) -> None:
+        """
+        Validate parameters for `submit` method.
+
+        Args:
+            parameters: Parameters dictionary.
+        """
+        if not isinstance(parameters, dict):
+            raise ValueError("`parameters` must be a dictionary.")
+        if "zarr_urls" not in parameters.keys():
+            raise ValueError(
+                f"No 'zarr_urls' key in {list(parameters.keys())}"
+            )
+        if _COMPONENT_KEY_ not in parameters.keys():
+            raise ValueError(
+                f"No '{_COMPONENT_KEY_}' key in {list(parameters.keys())}"
+            )
+
+    def validate_multisubmit_parameters(
+        self,
+        list_parameters: list[dict[str, Any]],
+        in_compound_task: bool,
+    ) -> None:
+        """
+        Validate parameters for `multisubmit` method.
+
+        Args:
+            list_parameters: List of parameter dictionaries.
+            in_compound_task:
+                Whether this is the compute part of a compound task.
+        """
+        for single_kwargs in list_parameters:
+            if not isinstance(single_kwargs, dict):
+                raise RuntimeError("kwargs item must be a dictionary.")
+            if "zarr_url" not in single_kwargs.keys():
+                raise RuntimeError(
+                    f"No 'zarr_url' key in {list(single_kwargs.keys())}"
+                )
+            if _COMPONENT_KEY_ not in single_kwargs.keys():
+                raise ValueError(
+                    f"No '{_COMPONENT_KEY_}' key in {list(single_kwargs.keys())}"
+                )
+        if not in_compound_task:
+            zarr_urls = [kwargs["zarr_url"] for kwargs in list_parameters]
+            if len(zarr_urls) != len(set(zarr_urls)):
+                raise RuntimeError("Non-unique zarr_urls")
fractal_server/app/runner/{v2/_local → executors/local}/_local_config.py
@@ -20,9 +20,9 @@ from pydantic import BaseModel
 from pydantic import ConfigDict
 from pydantic import ValidationError

-from
-from
-from
+from fractal_server.app.models.v2 import WorkflowTaskV2
+from fractal_server.config import get_settings
+from fractal_server.syringe import Inject


 class LocalBackendConfigError(ValueError):
fractal_server/app/runner/executors/local/_submit_setup.py (new file)
@@ -0,0 +1,54 @@
+from pathlib import Path
+from typing import Any
+from typing import Literal
+
+from ...task_files import TaskFiles
+from ._local_config import get_local_backend_config
+from fractal_server.app.models.v2 import WorkflowTaskV2
+
+
+def _local_submit_setup(
+    *,
+    wftask: WorkflowTaskV2,
+    root_dir_local: Path,
+    root_dir_remote: Path,
+    which_type: Literal["non_parallel", "parallel"],
+) -> dict[str, Any]:
+    """
+    Collect WorkflowTask-specific configuration parameters from different
+    sources, and inject them for execution.
+
+    FIXME
+
+    Arguments:
+        wftask:
+            WorkflowTask for which the configuration is to be assembled
+        workflow_dir_local:
+            Not used in this function.
+        workflow_dir_remote:
+            Not used in this function.
+
+    Returns:
+        submit_setup_dict:
+            A dictionary that will be passed on to
+            `FractalThreadPoolExecutor.submit` and
+            `FractalThreadPoolExecutor.map`, so as to set extra options.
+    """
+
+    local_backend_config = get_local_backend_config(
+        wftask=wftask,
+        which_type=which_type,
+    )
+
+    # Get TaskFiles object
+    task_files = TaskFiles(
+        root_dir_local=root_dir_local,
+        root_dir_remote=root_dir_remote,
+        task_order=wftask.order,
+        task_name=wftask.task.name,
+    )
+
+    return dict(
+        local_backend_config=local_backend_config,
+        task_files=task_files,
+    )
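Note: a hedged sketch of how the returned dictionary would be forwarded to a runner; `wftask`, `task_function`, `parameters`, `history_item_id`, and `runner` are assumed to exist and are not defined in this diff.

    # Hypothetical call site: the returned dict is unpacked into the runner
    # call as extra keyword arguments.
    from pathlib import Path

    submit_setup = _local_submit_setup(
        wftask=wftask,  # assumed WorkflowTaskV2 instance
        root_dir_local=Path("/tmp/fractal-job"),
        root_dir_remote=Path("/tmp/fractal-job"),
        which_type="non_parallel",
    )
    # submit_setup == {"local_backend_config": ..., "task_files": ...}
    result, exception = runner.submit(
        func=task_function,  # assumed callable
        parameters=parameters,  # assumed dict (zarr_urls + component key)
        history_item_id=history_item_id,  # assumed existing row ID
        **submit_setup,
    )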
fractal_server/app/runner/executors/local/runner.py (new file)
@@ -0,0 +1,200 @@
+from concurrent.futures import ThreadPoolExecutor
+from pathlib import Path
+from typing import Any
+from typing import Optional
+
+from ._local_config import get_default_local_backend_config
+from ._local_config import LocalBackendConfig
+from fractal_server.app.history import HistoryItemImageStatus
+from fractal_server.app.history import update_all_images
+from fractal_server.app.history import update_single_image
+from fractal_server.app.history import update_single_image_logfile
+from fractal_server.app.runner.components import _COMPONENT_KEY_
+from fractal_server.app.runner.executors.base_runner import BaseRunner
+from fractal_server.app.runner.task_files import TaskFiles
+from fractal_server.logger import set_logger
+
+logger = set_logger(__name__)
+
+
+class LocalRunner(BaseRunner):
+    executor: ThreadPoolExecutor
+    root_dir_local: Path
+
+    def __init__(
+        self,
+        root_dir_local: Path,
+    ):
+
+        self.root_dir_local = root_dir_local
+        self.root_dir_local.mkdir(parents=True, exist_ok=True)
+        self.executor = ThreadPoolExecutor()
+        logger.debug("Create LocalRunner")
+
+    def __enter__(self):
+        logger.debug("Enter LocalRunner")
+        return self
+
+    def shutdown(self):
+        logger.debug("Now shut LocalRunner.executor down")
+        self.executor.shutdown(
+            wait=False,
+            cancel_futures=True,
+        )
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        logger.debug("Exit LocalRunner")
+        self.shutdown()
+        return self.executor.__exit__(exc_type, exc_val, exc_tb)
+
+    def submit(
+        self,
+        func: callable,
+        parameters: dict[str, Any],
+        history_item_id: int,
+        task_files: TaskFiles,
+        in_compound_task: bool = False,
+        **kwargs,
+    ) -> tuple[Any, Exception]:
+        logger.debug("[submit] START")
+
+        current_task_files = TaskFiles(
+            **task_files.model_dump(
+                exclude={"component"},
+            ),
+            component=parameters[_COMPONENT_KEY_],
+        )
+
+        self.validate_submit_parameters(parameters)
+        workdir_local = current_task_files.wftask_subfolder_local
+        workdir_local.mkdir()
+        # SUBMISSION PHASE
+        future = self.executor.submit(func, parameters=parameters)
+
+        # RETRIEVAL PHASE
+        try:
+            result = future.result()
+            if not in_compound_task:
+                update_all_images(
+                    history_item_id=history_item_id,
+                    status=HistoryItemImageStatus.DONE,
+                    logfile=current_task_files.log_file_local,
+                )
+            logger.debug(f"[submit] END {result=}")
+            return result, None
+        except Exception as e:
+            exception = e
+            update_all_images(
+                history_item_id=history_item_id,
+                status=HistoryItemImageStatus.FAILED,
+                logfile=current_task_files.log_file_local,
+            )
+            logger.debug(f"[submit] END {exception=}")
+            return None, exception
+
+    def multisubmit(
+        self,
+        func: callable,
+        list_parameters: list[dict],
+        history_item_id: int,
+        task_files: TaskFiles,
+        in_compound_task: bool = False,
+        local_backend_config: Optional[LocalBackendConfig] = None,
+        **kwargs,
+    ):
+        logger.debug(f"[multisubmit] START, {len(list_parameters)=}")
+
+        self.validate_multisubmit_parameters(
+            list_parameters=list_parameters,
+            in_compound_task=in_compound_task,
+        )
+
+        workdir_local = task_files.wftask_subfolder_local
+        if not in_compound_task:
+            workdir_local.mkdir()
+
+        # Get local_backend_config
+        if local_backend_config is None:
+            local_backend_config = get_default_local_backend_config()
+
+        # Set `n_elements` and `parallel_tasks_per_job`
+        n_elements = len(list_parameters)
+        parallel_tasks_per_job = local_backend_config.parallel_tasks_per_job
+        if parallel_tasks_per_job is None:
+            parallel_tasks_per_job = n_elements
+
+        original_task_files = task_files
+
+        # Execute tasks, in chunks of size `parallel_tasks_per_job`
+        results = {}
+        exceptions = {}
+        for ind_chunk in range(0, n_elements, parallel_tasks_per_job):
+            list_parameters_chunk = list_parameters[
+                ind_chunk : ind_chunk + parallel_tasks_per_job
+            ]
+            from concurrent.futures import Future
+
+            active_futures: dict[int, Future] = {}
+            active_task_files: dict[int, TaskFiles] = {}
+            for ind_within_chunk, kwargs in enumerate(list_parameters_chunk):
+                positional_index = ind_chunk + ind_within_chunk
+                component = kwargs[_COMPONENT_KEY_]
+                future = self.executor.submit(func, parameters=kwargs)
+                active_futures[positional_index] = future
+                active_task_files[positional_index] = TaskFiles(
+                    **original_task_files.model_dump(exclude={"component"}),
+                    component=component,
+                )
+
+            while active_futures:
+                # FIXME: add shutdown detection
+                # if file exists: cancel all futures, and raise
+                finished_futures = [
+                    keyval
+                    for keyval in active_futures.items()
+                    if not keyval[1].running()
+                ]
+                for positional_index, fut in finished_futures:
+                    active_futures.pop(positional_index)
+                    current_task_files = active_task_files.pop(
+                        positional_index
+                    )
+                    zarr_url = list_parameters[positional_index]["zarr_url"]
+                    if not in_compound_task:
+                        update_single_image_logfile(
+                            history_item_id=history_item_id,
+                            zarr_url=zarr_url,
+                            logfile=current_task_files.log_file_local,
+                        )
+                    try:
+                        results[positional_index] = fut.result()
+                        print(f"Mark {zarr_url=} as done, {kwargs}")
+                        if not in_compound_task:
+                            update_single_image(
+                                history_item_id=history_item_id,
+                                zarr_url=zarr_url,
+                                status=HistoryItemImageStatus.DONE,
+                            )
+                    except Exception as e:
+                        print(f"Mark {zarr_url=} as failed, {kwargs} - {e}")
+                        exceptions[positional_index] = e
+                        if not in_compound_task:
+                            update_single_image(
+                                history_item_id=history_item_id,
+                                zarr_url=zarr_url,
+                                status=HistoryItemImageStatus.FAILED,
+                            )
+        if in_compound_task:
+            if exceptions == {}:
+                update_all_images(
+                    history_item_id=history_item_id,
+                    status=HistoryItemImageStatus.DONE,
+                )
+            else:
+                update_all_images(
+                    history_item_id=history_item_id,
+                    status=HistoryItemImageStatus.FAILED,
+                )
+        logger.debug(f"[multisubmit] END, {results=}, {exceptions=}")
+
+        return results, exceptions
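Note: a hedged usage sketch of `LocalRunner` as a context manager. The task function, paths, and IDs below are invented, and since `update_all_images` writes to the database, running this assumes a configured fractal-server instance with a matching `HistoryItemV2` row.

    # Hypothetical driver code; names and values are illustrative only.
    from pathlib import Path

    from fractal_server.app.runner.components import _COMPONENT_KEY_
    from fractal_server.app.runner.executors.local.runner import LocalRunner
    from fractal_server.app.runner.task_files import TaskFiles


    def count_images(parameters: dict) -> dict:
        # Stand-in for a real non-parallel fractal task.
        return {"n_images": len(parameters["zarr_urls"])}


    job_dir = Path("/tmp/fractal-job")
    task_files = TaskFiles(
        root_dir_local=job_dir,
        root_dir_remote=job_dir,
        task_order=0,
        task_name="count-images",
    )
    with LocalRunner(root_dir_local=job_dir) as runner:
        result, exception = runner.submit(
            func=count_images,
            parameters={"zarr_urls": [], _COMPONENT_KEY_: "0"},
            history_item_id=1,  # assumed existing HistoryItemV2 row
            task_files=task_files,
        )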
fractal_server/app/runner/executors/{slurm → slurm_common}/_slurm_config.py
@@ -22,9 +22,9 @@ from pydantic import ConfigDict
 from pydantic import Field
 from pydantic import ValidationError

-from
-from
-from
+from fractal_server.config import get_settings
+from fractal_server.logger import set_logger
+from fractal_server.syringe import Inject

 logger = set_logger(__name__)

fractal_server/app/runner/{v2/_slurm_ssh → executors/slurm_common}/_submit_setup.py
@@ -14,11 +14,12 @@ Submodule to define _slurm_submit_setup, which is also the reference
 implementation of `submit_setup_call`.
 """
 from pathlib import Path
+from typing import Any
 from typing import Literal

-from ...task_files import
+from ...task_files import TaskFiles
 from fractal_server.app.models.v2 import WorkflowTaskV2
-from fractal_server.app.runner.
+from fractal_server.app.runner.executors.slurm_common.get_slurm_config import (
     get_slurm_config,
 )

@@ -26,14 +27,16 @@ from fractal_server.app.runner.v2._slurm_common.get_slurm_config import (
 def _slurm_submit_setup(
     *,
     wftask: WorkflowTaskV2,
-
-
+    root_dir_local: Path,
+    root_dir_remote: Path,
     which_type: Literal["non_parallel", "parallel"],
-) -> dict[str,
+) -> dict[str, Any]:
     """
-    Collect
+    Collect WorkflowTask-specific configuration parameters from different
     sources, and inject them for execution.

+    FIXME
+
     Here goes all the logic for reading attributes from the appropriate sources
     and transforming them into an appropriate `SlurmConfig` object (encoding
     SLURM configuration) and `TaskFiles` object (with details e.g. about file
@@ -68,16 +71,14 @@ def _slurm_submit_setup(
     )

     # Get TaskFiles object
-    task_files =
-
-
+    task_files = TaskFiles(
+        root_dir_local=root_dir_local,
+        root_dir_remote=root_dir_remote,
         task_order=wftask.order,
         task_name=wftask.task.name,
     )

-
-    submit_setup_dict = dict(
+    return dict(
         slurm_config=slurm_config,
         task_files=task_files,
     )
-    return submit_setup_dict
fractal_server/app/runner/{v2/_slurm_common → executors/slurm_common}/get_slurm_config.py
@@ -2,18 +2,12 @@ from pathlib import Path
 from typing import Literal
 from typing import Optional

+from ._slurm_config import _parse_mem_value
+from ._slurm_config import load_slurm_config_file
+from ._slurm_config import logger
+from ._slurm_config import SlurmConfig
+from ._slurm_config import SlurmConfigError
 from fractal_server.app.models.v2 import WorkflowTaskV2
-from fractal_server.app.runner.executors.slurm._slurm_config import (
-    _parse_mem_value,
-)
-from fractal_server.app.runner.executors.slurm._slurm_config import (
-    load_slurm_config_file,
-)
-from fractal_server.app.runner.executors.slurm._slurm_config import logger
-from fractal_server.app.runner.executors.slurm._slurm_config import SlurmConfig
-from fractal_server.app.runner.executors.slurm._slurm_config import (
-    SlurmConfigError,
-)


 def get_slurm_config(
@@ -142,8 +136,8 @@ def get_slurm_config(
     extra_lines = slurm_dict.get("extra_lines", []) + extra_lines
     if len(set(extra_lines)) != len(extra_lines):
         logger.debug(
-            "[get_slurm_config] Removing repeated elements "
-            f"
+            "[get_slurm_config] Removing repeated elements from "
+            f"{extra_lines=}."
         )
         extra_lines = list(set(extra_lines))
     slurm_dict["extra_lines"] = extra_lines
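Note: the deduplication above uses `list(set(extra_lines))`, which does not preserve the order of the extra SLURM lines. A small illustration with invented input, including an order-preserving alternative (not what the code does):

    # set() dedup may reorder lines; dict.fromkeys() keeps first-seen order.
    extra_lines = ["#SBATCH -A proj", "export FOO=1", "#SBATCH -A proj"]
    print(list(set(extra_lines)))  # deduplicated, order not guaranteed
    print(list(dict.fromkeys(extra_lines)))  # deduplicated, order preserved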
@@ -162,8 +156,8 @@ def get_slurm_config(

     # Put everything together
     logger.debug(
-        "[get_slurm_config] Now create a SlurmConfig object based "
-        f"
+        "[get_slurm_config] Now create a SlurmConfig object based on "
+        f"{slurm_dict=}"
     )
     slurm_config = SlurmConfig(**slurm_dict)
