fractal-server 1.4.10__py3-none-any.whl → 2.0.0a0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fractal_server/__init__.py +1 -1
- fractal_server/app/models/__init__.py +4 -7
- fractal_server/app/models/linkuserproject.py +9 -0
- fractal_server/app/models/security.py +6 -0
- fractal_server/app/models/state.py +1 -1
- fractal_server/app/models/v1/__init__.py +10 -0
- fractal_server/app/models/{dataset.py → v1/dataset.py} +5 -5
- fractal_server/app/models/{job.py → v1/job.py} +5 -5
- fractal_server/app/models/{project.py → v1/project.py} +5 -5
- fractal_server/app/models/{task.py → v1/task.py} +7 -2
- fractal_server/app/models/{workflow.py → v1/workflow.py} +5 -5
- fractal_server/app/models/v2/__init__.py +20 -0
- fractal_server/app/models/v2/dataset.py +55 -0
- fractal_server/app/models/v2/job.py +51 -0
- fractal_server/app/models/v2/project.py +31 -0
- fractal_server/app/models/v2/task.py +93 -0
- fractal_server/app/models/v2/workflow.py +43 -0
- fractal_server/app/models/v2/workflowtask.py +90 -0
- fractal_server/app/routes/{admin.py → admin/v1.py} +42 -42
- fractal_server/app/routes/admin/v2.py +275 -0
- fractal_server/app/routes/api/v1/__init__.py +7 -7
- fractal_server/app/routes/api/v1/_aux_functions.py +2 -2
- fractal_server/app/routes/api/v1/dataset.py +37 -37
- fractal_server/app/routes/api/v1/job.py +12 -12
- fractal_server/app/routes/api/v1/project.py +23 -21
- fractal_server/app/routes/api/v1/task.py +24 -14
- fractal_server/app/routes/api/v1/task_collection.py +16 -14
- fractal_server/app/routes/api/v1/workflow.py +24 -24
- fractal_server/app/routes/api/v1/workflowtask.py +10 -10
- fractal_server/app/routes/api/v2/__init__.py +28 -0
- fractal_server/app/routes/api/v2/_aux_functions.py +497 -0
- fractal_server/app/routes/api/v2/apply.py +220 -0
- fractal_server/app/routes/api/v2/dataset.py +310 -0
- fractal_server/app/routes/api/v2/images.py +212 -0
- fractal_server/app/routes/api/v2/job.py +200 -0
- fractal_server/app/routes/api/v2/project.py +205 -0
- fractal_server/app/routes/api/v2/task.py +222 -0
- fractal_server/app/routes/api/v2/task_collection.py +229 -0
- fractal_server/app/routes/api/v2/workflow.py +398 -0
- fractal_server/app/routes/api/v2/workflowtask.py +269 -0
- fractal_server/app/routes/aux/_job.py +1 -1
- fractal_server/app/runner/async_wrap.py +27 -0
- fractal_server/app/runner/exceptions.py +129 -0
- fractal_server/app/runner/executors/local/__init__.py +3 -0
- fractal_server/app/runner/{_local → executors/local}/executor.py +2 -2
- fractal_server/app/runner/executors/slurm/__init__.py +3 -0
- fractal_server/app/runner/{_slurm → executors/slurm}/_batching.py +1 -1
- fractal_server/app/runner/{_slurm → executors/slurm}/_check_jobs_status.py +1 -1
- fractal_server/app/runner/{_slurm → executors/slurm}/_executor_wait_thread.py +1 -1
- fractal_server/app/runner/{_slurm → executors/slurm}/_slurm_config.py +3 -152
- fractal_server/app/runner/{_slurm → executors/slurm}/_subprocess_run_as_user.py +1 -1
- fractal_server/app/runner/{_slurm → executors/slurm}/executor.py +9 -9
- fractal_server/app/runner/filenames.py +6 -0
- fractal_server/app/runner/set_start_and_last_task_index.py +39 -0
- fractal_server/app/runner/task_files.py +105 -0
- fractal_server/app/runner/{__init__.py → v1/__init__.py} +24 -22
- fractal_server/app/runner/{_common.py → v1/_common.py} +13 -120
- fractal_server/app/runner/{_local → v1/_local}/__init__.py +6 -6
- fractal_server/app/runner/{_local → v1/_local}/_local_config.py +6 -7
- fractal_server/app/runner/{_local → v1/_local}/_submit_setup.py +1 -5
- fractal_server/app/runner/v1/_slurm/__init__.py +310 -0
- fractal_server/app/runner/{_slurm → v1/_slurm}/_submit_setup.py +3 -9
- fractal_server/app/runner/v1/_slurm/get_slurm_config.py +163 -0
- fractal_server/app/runner/v1/common.py +117 -0
- fractal_server/app/runner/{handle_failed_job.py → v1/handle_failed_job.py} +8 -8
- fractal_server/app/runner/v2/__init__.py +337 -0
- fractal_server/app/runner/v2/_local/__init__.py +169 -0
- fractal_server/app/runner/v2/_local/_local_config.py +118 -0
- fractal_server/app/runner/v2/_local/_submit_setup.py +52 -0
- fractal_server/app/runner/v2/_slurm/__init__.py +157 -0
- fractal_server/app/runner/v2/_slurm/_submit_setup.py +83 -0
- fractal_server/app/runner/v2/_slurm/get_slurm_config.py +179 -0
- fractal_server/app/runner/v2/components.py +5 -0
- fractal_server/app/runner/v2/deduplicate_list.py +24 -0
- fractal_server/app/runner/v2/handle_failed_job.py +156 -0
- fractal_server/app/runner/v2/merge_outputs.py +41 -0
- fractal_server/app/runner/v2/runner.py +264 -0
- fractal_server/app/runner/v2/runner_functions.py +339 -0
- fractal_server/app/runner/v2/runner_functions_low_level.py +134 -0
- fractal_server/app/runner/v2/task_interface.py +43 -0
- fractal_server/app/runner/v2/v1_compat.py +21 -0
- fractal_server/app/schemas/__init__.py +4 -42
- fractal_server/app/schemas/v1/__init__.py +42 -0
- fractal_server/app/schemas/{applyworkflow.py → v1/applyworkflow.py} +18 -18
- fractal_server/app/schemas/{dataset.py → v1/dataset.py} +30 -30
- fractal_server/app/schemas/{dumps.py → v1/dumps.py} +8 -8
- fractal_server/app/schemas/{manifest.py → v1/manifest.py} +5 -5
- fractal_server/app/schemas/{project.py → v1/project.py} +9 -9
- fractal_server/app/schemas/{task.py → v1/task.py} +12 -12
- fractal_server/app/schemas/{task_collection.py → v1/task_collection.py} +7 -7
- fractal_server/app/schemas/{workflow.py → v1/workflow.py} +38 -38
- fractal_server/app/schemas/v2/__init__.py +34 -0
- fractal_server/app/schemas/v2/dataset.py +88 -0
- fractal_server/app/schemas/v2/dumps.py +87 -0
- fractal_server/app/schemas/v2/job.py +113 -0
- fractal_server/app/schemas/v2/manifest.py +109 -0
- fractal_server/app/schemas/v2/project.py +36 -0
- fractal_server/app/schemas/v2/task.py +121 -0
- fractal_server/app/schemas/v2/task_collection.py +105 -0
- fractal_server/app/schemas/v2/workflow.py +78 -0
- fractal_server/app/schemas/v2/workflowtask.py +118 -0
- fractal_server/config.py +5 -4
- fractal_server/images/__init__.py +50 -0
- fractal_server/images/tools.py +86 -0
- fractal_server/main.py +11 -3
- fractal_server/migrations/versions/4b35c5cefbe3_tmp_is_v2_compatible.py +39 -0
- fractal_server/migrations/versions/56af171b0159_v2.py +217 -0
- fractal_server/migrations/versions/876f28db9d4e_tmp_split_task_and_wftask_meta.py +68 -0
- fractal_server/migrations/versions/974c802f0dd0_tmp_workflowtaskv2_type_in_db.py +37 -0
- fractal_server/migrations/versions/9cd305cd6023_tmp_workflowtaskv2.py +40 -0
- fractal_server/migrations/versions/a6231ed6273c_tmp_args_schemas_in_taskv2.py +42 -0
- fractal_server/migrations/versions/b9e9eed9d442_tmp_taskv2_type.py +37 -0
- fractal_server/migrations/versions/e3e639454d4b_tmp_make_task_meta_non_optional.py +50 -0
- fractal_server/tasks/__init__.py +0 -5
- fractal_server/tasks/endpoint_operations.py +13 -19
- fractal_server/tasks/utils.py +35 -0
- fractal_server/tasks/{_TaskCollectPip.py → v1/_TaskCollectPip.py} +3 -3
- fractal_server/tasks/{background_operations.py → v1/background_operations.py} +18 -50
- fractal_server/tasks/v1/get_collection_data.py +14 -0
- fractal_server/tasks/v2/_TaskCollectPip.py +103 -0
- fractal_server/tasks/v2/background_operations.py +382 -0
- fractal_server/tasks/v2/get_collection_data.py +14 -0
- {fractal_server-1.4.10.dist-info → fractal_server-2.0.0a0.dist-info}/METADATA +1 -1
- fractal_server-2.0.0a0.dist-info/RECORD +166 -0
- fractal_server/app/runner/_slurm/.gitignore +0 -2
- fractal_server/app/runner/_slurm/__init__.py +0 -150
- fractal_server/app/runner/common.py +0 -311
- fractal_server-1.4.10.dist-info/RECORD +0 -98
- /fractal_server/app/runner/{_slurm → executors/slurm}/remote.py +0 -0
- {fractal_server-1.4.10.dist-info → fractal_server-2.0.0a0.dist-info}/LICENSE +0 -0
- {fractal_server-1.4.10.dist-info → fractal_server-2.0.0a0.dist-info}/WHEEL +0 -0
- {fractal_server-1.4.10.dist-info → fractal_server-2.0.0a0.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,269 @@
|
|
1
|
+
from copy import deepcopy
|
2
|
+
from typing import Optional
|
3
|
+
|
4
|
+
from fastapi import APIRouter
|
5
|
+
from fastapi import Depends
|
6
|
+
from fastapi import HTTPException
|
7
|
+
from fastapi import Response
|
8
|
+
from fastapi import status
|
9
|
+
|
10
|
+
from ....db import AsyncSession
|
11
|
+
from ....db import get_async_db
|
12
|
+
from ....models.v1 import Task
|
13
|
+
from ....models.v2 import TaskV2
|
14
|
+
from ....schemas.v2 import WorkflowTaskCreateV2
|
15
|
+
from ....schemas.v2 import WorkflowTaskReadV2
|
16
|
+
from ....schemas.v2 import WorkflowTaskUpdateV2
|
17
|
+
from ....security import current_active_user
|
18
|
+
from ....security import User
|
19
|
+
from ._aux_functions import _get_workflow_check_owner
|
20
|
+
from ._aux_functions import _get_workflow_task_check_owner
|
21
|
+
from ._aux_functions import _workflow_insert_task
|
22
|
+
|
23
|
+
router = APIRouter()
|
24
|
+
|
25
|
+
|
26
|
+
@router.post(
    "/project/{project_id}/workflow/{workflow_id}/wftask/",
    response_model=WorkflowTaskReadV2,
    status_code=status.HTTP_201_CREATED,
)
async def create_workflowtask(
    project_id: int,
    workflow_id: int,
    task_id: int,
    new_task: WorkflowTaskCreateV2,
    user: User = Depends(current_active_user),
    db: AsyncSession = Depends(get_async_db),
) -> Optional[WorkflowTaskReadV2]:
    """
    Add a WorkflowTask to a Workflow

    Args:
        project_id: ID of the project that owns the workflow.
        workflow_id: ID of the workflow to extend.
        task_id: ID of the (legacy V1 or V2) task to attach.
        new_task: Creation payload (order, args/meta overrides, filters).

    Raises:
        HTTPException: 404 if the task does not exist; 422 if a legacy task
            is not V2-compatible, or if parallel/non-parallel args or meta
            are set inconsistently with the task type.
    """

    workflow = await _get_workflow_check_owner(
        project_id=project_id, workflow_id=workflow_id, user_id=user.id, db=db
    )

    # Fetch the task from the appropriate table (legacy V1 vs V2).
    if new_task.is_legacy_task is True:
        task = await db.get(Task, task_id)
    else:
        task = await db.get(TaskV2, task_id)

    # BUGFIX: the existence check must come before any attribute access on
    # `task`. The original code read `task.is_v2_compatible` before checking
    # for None, so a missing legacy task raised AttributeError (HTTP 500)
    # instead of the intended 404.
    if not task:
        if new_task.is_legacy_task:
            error = f"Task {task_id} not found."
        else:
            error = f"TaskV2 {task_id} not found."

        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND, detail=error
        )

    if new_task.is_legacy_task is True and not task.is_v2_compatible:
        raise HTTPException(
            status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
            detail=f"Task {task_id} is not V2-compatible.",
        )

    # Legacy and parallel tasks accept no non-parallel args/meta;
    # non-parallel tasks accept no parallel args/meta.
    if new_task.is_legacy_task is True or task.type == "parallel":
        if (
            new_task.meta_non_parallel is not None
            or new_task.args_non_parallel is not None
        ):
            raise HTTPException(
                status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
                detail=(
                    "Cannot set `WorkflowTaskV2.meta_non_parallel` or "
                    "`WorkflowTask.args_non_parallel` if the associated Task "
                    "is `parallel` (or legacy)."
                ),
            )
    elif task.type == "non_parallel":
        if (
            new_task.meta_parallel is not None
            or new_task.args_parallel is not None
        ):
            raise HTTPException(
                status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
                detail=(
                    "Cannot set `WorkflowTaskV2.meta_parallel` or "
                    "`WorkflowTask.args_parallel` if the associated Task "
                    "is `non_parallel`."
                ),
            )

    async with db:
        workflow_task = await _workflow_insert_task(
            workflow_id=workflow.id,
            is_legacy_task=new_task.is_legacy_task,
            task_id=task_id,
            order=new_task.order,
            meta_non_parallel=new_task.meta_non_parallel,
            meta_parallel=new_task.meta_parallel,
            args_non_parallel=new_task.args_non_parallel,
            args_parallel=new_task.args_parallel,
            input_filters=new_task.input_filters,
            db=db,
        )

    await db.close()

    return workflow_task
|
111
|
+
|
112
|
+
|
113
|
+
@router.get(
    "/project/{project_id}/workflow/{workflow_id}/wftask/{workflow_task_id}/",
    response_model=WorkflowTaskReadV2,
)
async def read_workflowtask(
    project_id: int,
    workflow_id: int,
    workflow_task_id: int,
    user: User = Depends(current_active_user),
    db: AsyncSession = Depends(get_async_db),
):
    """
    Return a single WorkflowTask, after verifying project ownership.
    """
    # The helper returns both the task and its workflow; only the task
    # itself is needed here.
    wftask, _ = await _get_workflow_task_check_owner(
        project_id=project_id,
        workflow_task_id=workflow_task_id,
        workflow_id=workflow_id,
        user_id=user.id,
        db=db,
    )
    return wftask
|
132
|
+
|
133
|
+
|
134
|
+
@router.patch(
    "/project/{project_id}/workflow/{workflow_id}/wftask/{workflow_task_id}/",
    response_model=WorkflowTaskReadV2,
)
async def update_workflowtask(
    project_id: int,
    workflow_id: int,
    workflow_task_id: int,
    workflow_task_update: WorkflowTaskUpdateV2,
    user: User = Depends(current_active_user),
    db: AsyncSession = Depends(get_async_db),
) -> Optional[WorkflowTaskReadV2]:
    """
    Edit a WorkflowTask of a Workflow

    Patch semantics, per provided key:

    * `args_parallel` / `args_non_parallel`: start from the task's
      schema-derived default arguments, overlay the provided items, and
      store `None` if the merged mapping ends up empty.
    * `meta_parallel` / `meta_non_parallel`: shallow-merge the provided
      items into the current mapping.
    * Any other key is rejected with a 422.

    Raises:
        HTTPException: 422 if a non-parallel field is patched on a parallel
            task (or vice versa), or if an unsupported key is provided.
    """

    db_wf_task, db_workflow = await _get_workflow_task_check_owner(
        project_id=project_id,
        workflow_task_id=workflow_task_id,
        workflow_id=workflow_id,
        user_id=user.id,
        db=db,
    )

    # Reject patches that are inconsistent with the task's execution type.
    if db_wf_task.task_type == "parallel" and (
        workflow_task_update.args_non_parallel is not None
        or workflow_task_update.meta_non_parallel is not None
    ):
        raise HTTPException(
            status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
            detail=(
                "Cannot patch `WorkflowTaskV2.args_non_parallel` or "
                "`WorkflowTask.meta_non_parallel` if the associated Task is "
                "parallel."
            ),
        )
    elif db_wf_task.task_type == "non_parallel" and (
        workflow_task_update.args_parallel is not None
        or workflow_task_update.meta_parallel is not None
    ):
        raise HTTPException(
            status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
            detail=(
                "Cannot patch `WorkflowTaskV2.args_parallel` or "
                "`WorkflowTask.meta_parallel` if the associated Task is "
                "non parallel."
            ),
        )

    # Only keys explicitly set in the request body are applied
    # (exclude_unset=True), so omitted fields are left untouched.
    for key, value in workflow_task_update.dict(exclude_unset=True).items():
        if key == "args_parallel":
            # Get default arguments via a Task property method
            if db_wf_task.is_legacy_task:
                default_args = (
                    db_wf_task.task_legacy.default_args_from_args_schema
                )
            else:
                default_args = (
                    db_wf_task.task.default_args_parallel_from_args_schema
                )
            # Override default_args with args value items
            actual_args = deepcopy(default_args)
            if value is not None:
                for k, v in value.items():
                    actual_args[k] = v
            # An empty mapping is normalized to None before storing.
            if not actual_args:
                actual_args = None
            setattr(db_wf_task, key, actual_args)
        elif key == "args_non_parallel":
            # Get default arguments via a Task property method
            # NOTE(review): unlike the args_parallel branch, this always
            # reads `db_wf_task.task` — presumably legacy tasks never carry
            # non-parallel args; confirm against the schema validators.
            default_args = deepcopy(
                db_wf_task.task.default_args_non_parallel_from_args_schema
            )
            # Override default_args with args value items
            actual_args = default_args.copy()
            if value is not None:
                for k, v in value.items():
                    actual_args[k] = v
            if not actual_args:
                actual_args = None
            setattr(db_wf_task, key, actual_args)
        elif key == "meta_parallel":
            # Shallow-merge the patch into the existing meta mapping.
            current_meta_parallel = deepcopy(db_wf_task.meta_parallel) or {}
            current_meta_parallel.update(value)
            setattr(db_wf_task, key, current_meta_parallel)
        elif key == "meta_non_parallel":
            current_meta_non_parallel = (
                deepcopy(db_wf_task.meta_non_parallel) or {}
            )
            current_meta_non_parallel.update(value)
            setattr(db_wf_task, key, current_meta_non_parallel)
        # FIXME handle `input_filters`
        else:
            raise HTTPException(
                status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
                detail=f"patch_workflow_task endpoint cannot set {key=}",
            )

    await db.commit()
    await db.refresh(db_wf_task)
    await db.close()

    return db_wf_task
|
237
|
+
|
238
|
+
|
239
|
+
@router.delete(
    "/project/{project_id}/workflow/{workflow_id}/wftask/{workflow_task_id}/",
    status_code=status.HTTP_204_NO_CONTENT,
)
async def delete_workflowtask(
    project_id: int,
    workflow_id: int,
    workflow_task_id: int,
    user: User = Depends(current_active_user),
    db: AsyncSession = Depends(get_async_db),
) -> Response:
    """
    Delete a WorkflowTask of a Workflow
    """
    # Ownership check also resolves both the task and its parent workflow.
    wftask_to_delete, parent_workflow = await _get_workflow_task_check_owner(
        project_id=project_id,
        workflow_task_id=workflow_task_id,
        workflow_id=workflow_id,
        user_id=user.id,
        db=db,
    )

    # Remove the task and persist the deletion.
    await db.delete(wftask_to_delete)
    await db.commit()

    # Re-pack the `order` values of the remaining tasks, then persist again.
    await db.refresh(parent_workflow)
    parent_workflow.task_list.reorder()
    await db.commit()

    return Response(status_code=status.HTTP_204_NO_CONTENT)
|
@@ -0,0 +1,27 @@
|
|
1
|
+
import asyncio
|
2
|
+
from functools import partial
|
3
|
+
from functools import wraps
|
4
|
+
from typing import Callable
|
5
|
+
|
6
|
+
|
7
|
+
def async_wrap(func: Callable) -> Callable:
    """
    Wrap a synchronous callable in an async task

    Ref: [issue #140](https://github.com/fractal-analytics-platform/fractal-server/issues/140)
    and [this StackOverflow answer](https://stackoverflow.com/q/43241221/19085332).

    Arguments:
        func: The blocking callable to wrap.

    Returns:
        async_wrapper:
            A factory that allows wrapping a blocking callable within a
            coroutine. The wrapper accepts two extra keyword arguments:
            `loop` (defaults to the running event loop) and `executor`
            (defaults to the loop's default executor).
    """  # noqa: E501

    @wraps(func)
    async def async_wrapper(*args, loop=None, executor=None, **kwargs):
        if loop is None:
            # The wrapper body always runs inside a coroutine, so a running
            # loop is guaranteed to exist; `get_running_loop` replaces the
            # deprecated implicit-loop behavior of `asyncio.get_event_loop`.
            loop = asyncio.get_running_loop()
        pfunc = partial(func, *args, **kwargs)
        return await loop.run_in_executor(executor, pfunc)

    return async_wrapper
|
@@ -0,0 +1,129 @@
|
|
1
|
+
import os
|
2
|
+
from typing import Optional
|
3
|
+
|
4
|
+
|
5
|
+
class TaskExecutionError(RuntimeError):
    """
    Forwards errors occurred during the execution of a task

    Raised when a task exits with a positive exit code, i.e. the failure
    took place inside the task itself. Carries extra context that helps
    locate the failing task within its workflow.

    Attributes:
        workflow_task_id:
            ID of the workflow task that failed.
        workflow_task_order:
            Order of the task within the workflow.
        task_name:
            Human readable name of the failing task.
    """

    workflow_task_id: Optional[int] = None
    workflow_task_order: Optional[int] = None
    task_name: Optional[str] = None

    def __init__(
        self,
        *args,
        workflow_task_id: Optional[int] = None,
        workflow_task_order: Optional[int] = None,
        task_name: Optional[str] = None,
    ):
        # Positional args go to RuntimeError (they become the message);
        # the keyword-only context attributes are stored on the instance.
        super().__init__(*args)
        self.task_name = task_name
        self.workflow_task_order = workflow_task_order
        self.workflow_task_id = workflow_task_id
|
38
|
+
|
39
|
+
|
40
|
+
class JobExecutionError(RuntimeError):
    """
    Forwards errors in the execution of a task that are due to external factors

    Raised for failures that are not the task's own doing, e.g.:

    1. A negative exit code (the task received a TERM or KILL signal);
    2. An executor-side error (e.g. the SLURM executor could not find the
       pickled file with task output).

    Attributes:
        info:
            A free field for additional information
        cmd_file:
            Path to the file of the command that was executed (e.g. a SLURM
            submission script).
        stdout_file:
            Path to the file with the command stdout
        stderr_file:
            Path to the file with the command stderr
    """

    cmd_file: Optional[str] = None
    stdout_file: Optional[str] = None
    stderr_file: Optional[str] = None
    info: Optional[str] = None

    def __init__(
        self,
        *args,
        cmd_file: Optional[str] = None,
        stdout_file: Optional[str] = None,
        stderr_file: Optional[str] = None,
        info: Optional[str] = None,
    ):
        super().__init__(*args)
        self.info = info
        self.stderr_file = stderr_file
        self.stdout_file = stdout_file
        self.cmd_file = cmd_file

    def _read_file(self, filepath: str) -> str:
        """
        Return the content of a text file, and handle the cases where it is
        empty or missing
        """
        if not os.path.exists(filepath):
            return f"File {filepath} is missing\n"
        with open(filepath, "r") as f:
            body = f.read()
        if not body:
            return f"File {filepath} is empty\n"
        return f"Content of {filepath}:\n{body}"

    def assemble_error(self) -> str:
        """
        Read the files that are specified in attributes, and combine them in an
        error message.
        """
        # Build one "LABEL:\n<file content>\n\n" section per attribute that
        # points at a file; absent attributes contribute nothing.
        sections = []
        for label, filepath in (
            ("COMMAND", self.cmd_file),
            ("STDOUT", self.stdout_file),
            ("STDERR", self.stderr_file),
        ):
            if filepath:
                sections.append(f"{label}:\n{self._read_file(filepath)}\n\n")

        content = "".join(sections)
        if self.info:
            content = f"{content}ADDITIONAL INFO:\n{self.info}\n\n"

        # With nothing else to show, fall back to the exception message.
        if not content:
            content = str(self)
        return f"JobExecutionError\n\n{content}"
|
@@ -18,8 +18,8 @@ from typing import Iterable
|
|
18
18
|
from typing import Optional
|
19
19
|
from typing import Sequence
|
20
20
|
|
21
|
-
from ._local_config import get_default_local_backend_config
|
22
|
-
from ._local_config import LocalBackendConfig
|
21
|
+
from ...v1._local._local_config import get_default_local_backend_config
|
22
|
+
from ...v1._local._local_config import LocalBackendConfig
|
23
23
|
|
24
24
|
|
25
25
|
class FractalThreadPoolExecutor(ThreadPoolExecutor):
|
@@ -22,10 +22,9 @@ from pydantic import Extra
|
|
22
22
|
from pydantic import Field
|
23
23
|
from pydantic.error_wrappers import ValidationError
|
24
24
|
|
25
|
-
from
|
26
|
-
from
|
27
|
-
from
|
28
|
-
from ...models import WorkflowTask
|
25
|
+
from .....config import get_settings
|
26
|
+
from .....logger import set_logger
|
27
|
+
from .....syringe import Inject
|
29
28
|
|
30
29
|
logger = set_logger(__name__)
|
31
30
|
|
@@ -459,151 +458,3 @@ def get_default_slurm_config():
|
|
459
458
|
target_num_jobs=2,
|
460
459
|
max_num_jobs=4,
|
461
460
|
)
|
462
|
-
|
463
|
-
|
464
|
-
def get_slurm_config(
    wftask: WorkflowTask,
    workflow_dir: Path,
    workflow_dir_user: Path,
    config_path: Optional[Path] = None,
) -> SlurmConfig:
    """
    Prepare a `SlurmConfig` configuration object

    The sources for `SlurmConfig` attributes, in increasing priority order, are

    1. The general content of the Fractal SLURM configuration file.
    2. The GPU-specific content of the Fractal SLURM configuration file, if
       appropriate.
    3. Properties in `wftask.meta` (which, for `WorkflowTask`s added through
       `Workflow.insert_task`, also includes `wftask.task.meta`);

    Note: `wftask.meta` may be `None`.

    Arguments:
        wftask:
            WorkflowTask for which the SLURM configuration is to be
            prepared.
        workflow_dir:
            Server-owned directory to store all task-execution-related relevant
            files (inputs, outputs, errors, and all meta files related to the
            job execution). Note: users cannot write directly to this folder.
        workflow_dir_user:
            User-side directory with the same scope as `workflow_dir`, and
            where a user can write.
        config_path:
            Path of a Fractal SLURM configuration file; if `None`, use
            `FRACTAL_SLURM_CONFIG_FILE` variable from settings.

    Returns:
        slurm_config:
            The SlurmConfig object

    Raises:
        SlurmConfigError: if `wftask.meta` carries an `account` key (the
            SLURM account must be set elsewhere, see error message below).
    """
    # NOTE(review): `workflow_dir` and `workflow_dir_user` are not used in
    # this body — presumably kept for signature compatibility; confirm.

    # NOTE(review): missing f-prefix — this logs the literal text
    # "{wftask.meta=}" rather than the interpolated value.
    logger.debug(
        "[get_slurm_config] WorkflowTask meta attribute: {wftask.meta=}"
    )

    # Incorporate slurm_env.default_slurm_config
    # `mem` is excluded and re-mapped onto the `mem_per_task_MB` key below.
    slurm_env = load_slurm_config_file(config_path=config_path)
    slurm_dict = slurm_env.default_slurm_config.dict(
        exclude_unset=True, exclude={"mem"}
    )
    if slurm_env.default_slurm_config.mem:
        slurm_dict["mem_per_task_MB"] = slurm_env.default_slurm_config.mem

    # Incorporate slurm_env.batching_config
    for key, value in slurm_env.batching_config.dict().items():
        slurm_dict[key] = value

    # Incorporate slurm_env.user_local_exports
    slurm_dict["user_local_exports"] = slurm_env.user_local_exports

    logger.debug(
        "[get_slurm_config] Fractal SLURM configuration file: "
        f"{slurm_env.dict()=}"
    )

    # GPU-related options
    # Notes about priority:
    # 1. This block of definitions takes priority over other definitions from
    #    slurm_env which are not under the `needs_gpu` subgroup
    # 2. This block of definitions has lower priority than whatever comes next
    #    (i.e. from WorkflowTask.meta).
    if wftask.meta is not None:
        needs_gpu = wftask.meta.get("needs_gpu", False)
    else:
        needs_gpu = False
    logger.debug(f"[get_slurm_config] {needs_gpu=}")
    if needs_gpu:
        for key, value in slurm_env.gpu_slurm_config.dict(
            exclude_unset=True, exclude={"mem"}
        ).items():
            slurm_dict[key] = value
        if slurm_env.gpu_slurm_config.mem:
            slurm_dict["mem_per_task_MB"] = slurm_env.gpu_slurm_config.mem

    # Number of CPUs per task, for multithreading
    if wftask.meta is not None and "cpus_per_task" in wftask.meta:
        cpus_per_task = int(wftask.meta["cpus_per_task"])
        slurm_dict["cpus_per_task"] = cpus_per_task

    # Required memory per task, in MB
    if wftask.meta is not None and "mem" in wftask.meta:
        raw_mem = wftask.meta["mem"]
        mem_per_task_MB = _parse_mem_value(raw_mem)
        slurm_dict["mem_per_task_MB"] = mem_per_task_MB

    # Job name (spaces are not valid in SLURM job names)
    job_name = wftask.task.name.replace(" ", "_")
    slurm_dict["job_name"] = job_name

    # Optional SLURM arguments and extra lines
    if wftask.meta is not None:
        # Setting `account` through task meta is explicitly forbidden.
        account = wftask.meta.get("account", None)
        if account is not None:
            error_msg = (
                f"Invalid {account=} property in WorkflowTask `meta` "
                "attribute.\n"
                "SLURM account must be set in the request body of the "
                "apply-workflow endpoint, or by modifying the user properties."
            )
            logger.error(error_msg)
            raise SlurmConfigError(error_msg)
        for key in ["time", "gres", "constraint"]:
            value = wftask.meta.get(key, None)
            if value:
                slurm_dict[key] = value
    if wftask.meta is not None:
        extra_lines = wftask.meta.get("extra_lines", [])
    else:
        extra_lines = []
    # Merge config-file extra lines with meta extra lines, de-duplicating.
    # NOTE(review): `list(set(...))` does not preserve the original line
    # order — presumably acceptable for sbatch preamble lines; confirm.
    extra_lines = slurm_dict.get("extra_lines", []) + extra_lines
    if len(set(extra_lines)) != len(extra_lines):
        logger.debug(
            "[get_slurm_config] Removing repeated elements "
            f"from {extra_lines=}."
        )
        extra_lines = list(set(extra_lines))
    slurm_dict["extra_lines"] = extra_lines

    # Job-batching parameters (if None, they will be determined heuristically)
    if wftask.meta is not None:
        tasks_per_job = wftask.meta.get("tasks_per_job", None)
        parallel_tasks_per_job = wftask.meta.get(
            "parallel_tasks_per_job", None
        )
    else:
        tasks_per_job = None
        parallel_tasks_per_job = None
    slurm_dict["tasks_per_job"] = tasks_per_job
    slurm_dict["parallel_tasks_per_job"] = parallel_tasks_per_job

    # Put everything together
    logger.debug(
        "[get_slurm_config] Now create a SlurmConfig object based "
        f"on {slurm_dict=}"
    )
    slurm_config = SlurmConfig(**slurm_dict)

    return slurm_config
|