fractal-server 1.4.10__py3-none-any.whl → 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fractal_server/__init__.py +1 -1
- fractal_server/app/models/__init__.py +6 -8
- fractal_server/app/models/linkuserproject.py +9 -0
- fractal_server/app/models/security.py +6 -0
- fractal_server/app/models/v1/__init__.py +12 -0
- fractal_server/app/models/{dataset.py → v1/dataset.py} +5 -5
- fractal_server/app/models/{job.py → v1/job.py} +5 -5
- fractal_server/app/models/{project.py → v1/project.py} +5 -5
- fractal_server/app/models/{state.py → v1/state.py} +2 -2
- fractal_server/app/models/{task.py → v1/task.py} +7 -2
- fractal_server/app/models/{workflow.py → v1/workflow.py} +5 -5
- fractal_server/app/models/v2/__init__.py +22 -0
- fractal_server/app/models/v2/collection_state.py +21 -0
- fractal_server/app/models/v2/dataset.py +54 -0
- fractal_server/app/models/v2/job.py +51 -0
- fractal_server/app/models/v2/project.py +30 -0
- fractal_server/app/models/v2/task.py +93 -0
- fractal_server/app/models/v2/workflow.py +35 -0
- fractal_server/app/models/v2/workflowtask.py +49 -0
- fractal_server/app/routes/admin/__init__.py +0 -0
- fractal_server/app/routes/{admin.py → admin/v1.py} +42 -42
- fractal_server/app/routes/admin/v2.py +309 -0
- fractal_server/app/routes/api/v1/__init__.py +7 -7
- fractal_server/app/routes/api/v1/_aux_functions.py +8 -8
- fractal_server/app/routes/api/v1/dataset.py +41 -41
- fractal_server/app/routes/api/v1/job.py +14 -14
- fractal_server/app/routes/api/v1/project.py +27 -25
- fractal_server/app/routes/api/v1/task.py +26 -16
- fractal_server/app/routes/api/v1/task_collection.py +28 -16
- fractal_server/app/routes/api/v1/workflow.py +28 -28
- fractal_server/app/routes/api/v1/workflowtask.py +11 -11
- fractal_server/app/routes/api/v2/__init__.py +34 -0
- fractal_server/app/routes/api/v2/_aux_functions.py +502 -0
- fractal_server/app/routes/api/v2/dataset.py +293 -0
- fractal_server/app/routes/api/v2/images.py +279 -0
- fractal_server/app/routes/api/v2/job.py +200 -0
- fractal_server/app/routes/api/v2/project.py +186 -0
- fractal_server/app/routes/api/v2/status.py +150 -0
- fractal_server/app/routes/api/v2/submit.py +210 -0
- fractal_server/app/routes/api/v2/task.py +222 -0
- fractal_server/app/routes/api/v2/task_collection.py +239 -0
- fractal_server/app/routes/api/v2/task_legacy.py +59 -0
- fractal_server/app/routes/api/v2/workflow.py +380 -0
- fractal_server/app/routes/api/v2/workflowtask.py +265 -0
- fractal_server/app/routes/aux/_job.py +2 -2
- fractal_server/app/runner/__init__.py +0 -364
- fractal_server/app/runner/async_wrap.py +27 -0
- fractal_server/app/runner/components.py +5 -0
- fractal_server/app/runner/exceptions.py +129 -0
- fractal_server/app/runner/executors/__init__.py +0 -0
- fractal_server/app/runner/executors/slurm/__init__.py +3 -0
- fractal_server/app/runner/{_slurm → executors/slurm}/_batching.py +1 -1
- fractal_server/app/runner/{_slurm → executors/slurm}/_check_jobs_status.py +1 -1
- fractal_server/app/runner/{_slurm → executors/slurm}/_executor_wait_thread.py +1 -1
- fractal_server/app/runner/{_slurm → executors/slurm}/_slurm_config.py +3 -152
- fractal_server/app/runner/{_slurm → executors/slurm}/_subprocess_run_as_user.py +1 -1
- fractal_server/app/runner/{_slurm → executors/slurm}/executor.py +32 -21
- fractal_server/app/runner/filenames.py +6 -0
- fractal_server/app/runner/set_start_and_last_task_index.py +39 -0
- fractal_server/app/runner/task_files.py +103 -0
- fractal_server/app/runner/v1/__init__.py +366 -0
- fractal_server/app/runner/{_common.py → v1/_common.py} +14 -121
- fractal_server/app/runner/{_local → v1/_local}/__init__.py +5 -4
- fractal_server/app/runner/{_local → v1/_local}/_local_config.py +6 -7
- fractal_server/app/runner/{_local → v1/_local}/_submit_setup.py +1 -5
- fractal_server/app/runner/v1/_slurm/__init__.py +312 -0
- fractal_server/app/runner/{_slurm → v1/_slurm}/_submit_setup.py +5 -11
- fractal_server/app/runner/v1/_slurm/get_slurm_config.py +163 -0
- fractal_server/app/runner/v1/common.py +117 -0
- fractal_server/app/runner/{handle_failed_job.py → v1/handle_failed_job.py} +8 -8
- fractal_server/app/runner/v2/__init__.py +336 -0
- fractal_server/app/runner/v2/_local/__init__.py +162 -0
- fractal_server/app/runner/v2/_local/_local_config.py +118 -0
- fractal_server/app/runner/v2/_local/_submit_setup.py +52 -0
- fractal_server/app/runner/v2/_local/executor.py +100 -0
- fractal_server/app/runner/{_slurm → v2/_slurm}/__init__.py +38 -47
- fractal_server/app/runner/v2/_slurm/_submit_setup.py +82 -0
- fractal_server/app/runner/v2/_slurm/get_slurm_config.py +182 -0
- fractal_server/app/runner/v2/deduplicate_list.py +23 -0
- fractal_server/app/runner/v2/handle_failed_job.py +165 -0
- fractal_server/app/runner/v2/merge_outputs.py +38 -0
- fractal_server/app/runner/v2/runner.py +343 -0
- fractal_server/app/runner/v2/runner_functions.py +374 -0
- fractal_server/app/runner/v2/runner_functions_low_level.py +130 -0
- fractal_server/app/runner/v2/task_interface.py +62 -0
- fractal_server/app/runner/v2/v1_compat.py +31 -0
- fractal_server/app/schemas/__init__.py +1 -42
- fractal_server/app/schemas/_validators.py +28 -5
- fractal_server/app/schemas/v1/__init__.py +36 -0
- fractal_server/app/schemas/{applyworkflow.py → v1/applyworkflow.py} +18 -18
- fractal_server/app/schemas/{dataset.py → v1/dataset.py} +30 -30
- fractal_server/app/schemas/{dumps.py → v1/dumps.py} +8 -8
- fractal_server/app/schemas/{manifest.py → v1/manifest.py} +5 -5
- fractal_server/app/schemas/{project.py → v1/project.py} +9 -9
- fractal_server/app/schemas/{task.py → v1/task.py} +12 -12
- fractal_server/app/schemas/{task_collection.py → v1/task_collection.py} +7 -7
- fractal_server/app/schemas/{workflow.py → v1/workflow.py} +38 -38
- fractal_server/app/schemas/v2/__init__.py +37 -0
- fractal_server/app/schemas/v2/dataset.py +126 -0
- fractal_server/app/schemas/v2/dumps.py +87 -0
- fractal_server/app/schemas/v2/job.py +114 -0
- fractal_server/app/schemas/v2/manifest.py +159 -0
- fractal_server/app/schemas/v2/project.py +34 -0
- fractal_server/app/schemas/v2/status.py +16 -0
- fractal_server/app/schemas/v2/task.py +151 -0
- fractal_server/app/schemas/v2/task_collection.py +109 -0
- fractal_server/app/schemas/v2/workflow.py +79 -0
- fractal_server/app/schemas/v2/workflowtask.py +208 -0
- fractal_server/config.py +5 -4
- fractal_server/images/__init__.py +4 -0
- fractal_server/images/models.py +136 -0
- fractal_server/images/tools.py +84 -0
- fractal_server/main.py +11 -3
- fractal_server/migrations/env.py +0 -2
- fractal_server/migrations/versions/5bf02391cfef_v2.py +245 -0
- fractal_server/tasks/__init__.py +0 -5
- fractal_server/tasks/endpoint_operations.py +13 -19
- fractal_server/tasks/utils.py +35 -0
- fractal_server/tasks/{_TaskCollectPip.py → v1/_TaskCollectPip.py} +3 -3
- fractal_server/tasks/v1/__init__.py +0 -0
- fractal_server/tasks/{background_operations.py → v1/background_operations.py} +20 -52
- fractal_server/tasks/v1/get_collection_data.py +14 -0
- fractal_server/tasks/v2/_TaskCollectPip.py +103 -0
- fractal_server/tasks/v2/__init__.py +0 -0
- fractal_server/tasks/v2/background_operations.py +381 -0
- fractal_server/tasks/v2/get_collection_data.py +14 -0
- fractal_server/urls.py +13 -0
- {fractal_server-1.4.10.dist-info → fractal_server-2.0.0.dist-info}/METADATA +10 -10
- fractal_server-2.0.0.dist-info/RECORD +169 -0
- fractal_server/app/runner/_slurm/.gitignore +0 -2
- fractal_server/app/runner/common.py +0 -311
- fractal_server/app/schemas/json_schemas/manifest.json +0 -81
- fractal_server-1.4.10.dist-info/RECORD +0 -98
- /fractal_server/app/runner/{_slurm → executors/slurm}/remote.py +0 -0
- /fractal_server/app/runner/{_local → v1/_local}/executor.py +0 -0
- {fractal_server-1.4.10.dist-info → fractal_server-2.0.0.dist-info}/LICENSE +0 -0
- {fractal_server-1.4.10.dist-info → fractal_server-2.0.0.dist-info}/WHEEL +0 -0
- {fractal_server-1.4.10.dist-info → fractal_server-2.0.0.dist-info}/entry_points.txt +0 -0
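The file-level summary above can be reproduced locally by comparing the member lists of the two wheels (a wheel is a plain zip archive). A minimal sketch, assuming both wheel files have been downloaded into the working directory (the local filenames below are an assumption, not part of this diff):

    import zipfile

    # Hypothetical local copies of the two released wheels.
    OLD_WHEEL = "fractal_server-1.4.10-py3-none-any.whl"
    NEW_WHEEL = "fractal_server-2.0.0-py3-none-any.whl"

    def member_names(path: str) -> set[str]:
        # A wheel is a zip archive; namelist() returns its members.
        with zipfile.ZipFile(path) as wheel:
            return set(wheel.namelist())

    old_files = member_names(OLD_WHEEL)
    new_files = member_names(NEW_WHEEL)
    print("added:  ", sorted(new_files - old_files))
    print("removed:", sorted(old_files - new_files))

This only recovers added and removed paths; renames and per-file line counts require diffing the extracted file contents as well.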

fractal_server/app/routes/aux/_job.py
@@ -3,8 +3,8 @@ from pathlib import Path
 from zipfile import ZIP_DEFLATED
 from zipfile import ZipFile

-from ...models import ApplyWorkflow
-from ...runner.
+from ...models.v1 import ApplyWorkflow
+from ...runner.filenames import SHUTDOWN_FILENAME


 def _write_shutdown_file(*, job: ApplyWorkflow):

fractal_server/app/runner/__init__.py
@@ -1,364 +0,0 @@
-# Copyright 2022 (C) Friedrich Miescher Institute for Biomedical Research and
-# University of Zurich
-#
-# Original authors:
-# Jacopo Nespolo <jacopo.nespolo@exact-lab.it>
-# Tommaso Comparin <tommaso.comparin@exact-lab.it>
-# Marco Franzon <marco.franzon@exact-lab.it>
-#
-# This file is part of Fractal and was originally developed by eXact lab S.r.l.
-# <exact-lab.it> under contract with Liberali Lab from the Friedrich Miescher
-# Institute for Biomedical Research and Pelkmans Lab from the University of
-# Zurich.
-"""
-Runner backend subsystem root
-
-This module is the single entry point to the runner backend subsystem. Other
-subystems should only import this module and not its submodules or the
-individual backends.
-"""
-import os
-import traceback
-from pathlib import Path
-from typing import Optional
-
-from ... import __VERSION__
-from ...config import get_settings
-from ...logger import set_logger
-from ...syringe import Inject
-from ...utils import get_timestamp
-from ..db import DB
-from ..models import ApplyWorkflow
-from ..models import Dataset
-from ..models import Workflow
-from ..models import WorkflowTask
-from ..schemas import JobStatusType
-from ._common import WORKFLOW_LOG_FILENAME
-from ._local import process_workflow as local_process_workflow
-from ._slurm import process_workflow as slurm_process_workflow
-from .common import close_job_logger
-from .common import JobExecutionError
-from .common import TaskExecutionError
-from .common import validate_workflow_compatibility  # noqa: F401
-from .handle_failed_job import assemble_history_failed_job
-from .handle_failed_job import assemble_meta_failed_job
-
-
-_backends = {}
-_backends["local"] = local_process_workflow
-_backends["slurm"] = slurm_process_workflow
-
-
-async def submit_workflow(
-    *,
-    workflow_id: int,
-    input_dataset_id: int,
-    output_dataset_id: int,
-    job_id: int,
-    worker_init: Optional[str] = None,
-    slurm_user: Optional[str] = None,
-    user_cache_dir: Optional[str] = None,
-) -> None:
-    """
-    Prepares a workflow and applies it to a dataset
-
-    This function wraps the process_workflow one, which is different for each
-    backend (e.g. local or slurm backend).
-
-    Args:
-        workflow_id:
-            ID of the workflow being applied
-        input_dataset_id:
-            Input dataset ID
-        output_dataset_id:
-            ID of the destination dataset of the workflow.
-        job_id:
-            Id of the job record which stores the state for the current
-            workflow application.
-        worker_init:
-            Custom executor parameters that get parsed before the execution of
-            each task.
-        user_cache_dir:
-            Cache directory (namely a path where the user can write); for the
-            slurm backend, this is used as a base directory for
-            `job.working_dir_user`.
-        slurm_user:
-            The username to impersonate for the workflow execution, for the
-            slurm backend.
-    """
-
-    # Declare runner backend and set `process_workflow` function
-    settings = Inject(get_settings)
-    FRACTAL_RUNNER_BACKEND = settings.FRACTAL_RUNNER_BACKEND
-    if FRACTAL_RUNNER_BACKEND == "local":
-        process_workflow = local_process_workflow
-    elif FRACTAL_RUNNER_BACKEND == "slurm":
-        process_workflow = slurm_process_workflow
-    else:
-        raise RuntimeError(f"Invalid runner backend {FRACTAL_RUNNER_BACKEND=}")
-
-    with next(DB.get_sync_db()) as db_sync:
-
-        job: ApplyWorkflow = db_sync.get(ApplyWorkflow, job_id)
-        if not job:
-            raise ValueError(f"Cannot fetch job {job_id} from database")
-
-        input_dataset: Dataset = db_sync.get(Dataset, input_dataset_id)
-        output_dataset: Dataset = db_sync.get(Dataset, output_dataset_id)
-        workflow: Workflow = db_sync.get(Workflow, workflow_id)
-        if not (input_dataset and output_dataset and workflow):
-            log_msg = ""
-            if not input_dataset:
-                log_msg += (
-                    f"Cannot fetch input_dataset {input_dataset_id} "
-                    "from database\n"
-                )
-            if not output_dataset:
-                log_msg += (
-                    f"Cannot fetch output_dataset {output_dataset_id} "
-                    "from database\n"
-                )
-            if not workflow:
-                log_msg += (
-                    f"Cannot fetch workflow {workflow_id} from database\n"
-                )
-            job.status = JobStatusType.FAILED
-            job.end_timestamp = get_timestamp()
-            job.log = log_msg
-            db_sync.merge(job)
-            db_sync.commit()
-            db_sync.close()
-            return
-
-        # Prepare some of process_workflow arguments
-        input_paths = input_dataset.paths
-        output_path = output_dataset.paths[0]
-
-        # Define and create server-side working folder
-        project_id = workflow.project_id
-        timestamp_string = get_timestamp().strftime("%Y%m%d_%H%M%S")
-        WORKFLOW_DIR = (
-            settings.FRACTAL_RUNNER_WORKING_BASE_DIR
-            / (
-                f"proj_{project_id:07d}_wf_{workflow_id:07d}_job_{job_id:07d}"
-                f"_{timestamp_string}"
-            )
-        ).resolve()
-
-        if WORKFLOW_DIR.exists():
-            raise RuntimeError(f"Workflow dir {WORKFLOW_DIR} already exists.")
-
-        # Create WORKFLOW_DIR with 755 permissions
-        original_umask = os.umask(0)
-        WORKFLOW_DIR.mkdir(parents=True, mode=0o755)
-        os.umask(original_umask)
-
-        # Define and create user-side working folder, if needed
-        if FRACTAL_RUNNER_BACKEND == "local":
-            WORKFLOW_DIR_USER = WORKFLOW_DIR
-        elif FRACTAL_RUNNER_BACKEND == "slurm":
-
-            from ._slurm._subprocess_run_as_user import _mkdir_as_user
-
-            WORKFLOW_DIR_USER = (
-                Path(user_cache_dir) / f"{WORKFLOW_DIR.name}"
-            ).resolve()
-            _mkdir_as_user(folder=str(WORKFLOW_DIR_USER), user=slurm_user)
-        else:
-            raise ValueError(f"{FRACTAL_RUNNER_BACKEND=} not supported")
-
-        # Update db
-        job.working_dir = WORKFLOW_DIR.as_posix()
-        job.working_dir_user = WORKFLOW_DIR_USER.as_posix()
-        db_sync.merge(job)
-        db_sync.commit()
-
-        # After Session.commit() is called, either explicitly or when using a
-        # context manager, all objects associated with the Session are expired.
-        # https://docs.sqlalchemy.org/en/14/orm/
-        # session_basics.html#opening-and-closing-a-session
-        # https://docs.sqlalchemy.org/en/14/orm/
-        # session_state_management.html#refreshing-expiring
-
-        # See issue #928:
-        # https://github.com/fractal-analytics-platform/
-        # fractal-server/issues/928
-
-        db_sync.refresh(input_dataset)
-        db_sync.refresh(output_dataset)
-        db_sync.refresh(workflow)
-
-        # Write logs
-        logger_name = f"WF{workflow_id}_job{job_id}"
-        log_file_path = WORKFLOW_DIR / WORKFLOW_LOG_FILENAME
-        logger = set_logger(
-            logger_name=logger_name,
-            log_file_path=log_file_path,
-        )
-        logger.info(
-            f'Start execution of workflow "{workflow.name}"; '
-            f"more logs at {str(log_file_path)}"
-        )
-        logger.debug(f"fractal_server.__VERSION__: {__VERSION__}")
-        logger.debug(f"FRACTAL_RUNNER_BACKEND: {FRACTAL_RUNNER_BACKEND}")
-        logger.debug(f"slurm_user: {slurm_user}")
-        logger.debug(f"slurm_account: {job.slurm_account}")
-        logger.debug(f"worker_init: {worker_init}")
-        logger.debug(f"input metadata keys: {list(input_dataset.meta.keys())}")
-        logger.debug(f"input_paths: {input_paths}")
-        logger.debug(f"output_path: {output_path}")
-        logger.debug(f"job.id: {job.id}")
-        logger.debug(f"job.working_dir: {job.working_dir}")
-        logger.debug(f"job.working_dir_user: {job.working_dir_user}")
-        logger.debug(f"job.first_task_index: {job.first_task_index}")
-        logger.debug(f"job.last_task_index: {job.last_task_index}")
-        logger.debug(f'START workflow "{workflow.name}"')
-
-    try:
-        # "The Session.close() method does not prevent the Session from being
-        # used again. The Session itself does not actually have a distinct
-        # “closed” state; it merely means the Session will release all database
-        # connections and ORM objects."
-        # (https://docs.sqlalchemy.org/en/20/orm/session_api.html#sqlalchemy.orm.Session.close).
-        #
-        # We close the session before the (possibly long) process_workflow
-        # call, to make sure all DB connections are released. The reason why we
-        # are not using a context manager within the try block is that we also
-        # need access to db_sync in the except branches.
-        db_sync = next(DB.get_sync_db())
-        db_sync.close()
-
-        output_dataset_meta_hist = await process_workflow(
-            workflow=workflow,
-            input_paths=input_paths,
-            output_path=output_path,
-            input_metadata=input_dataset.meta,
-            input_history=input_dataset.history,
-            slurm_user=slurm_user,
-            slurm_account=job.slurm_account,
-            user_cache_dir=user_cache_dir,
-            workflow_dir=WORKFLOW_DIR,
-            workflow_dir_user=WORKFLOW_DIR_USER,
-            logger_name=logger_name,
-            worker_init=worker_init,
-            first_task_index=job.first_task_index,
-            last_task_index=job.last_task_index,
-        )
-
-        logger.info(
-            f'End execution of workflow "{workflow.name}"; '
-            f"more logs at {str(log_file_path)}"
-        )
-        logger.debug(f'END workflow "{workflow.name}"')
-
-        # Replace output_dataset.meta and output_dataset.history with their
-        # up-to-date versions, obtained within process_workflow
-        output_dataset.history = output_dataset_meta_hist.pop("history")
-        output_dataset.meta = output_dataset_meta_hist.pop("metadata")
-
-        db_sync.merge(output_dataset)
-
-        # Update job DB entry
-        job.status = JobStatusType.DONE
-        job.end_timestamp = get_timestamp()
-        with log_file_path.open("r") as f:
-            logs = f.read()
-        job.log = logs
-        db_sync.merge(job)
-        close_job_logger(logger)
-        db_sync.commit()
-
-    except TaskExecutionError as e:
-
-        logger.debug(f'FAILED workflow "{workflow.name}", TaskExecutionError.')
-        logger.info(f'Workflow "{workflow.name}" failed (TaskExecutionError).')
-
-        # Assemble output_dataset.meta based on the last successful task, i.e.
-        # based on METADATA_FILENAME
-        output_dataset.meta = assemble_meta_failed_job(job, output_dataset)
-
-        # Assemble new history and assign it to output_dataset.meta
-        failed_wftask = db_sync.get(WorkflowTask, e.workflow_task_id)
-        output_dataset.history = assemble_history_failed_job(
-            job,
-            output_dataset,
-            workflow,
-            logger,
-            failed_wftask=failed_wftask,
-        )
-
-        db_sync.merge(output_dataset)
-
-        job.status = JobStatusType.FAILED
-        job.end_timestamp = get_timestamp()
-
-        exception_args_string = "\n".join(e.args)
-        job.log = (
-            f"TASK ERROR: "
-            f"Task name: {e.task_name}, "
-            f"position in Workflow: {e.workflow_task_order}\n"
-            f"TRACEBACK:\n{exception_args_string}"
-        )
-        db_sync.merge(job)
-        close_job_logger(logger)
-        db_sync.commit()
-
-    except JobExecutionError as e:
-
-        logger.debug(f'FAILED workflow "{workflow.name}", JobExecutionError.')
-        logger.info(f'Workflow "{workflow.name}" failed (JobExecutionError).')
-
-        # Assemble output_dataset.meta based on the last successful task, i.e.
-        # based on METADATA_FILENAME
-        output_dataset.meta = assemble_meta_failed_job(job, output_dataset)
-
-        # Assemble new history and assign it to output_dataset.meta
-        output_dataset.history = assemble_history_failed_job(
-            job,
-            output_dataset,
-            workflow,
-            logger,
-        )
-
-        db_sync.merge(output_dataset)
-
-        job.status = JobStatusType.FAILED
-        job.end_timestamp = get_timestamp()
-        error = e.assemble_error()
-        job.log = f"JOB ERROR in Fractal job {job.id}:\nTRACEBACK:\n{error}"
-        db_sync.merge(job)
-        close_job_logger(logger)
-        db_sync.commit()
-
-    except Exception:
-
-        logger.debug(f'FAILED workflow "{workflow.name}", unknown error.')
-        logger.info(f'Workflow "{workflow.name}" failed (unkwnon error).')
-
-        current_traceback = traceback.format_exc()
-
-        # Assemble output_dataset.meta based on the last successful task, i.e.
-        # based on METADATA_FILENAME
-        output_dataset.meta = assemble_meta_failed_job(job, output_dataset)
-
-        # Assemble new history and assign it to output_dataset.meta
-        output_dataset.history = assemble_history_failed_job(
-            job,
-            output_dataset,
-            workflow,
-            logger,
-        )
-
-        db_sync.merge(output_dataset)
-
-        job.status = JobStatusType.FAILED
-        job.end_timestamp = get_timestamp()
-        job.log = (
-            f"UNKNOWN ERROR in Fractal job {job.id}\n"
-            f"TRACEBACK:\n{current_traceback}"
-        )
-        db_sync.merge(job)
-        close_job_logger(logger)
-        db_sync.commit()
-    finally:
-        db_sync.close()

fractal_server/app/runner/async_wrap.py
@@ -0,0 +1,27 @@
+import asyncio
+from functools import partial
+from functools import wraps
+from typing import Callable
+
+
+def async_wrap(func: Callable) -> Callable:
+    """
+    Wrap a synchronous callable in an async task
+
+    Ref: [issue #140](https://github.com/fractal-analytics-platform/fractal-server/issues/140)
+    and [this StackOverflow answer](https://stackoverflow.com/q/43241221/19085332).
+
+    Returns:
+        async_wrapper:
+            A factory that allows wrapping a blocking callable within a
+            coroutine.
+    """  # noqa: E501
+
+    @wraps(func)
+    async def async_wrapper(*args, loop=None, executor=None, **kwargs):
+        if loop is None:
+            loop = asyncio.get_event_loop()
+        pfunc = partial(func, *args, **kwargs)
+        return await loop.run_in_executor(executor, pfunc)
+
+    return async_wrapper
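For orientation, a minimal usage sketch of the new helper, assuming fractal-server 2.0.0 is installed; `collect_package` and its argument are hypothetical stand-ins for a blocking callable:

    import asyncio

    from fractal_server.app.runner.async_wrap import async_wrap

    def collect_package(name: str) -> str:
        # Hypothetical blocking operation (e.g. a long package collection).
        return f"collected {name}"

    async def main() -> None:
        # async_wrap(func) returns a coroutine function; awaiting it runs
        # func in the loop's default executor (a thread pool).
        result = await async_wrap(collect_package)("fractal-tasks-core")
        print(result)

    asyncio.run(main())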

fractal_server/app/runner/exceptions.py
@@ -0,0 +1,129 @@
+import os
+from typing import Optional
+
+
+class TaskExecutionError(RuntimeError):
+    """
+    Forwards errors occurred during the execution of a task
+
+    This error wraps and forwards errors occurred during the execution of
+    tasks, when the exit code is larger than 0 (i.e. the error took place
+    within the task). This error also adds information that is useful to track
+    down and debug the failing task within a workflow.
+
+    Attributes:
+        workflow_task_id:
+            ID of the workflow task that failed.
+        workflow_task_order:
+            Order of the task within the workflow.
+        task_name:
+            Human readable name of the failing task.
+    """
+
+    workflow_task_id: Optional[int] = None
+    workflow_task_order: Optional[int] = None
+    task_name: Optional[str] = None
+
+    def __init__(
+        self,
+        *args,
+        workflow_task_id: Optional[int] = None,
+        workflow_task_order: Optional[int] = None,
+        task_name: Optional[str] = None,
+    ):
+        super().__init__(*args)
+        self.workflow_task_id = workflow_task_id
+        self.workflow_task_order = workflow_task_order
+        self.task_name = task_name
+
+
+class JobExecutionError(RuntimeError):
+    """
+    Forwards errors in the execution of a task that are due to external factors
+
+    This error wraps and forwards errors occurred during the execution of
+    tasks, but related to external factors like:
+
+    1. A negative exit code (e.g. because the task received a TERM or KILL
+       signal);
+    2. An error on the executor side (e.g. the SLURM executor could not
+       find the pickled file with task output).
+
+    This error also adds information that is useful to track down and debug the
+    failing task within a workflow.
+
+    Attributes:
+        info:
+            A free field for additional information
+        cmd_file:
+            Path to the file of the command that was executed (e.g. a SLURM
+            submission script).
+        stdout_file:
+            Path to the file with the command stdout
+        stderr_file:
+            Path to the file with the command stderr
+    """
+
+    cmd_file: Optional[str] = None
+    stdout_file: Optional[str] = None
+    stderr_file: Optional[str] = None
+    info: Optional[str] = None
+
+    def __init__(
+        self,
+        *args,
+        cmd_file: Optional[str] = None,
+        stdout_file: Optional[str] = None,
+        stderr_file: Optional[str] = None,
+        info: Optional[str] = None,
+    ):
+        super().__init__(*args)
+        self.cmd_file = cmd_file
+        self.stdout_file = stdout_file
+        self.stderr_file = stderr_file
+        self.info = info
+
+    def _read_file(self, filepath: str) -> str:
+        """
+        Return the content of a text file, and handle the cases where it is
+        empty or missing
+        """
+        if os.path.exists(filepath):
+            with open(filepath, "r") as f:
+                content = f.read()
+            if content:
+                return f"Content of {filepath}:\n{content}"
+            else:
+                return f"File {filepath} is empty\n"
+        else:
+            return f"File {filepath} is missing\n"
+
+    def assemble_error(self) -> str:
+        """
+        Read the files that are specified in attributes, and combine them in an
+        error message.
+        """
+        if self.cmd_file:
+            content = self._read_file(self.cmd_file)
+            cmd_content = f"COMMAND:\n{content}\n\n"
+        else:
+            cmd_content = ""
+        if self.stdout_file:
+            content = self._read_file(self.stdout_file)
+            out_content = f"STDOUT:\n{content}\n\n"
+        else:
+            out_content = ""
+        if self.stderr_file:
+            content = self._read_file(self.stderr_file)
+            err_content = f"STDERR:\n{content}\n\n"
+        else:
+            err_content = ""
+
+        content = f"{cmd_content}{out_content}{err_content}"
+        if self.info:
+            content = f"{content}ADDITIONAL INFO:\n{self.info}\n\n"
+
+        if not content:
+            content = str(self)
+        message = f"JobExecutionError\n\n{content}"
+        return message
File without changes