fractal-server 1.4.10__py3-none-any.whl → 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fractal_server/__init__.py +1 -1
- fractal_server/app/models/__init__.py +6 -8
- fractal_server/app/models/linkuserproject.py +9 -0
- fractal_server/app/models/security.py +6 -0
- fractal_server/app/models/v1/__init__.py +12 -0
- fractal_server/app/models/{dataset.py → v1/dataset.py} +5 -5
- fractal_server/app/models/{job.py → v1/job.py} +5 -5
- fractal_server/app/models/{project.py → v1/project.py} +5 -5
- fractal_server/app/models/{state.py → v1/state.py} +2 -2
- fractal_server/app/models/{task.py → v1/task.py} +7 -2
- fractal_server/app/models/{workflow.py → v1/workflow.py} +5 -5
- fractal_server/app/models/v2/__init__.py +22 -0
- fractal_server/app/models/v2/collection_state.py +21 -0
- fractal_server/app/models/v2/dataset.py +54 -0
- fractal_server/app/models/v2/job.py +51 -0
- fractal_server/app/models/v2/project.py +30 -0
- fractal_server/app/models/v2/task.py +93 -0
- fractal_server/app/models/v2/workflow.py +35 -0
- fractal_server/app/models/v2/workflowtask.py +49 -0
- fractal_server/app/routes/admin/__init__.py +0 -0
- fractal_server/app/routes/{admin.py → admin/v1.py} +42 -42
- fractal_server/app/routes/admin/v2.py +309 -0
- fractal_server/app/routes/api/v1/__init__.py +7 -7
- fractal_server/app/routes/api/v1/_aux_functions.py +8 -8
- fractal_server/app/routes/api/v1/dataset.py +41 -41
- fractal_server/app/routes/api/v1/job.py +14 -14
- fractal_server/app/routes/api/v1/project.py +27 -25
- fractal_server/app/routes/api/v1/task.py +26 -16
- fractal_server/app/routes/api/v1/task_collection.py +28 -16
- fractal_server/app/routes/api/v1/workflow.py +28 -28
- fractal_server/app/routes/api/v1/workflowtask.py +11 -11
- fractal_server/app/routes/api/v2/__init__.py +34 -0
- fractal_server/app/routes/api/v2/_aux_functions.py +502 -0
- fractal_server/app/routes/api/v2/dataset.py +293 -0
- fractal_server/app/routes/api/v2/images.py +279 -0
- fractal_server/app/routes/api/v2/job.py +200 -0
- fractal_server/app/routes/api/v2/project.py +186 -0
- fractal_server/app/routes/api/v2/status.py +150 -0
- fractal_server/app/routes/api/v2/submit.py +210 -0
- fractal_server/app/routes/api/v2/task.py +222 -0
- fractal_server/app/routes/api/v2/task_collection.py +239 -0
- fractal_server/app/routes/api/v2/task_legacy.py +59 -0
- fractal_server/app/routes/api/v2/workflow.py +380 -0
- fractal_server/app/routes/api/v2/workflowtask.py +265 -0
- fractal_server/app/routes/aux/_job.py +2 -2
- fractal_server/app/runner/__init__.py +0 -364
- fractal_server/app/runner/async_wrap.py +27 -0
- fractal_server/app/runner/components.py +5 -0
- fractal_server/app/runner/exceptions.py +129 -0
- fractal_server/app/runner/executors/__init__.py +0 -0
- fractal_server/app/runner/executors/slurm/__init__.py +3 -0
- fractal_server/app/runner/{_slurm → executors/slurm}/_batching.py +1 -1
- fractal_server/app/runner/{_slurm → executors/slurm}/_check_jobs_status.py +1 -1
- fractal_server/app/runner/{_slurm → executors/slurm}/_executor_wait_thread.py +1 -1
- fractal_server/app/runner/{_slurm → executors/slurm}/_slurm_config.py +3 -152
- fractal_server/app/runner/{_slurm → executors/slurm}/_subprocess_run_as_user.py +1 -1
- fractal_server/app/runner/{_slurm → executors/slurm}/executor.py +32 -21
- fractal_server/app/runner/filenames.py +6 -0
- fractal_server/app/runner/set_start_and_last_task_index.py +39 -0
- fractal_server/app/runner/task_files.py +103 -0
- fractal_server/app/runner/v1/__init__.py +366 -0
- fractal_server/app/runner/{_common.py → v1/_common.py} +14 -121
- fractal_server/app/runner/{_local → v1/_local}/__init__.py +5 -4
- fractal_server/app/runner/{_local → v1/_local}/_local_config.py +6 -7
- fractal_server/app/runner/{_local → v1/_local}/_submit_setup.py +1 -5
- fractal_server/app/runner/v1/_slurm/__init__.py +312 -0
- fractal_server/app/runner/{_slurm → v1/_slurm}/_submit_setup.py +5 -11
- fractal_server/app/runner/v1/_slurm/get_slurm_config.py +163 -0
- fractal_server/app/runner/v1/common.py +117 -0
- fractal_server/app/runner/{handle_failed_job.py → v1/handle_failed_job.py} +8 -8
- fractal_server/app/runner/v2/__init__.py +336 -0
- fractal_server/app/runner/v2/_local/__init__.py +162 -0
- fractal_server/app/runner/v2/_local/_local_config.py +118 -0
- fractal_server/app/runner/v2/_local/_submit_setup.py +52 -0
- fractal_server/app/runner/v2/_local/executor.py +100 -0
- fractal_server/app/runner/{_slurm → v2/_slurm}/__init__.py +38 -47
- fractal_server/app/runner/v2/_slurm/_submit_setup.py +82 -0
- fractal_server/app/runner/v2/_slurm/get_slurm_config.py +182 -0
- fractal_server/app/runner/v2/deduplicate_list.py +23 -0
- fractal_server/app/runner/v2/handle_failed_job.py +165 -0
- fractal_server/app/runner/v2/merge_outputs.py +38 -0
- fractal_server/app/runner/v2/runner.py +343 -0
- fractal_server/app/runner/v2/runner_functions.py +374 -0
- fractal_server/app/runner/v2/runner_functions_low_level.py +130 -0
- fractal_server/app/runner/v2/task_interface.py +62 -0
- fractal_server/app/runner/v2/v1_compat.py +31 -0
- fractal_server/app/schemas/__init__.py +1 -42
- fractal_server/app/schemas/_validators.py +28 -5
- fractal_server/app/schemas/v1/__init__.py +36 -0
- fractal_server/app/schemas/{applyworkflow.py → v1/applyworkflow.py} +18 -18
- fractal_server/app/schemas/{dataset.py → v1/dataset.py} +30 -30
- fractal_server/app/schemas/{dumps.py → v1/dumps.py} +8 -8
- fractal_server/app/schemas/{manifest.py → v1/manifest.py} +5 -5
- fractal_server/app/schemas/{project.py → v1/project.py} +9 -9
- fractal_server/app/schemas/{task.py → v1/task.py} +12 -12
- fractal_server/app/schemas/{task_collection.py → v1/task_collection.py} +7 -7
- fractal_server/app/schemas/{workflow.py → v1/workflow.py} +38 -38
- fractal_server/app/schemas/v2/__init__.py +37 -0
- fractal_server/app/schemas/v2/dataset.py +126 -0
- fractal_server/app/schemas/v2/dumps.py +87 -0
- fractal_server/app/schemas/v2/job.py +114 -0
- fractal_server/app/schemas/v2/manifest.py +159 -0
- fractal_server/app/schemas/v2/project.py +34 -0
- fractal_server/app/schemas/v2/status.py +16 -0
- fractal_server/app/schemas/v2/task.py +151 -0
- fractal_server/app/schemas/v2/task_collection.py +109 -0
- fractal_server/app/schemas/v2/workflow.py +79 -0
- fractal_server/app/schemas/v2/workflowtask.py +208 -0
- fractal_server/config.py +5 -4
- fractal_server/images/__init__.py +4 -0
- fractal_server/images/models.py +136 -0
- fractal_server/images/tools.py +84 -0
- fractal_server/main.py +11 -3
- fractal_server/migrations/env.py +0 -2
- fractal_server/migrations/versions/5bf02391cfef_v2.py +245 -0
- fractal_server/tasks/__init__.py +0 -5
- fractal_server/tasks/endpoint_operations.py +13 -19
- fractal_server/tasks/utils.py +35 -0
- fractal_server/tasks/{_TaskCollectPip.py → v1/_TaskCollectPip.py} +3 -3
- fractal_server/tasks/v1/__init__.py +0 -0
- fractal_server/tasks/{background_operations.py → v1/background_operations.py} +20 -52
- fractal_server/tasks/v1/get_collection_data.py +14 -0
- fractal_server/tasks/v2/_TaskCollectPip.py +103 -0
- fractal_server/tasks/v2/__init__.py +0 -0
- fractal_server/tasks/v2/background_operations.py +381 -0
- fractal_server/tasks/v2/get_collection_data.py +14 -0
- fractal_server/urls.py +13 -0
- {fractal_server-1.4.10.dist-info → fractal_server-2.0.0.dist-info}/METADATA +10 -10
- fractal_server-2.0.0.dist-info/RECORD +169 -0
- fractal_server/app/runner/_slurm/.gitignore +0 -2
- fractal_server/app/runner/common.py +0 -311
- fractal_server/app/schemas/json_schemas/manifest.json +0 -81
- fractal_server-1.4.10.dist-info/RECORD +0 -98
- /fractal_server/app/runner/{_slurm → executors/slurm}/remote.py +0 -0
- /fractal_server/app/runner/{_local → v1/_local}/executor.py +0 -0
- {fractal_server-1.4.10.dist-info → fractal_server-2.0.0.dist-info}/LICENSE +0 -0
- {fractal_server-1.4.10.dist-info → fractal_server-2.0.0.dist-info}/WHEEL +0 -0
- {fractal_server-1.4.10.dist-info → fractal_server-2.0.0.dist-info}/entry_points.txt +0 -0
fractal_server/app/runner/v1/__init__.py (new file):

```diff
@@ -0,0 +1,366 @@
+# Copyright 2022 (C) Friedrich Miescher Institute for Biomedical Research and
+# University of Zurich
+#
+# Original authors:
+# Jacopo Nespolo <jacopo.nespolo@exact-lab.it>
+# Tommaso Comparin <tommaso.comparin@exact-lab.it>
+# Marco Franzon <marco.franzon@exact-lab.it>
+#
+# This file is part of Fractal and was originally developed by eXact lab S.r.l.
+# <exact-lab.it> under contract with Liberali Lab from the Friedrich Miescher
+# Institute for Biomedical Research and Pelkmans Lab from the University of
+# Zurich.
+"""
+Runner backend subsystem root
+
+This module is the single entry point to the runner backend subsystem. Other
+subystems should only import this module and not its submodules or the
+individual backends.
+"""
+import os
+import traceback
+from pathlib import Path
+from typing import Optional
+
+from ....logger import set_logger
+from ....syringe import Inject
+from ....utils import get_timestamp
+from ...db import DB
+from ...models.v1 import ApplyWorkflow
+from ...models.v1 import Dataset
+from ...models.v1 import Workflow
+from ...models.v1 import WorkflowTask
+from ...schemas.v1 import JobStatusTypeV1
+from ..exceptions import JobExecutionError
+from ..exceptions import TaskExecutionError
+from ..filenames import WORKFLOW_LOG_FILENAME
+from ._local import process_workflow as local_process_workflow
+from ._slurm import process_workflow as slurm_process_workflow
+from .common import close_job_logger
+from .common import validate_workflow_compatibility  # noqa: F401
+from .handle_failed_job import assemble_history_failed_job
+from .handle_failed_job import assemble_meta_failed_job
+from fractal_server import __VERSION__
+from fractal_server.config import get_settings
+
+
+_backends = {}
+_backends["local"] = local_process_workflow
+_backends["slurm"] = slurm_process_workflow
+
+
+async def submit_workflow(
+    *,
+    workflow_id: int,
+    input_dataset_id: int,
+    output_dataset_id: int,
+    job_id: int,
+    worker_init: Optional[str] = None,
+    slurm_user: Optional[str] = None,
+    user_cache_dir: Optional[str] = None,
+) -> None:
+    """
+    Prepares a workflow and applies it to a dataset
+
+    This function wraps the process_workflow one, which is different for each
+    backend (e.g. local or slurm backend).
+
+    Args:
+        workflow_id:
+            ID of the workflow being applied
+        input_dataset_id:
+            Input dataset ID
+        output_dataset_id:
+            ID of the destination dataset of the workflow.
+        job_id:
+            Id of the job record which stores the state for the current
+            workflow application.
+        worker_init:
+            Custom executor parameters that get parsed before the execution of
+            each task.
+        user_cache_dir:
+            Cache directory (namely a path where the user can write); for the
+            slurm backend, this is used as a base directory for
+            `job.working_dir_user`.
+        slurm_user:
+            The username to impersonate for the workflow execution, for the
+            slurm backend.
+    """
+
+    # Declare runner backend and set `process_workflow` function
+    settings = Inject(get_settings)
+    FRACTAL_RUNNER_BACKEND = settings.FRACTAL_RUNNER_BACKEND
+    if FRACTAL_RUNNER_BACKEND == "local":
+        process_workflow = local_process_workflow
+    elif FRACTAL_RUNNER_BACKEND == "slurm":
+        process_workflow = slurm_process_workflow
+    else:
+        raise RuntimeError(f"Invalid runner backend {FRACTAL_RUNNER_BACKEND=}")
+
+    with next(DB.get_sync_db()) as db_sync:
+
+        job: ApplyWorkflow = db_sync.get(ApplyWorkflow, job_id)
+        if not job:
+            raise ValueError(f"Cannot fetch job {job_id} from database")
+
+        input_dataset: Dataset = db_sync.get(Dataset, input_dataset_id)
+        output_dataset: Dataset = db_sync.get(Dataset, output_dataset_id)
+        workflow: Workflow = db_sync.get(Workflow, workflow_id)
+        if not (input_dataset and output_dataset and workflow):
+            log_msg = ""
+            if not input_dataset:
+                log_msg += (
+                    f"Cannot fetch input_dataset {input_dataset_id} "
+                    "from database\n"
+                )
+            if not output_dataset:
+                log_msg += (
+                    f"Cannot fetch output_dataset {output_dataset_id} "
+                    "from database\n"
+                )
+            if not workflow:
+                log_msg += (
+                    f"Cannot fetch workflow {workflow_id} from database\n"
+                )
+            job.status = JobStatusTypeV1.FAILED
+            job.end_timestamp = get_timestamp()
+            job.log = log_msg
+            db_sync.merge(job)
+            db_sync.commit()
+            db_sync.close()
+            return
+
+        # Prepare some of process_workflow arguments
+        input_paths = input_dataset.paths
+        output_path = output_dataset.paths[0]
+
+        # Define and create server-side working folder
+        project_id = workflow.project_id
+        timestamp_string = get_timestamp().strftime("%Y%m%d_%H%M%S")
+        WORKFLOW_DIR = (
+            settings.FRACTAL_RUNNER_WORKING_BASE_DIR
+            / (
+                f"proj_{project_id:07d}_wf_{workflow_id:07d}_job_{job_id:07d}"
+                f"_{timestamp_string}"
+            )
+        ).resolve()
+
+        if WORKFLOW_DIR.exists():
+            raise RuntimeError(f"Workflow dir {WORKFLOW_DIR} already exists.")
+
+        # Create WORKFLOW_DIR with 755 permissions
+        original_umask = os.umask(0)
+        WORKFLOW_DIR.mkdir(parents=True, mode=0o755)
+        os.umask(original_umask)
+
+        # Define and create user-side working folder, if needed
+        if FRACTAL_RUNNER_BACKEND == "local":
+            WORKFLOW_DIR_USER = WORKFLOW_DIR
+        elif FRACTAL_RUNNER_BACKEND == "slurm":
+
+            from ..executors.slurm._subprocess_run_as_user import (
+                _mkdir_as_user,
+            )
+
+            WORKFLOW_DIR_USER = (
+                Path(user_cache_dir) / f"{WORKFLOW_DIR.name}"
+            ).resolve()
+            _mkdir_as_user(folder=str(WORKFLOW_DIR_USER), user=slurm_user)
+        else:
+            raise ValueError(f"{FRACTAL_RUNNER_BACKEND=} not supported")
+
+        # Update db
+        job.working_dir = WORKFLOW_DIR.as_posix()
+        job.working_dir_user = WORKFLOW_DIR_USER.as_posix()
+        db_sync.merge(job)
+        db_sync.commit()
+
+        # After Session.commit() is called, either explicitly or when using a
+        # context manager, all objects associated with the Session are expired.
+        # https://docs.sqlalchemy.org/en/14/orm/
+        # session_basics.html#opening-and-closing-a-session
+        # https://docs.sqlalchemy.org/en/14/orm/
+        # session_state_management.html#refreshing-expiring
+
+        # See issue #928:
+        # https://github.com/fractal-analytics-platform/
+        # fractal-server/issues/928
+
+        db_sync.refresh(input_dataset)
+        db_sync.refresh(output_dataset)
+        db_sync.refresh(workflow)
+
+        # Write logs
+        logger_name = f"WF{workflow_id}_job{job_id}"
+        log_file_path = WORKFLOW_DIR / WORKFLOW_LOG_FILENAME
+        logger = set_logger(
+            logger_name=logger_name,
+            log_file_path=log_file_path,
+        )
+        logger.info(
+            f'Start execution of workflow "{workflow.name}"; '
+            f"more logs at {str(log_file_path)}"
+        )
+        logger.debug(f"fractal_server.__VERSION__: {__VERSION__}")
+        logger.debug(f"FRACTAL_RUNNER_BACKEND: {FRACTAL_RUNNER_BACKEND}")
+        logger.debug(f"slurm_user: {slurm_user}")
+        logger.debug(f"slurm_account: {job.slurm_account}")
+        logger.debug(f"worker_init: {worker_init}")
+        logger.debug(f"input metadata keys: {list(input_dataset.meta.keys())}")
+        logger.debug(f"input_paths: {input_paths}")
+        logger.debug(f"output_path: {output_path}")
+        logger.debug(f"job.id: {job.id}")
+        logger.debug(f"job.working_dir: {job.working_dir}")
+        logger.debug(f"job.working_dir_user: {job.working_dir_user}")
+        logger.debug(f"job.first_task_index: {job.first_task_index}")
+        logger.debug(f"job.last_task_index: {job.last_task_index}")
+        logger.debug(f'START workflow "{workflow.name}"')
+
+    try:
+        # "The Session.close() method does not prevent the Session from being
+        # used again. The Session itself does not actually have a distinct
+        # “closed” state; it merely means the Session will release all database
+        # connections and ORM objects."
+        # (https://docs.sqlalchemy.org/en/20/orm/session_api.html#sqlalchemy.orm.Session.close).
+        #
+        # We close the session before the (possibly long) process_workflow
+        # call, to make sure all DB connections are released. The reason why we
+        # are not using a context manager within the try block is that we also
+        # need access to db_sync in the except branches.
+        db_sync = next(DB.get_sync_db())
+        db_sync.close()
+
+        output_dataset_meta_hist = await process_workflow(
+            workflow=workflow,
+            input_paths=input_paths,
+            output_path=output_path,
+            input_metadata=input_dataset.meta,
+            input_history=input_dataset.history,
+            slurm_user=slurm_user,
+            slurm_account=job.slurm_account,
+            user_cache_dir=user_cache_dir,
+            workflow_dir=WORKFLOW_DIR,
+            workflow_dir_user=WORKFLOW_DIR_USER,
+            logger_name=logger_name,
+            worker_init=worker_init,
+            first_task_index=job.first_task_index,
+            last_task_index=job.last_task_index,
+        )
+
+        logger.info(
+            f'End execution of workflow "{workflow.name}"; '
+            f"more logs at {str(log_file_path)}"
+        )
+        logger.debug(f'END workflow "{workflow.name}"')
+
+        # Replace output_dataset.meta and output_dataset.history with their
+        # up-to-date versions, obtained within process_workflow
+        output_dataset.history = output_dataset_meta_hist.pop("history")
+        output_dataset.meta = output_dataset_meta_hist.pop("metadata")
+
+        db_sync.merge(output_dataset)
+
+        # Update job DB entry
+        job.status = JobStatusTypeV1.DONE
+        job.end_timestamp = get_timestamp()
+        with log_file_path.open("r") as f:
+            logs = f.read()
+        job.log = logs
+        db_sync.merge(job)
+        close_job_logger(logger)
+        db_sync.commit()
+
+    except TaskExecutionError as e:
+
+        logger.debug(f'FAILED workflow "{workflow.name}", TaskExecutionError.')
+        logger.info(f'Workflow "{workflow.name}" failed (TaskExecutionError).')
+
+        # Assemble output_dataset.meta based on the last successful task, i.e.
+        # based on METADATA_FILENAME
+        output_dataset.meta = assemble_meta_failed_job(job, output_dataset)
+
+        # Assemble new history and assign it to output_dataset.meta
+        failed_wftask = db_sync.get(WorkflowTask, e.workflow_task_id)
+        output_dataset.history = assemble_history_failed_job(
+            job,
+            output_dataset,
+            workflow,
+            logger,
+            failed_wftask=failed_wftask,
+        )
+
+        db_sync.merge(output_dataset)
+
+        job.status = JobStatusTypeV1.FAILED
+        job.end_timestamp = get_timestamp()
+
+        exception_args_string = "\n".join(e.args)
+        job.log = (
+            f"TASK ERROR: "
+            f"Task name: {e.task_name}, "
+            f"position in Workflow: {e.workflow_task_order}\n"
+            f"TRACEBACK:\n{exception_args_string}"
+        )
+        db_sync.merge(job)
+        close_job_logger(logger)
+        db_sync.commit()
+
+    except JobExecutionError as e:
+
+        logger.debug(f'FAILED workflow "{workflow.name}", JobExecutionError.')
+        logger.info(f'Workflow "{workflow.name}" failed (JobExecutionError).')
+
+        # Assemble output_dataset.meta based on the last successful task, i.e.
+        # based on METADATA_FILENAME
+        output_dataset.meta = assemble_meta_failed_job(job, output_dataset)
+
+        # Assemble new history and assign it to output_dataset.meta
+        output_dataset.history = assemble_history_failed_job(
+            job,
+            output_dataset,
+            workflow,
+            logger,
+        )
+
+        db_sync.merge(output_dataset)
+
+        job.status = JobStatusTypeV1.FAILED
+        job.end_timestamp = get_timestamp()
+        error = e.assemble_error()
+        job.log = f"JOB ERROR in Fractal job {job.id}:\nTRACEBACK:\n{error}"
+        db_sync.merge(job)
+        close_job_logger(logger)
+        db_sync.commit()
+
+    except Exception:
+
+        logger.debug(f'FAILED workflow "{workflow.name}", unknown error.')
+        logger.info(f'Workflow "{workflow.name}" failed (unkwnon error).')
+
+        current_traceback = traceback.format_exc()
+
+        # Assemble output_dataset.meta based on the last successful task, i.e.
+        # based on METADATA_FILENAME
+        output_dataset.meta = assemble_meta_failed_job(job, output_dataset)
+
+        # Assemble new history and assign it to output_dataset.meta
+        output_dataset.history = assemble_history_failed_job(
+            job,
+            output_dataset,
+            workflow,
+            logger,
+        )
+
+        db_sync.merge(output_dataset)
+
+        job.status = JobStatusTypeV1.FAILED
+        job.end_timestamp = get_timestamp()
+        job.log = (
+            f"UNKNOWN ERROR in Fractal job {job.id}\n"
+            f"TRACEBACK:\n{current_traceback}"
+        )
+        db_sync.merge(job)
+        close_job_logger(logger)
+        db_sync.commit()
+    finally:
+        db_sync.close()
```
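The new module pairs a `_backends` registry with an explicit if/elif dispatch on `settings.FRACTAL_RUNNER_BACKEND`. A minimal runnable sketch of that dispatch pattern follows; the placeholder coroutines and the `select_backend` helper are illustrative, not part of the package:

```python
from typing import Awaitable, Callable


async def local_process_workflow(**kwargs) -> dict:
    # Placeholder for fractal_server.app.runner.v1._local.process_workflow
    return {"metadata": {}, "history": []}


async def slurm_process_workflow(**kwargs) -> dict:
    # Placeholder for fractal_server.app.runner.v1._slurm.process_workflow
    return {"metadata": {}, "history": []}


# Mirrors the module-level `_backends` dict in the hunk above
_backends: dict[str, Callable[..., Awaitable[dict]]] = {
    "local": local_process_workflow,
    "slurm": slurm_process_workflow,
}


def select_backend(name: str) -> Callable[..., Awaitable[dict]]:
    # Unknown backends fail fast, as in submit_workflow's else branch
    if name not in _backends:
        raise RuntimeError(f"Invalid runner backend {name=}")
    return _backends[name]
```

`submit_workflow` performs this selection before touching the working directories or the database, so an unsupported `FRACTAL_RUNNER_BACKEND` is reported immediately.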
fractal_server/app/runner/{_common.py → v1/_common.py}:

```diff
@@ -11,7 +11,6 @@ import subprocess  # nosec
 import traceback
 from concurrent.futures import Executor
 from copy import deepcopy
-from functools import lru_cache
 from functools import partial
 from pathlib import Path
 from shlex import split as shlex_split
@@ -19,22 +18,19 @@ from typing import Any
 from typing import Callable
 from typing import Optional
 
-from
-from
-from
-from
-from
-from
-from
-from
+from ....config import get_settings
+from ....logger import get_logger
+from ....syringe import Inject
+from ...models.v1 import Task
+from ...models.v1 import WorkflowTask
+from ...schemas.v1 import WorkflowTaskStatusTypeV1
+from ..exceptions import JobExecutionError
+from ..exceptions import TaskExecutionError
 from .common import TaskParameters
 from .common import write_args_file
-
-
-
-METADATA_FILENAME = "metadata.json"
-SHUTDOWN_FILENAME = "shutdown"
-WORKFLOW_LOG_FILENAME = "workflow.log"
+from fractal_server.app.runner.filenames import HISTORY_FILENAME
+from fractal_server.app.runner.filenames import METADATA_FILENAME
+from fractal_server.app.runner.task_files import get_task_file_paths
 
 
 def no_op_submit_setup_call(
@@ -42,7 +38,6 @@ def no_op_submit_setup_call(
     wftask: WorkflowTask,
     workflow_dir: Path,
     workflow_dir_user: Path,
-    task_pars: TaskParameters,
 ) -> dict:
     """
     Default (no-operation) interface of submit_setup_call.
@@ -50,14 +45,6 @@ def no_op_submit_setup_call(
     return {}
 
 
-def sanitize_component(value: str) -> str:
-    """
-    Remove {" ", "/", "."} form a string, e.g. going from
-    'plate.zarr/B/03/0' to 'plate_zarr_B_03_0'.
-    """
-    return value.replace(" ", "_").replace("/", "_").replace(".", "_")
-
-
 def _task_needs_image_list(_task: Task) -> bool:
     """
     Whether a task requires `metadata["image"]` in its `args.json` file.
```
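The removed `sanitize_component` helper is shown in full above, so its behavior can be restated as a self-contained sketch (where the logic lives in 2.0.0 is not shown here; the new `fractal_server/app/runner/components.py` from the file list is a plausible home, but that is an assumption):

```python
def sanitize_component(value: str) -> str:
    # Replace {" ", "/", "."} with underscores so a parallelization
    # component such as an OME-Zarr path becomes a safe filename fragment.
    return value.replace(" ", "_").replace("/", "_").replace(".", "_")


assert sanitize_component("plate.zarr/B/03/0") == "plate_zarr_B_03_0"
```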
```diff
@@ -78,98 +65,6 @@ def _task_needs_image_list(_task: Task) -> bool:
     return False
 
 
-class TaskFiles:
-    """
-    Group all file paths pertaining to a task
-
-    Attributes:
-        workflow_dir:
-            Server-owned directory to store all task-execution-related relevant
-            files (inputs, outputs, errors, and all meta files related to the
-            job execution). Note: users cannot write directly to this folder.
-        workflow_dir_user:
-            User-side directory with the same scope as `workflow_dir`, and
-            where a user can write.
-        task_order:
-            Positional order of the task within a workflow.
-        component:
-            Specific component to run the task for (relevant for tasks that
-            will be executed in parallel over many components).
-        file_prefix:
-            Prefix for all task-related files.
-        args:
-            Path for input json file.
-        metadiff:
-            Path for output json file with metadata update.
-        out:
-            Path for task-execution stdout.
-        err:
-            Path for task-execution stderr.
-    """
-
-    workflow_dir: Path
-    workflow_dir_user: Path
-    task_order: Optional[int] = None
-    component: Optional[str] = None
-
-    file_prefix: str
-    args: Path
-    out: Path
-    err: Path
-    metadiff: Path
-
-    def __init__(
-        self,
-        workflow_dir: Path,
-        workflow_dir_user: Path,
-        task_order: Optional[int] = None,
-        component: Optional[str] = None,
-    ):
-        self.workflow_dir = workflow_dir
-        self.workflow_dir_user = workflow_dir_user
-        self.task_order = task_order
-        self.component = component
-
-        if self.component is not None:
-            component_safe = sanitize_component(str(self.component))
-            component_safe = f"_par_{component_safe}"
-        else:
-            component_safe = ""
-
-        if self.task_order is not None:
-            order = str(self.task_order)
-        else:
-            order = "task"
-        self.file_prefix = f"{order}{component_safe}"
-        self.args = self.workflow_dir_user / f"{self.file_prefix}.args.json"
-        self.out = self.workflow_dir_user / f"{self.file_prefix}.out"
-        self.err = self.workflow_dir_user / f"{self.file_prefix}.err"
-        self.metadiff = (
-            self.workflow_dir_user / f"{self.file_prefix}.metadiff.json"
-        )
-
-
-@lru_cache()
-def get_task_file_paths(
-    workflow_dir: Path,
-    workflow_dir_user: Path,
-    task_order: Optional[int] = None,
-    component: Optional[str] = None,
-) -> TaskFiles:
-    """
-    Return the corrisponding TaskFiles object
-
-    This function is mainly used as a cache to avoid instantiating needless
-    objects.
-    """
-    return TaskFiles(
-        workflow_dir=workflow_dir,
-        workflow_dir_user=workflow_dir_user,
-        task_order=task_order,
-        component=component,
-    )
-
-
 def _call_command_wrapper(cmd: str, stdout: Path, stderr: Path) -> None:
     """
     Call a command and write its stdout and stderr to files
```
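`TaskFiles` and `get_task_file_paths` leave `_common.py`; the new import block earlier in this file pulls `get_task_file_paths` from `fractal_server.app.runner.task_files`, the new +103-line module in the file list. The removed code also documents why the factory is wrapped in `@lru_cache()`: equal arguments return one shared object instead of a fresh instance. A compressed sketch of that caching idea (`get_task_file_prefix` is a hypothetical helper, not package API):

```python
from functools import lru_cache
from typing import Optional


@lru_cache()
def get_task_file_prefix(
    task_order: Optional[int] = None,
    component: Optional[str] = None,
) -> str:
    # Same naming rule as the removed TaskFiles.__init__: the task order
    # (or "task"), plus an optional "_par_<sanitized component>" suffix.
    order = str(task_order) if task_order is not None else "task"
    if component is None:
        return order
    safe = component.replace(" ", "_").replace("/", "_").replace(".", "_")
    return f"{order}_par_{safe}"


# Equal arguments hit the cache and return the very same string object
assert get_task_file_prefix(3, "B/03/0") is get_task_file_prefix(3, "B/03/0")
```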
```diff
@@ -312,7 +207,7 @@ def call_single_task(
         with task_files.metadiff.open("r") as f_metadiff:
             diff_metadata = json.load(f_metadiff)
     except FileNotFoundError as e:
-        logger.
+        logger.warning(
             f"Skip collection of updated metadata. Original error: {str(e)}"
         )
         diff_metadata = {}
```
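The one-line fix above completes a truncated `logger.` call as `logger.warning(`, inside a deliberate fallback: a task that writes no `.metadiff.json` contributes an empty metadata update instead of failing the job. The same pattern as a stand-alone helper (`read_metadiff` is an illustrative name, not package API):

```python
import json
import logging
from pathlib import Path


def read_metadiff(path: Path, logger: logging.Logger) -> dict:
    try:
        with path.open("r") as f:
            return json.load(f)
    except FileNotFoundError as e:
        # Missing file: warn and fall back to "no metadata update"
        logger.warning(
            f"Skip collection of updated metadata. Original error: {str(e)}"
        )
        return {}
```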
```diff
@@ -331,7 +226,7 @@ def call_single_task(
     wftask_dump["task"] = wftask.task.model_dump()
     new_history_item = dict(
         workflowtask=wftask_dump,
-        status=
+        status=WorkflowTaskStatusTypeV1.DONE,
         parallelization=None,
     )
     updated_history = task_pars.history.copy()
```
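The restored enum value also documents the shape of a v1 history entry. A runnable sketch with a stand-in enum (the real `WorkflowTaskStatusTypeV1` lives in `app.schemas.v1` and its member values are not shown in this diff):

```python
from enum import Enum


class WorkflowTaskStatusTypeV1(str, Enum):
    # Stand-in: only the member used above is sketched
    DONE = "done"


wftask_dump = {"id": 1, "task": {"name": "example-task"}}  # placeholder dump
new_history_item = dict(
    workflowtask=wftask_dump,
    status=WorkflowTaskStatusTypeV1.DONE,
    parallelization=None,  # a dict for parallel tasks, see the next hunks
)
```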
```diff
@@ -529,7 +424,6 @@ def call_parallel_task(
     try:
         extra_setup = submit_setup_call(
             wftask=wftask,
-            task_pars=task_pars_depend,
             workflow_dir=workflow_dir,
             workflow_dir_user=workflow_dir_user,
         )
@@ -592,7 +486,7 @@ def call_parallel_task(
     wftask_dump["task"] = wftask.task.model_dump()
     new_history_item = dict(
         workflowtask=wftask_dump,
-        status=
+        status=WorkflowTaskStatusTypeV1.DONE,
         parallelization=dict(
             parallelization_level=wftask.parallelization_level,
             component_list=component_list,
@@ -681,7 +575,6 @@ def execute_tasks(
     try:
         extra_setup = submit_setup_call(
             wftask=this_wftask,
-            task_pars=current_task_pars,
             workflow_dir=workflow_dir,
             workflow_dir_user=workflow_dir_user,
         )
```
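All three call sites above drop `task_pars` from `submit_setup_call`, matching the earlier signature change to `no_op_submit_setup_call`: a setup hook now sees only the `WorkflowTask` and the two working directories. A sketch of the narrowed hook contract (the `Protocol` name is illustrative):

```python
from pathlib import Path
from typing import Any, Protocol


class SubmitSetupCallable(Protocol):
    # Hook contract after this change: no TaskParameters argument
    def __call__(
        self, wftask: Any, workflow_dir: Path, workflow_dir_user: Path
    ) -> dict:
        ...


def no_op_submit_setup_call(
    wftask: Any,
    workflow_dir: Path,
    workflow_dir_user: Path,
) -> dict:
    # Default (no-operation) hook, as in the first hunks of this file
    return {}


extra_setup = no_op_submit_setup_call(
    wftask=None, workflow_dir=Path("."), workflow_dir_user=Path(".")
)
assert extra_setup == {}
```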
fractal_server/app/runner/{_local → v1/_local}/__init__.py:

```diff
@@ -23,10 +23,10 @@ from pathlib import Path
 from typing import Any
 from typing import Optional
 
-from
+from ....models.v1 import Workflow
+from ...async_wrap import async_wrap
+from ...set_start_and_last_task_index import set_start_and_last_task_index
 from .._common import execute_tasks
-from ..common import async_wrap
-from ..common import set_start_and_last_task_index
 from ..common import TaskParameters
 from ._submit_setup import _local_submit_setup
 from .executor import FractalThreadPoolExecutor
@@ -49,7 +49,8 @@ def _process_workflow(
 
     Schedules the workflow using a `FractalThreadPoolExecutor`.
 
-    Cf.
+    Cf.
+    [process_workflow][fractal_server.app.runner.v1._local.process_workflow]
     for the call signature.
     """
 
```
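The local backend now imports `async_wrap` and `set_start_and_last_task_index` from dedicated modules under `app/runner/` instead of from `..common`. The body of the new `async_wrap.py` (+27 lines) is not shown in this diff, so the following is only a sketch of the conventional pattern such a helper implements, running a blocking callable in the event loop's executor:

```python
import asyncio
from functools import partial, wraps
from typing import Callable


def async_wrap(func: Callable) -> Callable:
    @wraps(func)
    async def run(*args, loop=None, executor=None, **kwargs):
        # Off-load the sync call so the event loop stays responsive
        if loop is None:
            loop = asyncio.get_running_loop()
        pfunc = partial(func, *args, **kwargs)
        return await loop.run_in_executor(executor, pfunc)

    return run
```

Under this assumption, `await async_wrap(_process_workflow)(...)` becomes valid inside the async `process_workflow` entry point.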
fractal_server/app/runner/{_local → v1/_local}/_local_config.py:

```diff
@@ -19,9 +19,9 @@ from pydantic import BaseModel
 from pydantic import Extra
 from pydantic.error_wrappers import ValidationError
 
-from
-from
-from
+from .....config import get_settings
+from .....syringe import Inject
+from ....models.v1 import WorkflowTask
 
 
 class LocalBackendConfigError(ValueError):
@@ -63,15 +63,14 @@ def get_local_backend_config(
     The sources for `parallel_tasks_per_job` attributes, starting from the
     highest-priority one, are
 
-    1. Properties in `wftask.meta
-       `Workflow.insert_task`, also includes `wftask.task.meta`);
+    1. Properties in `wftask.meta`;
     2. The general content of the local-backend configuration file;
     3. The default value (`None`).
 
     Arguments:
         wftask:
-            WorkflowTask for which the backend configuration
-            prepared.
+            WorkflowTask (V1) for which the backend configuration should
+            be prepared.
         config_path:
             Path of local-backend configuration file; if `None`, use
             `FRACTAL_LOCAL_CONFIG_FILE` variable from settings.
```
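The cleaned-up docstring spells out a three-level priority for `parallel_tasks_per_job`. A runnable illustration of that lookup order (the helper name and dict-based signature are invented for the sketch; the real function reads a `WorkflowTask` and a configuration file):

```python
from typing import Optional


def resolve_parallel_tasks_per_job(
    wftask_meta: dict,
    config_file_content: dict,
) -> Optional[int]:
    key = "parallel_tasks_per_job"
    if key in wftask_meta:  # 1. highest priority: wftask.meta
        return wftask_meta[key]
    if key in config_file_content:  # 2. the configuration file
        return config_file_content[key]
    return None  # 3. the default


assert resolve_parallel_tasks_per_job({"parallel_tasks_per_job": 2}, {}) == 2
assert resolve_parallel_tasks_per_job({}, {"parallel_tasks_per_job": 8}) == 8
assert resolve_parallel_tasks_per_job({}, {}) is None
```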
fractal_server/app/runner/{_local → v1/_local}/_submit_setup.py:

```diff
@@ -14,8 +14,7 @@ Submodule to define _local_submit_setup
 from pathlib import Path
 from typing import Optional
 
-from
-from ..common import TaskParameters
+from ....models.v1 import WorkflowTask
 from ._local_config import get_local_backend_config
 
 
@@ -24,7 +23,6 @@ def _local_submit_setup(
     wftask: WorkflowTask,
     workflow_dir: Optional[Path] = None,
     workflow_dir_user: Optional[Path] = None,
-    task_pars: Optional[TaskParameters] = None,
 ) -> dict[str, object]:
     """
     Collect WorfklowTask-specific configuration parameters from different
@@ -33,8 +31,6 @@ def _local_submit_setup(
     Arguments:
         wftask:
             WorkflowTask for which the configuration is to be assembled
-        task_pars:
-            Not used in this function.
         workflow_dir:
             Not used in this function.
         workflow_dir_user:
```
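Taken together with the `v1/_common.py` hunks, `_local_submit_setup` now matches how `execute_tasks` and `call_parallel_task` invoke the hook. A sketch of the round trip, with a placeholder standing in for the `get_local_backend_config(wftask)` call that the real hook wraps:

```python
from pathlib import Path
from typing import Optional


def _local_submit_setup(
    wftask,
    workflow_dir: Optional[Path] = None,
    workflow_dir_user: Optional[Path] = None,
) -> dict:
    # Placeholder body; the real function returns the local-backend
    # configuration assembled from wftask.meta and the config file
    return dict(local_backend_config={"parallel_tasks_per_job": None})


# As in execute_tasks: the returned dict is forwarded as extra executor options
extra_setup = _local_submit_setup(
    wftask=None,  # a v1 WorkflowTask at the real call sites
    workflow_dir=Path("/tmp/job"),
    workflow_dir_user=Path("/tmp/job"),
)
assert "local_backend_config" in extra_setup
```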