fractal-server 1.4.6__py3-none-any.whl → 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fractal_server/__init__.py +1 -1
- fractal_server/app/db/__init__.py +0 -1
- fractal_server/app/models/__init__.py +6 -8
- fractal_server/app/models/linkuserproject.py +9 -0
- fractal_server/app/models/security.py +6 -0
- fractal_server/app/models/v1/__init__.py +12 -0
- fractal_server/app/models/{dataset.py → v1/dataset.py} +5 -5
- fractal_server/app/models/{job.py → v1/job.py} +5 -5
- fractal_server/app/models/{project.py → v1/project.py} +5 -5
- fractal_server/app/models/{state.py → v1/state.py} +2 -2
- fractal_server/app/models/{task.py → v1/task.py} +7 -2
- fractal_server/app/models/{workflow.py → v1/workflow.py} +5 -5
- fractal_server/app/models/v2/__init__.py +22 -0
- fractal_server/app/models/v2/collection_state.py +21 -0
- fractal_server/app/models/v2/dataset.py +54 -0
- fractal_server/app/models/v2/job.py +51 -0
- fractal_server/app/models/v2/project.py +30 -0
- fractal_server/app/models/v2/task.py +93 -0
- fractal_server/app/models/v2/workflow.py +35 -0
- fractal_server/app/models/v2/workflowtask.py +49 -0
- fractal_server/app/routes/admin/__init__.py +0 -0
- fractal_server/app/routes/{admin.py → admin/v1.py} +42 -42
- fractal_server/app/routes/admin/v2.py +309 -0
- fractal_server/app/routes/api/v1/__init__.py +7 -7
- fractal_server/app/routes/api/v1/_aux_functions.py +8 -8
- fractal_server/app/routes/api/v1/dataset.py +48 -41
- fractal_server/app/routes/api/v1/job.py +14 -14
- fractal_server/app/routes/api/v1/project.py +30 -27
- fractal_server/app/routes/api/v1/task.py +26 -16
- fractal_server/app/routes/api/v1/task_collection.py +28 -16
- fractal_server/app/routes/api/v1/workflow.py +28 -28
- fractal_server/app/routes/api/v1/workflowtask.py +11 -11
- fractal_server/app/routes/api/v2/__init__.py +34 -0
- fractal_server/app/routes/api/v2/_aux_functions.py +502 -0
- fractal_server/app/routes/api/v2/dataset.py +293 -0
- fractal_server/app/routes/api/v2/images.py +279 -0
- fractal_server/app/routes/api/v2/job.py +200 -0
- fractal_server/app/routes/api/v2/project.py +186 -0
- fractal_server/app/routes/api/v2/status.py +150 -0
- fractal_server/app/routes/api/v2/submit.py +210 -0
- fractal_server/app/routes/api/v2/task.py +222 -0
- fractal_server/app/routes/api/v2/task_collection.py +239 -0
- fractal_server/app/routes/api/v2/task_legacy.py +59 -0
- fractal_server/app/routes/api/v2/workflow.py +380 -0
- fractal_server/app/routes/api/v2/workflowtask.py +265 -0
- fractal_server/app/routes/aux/_job.py +2 -2
- fractal_server/app/runner/__init__.py +0 -379
- fractal_server/app/runner/async_wrap.py +27 -0
- fractal_server/app/runner/components.py +5 -0
- fractal_server/app/runner/exceptions.py +129 -0
- fractal_server/app/runner/executors/__init__.py +0 -0
- fractal_server/app/runner/executors/slurm/__init__.py +3 -0
- fractal_server/app/runner/{_slurm → executors/slurm}/_batching.py +1 -1
- fractal_server/app/runner/executors/slurm/_check_jobs_status.py +72 -0
- fractal_server/app/runner/{_slurm → executors/slurm}/_executor_wait_thread.py +3 -4
- fractal_server/app/runner/{_slurm → executors/slurm}/_slurm_config.py +3 -152
- fractal_server/app/runner/{_slurm → executors/slurm}/_subprocess_run_as_user.py +42 -1
- fractal_server/app/runner/{_slurm → executors/slurm}/executor.py +46 -27
- fractal_server/app/runner/filenames.py +6 -0
- fractal_server/app/runner/set_start_and_last_task_index.py +39 -0
- fractal_server/app/runner/task_files.py +103 -0
- fractal_server/app/runner/v1/__init__.py +366 -0
- fractal_server/app/runner/{_common.py → v1/_common.py} +56 -111
- fractal_server/app/runner/{_local → v1/_local}/__init__.py +5 -4
- fractal_server/app/runner/{_local → v1/_local}/_local_config.py +6 -7
- fractal_server/app/runner/{_local → v1/_local}/_submit_setup.py +1 -5
- fractal_server/app/runner/v1/_slurm/__init__.py +312 -0
- fractal_server/app/runner/{_slurm → v1/_slurm}/_submit_setup.py +5 -11
- fractal_server/app/runner/v1/_slurm/get_slurm_config.py +163 -0
- fractal_server/app/runner/v1/common.py +117 -0
- fractal_server/app/runner/{handle_failed_job.py → v1/handle_failed_job.py} +8 -8
- fractal_server/app/runner/v2/__init__.py +336 -0
- fractal_server/app/runner/v2/_local/__init__.py +162 -0
- fractal_server/app/runner/v2/_local/_local_config.py +118 -0
- fractal_server/app/runner/v2/_local/_submit_setup.py +52 -0
- fractal_server/app/runner/v2/_local/executor.py +100 -0
- fractal_server/app/runner/{_slurm → v2/_slurm}/__init__.py +38 -47
- fractal_server/app/runner/v2/_slurm/_submit_setup.py +82 -0
- fractal_server/app/runner/v2/_slurm/get_slurm_config.py +182 -0
- fractal_server/app/runner/v2/deduplicate_list.py +23 -0
- fractal_server/app/runner/v2/handle_failed_job.py +165 -0
- fractal_server/app/runner/v2/merge_outputs.py +38 -0
- fractal_server/app/runner/v2/runner.py +343 -0
- fractal_server/app/runner/v2/runner_functions.py +374 -0
- fractal_server/app/runner/v2/runner_functions_low_level.py +130 -0
- fractal_server/app/runner/v2/task_interface.py +62 -0
- fractal_server/app/runner/v2/v1_compat.py +31 -0
- fractal_server/app/schemas/__init__.py +1 -42
- fractal_server/app/schemas/_validators.py +28 -5
- fractal_server/app/schemas/v1/__init__.py +36 -0
- fractal_server/app/schemas/{applyworkflow.py → v1/applyworkflow.py} +18 -18
- fractal_server/app/schemas/{dataset.py → v1/dataset.py} +30 -30
- fractal_server/app/schemas/{dumps.py → v1/dumps.py} +8 -8
- fractal_server/app/schemas/{manifest.py → v1/manifest.py} +5 -5
- fractal_server/app/schemas/{project.py → v1/project.py} +9 -9
- fractal_server/app/schemas/{task.py → v1/task.py} +12 -12
- fractal_server/app/schemas/{task_collection.py → v1/task_collection.py} +7 -7
- fractal_server/app/schemas/{workflow.py → v1/workflow.py} +38 -38
- fractal_server/app/schemas/v2/__init__.py +37 -0
- fractal_server/app/schemas/v2/dataset.py +126 -0
- fractal_server/app/schemas/v2/dumps.py +87 -0
- fractal_server/app/schemas/v2/job.py +114 -0
- fractal_server/app/schemas/v2/manifest.py +159 -0
- fractal_server/app/schemas/v2/project.py +34 -0
- fractal_server/app/schemas/v2/status.py +16 -0
- fractal_server/app/schemas/v2/task.py +151 -0
- fractal_server/app/schemas/v2/task_collection.py +109 -0
- fractal_server/app/schemas/v2/workflow.py +79 -0
- fractal_server/app/schemas/v2/workflowtask.py +208 -0
- fractal_server/config.py +13 -10
- fractal_server/images/__init__.py +4 -0
- fractal_server/images/models.py +136 -0
- fractal_server/images/tools.py +84 -0
- fractal_server/main.py +11 -3
- fractal_server/migrations/env.py +0 -2
- fractal_server/migrations/versions/5bf02391cfef_v2.py +245 -0
- fractal_server/tasks/__init__.py +0 -5
- fractal_server/tasks/endpoint_operations.py +13 -19
- fractal_server/tasks/utils.py +35 -0
- fractal_server/tasks/{_TaskCollectPip.py → v1/_TaskCollectPip.py} +3 -3
- fractal_server/tasks/v1/__init__.py +0 -0
- fractal_server/tasks/{background_operations.py → v1/background_operations.py} +20 -52
- fractal_server/tasks/v1/get_collection_data.py +14 -0
- fractal_server/tasks/v2/_TaskCollectPip.py +103 -0
- fractal_server/tasks/v2/__init__.py +0 -0
- fractal_server/tasks/v2/background_operations.py +381 -0
- fractal_server/tasks/v2/get_collection_data.py +14 -0
- fractal_server/urls.py +13 -0
- {fractal_server-1.4.6.dist-info → fractal_server-2.0.0.dist-info}/METADATA +11 -12
- fractal_server-2.0.0.dist-info/RECORD +169 -0
- fractal_server/app/runner/_slurm/.gitignore +0 -2
- fractal_server/app/runner/common.py +0 -307
- fractal_server/app/schemas/json_schemas/manifest.json +0 -81
- fractal_server-1.4.6.dist-info/RECORD +0 -97
- /fractal_server/app/runner/{_slurm → executors/slurm}/remote.py +0 -0
- /fractal_server/app/runner/{_local → v1/_local}/executor.py +0 -0
- {fractal_server-1.4.6.dist-info → fractal_server-2.0.0.dist-info}/LICENSE +0 -0
- {fractal_server-1.4.6.dist-info → fractal_server-2.0.0.dist-info}/WHEEL +0 -0
- {fractal_server-1.4.6.dist-info → fractal_server-2.0.0.dist-info}/entry_points.txt +0 -0
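
The file listing above reflects the main structural change in 2.0.0: existing (V1) modules move into `v1/` subpackages (models, schemas, routes, runner, tasks) while new `v2/` counterparts are added next to them. As a hedged illustration only (not part of the diff), downstream code that used the V1 ORM models would now import them through the `models.v1` subpackage, whose re-export of names such as `Dataset` and `ApplyWorkflow` is visible in the runner imports further below; the V2 class names are not shown in this excerpt.

```python
# Illustrative sketch of the 1.4.6 -> 2.0.0 import move (assumption: the v1
# subpackage re-exports these names, as the runner diff below suggests).
# In 1.4.6 these models presumably lived directly under fractal_server.app.models
# (not shown in this diff).
from fractal_server.app.models.v1 import ApplyWorkflow, Dataset, Workflow

# New V2 models live under fractal_server/app/models/v2/ (dataset.py, job.py,
# project.py, task.py, workflow.py, workflowtask.py); their class names are
# not part of this diff excerpt.
```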
fractal_server/app/runner/v1/__init__.py
@@ -0,0 +1,366 @@
+# Copyright 2022 (C) Friedrich Miescher Institute for Biomedical Research and
+# University of Zurich
+#
+# Original authors:
+# Jacopo Nespolo <jacopo.nespolo@exact-lab.it>
+# Tommaso Comparin <tommaso.comparin@exact-lab.it>
+# Marco Franzon <marco.franzon@exact-lab.it>
+#
+# This file is part of Fractal and was originally developed by eXact lab S.r.l.
+# <exact-lab.it> under contract with Liberali Lab from the Friedrich Miescher
+# Institute for Biomedical Research and Pelkmans Lab from the University of
+# Zurich.
+"""
+Runner backend subsystem root
+
+This module is the single entry point to the runner backend subsystem. Other
+subystems should only import this module and not its submodules or the
+individual backends.
+"""
+import os
+import traceback
+from pathlib import Path
+from typing import Optional
+
+from ....logger import set_logger
+from ....syringe import Inject
+from ....utils import get_timestamp
+from ...db import DB
+from ...models.v1 import ApplyWorkflow
+from ...models.v1 import Dataset
+from ...models.v1 import Workflow
+from ...models.v1 import WorkflowTask
+from ...schemas.v1 import JobStatusTypeV1
+from ..exceptions import JobExecutionError
+from ..exceptions import TaskExecutionError
+from ..filenames import WORKFLOW_LOG_FILENAME
+from ._local import process_workflow as local_process_workflow
+from ._slurm import process_workflow as slurm_process_workflow
+from .common import close_job_logger
+from .common import validate_workflow_compatibility  # noqa: F401
+from .handle_failed_job import assemble_history_failed_job
+from .handle_failed_job import assemble_meta_failed_job
+from fractal_server import __VERSION__
+from fractal_server.config import get_settings
+
+
+_backends = {}
+_backends["local"] = local_process_workflow
+_backends["slurm"] = slurm_process_workflow
+
+
+async def submit_workflow(
+    *,
+    workflow_id: int,
+    input_dataset_id: int,
+    output_dataset_id: int,
+    job_id: int,
+    worker_init: Optional[str] = None,
+    slurm_user: Optional[str] = None,
+    user_cache_dir: Optional[str] = None,
+) -> None:
+    """
+    Prepares a workflow and applies it to a dataset
+
+    This function wraps the process_workflow one, which is different for each
+    backend (e.g. local or slurm backend).
+
+    Args:
+        workflow_id:
+            ID of the workflow being applied
+        input_dataset_id:
+            Input dataset ID
+        output_dataset_id:
+            ID of the destination dataset of the workflow.
+        job_id:
+            Id of the job record which stores the state for the current
+            workflow application.
+        worker_init:
+            Custom executor parameters that get parsed before the execution of
+            each task.
+        user_cache_dir:
+            Cache directory (namely a path where the user can write); for the
+            slurm backend, this is used as a base directory for
+            `job.working_dir_user`.
+        slurm_user:
+            The username to impersonate for the workflow execution, for the
+            slurm backend.
+    """
+
+    # Declare runner backend and set `process_workflow` function
+    settings = Inject(get_settings)
+    FRACTAL_RUNNER_BACKEND = settings.FRACTAL_RUNNER_BACKEND
+    if FRACTAL_RUNNER_BACKEND == "local":
+        process_workflow = local_process_workflow
+    elif FRACTAL_RUNNER_BACKEND == "slurm":
+        process_workflow = slurm_process_workflow
+    else:
+        raise RuntimeError(f"Invalid runner backend {FRACTAL_RUNNER_BACKEND=}")
+
+    with next(DB.get_sync_db()) as db_sync:
+
+        job: ApplyWorkflow = db_sync.get(ApplyWorkflow, job_id)
+        if not job:
+            raise ValueError(f"Cannot fetch job {job_id} from database")
+
+        input_dataset: Dataset = db_sync.get(Dataset, input_dataset_id)
+        output_dataset: Dataset = db_sync.get(Dataset, output_dataset_id)
+        workflow: Workflow = db_sync.get(Workflow, workflow_id)
+        if not (input_dataset and output_dataset and workflow):
+            log_msg = ""
+            if not input_dataset:
+                log_msg += (
+                    f"Cannot fetch input_dataset {input_dataset_id} "
+                    "from database\n"
+                )
+            if not output_dataset:
+                log_msg += (
+                    f"Cannot fetch output_dataset {output_dataset_id} "
+                    "from database\n"
+                )
+            if not workflow:
+                log_msg += (
+                    f"Cannot fetch workflow {workflow_id} from database\n"
+                )
+            job.status = JobStatusTypeV1.FAILED
+            job.end_timestamp = get_timestamp()
+            job.log = log_msg
+            db_sync.merge(job)
+            db_sync.commit()
+            db_sync.close()
+            return
+
+        # Prepare some of process_workflow arguments
+        input_paths = input_dataset.paths
+        output_path = output_dataset.paths[0]
+
+        # Define and create server-side working folder
+        project_id = workflow.project_id
+        timestamp_string = get_timestamp().strftime("%Y%m%d_%H%M%S")
+        WORKFLOW_DIR = (
+            settings.FRACTAL_RUNNER_WORKING_BASE_DIR
+            / (
+                f"proj_{project_id:07d}_wf_{workflow_id:07d}_job_{job_id:07d}"
+                f"_{timestamp_string}"
+            )
+        ).resolve()
+
+        if WORKFLOW_DIR.exists():
+            raise RuntimeError(f"Workflow dir {WORKFLOW_DIR} already exists.")
+
+        # Create WORKFLOW_DIR with 755 permissions
+        original_umask = os.umask(0)
+        WORKFLOW_DIR.mkdir(parents=True, mode=0o755)
+        os.umask(original_umask)
+
+        # Define and create user-side working folder, if needed
+        if FRACTAL_RUNNER_BACKEND == "local":
+            WORKFLOW_DIR_USER = WORKFLOW_DIR
+        elif FRACTAL_RUNNER_BACKEND == "slurm":
+
+            from ..executors.slurm._subprocess_run_as_user import (
+                _mkdir_as_user,
+            )
+
+            WORKFLOW_DIR_USER = (
+                Path(user_cache_dir) / f"{WORKFLOW_DIR.name}"
+            ).resolve()
+            _mkdir_as_user(folder=str(WORKFLOW_DIR_USER), user=slurm_user)
+        else:
+            raise ValueError(f"{FRACTAL_RUNNER_BACKEND=} not supported")
+
+        # Update db
+        job.working_dir = WORKFLOW_DIR.as_posix()
+        job.working_dir_user = WORKFLOW_DIR_USER.as_posix()
+        db_sync.merge(job)
+        db_sync.commit()
+
+        # After Session.commit() is called, either explicitly or when using a
+        # context manager, all objects associated with the Session are expired.
+        # https://docs.sqlalchemy.org/en/14/orm/
+        # session_basics.html#opening-and-closing-a-session
+        # https://docs.sqlalchemy.org/en/14/orm/
+        # session_state_management.html#refreshing-expiring
+
+        # See issue #928:
+        # https://github.com/fractal-analytics-platform/
+        # fractal-server/issues/928
+
+        db_sync.refresh(input_dataset)
+        db_sync.refresh(output_dataset)
+        db_sync.refresh(workflow)
+
+        # Write logs
+        logger_name = f"WF{workflow_id}_job{job_id}"
+        log_file_path = WORKFLOW_DIR / WORKFLOW_LOG_FILENAME
+        logger = set_logger(
+            logger_name=logger_name,
+            log_file_path=log_file_path,
+        )
+        logger.info(
+            f'Start execution of workflow "{workflow.name}"; '
+            f"more logs at {str(log_file_path)}"
+        )
+        logger.debug(f"fractal_server.__VERSION__: {__VERSION__}")
+        logger.debug(f"FRACTAL_RUNNER_BACKEND: {FRACTAL_RUNNER_BACKEND}")
+        logger.debug(f"slurm_user: {slurm_user}")
+        logger.debug(f"slurm_account: {job.slurm_account}")
+        logger.debug(f"worker_init: {worker_init}")
+        logger.debug(f"input metadata keys: {list(input_dataset.meta.keys())}")
+        logger.debug(f"input_paths: {input_paths}")
+        logger.debug(f"output_path: {output_path}")
+        logger.debug(f"job.id: {job.id}")
+        logger.debug(f"job.working_dir: {job.working_dir}")
+        logger.debug(f"job.working_dir_user: {job.working_dir_user}")
+        logger.debug(f"job.first_task_index: {job.first_task_index}")
+        logger.debug(f"job.last_task_index: {job.last_task_index}")
+        logger.debug(f'START workflow "{workflow.name}"')
+
+    try:
+        # "The Session.close() method does not prevent the Session from being
+        # used again. The Session itself does not actually have a distinct
+        # “closed” state; it merely means the Session will release all database
+        # connections and ORM objects."
+        # (https://docs.sqlalchemy.org/en/20/orm/session_api.html#sqlalchemy.orm.Session.close).
+        #
+        # We close the session before the (possibly long) process_workflow
+        # call, to make sure all DB connections are released. The reason why we
+        # are not using a context manager within the try block is that we also
+        # need access to db_sync in the except branches.
+        db_sync = next(DB.get_sync_db())
+        db_sync.close()
+
+        output_dataset_meta_hist = await process_workflow(
+            workflow=workflow,
+            input_paths=input_paths,
+            output_path=output_path,
+            input_metadata=input_dataset.meta,
+            input_history=input_dataset.history,
+            slurm_user=slurm_user,
+            slurm_account=job.slurm_account,
+            user_cache_dir=user_cache_dir,
+            workflow_dir=WORKFLOW_DIR,
+            workflow_dir_user=WORKFLOW_DIR_USER,
+            logger_name=logger_name,
+            worker_init=worker_init,
+            first_task_index=job.first_task_index,
+            last_task_index=job.last_task_index,
+        )
+
+        logger.info(
+            f'End execution of workflow "{workflow.name}"; '
+            f"more logs at {str(log_file_path)}"
+        )
+        logger.debug(f'END workflow "{workflow.name}"')
+
+        # Replace output_dataset.meta and output_dataset.history with their
+        # up-to-date versions, obtained within process_workflow
+        output_dataset.history = output_dataset_meta_hist.pop("history")
+        output_dataset.meta = output_dataset_meta_hist.pop("metadata")
+
+        db_sync.merge(output_dataset)
+
+        # Update job DB entry
+        job.status = JobStatusTypeV1.DONE
+        job.end_timestamp = get_timestamp()
+        with log_file_path.open("r") as f:
+            logs = f.read()
+        job.log = logs
+        db_sync.merge(job)
+        close_job_logger(logger)
+        db_sync.commit()
+
+    except TaskExecutionError as e:
+
+        logger.debug(f'FAILED workflow "{workflow.name}", TaskExecutionError.')
+        logger.info(f'Workflow "{workflow.name}" failed (TaskExecutionError).')
+
+        # Assemble output_dataset.meta based on the last successful task, i.e.
+        # based on METADATA_FILENAME
+        output_dataset.meta = assemble_meta_failed_job(job, output_dataset)
+
+        # Assemble new history and assign it to output_dataset.meta
+        failed_wftask = db_sync.get(WorkflowTask, e.workflow_task_id)
+        output_dataset.history = assemble_history_failed_job(
+            job,
+            output_dataset,
+            workflow,
+            logger,
+            failed_wftask=failed_wftask,
+        )
+
+        db_sync.merge(output_dataset)
+
+        job.status = JobStatusTypeV1.FAILED
+        job.end_timestamp = get_timestamp()
+
+        exception_args_string = "\n".join(e.args)
+        job.log = (
+            f"TASK ERROR: "
+            f"Task name: {e.task_name}, "
+            f"position in Workflow: {e.workflow_task_order}\n"
+            f"TRACEBACK:\n{exception_args_string}"
+        )
+        db_sync.merge(job)
+        close_job_logger(logger)
+        db_sync.commit()
+
+    except JobExecutionError as e:
+
+        logger.debug(f'FAILED workflow "{workflow.name}", JobExecutionError.')
+        logger.info(f'Workflow "{workflow.name}" failed (JobExecutionError).')
+
+        # Assemble output_dataset.meta based on the last successful task, i.e.
+        # based on METADATA_FILENAME
+        output_dataset.meta = assemble_meta_failed_job(job, output_dataset)
+
+        # Assemble new history and assign it to output_dataset.meta
+        output_dataset.history = assemble_history_failed_job(
+            job,
+            output_dataset,
+            workflow,
+            logger,
+        )
+
+        db_sync.merge(output_dataset)
+
+        job.status = JobStatusTypeV1.FAILED
+        job.end_timestamp = get_timestamp()
+        error = e.assemble_error()
+        job.log = f"JOB ERROR in Fractal job {job.id}:\nTRACEBACK:\n{error}"
+        db_sync.merge(job)
+        close_job_logger(logger)
+        db_sync.commit()
+
+    except Exception:
+
+        logger.debug(f'FAILED workflow "{workflow.name}", unknown error.')
+        logger.info(f'Workflow "{workflow.name}" failed (unkwnon error).')
+
+        current_traceback = traceback.format_exc()
+
+        # Assemble output_dataset.meta based on the last successful task, i.e.
+        # based on METADATA_FILENAME
+        output_dataset.meta = assemble_meta_failed_job(job, output_dataset)
+
+        # Assemble new history and assign it to output_dataset.meta
+        output_dataset.history = assemble_history_failed_job(
+            job,
+            output_dataset,
+            workflow,
+            logger,
+        )
+
+        db_sync.merge(output_dataset)
+
+        job.status = JobStatusTypeV1.FAILED
+        job.end_timestamp = get_timestamp()
+        job.log = (
+            f"UNKNOWN ERROR in Fractal job {job.id}\n"
+            f"TRACEBACK:\n{current_traceback}"
+        )
+        db_sync.merge(job)
+        close_job_logger(logger)
+        db_sync.commit()
+    finally:
+        db_sync.close()
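
The `submit_workflow` coroutine above selects the backend with an explicit `if`/`elif` on `settings.FRACTAL_RUNNER_BACKEND`, even though the module also builds a `_backends` mapping. A minimal sketch of the equivalent mapping-based lookup, using stand-in callables (illustration only, not code from the package):

```python
# Stand-ins for the two process_workflow implementations imported above.
async def local_process_workflow(**kwargs):
    return {"metadata": {}, "history": []}


async def slurm_process_workflow(**kwargs):
    return {"metadata": {}, "history": []}


_backends = {
    "local": local_process_workflow,
    "slurm": slurm_process_workflow,
}


def select_process_workflow(backend: str):
    # Same failure mode as the if/elif chain: unknown backends raise RuntimeError.
    try:
        return _backends[backend]
    except KeyError:
        raise RuntimeError(
            f"Invalid runner backend FRACTAL_RUNNER_BACKEND={backend!r}"
        )
```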
fractal_server/app/runner/{_common.py → v1/_common.py}
@@ -10,7 +10,7 @@ import shutil
 import subprocess  # nosec
 import traceback
 from concurrent.futures import Executor
-from
+from copy import deepcopy
 from functools import partial
 from pathlib import Path
 from shlex import split as shlex_split
@@ -18,18 +18,19 @@ from typing import Any
 from typing import Callable
 from typing import Optional
 
-from
-from
-from
-from .
-from .
+from ....config import get_settings
+from ....logger import get_logger
+from ....syringe import Inject
+from ...models.v1 import Task
+from ...models.v1 import WorkflowTask
+from ...schemas.v1 import WorkflowTaskStatusTypeV1
+from ..exceptions import JobExecutionError
+from ..exceptions import TaskExecutionError
 from .common import TaskParameters
 from .common import write_args_file
-
-
-
-SHUTDOWN_FILENAME = "shutdown"
-WORKFLOW_LOG_FILENAME = "workflow.log"
+from fractal_server.app.runner.filenames import HISTORY_FILENAME
+from fractal_server.app.runner.filenames import METADATA_FILENAME
+from fractal_server.app.runner.task_files import get_task_file_paths
 
 
 def no_op_submit_setup_call(
@@ -37,7 +38,6 @@ def no_op_submit_setup_call(
     wftask: WorkflowTask,
     workflow_dir: Path,
     workflow_dir_user: Path,
-    task_pars: TaskParameters,
 ) -> dict:
     """
     Default (no-operation) interface of submit_setup_call.
@@ -45,104 +45,24 @@ def no_op_submit_setup_call(
     return {}
 
 
-def
-    """
-    Remove {" ", "/", "."} form a string, e.g. going from
-    'plate.zarr/B/03/0' to 'plate_zarr_B_03_0'.
+def _task_needs_image_list(_task: Task) -> bool:
     """
-
+    Whether a task requires `metadata["image"]` in its `args.json` file.
 
+    For details see
+    https://github.com/fractal-analytics-platform/fractal-server/issues/1237
 
-
-
-    Group all file paths pertaining to a task
-
-    Attributes:
-        workflow_dir:
-            Server-owned directory to store all task-execution-related relevant
-            files (inputs, outputs, errors, and all meta files related to the
-            job execution). Note: users cannot write directly to this folder.
-        workflow_dir_user:
-            User-side directory with the same scope as `workflow_dir`, and
-            where a user can write.
-        task_order:
-            Positional order of the task within a workflow.
-        component:
-            Specific component to run the task for (relevant for tasks that
-            will be executed in parallel over many components).
-        file_prefix:
-            Prefix for all task-related files.
-        args:
-            Path for input json file.
-        metadiff:
-            Path for output json file with metadata update.
-        out:
-            Path for task-execution stdout.
-        err:
-            Path for task-execution stderr.
-    """
-
-    workflow_dir: Path
-    workflow_dir_user: Path
-    task_order: Optional[int] = None
-    component: Optional[str] = None
-
-    file_prefix: str
-    args: Path
-    out: Path
-    err: Path
-    metadiff: Path
-
-    def __init__(
-        self,
-        workflow_dir: Path,
-        workflow_dir_user: Path,
-        task_order: Optional[int] = None,
-        component: Optional[str] = None,
-    ):
-        self.workflow_dir = workflow_dir
-        self.workflow_dir_user = workflow_dir_user
-        self.task_order = task_order
-        self.component = component
-
-        if self.component is not None:
-            component_safe = sanitize_component(str(self.component))
-            component_safe = f"_par_{component_safe}"
-        else:
-            component_safe = ""
-
-        if self.task_order is not None:
-            order = str(self.task_order)
-        else:
-            order = "task"
-        self.file_prefix = f"{order}{component_safe}"
-        self.args = self.workflow_dir_user / f"{self.file_prefix}.args.json"
-        self.out = self.workflow_dir_user / f"{self.file_prefix}.out"
-        self.err = self.workflow_dir_user / f"{self.file_prefix}.err"
-        self.metadiff = (
-            self.workflow_dir_user / f"{self.file_prefix}.metadiff.json"
-        )
-
-
-@lru_cache()
-def get_task_file_paths(
-    workflow_dir: Path,
-    workflow_dir_user: Path,
-    task_order: Optional[int] = None,
-    component: Optional[str] = None,
-) -> TaskFiles:
-    """
-    Return the corrisponding TaskFiles object
-
-    This function is mainly used as a cache to avoid instantiating needless
-    objects.
+    Args:
+        _task: The task to be checked.
     """
-
-
-
-        task_order=task_order,
-        component=component,
+    settings = Inject(get_settings)
+    exception_task_names = settings.FRACTAL_RUNNER_TASKS_INCLUDE_IMAGE.split(
+        ";"
     )
+    if _task.name in exception_task_names:
+        return True
+    else:
+        return False
 
 
 def _call_command_wrapper(cmd: str, stdout: Path, stderr: Path) -> None:
@@ -287,7 +207,7 @@ def call_single_task(
         with task_files.metadiff.open("r") as f_metadiff:
             diff_metadata = json.load(f_metadiff)
     except FileNotFoundError as e:
-        logger.
+        logger.warning(
             f"Skip collection of updated metadata. Original error: {str(e)}"
         )
         diff_metadata = {}
@@ -306,7 +226,7 @@ def call_single_task(
     wftask_dump["task"] = wftask.task.model_dump()
     new_history_item = dict(
         workflowtask=wftask_dump,
-        status=
+        status=WorkflowTaskStatusTypeV1.DONE,
         parallelization=None,
     )
     updated_history = task_pars.history.copy()
@@ -418,6 +338,26 @@ def call_single_parallel_task(
     return this_meta_update
 
 
+def trim_TaskParameters(
+    task_params: TaskParameters,
+    _task: Task,
+) -> TaskParameters:
+    """
+    Return a smaller copy of a TaskParameter object.
+
+    Remove metadata["image"] key/value pair - see issues 1237 and 1242.
+    (https://github.com/fractal-analytics-platform/fractal-server/issues/1237)
+    This applies only to parallel tasks with names different from the ones
+    defined in `_task_needs_image_list`.
+    """
+    task_params_slim = deepcopy(task_params)
+    if not _task_needs_image_list(_task) and _task.is_parallel:
+        if "image" in task_params_slim.metadata.keys():
+            task_params_slim.metadata.pop("image")
+        task_params_slim.history = []
+    return task_params_slim
+
+
 def call_parallel_task(
     *,
     executor: Executor,
@@ -484,7 +424,6 @@ def call_parallel_task(
     try:
         extra_setup = submit_setup_call(
             wftask=wftask,
-            task_pars=task_pars_depend,
             workflow_dir=workflow_dir,
             workflow_dir_user=workflow_dir_user,
         )
@@ -496,10 +435,14 @@ def call_parallel_task(
         )
 
     # Preliminary steps
+    actual_task_pars_depend = trim_TaskParameters(
+        task_pars_depend, wftask.task
+    )
+
     partial_call_task = partial(
         call_single_parallel_task,
         wftask=wftask,
-        task_pars=
+        task_pars=actual_task_pars_depend,
         workflow_dir=workflow_dir,
         workflow_dir_user=workflow_dir_user,
     )
@@ -543,7 +486,7 @@ def call_parallel_task(
     wftask_dump["task"] = wftask.task.model_dump()
    new_history_item = dict(
         workflowtask=wftask_dump,
-        status=
+        status=WorkflowTaskStatusTypeV1.DONE,
         parallelization=dict(
             parallelization_level=wftask.parallelization_level,
             component_list=component_list,
@@ -632,7 +575,6 @@ def execute_tasks(
         try:
             extra_setup = submit_setup_call(
                 wftask=this_wftask,
-                task_pars=current_task_pars,
                 workflow_dir=workflow_dir,
                 workflow_dir_user=workflow_dir_user,
             )
@@ -645,6 +587,9 @@ def execute_tasks(
             # NOTE: executor.submit(call_single_task, ...) is non-blocking,
             # i.e. the returned future may have `this_wftask_future.done() =
             # False`. We make it blocking right away, by calling `.result()`
+            # NOTE: do not use trim_TaskParameters for non-parallel tasks,
+            # since the `task_pars` argument in `call_single_task` is also used
+            # as a basis for new `metadata`.
             this_wftask_future = executor.submit(
                 call_single_task,
                 wftask=this_wftask,
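
The new `trim_TaskParameters` shown above shrinks the per-component payload of parallel tasks by dropping `metadata["image"]` and the history, unless the task name appears in `FRACTAL_RUNNER_TASKS_INCLUDE_IMAGE`. A self-contained sketch of that behaviour with hypothetical stand-in classes (the real `Task`/`TaskParameters` models differ):

```python
from copy import deepcopy
from dataclasses import dataclass, field


@dataclass
class FakeTask:  # stand-in for the V1 Task model
    name: str
    is_parallel: bool


@dataclass
class FakeTaskParameters:  # stand-in for TaskParameters
    metadata: dict = field(default_factory=dict)
    history: list = field(default_factory=list)


def trim(params: FakeTaskParameters, task: FakeTask, include_image: set) -> FakeTaskParameters:
    # Parallel tasks not in the allow-list get a slimmer copy: the potentially
    # large metadata["image"] list and the history are dropped.
    slim = deepcopy(params)
    if task.is_parallel and task.name not in include_image:
        slim.metadata.pop("image", None)
        slim.history = []
    return slim


params = FakeTaskParameters(
    metadata={"image": ["plate.zarr/B/03/0"], "well": ["B/03"]},
    history=[{"task": "previous task"}],
)
trimmed = trim(params, FakeTask(name="Some parallel task", is_parallel=True), set())
print(trimmed.metadata)  # {'well': ['B/03']} -- "image" was removed
```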
fractal_server/app/runner/{_local → v1/_local}/__init__.py
@@ -23,10 +23,10 @@ from pathlib import Path
 from typing import Any
 from typing import Optional
 
-from
+from ....models.v1 import Workflow
+from ...async_wrap import async_wrap
+from ...set_start_and_last_task_index import set_start_and_last_task_index
 from .._common import execute_tasks
-from ..common import async_wrap
-from ..common import set_start_and_last_task_index
 from ..common import TaskParameters
 from ._submit_setup import _local_submit_setup
 from .executor import FractalThreadPoolExecutor
@@ -49,7 +49,8 @@ def _process_workflow(
 
     Schedules the workflow using a `FractalThreadPoolExecutor`.
 
-    Cf.
+    Cf.
+    [process_workflow][fractal_server.app.runner.v1._local.process_workflow]
     for the call signature.
     """
 
fractal_server/app/runner/{_local → v1/_local}/_local_config.py
@@ -19,9 +19,9 @@ from pydantic import BaseModel
 from pydantic import Extra
 from pydantic.error_wrappers import ValidationError
 
-from
-from
-from
+from .....config import get_settings
+from .....syringe import Inject
+from ....models.v1 import WorkflowTask
 
 
 class LocalBackendConfigError(ValueError):
@@ -63,15 +63,14 @@ def get_local_backend_config(
     The sources for `parallel_tasks_per_job` attributes, starting from the
     highest-priority one, are
 
-    1. Properties in `wftask.meta
-       `Workflow.insert_task`, also includes `wftask.task.meta`);
+    1. Properties in `wftask.meta`;
     2. The general content of the local-backend configuration file;
     3. The default value (`None`).
 
     Arguments:
         wftask:
-            WorkflowTask for which the backend configuration
-            prepared.
+            WorkflowTask (V1) for which the backend configuration should
+            be prepared.
         config_path:
            Path of local-backend configuration file; if `None`, use
            `FRACTAL_LOCAL_CONFIG_FILE` variable from settings.