fractal-server 1.4.10__py3-none-any.whl → 2.0.0a0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fractal_server/__init__.py +1 -1
- fractal_server/app/models/__init__.py +4 -7
- fractal_server/app/models/linkuserproject.py +9 -0
- fractal_server/app/models/security.py +6 -0
- fractal_server/app/models/state.py +1 -1
- fractal_server/app/models/v1/__init__.py +10 -0
- fractal_server/app/models/{dataset.py → v1/dataset.py} +5 -5
- fractal_server/app/models/{job.py → v1/job.py} +5 -5
- fractal_server/app/models/{project.py → v1/project.py} +5 -5
- fractal_server/app/models/{task.py → v1/task.py} +7 -2
- fractal_server/app/models/{workflow.py → v1/workflow.py} +5 -5
- fractal_server/app/models/v2/__init__.py +20 -0
- fractal_server/app/models/v2/dataset.py +55 -0
- fractal_server/app/models/v2/job.py +51 -0
- fractal_server/app/models/v2/project.py +31 -0
- fractal_server/app/models/v2/task.py +93 -0
- fractal_server/app/models/v2/workflow.py +43 -0
- fractal_server/app/models/v2/workflowtask.py +90 -0
- fractal_server/app/routes/{admin.py → admin/v1.py} +42 -42
- fractal_server/app/routes/admin/v2.py +275 -0
- fractal_server/app/routes/api/v1/__init__.py +7 -7
- fractal_server/app/routes/api/v1/_aux_functions.py +2 -2
- fractal_server/app/routes/api/v1/dataset.py +37 -37
- fractal_server/app/routes/api/v1/job.py +12 -12
- fractal_server/app/routes/api/v1/project.py +23 -21
- fractal_server/app/routes/api/v1/task.py +24 -14
- fractal_server/app/routes/api/v1/task_collection.py +16 -14
- fractal_server/app/routes/api/v1/workflow.py +24 -24
- fractal_server/app/routes/api/v1/workflowtask.py +10 -10
- fractal_server/app/routes/api/v2/__init__.py +28 -0
- fractal_server/app/routes/api/v2/_aux_functions.py +497 -0
- fractal_server/app/routes/api/v2/apply.py +220 -0
- fractal_server/app/routes/api/v2/dataset.py +310 -0
- fractal_server/app/routes/api/v2/images.py +212 -0
- fractal_server/app/routes/api/v2/job.py +200 -0
- fractal_server/app/routes/api/v2/project.py +205 -0
- fractal_server/app/routes/api/v2/task.py +222 -0
- fractal_server/app/routes/api/v2/task_collection.py +229 -0
- fractal_server/app/routes/api/v2/workflow.py +398 -0
- fractal_server/app/routes/api/v2/workflowtask.py +269 -0
- fractal_server/app/routes/aux/_job.py +1 -1
- fractal_server/app/runner/async_wrap.py +27 -0
- fractal_server/app/runner/exceptions.py +129 -0
- fractal_server/app/runner/executors/local/__init__.py +3 -0
- fractal_server/app/runner/{_local → executors/local}/executor.py +2 -2
- fractal_server/app/runner/executors/slurm/__init__.py +3 -0
- fractal_server/app/runner/{_slurm → executors/slurm}/_batching.py +1 -1
- fractal_server/app/runner/{_slurm → executors/slurm}/_check_jobs_status.py +1 -1
- fractal_server/app/runner/{_slurm → executors/slurm}/_executor_wait_thread.py +1 -1
- fractal_server/app/runner/{_slurm → executors/slurm}/_slurm_config.py +3 -152
- fractal_server/app/runner/{_slurm → executors/slurm}/_subprocess_run_as_user.py +1 -1
- fractal_server/app/runner/{_slurm → executors/slurm}/executor.py +9 -9
- fractal_server/app/runner/filenames.py +6 -0
- fractal_server/app/runner/set_start_and_last_task_index.py +39 -0
- fractal_server/app/runner/task_files.py +105 -0
- fractal_server/app/runner/{__init__.py → v1/__init__.py} +24 -22
- fractal_server/app/runner/{_common.py → v1/_common.py} +13 -120
- fractal_server/app/runner/{_local → v1/_local}/__init__.py +6 -6
- fractal_server/app/runner/{_local → v1/_local}/_local_config.py +6 -7
- fractal_server/app/runner/{_local → v1/_local}/_submit_setup.py +1 -5
- fractal_server/app/runner/v1/_slurm/__init__.py +310 -0
- fractal_server/app/runner/{_slurm → v1/_slurm}/_submit_setup.py +3 -9
- fractal_server/app/runner/v1/_slurm/get_slurm_config.py +163 -0
- fractal_server/app/runner/v1/common.py +117 -0
- fractal_server/app/runner/{handle_failed_job.py → v1/handle_failed_job.py} +8 -8
- fractal_server/app/runner/v2/__init__.py +337 -0
- fractal_server/app/runner/v2/_local/__init__.py +169 -0
- fractal_server/app/runner/v2/_local/_local_config.py +118 -0
- fractal_server/app/runner/v2/_local/_submit_setup.py +52 -0
- fractal_server/app/runner/v2/_slurm/__init__.py +157 -0
- fractal_server/app/runner/v2/_slurm/_submit_setup.py +83 -0
- fractal_server/app/runner/v2/_slurm/get_slurm_config.py +179 -0
- fractal_server/app/runner/v2/components.py +5 -0
- fractal_server/app/runner/v2/deduplicate_list.py +24 -0
- fractal_server/app/runner/v2/handle_failed_job.py +156 -0
- fractal_server/app/runner/v2/merge_outputs.py +41 -0
- fractal_server/app/runner/v2/runner.py +264 -0
- fractal_server/app/runner/v2/runner_functions.py +339 -0
- fractal_server/app/runner/v2/runner_functions_low_level.py +134 -0
- fractal_server/app/runner/v2/task_interface.py +43 -0
- fractal_server/app/runner/v2/v1_compat.py +21 -0
- fractal_server/app/schemas/__init__.py +4 -42
- fractal_server/app/schemas/v1/__init__.py +42 -0
- fractal_server/app/schemas/{applyworkflow.py → v1/applyworkflow.py} +18 -18
- fractal_server/app/schemas/{dataset.py → v1/dataset.py} +30 -30
- fractal_server/app/schemas/{dumps.py → v1/dumps.py} +8 -8
- fractal_server/app/schemas/{manifest.py → v1/manifest.py} +5 -5
- fractal_server/app/schemas/{project.py → v1/project.py} +9 -9
- fractal_server/app/schemas/{task.py → v1/task.py} +12 -12
- fractal_server/app/schemas/{task_collection.py → v1/task_collection.py} +7 -7
- fractal_server/app/schemas/{workflow.py → v1/workflow.py} +38 -38
- fractal_server/app/schemas/v2/__init__.py +34 -0
- fractal_server/app/schemas/v2/dataset.py +88 -0
- fractal_server/app/schemas/v2/dumps.py +87 -0
- fractal_server/app/schemas/v2/job.py +113 -0
- fractal_server/app/schemas/v2/manifest.py +109 -0
- fractal_server/app/schemas/v2/project.py +36 -0
- fractal_server/app/schemas/v2/task.py +121 -0
- fractal_server/app/schemas/v2/task_collection.py +105 -0
- fractal_server/app/schemas/v2/workflow.py +78 -0
- fractal_server/app/schemas/v2/workflowtask.py +118 -0
- fractal_server/config.py +5 -4
- fractal_server/images/__init__.py +50 -0
- fractal_server/images/tools.py +86 -0
- fractal_server/main.py +11 -3
- fractal_server/migrations/versions/4b35c5cefbe3_tmp_is_v2_compatible.py +39 -0
- fractal_server/migrations/versions/56af171b0159_v2.py +217 -0
- fractal_server/migrations/versions/876f28db9d4e_tmp_split_task_and_wftask_meta.py +68 -0
- fractal_server/migrations/versions/974c802f0dd0_tmp_workflowtaskv2_type_in_db.py +37 -0
- fractal_server/migrations/versions/9cd305cd6023_tmp_workflowtaskv2.py +40 -0
- fractal_server/migrations/versions/a6231ed6273c_tmp_args_schemas_in_taskv2.py +42 -0
- fractal_server/migrations/versions/b9e9eed9d442_tmp_taskv2_type.py +37 -0
- fractal_server/migrations/versions/e3e639454d4b_tmp_make_task_meta_non_optional.py +50 -0
- fractal_server/tasks/__init__.py +0 -5
- fractal_server/tasks/endpoint_operations.py +13 -19
- fractal_server/tasks/utils.py +35 -0
- fractal_server/tasks/{_TaskCollectPip.py → v1/_TaskCollectPip.py} +3 -3
- fractal_server/tasks/{background_operations.py → v1/background_operations.py} +18 -50
- fractal_server/tasks/v1/get_collection_data.py +14 -0
- fractal_server/tasks/v2/_TaskCollectPip.py +103 -0
- fractal_server/tasks/v2/background_operations.py +382 -0
- fractal_server/tasks/v2/get_collection_data.py +14 -0
- {fractal_server-1.4.10.dist-info → fractal_server-2.0.0a0.dist-info}/METADATA +1 -1
- fractal_server-2.0.0a0.dist-info/RECORD +166 -0
- fractal_server/app/runner/_slurm/.gitignore +0 -2
- fractal_server/app/runner/_slurm/__init__.py +0 -150
- fractal_server/app/runner/common.py +0 -311
- fractal_server-1.4.10.dist-info/RECORD +0 -98
- /fractal_server/app/runner/{_slurm → executors/slurm}/remote.py +0 -0
- {fractal_server-1.4.10.dist-info → fractal_server-2.0.0a0.dist-info}/LICENSE +0 -0
- {fractal_server-1.4.10.dist-info → fractal_server-2.0.0a0.dist-info}/WHEEL +0 -0
- {fractal_server-1.4.10.dist-info → fractal_server-2.0.0a0.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,337 @@
|
|
1
|
+
"""
|
2
|
+
Runner backend subsystem root V2
|
3
|
+
|
4
|
+
This module is the single entry point to the runner backend subsystem V2.
|
5
|
+
Other subystems should only import this module and not its submodules or
|
6
|
+
the individual backends.
|
7
|
+
"""
|
8
|
+
import os
|
9
|
+
import traceback
|
10
|
+
from pathlib import Path
|
11
|
+
from typing import Optional
|
12
|
+
|
13
|
+
from sqlalchemy.orm.attributes import flag_modified
|
14
|
+
|
15
|
+
from ....config import get_settings
|
16
|
+
from ....logger import close_logger
|
17
|
+
from ....logger import set_logger
|
18
|
+
from ....syringe import Inject
|
19
|
+
from ....utils import get_timestamp
|
20
|
+
from ...db import DB
|
21
|
+
from ...models.v2 import DatasetV2
|
22
|
+
from ...models.v2 import JobV2
|
23
|
+
from ...models.v2 import WorkflowTaskV2
|
24
|
+
from ...models.v2 import WorkflowV2
|
25
|
+
from ...schemas.v2 import JobStatusTypeV2
|
26
|
+
from ..exceptions import JobExecutionError
|
27
|
+
from ..exceptions import TaskExecutionError
|
28
|
+
from ..filenames import WORKFLOW_LOG_FILENAME
|
29
|
+
from ._local import process_workflow as local_process_workflow
|
30
|
+
from ._slurm import process_workflow as slurm_process_workflow
|
31
|
+
from .handle_failed_job import assemble_filters_failed_job
|
32
|
+
from .handle_failed_job import assemble_history_failed_job
|
33
|
+
from .handle_failed_job import assemble_images_failed_job
|
34
|
+
from .runner import execute_tasks_v2 # noqa
|
35
|
+
from fractal_server import __VERSION__
|
36
|
+
|
37
|
+
# Registry mapping each supported FRACTAL_RUNNER_BACKEND value to its
# `process_workflow` implementation.
_backends = {
    "local": local_process_workflow,
    "slurm": slurm_process_workflow,
}
async def submit_workflow(
    *,
    workflow_id: int,
    dataset_id: int,
    job_id: int,
    worker_init: Optional[str] = None,
    slurm_user: Optional[str] = None,
    user_cache_dir: Optional[str] = None,
) -> None:
    """
    Prepares a workflow and applies it to a dataset

    This function wraps the process_workflow one, which is different for each
    backend (e.g. local or slurm backend).

    Args:
        workflow_id:
            ID of the workflow being applied
        dataset_id:
            Dataset ID
        job_id:
            Id of the job record which stores the state for the current
            workflow application.
        worker_init:
            Custom executor parameters that get parsed before the execution of
            each task.
        user_cache_dir:
            Cache directory (namely a path where the user can write); for the
            slurm backend, this is used as a base directory for
            `job.working_dir_user`.
        slurm_user:
            The username to impersonate for the workflow execution, for the
            slurm backend.

    Raises:
        ValueError:
            If the job cannot be fetched from the database, or if the
            backend value is unsupported when creating the user-side folder.
        RuntimeError:
            If `FRACTAL_RUNNER_BACKEND` is invalid, or if the server-side
            working folder already exists.
    """

    # Declare runner backend and set `process_workflow` function
    settings = Inject(get_settings)
    FRACTAL_RUNNER_BACKEND = settings.FRACTAL_RUNNER_BACKEND
    if FRACTAL_RUNNER_BACKEND == "local":
        process_workflow = local_process_workflow
    elif FRACTAL_RUNNER_BACKEND == "slurm":
        process_workflow = slurm_process_workflow
    else:
        raise RuntimeError(f"Invalid runner backend {FRACTAL_RUNNER_BACKEND=}")

    with next(DB.get_sync_db()) as db_sync:

        job: JobV2 = db_sync.get(JobV2, job_id)
        if not job:
            raise ValueError(f"Cannot fetch job {job_id} from database")

        dataset: DatasetV2 = db_sync.get(DatasetV2, dataset_id)
        workflow: WorkflowV2 = db_sync.get(WorkflowV2, workflow_id)
        if not (dataset and workflow):
            # Record the failure on the job itself (instead of raising), so
            # that the error is visible to whoever polls the job record.
            log_msg = ""
            if not dataset:
                log_msg += f"Cannot fetch dataset {dataset_id} from database\n"
            if not workflow:
                log_msg += (
                    f"Cannot fetch workflow {workflow_id} from database\n"
                )
            job.status = JobStatusTypeV2.FAILED
            job.end_timestamp = get_timestamp()
            job.log = log_msg
            db_sync.merge(job)
            db_sync.commit()
            db_sync.close()
            return

        # Define and create server-side working folder
        project_id = workflow.project_id
        timestamp_string = get_timestamp().strftime("%Y%m%d_%H%M%S")
        WORKFLOW_DIR = (
            settings.FRACTAL_RUNNER_WORKING_BASE_DIR
            / (
                f"proj_{project_id:07d}_wf_{workflow_id:07d}_job_{job_id:07d}"
                f"_{timestamp_string}"
            )
        ).resolve()

        if WORKFLOW_DIR.exists():
            raise RuntimeError(f"Workflow dir {WORKFLOW_DIR} already exists.")

        # Create WORKFLOW_DIR with 755 permissions; the umask is temporarily
        # reset to 0 so that `mode=0o755` is applied exactly as given.
        original_umask = os.umask(0)
        WORKFLOW_DIR.mkdir(parents=True, mode=0o755)
        os.umask(original_umask)

        # Define and create user-side working folder, if needed
        if FRACTAL_RUNNER_BACKEND == "local":
            WORKFLOW_DIR_USER = WORKFLOW_DIR
        elif FRACTAL_RUNNER_BACKEND == "slurm":

            # Deferred import: this helper is only needed (and only expected
            # to be importable) for the slurm backend.
            from ..executors.slurm._subprocess_run_as_user import (
                _mkdir_as_user,
            )

            WORKFLOW_DIR_USER = (
                Path(user_cache_dir) / f"{WORKFLOW_DIR.name}"
            ).resolve()
            _mkdir_as_user(folder=str(WORKFLOW_DIR_USER), user=slurm_user)
        else:
            raise ValueError(f"{FRACTAL_RUNNER_BACKEND=} not supported")

        # Update db
        job.working_dir = WORKFLOW_DIR.as_posix()
        job.working_dir_user = WORKFLOW_DIR_USER.as_posix()
        db_sync.merge(job)
        db_sync.commit()

        # After Session.commit() is called, either explicitly or when using a
        # context manager, all objects associated with the Session are expired.
        # https://docs.sqlalchemy.org/en/14/orm/
        # session_basics.html#opening-and-closing-a-session
        # https://docs.sqlalchemy.org/en/14/orm/
        # session_state_management.html#refreshing-expiring

        # See issue #928:
        # https://github.com/fractal-analytics-platform/
        # fractal-server/issues/928

        db_sync.refresh(dataset)
        db_sync.refresh(workflow)

        # Write logs
        logger_name = f"WF{workflow_id}_job{job_id}"
        log_file_path = WORKFLOW_DIR / WORKFLOW_LOG_FILENAME
        logger = set_logger(
            logger_name=logger_name,
            log_file_path=log_file_path,
        )
        logger.info(
            f'Start execution of workflow "{workflow.name}"; '
            f"more logs at {str(log_file_path)}"
        )
        logger.debug(f"fractal_server.__VERSION__: {__VERSION__}")
        logger.debug(f"FRACTAL_RUNNER_BACKEND: {FRACTAL_RUNNER_BACKEND}")
        logger.debug(f"slurm_user: {slurm_user}")
        logger.debug(f"slurm_account: {job.slurm_account}")
        logger.debug(f"worker_init: {worker_init}")
        logger.debug(f"job.id: {job.id}")
        logger.debug(f"job.working_dir: {job.working_dir}")
        logger.debug(f"job.working_dir_user: {job.working_dir_user}")
        logger.debug(f"job.first_task_index: {job.first_task_index}")
        logger.debug(f"job.last_task_index: {job.last_task_index}")
        logger.debug(f'START workflow "{workflow.name}"')

        try:
            # "The Session.close() method does not prevent the Session from being
            # used again. The Session itself does not actually have a distinct
            # “closed” state; it merely means the Session will release all database
            # connections and ORM objects."
            # (https://docs.sqlalchemy.org/en/20/orm/session_api.html#sqlalchemy.orm.Session.close).
            #
            # We close the session before the (possibly long) process_workflow
            # call, to make sure all DB connections are released. The reason why we
            # are not using a context manager within the try block is that we also
            # need access to db_sync in the except branches.
            db_sync = next(DB.get_sync_db())
            db_sync.close()

            new_dataset_attributes = await process_workflow(
                workflow=workflow,
                dataset=dataset,
                slurm_user=slurm_user,
                slurm_account=job.slurm_account,
                user_cache_dir=user_cache_dir,
                workflow_dir=WORKFLOW_DIR,
                workflow_dir_user=WORKFLOW_DIR_USER,
                logger_name=logger_name,
                worker_init=worker_init,
                first_task_index=job.first_task_index,
                last_task_index=job.last_task_index,
            )

            logger.info(
                f'End execution of workflow "{workflow.name}"; '
                f"more logs at {str(log_file_path)}"
            )
            logger.debug(f'END workflow "{workflow.name}"')

            # Update dataset attributes, in case of successful execution
            dataset.history.extend(new_dataset_attributes["history"])
            dataset.filters = new_dataset_attributes["filters"]
            dataset.images = new_dataset_attributes["images"]
            # flag_modified is required because these are JSON columns: in-place
            # mutation is not auto-detected by SQLAlchemy.
            for attribute_name in ["filters", "history", "images"]:
                flag_modified(dataset, attribute_name)
            db_sync.merge(dataset)

            # Update job DB entry
            job.status = JobStatusTypeV2.DONE
            job.end_timestamp = get_timestamp()
            with log_file_path.open("r") as f:
                logs = f.read()
            job.log = logs
            db_sync.merge(job)
            db_sync.commit()

        except TaskExecutionError as e:
            # A task exited with a positive exit code; attribute the failure to
            # the specific WorkflowTask that raised.

            logger.debug(f'FAILED workflow "{workflow.name}", TaskExecutionError.')
            logger.info(f'Workflow "{workflow.name}" failed (TaskExecutionError).')

            # Read dataset attributes produced by the last successful task, and
            # update the DB dataset accordingly
            failed_wftask = db_sync.get(WorkflowTaskV2, e.workflow_task_id)
            dataset.history = assemble_history_failed_job(
                job,
                dataset,
                workflow,
                logger,
                failed_wftask=failed_wftask,
            )
            latest_filters = assemble_filters_failed_job(job)
            if latest_filters is not None:
                dataset.filters = latest_filters
            latest_images = assemble_images_failed_job(job)
            if latest_images is not None:
                dataset.images = latest_images
            db_sync.merge(dataset)

            job.status = JobStatusTypeV2.FAILED
            job.end_timestamp = get_timestamp()

            exception_args_string = "\n".join(e.args)
            job.log = (
                f"TASK ERROR: "
                f"Task name: {e.task_name}, "
                f"position in Workflow: {e.workflow_task_order}\n"
                f"TRACEBACK:\n{exception_args_string}"
            )
            db_sync.merge(job)
            db_sync.commit()

        except JobExecutionError as e:
            # The executor itself failed (negative exit codes).

            logger.debug(f'FAILED workflow "{workflow.name}", JobExecutionError.')
            logger.info(f'Workflow "{workflow.name}" failed (JobExecutionError).')

            # Read dataset attributes produced by the last successful task, and
            # update the DB dataset accordingly
            dataset.history = assemble_history_failed_job(
                job,
                dataset,
                workflow,
                logger,
            )
            latest_filters = assemble_filters_failed_job(job)
            if latest_filters is not None:
                dataset.filters = latest_filters
            latest_images = assemble_images_failed_job(job)
            if latest_images is not None:
                dataset.images = latest_images
            db_sync.merge(dataset)

            job.status = JobStatusTypeV2.FAILED
            job.end_timestamp = get_timestamp()
            error = e.assemble_error()
            job.log = f"JOB ERROR in Fractal job {job.id}:\nTRACEBACK:\n{error}"
            db_sync.merge(job)
            db_sync.commit()

        except Exception:
            # Catch-all boundary: any unexpected error is recorded on the job.

            logger.debug(f'FAILED workflow "{workflow.name}", unknown error.')
            # NOTE(review): "unkwnon" below is a typo for "unknown" in this
            # user-visible log message — fix in a behavior-changing pass.
            logger.info(f'Workflow "{workflow.name}" failed (unkwnon error).')

            current_traceback = traceback.format_exc()

            # Read dataset attributes produced by the last successful task, and
            # update the DB dataset accordingly
            dataset.history = assemble_history_failed_job(
                job,
                dataset,
                workflow,
                logger,
            )
            latest_filters = assemble_filters_failed_job(job)
            if latest_filters is not None:
                dataset.filters = latest_filters
            latest_images = assemble_images_failed_job(job)
            if latest_images is not None:
                dataset.images = latest_images
            db_sync.merge(dataset)

            job.status = JobStatusTypeV2.FAILED
            job.end_timestamp = get_timestamp()
            job.log = (
                f"UNKNOWN ERROR in Fractal job {job.id}\n"
                f"TRACEBACK:\n{current_traceback}"
            )
            db_sync.merge(job)
            db_sync.commit()
        finally:
            close_logger(logger)
            db_sync.close()
@@ -0,0 +1,169 @@
|
|
1
|
+
# Copyright 2022 (C) Friedrich Miescher Institute for Biomedical Research and
|
2
|
+
# University of Zurich
|
3
|
+
#
|
4
|
+
# Original authors:
|
5
|
+
# Jacopo Nespolo <jacopo.nespolo@exact-lab.it>
|
6
|
+
# Tommaso Comparin <tommaso.comparin@exact-lab.it>
|
7
|
+
# Marco Franzon <marco.franzon@exact-lab.it>
|
8
|
+
#
|
9
|
+
# This file is part of Fractal and was originally developed by eXact lab S.r.l.
|
10
|
+
# <exact-lab.it> under contract with Liberali Lab from the Friedrich Miescher
|
11
|
+
# Institute for Biomedical Research and Pelkmans Lab from the University of
|
12
|
+
# Zurich.
|
13
|
+
"""
|
14
|
+
Local Backend
|
15
|
+
|
16
|
+
This backend runs Fractal workflows using `FractalThreadPoolExecutor` (a custom
|
17
|
+
version of Python
|
18
|
+
[ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#concurrent.futures.ThreadPoolExecutor))
|
19
|
+
to run tasks in several threads.
|
20
|
+
Incidentally, it also represents the reference implementation for a backend.
|
21
|
+
"""
|
22
|
+
from pathlib import Path
|
23
|
+
from typing import Optional
|
24
|
+
|
25
|
+
from ....models.v2 import DatasetV2
|
26
|
+
from ....models.v2 import WorkflowV2
|
27
|
+
from ...async_wrap import async_wrap
|
28
|
+
from ...executors.local.executor import FractalThreadPoolExecutor
|
29
|
+
from ...set_start_and_last_task_index import set_start_and_last_task_index
|
30
|
+
from ..runner import execute_tasks_v2
|
31
|
+
from ._submit_setup import _local_submit_setup
|
32
|
+
|
33
|
+
# from typing import Any
|
34
|
+
|
35
|
+
|
36
|
+
def _process_workflow(
    *,
    workflow: WorkflowV2,
    dataset: DatasetV2,
    logger_name: str,
    workflow_dir: Path,
    first_task_index: int,
    last_task_index: int,
) -> dict:
    """
    Internal processing routine

    Schedules the selected slice of the workflow's task list on a
    `FractalThreadPoolExecutor`.

    Cf. [process_workflow][fractal_server.app.runner._local.process_workflow]
    for the call signature.
    """
    # Both endpoints are inclusive, hence `last_task_index + 1`.
    selected_wftasks = workflow.task_list[
        first_task_index : (last_task_index + 1)  # noqa
    ]
    with FractalThreadPoolExecutor() as executor:
        # For the local backend, server-side and user-side directories
        # coincide, so `workflow_dir` is passed for both.
        return execute_tasks_v2(
            wf_task_list=selected_wftasks,
            dataset=dataset,
            executor=executor,
            workflow_dir=workflow_dir,
            workflow_dir_user=workflow_dir,
            logger_name=logger_name,
            submit_setup_call=_local_submit_setup,
        )
|
68
|
+
|
69
|
+
async def process_workflow(
    *,
    workflow: WorkflowV2,
    dataset: DatasetV2,
    workflow_dir: Path,
    workflow_dir_user: Optional[Path] = None,
    first_task_index: Optional[int] = None,
    last_task_index: Optional[int] = None,
    logger_name: str,
    # Slurm-specific
    user_cache_dir: Optional[str] = None,
    slurm_user: Optional[str] = None,
    slurm_account: Optional[str] = None,
    worker_init: Optional[str] = None,
) -> dict:
    """
    Run a workflow

    This function is responsible for running a workflow on some input data,
    saving the output and taking care of any exception raised during the run.

    NOTE: This is the `local` backend's public interface, which also works as
    a reference implementation for other backends.

    Args:
        workflow:
            The workflow to be run
        dataset:
            The V2 dataset that the workflow is applied to.
        logger_name:
            Name of the logger to log information on the run to
        workflow_dir:
            Working directory for this run.
        workflow_dir_user:
            Working directory for this run, on the user side. This argument is
            present for compatibility with the standard backend interface, but
            for the `local` backend it cannot be different from `workflow_dir`.
        slurm_user:
            Username to impersonate to run the workflow. This argument is
            present for compatibility with the standard backend interface, but
            is ignored in the `local` backend.
        slurm_account:
            SLURM account to use when running the workflow. This argument is
            present for compatibility with the standard backend interface, but
            is ignored in the `local` backend.
        user_cache_dir:
            Cache directory of the user who will run the workflow. This
            argument is present for compatibility with the standard backend
            interface, but is ignored in the `local` backend.
        worker_init:
            Any additional, usually backend specific, information to be passed
            to the backend executor. This argument is present for compatibility
            with the standard backend interface, but is ignored in the `local`
            backend.
        first_task_index:
            Positional index of the first task to execute; if `None`, start
            from `0`.
        last_task_index:
            Positional index of the last task to execute; if `None`, proceed
            until the last task.

    Raises:
        NotImplementedError: if `workflow_dir_user` differs from
            `workflow_dir` (unsupported for the `local` backend).
        TaskExecutionError: wrapper for errors raised during tasks' execution
            (positive exit codes).
        JobExecutionError: wrapper for errors raised by the tasks' executors
            (negative exit codes).

    Returns:
        new_dataset_attributes:
            The updated dataset attributes (`history`, `filters`, `images`),
            as produced by the last task of the workflow.
    """

    if workflow_dir_user and (workflow_dir_user != workflow_dir):
        raise NotImplementedError(
            "Local backend does not support different directories "
            f"{workflow_dir=} and {workflow_dir_user=}"
        )

    # Normalize the (possibly None) task-index endpoints into concrete values
    num_tasks = len(workflow.task_list)
    first_task_index, last_task_index = set_start_and_last_task_index(
        num_tasks,
        first_task_index=first_task_index,
        last_task_index=last_task_index,
    )

    # Run the synchronous processing routine in a thread, so that the event
    # loop is not blocked for the (possibly long) workflow execution.
    new_dataset_attributes = await async_wrap(_process_workflow)(
        workflow=workflow,
        dataset=dataset,
        logger_name=logger_name,
        workflow_dir=workflow_dir,
        first_task_index=first_task_index,
        last_task_index=last_task_index,
    )
    return new_dataset_attributes
@@ -0,0 +1,118 @@
|
|
1
|
+
# Copyright 2022 (C) Friedrich Miescher Institute for Biomedical Research and
|
2
|
+
# University of Zurich
|
3
|
+
#
|
4
|
+
# Original authors:
|
5
|
+
# Tommaso Comparin <tommaso.comparin@exact-lab.it>
|
6
|
+
#
|
7
|
+
# This file is part of Fractal and was originally developed by eXact lab S.r.l.
|
8
|
+
# <exact-lab.it> under contract with Liberali Lab from the Friedrich Miescher
|
9
|
+
# Institute for Biomedical Research and Pelkmans Lab from the University of
|
10
|
+
# Zurich.
|
11
|
+
"""
|
12
|
+
Submodule to handle the local-backend configuration for a WorkflowTask
|
13
|
+
"""
|
14
|
+
import json
|
15
|
+
from pathlib import Path
|
16
|
+
from typing import Literal
|
17
|
+
from typing import Optional
|
18
|
+
|
19
|
+
from pydantic import BaseModel
|
20
|
+
from pydantic import Extra
|
21
|
+
from pydantic.error_wrappers import ValidationError
|
22
|
+
|
23
|
+
from .....config import get_settings
|
24
|
+
from .....syringe import Inject
|
25
|
+
from ....models.v2 import WorkflowTaskV2
|
26
|
+
|
27
|
+
|
28
|
+
class LocalBackendConfigError(ValueError):
    """Raised when the local-backend configuration is invalid."""
|
34
|
+
|
35
|
+
|
36
|
+
class LocalBackendConfig(BaseModel, extra=Extra.forbid):
    """
    Specifications of the local-backend configuration

    Extra keys are forbidden (`Extra.forbid`), so this model doubles as a
    validator for the local-backend configuration file.

    Attributes:
        parallel_tasks_per_job:
            Maximum number of tasks to be run in parallel as part of a call to
            `FractalThreadPoolExecutor.map`; if `None`, then all tasks will
            start at the same time.
    """

    # `None` means "no cap on parallel tasks"
    parallel_tasks_per_job: Optional[int]
|
48
|
+
|
49
|
+
|
50
|
+
def get_default_local_backend_config():
    """
    Build the default `LocalBackendConfig` object.

    The default places no cap on the number of parallel tasks.
    """
    default_config = LocalBackendConfig(parallel_tasks_per_job=None)
    return default_config
|
56
|
+
|
57
|
+
def get_local_backend_config(
    wftask: WorkflowTaskV2,
    which_type: Literal["non_parallel", "parallel"],
    config_path: Optional[Path] = None,
) -> LocalBackendConfig:
    """
    Prepare a `LocalBackendConfig` configuration object

    The sources for `parallel_tasks_per_job` attributes, starting from the
    highest-priority one, are

    1. Properties in `wftask.meta_parallel` or `wftask.meta_non_parallel`
       (depending on `which_type`);
    2. The general content of the local-backend configuration file;
    3. The default value (`None`).

    Arguments:
        wftask:
            WorkflowTaskV2 for which the backend configuration should
            be prepared.
        which_type:
            Whether the task-level override is read from
            `wftask.meta_non_parallel` or `wftask.meta_parallel`.
        config_path:
            Path of local-backend configuration file; if `None`, use
            `FRACTAL_LOCAL_CONFIG_FILE` variable from settings.

    Raises:
        ValueError: If `which_type` is not one of the two allowed values.
        LocalBackendConfigError: If the configuration file does not validate
            as a `LocalBackendConfig`.

    Returns:
        A local-backend configuration object
    """

    key = "parallel_tasks_per_job"
    default_value = None

    # Select the `meta` dictionary relevant for this call
    if which_type == "non_parallel":
        wftask_meta = wftask.meta_non_parallel
    elif which_type == "parallel":
        wftask_meta = wftask.meta_parallel
    else:
        raise ValueError(
            "`get_local_backend_config` received an invalid argument"
            f" {which_type=}."
        )

    if wftask_meta and key in wftask_meta:
        # BUGFIX: read the override from the dictionary selected above
        # (`wftask_meta`); the previous `wftask.meta[key]` referenced an
        # attribute that WorkflowTaskV2 does not expose (its meta is split
        # into `meta_non_parallel`/`meta_parallel`).
        parallel_tasks_per_job = wftask_meta[key]
    else:
        # Fall back to the configuration file (or, lacking one, the default)
        if not config_path:
            settings = Inject(get_settings)
            config_path = settings.FRACTAL_LOCAL_CONFIG_FILE
        if config_path is None:
            parallel_tasks_per_job = default_value
        else:
            with config_path.open("r") as f:
                env = json.load(f)
            try:
                # Validate-only: the parsed dict is still read via `env` below
                _ = LocalBackendConfig(**env)
            except ValidationError as e:
                raise LocalBackendConfigError(
                    f"Error while loading {config_path=}. "
                    f"Original error:\n{str(e)}"
                ) from e

            parallel_tasks_per_job = env.get(key, default_value)
    return LocalBackendConfig(parallel_tasks_per_job=parallel_tasks_per_job)
|
@@ -0,0 +1,52 @@
|
|
1
|
+
# Copyright 2022 (C) Friedrich Miescher Institute for Biomedical Research and
|
2
|
+
# University of Zurich
|
3
|
+
#
|
4
|
+
# Original authors:
|
5
|
+
# Tommaso Comparin <tommaso.comparin@exact-lab.it>
|
6
|
+
#
|
7
|
+
# This file is part of Fractal and was originally developed by eXact lab S.r.l.
|
8
|
+
# <exact-lab.it> under contract with Liberali Lab from the Friedrich Miescher
|
9
|
+
# Institute for Biomedical Research and Pelkmans Lab from the University of
|
10
|
+
# Zurich.
|
11
|
+
"""
|
12
|
+
Submodule to define _local_submit_setup
|
13
|
+
"""
|
14
|
+
from pathlib import Path
|
15
|
+
from typing import Literal
|
16
|
+
from typing import Optional
|
17
|
+
|
18
|
+
from ....models.v2 import WorkflowTaskV2
|
19
|
+
from ._local_config import get_local_backend_config
|
20
|
+
|
21
|
+
|
22
|
+
def _local_submit_setup(
    *,
    wftask: WorkflowTaskV2,
    workflow_dir: Optional[Path] = None,
    workflow_dir_user: Optional[Path] = None,
    which_type: Literal["non_parallel", "parallel"],
) -> dict[str, object]:
    """
    Collect WorkflowTask-specific configuration parameters from different
    sources, and inject them for execution.

    Arguments:
        wftask:
            WorkflowTask for which the configuration is to be assembled
        workflow_dir:
            Not used in this function.
        workflow_dir_user:
            Not used in this function.
        which_type:
            Which of the two task-meta dictionaries should be read.

    Returns:
        submit_setup_dict:
            A dictionary (with key `local_backend_config`) that will be
            passed on to `FractalThreadPoolExecutor.submit` and
            `FractalThreadPoolExecutor.map`, so as to set extra options.
    """
    backend_config = get_local_backend_config(
        wftask=wftask,
        which_type=which_type,
    )
    return {"local_backend_config": backend_config}
|