fractal-server 2.0.6__py3-none-any.whl → 2.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fractal_server/__init__.py +1 -1
- fractal_server/app/db/__init__.py +1 -1
- fractal_server/app/routes/admin/v1.py +2 -4
- fractal_server/app/routes/admin/v2.py +2 -4
- fractal_server/app/routes/api/v1/_aux_functions.py +24 -0
- fractal_server/app/routes/api/v1/job.py +3 -4
- fractal_server/app/routes/api/v1/project.py +28 -18
- fractal_server/app/routes/api/v2/_aux_functions.py +35 -12
- fractal_server/app/routes/api/v2/job.py +3 -4
- fractal_server/app/routes/api/v2/project.py +21 -0
- fractal_server/app/routes/api/v2/submit.py +36 -15
- fractal_server/app/routes/aux/_job.py +3 -1
- fractal_server/app/routes/aux/_runner.py +3 -3
- fractal_server/app/runner/executors/slurm/executor.py +169 -68
- fractal_server/app/runner/shutdown.py +88 -0
- fractal_server/app/runner/task_files.py +59 -27
- fractal_server/app/runner/v1/__init__.py +113 -64
- fractal_server/app/runner/v1/_common.py +53 -51
- fractal_server/app/runner/v1/_local/__init__.py +12 -11
- fractal_server/app/runner/v1/_local/_submit_setup.py +4 -4
- fractal_server/app/runner/v1/_slurm/__init__.py +16 -16
- fractal_server/app/runner/v1/_slurm/_submit_setup.py +11 -10
- fractal_server/app/runner/v1/_slurm/get_slurm_config.py +6 -6
- fractal_server/app/runner/v2/__init__.py +139 -60
- fractal_server/app/runner/v2/_local/__init__.py +12 -11
- fractal_server/app/runner/v2/_local/_local_config.py +1 -1
- fractal_server/app/runner/v2/_local/_submit_setup.py +4 -4
- fractal_server/app/runner/v2/_local_experimental/__init__.py +155 -0
- fractal_server/app/runner/v2/_local_experimental/_local_config.py +108 -0
- fractal_server/app/runner/v2/_local_experimental/_submit_setup.py +42 -0
- fractal_server/app/runner/v2/_local_experimental/executor.py +156 -0
- fractal_server/app/runner/v2/_slurm/__init__.py +10 -10
- fractal_server/app/runner/v2/_slurm/_submit_setup.py +11 -10
- fractal_server/app/runner/v2/_slurm/get_slurm_config.py +6 -6
- fractal_server/app/runner/v2/runner.py +17 -15
- fractal_server/app/runner/v2/runner_functions.py +38 -38
- fractal_server/app/runner/v2/runner_functions_low_level.py +12 -6
- fractal_server/app/security/__init__.py +4 -5
- fractal_server/config.py +73 -19
- fractal_server/gunicorn_fractal.py +40 -0
- fractal_server/{logger/__init__.py → logger.py} +2 -2
- fractal_server/main.py +45 -26
- fractal_server/migrations/env.py +1 -1
- {fractal_server-2.0.6.dist-info → fractal_server-2.2.0.dist-info}/METADATA +4 -1
- {fractal_server-2.0.6.dist-info → fractal_server-2.2.0.dist-info}/RECORD +48 -43
- fractal_server/logger/gunicorn_logger.py +0 -19
- {fractal_server-2.0.6.dist-info → fractal_server-2.2.0.dist-info}/LICENSE +0 -0
- {fractal_server-2.0.6.dist-info → fractal_server-2.2.0.dist-info}/WHEEL +0 -0
- {fractal_server-2.0.6.dist-info → fractal_server-2.2.0.dist-info}/entry_points.txt +0 -0
fractal_server/app/runner/v2/__init__.py

@@ -10,9 +10,11 @@ import traceback
 from pathlib import Path
 from typing import Optional

+from sqlalchemy.orm import Session as DBSyncSession
 from sqlalchemy.orm.attributes import flag_modified

 from ....config import get_settings
+from ....logger import get_logger
 from ....logger import reset_logger_handlers
 from ....logger import set_logger
 from ....syringe import Inject
@@ -25,8 +27,13 @@ from ...models.v2 import WorkflowV2
 from ...schemas.v2 import JobStatusTypeV2
 from ..exceptions import JobExecutionError
 from ..exceptions import TaskExecutionError
+from ..executors.slurm._subprocess_run_as_user import _mkdir_as_user
 from ..filenames import WORKFLOW_LOG_FILENAME
+from ..task_files import task_subfolder_name
 from ._local import process_workflow as local_process_workflow
+from ._local_experimental import (
+    process_workflow as local_experimental_process_workflow,
+)
 from ._slurm import process_workflow as slurm_process_workflow
 from .handle_failed_job import assemble_filters_failed_job
 from .handle_failed_job import assemble_history_failed_job
@@ -35,9 +42,31 @@ from fractal_server import __VERSION__

 _backends = {}
 _backends["local"] = local_process_workflow
+_backends["local_experimental"] = local_experimental_process_workflow
 _backends["slurm"] = slurm_process_workflow


+def fail_job(
+    *,
+    db: DBSyncSession,
+    job: JobV2,
+    log_msg: str,
+    logger_name: str,
+    emit_log: bool = False,
+) -> None:
+    logger = get_logger(logger_name=logger_name)
+    if emit_log:
+        logger.error(log_msg)
+    reset_logger_handlers(logger)
+    job.status = JobStatusTypeV2.FAILED
+    job.end_timestamp = get_timestamp()
+    job.log = log_msg
+    db.merge(job)
+    db.commit()
+    db.close()
+    return
+
+
 async def submit_workflow(
     *,
     workflow_id: int,
@@ -72,22 +101,34 @@ async def submit_workflow(
         The username to impersonate for the workflow execution, for the
         slurm backend.
     """
-
-
-    settings = Inject(get_settings)
-    FRACTAL_RUNNER_BACKEND = settings.FRACTAL_RUNNER_BACKEND
-    if FRACTAL_RUNNER_BACKEND == "local":
-        process_workflow = local_process_workflow
-    elif FRACTAL_RUNNER_BACKEND == "slurm":
-        process_workflow = slurm_process_workflow
-    else:
-        raise RuntimeError(f"Invalid runner backend {FRACTAL_RUNNER_BACKEND=}")
+    logger_name = f"WF{workflow_id}_job{job_id}"
+    logger = set_logger(logger_name=logger_name)

     with next(DB.get_sync_db()) as db_sync:

         job: JobV2 = db_sync.get(JobV2, job_id)
         if not job:
-
+            logger.error(f"JobV2 {job_id} does not exist")
+            return
+
+        # Declare runner backend and set `process_workflow` function
+        settings = Inject(get_settings)
+        FRACTAL_RUNNER_BACKEND = settings.FRACTAL_RUNNER_BACKEND
+        if FRACTAL_RUNNER_BACKEND == "local":
+            process_workflow = local_process_workflow
+        elif FRACTAL_RUNNER_BACKEND == "local_experimental":
+            process_workflow = local_experimental_process_workflow
+        elif FRACTAL_RUNNER_BACKEND == "slurm":
+            process_workflow = slurm_process_workflow
+        else:
+            fail_job(
+                db=db_sync,
+                job=job,
+                log_msg=f"Invalid {FRACTAL_RUNNER_BACKEND=}",
+                logger_name=logger_name,
+                emit_log=True,
+            )
+            return

         dataset: DatasetV2 = db_sync.get(DatasetV2, dataset_id)
         workflow: WorkflowV2 = db_sync.get(WorkflowV2, workflow_id)
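Note on the hunk above: `local_experimental` becomes a third accepted value of the `FRACTAL_RUNNER_BACKEND` setting, and an unrecognized value now marks the job as failed through `fail_job` instead of raising a bare `RuntimeError`. As a deployment sketch (an assumption about configuration, not part of this diff), fractal-server reads its settings from the environment, so the new backend would be selected roughly like this before the server starts:

import os

# Assumption: FRACTAL_RUNNER_BACKEND is read from the environment when the
# settings object is built, so exporting it before startup selects the new
# experimental local backend.
os.environ["FRACTAL_RUNNER_BACKEND"] = "local_experimental"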
@@ -99,38 +140,75 @@ async def submit_workflow(
                 log_msg += (
                     f"Cannot fetch workflow {workflow_id} from database\n"
                 )
-
-
-
-            db_sync.merge(job)
-            db_sync.commit()
-            db_sync.close()
+            fail_job(
+                db=db_sync, job=job, log_msg=log_msg, logger_name=logger_name
+            )
             return

         # Define and create server-side working folder
-
-        if
-
-
-
-
-
-
+        WORKFLOW_DIR_LOCAL = Path(job.working_dir)
+        if WORKFLOW_DIR_LOCAL.exists():
+            fail_job(
+                db=db_sync,
+                job=job,
+                log_msg=f"Workflow dir {WORKFLOW_DIR_LOCAL} already exists.",
+                logger_name=logger_name,
+                emit_log=True,
+            )
             return

-
-        original_umask = os.umask(0)
-        WORKFLOW_DIR.mkdir(parents=True, mode=0o755)
-        os.umask(original_umask)
+        try:

-
-
-
-
-
-        )
+            # Create WORKFLOW_DIR
+            original_umask = os.umask(0)
+            WORKFLOW_DIR_LOCAL.mkdir(parents=True, mode=0o755)
+
+            os.umask(original_umask)

-
+            # Define and create WORKFLOW_DIR_REMOTE
+            if FRACTAL_RUNNER_BACKEND == "local":
+                WORKFLOW_DIR_REMOTE = WORKFLOW_DIR_LOCAL
+            elif FRACTAL_RUNNER_BACKEND == "local_experimental":
+                WORKFLOW_DIR_REMOTE = WORKFLOW_DIR_LOCAL
+            elif FRACTAL_RUNNER_BACKEND == "slurm":
+                WORKFLOW_DIR_REMOTE = (
+                    Path(user_cache_dir) / WORKFLOW_DIR_LOCAL.name
+                )
+                _mkdir_as_user(
+                    folder=str(WORKFLOW_DIR_REMOTE), user=slurm_user
+                )
+
+            # Create all tasks subfolders
+            for order in range(job.first_task_index, job.last_task_index + 1):
+                this_wftask = workflow.task_list[order]
+                if this_wftask.is_legacy_task:
+                    task_name = this_wftask.task_legacy.name
+                else:
+                    task_name = this_wftask.task.name
+                subfolder_name = task_subfolder_name(
+                    order=order,
+                    task_name=task_name,
+                )
+                original_umask = os.umask(0)
+                (WORKFLOW_DIR_LOCAL / subfolder_name).mkdir(mode=0o755)
+                os.umask(original_umask)
+                if FRACTAL_RUNNER_BACKEND == "slurm":
+                    _mkdir_as_user(
+                        folder=str(WORKFLOW_DIR_REMOTE / subfolder_name),
+                        user=slurm_user,
+                    )
+        except Exception as e:
+            fail_job(
+                db=db_sync,
+                job=job,
+                log_msg=(
+                    "An error occurred while creating job folder and "
+                    f"subfolders.\nOriginal error: {str(e)}"
+                ),
+                logger_name=logger_name,
+                emit_log=True,
+            )
+            return

         # After Session.commit() is called, either explicitly or when using a
         # context manager, all objects associated with the Session are expired.
@@ -145,10 +223,11 @@ async def submit_workflow(

         db_sync.refresh(dataset)
         db_sync.refresh(workflow)
+        for wftask in workflow.task_list:
+            db_sync.refresh(wftask)

         # Write logs
-
-        log_file_path = WORKFLOW_DIR / WORKFLOW_LOG_FILENAME
+        log_file_path = WORKFLOW_DIR_LOCAL / WORKFLOW_LOG_FILENAME
         logger = set_logger(
             logger_name=logger_name,
             log_file_path=log_file_path,
@@ -189,8 +268,8 @@ async def submit_workflow(
                 slurm_user=slurm_user,
                 slurm_account=job.slurm_account,
                 user_cache_dir=user_cache_dir,
-
-
+                workflow_dir_local=WORKFLOW_DIR_LOCAL,
+                workflow_dir_remote=WORKFLOW_DIR_REMOTE,
                 logger_name=logger_name,
                 worker_init=worker_init,
                 first_task_index=job.first_task_index,
@@ -243,18 +322,14 @@ async def submit_workflow(
                 dataset.images = latest_images
             db_sync.merge(dataset)

-            job.status = JobStatusTypeV2.FAILED
-            job.end_timestamp = get_timestamp()
-
             exception_args_string = "\n".join(e.args)
-
+            log_msg = (
                 f"TASK ERROR: "
                 f"Task name: {e.task_name}, "
                 f"position in Workflow: {e.workflow_task_order}\n"
                 f"TRACEBACK:\n{exception_args_string}"
             )
-            db_sync
-            db_sync.commit()
+            fail_job(db=db_sync, job=job, log_msg=log_msg, logger_name=logger_name)

     except JobExecutionError as e:

@@ -277,12 +352,15 @@ async def submit_workflow(
                 dataset.images = latest_images
             db_sync.merge(dataset)

-
-
-
-
-
-
+            fail_job(
+                db=db_sync,
+                job=job,
+                log_msg=(
+                    f"JOB ERROR in Fractal job {job.id}:\n"
+                    f"TRACEBACK:\n{e.assemble_error()}"
+                ),
+                logger_name=logger_name,
+            )

     except Exception:

@@ -306,15 +384,16 @@ async def submit_workflow(
             if latest_images is not None:
                 dataset.images = latest_images
             db_sync.merge(dataset)
-
-
-
-
-
-
+            fail_job(
+                db=db_sync,
+                job=job,
+                log_msg=(
+                    f"UNKNOWN ERROR in Fractal job {job.id}\n"
+                    f"TRACEBACK:\n{current_traceback}"
+                ),
+                logger_name=logger_name,
             )
-
-            db_sync.commit()
+
     finally:
         reset_logger_handlers(logger)
         db_sync.close()
fractal_server/app/runner/v2/_local/__init__.py

@@ -36,7 +36,7 @@ def _process_workflow(
     workflow: WorkflowV2,
     dataset: DatasetV2,
     logger_name: str,
-
+    workflow_dir_local: Path,
     first_task_index: int,
     last_task_index: int,
 ) -> dict:
@@ -57,8 +57,8 @@ def _process_workflow(
             ],  # noqa
             dataset=dataset,
             executor=executor,
-
-
+            workflow_dir_local=workflow_dir_local,
+            workflow_dir_remote=workflow_dir_local,
             logger_name=logger_name,
             submit_setup_call=_local_submit_setup,
         )
@@ -69,8 +69,8 @@ async def process_workflow(
     *,
     workflow: WorkflowV2,
     dataset: DatasetV2,
-
-
+    workflow_dir_local: Path,
+    workflow_dir_remote: Optional[Path] = None,
     first_task_index: Optional[int] = None,
     last_task_index: Optional[int] = None,
     logger_name: str,
@@ -94,12 +94,13 @@ async def process_workflow(
             The workflow to be run
         dataset:
             Initial dataset.
-
+        workflow_dir_local:
             Working directory for this run.
-
+        workflow_dir_remote:
             Working directory for this run, on the user side. This argument is
             present for compatibility with the standard backend interface, but
-            for the `local` backend it cannot be different from
+            for the `local` backend it cannot be different from
+            `workflow_dir_local`.
         first_task_index:
             Positional index of the first task to execute; if `None`, start
             from `0`.
@@ -137,10 +138,10 @@ async def process_workflow(
             of the workflow
     """

-    if
+    if workflow_dir_remote and (workflow_dir_remote != workflow_dir_local):
         raise NotImplementedError(
             "Local backend does not support different directories "
-            f"{
+            f"{workflow_dir_local=} and {workflow_dir_remote=}"
         )

     # Set values of first_task_index and last_task_index
@@ -155,7 +156,7 @@ async def process_workflow(
         workflow=workflow,
         dataset=dataset,
         logger_name=logger_name,
-
+        workflow_dir_local=workflow_dir_local,
         first_task_index=first_task_index,
         last_task_index=last_task_index,
     )
fractal_server/app/runner/v2/_local/_submit_setup.py

@@ -22,8 +22,8 @@ from ._local_config import get_local_backend_config
 def _local_submit_setup(
     *,
     wftask: WorkflowTaskV2,
-
-
+    workflow_dir_local: Optional[Path] = None,
+    workflow_dir_remote: Optional[Path] = None,
     which_type: Literal["non_parallel", "parallel"],
 ) -> dict[str, object]:
     """
@@ -33,9 +33,9 @@ def _local_submit_setup(
     Arguments:
         wftask:
             WorkflowTask for which the configuration is to be assembled
-
+        workflow_dir_local:
             Not used in this function.
-
+        workflow_dir_remote:
             Not used in this function.

     Returns:
fractal_server/app/runner/v2/_local_experimental/__init__.py (new file)

@@ -0,0 +1,155 @@
+from concurrent.futures.process import BrokenProcessPool
+from pathlib import Path
+from typing import Optional
+
+from ....models.v2 import DatasetV2
+from ....models.v2 import WorkflowV2
+from ...async_wrap import async_wrap
+from ...exceptions import JobExecutionError
+from ...filenames import SHUTDOWN_FILENAME
+from ...set_start_and_last_task_index import set_start_and_last_task_index
+from ..runner import execute_tasks_v2
+from ._submit_setup import _local_submit_setup
+from .executor import FractalProcessPoolExecutor
+
+
+def _process_workflow(
+    *,
+    workflow: WorkflowV2,
+    dataset: DatasetV2,
+    logger_name: str,
+    workflow_dir_local: Path,
+    first_task_index: int,
+    last_task_index: int,
+) -> dict:
+    """
+    Internal processing routine
+
+    Schedules the workflow using a `FractalProcessPoolExecutor`.
+
+    Cf.
+    [process_workflow][fractal_server.app.runner.v2._local_experimental.process_workflow]
+    for the call signature.
+    """
+    with FractalProcessPoolExecutor(
+        shutdown_file=workflow_dir_local / SHUTDOWN_FILENAME
+    ) as executor:
+        try:
+            new_dataset_attributes = execute_tasks_v2(
+                wf_task_list=workflow.task_list[
+                    first_task_index : (last_task_index + 1)  # noqa
+                ],
+                dataset=dataset,
+                executor=executor,
+                workflow_dir_local=workflow_dir_local,
+                workflow_dir_remote=workflow_dir_local,
+                logger_name=logger_name,
+                submit_setup_call=_local_submit_setup,
+            )
+        except BrokenProcessPool as e:
+            raise JobExecutionError(
+                info=(
+                    "Job failed with BrokenProcessPool error, likely due to "
+                    f"an executor shutdown.\nOriginal error:\n{e.args[0]}"
+                )
+            )
+
+    return new_dataset_attributes
+
+
+async def process_workflow(
+    *,
+    workflow: WorkflowV2,
+    dataset: DatasetV2,
+    workflow_dir_local: Path,
+    workflow_dir_remote: Optional[Path] = None,
+    first_task_index: Optional[int] = None,
+    last_task_index: Optional[int] = None,
+    logger_name: str,
+    # Slurm-specific
+    user_cache_dir: Optional[str] = None,
+    slurm_user: Optional[str] = None,
+    slurm_account: Optional[str] = None,
+    worker_init: Optional[str] = None,
+) -> dict:
+    """
+    Run a workflow
+
+    This function is responsible for running a workflow on some input data,
+    saving the output and taking care of any exception raised during the run.
+
+    NOTE: This is the `local_experimental` backend's public interface,
+    which also works as a reference implementation for other backends.
+
+    Args:
+        workflow:
+            The workflow to be run
+        dataset:
+            Initial dataset.
+        workflow_dir_local:
+            Working directory for this run.
+        workflow_dir_remote:
+            Working directory for this run, on the user side. This argument is
+            present for compatibility with the standard backend interface, but
+            for the `local` backend it cannot be different from
+            `workflow_dir_local`.
+        first_task_index:
+            Positional index of the first task to execute; if `None`, start
+            from `0`.
+        last_task_index:
+            Positional index of the last task to execute; if `None`, proceed
+            until the last task.
+        logger_name: Logger name
+        slurm_user:
+            Username to impersonate to run the workflow. This argument is
+            present for compatibility with the standard backend interface, but
+            is ignored in the `local` backend.
+        slurm_account:
+            SLURM account to use when running the workflow. This argument is
+            present for compatibility with the standard backend interface, but
+            is ignored in the `local` backend.
+        user_cache_dir:
+            Cache directory of the user who will run the workflow. This
+            argument is present for compatibility with the standard backend
+            interface, but is ignored in the `local` backend.
+        worker_init:
+            Any additional, usually backend specific, information to be passed
+            to the backend executor. This argument is present for compatibility
+            with the standard backend interface, but is ignored in the `local`
+            backend.
+
+    Raises:
+        TaskExecutionError: wrapper for errors raised during tasks' execution
+            (positive exit codes).
+        JobExecutionError: wrapper for errors raised by the tasks' executors
+            (negative exit codes).
+
+    Returns:
+        output_dataset_metadata:
+            The updated metadata for the dataset, as returned by the last task
+            of the workflow
+    """
+
+    if workflow_dir_remote and (workflow_dir_remote != workflow_dir_local):
+        raise NotImplementedError(
+            "LocalExperimental backend does not support different directories "
+            f"{workflow_dir_local=} and {workflow_dir_remote=}"
+        )
+
+    # Set values of first_task_index and last_task_index
+    num_tasks = len(workflow.task_list)
+    first_task_index, last_task_index = set_start_and_last_task_index(
+        num_tasks,
+        first_task_index=first_task_index,
+        last_task_index=last_task_index,
+    )
+
+    new_dataset_attributes = await async_wrap(_process_workflow)(
+        workflow=workflow,
+        dataset=dataset,
+        logger_name=logger_name,
+        workflow_dir_local=workflow_dir_local,
+        first_task_index=first_task_index,
+        last_task_index=last_task_index,
+    )
+    return new_dataset_attributes
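The `FractalProcessPoolExecutor` imported above is defined in the new `executor.py` (listed in the summary, not shown in this section) and receives a `shutdown_file` path. As an illustration only, and not the package's actual implementation, the sketch below shows one way a process-pool executor can honor such a shutdown file: a watcher thread polls for the file and shuts the pool down once it appears.

import threading
from concurrent.futures import ProcessPoolExecutor
from pathlib import Path


class ShutdownFileExecutor(ProcessPoolExecutor):
    # Toy executor that shuts itself down when a given file appears on disk.

    def __init__(self, shutdown_file: Path, poll_interval: float = 1.0, **kwargs):
        super().__init__(**kwargs)
        self.shutdown_file = Path(shutdown_file)
        self._stop = threading.Event()
        self._watcher = threading.Thread(
            target=self._watch, args=(poll_interval,), daemon=True
        )
        self._watcher.start()

    def _watch(self, poll_interval: float) -> None:
        # Poll until the shutdown file shows up, then cancel queued futures
        # (cancel_futures requires Python >= 3.9).
        while not self._stop.wait(timeout=poll_interval):
            if self.shutdown_file.exists():
                super().shutdown(wait=False, cancel_futures=True)
                return

    def shutdown(self, *args, **kwargs) -> None:
        # Stop the watcher on a regular shutdown as well.
        self._stop.set()
        super().shutdown(*args, **kwargs)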
fractal_server/app/runner/v2/_local_experimental/_local_config.py (new file)

@@ -0,0 +1,108 @@
+"""
+Submodule to handle the local-backend configuration for a WorkflowTask
+"""
+import json
+from pathlib import Path
+from typing import Literal
+from typing import Optional
+
+from pydantic import BaseModel
+from pydantic import Extra
+from pydantic.error_wrappers import ValidationError
+
+from .....config import get_settings
+from .....syringe import Inject
+from ....models.v2 import WorkflowTaskV2
+
+
+class LocalBackendConfigError(ValueError):
+    """
+    Local-backend configuration error
+    """
+
+    pass
+
+
+class LocalBackendConfig(BaseModel, extra=Extra.forbid):
+    """
+    Specifications of the local-backend configuration
+
+    Attributes:
+        parallel_tasks_per_job:
+            Maximum number of tasks to be run in parallel as part of a call to
+            `FractalProcessPoolExecutor.map`; if `None`, then all tasks will
+            start at the same time.
+    """
+
+    parallel_tasks_per_job: Optional[int]
+
+
+def get_default_local_backend_config():
+    """
+    Return a default `LocalBackendConfig` configuration object
+    """
+    return LocalBackendConfig(parallel_tasks_per_job=None)
+
+
+def get_local_backend_config(
+    wftask: WorkflowTaskV2,
+    which_type: Literal["non_parallel", "parallel"],
+    config_path: Optional[Path] = None,
+) -> LocalBackendConfig:
+    """
+    Prepare a `LocalBackendConfig` configuration object
+
+    The sources for `parallel_tasks_per_job` attributes, starting from the
+    highest-priority one, are
+
+    1. Properties in `wftask.meta_parallel` or `wftask.meta_non_parallel`
+       (depending on `which_type`);
+    2. The general content of the local-backend configuration file;
+    3. The default value (`None`).
+
+    Arguments:
+        wftask:
+            WorkflowTaskV2 for which the backend configuration should
+            be prepared.
+        config_path:
+            Path of local-backend configuration file; if `None`, use
+            `FRACTAL_LOCAL_CONFIG_FILE` variable from settings.
+
+    Returns:
+        A local-backend configuration object
+    """
+
+    key = "parallel_tasks_per_job"
+    default_value = None
+
+    if which_type == "non_parallel":
+        wftask_meta = wftask.meta_non_parallel
+    elif which_type == "parallel":
+        wftask_meta = wftask.meta_parallel
+    else:
+        raise ValueError(
+            "`get_local_backend_config` received an invalid argument"
+            f" {which_type=}."
+        )
+
+    if wftask_meta and key in wftask_meta:
+        parallel_tasks_per_job = wftask_meta[key]
+    else:
+        if not config_path:
+            settings = Inject(get_settings)
+            config_path = settings.FRACTAL_LOCAL_CONFIG_FILE
+        if config_path is None:
+            parallel_tasks_per_job = default_value
+        else:
+            with config_path.open("r") as f:
+                env = json.load(f)
+            try:
+                _ = LocalBackendConfig(**env)
+            except ValidationError as e:
+                raise LocalBackendConfigError(
+                    f"Error while loading {config_path=}. "
+                    f"Original error:\n{str(e)}"
+                )
+
+            parallel_tasks_per_job = env.get(key, default_value)
+    return LocalBackendConfig(parallel_tasks_per_job=parallel_tasks_per_job)
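For illustration (the file path below is hypothetical; only the `parallel_tasks_per_job` key and the `FRACTAL_LOCAL_CONFIG_FILE` setting come from the code above), the local-backend configuration file is a small JSON document that `get_local_backend_config` loads and validates against `LocalBackendConfig`:

import json
from pathlib import Path

# Hypothetical location; a real deployment would point fractal-server at this
# file through the FRACTAL_LOCAL_CONFIG_FILE setting.
config_path = Path("/tmp/fractal_local_backend_config.json")

# parallel_tasks_per_job is the only key read by get_local_backend_config;
# extra keys are rejected because LocalBackendConfig forbids them.
config_path.write_text(json.dumps({"parallel_tasks_per_job": 4}))

With such a file in place, at most four tasks of a `FractalProcessPoolExecutor.map` call run at the same time, unless a workflow task overrides the value through its `meta_parallel`/`meta_non_parallel` properties.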