fractal-server 2.13.1__py3-none-any.whl → 2.14.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fractal_server/__init__.py +1 -1
- fractal_server/__main__.py +3 -1
- fractal_server/app/models/linkusergroup.py +6 -2
- fractal_server/app/models/v2/__init__.py +7 -1
- fractal_server/app/models/v2/dataset.py +1 -11
- fractal_server/app/models/v2/history.py +78 -0
- fractal_server/app/models/v2/job.py +10 -3
- fractal_server/app/models/v2/task_group.py +2 -2
- fractal_server/app/models/v2/workflow.py +1 -1
- fractal_server/app/models/v2/workflowtask.py +1 -1
- fractal_server/app/routes/admin/v2/accounting.py +18 -28
- fractal_server/app/routes/admin/v2/task.py +1 -1
- fractal_server/app/routes/admin/v2/task_group.py +0 -17
- fractal_server/app/routes/api/__init__.py +1 -1
- fractal_server/app/routes/api/v2/__init__.py +8 -2
- fractal_server/app/routes/api/v2/_aux_functions.py +66 -0
- fractal_server/app/routes/api/v2/_aux_functions_history.py +166 -0
- fractal_server/app/routes/api/v2/dataset.py +0 -17
- fractal_server/app/routes/api/v2/history.py +544 -0
- fractal_server/app/routes/api/v2/images.py +31 -43
- fractal_server/app/routes/api/v2/job.py +30 -0
- fractal_server/app/routes/api/v2/project.py +1 -53
- fractal_server/app/routes/api/v2/{status.py → status_legacy.py} +6 -6
- fractal_server/app/routes/api/v2/submit.py +16 -14
- fractal_server/app/routes/api/v2/task.py +3 -10
- fractal_server/app/routes/api/v2/task_collection_custom.py +4 -9
- fractal_server/app/routes/api/v2/task_group.py +0 -17
- fractal_server/app/routes/api/v2/verify_image_types.py +61 -0
- fractal_server/app/routes/api/v2/workflow.py +28 -69
- fractal_server/app/routes/api/v2/workflowtask.py +53 -50
- fractal_server/app/routes/auth/group.py +0 -16
- fractal_server/app/routes/auth/oauth.py +5 -3
- fractal_server/app/routes/pagination.py +47 -0
- fractal_server/app/runner/components.py +0 -3
- fractal_server/app/runner/compress_folder.py +57 -29
- fractal_server/app/runner/exceptions.py +4 -0
- fractal_server/app/runner/executors/base_runner.py +157 -0
- fractal_server/app/runner/{v2/_local/_local_config.py → executors/local/get_local_config.py} +7 -9
- fractal_server/app/runner/executors/local/runner.py +248 -0
- fractal_server/app/runner/executors/{slurm → slurm_common}/_batching.py +1 -1
- fractal_server/app/runner/executors/{slurm → slurm_common}/_slurm_config.py +9 -7
- fractal_server/app/runner/executors/slurm_common/base_slurm_runner.py +868 -0
- fractal_server/app/runner/{v2/_slurm_common → executors/slurm_common}/get_slurm_config.py +48 -17
- fractal_server/app/runner/executors/{slurm → slurm_common}/remote.py +36 -47
- fractal_server/app/runner/executors/slurm_common/slurm_job_task_models.py +134 -0
- fractal_server/app/runner/executors/slurm_ssh/runner.py +268 -0
- fractal_server/app/runner/executors/slurm_sudo/__init__.py +0 -0
- fractal_server/app/runner/executors/{slurm/sudo → slurm_sudo}/_subprocess_run_as_user.py +2 -83
- fractal_server/app/runner/executors/slurm_sudo/runner.py +193 -0
- fractal_server/app/runner/extract_archive.py +1 -3
- fractal_server/app/runner/task_files.py +134 -87
- fractal_server/app/runner/v2/__init__.py +0 -399
- fractal_server/app/runner/v2/_local.py +88 -0
- fractal_server/app/runner/v2/{_slurm_ssh/__init__.py → _slurm_ssh.py} +20 -19
- fractal_server/app/runner/v2/{_slurm_sudo/__init__.py → _slurm_sudo.py} +17 -15
- fractal_server/app/runner/v2/db_tools.py +119 -0
- fractal_server/app/runner/v2/runner.py +206 -95
- fractal_server/app/runner/v2/runner_functions.py +488 -187
- fractal_server/app/runner/v2/runner_functions_low_level.py +40 -43
- fractal_server/app/runner/v2/submit_workflow.py +358 -0
- fractal_server/app/runner/v2/task_interface.py +31 -0
- fractal_server/app/schemas/_validators.py +13 -24
- fractal_server/app/schemas/user.py +10 -7
- fractal_server/app/schemas/user_settings.py +9 -21
- fractal_server/app/schemas/v2/__init__.py +9 -1
- fractal_server/app/schemas/v2/dataset.py +12 -94
- fractal_server/app/schemas/v2/dumps.py +26 -9
- fractal_server/app/schemas/v2/history.py +80 -0
- fractal_server/app/schemas/v2/job.py +15 -8
- fractal_server/app/schemas/v2/manifest.py +14 -7
- fractal_server/app/schemas/v2/project.py +9 -7
- fractal_server/app/schemas/v2/status_legacy.py +35 -0
- fractal_server/app/schemas/v2/task.py +72 -77
- fractal_server/app/schemas/v2/task_collection.py +14 -32
- fractal_server/app/schemas/v2/task_group.py +10 -9
- fractal_server/app/schemas/v2/workflow.py +10 -11
- fractal_server/app/schemas/v2/workflowtask.py +2 -21
- fractal_server/app/security/__init__.py +3 -3
- fractal_server/app/security/signup_email.py +2 -2
- fractal_server/config.py +41 -46
- fractal_server/images/tools.py +23 -0
- fractal_server/migrations/versions/47351f8c7ebc_drop_dataset_filters.py +50 -0
- fractal_server/migrations/versions/9db60297b8b2_set_ondelete.py +250 -0
- fractal_server/migrations/versions/c90a7c76e996_job_id_in_history_run.py +41 -0
- fractal_server/migrations/versions/e81103413827_add_job_type_filters.py +36 -0
- fractal_server/migrations/versions/f37aceb45062_make_historyunit_logfile_required.py +39 -0
- fractal_server/migrations/versions/fbce16ff4e47_new_history_items.py +120 -0
- fractal_server/ssh/_fabric.py +28 -14
- fractal_server/tasks/v2/local/collect.py +2 -2
- fractal_server/tasks/v2/ssh/collect.py +2 -2
- fractal_server/tasks/v2/templates/2_pip_install.sh +1 -1
- fractal_server/tasks/v2/templates/4_pip_show.sh +1 -1
- fractal_server/tasks/v2/utils_background.py +0 -19
- fractal_server/tasks/v2/utils_database.py +30 -17
- fractal_server/tasks/v2/utils_templates.py +6 -0
- {fractal_server-2.13.1.dist-info → fractal_server-2.14.0.dist-info}/METADATA +4 -4
- {fractal_server-2.13.1.dist-info → fractal_server-2.14.0.dist-info}/RECORD +106 -96
- {fractal_server-2.13.1.dist-info → fractal_server-2.14.0.dist-info}/WHEEL +1 -1
- fractal_server/app/runner/executors/slurm/ssh/_executor_wait_thread.py +0 -126
- fractal_server/app/runner/executors/slurm/ssh/_slurm_job.py +0 -116
- fractal_server/app/runner/executors/slurm/ssh/executor.py +0 -1386
- fractal_server/app/runner/executors/slurm/sudo/_check_jobs_status.py +0 -71
- fractal_server/app/runner/executors/slurm/sudo/_executor_wait_thread.py +0 -130
- fractal_server/app/runner/executors/slurm/sudo/executor.py +0 -1281
- fractal_server/app/runner/v2/_local/__init__.py +0 -132
- fractal_server/app/runner/v2/_local/_submit_setup.py +0 -52
- fractal_server/app/runner/v2/_local/executor.py +0 -100
- fractal_server/app/runner/v2/_slurm_ssh/_submit_setup.py +0 -83
- fractal_server/app/runner/v2/_slurm_sudo/_submit_setup.py +0 -83
- fractal_server/app/runner/v2/handle_failed_job.py +0 -59
- fractal_server/app/schemas/v2/status.py +0 -16
- /fractal_server/app/{runner/executors/slurm → history}/__init__.py +0 -0
- /fractal_server/app/runner/executors/{slurm/ssh → local}/__init__.py +0 -0
- /fractal_server/app/runner/executors/{slurm/sudo → slurm_common}/__init__.py +0 -0
- /fractal_server/app/runner/executors/{_job_states.py → slurm_common/_job_states.py} +0 -0
- /fractal_server/app/runner/executors/{slurm → slurm_common}/utils_executors.py +0 -0
- /fractal_server/app/runner/{v2/_slurm_common → executors/slurm_ssh}/__init__.py +0 -0
- {fractal_server-2.13.1.dist-info → fractal_server-2.14.0.dist-info}/LICENSE +0 -0
- {fractal_server-2.13.1.dist-info → fractal_server-2.14.0.dist-info}/entry_points.txt +0 -0
@@ -2,20 +2,15 @@ import json
|
|
2
2
|
import logging
|
3
3
|
import shutil
|
4
4
|
import subprocess # nosec
|
5
|
-
from
|
6
|
-
from shlex import split as shlex_split
|
5
|
+
from shlex import split
|
7
6
|
from typing import Any
|
8
|
-
from typing import Optional
|
9
7
|
|
10
|
-
from
|
11
|
-
from
|
12
|
-
from ..exceptions import TaskExecutionError
|
13
|
-
from fractal_server.app.models.v2 import WorkflowTaskV2
|
14
|
-
from fractal_server.app.runner.task_files import get_task_file_paths
|
8
|
+
from fractal_server.app.runner.exceptions import JobExecutionError
|
9
|
+
from fractal_server.app.runner.exceptions import TaskExecutionError
|
15
10
|
from fractal_server.string_tools import validate_cmd
|
16
11
|
|
17
12
|
|
18
|
-
def _call_command_wrapper(cmd: str, log_path:
|
13
|
+
def _call_command_wrapper(cmd: str, log_path: str) -> None:
|
19
14
|
"""
|
20
15
|
Call a command and write its stdout and stderr to files
|
21
16
|
|
@@ -32,9 +27,9 @@ def _call_command_wrapper(cmd: str, log_path: Path) -> None:
|
|
32
27
|
raise TaskExecutionError(f"Invalid command. Original error: {str(e)}")
|
33
28
|
|
34
29
|
# Verify that task command is executable
|
35
|
-
if shutil.which(
|
30
|
+
if shutil.which(split(cmd)[0]) is None:
|
36
31
|
msg = (
|
37
|
-
f'Command "{
|
32
|
+
f'Command "{split(cmd)[0]}" is not valid. '
|
38
33
|
"Hint: make sure that it is executable."
|
39
34
|
)
|
40
35
|
raise TaskExecutionError(msg)
|
@@ -42,7 +37,7 @@ def _call_command_wrapper(cmd: str, log_path: Path) -> None:
|
|
42
37
|
with open(log_path, "w") as fp_log:
|
43
38
|
try:
|
44
39
|
result = subprocess.run( # nosec
|
45
|
-
|
40
|
+
split(cmd),
|
46
41
|
stderr=fp_log,
|
47
42
|
stdout=fp_log,
|
48
43
|
)
|
@@ -50,7 +45,7 @@ def _call_command_wrapper(cmd: str, log_path: Path) -> None:
|
|
50
45
|
raise e
|
51
46
|
|
52
47
|
if result.returncode > 0:
|
53
|
-
with
|
48
|
+
with open(log_path, "r") as fp_stderr:
|
54
49
|
err = fp_stderr.read()
|
55
50
|
raise TaskExecutionError(err)
|
56
51
|
elif result.returncode < 0:
|
@@ -60,58 +55,60 @@ def _call_command_wrapper(cmd: str, log_path: Path) -> None:
|
|
60
55
|
|
61
56
|
|
62
57
|
def run_single_task(
|
63
|
-
|
58
|
+
# COMMON to all parallel tasks
|
64
59
|
command: str,
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
60
|
+
workflow_task_order: int,
|
61
|
+
workflow_task_id: int,
|
62
|
+
task_name: str,
|
63
|
+
# SPECIAL for each parallel task
|
64
|
+
parameters: dict[str, Any],
|
65
|
+
remote_files: dict[str, str],
|
69
66
|
) -> dict[str, Any]:
|
70
67
|
"""
|
71
|
-
Runs within an executor.
|
68
|
+
Runs within an executor (AKA on the SLURM cluster).
|
72
69
|
"""
|
73
70
|
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
71
|
+
try:
|
72
|
+
args_file_remote = remote_files["args_file_remote"]
|
73
|
+
metadiff_file_remote = remote_files["metadiff_file_remote"]
|
74
|
+
log_file_remote = remote_files["log_file_remote"]
|
75
|
+
except KeyError:
|
76
|
+
raise TaskExecutionError(
|
77
|
+
f"Invalid {remote_files=}",
|
78
|
+
workflow_task_order=workflow_task_order,
|
79
|
+
workflow_task_id=workflow_task_id,
|
80
|
+
task_name=task_name,
|
81
|
+
)
|
81
82
|
|
82
|
-
|
83
|
-
|
84
|
-
workflow_dir_local=workflow_dir_local,
|
85
|
-
workflow_dir_remote=workflow_dir_remote,
|
86
|
-
task_order=wftask.order,
|
87
|
-
task_name=task_name,
|
88
|
-
component=component,
|
89
|
-
)
|
83
|
+
logger = logging.getLogger(None)
|
84
|
+
logger.debug(f"Now start running {command=}")
|
90
85
|
|
91
86
|
# Write arguments to args.json file
|
92
|
-
|
93
|
-
|
87
|
+
# NOTE: see issue 2346
|
88
|
+
with open(args_file_remote, "w") as f:
|
89
|
+
json.dump(parameters, f, indent=2)
|
94
90
|
|
95
91
|
# Assemble full command
|
92
|
+
# NOTE: this could be assembled backend-side
|
96
93
|
full_command = (
|
97
94
|
f"{command} "
|
98
|
-
f"--args-json {
|
99
|
-
f"--out-json {
|
95
|
+
f"--args-json {args_file_remote} "
|
96
|
+
f"--out-json {metadiff_file_remote}"
|
100
97
|
)
|
101
98
|
|
102
99
|
try:
|
103
100
|
_call_command_wrapper(
|
104
101
|
full_command,
|
105
|
-
log_path=
|
102
|
+
log_path=log_file_remote,
|
106
103
|
)
|
107
104
|
except TaskExecutionError as e:
|
108
|
-
e.workflow_task_order =
|
109
|
-
e.workflow_task_id =
|
110
|
-
e.task_name =
|
105
|
+
e.workflow_task_order = workflow_task_order
|
106
|
+
e.workflow_task_id = workflow_task_id
|
107
|
+
e.task_name = task_name
|
111
108
|
raise e
|
112
109
|
|
113
110
|
try:
|
114
|
-
with
|
111
|
+
with open(metadiff_file_remote, "r") as f:
|
115
112
|
out_meta = json.load(f)
|
116
113
|
except FileNotFoundError as e:
|
117
114
|
logger.debug(
|
@@ -0,0 +1,358 @@
|
|
1
|
+
"""
|
2
|
+
Runner backend subsystem root V2
|
3
|
+
|
4
|
+
This module is the single entry point to the runner backend subsystem V2.
|
5
|
+
Other subsystems should only import this module and not its submodules or
|
6
|
+
the individual backends.
|
7
|
+
"""
|
8
|
+
import os
|
9
|
+
import traceback
|
10
|
+
from pathlib import Path
|
11
|
+
from typing import Optional
|
12
|
+
|
13
|
+
from sqlalchemy.orm import Session as DBSyncSession
|
14
|
+
|
15
|
+
from ....config import get_settings
|
16
|
+
from ....logger import get_logger
|
17
|
+
from ....logger import reset_logger_handlers
|
18
|
+
from ....logger import set_logger
|
19
|
+
from ....ssh._fabric import FractalSSH
|
20
|
+
from ....syringe import Inject
|
21
|
+
from ....utils import get_timestamp
|
22
|
+
from ....zip_tools import _zip_folder_to_file_and_remove
|
23
|
+
from ...db import DB
|
24
|
+
from ...models.v2 import DatasetV2
|
25
|
+
from ...models.v2 import JobV2
|
26
|
+
from ...models.v2 import WorkflowV2
|
27
|
+
from ...schemas.v2 import JobStatusTypeV2
|
28
|
+
from ..exceptions import JobExecutionError
|
29
|
+
from ..exceptions import TaskExecutionError
|
30
|
+
from ..filenames import WORKFLOW_LOG_FILENAME
|
31
|
+
from ._local import process_workflow as local_process_workflow
|
32
|
+
from ._slurm_ssh import process_workflow as slurm_ssh_process_workflow
|
33
|
+
from ._slurm_sudo import process_workflow as slurm_sudo_process_workflow
|
34
|
+
from fractal_server import __VERSION__
|
35
|
+
from fractal_server.app.models import UserSettings
|
36
|
+
|
37
|
+
|
38
|
+
_backends = {}
|
39
|
+
_backends["local"] = local_process_workflow
|
40
|
+
_backends["slurm"] = slurm_sudo_process_workflow
|
41
|
+
_backends["slurm_ssh"] = slurm_ssh_process_workflow
|
42
|
+
|
43
|
+
|
44
|
+
def fail_job(
|
45
|
+
*,
|
46
|
+
db: DBSyncSession,
|
47
|
+
job: JobV2,
|
48
|
+
log_msg: str,
|
49
|
+
logger_name: str,
|
50
|
+
emit_log: bool = False,
|
51
|
+
) -> None:
|
52
|
+
logger = get_logger(logger_name=logger_name)
|
53
|
+
if emit_log:
|
54
|
+
logger.error(log_msg)
|
55
|
+
reset_logger_handlers(logger)
|
56
|
+
job.status = JobStatusTypeV2.FAILED
|
57
|
+
job.end_timestamp = get_timestamp()
|
58
|
+
job.log = log_msg
|
59
|
+
db.merge(job)
|
60
|
+
db.commit()
|
61
|
+
db.close()
|
62
|
+
return
|
63
|
+
|
64
|
+
|
65
|
+
def submit_workflow(
|
66
|
+
*,
|
67
|
+
workflow_id: int,
|
68
|
+
dataset_id: int,
|
69
|
+
job_id: int,
|
70
|
+
user_id: int,
|
71
|
+
user_settings: UserSettings,
|
72
|
+
worker_init: Optional[str] = None,
|
73
|
+
slurm_user: Optional[str] = None,
|
74
|
+
user_cache_dir: Optional[str] = None,
|
75
|
+
fractal_ssh: Optional[FractalSSH] = None,
|
76
|
+
) -> None:
|
77
|
+
"""
|
78
|
+
Prepares a workflow and applies it to a dataset
|
79
|
+
|
80
|
+
This function wraps the process_workflow one, which is different for each
|
81
|
+
backend (e.g. local or slurm backend).
|
82
|
+
|
83
|
+
Args:
|
84
|
+
workflow_id:
|
85
|
+
ID of the workflow being applied
|
86
|
+
dataset_id:
|
87
|
+
Dataset ID
|
88
|
+
job_id:
|
89
|
+
Id of the job record which stores the state for the current
|
90
|
+
workflow application.
|
91
|
+
user_id:
|
92
|
+
User ID.
|
93
|
+
worker_init:
|
94
|
+
Custom executor parameters that get parsed before the execution of
|
95
|
+
each task.
|
96
|
+
user_cache_dir:
|
97
|
+
Cache directory (namely a path where the user can write); for the
|
98
|
+
slurm backend, this is used as a base directory for
|
99
|
+
`job.working_dir_user`.
|
100
|
+
slurm_user:
|
101
|
+
The username to impersonate for the workflow execution, for the
|
102
|
+
slurm backend.
|
103
|
+
"""
|
104
|
+
# Declare runner backend and set `process_workflow` function
|
105
|
+
settings = Inject(get_settings)
|
106
|
+
FRACTAL_RUNNER_BACKEND = settings.FRACTAL_RUNNER_BACKEND
|
107
|
+
logger_name = f"WF{workflow_id}_job{job_id}"
|
108
|
+
logger = set_logger(logger_name=logger_name)
|
109
|
+
|
110
|
+
with next(DB.get_sync_db()) as db_sync:
|
111
|
+
try:
|
112
|
+
job: Optional[JobV2] = db_sync.get(JobV2, job_id)
|
113
|
+
dataset: Optional[DatasetV2] = db_sync.get(DatasetV2, dataset_id)
|
114
|
+
workflow: Optional[WorkflowV2] = db_sync.get(
|
115
|
+
WorkflowV2, workflow_id
|
116
|
+
)
|
117
|
+
except Exception as e:
|
118
|
+
logger.error(
|
119
|
+
f"Error connecting to the database. Original error: {str(e)}"
|
120
|
+
)
|
121
|
+
reset_logger_handlers(logger)
|
122
|
+
return
|
123
|
+
|
124
|
+
if job is None:
|
125
|
+
logger.error(f"JobV2 {job_id} does not exist")
|
126
|
+
reset_logger_handlers(logger)
|
127
|
+
return
|
128
|
+
if dataset is None or workflow is None:
|
129
|
+
log_msg = ""
|
130
|
+
if not dataset:
|
131
|
+
log_msg += f"Cannot fetch dataset {dataset_id} from database\n"
|
132
|
+
if not workflow:
|
133
|
+
log_msg += (
|
134
|
+
f"Cannot fetch workflow {workflow_id} from database\n"
|
135
|
+
)
|
136
|
+
fail_job(
|
137
|
+
db=db_sync, job=job, log_msg=log_msg, logger_name=logger_name
|
138
|
+
)
|
139
|
+
return
|
140
|
+
|
141
|
+
# Declare runner backend and set `process_workflow` function
|
142
|
+
settings = Inject(get_settings)
|
143
|
+
FRACTAL_RUNNER_BACKEND = settings.FRACTAL_RUNNER_BACKEND
|
144
|
+
try:
|
145
|
+
process_workflow = _backends[settings.FRACTAL_RUNNER_BACKEND]
|
146
|
+
except KeyError as e:
|
147
|
+
fail_job(
|
148
|
+
db=db_sync,
|
149
|
+
job=job,
|
150
|
+
log_msg=(
|
151
|
+
f"Invalid {FRACTAL_RUNNER_BACKEND=}.\n"
|
152
|
+
f"Original KeyError: {str(e)}"
|
153
|
+
),
|
154
|
+
logger_name=logger_name,
|
155
|
+
emit_log=True,
|
156
|
+
)
|
157
|
+
return
|
158
|
+
|
159
|
+
# Define and create server-side working folder
|
160
|
+
WORKFLOW_DIR_LOCAL = Path(job.working_dir)
|
161
|
+
if WORKFLOW_DIR_LOCAL.exists():
|
162
|
+
fail_job(
|
163
|
+
db=db_sync,
|
164
|
+
job=job,
|
165
|
+
log_msg=f"Workflow dir {WORKFLOW_DIR_LOCAL} already exists.",
|
166
|
+
logger_name=logger_name,
|
167
|
+
emit_log=True,
|
168
|
+
)
|
169
|
+
return
|
170
|
+
|
171
|
+
try:
|
172
|
+
# Create WORKFLOW_DIR_LOCAL and define WORKFLOW_DIR_REMOTE
|
173
|
+
if FRACTAL_RUNNER_BACKEND == "local":
|
174
|
+
WORKFLOW_DIR_LOCAL.mkdir(parents=True)
|
175
|
+
WORKFLOW_DIR_REMOTE = WORKFLOW_DIR_LOCAL
|
176
|
+
elif FRACTAL_RUNNER_BACKEND == "slurm":
|
177
|
+
original_umask = os.umask(0)
|
178
|
+
WORKFLOW_DIR_LOCAL.mkdir(parents=True, mode=0o755)
|
179
|
+
os.umask(original_umask)
|
180
|
+
WORKFLOW_DIR_REMOTE = (
|
181
|
+
Path(user_cache_dir) / WORKFLOW_DIR_LOCAL.name
|
182
|
+
)
|
183
|
+
elif FRACTAL_RUNNER_BACKEND == "slurm_ssh":
|
184
|
+
WORKFLOW_DIR_LOCAL.mkdir(parents=True)
|
185
|
+
WORKFLOW_DIR_REMOTE = (
|
186
|
+
Path(user_settings.ssh_jobs_dir) / WORKFLOW_DIR_LOCAL.name
|
187
|
+
)
|
188
|
+
else:
|
189
|
+
raise ValueError(
|
190
|
+
"Invalid FRACTAL_RUNNER_BACKEND="
|
191
|
+
f"{settings.FRACTAL_RUNNER_BACKEND}."
|
192
|
+
)
|
193
|
+
except Exception as e:
|
194
|
+
error_type = type(e).__name__
|
195
|
+
fail_job(
|
196
|
+
db=db_sync,
|
197
|
+
job=job,
|
198
|
+
log_msg=(
|
199
|
+
f"{error_type} error occurred while creating job folder "
|
200
|
+
f"and subfolders.\nOriginal error: {str(e)}"
|
201
|
+
),
|
202
|
+
logger_name=logger_name,
|
203
|
+
emit_log=True,
|
204
|
+
)
|
205
|
+
return
|
206
|
+
|
207
|
+
# After Session.commit() is called, either explicitly or when using a
|
208
|
+
# context manager, all objects associated with the Session are expired.
|
209
|
+
# https://docs.sqlalchemy.org/en/14/orm/
|
210
|
+
# session_basics.html#opening-and-closing-a-session
|
211
|
+
# https://docs.sqlalchemy.org/en/14/orm/
|
212
|
+
# session_state_management.html#refreshing-expiring
|
213
|
+
|
214
|
+
# See issue #928:
|
215
|
+
# https://github.com/fractal-analytics-platform/
|
216
|
+
# fractal-server/issues/928
|
217
|
+
|
218
|
+
db_sync.refresh(dataset)
|
219
|
+
db_sync.refresh(workflow)
|
220
|
+
for wftask in workflow.task_list:
|
221
|
+
db_sync.refresh(wftask)
|
222
|
+
|
223
|
+
# Write logs
|
224
|
+
log_file_path = WORKFLOW_DIR_LOCAL / WORKFLOW_LOG_FILENAME
|
225
|
+
logger = set_logger(
|
226
|
+
logger_name=logger_name,
|
227
|
+
log_file_path=log_file_path,
|
228
|
+
)
|
229
|
+
logger.info(
|
230
|
+
f'Start execution of workflow "{workflow.name}"; '
|
231
|
+
f"more logs at {str(log_file_path)}"
|
232
|
+
)
|
233
|
+
logger.debug(f"fractal_server.__VERSION__: {__VERSION__}")
|
234
|
+
logger.debug(f"FRACTAL_RUNNER_BACKEND: {FRACTAL_RUNNER_BACKEND}")
|
235
|
+
if FRACTAL_RUNNER_BACKEND == "slurm":
|
236
|
+
logger.debug(f"slurm_user: {slurm_user}")
|
237
|
+
logger.debug(f"slurm_account: {job.slurm_account}")
|
238
|
+
logger.debug(f"worker_init: {worker_init}")
|
239
|
+
elif FRACTAL_RUNNER_BACKEND == "slurm_ssh":
|
240
|
+
logger.debug(f"ssh_user: {user_settings.ssh_username}")
|
241
|
+
logger.debug(f"base dir: {user_settings.ssh_tasks_dir}")
|
242
|
+
logger.debug(f"worker_init: {worker_init}")
|
243
|
+
logger.debug(f"job.id: {job.id}")
|
244
|
+
logger.debug(f"job.working_dir: {job.working_dir}")
|
245
|
+
logger.debug(f"job.working_dir_user: {job.working_dir_user}")
|
246
|
+
logger.debug(f"job.first_task_index: {job.first_task_index}")
|
247
|
+
logger.debug(f"job.last_task_index: {job.last_task_index}")
|
248
|
+
logger.debug(f'START workflow "{workflow.name}"')
|
249
|
+
|
250
|
+
try:
|
251
|
+
if FRACTAL_RUNNER_BACKEND == "local":
|
252
|
+
process_workflow = local_process_workflow
|
253
|
+
backend_specific_kwargs = {}
|
254
|
+
elif FRACTAL_RUNNER_BACKEND == "slurm":
|
255
|
+
process_workflow = slurm_sudo_process_workflow
|
256
|
+
backend_specific_kwargs = dict(
|
257
|
+
slurm_user=slurm_user,
|
258
|
+
slurm_account=job.slurm_account,
|
259
|
+
user_cache_dir=user_cache_dir,
|
260
|
+
)
|
261
|
+
elif FRACTAL_RUNNER_BACKEND == "slurm_ssh":
|
262
|
+
process_workflow = slurm_ssh_process_workflow
|
263
|
+
backend_specific_kwargs = dict(fractal_ssh=fractal_ssh)
|
264
|
+
else:
|
265
|
+
raise RuntimeError(
|
266
|
+
f"Invalid runner backend {FRACTAL_RUNNER_BACKEND=}"
|
267
|
+
)
|
268
|
+
|
269
|
+
# "The Session.close() method does not prevent the Session from being
|
270
|
+
# used again. The Session itself does not actually have a distinct
|
271
|
+
# “closed” state; it merely means the Session will release all database
|
272
|
+
# connections and ORM objects."
|
273
|
+
# (https://docs.sqlalchemy.org/en/20/orm/session_api.html#sqlalchemy.orm.Session.close).
|
274
|
+
#
|
275
|
+
# We close the session before the (possibly long) process_workflow
|
276
|
+
# call, to make sure all DB connections are released. The reason why we
|
277
|
+
# are not using a context manager within the try block is that we also
|
278
|
+
# need access to db_sync in the except branches.
|
279
|
+
db_sync = next(DB.get_sync_db())
|
280
|
+
db_sync.close()
|
281
|
+
|
282
|
+
process_workflow(
|
283
|
+
workflow=workflow,
|
284
|
+
dataset=dataset,
|
285
|
+
job_id=job_id,
|
286
|
+
user_id=user_id,
|
287
|
+
workflow_dir_local=WORKFLOW_DIR_LOCAL,
|
288
|
+
workflow_dir_remote=WORKFLOW_DIR_REMOTE,
|
289
|
+
logger_name=logger_name,
|
290
|
+
worker_init=worker_init,
|
291
|
+
first_task_index=job.first_task_index,
|
292
|
+
last_task_index=job.last_task_index,
|
293
|
+
job_attribute_filters=job.attribute_filters,
|
294
|
+
job_type_filters=job.type_filters,
|
295
|
+
**backend_specific_kwargs,
|
296
|
+
)
|
297
|
+
|
298
|
+
logger.info(
|
299
|
+
f'End execution of workflow "{workflow.name}"; '
|
300
|
+
f"more logs at {str(log_file_path)}"
|
301
|
+
)
|
302
|
+
logger.debug(f'END workflow "{workflow.name}"')
|
303
|
+
|
304
|
+
# Update job DB entry
|
305
|
+
job.status = JobStatusTypeV2.DONE
|
306
|
+
job.end_timestamp = get_timestamp()
|
307
|
+
with log_file_path.open("r") as f:
|
308
|
+
logs = f.read()
|
309
|
+
job.log = logs
|
310
|
+
db_sync.merge(job)
|
311
|
+
db_sync.commit()
|
312
|
+
|
313
|
+
except TaskExecutionError as e:
|
314
|
+
logger.debug(f'FAILED workflow "{workflow.name}", TaskExecutionError.')
|
315
|
+
logger.info(f'Workflow "{workflow.name}" failed (TaskExecutionError).')
|
316
|
+
|
317
|
+
exception_args_string = "\n".join(e.args)
|
318
|
+
log_msg = (
|
319
|
+
f"TASK ERROR: "
|
320
|
+
f"Task name: {e.task_name}, "
|
321
|
+
f"position in Workflow: {e.workflow_task_order}\n"
|
322
|
+
f"TRACEBACK:\n{exception_args_string}"
|
323
|
+
)
|
324
|
+
fail_job(db=db_sync, job=job, log_msg=log_msg, logger_name=logger_name)
|
325
|
+
|
326
|
+
except JobExecutionError as e:
|
327
|
+
logger.debug(f'FAILED workflow "{workflow.name}", JobExecutionError.')
|
328
|
+
logger.info(f'Workflow "{workflow.name}" failed (JobExecutionError).')
|
329
|
+
|
330
|
+
fail_job(
|
331
|
+
db=db_sync,
|
332
|
+
job=job,
|
333
|
+
log_msg=(
|
334
|
+
f"JOB ERROR in Fractal job {job.id}:\n"
|
335
|
+
f"TRACEBACK:\n{e.assemble_error()}"
|
336
|
+
),
|
337
|
+
logger_name=logger_name,
|
338
|
+
)
|
339
|
+
|
340
|
+
except Exception:
|
341
|
+
logger.debug(f'FAILED workflow "{workflow.name}", unknown error.')
|
342
|
+
logger.info(f'Workflow "{workflow.name}" failed (unkwnon error).')
|
343
|
+
|
344
|
+
current_traceback = traceback.format_exc()
|
345
|
+
fail_job(
|
346
|
+
db=db_sync,
|
347
|
+
job=job,
|
348
|
+
log_msg=(
|
349
|
+
f"UNKNOWN ERROR in Fractal job {job.id}\n"
|
350
|
+
f"TRACEBACK:\n{current_traceback}"
|
351
|
+
),
|
352
|
+
logger_name=logger_name,
|
353
|
+
)
|
354
|
+
|
355
|
+
finally:
|
356
|
+
reset_logger_handlers(logger)
|
357
|
+
db_sync.close()
|
358
|
+
_zip_folder_to_file_and_remove(folder=job.working_dir)
|
@@ -1,11 +1,14 @@
|
|
1
1
|
from typing import Any
|
2
|
+
from typing import Optional
|
2
3
|
|
3
4
|
from pydantic import BaseModel
|
4
5
|
from pydantic import ConfigDict
|
5
6
|
from pydantic import Field
|
6
7
|
from pydantic import field_validator
|
8
|
+
from pydantic import ValidationError
|
7
9
|
|
8
10
|
from ....images import SingleImageTaskOutput
|
11
|
+
from fractal_server.app.runner.exceptions import TaskOutputValidationError
|
9
12
|
from fractal_server.urls import normalize_url
|
10
13
|
|
11
14
|
|
@@ -61,3 +64,31 @@ class InitTaskOutput(BaseModel):
|
|
61
64
|
model_config = ConfigDict(extra="forbid")
|
62
65
|
|
63
66
|
parallelization_list: list[InitArgsModel] = Field(default_factory=list)
|
67
|
+
|
68
|
+
|
69
|
+
def _cast_and_validate_TaskOutput(
|
70
|
+
task_output: dict[str, Any]
|
71
|
+
) -> Optional[TaskOutput]:
|
72
|
+
try:
|
73
|
+
validated_task_output = TaskOutput(**task_output)
|
74
|
+
return validated_task_output
|
75
|
+
except ValidationError as e:
|
76
|
+
raise TaskOutputValidationError(
|
77
|
+
"Validation of task output failed.\n"
|
78
|
+
f"Original error: {str(e)}\n"
|
79
|
+
f"Original data: {task_output}."
|
80
|
+
)
|
81
|
+
|
82
|
+
|
83
|
+
def _cast_and_validate_InitTaskOutput(
|
84
|
+
init_task_output: dict[str, Any],
|
85
|
+
) -> Optional[InitTaskOutput]:
|
86
|
+
try:
|
87
|
+
validated_init_task_output = InitTaskOutput(**init_task_output)
|
88
|
+
return validated_init_task_output
|
89
|
+
except ValidationError as e:
|
90
|
+
raise TaskOutputValidationError(
|
91
|
+
"Validation of init-task output failed.\n"
|
92
|
+
f"Original error: {str(e)}\n"
|
93
|
+
f"Original data: {init_task_output}."
|
94
|
+
)
|
@@ -1,43 +1,32 @@
|
|
1
1
|
import os
|
2
|
+
from typing import Annotated
|
2
3
|
from typing import Any
|
3
4
|
from typing import Optional
|
4
5
|
|
6
|
+
from pydantic.types import StringConstraints
|
5
7
|
|
6
|
-
def valstr(attribute: str, accept_none: bool = False):
|
7
|
-
"""
|
8
|
-
Check that a string attribute is not an empty string, and remove the
|
9
|
-
leading and trailing whitespace characters.
|
10
8
|
|
11
|
-
|
12
|
-
|
9
|
+
def cant_set_none(value: Any) -> Any:
|
10
|
+
if value is None:
|
11
|
+
raise ValueError("Field cannot be set to 'None'.")
|
12
|
+
return value
|
13
13
|
|
14
|
-
def val(cls, string: Optional[str]) -> Optional[str]:
|
15
|
-
if string is None:
|
16
|
-
if accept_none:
|
17
|
-
return string
|
18
|
-
else:
|
19
|
-
raise ValueError(
|
20
|
-
f"String attribute '{attribute}' cannot be None"
|
21
|
-
)
|
22
|
-
s = string.strip()
|
23
|
-
if not s:
|
24
|
-
raise ValueError(f"String attribute '{attribute}' cannot be empty")
|
25
|
-
return s
|
26
14
|
|
27
|
-
|
15
|
+
NonEmptyString = Annotated[
|
16
|
+
str, StringConstraints(min_length=1, strip_whitespace=True)
|
17
|
+
]
|
28
18
|
|
29
19
|
|
30
20
|
def valdict_keys(attribute: str):
|
31
21
|
def val(cls, d: Optional[dict[str, Any]]) -> Optional[dict[str, Any]]:
|
32
22
|
"""
|
33
|
-
|
34
|
-
identical keys.
|
23
|
+
Strip every key of the dictionary, and fail if there are identical keys
|
35
24
|
"""
|
36
25
|
if d is not None:
|
37
26
|
old_keys = list(d.keys())
|
38
|
-
new_keys = [
|
39
|
-
|
40
|
-
|
27
|
+
new_keys = [key.strip() for key in old_keys]
|
28
|
+
if any(k == "" for k in new_keys):
|
29
|
+
raise ValueError(f"Empty string in {new_keys}.")
|
41
30
|
if len(new_keys) != len(set(new_keys)):
|
42
31
|
raise ValueError(
|
43
32
|
f"Dictionary contains multiple identical keys: '{d}'."
|
@@ -7,8 +7,8 @@ from pydantic import Field
|
|
7
7
|
from pydantic import field_validator
|
8
8
|
from pydantic import ValidationInfo
|
9
9
|
|
10
|
+
from ._validators import NonEmptyString
|
10
11
|
from ._validators import val_unique_list
|
11
|
-
from ._validators import valstr
|
12
12
|
|
13
13
|
__all__ = (
|
14
14
|
"UserRead",
|
@@ -57,12 +57,12 @@ class UserUpdate(schemas.BaseUserUpdate):
|
|
57
57
|
|
58
58
|
model_config = ConfigDict(extra="forbid")
|
59
59
|
|
60
|
-
username: Optional[
|
60
|
+
username: Optional[NonEmptyString] = None
|
61
61
|
|
62
62
|
# Validators
|
63
|
-
_username = field_validator("username")(classmethod(valstr("username")))
|
64
63
|
|
65
64
|
@field_validator(
|
65
|
+
"username",
|
66
66
|
"is_active",
|
67
67
|
"is_verified",
|
68
68
|
"is_superuser",
|
@@ -94,11 +94,14 @@ class UserCreate(schemas.BaseUserCreate):
|
|
94
94
|
username:
|
95
95
|
"""
|
96
96
|
|
97
|
-
username: Optional[
|
97
|
+
username: Optional[NonEmptyString] = None
|
98
98
|
|
99
|
-
|
100
|
-
|
101
|
-
|
99
|
+
@field_validator("username")
|
100
|
+
@classmethod
|
101
|
+
def cant_set_none(cls, v, info: ValidationInfo):
|
102
|
+
if v is None:
|
103
|
+
raise ValueError(f"Cannot set {info.field_name}=None")
|
104
|
+
return v
|
102
105
|
|
103
106
|
|
104
107
|
class UserUpdateGroups(BaseModel):
|