fractal-server 2.13.0__py3-none-any.whl → 2.14.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (127)
  1. fractal_server/__init__.py +1 -1
  2. fractal_server/__main__.py +3 -1
  3. fractal_server/app/models/linkusergroup.py +6 -2
  4. fractal_server/app/models/v2/__init__.py +11 -1
  5. fractal_server/app/models/v2/accounting.py +35 -0
  6. fractal_server/app/models/v2/dataset.py +1 -11
  7. fractal_server/app/models/v2/history.py +78 -0
  8. fractal_server/app/models/v2/job.py +10 -3
  9. fractal_server/app/models/v2/task_group.py +2 -2
  10. fractal_server/app/models/v2/workflow.py +1 -1
  11. fractal_server/app/models/v2/workflowtask.py +1 -1
  12. fractal_server/app/routes/admin/v2/__init__.py +4 -0
  13. fractal_server/app/routes/admin/v2/accounting.py +98 -0
  14. fractal_server/app/routes/admin/v2/impersonate.py +35 -0
  15. fractal_server/app/routes/admin/v2/job.py +5 -13
  16. fractal_server/app/routes/admin/v2/task.py +1 -1
  17. fractal_server/app/routes/admin/v2/task_group.py +4 -29
  18. fractal_server/app/routes/api/__init__.py +1 -1
  19. fractal_server/app/routes/api/v2/__init__.py +8 -2
  20. fractal_server/app/routes/api/v2/_aux_functions.py +66 -0
  21. fractal_server/app/routes/api/v2/_aux_functions_history.py +166 -0
  22. fractal_server/app/routes/api/v2/_aux_functions_task_lifecycle.py +3 -3
  23. fractal_server/app/routes/api/v2/dataset.py +0 -17
  24. fractal_server/app/routes/api/v2/history.py +544 -0
  25. fractal_server/app/routes/api/v2/images.py +31 -43
  26. fractal_server/app/routes/api/v2/job.py +30 -0
  27. fractal_server/app/routes/api/v2/project.py +1 -53
  28. fractal_server/app/routes/api/v2/{status.py → status_legacy.py} +6 -6
  29. fractal_server/app/routes/api/v2/submit.py +17 -14
  30. fractal_server/app/routes/api/v2/task.py +3 -10
  31. fractal_server/app/routes/api/v2/task_collection_custom.py +4 -9
  32. fractal_server/app/routes/api/v2/task_group.py +2 -22
  33. fractal_server/app/routes/api/v2/verify_image_types.py +61 -0
  34. fractal_server/app/routes/api/v2/workflow.py +28 -69
  35. fractal_server/app/routes/api/v2/workflowtask.py +53 -50
  36. fractal_server/app/routes/auth/group.py +0 -16
  37. fractal_server/app/routes/auth/oauth.py +5 -3
  38. fractal_server/app/routes/aux/__init__.py +0 -20
  39. fractal_server/app/routes/pagination.py +47 -0
  40. fractal_server/app/runner/components.py +0 -3
  41. fractal_server/app/runner/compress_folder.py +57 -29
  42. fractal_server/app/runner/exceptions.py +4 -0
  43. fractal_server/app/runner/executors/base_runner.py +157 -0
  44. fractal_server/app/runner/{v2/_local/_local_config.py → executors/local/get_local_config.py} +7 -9
  45. fractal_server/app/runner/executors/local/runner.py +248 -0
  46. fractal_server/app/runner/executors/{slurm → slurm_common}/_batching.py +1 -1
  47. fractal_server/app/runner/executors/{slurm → slurm_common}/_slurm_config.py +9 -7
  48. fractal_server/app/runner/executors/slurm_common/base_slurm_runner.py +868 -0
  49. fractal_server/app/runner/{v2/_slurm_common → executors/slurm_common}/get_slurm_config.py +48 -17
  50. fractal_server/app/runner/executors/{slurm → slurm_common}/remote.py +36 -47
  51. fractal_server/app/runner/executors/slurm_common/slurm_job_task_models.py +134 -0
  52. fractal_server/app/runner/executors/slurm_ssh/runner.py +268 -0
  53. fractal_server/app/runner/executors/slurm_sudo/__init__.py +0 -0
  54. fractal_server/app/runner/executors/{slurm/sudo → slurm_sudo}/_subprocess_run_as_user.py +2 -83
  55. fractal_server/app/runner/executors/slurm_sudo/runner.py +193 -0
  56. fractal_server/app/runner/extract_archive.py +1 -3
  57. fractal_server/app/runner/task_files.py +134 -87
  58. fractal_server/app/runner/v2/__init__.py +0 -395
  59. fractal_server/app/runner/v2/_local.py +88 -0
  60. fractal_server/app/runner/v2/{_slurm_ssh/__init__.py → _slurm_ssh.py} +22 -19
  61. fractal_server/app/runner/v2/{_slurm_sudo/__init__.py → _slurm_sudo.py} +19 -15
  62. fractal_server/app/runner/v2/db_tools.py +119 -0
  63. fractal_server/app/runner/v2/runner.py +219 -98
  64. fractal_server/app/runner/v2/runner_functions.py +491 -189
  65. fractal_server/app/runner/v2/runner_functions_low_level.py +40 -43
  66. fractal_server/app/runner/v2/submit_workflow.py +358 -0
  67. fractal_server/app/runner/v2/task_interface.py +31 -0
  68. fractal_server/app/schemas/_validators.py +13 -24
  69. fractal_server/app/schemas/user.py +10 -7
  70. fractal_server/app/schemas/user_settings.py +9 -21
  71. fractal_server/app/schemas/v2/__init__.py +10 -1
  72. fractal_server/app/schemas/v2/accounting.py +18 -0
  73. fractal_server/app/schemas/v2/dataset.py +12 -94
  74. fractal_server/app/schemas/v2/dumps.py +26 -9
  75. fractal_server/app/schemas/v2/history.py +80 -0
  76. fractal_server/app/schemas/v2/job.py +15 -8
  77. fractal_server/app/schemas/v2/manifest.py +14 -7
  78. fractal_server/app/schemas/v2/project.py +9 -7
  79. fractal_server/app/schemas/v2/status_legacy.py +35 -0
  80. fractal_server/app/schemas/v2/task.py +72 -77
  81. fractal_server/app/schemas/v2/task_collection.py +14 -32
  82. fractal_server/app/schemas/v2/task_group.py +10 -9
  83. fractal_server/app/schemas/v2/workflow.py +10 -11
  84. fractal_server/app/schemas/v2/workflowtask.py +2 -21
  85. fractal_server/app/security/__init__.py +3 -3
  86. fractal_server/app/security/signup_email.py +2 -2
  87. fractal_server/config.py +91 -90
  88. fractal_server/images/tools.py +23 -0
  89. fractal_server/migrations/versions/47351f8c7ebc_drop_dataset_filters.py +50 -0
  90. fractal_server/migrations/versions/9db60297b8b2_set_ondelete.py +250 -0
  91. fractal_server/migrations/versions/af1ef1c83c9b_add_accounting_tables.py +57 -0
  92. fractal_server/migrations/versions/c90a7c76e996_job_id_in_history_run.py +41 -0
  93. fractal_server/migrations/versions/e81103413827_add_job_type_filters.py +36 -0
  94. fractal_server/migrations/versions/f37aceb45062_make_historyunit_logfile_required.py +39 -0
  95. fractal_server/migrations/versions/fbce16ff4e47_new_history_items.py +120 -0
  96. fractal_server/ssh/_fabric.py +28 -14
  97. fractal_server/tasks/v2/local/collect.py +2 -2
  98. fractal_server/tasks/v2/ssh/collect.py +2 -2
  99. fractal_server/tasks/v2/templates/2_pip_install.sh +1 -1
  100. fractal_server/tasks/v2/templates/4_pip_show.sh +1 -1
  101. fractal_server/tasks/v2/utils_background.py +1 -20
  102. fractal_server/tasks/v2/utils_database.py +30 -17
  103. fractal_server/tasks/v2/utils_templates.py +6 -0
  104. {fractal_server-2.13.0.dist-info → fractal_server-2.14.0.dist-info}/METADATA +4 -4
  105. {fractal_server-2.13.0.dist-info → fractal_server-2.14.0.dist-info}/RECORD +114 -99
  106. {fractal_server-2.13.0.dist-info → fractal_server-2.14.0.dist-info}/WHEEL +1 -1
  107. fractal_server/app/runner/executors/slurm/ssh/_executor_wait_thread.py +0 -126
  108. fractal_server/app/runner/executors/slurm/ssh/_slurm_job.py +0 -116
  109. fractal_server/app/runner/executors/slurm/ssh/executor.py +0 -1386
  110. fractal_server/app/runner/executors/slurm/sudo/_check_jobs_status.py +0 -71
  111. fractal_server/app/runner/executors/slurm/sudo/_executor_wait_thread.py +0 -130
  112. fractal_server/app/runner/executors/slurm/sudo/executor.py +0 -1281
  113. fractal_server/app/runner/v2/_local/__init__.py +0 -129
  114. fractal_server/app/runner/v2/_local/_submit_setup.py +0 -52
  115. fractal_server/app/runner/v2/_local/executor.py +0 -100
  116. fractal_server/app/runner/v2/_slurm_ssh/_submit_setup.py +0 -83
  117. fractal_server/app/runner/v2/_slurm_sudo/_submit_setup.py +0 -83
  118. fractal_server/app/runner/v2/handle_failed_job.py +0 -59
  119. fractal_server/app/schemas/v2/status.py +0 -16
  120. /fractal_server/app/{runner/executors/slurm → history}/__init__.py +0 -0
  121. /fractal_server/app/runner/executors/{slurm/ssh → local}/__init__.py +0 -0
  122. /fractal_server/app/runner/executors/{slurm/sudo → slurm_common}/__init__.py +0 -0
  123. /fractal_server/app/runner/executors/{_job_states.py → slurm_common/_job_states.py} +0 -0
  124. /fractal_server/app/runner/executors/{slurm → slurm_common}/utils_executors.py +0 -0
  125. /fractal_server/app/runner/{v2/_slurm_common → executors/slurm_ssh}/__init__.py +0 -0
  126. {fractal_server-2.13.0.dist-info → fractal_server-2.14.0.dist-info}/LICENSE +0 -0
  127. {fractal_server-2.13.0.dist-info → fractal_server-2.14.0.dist-info}/entry_points.txt +0 -0
fractal_server/app/runner/v2/runner_functions_low_level.py

@@ -2,20 +2,15 @@ import json
 import logging
 import shutil
 import subprocess  # nosec
-from pathlib import Path
-from shlex import split as shlex_split
+from shlex import split
 from typing import Any
-from typing import Optional
 
-from ..components import _COMPONENT_KEY_
-from ..exceptions import JobExecutionError
-from ..exceptions import TaskExecutionError
-from fractal_server.app.models.v2 import WorkflowTaskV2
-from fractal_server.app.runner.task_files import get_task_file_paths
+from fractal_server.app.runner.exceptions import JobExecutionError
+from fractal_server.app.runner.exceptions import TaskExecutionError
 from fractal_server.string_tools import validate_cmd
 
 
-def _call_command_wrapper(cmd: str, log_path: Path) -> None:
+def _call_command_wrapper(cmd: str, log_path: str) -> None:
     """
     Call a command and write its stdout and stderr to files
 
@@ -32,9 +27,9 @@ def _call_command_wrapper(cmd: str, log_path: Path) -> None:
         raise TaskExecutionError(f"Invalid command. Original error: {str(e)}")
 
     # Verify that task command is executable
-    if shutil.which(shlex_split(cmd)[0]) is None:
+    if shutil.which(split(cmd)[0]) is None:
         msg = (
-            f'Command "{shlex_split(cmd)[0]}" is not valid. '
+            f'Command "{split(cmd)[0]}" is not valid. '
             "Hint: make sure that it is executable."
         )
         raise TaskExecutionError(msg)
@@ -42,7 +37,7 @@ def _call_command_wrapper(cmd: str, log_path: Path) -> None:
     with open(log_path, "w") as fp_log:
         try:
             result = subprocess.run(  # nosec
-                shlex_split(cmd),
+                split(cmd),
                 stderr=fp_log,
                 stdout=fp_log,
             )
@@ -50,7 +45,7 @@ def _call_command_wrapper(cmd: str, log_path: Path) -> None:
             raise e
 
     if result.returncode > 0:
-        with log_path.open("r") as fp_stderr:
+        with open(log_path, "r") as fp_stderr:
             err = fp_stderr.read()
         raise TaskExecutionError(err)
     elif result.returncode < 0:
@@ -60,58 +55,60 @@
 
 
 def run_single_task(
-    args: dict[str, Any],
+    # COMMON to all parallel tasks
     command: str,
-    wftask: WorkflowTaskV2,
-    workflow_dir_local: Path,
-    workflow_dir_remote: Optional[Path] = None,
-    logger_name: Optional[str] = None,
+    workflow_task_order: int,
+    workflow_task_id: int,
+    task_name: str,
+    # SPECIAL for each parallel task
+    parameters: dict[str, Any],
+    remote_files: dict[str, str],
 ) -> dict[str, Any]:
     """
-    Runs within an executor.
+    Runs within an executor (AKA on the SLURM cluster).
     """
 
-    logger = logging.getLogger(logger_name)
-    logger.debug(f"Now start running {command=}")
-
-    if not workflow_dir_remote:
-        workflow_dir_remote = workflow_dir_local
-
-    task_name = wftask.task.name
+    try:
+        args_file_remote = remote_files["args_file_remote"]
+        metadiff_file_remote = remote_files["metadiff_file_remote"]
+        log_file_remote = remote_files["log_file_remote"]
+    except KeyError:
+        raise TaskExecutionError(
+            f"Invalid {remote_files=}",
+            workflow_task_order=workflow_task_order,
+            workflow_task_id=workflow_task_id,
+            task_name=task_name,
+        )
 
-    component = args.pop(_COMPONENT_KEY_, None)
-    task_files = get_task_file_paths(
-        workflow_dir_local=workflow_dir_local,
-        workflow_dir_remote=workflow_dir_remote,
-        task_order=wftask.order,
-        task_name=task_name,
-        component=component,
-    )
+    logger = logging.getLogger(None)
+    logger.debug(f"Now start running {command=}")
 
     # Write arguments to args.json file
-    with task_files.args.open("w") as f:
-        json.dump(args, f, indent=2)
+    # NOTE: see issue 2346
+    with open(args_file_remote, "w") as f:
+        json.dump(parameters, f, indent=2)
 
     # Assemble full command
+    # NOTE: this could be assembled backend-side
    full_command = (
         f"{command} "
-        f"--args-json {task_files.args.as_posix()} "
-        f"--out-json {task_files.metadiff.as_posix()}"
+        f"--args-json {args_file_remote} "
+        f"--out-json {metadiff_file_remote}"
     )
 
     try:
         _call_command_wrapper(
             full_command,
-            log_path=task_files.log,
+            log_path=log_file_remote,
         )
     except TaskExecutionError as e:
-        e.workflow_task_order = wftask.order
-        e.workflow_task_id = wftask.id
-        e.task_name = wftask.task.name
+        e.workflow_task_order = workflow_task_order
+        e.workflow_task_id = workflow_task_id
+        e.task_name = task_name
         raise e
 
     try:
-        with task_files.metadiff.open("r") as f:
+        with open(metadiff_file_remote, "r") as f:
             out_meta = json.load(f)
     except FileNotFoundError as e:
         logger.debug(
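
A caller-side sketch of the new `run_single_task` signature (paths, IDs, and the task command below are illustrative, not taken from the diff): the per-task file locations formerly derived from a `WorkflowTaskV2` via `get_task_file_paths` now arrive as a plain `remote_files` dict, whose three required keys are exactly the ones checked in the hunk above.

    from fractal_server.app.runner.v2.runner_functions_low_level import (
        run_single_task,
    )

    # Hypothetical invocation; remote_files must provide exactly these keys.
    out_meta = run_single_task(
        command="python3 /tasks/illumination_correction.py",
        workflow_task_order=0,
        workflow_task_id=42,
        task_name="illumination_correction",
        parameters={"zarr_url": "/data/plate.zarr/A/01/0"},
        remote_files={
            "args_file_remote": "/scratch/job/0_task/args.json",
            "metadiff_file_remote": "/scratch/job/0_task/metadiff.json",
            "log_file_remote": "/scratch/job/0_task/log.txt",
        },
    )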
fractal_server/app/runner/v2/submit_workflow.py (new file)

@@ -0,0 +1,358 @@
+"""
+Runner backend subsystem root V2
+
+This module is the single entry point to the runner backend subsystem V2.
+Other subsystems should only import this module and not its submodules or
+the individual backends.
+"""
+import os
+import traceback
+from pathlib import Path
+from typing import Optional
+
+from sqlalchemy.orm import Session as DBSyncSession
+
+from ....config import get_settings
+from ....logger import get_logger
+from ....logger import reset_logger_handlers
+from ....logger import set_logger
+from ....ssh._fabric import FractalSSH
+from ....syringe import Inject
+from ....utils import get_timestamp
+from ....zip_tools import _zip_folder_to_file_and_remove
+from ...db import DB
+from ...models.v2 import DatasetV2
+from ...models.v2 import JobV2
+from ...models.v2 import WorkflowV2
+from ...schemas.v2 import JobStatusTypeV2
+from ..exceptions import JobExecutionError
+from ..exceptions import TaskExecutionError
+from ..filenames import WORKFLOW_LOG_FILENAME
+from ._local import process_workflow as local_process_workflow
+from ._slurm_ssh import process_workflow as slurm_ssh_process_workflow
+from ._slurm_sudo import process_workflow as slurm_sudo_process_workflow
+from fractal_server import __VERSION__
+from fractal_server.app.models import UserSettings
+
+
+_backends = {}
+_backends["local"] = local_process_workflow
+_backends["slurm"] = slurm_sudo_process_workflow
+_backends["slurm_ssh"] = slurm_ssh_process_workflow
+
+
+def fail_job(
+    *,
+    db: DBSyncSession,
+    job: JobV2,
+    log_msg: str,
+    logger_name: str,
+    emit_log: bool = False,
+) -> None:
+    logger = get_logger(logger_name=logger_name)
+    if emit_log:
+        logger.error(log_msg)
+    reset_logger_handlers(logger)
+    job.status = JobStatusTypeV2.FAILED
+    job.end_timestamp = get_timestamp()
+    job.log = log_msg
+    db.merge(job)
+    db.commit()
+    db.close()
+    return
+
+
+def submit_workflow(
+    *,
+    workflow_id: int,
+    dataset_id: int,
+    job_id: int,
+    user_id: int,
+    user_settings: UserSettings,
+    worker_init: Optional[str] = None,
+    slurm_user: Optional[str] = None,
+    user_cache_dir: Optional[str] = None,
+    fractal_ssh: Optional[FractalSSH] = None,
+) -> None:
+    """
+    Prepares a workflow and applies it to a dataset
+
+    This function wraps the process_workflow one, which is different for each
+    backend (e.g. local or slurm backend).
+
+    Args:
+        workflow_id:
+            ID of the workflow being applied
+        dataset_id:
+            Dataset ID
+        job_id:
+            Id of the job record which stores the state for the current
+            workflow application.
+        user_id:
+            User ID.
+        worker_init:
+            Custom executor parameters that get parsed before the execution of
+            each task.
+        user_cache_dir:
+            Cache directory (namely a path where the user can write); for the
+            slurm backend, this is used as a base directory for
+            `job.working_dir_user`.
+        slurm_user:
+            The username to impersonate for the workflow execution, for the
+            slurm backend.
+    """
+    # Declare runner backend and set `process_workflow` function
+    settings = Inject(get_settings)
+    FRACTAL_RUNNER_BACKEND = settings.FRACTAL_RUNNER_BACKEND
+    logger_name = f"WF{workflow_id}_job{job_id}"
+    logger = set_logger(logger_name=logger_name)
+
+    with next(DB.get_sync_db()) as db_sync:
+        try:
+            job: Optional[JobV2] = db_sync.get(JobV2, job_id)
+            dataset: Optional[DatasetV2] = db_sync.get(DatasetV2, dataset_id)
+            workflow: Optional[WorkflowV2] = db_sync.get(
+                WorkflowV2, workflow_id
+            )
+        except Exception as e:
+            logger.error(
+                f"Error connecting to the database. Original error: {str(e)}"
+            )
+            reset_logger_handlers(logger)
+            return
+
+        if job is None:
+            logger.error(f"JobV2 {job_id} does not exist")
+            reset_logger_handlers(logger)
+            return
+        if dataset is None or workflow is None:
+            log_msg = ""
+            if not dataset:
+                log_msg += f"Cannot fetch dataset {dataset_id} from database\n"
+            if not workflow:
+                log_msg += (
+                    f"Cannot fetch workflow {workflow_id} from database\n"
+                )
+            fail_job(
+                db=db_sync, job=job, log_msg=log_msg, logger_name=logger_name
+            )
+            return
+
+        # Declare runner backend and set `process_workflow` function
+        settings = Inject(get_settings)
+        FRACTAL_RUNNER_BACKEND = settings.FRACTAL_RUNNER_BACKEND
+        try:
+            process_workflow = _backends[settings.FRACTAL_RUNNER_BACKEND]
+        except KeyError as e:
+            fail_job(
+                db=db_sync,
+                job=job,
+                log_msg=(
+                    f"Invalid {FRACTAL_RUNNER_BACKEND=}.\n"
+                    f"Original KeyError: {str(e)}"
+                ),
+                logger_name=logger_name,
+                emit_log=True,
+            )
+            return
+
+        # Define and create server-side working folder
+        WORKFLOW_DIR_LOCAL = Path(job.working_dir)
+        if WORKFLOW_DIR_LOCAL.exists():
+            fail_job(
+                db=db_sync,
+                job=job,
+                log_msg=f"Workflow dir {WORKFLOW_DIR_LOCAL} already exists.",
+                logger_name=logger_name,
+                emit_log=True,
+            )
+            return
+
+        try:
+            # Create WORKFLOW_DIR_LOCAL and define WORKFLOW_DIR_REMOTE
+            if FRACTAL_RUNNER_BACKEND == "local":
+                WORKFLOW_DIR_LOCAL.mkdir(parents=True)
+                WORKFLOW_DIR_REMOTE = WORKFLOW_DIR_LOCAL
+            elif FRACTAL_RUNNER_BACKEND == "slurm":
+                original_umask = os.umask(0)
+                WORKFLOW_DIR_LOCAL.mkdir(parents=True, mode=0o755)
+                os.umask(original_umask)
+                WORKFLOW_DIR_REMOTE = (
+                    Path(user_cache_dir) / WORKFLOW_DIR_LOCAL.name
+                )
+            elif FRACTAL_RUNNER_BACKEND == "slurm_ssh":
+                WORKFLOW_DIR_LOCAL.mkdir(parents=True)
+                WORKFLOW_DIR_REMOTE = (
+                    Path(user_settings.ssh_jobs_dir) / WORKFLOW_DIR_LOCAL.name
+                )
+            else:
+                raise ValueError(
+                    "Invalid FRACTAL_RUNNER_BACKEND="
+                    f"{settings.FRACTAL_RUNNER_BACKEND}."
+                )
+        except Exception as e:
+            error_type = type(e).__name__
+            fail_job(
+                db=db_sync,
+                job=job,
+                log_msg=(
+                    f"{error_type} error occurred while creating job folder "
+                    f"and subfolders.\nOriginal error: {str(e)}"
+                ),
+                logger_name=logger_name,
+                emit_log=True,
+            )
+            return
+
+        # After Session.commit() is called, either explicitly or when using a
+        # context manager, all objects associated with the Session are expired.
+        # https://docs.sqlalchemy.org/en/14/orm/
+        # session_basics.html#opening-and-closing-a-session
+        # https://docs.sqlalchemy.org/en/14/orm/
+        # session_state_management.html#refreshing-expiring
+
+        # See issue #928:
+        # https://github.com/fractal-analytics-platform/
+        # fractal-server/issues/928
+
+        db_sync.refresh(dataset)
+        db_sync.refresh(workflow)
+        for wftask in workflow.task_list:
+            db_sync.refresh(wftask)
+
+        # Write logs
+        log_file_path = WORKFLOW_DIR_LOCAL / WORKFLOW_LOG_FILENAME
+        logger = set_logger(
+            logger_name=logger_name,
+            log_file_path=log_file_path,
+        )
+        logger.info(
+            f'Start execution of workflow "{workflow.name}"; '
+            f"more logs at {str(log_file_path)}"
+        )
+        logger.debug(f"fractal_server.__VERSION__: {__VERSION__}")
+        logger.debug(f"FRACTAL_RUNNER_BACKEND: {FRACTAL_RUNNER_BACKEND}")
+        if FRACTAL_RUNNER_BACKEND == "slurm":
+            logger.debug(f"slurm_user: {slurm_user}")
+            logger.debug(f"slurm_account: {job.slurm_account}")
+            logger.debug(f"worker_init: {worker_init}")
+        elif FRACTAL_RUNNER_BACKEND == "slurm_ssh":
+            logger.debug(f"ssh_user: {user_settings.ssh_username}")
+            logger.debug(f"base dir: {user_settings.ssh_tasks_dir}")
+            logger.debug(f"worker_init: {worker_init}")
+        logger.debug(f"job.id: {job.id}")
+        logger.debug(f"job.working_dir: {job.working_dir}")
+        logger.debug(f"job.working_dir_user: {job.working_dir_user}")
+        logger.debug(f"job.first_task_index: {job.first_task_index}")
+        logger.debug(f"job.last_task_index: {job.last_task_index}")
+        logger.debug(f'START workflow "{workflow.name}"')
+
+    try:
+        if FRACTAL_RUNNER_BACKEND == "local":
+            process_workflow = local_process_workflow
+            backend_specific_kwargs = {}
+        elif FRACTAL_RUNNER_BACKEND == "slurm":
+            process_workflow = slurm_sudo_process_workflow
+            backend_specific_kwargs = dict(
+                slurm_user=slurm_user,
+                slurm_account=job.slurm_account,
+                user_cache_dir=user_cache_dir,
+            )
+        elif FRACTAL_RUNNER_BACKEND == "slurm_ssh":
+            process_workflow = slurm_ssh_process_workflow
+            backend_specific_kwargs = dict(fractal_ssh=fractal_ssh)
+        else:
+            raise RuntimeError(
+                f"Invalid runner backend {FRACTAL_RUNNER_BACKEND=}"
+            )
+
+        # "The Session.close() method does not prevent the Session from being
+        # used again. The Session itself does not actually have a distinct
+        # “closed” state; it merely means the Session will release all database
+        # connections and ORM objects."
+        # (https://docs.sqlalchemy.org/en/20/orm/session_api.html#sqlalchemy.orm.Session.close).
+        #
+        # We close the session before the (possibly long) process_workflow
+        # call, to make sure all DB connections are released. The reason why we
+        # are not using a context manager within the try block is that we also
+        # need access to db_sync in the except branches.
+        db_sync = next(DB.get_sync_db())
+        db_sync.close()
+
+        process_workflow(
+            workflow=workflow,
+            dataset=dataset,
+            job_id=job_id,
+            user_id=user_id,
+            workflow_dir_local=WORKFLOW_DIR_LOCAL,
+            workflow_dir_remote=WORKFLOW_DIR_REMOTE,
+            logger_name=logger_name,
+            worker_init=worker_init,
+            first_task_index=job.first_task_index,
+            last_task_index=job.last_task_index,
+            job_attribute_filters=job.attribute_filters,
+            job_type_filters=job.type_filters,
+            **backend_specific_kwargs,
+        )
+
+        logger.info(
+            f'End execution of workflow "{workflow.name}"; '
+            f"more logs at {str(log_file_path)}"
+        )
+        logger.debug(f'END workflow "{workflow.name}"')
+
+        # Update job DB entry
+        job.status = JobStatusTypeV2.DONE
+        job.end_timestamp = get_timestamp()
+        with log_file_path.open("r") as f:
+            logs = f.read()
+        job.log = logs
+        db_sync.merge(job)
+        db_sync.commit()
+
+    except TaskExecutionError as e:
+        logger.debug(f'FAILED workflow "{workflow.name}", TaskExecutionError.')
+        logger.info(f'Workflow "{workflow.name}" failed (TaskExecutionError).')
+
+        exception_args_string = "\n".join(e.args)
+        log_msg = (
+            f"TASK ERROR: "
+            f"Task name: {e.task_name}, "
+            f"position in Workflow: {e.workflow_task_order}\n"
+            f"TRACEBACK:\n{exception_args_string}"
+        )
+        fail_job(db=db_sync, job=job, log_msg=log_msg, logger_name=logger_name)
+
+    except JobExecutionError as e:
+        logger.debug(f'FAILED workflow "{workflow.name}", JobExecutionError.')
+        logger.info(f'Workflow "{workflow.name}" failed (JobExecutionError).')
+
+        fail_job(
+            db=db_sync,
+            job=job,
+            log_msg=(
+                f"JOB ERROR in Fractal job {job.id}:\n"
+                f"TRACEBACK:\n{e.assemble_error()}"
+            ),
+            logger_name=logger_name,
+        )
+
+    except Exception:
+        logger.debug(f'FAILED workflow "{workflow.name}", unknown error.')
+        logger.info(f'Workflow "{workflow.name}" failed (unknown error).')
+
+        current_traceback = traceback.format_exc()
+        fail_job(
+            db=db_sync,
+            job=job,
+            log_msg=(
+                f"UNKNOWN ERROR in Fractal job {job.id}\n"
+                f"TRACEBACK:\n{current_traceback}"
+            ),
+            logger_name=logger_name,
+        )
+
+    finally:
+        reset_logger_handlers(logger)
+        db_sync.close()
+        _zip_folder_to_file_and_remove(folder=job.working_dir)
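
A minimal sketch of how this entry point might be scheduled (the FastAPI `BackgroundTasks` wiring and the `schedule_job` name are assumptions; only `submit_workflow`'s own keyword arguments come from the hunk above):

    from fastapi import BackgroundTasks

    from fractal_server.app.models import UserSettings
    from fractal_server.app.runner.v2.submit_workflow import submit_workflow

    def schedule_job(
        background_tasks: BackgroundTasks,
        workflow_id: int,
        dataset_id: int,
        job_id: int,
        user_id: int,
        user_settings: UserSettings,
    ) -> None:
        # Run submit_workflow after the HTTP response has been sent; note
        # that it never raises, and records failures on the JobV2 row instead.
        background_tasks.add_task(
            submit_workflow,
            workflow_id=workflow_id,
            dataset_id=dataset_id,
            job_id=job_id,
            user_id=user_id,
            user_settings=user_settings,
        )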
fractal_server/app/runner/v2/task_interface.py

@@ -1,11 +1,14 @@
 from typing import Any
+from typing import Optional
 
 from pydantic import BaseModel
 from pydantic import ConfigDict
 from pydantic import Field
 from pydantic import field_validator
+from pydantic import ValidationError
 
 from ....images import SingleImageTaskOutput
+from fractal_server.app.runner.exceptions import TaskOutputValidationError
 from fractal_server.urls import normalize_url
 
 
@@ -61,3 +64,31 @@ class InitTaskOutput(BaseModel):
     model_config = ConfigDict(extra="forbid")
 
     parallelization_list: list[InitArgsModel] = Field(default_factory=list)
+
+
+def _cast_and_validate_TaskOutput(
+    task_output: dict[str, Any]
+) -> Optional[TaskOutput]:
+    try:
+        validated_task_output = TaskOutput(**task_output)
+        return validated_task_output
+    except ValidationError as e:
+        raise TaskOutputValidationError(
+            "Validation of task output failed.\n"
+            f"Original error: {str(e)}\n"
+            f"Original data: {task_output}."
+        )
+
+
+def _cast_and_validate_InitTaskOutput(
+    init_task_output: dict[str, Any],
+) -> Optional[InitTaskOutput]:
+    try:
+        validated_init_task_output = InitTaskOutput(**init_task_output)
+        return validated_init_task_output
+    except ValidationError as e:
+        raise TaskOutputValidationError(
+            "Validation of init-task output failed.\n"
+            f"Original error: {str(e)}\n"
+            f"Original data: {init_task_output}."
+        )
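
Illustrative behavior of the two new helpers (the payloads are made up; `InitTaskOutput`'s `extra="forbid"` config and `parallelization_list` default are visible in the hunk above):

    from fractal_server.app.runner.exceptions import TaskOutputValidationError
    from fractal_server.app.runner.v2.task_interface import (
        _cast_and_validate_InitTaskOutput,
    )

    # A payload matching the schema validates and is returned as a model.
    ok = _cast_and_validate_InitTaskOutput({"parallelization_list": []})
    assert ok.parallelization_list == []

    # Unknown keys are forbidden, so the pydantic ValidationError is
    # converted into a TaskOutputValidationError with the payload embedded.
    try:
        _cast_and_validate_InitTaskOutput({"not_a_field": 1})
    except TaskOutputValidationError as exc:
        print(exc)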
fractal_server/app/schemas/_validators.py

@@ -1,43 +1,32 @@
 import os
+from typing import Annotated
 from typing import Any
 from typing import Optional
 
+from pydantic.types import StringConstraints
 
-def valstr(attribute: str, accept_none: bool = False):
-    """
-    Check that a string attribute is not an empty string, and remove the
-    leading and trailing whitespace characters.
 
-    If `accept_none`, the validator also accepts `None`.
-    """
+def cant_set_none(value: Any) -> Any:
+    if value is None:
+        raise ValueError("Field cannot be set to 'None'.")
+    return value
 
-    def val(cls, string: Optional[str]) -> Optional[str]:
-        if string is None:
-            if accept_none:
-                return string
-            else:
-                raise ValueError(
-                    f"String attribute '{attribute}' cannot be None"
-                )
-        s = string.strip()
-        if not s:
-            raise ValueError(f"String attribute '{attribute}' cannot be empty")
-        return s
 
-    return val
+NonEmptyString = Annotated[
+    str, StringConstraints(min_length=1, strip_whitespace=True)
+]
 
 
 def valdict_keys(attribute: str):
     def val(cls, d: Optional[dict[str, Any]]) -> Optional[dict[str, Any]]:
         """
-        Apply valstr to every key of the dictionary, and fail if there are
-        identical keys.
+        Strip every key of the dictionary, and fail if there are identical keys
         """
         if d is not None:
             old_keys = list(d.keys())
-            new_keys = [
-                valstr(f"{attribute}[{key}]")(cls, key) for key in old_keys
-            ]
+            new_keys = [key.strip() for key in old_keys]
+            if any(k == "" for k in new_keys):
+                raise ValueError(f"Empty string in {new_keys}.")
             if len(new_keys) != len(set(new_keys)):
                 raise ValueError(
                     f"Dictionary contains multiple identical keys: '{d}'."
fractal_server/app/schemas/user.py

@@ -7,8 +7,8 @@ from pydantic import Field
 from pydantic import field_validator
 from pydantic import ValidationInfo
 
+from ._validators import NonEmptyString
 from ._validators import val_unique_list
-from ._validators import valstr
 
 __all__ = (
     "UserRead",
@@ -57,12 +57,12 @@ class UserUpdate(schemas.BaseUserUpdate):
 
     model_config = ConfigDict(extra="forbid")
 
-    username: Optional[str] = None
+    username: Optional[NonEmptyString] = None
 
     # Validators
-    _username = field_validator("username")(classmethod(valstr("username")))
 
     @field_validator(
+        "username",
         "is_active",
         "is_verified",
         "is_superuser",
@@ -94,11 +94,14 @@ class UserCreate(schemas.BaseUserCreate):
         username:
     """
 
-    username: Optional[str] = None
+    username: Optional[NonEmptyString] = None
 
-    # Validators
-
-    _username = field_validator("username")(classmethod(valstr("username")))
+    @field_validator("username")
+    @classmethod
+    def cant_set_none(cls, v, info: ValidationInfo):
+        if v is None:
+            raise ValueError(f"Cannot set {info.field_name}=None")
+        return v
 
 
 class UserUpdateGroups(BaseModel):
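
Sketch of the resulting `username` semantics (assuming the `email` and `password` fields required by the fastapi-users base schema; all values are placeholders):

    from pydantic import ValidationError

    from fractal_server.app.schemas.user import UserCreate

    # Omitting username still defaults to None.
    UserCreate(email="user@example.org", password="pw")

    # A whitespace-only string is rejected by NonEmptyString...
    try:
        UserCreate(email="user@example.org", password="pw", username="   ")
    except ValidationError:
        pass

    # ...and an explicit None is rejected by the cant_set_none validator.
    try:
        UserCreate(email="user@example.org", password="pw", username=None)
    except ValidationError:
        pass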