fractal-server 1.4.10__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two package versions as they appear in their public registries.
Files changed (138)
  1. fractal_server/__init__.py +1 -1
  2. fractal_server/app/models/__init__.py +6 -8
  3. fractal_server/app/models/linkuserproject.py +9 -0
  4. fractal_server/app/models/security.py +6 -0
  5. fractal_server/app/models/v1/__init__.py +12 -0
  6. fractal_server/app/models/{dataset.py → v1/dataset.py} +5 -5
  7. fractal_server/app/models/{job.py → v1/job.py} +5 -5
  8. fractal_server/app/models/{project.py → v1/project.py} +5 -5
  9. fractal_server/app/models/{state.py → v1/state.py} +2 -2
  10. fractal_server/app/models/{task.py → v1/task.py} +7 -2
  11. fractal_server/app/models/{workflow.py → v1/workflow.py} +5 -5
  12. fractal_server/app/models/v2/__init__.py +22 -0
  13. fractal_server/app/models/v2/collection_state.py +21 -0
  14. fractal_server/app/models/v2/dataset.py +54 -0
  15. fractal_server/app/models/v2/job.py +51 -0
  16. fractal_server/app/models/v2/project.py +30 -0
  17. fractal_server/app/models/v2/task.py +93 -0
  18. fractal_server/app/models/v2/workflow.py +35 -0
  19. fractal_server/app/models/v2/workflowtask.py +49 -0
  20. fractal_server/app/routes/admin/__init__.py +0 -0
  21. fractal_server/app/routes/{admin.py → admin/v1.py} +42 -42
  22. fractal_server/app/routes/admin/v2.py +309 -0
  23. fractal_server/app/routes/api/v1/__init__.py +7 -7
  24. fractal_server/app/routes/api/v1/_aux_functions.py +8 -8
  25. fractal_server/app/routes/api/v1/dataset.py +41 -41
  26. fractal_server/app/routes/api/v1/job.py +14 -14
  27. fractal_server/app/routes/api/v1/project.py +27 -25
  28. fractal_server/app/routes/api/v1/task.py +26 -16
  29. fractal_server/app/routes/api/v1/task_collection.py +28 -16
  30. fractal_server/app/routes/api/v1/workflow.py +28 -28
  31. fractal_server/app/routes/api/v1/workflowtask.py +11 -11
  32. fractal_server/app/routes/api/v2/__init__.py +34 -0
  33. fractal_server/app/routes/api/v2/_aux_functions.py +502 -0
  34. fractal_server/app/routes/api/v2/dataset.py +293 -0
  35. fractal_server/app/routes/api/v2/images.py +279 -0
  36. fractal_server/app/routes/api/v2/job.py +200 -0
  37. fractal_server/app/routes/api/v2/project.py +186 -0
  38. fractal_server/app/routes/api/v2/status.py +150 -0
  39. fractal_server/app/routes/api/v2/submit.py +210 -0
  40. fractal_server/app/routes/api/v2/task.py +222 -0
  41. fractal_server/app/routes/api/v2/task_collection.py +239 -0
  42. fractal_server/app/routes/api/v2/task_legacy.py +59 -0
  43. fractal_server/app/routes/api/v2/workflow.py +380 -0
  44. fractal_server/app/routes/api/v2/workflowtask.py +265 -0
  45. fractal_server/app/routes/aux/_job.py +2 -2
  46. fractal_server/app/runner/__init__.py +0 -364
  47. fractal_server/app/runner/async_wrap.py +27 -0
  48. fractal_server/app/runner/components.py +5 -0
  49. fractal_server/app/runner/exceptions.py +129 -0
  50. fractal_server/app/runner/executors/__init__.py +0 -0
  51. fractal_server/app/runner/executors/slurm/__init__.py +3 -0
  52. fractal_server/app/runner/{_slurm → executors/slurm}/_batching.py +1 -1
  53. fractal_server/app/runner/{_slurm → executors/slurm}/_check_jobs_status.py +1 -1
  54. fractal_server/app/runner/{_slurm → executors/slurm}/_executor_wait_thread.py +1 -1
  55. fractal_server/app/runner/{_slurm → executors/slurm}/_slurm_config.py +3 -152
  56. fractal_server/app/runner/{_slurm → executors/slurm}/_subprocess_run_as_user.py +1 -1
  57. fractal_server/app/runner/{_slurm → executors/slurm}/executor.py +32 -21
  58. fractal_server/app/runner/filenames.py +6 -0
  59. fractal_server/app/runner/set_start_and_last_task_index.py +39 -0
  60. fractal_server/app/runner/task_files.py +103 -0
  61. fractal_server/app/runner/v1/__init__.py +366 -0
  62. fractal_server/app/runner/{_common.py → v1/_common.py} +14 -121
  63. fractal_server/app/runner/{_local → v1/_local}/__init__.py +5 -4
  64. fractal_server/app/runner/{_local → v1/_local}/_local_config.py +6 -7
  65. fractal_server/app/runner/{_local → v1/_local}/_submit_setup.py +1 -5
  66. fractal_server/app/runner/v1/_slurm/__init__.py +312 -0
  67. fractal_server/app/runner/{_slurm → v1/_slurm}/_submit_setup.py +5 -11
  68. fractal_server/app/runner/v1/_slurm/get_slurm_config.py +163 -0
  69. fractal_server/app/runner/v1/common.py +117 -0
  70. fractal_server/app/runner/{handle_failed_job.py → v1/handle_failed_job.py} +8 -8
  71. fractal_server/app/runner/v2/__init__.py +336 -0
  72. fractal_server/app/runner/v2/_local/__init__.py +162 -0
  73. fractal_server/app/runner/v2/_local/_local_config.py +118 -0
  74. fractal_server/app/runner/v2/_local/_submit_setup.py +52 -0
  75. fractal_server/app/runner/v2/_local/executor.py +100 -0
  76. fractal_server/app/runner/{_slurm → v2/_slurm}/__init__.py +38 -47
  77. fractal_server/app/runner/v2/_slurm/_submit_setup.py +82 -0
  78. fractal_server/app/runner/v2/_slurm/get_slurm_config.py +182 -0
  79. fractal_server/app/runner/v2/deduplicate_list.py +23 -0
  80. fractal_server/app/runner/v2/handle_failed_job.py +165 -0
  81. fractal_server/app/runner/v2/merge_outputs.py +38 -0
  82. fractal_server/app/runner/v2/runner.py +343 -0
  83. fractal_server/app/runner/v2/runner_functions.py +374 -0
  84. fractal_server/app/runner/v2/runner_functions_low_level.py +130 -0
  85. fractal_server/app/runner/v2/task_interface.py +62 -0
  86. fractal_server/app/runner/v2/v1_compat.py +31 -0
  87. fractal_server/app/schemas/__init__.py +1 -42
  88. fractal_server/app/schemas/_validators.py +28 -5
  89. fractal_server/app/schemas/v1/__init__.py +36 -0
  90. fractal_server/app/schemas/{applyworkflow.py → v1/applyworkflow.py} +18 -18
  91. fractal_server/app/schemas/{dataset.py → v1/dataset.py} +30 -30
  92. fractal_server/app/schemas/{dumps.py → v1/dumps.py} +8 -8
  93. fractal_server/app/schemas/{manifest.py → v1/manifest.py} +5 -5
  94. fractal_server/app/schemas/{project.py → v1/project.py} +9 -9
  95. fractal_server/app/schemas/{task.py → v1/task.py} +12 -12
  96. fractal_server/app/schemas/{task_collection.py → v1/task_collection.py} +7 -7
  97. fractal_server/app/schemas/{workflow.py → v1/workflow.py} +38 -38
  98. fractal_server/app/schemas/v2/__init__.py +37 -0
  99. fractal_server/app/schemas/v2/dataset.py +126 -0
  100. fractal_server/app/schemas/v2/dumps.py +87 -0
  101. fractal_server/app/schemas/v2/job.py +114 -0
  102. fractal_server/app/schemas/v2/manifest.py +159 -0
  103. fractal_server/app/schemas/v2/project.py +34 -0
  104. fractal_server/app/schemas/v2/status.py +16 -0
  105. fractal_server/app/schemas/v2/task.py +151 -0
  106. fractal_server/app/schemas/v2/task_collection.py +109 -0
  107. fractal_server/app/schemas/v2/workflow.py +79 -0
  108. fractal_server/app/schemas/v2/workflowtask.py +208 -0
  109. fractal_server/config.py +5 -4
  110. fractal_server/images/__init__.py +4 -0
  111. fractal_server/images/models.py +136 -0
  112. fractal_server/images/tools.py +84 -0
  113. fractal_server/main.py +11 -3
  114. fractal_server/migrations/env.py +0 -2
  115. fractal_server/migrations/versions/5bf02391cfef_v2.py +245 -0
  116. fractal_server/tasks/__init__.py +0 -5
  117. fractal_server/tasks/endpoint_operations.py +13 -19
  118. fractal_server/tasks/utils.py +35 -0
  119. fractal_server/tasks/{_TaskCollectPip.py → v1/_TaskCollectPip.py} +3 -3
  120. fractal_server/tasks/v1/__init__.py +0 -0
  121. fractal_server/tasks/{background_operations.py → v1/background_operations.py} +20 -52
  122. fractal_server/tasks/v1/get_collection_data.py +14 -0
  123. fractal_server/tasks/v2/_TaskCollectPip.py +103 -0
  124. fractal_server/tasks/v2/__init__.py +0 -0
  125. fractal_server/tasks/v2/background_operations.py +381 -0
  126. fractal_server/tasks/v2/get_collection_data.py +14 -0
  127. fractal_server/urls.py +13 -0
  128. {fractal_server-1.4.10.dist-info → fractal_server-2.0.0.dist-info}/METADATA +10 -10
  129. fractal_server-2.0.0.dist-info/RECORD +169 -0
  130. fractal_server/app/runner/_slurm/.gitignore +0 -2
  131. fractal_server/app/runner/common.py +0 -311
  132. fractal_server/app/schemas/json_schemas/manifest.json +0 -81
  133. fractal_server-1.4.10.dist-info/RECORD +0 -98
  134. /fractal_server/app/runner/{_slurm → executors/slurm}/remote.py +0 -0
  135. /fractal_server/app/runner/{_local → v1/_local}/executor.py +0 -0
  136. {fractal_server-1.4.10.dist-info → fractal_server-2.0.0.dist-info}/LICENSE +0 -0
  137. {fractal_server-1.4.10.dist-info → fractal_server-2.0.0.dist-info}/WHEEL +0 -0
  138. {fractal_server-1.4.10.dist-info → fractal_server-2.0.0.dist-info}/entry_points.txt +0 -0
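Most of the churn above is a v1/v2 split: the existing modules move under v1/ subpackages (models, schemas, routes, runner, tasks) while new v2/ counterparts are added alongside them. A minimal sketch of the import-path change this implies, taken from the _job.py hunk below and shown only as an illustration:

    # fractal-server 1.4.10: v1 models sat at the package top level
    from fractal_server.app.models import ApplyWorkflow

    # fractal-server 2.0.0: the same model is imported from the v1 subpackage
    from fractal_server.app.models.v1 import ApplyWorkflow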
fractal_server/app/routes/aux/_job.py
@@ -3,8 +3,8 @@ from pathlib import Path
  from zipfile import ZIP_DEFLATED
  from zipfile import ZipFile

- from ...models import ApplyWorkflow
- from ...runner._common import SHUTDOWN_FILENAME
+ from ...models.v1 import ApplyWorkflow
+ from ...runner.filenames import SHUTDOWN_FILENAME


  def _write_shutdown_file(*, job: ApplyWorkflow):
fractal_server/app/runner/__init__.py
@@ -1,364 +0,0 @@
- # Copyright 2022 (C) Friedrich Miescher Institute for Biomedical Research and
- # University of Zurich
- #
- # Original authors:
- # Jacopo Nespolo <jacopo.nespolo@exact-lab.it>
- # Tommaso Comparin <tommaso.comparin@exact-lab.it>
- # Marco Franzon <marco.franzon@exact-lab.it>
- #
- # This file is part of Fractal and was originally developed by eXact lab S.r.l.
- # <exact-lab.it> under contract with Liberali Lab from the Friedrich Miescher
- # Institute for Biomedical Research and Pelkmans Lab from the University of
- # Zurich.
- """
- Runner backend subsystem root
-
- This module is the single entry point to the runner backend subsystem. Other
- subystems should only import this module and not its submodules or the
- individual backends.
- """
- import os
- import traceback
- from pathlib import Path
- from typing import Optional
-
- from ... import __VERSION__
- from ...config import get_settings
- from ...logger import set_logger
- from ...syringe import Inject
- from ...utils import get_timestamp
- from ..db import DB
- from ..models import ApplyWorkflow
- from ..models import Dataset
- from ..models import Workflow
- from ..models import WorkflowTask
- from ..schemas import JobStatusType
- from ._common import WORKFLOW_LOG_FILENAME
- from ._local import process_workflow as local_process_workflow
- from ._slurm import process_workflow as slurm_process_workflow
- from .common import close_job_logger
- from .common import JobExecutionError
- from .common import TaskExecutionError
- from .common import validate_workflow_compatibility  # noqa: F401
- from .handle_failed_job import assemble_history_failed_job
- from .handle_failed_job import assemble_meta_failed_job
-
-
- _backends = {}
- _backends["local"] = local_process_workflow
- _backends["slurm"] = slurm_process_workflow
-
-
- async def submit_workflow(
-     *,
-     workflow_id: int,
-     input_dataset_id: int,
-     output_dataset_id: int,
-     job_id: int,
-     worker_init: Optional[str] = None,
-     slurm_user: Optional[str] = None,
-     user_cache_dir: Optional[str] = None,
- ) -> None:
-     """
-     Prepares a workflow and applies it to a dataset
-
-     This function wraps the process_workflow one, which is different for each
-     backend (e.g. local or slurm backend).
-
-     Args:
-         workflow_id:
-             ID of the workflow being applied
-         input_dataset_id:
-             Input dataset ID
-         output_dataset_id:
-             ID of the destination dataset of the workflow.
-         job_id:
-             Id of the job record which stores the state for the current
-             workflow application.
-         worker_init:
-             Custom executor parameters that get parsed before the execution of
-             each task.
-         user_cache_dir:
-             Cache directory (namely a path where the user can write); for the
-             slurm backend, this is used as a base directory for
-             `job.working_dir_user`.
-         slurm_user:
-             The username to impersonate for the workflow execution, for the
-             slurm backend.
-     """
-
-     # Declare runner backend and set `process_workflow` function
-     settings = Inject(get_settings)
-     FRACTAL_RUNNER_BACKEND = settings.FRACTAL_RUNNER_BACKEND
-     if FRACTAL_RUNNER_BACKEND == "local":
-         process_workflow = local_process_workflow
-     elif FRACTAL_RUNNER_BACKEND == "slurm":
-         process_workflow = slurm_process_workflow
-     else:
-         raise RuntimeError(f"Invalid runner backend {FRACTAL_RUNNER_BACKEND=}")
-
-     with next(DB.get_sync_db()) as db_sync:
-
-         job: ApplyWorkflow = db_sync.get(ApplyWorkflow, job_id)
-         if not job:
-             raise ValueError(f"Cannot fetch job {job_id} from database")
-
-         input_dataset: Dataset = db_sync.get(Dataset, input_dataset_id)
-         output_dataset: Dataset = db_sync.get(Dataset, output_dataset_id)
-         workflow: Workflow = db_sync.get(Workflow, workflow_id)
-         if not (input_dataset and output_dataset and workflow):
-             log_msg = ""
-             if not input_dataset:
-                 log_msg += (
-                     f"Cannot fetch input_dataset {input_dataset_id} "
-                     "from database\n"
-                 )
-             if not output_dataset:
-                 log_msg += (
-                     f"Cannot fetch output_dataset {output_dataset_id} "
-                     "from database\n"
-                 )
-             if not workflow:
-                 log_msg += (
-                     f"Cannot fetch workflow {workflow_id} from database\n"
-                 )
-             job.status = JobStatusType.FAILED
-             job.end_timestamp = get_timestamp()
-             job.log = log_msg
-             db_sync.merge(job)
-             db_sync.commit()
-             db_sync.close()
-             return
-
-         # Prepare some of process_workflow arguments
-         input_paths = input_dataset.paths
-         output_path = output_dataset.paths[0]
-
-         # Define and create server-side working folder
-         project_id = workflow.project_id
-         timestamp_string = get_timestamp().strftime("%Y%m%d_%H%M%S")
-         WORKFLOW_DIR = (
-             settings.FRACTAL_RUNNER_WORKING_BASE_DIR
-             / (
-                 f"proj_{project_id:07d}_wf_{workflow_id:07d}_job_{job_id:07d}"
-                 f"_{timestamp_string}"
-             )
-         ).resolve()
-
-         if WORKFLOW_DIR.exists():
-             raise RuntimeError(f"Workflow dir {WORKFLOW_DIR} already exists.")
-
-         # Create WORKFLOW_DIR with 755 permissions
-         original_umask = os.umask(0)
-         WORKFLOW_DIR.mkdir(parents=True, mode=0o755)
-         os.umask(original_umask)
-
-         # Define and create user-side working folder, if needed
-         if FRACTAL_RUNNER_BACKEND == "local":
-             WORKFLOW_DIR_USER = WORKFLOW_DIR
-         elif FRACTAL_RUNNER_BACKEND == "slurm":
-
-             from ._slurm._subprocess_run_as_user import _mkdir_as_user
-
-             WORKFLOW_DIR_USER = (
-                 Path(user_cache_dir) / f"{WORKFLOW_DIR.name}"
-             ).resolve()
-             _mkdir_as_user(folder=str(WORKFLOW_DIR_USER), user=slurm_user)
-         else:
-             raise ValueError(f"{FRACTAL_RUNNER_BACKEND=} not supported")
-
-         # Update db
-         job.working_dir = WORKFLOW_DIR.as_posix()
-         job.working_dir_user = WORKFLOW_DIR_USER.as_posix()
-         db_sync.merge(job)
-         db_sync.commit()
-
-         # After Session.commit() is called, either explicitly or when using a
-         # context manager, all objects associated with the Session are expired.
-         # https://docs.sqlalchemy.org/en/14/orm/
-         # session_basics.html#opening-and-closing-a-session
-         # https://docs.sqlalchemy.org/en/14/orm/
-         # session_state_management.html#refreshing-expiring
-
-         # See issue #928:
-         # https://github.com/fractal-analytics-platform/
-         # fractal-server/issues/928
-
-         db_sync.refresh(input_dataset)
-         db_sync.refresh(output_dataset)
-         db_sync.refresh(workflow)
-
-         # Write logs
-         logger_name = f"WF{workflow_id}_job{job_id}"
-         log_file_path = WORKFLOW_DIR / WORKFLOW_LOG_FILENAME
-         logger = set_logger(
-             logger_name=logger_name,
-             log_file_path=log_file_path,
-         )
-         logger.info(
-             f'Start execution of workflow "{workflow.name}"; '
-             f"more logs at {str(log_file_path)}"
-         )
-         logger.debug(f"fractal_server.__VERSION__: {__VERSION__}")
-         logger.debug(f"FRACTAL_RUNNER_BACKEND: {FRACTAL_RUNNER_BACKEND}")
-         logger.debug(f"slurm_user: {slurm_user}")
-         logger.debug(f"slurm_account: {job.slurm_account}")
-         logger.debug(f"worker_init: {worker_init}")
-         logger.debug(f"input metadata keys: {list(input_dataset.meta.keys())}")
-         logger.debug(f"input_paths: {input_paths}")
-         logger.debug(f"output_path: {output_path}")
-         logger.debug(f"job.id: {job.id}")
-         logger.debug(f"job.working_dir: {job.working_dir}")
-         logger.debug(f"job.working_dir_user: {job.working_dir_user}")
-         logger.debug(f"job.first_task_index: {job.first_task_index}")
-         logger.debug(f"job.last_task_index: {job.last_task_index}")
-         logger.debug(f'START workflow "{workflow.name}"')
-
-     try:
-         # "The Session.close() method does not prevent the Session from being
-         # used again. The Session itself does not actually have a distinct
-         # “closed” state; it merely means the Session will release all database
-         # connections and ORM objects."
-         # (https://docs.sqlalchemy.org/en/20/orm/session_api.html#sqlalchemy.orm.Session.close).
-         #
-         # We close the session before the (possibly long) process_workflow
-         # call, to make sure all DB connections are released. The reason why we
-         # are not using a context manager within the try block is that we also
-         # need access to db_sync in the except branches.
-         db_sync = next(DB.get_sync_db())
-         db_sync.close()
-
-         output_dataset_meta_hist = await process_workflow(
-             workflow=workflow,
-             input_paths=input_paths,
-             output_path=output_path,
-             input_metadata=input_dataset.meta,
-             input_history=input_dataset.history,
-             slurm_user=slurm_user,
-             slurm_account=job.slurm_account,
-             user_cache_dir=user_cache_dir,
-             workflow_dir=WORKFLOW_DIR,
-             workflow_dir_user=WORKFLOW_DIR_USER,
-             logger_name=logger_name,
-             worker_init=worker_init,
-             first_task_index=job.first_task_index,
-             last_task_index=job.last_task_index,
-         )
-
-         logger.info(
-             f'End execution of workflow "{workflow.name}"; '
-             f"more logs at {str(log_file_path)}"
-         )
-         logger.debug(f'END workflow "{workflow.name}"')
-
-         # Replace output_dataset.meta and output_dataset.history with their
-         # up-to-date versions, obtained within process_workflow
-         output_dataset.history = output_dataset_meta_hist.pop("history")
-         output_dataset.meta = output_dataset_meta_hist.pop("metadata")
-
-         db_sync.merge(output_dataset)
-
-         # Update job DB entry
-         job.status = JobStatusType.DONE
-         job.end_timestamp = get_timestamp()
-         with log_file_path.open("r") as f:
-             logs = f.read()
-         job.log = logs
-         db_sync.merge(job)
-         close_job_logger(logger)
-         db_sync.commit()
-
-     except TaskExecutionError as e:
-
-         logger.debug(f'FAILED workflow "{workflow.name}", TaskExecutionError.')
-         logger.info(f'Workflow "{workflow.name}" failed (TaskExecutionError).')
-
-         # Assemble output_dataset.meta based on the last successful task, i.e.
-         # based on METADATA_FILENAME
-         output_dataset.meta = assemble_meta_failed_job(job, output_dataset)
-
-         # Assemble new history and assign it to output_dataset.meta
-         failed_wftask = db_sync.get(WorkflowTask, e.workflow_task_id)
-         output_dataset.history = assemble_history_failed_job(
-             job,
-             output_dataset,
-             workflow,
-             logger,
-             failed_wftask=failed_wftask,
-         )
-
-         db_sync.merge(output_dataset)
-
-         job.status = JobStatusType.FAILED
-         job.end_timestamp = get_timestamp()
-
-         exception_args_string = "\n".join(e.args)
-         job.log = (
-             f"TASK ERROR: "
-             f"Task name: {e.task_name}, "
-             f"position in Workflow: {e.workflow_task_order}\n"
-             f"TRACEBACK:\n{exception_args_string}"
-         )
-         db_sync.merge(job)
-         close_job_logger(logger)
-         db_sync.commit()
-
-     except JobExecutionError as e:
-
-         logger.debug(f'FAILED workflow "{workflow.name}", JobExecutionError.')
-         logger.info(f'Workflow "{workflow.name}" failed (JobExecutionError).')
-
-         # Assemble output_dataset.meta based on the last successful task, i.e.
-         # based on METADATA_FILENAME
-         output_dataset.meta = assemble_meta_failed_job(job, output_dataset)
-
-         # Assemble new history and assign it to output_dataset.meta
-         output_dataset.history = assemble_history_failed_job(
-             job,
-             output_dataset,
-             workflow,
-             logger,
-         )
-
-         db_sync.merge(output_dataset)
-
-         job.status = JobStatusType.FAILED
-         job.end_timestamp = get_timestamp()
-         error = e.assemble_error()
-         job.log = f"JOB ERROR in Fractal job {job.id}:\nTRACEBACK:\n{error}"
-         db_sync.merge(job)
-         close_job_logger(logger)
-         db_sync.commit()
-
-     except Exception:
-
-         logger.debug(f'FAILED workflow "{workflow.name}", unknown error.')
-         logger.info(f'Workflow "{workflow.name}" failed (unkwnon error).')
-
-         current_traceback = traceback.format_exc()
-
-         # Assemble output_dataset.meta based on the last successful task, i.e.
-         # based on METADATA_FILENAME
-         output_dataset.meta = assemble_meta_failed_job(job, output_dataset)
-
-         # Assemble new history and assign it to output_dataset.meta
-         output_dataset.history = assemble_history_failed_job(
-             job,
-             output_dataset,
-             workflow,
-             logger,
-         )
-
-         db_sync.merge(output_dataset)
-
-         job.status = JobStatusType.FAILED
-         job.end_timestamp = get_timestamp()
-         job.log = (
-             f"UNKNOWN ERROR in Fractal job {job.id}\n"
-             f"TRACEBACK:\n{current_traceback}"
-         )
-         db_sync.merge(job)
-         close_job_logger(logger)
-         db_sync.commit()
-     finally:
-         db_sync.close()
fractal_server/app/runner/async_wrap.py
@@ -0,0 +1,27 @@
+ import asyncio
+ from functools import partial
+ from functools import wraps
+ from typing import Callable
+
+
+ def async_wrap(func: Callable) -> Callable:
+     """
+     Wrap a synchronous callable in an async task
+
+     Ref: [issue #140](https://github.com/fractal-analytics-platform/fractal-server/issues/140)
+     and [this StackOverflow answer](https://stackoverflow.com/q/43241221/19085332).
+
+     Returns:
+         async_wrapper:
+             A factory that allows wrapping a blocking callable within a
+             coroutine.
+     """  # noqa: E501
+
+     @wraps(func)
+     async def async_wrapper(*args, loop=None, executor=None, **kwargs):
+         if loop is None:
+             loop = asyncio.get_event_loop()
+         pfunc = partial(func, *args, **kwargs)
+         return await loop.run_in_executor(executor, pfunc)
+
+     return async_wrapper
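The new async_wrap helper turns a blocking callable into something awaitable. A minimal usage sketch, assuming a blocking function of your own (collect_tasks below is illustrative, not part of the package):

    import asyncio
    import time

    from fractal_server.app.runner.async_wrap import async_wrap


    def collect_tasks(package: str) -> str:
        # Stand-in for blocking work (e.g. installing a task package) that
        # should not stall the event loop.
        time.sleep(1)
        return f"collected {package}"


    async def main():
        # async_wrap(collect_tasks) returns a coroutine factory; awaiting it
        # runs the blocking call in the default thread-pool executor.
        result = await async_wrap(collect_tasks)("fractal-tasks-core")
        print(result)


    asyncio.run(main())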
fractal_server/app/runner/components.py
@@ -0,0 +1,5 @@
+ def _index_to_component(ind: int) -> str:
+     return f"{ind:07d}"
+
+
+ _COMPONENT_KEY_ = "__FRACTAL_PARALLEL_COMPONENT__"
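A short sketch of how these two helpers fit together; the list of per-element kwargs is an assumption made for illustration, not the runner's actual call site:

    from fractal_server.app.runner.components import _COMPONENT_KEY_
    from fractal_server.app.runner.components import _index_to_component

    # Label three hypothetical parallel units with zero-padded component ids.
    parallel_kwargs = [
        {_COMPONENT_KEY_: _index_to_component(ind)} for ind in range(3)
    ]
    print(parallel_kwargs[-1])  # {'__FRACTAL_PARALLEL_COMPONENT__': '0000002'}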
fractal_server/app/runner/exceptions.py
@@ -0,0 +1,129 @@
+ import os
+ from typing import Optional
+
+
+ class TaskExecutionError(RuntimeError):
+     """
+     Forwards errors occurred during the execution of a task
+
+     This error wraps and forwards errors occurred during the execution of
+     tasks, when the exit code is larger than 0 (i.e. the error took place
+     within the task). This error also adds information that is useful to track
+     down and debug the failing task within a workflow.
+
+     Attributes:
+         workflow_task_id:
+             ID of the workflow task that failed.
+         workflow_task_order:
+             Order of the task within the workflow.
+         task_name:
+             Human readable name of the failing task.
+     """
+
+     workflow_task_id: Optional[int] = None
+     workflow_task_order: Optional[int] = None
+     task_name: Optional[str] = None
+
+     def __init__(
+         self,
+         *args,
+         workflow_task_id: Optional[int] = None,
+         workflow_task_order: Optional[int] = None,
+         task_name: Optional[str] = None,
+     ):
+         super().__init__(*args)
+         self.workflow_task_id = workflow_task_id
+         self.workflow_task_order = workflow_task_order
+         self.task_name = task_name
+
+
+ class JobExecutionError(RuntimeError):
+     """
+     Forwards errors in the execution of a task that are due to external factors
+
+     This error wraps and forwards errors occurred during the execution of
+     tasks, but related to external factors like:
+
+     1. A negative exit code (e.g. because the task received a TERM or KILL
+        signal);
+     2. An error on the executor side (e.g. the SLURM executor could not
+        find the pickled file with task output).
+
+     This error also adds information that is useful to track down and debug the
+     failing task within a workflow.
+
+     Attributes:
+         info:
+             A free field for additional information
+         cmd_file:
+             Path to the file of the command that was executed (e.g. a SLURM
+             submission script).
+         stdout_file:
+             Path to the file with the command stdout
+         stderr_file:
+             Path to the file with the command stderr
+     """
+
+     cmd_file: Optional[str] = None
+     stdout_file: Optional[str] = None
+     stderr_file: Optional[str] = None
+     info: Optional[str] = None
+
+     def __init__(
+         self,
+         *args,
+         cmd_file: Optional[str] = None,
+         stdout_file: Optional[str] = None,
+         stderr_file: Optional[str] = None,
+         info: Optional[str] = None,
+     ):
+         super().__init__(*args)
+         self.cmd_file = cmd_file
+         self.stdout_file = stdout_file
+         self.stderr_file = stderr_file
+         self.info = info
+
+     def _read_file(self, filepath: str) -> str:
+         """
+         Return the content of a text file, and handle the cases where it is
+         empty or missing
+         """
+         if os.path.exists(filepath):
+             with open(filepath, "r") as f:
+                 content = f.read()
+             if content:
+                 return f"Content of {filepath}:\n{content}"
+             else:
+                 return f"File {filepath} is empty\n"
+         else:
+             return f"File {filepath} is missing\n"
+
+     def assemble_error(self) -> str:
+         """
+         Read the files that are specified in attributes, and combine them in an
+         error message.
+         """
+         if self.cmd_file:
+             content = self._read_file(self.cmd_file)
+             cmd_content = f"COMMAND:\n{content}\n\n"
+         else:
+             cmd_content = ""
+         if self.stdout_file:
+             content = self._read_file(self.stdout_file)
+             out_content = f"STDOUT:\n{content}\n\n"
+         else:
+             out_content = ""
+         if self.stderr_file:
+             content = self._read_file(self.stderr_file)
+             err_content = f"STDERR:\n{content}\n\n"
+         else:
+             err_content = ""
+
+         content = f"{cmd_content}{out_content}{err_content}"
+         if self.info:
+             content = f"{content}ADDITIONAL INFO:\n{self.info}\n\n"
+
+         if not content:
+             content = str(self)
+         message = f"JobExecutionError\n\n{content}"
+         return message
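A minimal sketch of how these exceptions are meant to be consumed, with illustrative file paths and task metadata (none of the values below come from the package itself):

    from fractal_server.app.runner.exceptions import JobExecutionError
    from fractal_server.app.runner.exceptions import TaskExecutionError

    # Executor-side failure: only files that actually exist contribute
    # their content to the assembled message.
    job_err = JobExecutionError(
        cmd_file="/tmp/fractal/submit.sbatch",      # hypothetical path
        stderr_file="/tmp/fractal/slurm_job.err",   # hypothetical path
        info="SLURM job was killed by the scheduler",
    )
    print(job_err.assemble_error())

    # Task-side failure: carries enough metadata to locate the failing
    # WorkflowTask when building the job log.
    task_err = TaskExecutionError(
        "Traceback (most recent call last): ...",
        workflow_task_id=7,
        workflow_task_order=2,
        task_name="create_ome_zarr",
    )
    print(task_err.task_name, task_err.workflow_task_order)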
fractal_server/app/runner/executors/__init__.py
File without changes
fractal_server/app/runner/executors/slurm/__init__.py
@@ -0,0 +1,3 @@
+ from .executor import SlurmExecutor
+
+ __all__ = ["SlurmExecutor"]
fractal_server/app/runner/{_slurm → executors/slurm}/_batching.py
@@ -14,7 +14,7 @@ Submodule to determine the number of total/parallel tasks per SLURM job.
  import math
  from typing import Optional

- from ....logger import set_logger
+ from .....logger import set_logger

  logger = set_logger(__name__)

fractal_server/app/runner/{_slurm → executors/slurm}/_check_jobs_status.py
@@ -2,7 +2,7 @@ from subprocess import run # nosec

  from cfut.slurm import STATES_FINISHED

- from ....logger import set_logger
+ from .....logger import set_logger


  logger = set_logger(__name__)
fractal_server/app/runner/{_slurm → executors/slurm}/_executor_wait_thread.py
@@ -7,7 +7,7 @@ from typing import Optional

  from cfut import FileWaitThread

- from ....logger import set_logger
+ from .....logger import set_logger
  from ._check_jobs_status import _jobs_finished

  logger = set_logger(__name__)