fractal-server 2.1.0__py3-none-any.whl → 2.2.0__py3-none-any.whl

This diff shows the changes between two publicly released package versions, as they appear in their respective public registries, and is provided for informational purposes only.
Files changed (48)
  1. fractal_server/__init__.py +1 -1
  2. fractal_server/app/db/__init__.py +1 -1
  3. fractal_server/app/routes/admin/v1.py +2 -4
  4. fractal_server/app/routes/admin/v2.py +2 -4
  5. fractal_server/app/routes/api/v1/_aux_functions.py +24 -0
  6. fractal_server/app/routes/api/v1/job.py +3 -4
  7. fractal_server/app/routes/api/v1/project.py +28 -18
  8. fractal_server/app/routes/api/v2/_aux_functions.py +35 -12
  9. fractal_server/app/routes/api/v2/job.py +3 -4
  10. fractal_server/app/routes/api/v2/project.py +21 -0
  11. fractal_server/app/routes/api/v2/submit.py +33 -7
  12. fractal_server/app/routes/aux/_job.py +3 -1
  13. fractal_server/app/routes/aux/_runner.py +3 -3
  14. fractal_server/app/runner/executors/slurm/executor.py +157 -68
  15. fractal_server/app/runner/shutdown.py +88 -0
  16. fractal_server/app/runner/task_files.py +59 -27
  17. fractal_server/app/runner/v1/__init__.py +110 -56
  18. fractal_server/app/runner/v1/_common.py +53 -51
  19. fractal_server/app/runner/v1/_local/__init__.py +12 -11
  20. fractal_server/app/runner/v1/_local/_submit_setup.py +4 -4
  21. fractal_server/app/runner/v1/_slurm/__init__.py +16 -16
  22. fractal_server/app/runner/v1/_slurm/_submit_setup.py +11 -10
  23. fractal_server/app/runner/v1/_slurm/get_slurm_config.py +6 -6
  24. fractal_server/app/runner/v2/__init__.py +139 -60
  25. fractal_server/app/runner/v2/_local/__init__.py +12 -11
  26. fractal_server/app/runner/v2/_local/_local_config.py +1 -1
  27. fractal_server/app/runner/v2/_local/_submit_setup.py +4 -4
  28. fractal_server/app/runner/v2/_local_experimental/__init__.py +155 -0
  29. fractal_server/app/runner/v2/_local_experimental/_local_config.py +108 -0
  30. fractal_server/app/runner/v2/_local_experimental/_submit_setup.py +42 -0
  31. fractal_server/app/runner/v2/_local_experimental/executor.py +156 -0
  32. fractal_server/app/runner/v2/_slurm/__init__.py +10 -10
  33. fractal_server/app/runner/v2/_slurm/_submit_setup.py +11 -10
  34. fractal_server/app/runner/v2/_slurm/get_slurm_config.py +6 -6
  35. fractal_server/app/runner/v2/runner.py +17 -15
  36. fractal_server/app/runner/v2/runner_functions.py +38 -38
  37. fractal_server/app/runner/v2/runner_functions_low_level.py +12 -6
  38. fractal_server/config.py +52 -19
  39. fractal_server/gunicorn_fractal.py +40 -0
  40. fractal_server/{logger/__init__.py → logger.py} +2 -2
  41. fractal_server/main.py +24 -1
  42. fractal_server/migrations/env.py +1 -1
  43. {fractal_server-2.1.0.dist-info → fractal_server-2.2.0.dist-info}/METADATA +4 -1
  44. {fractal_server-2.1.0.dist-info → fractal_server-2.2.0.dist-info}/RECORD +47 -42
  45. fractal_server/logger/gunicorn_logger.py +0 -19
  46. {fractal_server-2.1.0.dist-info → fractal_server-2.2.0.dist-info}/LICENSE +0 -0
  47. {fractal_server-2.1.0.dist-info → fractal_server-2.2.0.dist-info}/WHEEL +0 -0
  48. {fractal_server-2.1.0.dist-info → fractal_server-2.2.0.dist-info}/entry_points.txt +0 -0
fractal_server/app/runner/task_files.py
@@ -1,32 +1,55 @@
 from pathlib import Path
 from typing import Optional
+from typing import Union
+
+from fractal_server.tasks.utils import slugify_task_name
 
 
 def sanitize_component(value: str) -> str:
     """
     Remove {" ", "/", "."} form a string, e.g. going from
     'plate.zarr/B/03/0' to 'plate_zarr_B_03_0'.
+
+    Args:
+        value: Input strig
     """
     return value.replace(" ", "_").replace("/", "_").replace(".", "_")
 
 
+def task_subfolder_name(order: Union[int, str], task_name: str) -> str:
+    """
+    Get name of task-specific subfolder.
+
+    Args:
+        order:
+        task_name:
+    """
+    task_name_slug = slugify_task_name(task_name)
+    return f"{order}_{task_name_slug}"
+
+
 class TaskFiles:
     """
     Group all file paths pertaining to a task
 
     Attributes:
-        workflow_dir:
+        workflow_dir_local:
             Server-owned directory to store all task-execution-related relevant
-            files (inputs, outputs, errors, and all meta files related to the
-            job execution). Note: users cannot write directly to this folder.
-        workflow_dir_user:
-            User-side directory with the same scope as `workflow_dir`, and
-            where a user can write.
+            files. Note: users cannot write directly to this folder.
+        workflow_dir_remote:
+            User-side directory with the same scope as `workflow_dir_local`,
+            and where a user can write.
+        subfolder_name:
+            Name of task-specific subfolder
+        remote_subfolder:
+            Path to user-side task-specific subfolder
+        task_name:
+            Name of the task
         task_order:
             Positional order of the task within a workflow.
         component:
-            Specific component to run the task for (relevant for tasks that
-            will be executed in parallel over many components).
+            Specific component to run the task for (relevant for tasks to be
+            executed in parallel over many components).
        file_prefix:
            Prefix for all task-related files.
        args:
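The new `task_subfolder_name` helper standardizes per-task subfolder names as `<order>_<slugified task name>`. A minimal sketch of the expected behavior (assuming `slugify_task_name` from `fractal_server.tasks.utils` lowercases the name and replaces spaces with underscores; the task name and order are illustrative):

    from fractal_server.app.runner.task_files import task_subfolder_name

    # Third task (order=2) of a workflow, named "Create OME-Zarr"
    subfolder = task_subfolder_name(order=2, task_name="Create OME-Zarr")
    # Expected result under the stated assumption: "2_create_ome-zarr"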
@@ -39,12 +62,16 @@ class TaskFiles:
         Path for task-execution stderr.
     """
 
-    workflow_dir: Path
-    workflow_dir_user: Path
+    workflow_dir_local: Path
+    workflow_dir_remote: Path
+    remote_subfolder: Path
+    subfolder_name: str
+    task_name: str
     task_order: Optional[int] = None
     component: Optional[str] = None
 
     file_prefix: str
+    file_prefix_with_subfolder: str
     args: Path
     out: Path
     err: Path
@@ -53,14 +80,16 @@ class TaskFiles:
 
     def __init__(
         self,
-        workflow_dir: Path,
-        workflow_dir_user: Path,
+        workflow_dir_local: Path,
+        workflow_dir_remote: Path,
+        task_name: str,
         task_order: Optional[int] = None,
         component: Optional[str] = None,
     ):
-        self.workflow_dir = workflow_dir
-        self.workflow_dir_user = workflow_dir_user
+        self.workflow_dir_local = workflow_dir_local
+        self.workflow_dir_remote = workflow_dir_remote
        self.task_order = task_order
+        self.task_name = task_name
        self.component = component
 
        if self.component is not None:
@@ -72,32 +101,35 @@ class TaskFiles:
         if self.task_order is not None:
             order = str(self.task_order)
         else:
-            order = "task"
+            order = "0"
         self.file_prefix = f"{order}{component_safe}"
-        self.args = self.workflow_dir_user / f"{self.file_prefix}.args.json"
-        self.out = self.workflow_dir_user / f"{self.file_prefix}.out"
-        self.err = self.workflow_dir_user / f"{self.file_prefix}.err"
-        self.log = self.workflow_dir_user / f"{self.file_prefix}.log"
+        self.subfolder_name = task_subfolder_name(
+            order=order, task_name=self.task_name
+        )
+        self.remote_subfolder = self.workflow_dir_remote / self.subfolder_name
+        self.args = self.remote_subfolder / f"{self.file_prefix}.args.json"
+        self.out = self.remote_subfolder / f"{self.file_prefix}.out"
+        self.err = self.remote_subfolder / f"{self.file_prefix}.err"
+        self.log = self.remote_subfolder / f"{self.file_prefix}.log"
         self.metadiff = (
-            self.workflow_dir_user / f"{self.file_prefix}.metadiff.json"
+            self.remote_subfolder / f"{self.file_prefix}.metadiff.json"
         )
 
 
 def get_task_file_paths(
-    workflow_dir: Path,
-    workflow_dir_user: Path,
+    workflow_dir_local: Path,
+    workflow_dir_remote: Path,
+    task_name: str,
     task_order: Optional[int] = None,
     component: Optional[str] = None,
 ) -> TaskFiles:
     """
     Return the corrisponding TaskFiles object
-
-    This function is mainly used as a cache to avoid instantiating needless
-    objects.
     """
     return TaskFiles(
-        workflow_dir=workflow_dir,
-        workflow_dir_user=workflow_dir_user,
+        workflow_dir_local=workflow_dir_local,
+        workflow_dir_remote=workflow_dir_remote,
+        task_name=task_name,
        task_order=task_order,
        component=component,
    )
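With this refactor, every `TaskFiles` instance resolves its per-task files inside a task-specific subfolder of `workflow_dir_remote` instead of writing them at the top level. A sketch of the resulting paths, using hypothetical directories and assuming the elided `component` branch leaves the prefix bare when `component is None`:

    from pathlib import Path
    from fractal_server.app.runner.task_files import TaskFiles

    tf = TaskFiles(
        workflow_dir_local=Path("/srv/fractal/job_0001"),
        workflow_dir_remote=Path("/home/user/.cache/fractal/job_0001"),
        task_name="My Task",
        task_order=1,
    )
    # tf.subfolder_name   -> "1_my_task" (via task_subfolder_name)
    # tf.remote_subfolder -> /home/user/.cache/fractal/job_0001/1_my_task
    # tf.args             -> .../1_my_task/1.args.json
    # tf.out, tf.err, tf.log, and tf.metadiff follow the same pattern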
fractal_server/app/runner/v1/__init__.py
@@ -22,6 +22,10 @@ import traceback
 from pathlib import Path
 from typing import Optional
 
+from sqlalchemy.orm import Session as DBSyncSession
+
+from ....logger import get_logger
+from ....logger import reset_logger_handlers
 from ....logger import set_logger
 from ....syringe import Inject
 from ....utils import get_timestamp
@@ -33,7 +37,11 @@ from ...models.v1 import WorkflowTask
 from ...schemas.v1 import JobStatusTypeV1
 from ..exceptions import JobExecutionError
 from ..exceptions import TaskExecutionError
+from ..executors.slurm._subprocess_run_as_user import (
+    _mkdir_as_user,
+)
 from ..filenames import WORKFLOW_LOG_FILENAME
+from ..task_files import task_subfolder_name
 from ._local import process_workflow as local_process_workflow
 from ._slurm import process_workflow as slurm_process_workflow
 from .common import close_job_logger
@@ -49,6 +57,27 @@ _backends["local"] = local_process_workflow
 _backends["slurm"] = slurm_process_workflow
 
 
+def fail_job(
+    *,
+    db: DBSyncSession,
+    job: ApplyWorkflow,
+    log_msg: str,
+    logger_name: str,
+    emit_log: bool = False,
+) -> None:
+    logger = get_logger(logger_name=logger_name)
+    if emit_log:
+        logger.error(log_msg)
+    reset_logger_handlers(logger)
+    job.status = JobStatusTypeV1.FAILED
+    job.end_timestamp = get_timestamp()
+    job.log = log_msg
+    db.merge(job)
+    db.commit()
+    db.close()
+    return
+
+
 
 async def submit_workflow(
     *,
@@ -87,21 +116,41 @@ async def submit_workflow(
         slurm backend.
     """
 
-    # Declare runner backend and set `process_workflow` function
-    settings = Inject(get_settings)
-    FRACTAL_RUNNER_BACKEND = settings.FRACTAL_RUNNER_BACKEND
-    if FRACTAL_RUNNER_BACKEND == "local":
-        process_workflow = local_process_workflow
-    elif FRACTAL_RUNNER_BACKEND == "slurm":
-        process_workflow = slurm_process_workflow
-    else:
-        raise RuntimeError(f"Invalid runner backend {FRACTAL_RUNNER_BACKEND=}")
+    logger_name = f"WF{workflow_id}_job{job_id}"
+    logger = set_logger(logger_name=logger_name)
 
     with next(DB.get_sync_db()) as db_sync:
 
         job: ApplyWorkflow = db_sync.get(ApplyWorkflow, job_id)
         if not job:
-            raise ValueError(f"Cannot fetch job {job_id} from database")
+            logger.error(f"ApplyWorkflow {job_id} does not exist")
+            return
+
+        settings = Inject(get_settings)
+        FRACTAL_RUNNER_BACKEND = settings.FRACTAL_RUNNER_BACKEND
+        if FRACTAL_RUNNER_BACKEND == "local":
+            process_workflow = local_process_workflow
+        elif FRACTAL_RUNNER_BACKEND == "slurm":
+            process_workflow = slurm_process_workflow
+        else:
+
+            if FRACTAL_RUNNER_BACKEND == "local_experimental":
+                log_msg = (
+                    f"{FRACTAL_RUNNER_BACKEND=} is not available for v1 jobs."
+                )
+            else:
+                log_msg = f"Invalid {FRACTAL_RUNNER_BACKEND=}"
+
+            fail_job(
+                job=job,
+                db=db_sync,
+                log_msg=log_msg,
+                logger_name=logger_name,
+                emit_log=True,
+            )
+            return
+
         # Declare runner backend and set `process_workflow` function
 
         input_dataset: Dataset = db_sync.get(Dataset, input_dataset_id)
         output_dataset: Dataset = db_sync.get(Dataset, output_dataset_id)
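Backend dispatch now happens inside the open database session, so an unsupported backend marks the job as FAILED via `fail_job` instead of raising; in particular, the new `local_experimental` backend is explicitly rejected for v1 jobs. A condensed sketch of the same selection logic (not verbatim source):

    process_workflow_map = {
        "local": local_process_workflow,
        "slurm": slurm_process_workflow,
    }
    process_workflow = process_workflow_map.get(FRACTAL_RUNNER_BACKEND)
    if process_workflow is None:
        # "local_experimental" gets a dedicated message; any other value
        # is reported as an invalid backend. Either way: fail_job + return.
        ...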
@@ -122,12 +171,9 @@ async def submit_workflow(
             log_msg += (
                 f"Cannot fetch workflow {workflow_id} from database\n"
             )
-            job.status = JobStatusTypeV1.FAILED
-            job.end_timestamp = get_timestamp()
-            job.log = log_msg
-            db_sync.merge(job)
-            db_sync.commit()
-            db_sync.close()
+            fail_job(
+                db=db_sync, job=job, log_msg=log_msg, logger_name=logger_name
+            )
             return
 
         # Prepare some of process_workflow arguments
@@ -137,36 +183,53 @@ async def submit_workflow(
         # Define and create server-side working folder
         project_id = workflow.project_id
         timestamp_string = get_timestamp().strftime("%Y%m%d_%H%M%S")
-        WORKFLOW_DIR = settings.FRACTAL_RUNNER_WORKING_BASE_DIR / (
+        WORKFLOW_DIR_LOCAL = settings.FRACTAL_RUNNER_WORKING_BASE_DIR / (
             f"proj_{project_id:07d}_wf_{workflow_id:07d}_job_{job_id:07d}"
             f"_{timestamp_string}"
         )
 
-        if WORKFLOW_DIR.exists():
-            raise RuntimeError(f"Workflow dir {WORKFLOW_DIR} already exists.")
+        if WORKFLOW_DIR_LOCAL.exists():
+            fail_job(
+                db=db_sync,
+                job=job,
+                log_msg=f"Workflow dir {WORKFLOW_DIR_LOCAL} already exists.",
+                logger_name=logger_name,
+                emit_log=True,
+            )
+            return
 
-        # Create WORKFLOW_DIR with 755 permissions
+        # Create WORKFLOW_DIR
         original_umask = os.umask(0)
-        WORKFLOW_DIR.mkdir(parents=True, mode=0o755)
+        WORKFLOW_DIR_LOCAL.mkdir(parents=True, mode=0o755)
         os.umask(original_umask)
 
-        # Define and create user-side working folder, if needed
+        # Define and create WORKFLOW_DIR_REMOTE
         if FRACTAL_RUNNER_BACKEND == "local":
-            WORKFLOW_DIR_USER = WORKFLOW_DIR
+            WORKFLOW_DIR_REMOTE = WORKFLOW_DIR_LOCAL
         elif FRACTAL_RUNNER_BACKEND == "slurm":
-
-            from ..executors.slurm._subprocess_run_as_user import (
-                _mkdir_as_user,
+            WORKFLOW_DIR_REMOTE = (
+                Path(user_cache_dir) / WORKFLOW_DIR_LOCAL.name
             )
+            _mkdir_as_user(folder=str(WORKFLOW_DIR_REMOTE), user=slurm_user)
 
-            WORKFLOW_DIR_USER = Path(user_cache_dir) / f"{WORKFLOW_DIR.name}"
-            _mkdir_as_user(folder=str(WORKFLOW_DIR_USER), user=slurm_user)
-        else:
-            raise ValueError(f"{FRACTAL_RUNNER_BACKEND=} not supported")
+        # Create all tasks subfolders
+        for order in range(job.first_task_index, job.last_task_index + 1):
+            subfolder_name = task_subfolder_name(
+                order=order,
+                task_name=workflow.task_list[order].task.name,
+            )
+            original_umask = os.umask(0)
+            (WORKFLOW_DIR_LOCAL / subfolder_name).mkdir(mode=0o755)
+            os.umask(original_umask)
+            if FRACTAL_RUNNER_BACKEND == "slurm":
+                _mkdir_as_user(
+                    folder=str(WORKFLOW_DIR_REMOTE / subfolder_name),
+                    user=slurm_user,
+                )
 
         # Update db
-        job.working_dir = WORKFLOW_DIR.as_posix()
-        job.working_dir_user = WORKFLOW_DIR_USER.as_posix()
+        job.working_dir = WORKFLOW_DIR_LOCAL.as_posix()
+        job.working_dir_user = WORKFLOW_DIR_REMOTE.as_posix()
         db_sync.merge(job)
         db_sync.commit()
 
@@ -186,8 +249,7 @@ async def submit_workflow(
 
         db_sync.refresh(workflow)
 
         # Write logs
-        logger_name = f"WF{workflow_id}_job{job_id}"
-        log_file_path = WORKFLOW_DIR / WORKFLOW_LOG_FILENAME
+        log_file_path = WORKFLOW_DIR_LOCAL / WORKFLOW_LOG_FILENAME
         logger = set_logger(
             logger_name=logger_name,
             log_file_path=log_file_path,
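Putting the directory-creation hunk and this logging hunk together: the runner now pre-creates one subfolder per task, both server-side and (for SLURM) user-side, before execution starts. Under the naming scheme above, a job running tasks 0 and 1 would produce a working-directory layout roughly like this (paths are hypothetical, and the log filename assumes WORKFLOW_LOG_FILENAME resolves to "workflow.log"):

    proj_0000001_wf_0000001_job_0000001_20240101_120000/
    ├── 0_create_ome-zarr/    # subfolder for task 0, created with mode 0o755
    ├── 1_my_task/            # subfolder for task 1
    └── workflow.log          # job-level log written via set_logger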
@@ -234,8 +296,8 @@ async def submit_workflow(
                 slurm_user=slurm_user,
                 slurm_account=job.slurm_account,
                 user_cache_dir=user_cache_dir,
-                workflow_dir=WORKFLOW_DIR,
-                workflow_dir_user=WORKFLOW_DIR_USER,
+                workflow_dir_local=WORKFLOW_DIR_LOCAL,
+                workflow_dir_remote=WORKFLOW_DIR_REMOTE,
                 logger_name=logger_name,
                 worker_init=worker_init,
                 first_task_index=job.first_task_index,
@@ -286,19 +348,14 @@ async def submit_workflow(
 
             db_sync.merge(output_dataset)
 
-            job.status = JobStatusTypeV1.FAILED
-            job.end_timestamp = get_timestamp()
-
             exception_args_string = "\n".join(e.args)
-            job.log = (
+            log_msg = (
                 f"TASK ERROR: "
                 f"Task name: {e.task_name}, "
                 f"position in Workflow: {e.workflow_task_order}\n"
                 f"TRACEBACK:\n{exception_args_string}"
             )
-            db_sync.merge(job)
-            close_job_logger(logger)
-            db_sync.commit()
+            fail_job(db=db_sync, job=job, log_msg=log_msg, logger_name=logger_name)
 
         except JobExecutionError as e:
 
@@ -318,14 +375,13 @@ async def submit_workflow(
             )
 
             db_sync.merge(output_dataset)
-
-            job.status = JobStatusTypeV1.FAILED
-            job.end_timestamp = get_timestamp()
             error = e.assemble_error()
-            job.log = f"JOB ERROR in Fractal job {job.id}:\nTRACEBACK:\n{error}"
-            db_sync.merge(job)
-            close_job_logger(logger)
-            db_sync.commit()
+            fail_job(
+                db=db_sync,
+                job=job,
+                log_msg=f"JOB ERROR in Fractal job {job.id}:\nTRACEBACK:\n{error}",
+                logger_name=logger_name,
+            )
 
         except Exception:
 
@@ -348,14 +404,12 @@ async def submit_workflow(
 
             db_sync.merge(output_dataset)
 
-            job.status = JobStatusTypeV1.FAILED
-            job.end_timestamp = get_timestamp()
-            job.log = (
+            log_msg = (
                 f"UNKNOWN ERROR in Fractal job {job.id}\n"
                 f"TRACEBACK:\n{current_traceback}"
             )
-            db_sync.merge(job)
-            close_job_logger(logger)
-            db_sync.commit()
+            fail_job(db=db_sync, job=job, log_msg=log_msg, logger_name=logger_name)
+
         finally:
             db_sync.close()
+            reset_logger_handlers(logger)
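With these last three hunks, every error branch of `submit_workflow` converges on the same fail path, and logger handlers are reset exactly once in `finally`. A condensed sketch of the resulting control flow (simplified, not verbatim source):

    try:
        ...  # run process_workflow, then mark the job as done
    except TaskExecutionError as e:
        fail_job(db=db_sync, job=job, log_msg=..., logger_name=logger_name)
    except JobExecutionError as e:
        fail_job(db=db_sync, job=job, log_msg=..., logger_name=logger_name)
    except Exception:
        fail_job(db=db_sync, job=job, log_msg=..., logger_name=logger_name)
    finally:
        db_sync.close()
        reset_logger_handlers(logger)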