fractal-server 2.0.6__py3-none-any.whl → 2.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. fractal_server/__init__.py +1 -1
  2. fractal_server/app/db/__init__.py +1 -1
  3. fractal_server/app/routes/admin/v1.py +2 -4
  4. fractal_server/app/routes/admin/v2.py +2 -4
  5. fractal_server/app/routes/api/v1/_aux_functions.py +24 -0
  6. fractal_server/app/routes/api/v1/job.py +3 -4
  7. fractal_server/app/routes/api/v1/project.py +28 -18
  8. fractal_server/app/routes/api/v2/_aux_functions.py +35 -12
  9. fractal_server/app/routes/api/v2/job.py +3 -4
  10. fractal_server/app/routes/api/v2/project.py +21 -0
  11. fractal_server/app/routes/api/v2/submit.py +36 -15
  12. fractal_server/app/routes/aux/_job.py +3 -1
  13. fractal_server/app/routes/aux/_runner.py +3 -3
  14. fractal_server/app/runner/executors/slurm/executor.py +169 -68
  15. fractal_server/app/runner/shutdown.py +88 -0
  16. fractal_server/app/runner/task_files.py +59 -27
  17. fractal_server/app/runner/v1/__init__.py +113 -64
  18. fractal_server/app/runner/v1/_common.py +53 -51
  19. fractal_server/app/runner/v1/_local/__init__.py +12 -11
  20. fractal_server/app/runner/v1/_local/_submit_setup.py +4 -4
  21. fractal_server/app/runner/v1/_slurm/__init__.py +16 -16
  22. fractal_server/app/runner/v1/_slurm/_submit_setup.py +11 -10
  23. fractal_server/app/runner/v1/_slurm/get_slurm_config.py +6 -6
  24. fractal_server/app/runner/v2/__init__.py +139 -60
  25. fractal_server/app/runner/v2/_local/__init__.py +12 -11
  26. fractal_server/app/runner/v2/_local/_local_config.py +1 -1
  27. fractal_server/app/runner/v2/_local/_submit_setup.py +4 -4
  28. fractal_server/app/runner/v2/_local_experimental/__init__.py +155 -0
  29. fractal_server/app/runner/v2/_local_experimental/_local_config.py +108 -0
  30. fractal_server/app/runner/v2/_local_experimental/_submit_setup.py +42 -0
  31. fractal_server/app/runner/v2/_local_experimental/executor.py +156 -0
  32. fractal_server/app/runner/v2/_slurm/__init__.py +10 -10
  33. fractal_server/app/runner/v2/_slurm/_submit_setup.py +11 -10
  34. fractal_server/app/runner/v2/_slurm/get_slurm_config.py +6 -6
  35. fractal_server/app/runner/v2/runner.py +17 -15
  36. fractal_server/app/runner/v2/runner_functions.py +38 -38
  37. fractal_server/app/runner/v2/runner_functions_low_level.py +12 -6
  38. fractal_server/app/security/__init__.py +4 -5
  39. fractal_server/config.py +73 -19
  40. fractal_server/gunicorn_fractal.py +40 -0
  41. fractal_server/{logger/__init__.py → logger.py} +2 -2
  42. fractal_server/main.py +45 -26
  43. fractal_server/migrations/env.py +1 -1
  44. {fractal_server-2.0.6.dist-info → fractal_server-2.2.0.dist-info}/METADATA +4 -1
  45. {fractal_server-2.0.6.dist-info → fractal_server-2.2.0.dist-info}/RECORD +48 -43
  46. fractal_server/logger/gunicorn_logger.py +0 -19
  47. {fractal_server-2.0.6.dist-info → fractal_server-2.2.0.dist-info}/LICENSE +0 -0
  48. {fractal_server-2.0.6.dist-info → fractal_server-2.2.0.dist-info}/WHEEL +0 -0
  49. {fractal_server-2.0.6.dist-info → fractal_server-2.2.0.dist-info}/entry_points.txt +0 -0
@@ -110,12 +110,14 @@ class SlurmJob:
110
110
  slurm_file_prefix:
111
111
  Prefix for SLURM-job related files (submission script and SLURM
112
112
  stdout/stderr); this is also needed in the
113
- `_copy_files_from_user_to_server` method.
113
+ `_copy_files_from_remote_to_local` method.
114
114
  wftask_file_prefixes:
115
115
  Prefix for files that are created as part of the functions
116
116
  submitted for execution on the `FractalSlurmExecutor`; this is
117
- needed in the `_copy_files_from_user_to_server` method, and also to
118
- construct the names of per-task input/output pickle files.
117
+ needed in the `_copy_files_from_remote_to_local` method, and also
118
+ to construct the names of per-task input/output pickle files.
119
+ wftask_subfolder_name:
120
+ Name of the per-task subfolder (e.g. `7_task_name`).
119
121
  slurm_script:
120
122
  Path of SLURM submission script.
121
123
  slurm_stdout:
@@ -145,6 +147,7 @@ class SlurmJob:
145
147
  # Per-task attributes
146
148
  workerids: tuple[str, ...]
147
149
  wftask_file_prefixes: tuple[str, ...]
150
+ wftask_subfolder_name: str
148
151
  input_pickle_files: tuple[Path, ...]
149
152
  output_pickle_files: tuple[Path, ...]
150
153
  # Slurm configuration
@@ -196,9 +199,9 @@ class FractalSlurmExecutor(SlurmExecutor):
196
199
  common_script_lines:
197
200
  Arbitrary script lines that will always be included in the
198
201
  sbatch script
199
- working_dir:
202
+ workflow_dir_local:
200
203
  Directory for both the cfut/SLURM and fractal-server files and logs
201
- working_dir_user:
204
+ workflow_dir_remote:
202
205
  Directory for both the cfut/SLURM and fractal-server files and logs
203
206
  map_jobid_to_slurm_files:
204
207
  Dictionary with paths of slurm-related files for active jobs
@@ -209,17 +212,18 @@ class FractalSlurmExecutor(SlurmExecutor):
209
212
  shutdown_file: str
210
213
  common_script_lines: list[str]
211
214
  user_cache_dir: str
212
- working_dir: Path
213
- working_dir_user: Path
215
+ workflow_dir_local: Path
216
+ workflow_dir_remote: Path
214
217
  map_jobid_to_slurm_files: dict[str, tuple[str, str, str]]
215
218
  keep_pickle_files: bool
216
219
  slurm_account: Optional[str]
220
+ jobs: dict[str, tuple[Future, SlurmJob]]
217
221
 
218
222
  def __init__(
219
223
  self,
220
224
  slurm_user: str,
221
- working_dir: Path,
222
- working_dir_user: Path,
225
+ workflow_dir_local: Path,
226
+ workflow_dir_remote: Path,
223
227
  shutdown_file: Optional[str] = None,
224
228
  user_cache_dir: Optional[str] = None,
225
229
  common_script_lines: Optional[list[str]] = None,
@@ -262,14 +266,14 @@ class FractalSlurmExecutor(SlurmExecutor):
262
266
  except StopIteration:
263
267
  pass
264
268
 
265
- self.working_dir = working_dir
269
+ self.workflow_dir_local = workflow_dir_local
266
270
  if not _path_exists_as_user(
267
- path=str(working_dir_user), user=self.slurm_user
271
+ path=str(workflow_dir_remote), user=self.slurm_user
268
272
  ):
269
- logger.info(f"Missing folder {working_dir_user=}")
273
+ logger.info(f"Missing folder {workflow_dir_remote=}")
270
274
  self.user_cache_dir = user_cache_dir
271
275
 
272
- self.working_dir_user = working_dir_user
276
+ self.workflow_dir_remote = workflow_dir_remote
273
277
  self.map_jobid_to_slurm_files = {}
274
278
 
275
279
  # Set the attribute slurm_poll_interval for self.wait_thread (see
@@ -281,7 +285,8 @@ class FractalSlurmExecutor(SlurmExecutor):
281
285
  self.wait_thread.slurm_user = self.slurm_user
282
286
 
283
287
  self.wait_thread.shutdown_file = (
284
- shutdown_file or (self.working_dir / SHUTDOWN_FILENAME).as_posix()
288
+ shutdown_file
289
+ or (self.workflow_dir_local / SHUTDOWN_FILENAME).as_posix()
285
290
  )
286
291
  self.wait_thread.shutdown_callback = self.shutdown
287
292
 
@@ -294,32 +299,64 @@ class FractalSlurmExecutor(SlurmExecutor):
294
299
  self.map_jobid_to_slurm_files.pop(jobid)
295
300
 
296
301
  def get_input_pickle_file_path(
297
- self, arg: str, prefix: Optional[str] = None
302
+ self, *, arg: str, subfolder_name: str, prefix: Optional[str] = None
298
303
  ) -> Path:
304
+
299
305
  prefix = prefix or "cfut"
300
- return self.working_dir / f"{prefix}_in_{arg}.pickle"
306
+ output = (
307
+ self.workflow_dir_local
308
+ / subfolder_name
309
+ / f"{prefix}_in_{arg}.pickle"
310
+ )
311
+ return output
301
312
 
302
313
  def get_output_pickle_file_path(
303
- self, arg: str, prefix: Optional[str] = None
314
+ self, *, arg: str, subfolder_name: str, prefix: Optional[str] = None
304
315
  ) -> Path:
305
316
  prefix = prefix or "cfut"
306
- return self.working_dir_user / f"{prefix}_out_{arg}.pickle"
317
+ return (
318
+ self.workflow_dir_remote
319
+ / subfolder_name
320
+ / f"{prefix}_out_{arg}.pickle"
321
+ )
307
322
 
308
- def get_slurm_script_file_path(self, prefix: Optional[str] = None) -> Path:
323
+ def get_slurm_script_file_path(
324
+ self, *, subfolder_name: str, prefix: Optional[str] = None
325
+ ) -> Path:
309
326
  prefix = prefix or "_temp"
310
- return self.working_dir / f"{prefix}_slurm_submit.sbatch"
327
+ return (
328
+ self.workflow_dir_local
329
+ / subfolder_name
330
+ / f"{prefix}_slurm_submit.sbatch"
331
+ )
311
332
 
312
333
  def get_slurm_stdout_file_path(
313
- self, arg: str = "%j", prefix: Optional[str] = None
334
+ self,
335
+ *,
336
+ subfolder_name: str,
337
+ arg: str = "%j",
338
+ prefix: Optional[str] = None,
314
339
  ) -> Path:
315
340
  prefix = prefix or "slurmpy.stdout"
316
- return self.working_dir_user / f"{prefix}_slurm_{arg}.out"
341
+ return (
342
+ self.workflow_dir_remote
343
+ / subfolder_name
344
+ / f"{prefix}_slurm_{arg}.out"
345
+ )
317
346
 
318
347
  def get_slurm_stderr_file_path(
319
- self, arg: str = "%j", prefix: Optional[str] = None
348
+ self,
349
+ *,
350
+ subfolder_name: str,
351
+ arg: str = "%j",
352
+ prefix: Optional[str] = None,
320
353
  ) -> Path:
321
354
  prefix = prefix or "slurmpy.stderr"
322
- return self.working_dir_user / f"{prefix}_slurm_{arg}.err"
355
+ return (
356
+ self.workflow_dir_remote
357
+ / subfolder_name
358
+ / f"{prefix}_slurm_{arg}.err"
359
+ )
323
360
 
324
361
  def submit(
325
362
  self,
@@ -599,6 +636,8 @@ class FractalSlurmExecutor(SlurmExecutor):
599
636
  )
600
637
  job.single_task_submission = True
601
638
  job.wftask_file_prefixes = (task_files.file_prefix,)
639
+ job.wftask_subfolder_name = task_files.subfolder_name
640
+
602
641
  else:
603
642
  if not components or len(components) < 1:
604
643
  raise ValueError(
@@ -613,33 +652,60 @@ class FractalSlurmExecutor(SlurmExecutor):
613
652
  )
614
653
 
615
654
  _prefixes = []
655
+ _subfolder_names = []
616
656
  for component in components:
617
657
  if isinstance(component, dict):
618
658
  # This is needed for V2
619
659
  actual_component = component.get(_COMPONENT_KEY_, None)
620
660
  else:
621
661
  actual_component = component
622
- _prefixes.append(
623
- get_task_file_paths(
624
- workflow_dir=task_files.workflow_dir,
625
- workflow_dir_user=task_files.workflow_dir_user,
626
- task_order=task_files.task_order,
627
- component=actual_component,
628
- ).file_prefix
662
+ _task_file_paths = get_task_file_paths(
663
+ workflow_dir_local=task_files.workflow_dir_local,
664
+ workflow_dir_remote=task_files.workflow_dir_remote,
665
+ task_name=task_files.task_name,
666
+ task_order=task_files.task_order,
667
+ component=actual_component,
629
668
  )
669
+ _prefixes.append(_task_file_paths.file_prefix)
670
+ _subfolder_names.append(_task_file_paths.subfolder_name)
630
671
  job.wftask_file_prefixes = tuple(_prefixes)
631
672
 
673
+ num_subfolders = len(set(_subfolder_names))
674
+ if num_subfolders != 1:
675
+ error_msg_short = (
676
+ f"[_submit_job] Subfolder list has {num_subfolders} "
677
+ "different values, but it must have only one (since "
678
+ "workflow tasks are executed one by one)."
679
+ )
680
+ error_msg_detail = (
681
+ "[_submit_job] Current unique subfolder names: "
682
+ f"{set(_subfolder_names)}"
683
+ )
684
+ logger.error(error_msg_short)
685
+ logger.error(error_msg_detail)
686
+ raise ValueError(error_msg_short)
687
+ job.wftask_subfolder_name = _subfolder_names[0]
688
+
689
+ # Check that server-side subfolder exists
690
+ subfolder_path = self.workflow_dir_local / job.wftask_subfolder_name
691
+ if not subfolder_path.exists():
692
+ raise FileNotFoundError(
693
+ f"Missing folder {subfolder_path.as_posix()}."
694
+ )
695
+
632
696
  # Define I/O pickle file names/paths
633
697
  job.input_pickle_files = tuple(
634
698
  self.get_input_pickle_file_path(
635
- job.workerids[ind],
699
+ arg=job.workerids[ind],
700
+ subfolder_name=job.wftask_subfolder_name,
636
701
  prefix=job.wftask_file_prefixes[ind],
637
702
  )
638
703
  for ind in range(job.num_tasks_tot)
639
704
  )
640
705
  job.output_pickle_files = tuple(
641
706
  self.get_output_pickle_file_path(
642
- job.workerids[ind],
707
+ arg=job.workerids[ind],
708
+ subfolder_name=job.wftask_subfolder_name,
643
709
  prefix=job.wftask_file_prefixes[ind],
644
710
  )
645
711
  for ind in range(job.num_tasks_tot)
@@ -647,13 +713,16 @@ class FractalSlurmExecutor(SlurmExecutor):
647
713
 
648
714
  # Define SLURM-job file names/paths
649
715
  job.slurm_script = self.get_slurm_script_file_path(
650
- prefix=job.slurm_file_prefix
716
+ subfolder_name=job.wftask_subfolder_name,
717
+ prefix=job.slurm_file_prefix,
651
718
  )
652
719
  job.slurm_stdout = self.get_slurm_stdout_file_path(
653
- prefix=job.slurm_file_prefix
720
+ subfolder_name=job.wftask_subfolder_name,
721
+ prefix=job.slurm_file_prefix,
654
722
  )
655
723
  job.slurm_stderr = self.get_slurm_stderr_file_path(
656
- prefix=job.slurm_file_prefix
724
+ subfolder_name=job.wftask_subfolder_name,
725
+ prefix=job.slurm_file_prefix,
657
726
  )
658
727
 
659
728
  # Dump serialized versions+function+args+kwargs to pickle file
@@ -706,10 +775,10 @@ class FractalSlurmExecutor(SlurmExecutor):
706
775
  Prepare the `JobExecutionError` for a given job
707
776
 
708
777
  This method creates a `JobExecutionError` object and sets its attribute
709
- to the appropriate SLURM-related file names. Note that the method shoul
710
- always be called after values in `self.map_jobid_to_slurm_files` have
711
- been updated, so that they point to `self.working_dir` files which are
712
- readable from `fractal-server`.
778
+ to the appropriate SLURM-related file names. Note that the method
779
+ should always be called after values in `self.map_jobid_to_slurm_files`
780
+ have been updated, so that they point to `self.workflow_dir_local`
781
+ files which are readable from `fractal-server`.
713
782
 
714
783
  Arguments:
715
784
  jobid:
@@ -758,13 +827,13 @@ class FractalSlurmExecutor(SlurmExecutor):
758
827
  if not self.jobs:
759
828
  self.jobs_empty_cond.notify_all()
760
829
 
761
- # Copy all relevant files from self.working_dir_user to
762
- # self.working_dir
830
+ # Copy all relevant files from self.workflow_dir_remote to
831
+ # self.workflow_dir_local
763
832
 
764
- self._copy_files_from_user_to_server(job)
833
+ self._copy_files_from_remote_to_local(job)
765
834
 
766
- # Update the paths to use the files in self.working_dir (rather
767
- # than the user's ones in self.working_dir_user)
835
+ # Update the paths to use the files in self.workflow_dir_local
836
+ # (rather than the user's ones in self.workflow_dir_remote)
768
837
  with self.jobs_lock:
769
838
  self.map_jobid_to_slurm_files[jobid]
770
839
  (
@@ -773,10 +842,14 @@ class FractalSlurmExecutor(SlurmExecutor):
773
842
  slurm_stderr_file,
774
843
  ) = self.map_jobid_to_slurm_files[jobid]
775
844
  new_slurm_stdout_file = str(
776
- self.working_dir / Path(slurm_stdout_file).name
845
+ self.workflow_dir_local
846
+ / job.wftask_subfolder_name
847
+ / Path(slurm_stdout_file).name
777
848
  )
778
849
  new_slurm_stderr_file = str(
779
- self.working_dir / Path(slurm_stderr_file).name
850
+ self.workflow_dir_local
851
+ / job.wftask_subfolder_name
852
+ / Path(slurm_stderr_file).name
780
853
  )
781
854
  with self.jobs_lock:
782
855
  self.map_jobid_to_slurm_files[jobid] = (
@@ -787,7 +860,8 @@ class FractalSlurmExecutor(SlurmExecutor):
787
860
 
788
861
  in_paths = job.input_pickle_files
789
862
  out_paths = tuple(
790
- self.working_dir / f.name for f in job.output_pickle_files
863
+ (self.workflow_dir_local / job.wftask_subfolder_name / f.name)
864
+ for f in job.output_pickle_files
791
865
  )
792
866
 
793
867
  outputs = []
@@ -908,16 +982,16 @@ class FractalSlurmExecutor(SlurmExecutor):
908
982
  " FractalSlurmExecutor._completion."
909
983
  )
910
984
 
911
- def _copy_files_from_user_to_server(
985
+ def _copy_files_from_remote_to_local(
912
986
  self,
913
987
  job: SlurmJob,
914
988
  ):
915
989
  """
916
990
  Impersonate the user and copy task-related files
917
991
 
918
- For all files in `self.working_dir_user` that start with
992
+ For all files in `self.workflow_dir_remote` that start with
919
993
  `job.file_prefix`, read them (with `sudo -u` impersonation) and write
920
- them to `self.working_dir`.
994
+ them to `self.workflow_dir_local`.
921
995
 
922
996
  Files to copy:
923
997
  * Job-related files (SLURM stderr/stdout files); with prefix
@@ -932,36 +1006,48 @@ class FractalSlurmExecutor(SlurmExecutor):
932
1006
  Raises:
933
1007
  JobExecutionError: If a `cat` command fails.
934
1008
  """
935
- logger.debug("Enter _copy_files_from_user_to_server")
936
- if self.working_dir_user == self.working_dir:
1009
+ logger.debug("[_copy_files_from_remote_to_local] Start")
1010
+
1011
+ if self.workflow_dir_remote == self.workflow_dir_local:
1012
+ logger.debug(
1013
+ "[_copy_files_from_remote_to_local] "
1014
+ "workflow_dir_local corresponds to workflow_dir_remote, "
1015
+ "return."
1016
+ )
937
1017
  return
938
1018
 
1019
+ subfolder_name = job.wftask_subfolder_name
939
1020
  prefixes = set(
940
1021
  [job.slurm_file_prefix] + list(job.wftask_file_prefixes)
941
1022
  )
942
1023
 
943
- logger.debug(f"[_copy_files_from_user_to_server] {prefixes=}")
944
1024
  logger.debug(
945
- f"[_copy_files_from_user_to_server] {str(self.working_dir_user)=}"
1025
+ "[_copy_files_from_remote_to_local] "
1026
+ f"WorkflowTask subfolder_name: {subfolder_name}"
1027
+ )
1028
+ logger.debug(f"[_copy_files_from_remote_to_local] {prefixes=}")
1029
+ logger.debug(
1030
+ "[_copy_files_from_remote_to_local] "
1031
+ f"{str(self.workflow_dir_remote)=}"
946
1032
  )
947
1033
 
948
1034
  for prefix in prefixes:
949
1035
 
950
1036
  if prefix == job.slurm_file_prefix:
951
1037
  files_to_copy = _glob_as_user(
952
- folder=str(self.working_dir_user),
1038
+ folder=str(self.workflow_dir_remote / subfolder_name),
953
1039
  user=self.slurm_user,
954
1040
  startswith=prefix,
955
1041
  )
956
1042
  else:
957
1043
  files_to_copy = _glob_as_user_strict(
958
- folder=str(self.working_dir_user),
1044
+ folder=str(self.workflow_dir_remote / subfolder_name),
959
1045
  user=self.slurm_user,
960
1046
  startswith=prefix,
961
1047
  )
962
1048
 
963
1049
  logger.debug(
964
- "[_copy_files_from_user_to_server] "
1050
+ "[_copy_files_from_remote_to_local] "
965
1051
  f"{prefix=}, {len(files_to_copy)=}"
966
1052
  )
967
1053
 
@@ -972,7 +1058,9 @@ class FractalSlurmExecutor(SlurmExecutor):
972
1058
  "contains whitespaces"
973
1059
  )
974
1060
  source_file_path = str(
975
- self.working_dir_user / source_file_name
1061
+ self.workflow_dir_remote
1062
+ / subfolder_name
1063
+ / source_file_name
976
1064
  )
977
1065
 
978
1066
  # Read source_file_path (requires sudo)
@@ -991,10 +1079,12 @@ class FractalSlurmExecutor(SlurmExecutor):
991
1079
  logger.error(info)
992
1080
  raise JobExecutionError(info)
993
1081
  # Write to dest_file_path (including empty files)
994
- dest_file_path = str(self.working_dir / source_file_name)
1082
+ dest_file_path = str(
1083
+ self.workflow_dir_local / subfolder_name / source_file_name
1084
+ )
995
1085
  with open(dest_file_path, "wb") as f:
996
1086
  f.write(res.stdout)
997
- logger.debug("[_copy_files_from_user_to_server] End")
1087
+ logger.debug("[_copy_files_from_remote_to_local] End")
998
1088
 
999
1089
  def _start(
1000
1090
  self,
@@ -1099,7 +1189,7 @@ class FractalSlurmExecutor(SlurmExecutor):
1099
1189
  [
1100
1190
  f"#SBATCH --err={slurm_err_path}",
1101
1191
  f"#SBATCH --out={slurm_out_path}",
1102
- f"#SBATCH -D {self.working_dir_user}",
1192
+ f"#SBATCH -D {self.workflow_dir_remote}",
1103
1193
  ]
1104
1194
  )
1105
1195
  script_lines = slurm_config.sort_script_lines(script_lines)
@@ -1131,12 +1221,11 @@ class FractalSlurmExecutor(SlurmExecutor):
1131
1221
  This will be called when self.submit or self.map are called from
1132
1222
  outside fractal-server, and then lack some optional arguments.
1133
1223
  """
1134
- import random
1135
-
1136
1224
  task_files = TaskFiles(
1137
- workflow_dir=self.working_dir,
1138
- workflow_dir_user=self.working_dir_user,
1139
- task_order=random.randint(10000, 99999), # nosec
1225
+ workflow_dir_local=self.workflow_dir_local,
1226
+ workflow_dir_remote=self.workflow_dir_remote,
1227
+ task_order=None,
1228
+ task_name="name",
1140
1229
  )
1141
1230
  return task_files
1142
1231
 
@@ -1154,7 +1243,7 @@ class FractalSlurmExecutor(SlurmExecutor):
1154
1243
  while self.jobs:
1155
1244
  jobid, fut_and_job = self.jobs.popitem()
1156
1245
  slurm_jobs_to_scancel.append(jobid)
1157
- fut, job = fut_and_job[:]
1246
+ fut = fut_and_job[0]
1158
1247
  self.map_jobid_to_slurm_files.pop(jobid)
1159
1248
  if not fut.cancelled():
1160
1249
  fut.set_exception(
@@ -1188,3 +1277,15 @@ class FractalSlurmExecutor(SlurmExecutor):
1188
1277
  raise JobExecutionError(info=error_msg)
1189
1278
 
1190
1279
  logger.debug("Executor shutdown: end")
1280
+
1281
+ def __exit__(self, *args, **kwargs):
1282
+ """
1283
+ See
1284
+ https://github.com/fractal-analytics-platform/fractal-server/issues/1508
1285
+ """
1286
+ logger.debug(
1287
+ "[FractalSlurmExecutor.__exit__] Stop and join `wait_thread`"
1288
+ )
1289
+ self.wait_thread.stop()
1290
+ self.wait_thread.join()
1291
+ logger.debug("[FractalSlurmExecutor.__exit__] End")
@@ -0,0 +1,88 @@
1
+ import time
2
+
3
+ from sqlmodel import select
4
+
5
+ from fractal_server.app.db import get_async_db
6
+ from fractal_server.app.models.v1 import ApplyWorkflow
7
+ from fractal_server.app.models.v1.job import JobStatusTypeV1
8
+ from fractal_server.app.models.v2 import JobV2
9
+ from fractal_server.app.models.v2.job import JobStatusTypeV2
10
+ from fractal_server.app.routes.aux._job import _write_shutdown_file
11
+ from fractal_server.config import get_settings
12
+ from fractal_server.logger import get_logger
13
+ from fractal_server.syringe import Inject
14
+
15
+
16
async def cleanup_after_shutdown(
    *, jobsV1: list[int], jobsV2: list[int], logger_name: str
):
    """
    Ask still-submitted jobs to stop, then mark the stragglers as failed.

    Steps:

    1. Select the V1/V2 jobs whose IDs are in `jobsV1`/`jobsV2` and whose
       status is still "submitted".
    2. Write a shutdown file for each of them.
    3. Re-run the same queries every 3 seconds, for at most
       `settings.FRACTAL_GRACEFUL_SHUTDOWN_TIME` seconds, and return as
       soon as no job is left in the "submitted" status.
    4. If the time budget is exhausted, set the remaining jobs' status to
       "failed", append a note to their log and commit.

    Arguments:
        jobsV1: IDs of the V1 jobs (`ApplyWorkflow`) to take care of.
        jobsV2: IDs of the V2 jobs (`JobV2`) to take care of.
        logger_name: Name of the logger used for progress messages.
    """
    # Function-scope import, so that this function does not depend on the
    # module-level import block.
    import asyncio

    logger = get_logger(logger_name)
    logger.info("Cleanup function after shutdown")

    # Both statements are re-executed at every polling iteration below.
    stm_v2 = (
        select(JobV2)
        .where(JobV2.id.in_(jobsV2))
        .where(JobV2.status == JobStatusTypeV2.SUBMITTED)
    )
    stm_v1 = (
        select(ApplyWorkflow)
        .where(ApplyWorkflow.id.in_(jobsV1))
        .where(ApplyWorkflow.status == JobStatusTypeV1.SUBMITTED)
    )

    async for session in get_async_db():
        jobsV2_db = (await session.execute(stm_v2)).scalars().all()
        jobsV1_db = (await session.execute(stm_v1)).scalars().all()

        for job in jobsV2_db:
            _write_shutdown_file(job=job)

        for job in jobsV1_db:
            _write_shutdown_file(job=job)

        settings = Inject(get_settings)

        t_start = time.perf_counter()
        # NOTE(review): the upstream comment on this bound says
        # "30 seconds"; the actual default lives in the settings and is
        # not visible here.
        while (
            time.perf_counter() - t_start
        ) < settings.FRACTAL_GRACEFUL_SHUTDOWN_TIME:
            logger.info("Waiting 3 seconds before checking")
            # Yield to the event loop while waiting: a blocking
            # `time.sleep` here would freeze every other coroutine of the
            # app for the whole graceful-shutdown window.
            await asyncio.sleep(3)
            jobsV2_db = (await session.execute(stm_v2)).scalars().all()
            jobsV1_db = (await session.execute(stm_v1)).scalars().all()

            if not jobsV2_db and not jobsV1_db:
                logger.info(
                    "All jobs associated to this app are "
                    "either done or failed. Exit."
                )
                return

            logger.info(
                "Some jobs are still 'submitted' "
                f"{jobsV1_db=}, {jobsV2_db=}"
            )

        logger.info(
            "Graceful shutdown reached its maximum time, "
            "but some jobs are still submitted"
        )

        # Commit after each job, so that updates already made are
        # persisted even if the process is killed part-way through.
        for job in jobsV2_db:
            job.status = "failed"
            job.log = (job.log or "") + "\nJob stopped due to app shutdown\n"
            session.add(job)
            await session.commit()

        for job in jobsV1_db:
            job.status = "failed"
            job.log = (job.log or "") + "\nJob stopped due to app shutdown\n"
            session.add(job)
            await session.commit()

        logger.info("Exit from shutdown logic")