fractal-server 2.12.1__py3-none-any.whl → 2.13.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73)
  1. fractal_server/__init__.py +1 -1
  2. fractal_server/app/models/security.py +9 -12
  3. fractal_server/app/models/v2/dataset.py +2 -2
  4. fractal_server/app/models/v2/job.py +11 -9
  5. fractal_server/app/models/v2/task.py +2 -3
  6. fractal_server/app/models/v2/task_group.py +6 -2
  7. fractal_server/app/models/v2/workflowtask.py +15 -8
  8. fractal_server/app/routes/admin/v2/task.py +1 -1
  9. fractal_server/app/routes/admin/v2/task_group.py +1 -1
  10. fractal_server/app/routes/api/v2/dataset.py +4 -4
  11. fractal_server/app/routes/api/v2/images.py +11 -11
  12. fractal_server/app/routes/api/v2/project.py +2 -2
  13. fractal_server/app/routes/api/v2/status.py +1 -1
  14. fractal_server/app/routes/api/v2/submit.py +8 -6
  15. fractal_server/app/routes/api/v2/task.py +4 -2
  16. fractal_server/app/routes/api/v2/task_collection.py +3 -2
  17. fractal_server/app/routes/api/v2/task_group.py +2 -2
  18. fractal_server/app/routes/api/v2/workflow.py +3 -3
  19. fractal_server/app/routes/api/v2/workflow_import.py +3 -3
  20. fractal_server/app/routes/api/v2/workflowtask.py +3 -1
  21. fractal_server/app/routes/auth/_aux_auth.py +4 -1
  22. fractal_server/app/routes/auth/current_user.py +3 -5
  23. fractal_server/app/routes/auth/group.py +1 -1
  24. fractal_server/app/routes/auth/users.py +2 -4
  25. fractal_server/app/routes/aux/_runner.py +1 -1
  26. fractal_server/app/routes/aux/validate_user_settings.py +1 -2
  27. fractal_server/app/runner/executors/_job_states.py +13 -0
  28. fractal_server/app/runner/executors/slurm/_slurm_config.py +26 -18
  29. fractal_server/app/runner/executors/slurm/ssh/__init__.py +0 -3
  30. fractal_server/app/runner/executors/slurm/ssh/_executor_wait_thread.py +31 -22
  31. fractal_server/app/runner/executors/slurm/ssh/_slurm_job.py +2 -5
  32. fractal_server/app/runner/executors/slurm/ssh/executor.py +21 -27
  33. fractal_server/app/runner/executors/slurm/sudo/__init__.py +0 -3
  34. fractal_server/app/runner/executors/slurm/sudo/_check_jobs_status.py +1 -2
  35. fractal_server/app/runner/executors/slurm/sudo/_executor_wait_thread.py +37 -47
  36. fractal_server/app/runner/executors/slurm/sudo/executor.py +25 -24
  37. fractal_server/app/runner/v2/__init__.py +0 -9
  38. fractal_server/app/runner/v2/_local/_local_config.py +5 -4
  39. fractal_server/app/runner/v2/_slurm_common/get_slurm_config.py +4 -4
  40. fractal_server/app/runner/v2/_slurm_sudo/__init__.py +2 -2
  41. fractal_server/app/runner/v2/deduplicate_list.py +1 -1
  42. fractal_server/app/runner/v2/runner.py +9 -4
  43. fractal_server/app/runner/v2/task_interface.py +15 -7
  44. fractal_server/app/schemas/_filter_validators.py +6 -3
  45. fractal_server/app/schemas/_validators.py +7 -5
  46. fractal_server/app/schemas/user.py +23 -18
  47. fractal_server/app/schemas/user_group.py +25 -11
  48. fractal_server/app/schemas/user_settings.py +31 -24
  49. fractal_server/app/schemas/v2/dataset.py +48 -35
  50. fractal_server/app/schemas/v2/dumps.py +16 -14
  51. fractal_server/app/schemas/v2/job.py +49 -29
  52. fractal_server/app/schemas/v2/manifest.py +32 -28
  53. fractal_server/app/schemas/v2/project.py +18 -8
  54. fractal_server/app/schemas/v2/task.py +86 -75
  55. fractal_server/app/schemas/v2/task_collection.py +41 -30
  56. fractal_server/app/schemas/v2/task_group.py +39 -20
  57. fractal_server/app/schemas/v2/workflow.py +24 -12
  58. fractal_server/app/schemas/v2/workflowtask.py +63 -61
  59. fractal_server/app/security/__init__.py +1 -1
  60. fractal_server/config.py +32 -25
  61. fractal_server/images/models.py +18 -12
  62. fractal_server/main.py +1 -1
  63. fractal_server/tasks/v2/utils_background.py +1 -1
  64. fractal_server/tasks/v2/utils_database.py +1 -1
  65. {fractal_server-2.12.1.dist-info → fractal_server-2.13.0.dist-info}/METADATA +9 -10
  66. {fractal_server-2.12.1.dist-info → fractal_server-2.13.0.dist-info}/RECORD +69 -72
  67. fractal_server/app/runner/v2/_local_experimental/__init__.py +0 -121
  68. fractal_server/app/runner/v2/_local_experimental/_local_config.py +0 -108
  69. fractal_server/app/runner/v2/_local_experimental/_submit_setup.py +0 -42
  70. fractal_server/app/runner/v2/_local_experimental/executor.py +0 -157
  71. {fractal_server-2.12.1.dist-info → fractal_server-2.13.0.dist-info}/LICENSE +0 -0
  72. {fractal_server-2.12.1.dist-info → fractal_server-2.13.0.dist-info}/WHEEL +0 -0
  73. {fractal_server-2.12.1.dist-info → fractal_server-2.13.0.dist-info}/entry_points.txt +0 -0
@@ -18,9 +18,9 @@ from typing import Optional
18
18
  from typing import Union
19
19
 
20
20
  from pydantic import BaseModel
21
- from pydantic import Extra
21
+ from pydantic import ConfigDict
22
22
  from pydantic import Field
23
- from pydantic.error_wrappers import ValidationError
23
+ from pydantic import ValidationError
24
24
 
25
25
  from .....config import get_settings
26
26
  from .....logger import set_logger
@@ -37,7 +37,7 @@ class SlurmConfigError(ValueError):
37
37
  pass
38
38
 
39
39
 
40
- class _SlurmConfigSet(BaseModel, extra=Extra.forbid):
40
+ class _SlurmConfigSet(BaseModel):
41
41
  """
42
42
  Options that can be set in `FRACTAL_SLURM_CONFIG_FILE` for the default/gpu
43
43
  SLURM config. Only used as part of `SlurmConfigFile`.
@@ -54,19 +54,21 @@ class _SlurmConfigSet(BaseModel, extra=Extra.forbid):
54
54
  extra_lines:
55
55
  """
56
56
 
57
- partition: Optional[str]
58
- cpus_per_task: Optional[int]
59
- mem: Optional[Union[int, str]]
60
- constraint: Optional[str]
61
- gres: Optional[str]
62
- time: Optional[str]
63
- account: Optional[str]
64
- extra_lines: Optional[list[str]]
65
- pre_submission_commands: Optional[list[str]]
66
- gpus: Optional[str]
57
+ model_config = ConfigDict(extra="forbid")
67
58
 
59
+ partition: Optional[str] = None
60
+ cpus_per_task: Optional[int] = None
61
+ mem: Optional[Union[int, str]] = None
62
+ constraint: Optional[str] = None
63
+ gres: Optional[str] = None
64
+ time: Optional[str] = None
65
+ account: Optional[str] = None
66
+ extra_lines: Optional[list[str]] = None
67
+ pre_submission_commands: Optional[list[str]] = None
68
+ gpus: Optional[str] = None
68
69
 
69
- class _BatchingConfigSet(BaseModel, extra=Extra.forbid):
70
+
71
+ class _BatchingConfigSet(BaseModel):
70
72
  """
71
73
  Options that can be set in `FRACTAL_SLURM_CONFIG_FILE` to configure the
72
74
  batching strategy (that is, how to combine several tasks in a single SLURM
@@ -83,6 +85,8 @@ class _BatchingConfigSet(BaseModel, extra=Extra.forbid):
83
85
  max_num_jobs:
84
86
  """
85
87
 
88
+ model_config = ConfigDict(extra="forbid")
89
+
86
90
  target_cpus_per_job: int
87
91
  max_cpus_per_job: int
88
92
  target_mem_per_job: Union[int, str]
@@ -91,7 +95,7 @@ class _BatchingConfigSet(BaseModel, extra=Extra.forbid):
91
95
  max_num_jobs: int
92
96
 
93
97
 
94
- class SlurmConfigFile(BaseModel, extra=Extra.forbid):
98
+ class SlurmConfigFile(BaseModel):
95
99
  """
96
100
  Specifications for the content of `FRACTAL_SLURM_CONFIG_FILE`
97
101
 
@@ -136,10 +140,12 @@ class SlurmConfigFile(BaseModel, extra=Extra.forbid):
136
140
  directory.
137
141
  """
138
142
 
143
+ model_config = ConfigDict(extra="forbid")
144
+
139
145
  default_slurm_config: _SlurmConfigSet
140
- gpu_slurm_config: Optional[_SlurmConfigSet]
146
+ gpu_slurm_config: Optional[_SlurmConfigSet] = None
141
147
  batching_config: _BatchingConfigSet
142
- user_local_exports: Optional[dict[str, str]]
148
+ user_local_exports: Optional[dict[str, str]] = None
143
149
 
144
150
 
145
151
  def load_slurm_config_file(
@@ -196,7 +202,7 @@ def load_slurm_config_file(
196
202
  return obj
197
203
 
198
204
 
199
- class SlurmConfig(BaseModel, extra=Extra.forbid):
205
+ class SlurmConfig(BaseModel):
200
206
  """
201
207
  Abstraction for SLURM parameters
202
208
 
@@ -247,6 +253,8 @@ class SlurmConfig(BaseModel, extra=Extra.forbid):
247
253
  command.
248
254
  """
249
255
 
256
+ model_config = ConfigDict(extra="forbid")
257
+
250
258
  # Required SLURM parameters (note that the integer attributes are those
251
259
  # that will need to scale up with the number of parallel tasks per job)
252
260
  partition: str
@@ -1,3 +0,0 @@
1
- from .executor import SlurmExecutor
2
-
3
- __all__ = ["SlurmExecutor"]
@@ -1,10 +1,8 @@
1
1
  import os
2
+ import threading
2
3
  import time
3
4
  import traceback
4
5
  from itertools import count
5
- from typing import Callable
6
-
7
- from cfut import FileWaitThread
8
6
 
9
7
  from ......logger import set_logger
10
8
  from fractal_server.app.runner.exceptions import JobExecutionError
@@ -12,35 +10,46 @@ from fractal_server.app.runner.exceptions import JobExecutionError
12
10
  logger = set_logger(__name__)
13
11
 
14
12
 
15
- class FractalSlurmWaitThread(FileWaitThread):
13
+ class FractalSlurmSSHWaitThread(threading.Thread):
16
14
  """
17
- Overrides the original clusterfutures.FileWaitThread, so that:
18
-
19
- 1. Each jobid in the waiting list is associated to a tuple of filenames,
20
- rather than a single one.
21
- 2. In the `check` method, we avoid output-file existence checks (which
22
- would require `sudo -u user ls` calls), and we rather check for the
23
- existence of the shutdown file. All the logic to check whether a job is
24
- complete is deferred to the `cfut.slurm.jobs_finished` function.
25
- 3. There are additional attributes (...).
26
-
27
- This class is based on clusterfutures 0.5. Original Copyright: 2022
28
- Adrian Sampson, released under the MIT licence
15
+ Thread that monitors a pool of SLURM jobs
16
+
17
+ This class is a custom re-implementation of the waiting thread class from:
18
+
19
+ > clusterfutures <https://github.com/sampsyo/clusterfutures>
20
+ > Original Copyright
21
+ > Copyright 2021 Adrian Sampson <asampson@cs.washington.edu>
22
+ > License: MIT
23
+
24
+ Attributes:
25
+ shutdown_file:
26
+ shutdown_callback:
27
+ slurm_poll_interval:
28
+ jobs_finished_callback:
29
+ active_job_ids:
30
+ shutdown:
31
+ lock:
29
32
  """
30
33
 
31
34
  shutdown_file: str
32
- shutdown_callback: Callable
33
- jobs_finished_callback: Callable
35
+ shutdown_callback: callable
34
36
  slurm_poll_interval = 30
37
+ jobs_finished_callback: callable
35
38
  active_job_ids: list[str]
39
+ shutdown: bool
40
+ _lock: threading.Lock
36
41
 
37
- def __init__(self, *args, **kwargs):
42
+ def __init__(self, callback: callable, interval=1):
38
43
  """
39
44
  Init method
40
45
 
41
46
  This method is executed on the main thread.
42
47
  """
43
- super().__init__(*args, **kwargs)
48
+ threading.Thread.__init__(self, daemon=True)
49
+ self.callback = callback
50
+ self.interval = interval
51
+ self._lock = threading.Lock()
52
+ self.shutdown = False
44
53
  self.active_job_ids = []
45
54
 
46
55
  def wait(self, *, job_id: str):
@@ -53,7 +62,7 @@ class FractalSlurmWaitThread(FileWaitThread):
53
62
  error_msg = "Cannot call `wait` method after executor shutdown."
54
63
  logger.warning(error_msg)
55
64
  raise JobExecutionError(info=error_msg)
56
- with self.lock:
65
+ with self._lock:
57
66
  self.active_job_ids.append(job_id)
58
67
 
59
68
  def check_shutdown(self):
@@ -109,7 +118,7 @@ class FractalSlurmWaitThread(FileWaitThread):
109
118
  pass
110
119
  return
111
120
  if ind % skip == 0:
112
- with self.lock:
121
+ with self._lock:
113
122
  try:
114
123
  self.check_jobs()
115
124
  except Exception: # nosec
@@ -1,8 +1,7 @@
1
+ import uuid
1
2
  from pathlib import Path
2
3
  from typing import Optional
3
4
 
4
- from cfut.util import random_string
5
-
6
5
  from fractal_server.app.runner.executors.slurm._slurm_config import (
7
6
  SlurmConfig,
8
7
  )
@@ -106,9 +105,7 @@ class SlurmJob:
106
105
  )
107
106
  else:
108
107
  self.wftask_file_prefixes = wftask_file_prefixes
109
- self.workerids = tuple(
110
- random_string() for i in range(self.num_tasks_tot)
111
- )
108
+ self.workerids = tuple(uuid.uuid4() for i in range(self.num_tasks_tot))
112
109
  self.slurm_config = slurm_config
113
110
 
114
111
  def get_clean_output_pickle_files(self) -> tuple[str, ...]:
@@ -1,20 +1,9 @@
1
- # This adapts clusterfutures <https://github.com/sampsyo/clusterfutures>
2
- # Original Copyright
3
- # Copyright 2021 Adrian Sampson <asampson@cs.washington.edu>
4
- # License: MIT
5
- #
6
- # Modified by:
7
- # Jacopo Nespolo <jacopo.nespolo@exact-lab.it>
8
- # Tommaso Comparin <tommaso.comparin@exact-lab.it>
9
- # Marco Franzon <marco.franzon@exact-lab.it>
10
- #
11
- # Copyright 2022 (C) Friedrich Miescher Institute for Biomedical Research and
12
- # University of Zurich
13
1
  import json
14
2
  import math
15
3
  import sys
16
4
  import threading
17
5
  import time
6
+ from concurrent.futures import Executor
18
7
  from concurrent.futures import Future
19
8
  from concurrent.futures import InvalidStateError
20
9
  from copy import copy
@@ -25,18 +14,18 @@ from typing import Optional
25
14
  from typing import Sequence
26
15
 
27
16
  import cloudpickle
28
- from cfut import SlurmExecutor
29
17
 
30
18
  from ....filenames import SHUTDOWN_FILENAME
31
19
  from ....task_files import get_task_file_paths
32
20
  from ....task_files import TaskFiles
33
21
  from ....versions import get_versions
22
+ from ..._job_states import STATES_FINISHED
34
23
  from ...slurm._slurm_config import SlurmConfig
35
24
  from .._batching import heuristics
36
25
  from ..utils_executors import get_pickle_file_path
37
26
  from ..utils_executors import get_slurm_file_path
38
27
  from ..utils_executors import get_slurm_script_file_path
39
- from ._executor_wait_thread import FractalSlurmWaitThread
28
+ from ._executor_wait_thread import FractalSlurmSSHWaitThread
40
29
  from fractal_server.app.runner.components import _COMPONENT_KEY_
41
30
  from fractal_server.app.runner.compress_folder import compress_folder
42
31
  from fractal_server.app.runner.exceptions import JobExecutionError
@@ -48,24 +37,31 @@ from fractal_server.logger import set_logger
48
37
  from fractal_server.ssh._fabric import FractalSSH
49
38
  from fractal_server.syringe import Inject
50
39
 
40
+
51
41
  logger = set_logger(__name__)
52
42
 
53
43
 
54
- class FractalSlurmSSHExecutor(SlurmExecutor):
44
+ class FractalSlurmSSHExecutor(Executor):
55
45
  """
56
- FractalSlurmSSHExecutor (inherits from cfut.SlurmExecutor)
46
+ Executor to submit SLURM jobs via SSH
47
+
48
+ This class is a custom re-implementation of the SLURM executor from
49
+
50
+ > clusterfutures <https://github.com/sampsyo/clusterfutures>
51
+ > Original Copyright
52
+ > Copyright 2021 Adrian Sampson <asampson@cs.washington.edu>
53
+ > License: MIT
57
54
 
58
- FIXME: docstring
59
55
 
60
56
  Attributes:
61
57
  fractal_ssh: FractalSSH connection with custom lock
62
- shutdown_file:
63
- python_remote: Equal to `settings.FRACTAL_SLURM_WORKER_PYTHON`
64
- wait_thread_cls: Class for waiting thread
65
58
  workflow_dir_local:
66
59
  Directory for both the cfut/SLURM and fractal-server files and logs
67
60
  workflow_dir_remote:
68
61
  Directory for both the cfut/SLURM and fractal-server files and logs
62
+ shutdown_file:
63
+ python_remote: Equal to `settings.FRACTAL_SLURM_WORKER_PYTHON`
64
+ wait_thread_cls: Class for waiting thread
69
65
  common_script_lines:
70
66
  Arbitrary script lines that will always be included in the
71
67
  sbatch script
@@ -82,10 +78,10 @@ class FractalSlurmSSHExecutor(SlurmExecutor):
82
78
  shutdown_file: str
83
79
  python_remote: str
84
80
 
85
- wait_thread_cls = FractalSlurmWaitThread
81
+ wait_thread_cls = FractalSlurmSSHWaitThread
86
82
 
87
83
  common_script_lines: list[str]
88
- slurm_account: Optional[str]
84
+ slurm_account: Optional[str] = None
89
85
 
90
86
  jobs: dict[str, tuple[Future, SlurmJob]]
91
87
  map_jobid_to_slurm_files_local: dict[str, tuple[str, str, str]]
@@ -1159,7 +1155,7 @@ class FractalSlurmSSHExecutor(SlurmExecutor):
1159
1155
  Path(tarfile_path_local).unlink()
1160
1156
 
1161
1157
  t_1 = time.perf_counter()
1162
- logger.info("[_get_subfolder_sftp] End - " f"elapsed: {t_1-t_0:.3f} s")
1158
+ logger.info(f"[_get_subfolder_sftp] End - elapsed: {t_1 - t_0:.3f} s")
1163
1159
 
1164
1160
  def _prepare_sbatch_script(
1165
1161
  self,
@@ -1258,7 +1254,7 @@ class FractalSlurmSSHExecutor(SlurmExecutor):
1258
1254
  logger.debug("Executor shutdown: end")
1259
1255
 
1260
1256
  def _stop_and_join_wait_thread(self):
1261
- self.wait_thread.stop()
1257
+ self.wait_thread.shutdown = True
1262
1258
  self.wait_thread.join()
1263
1259
 
1264
1260
  def __exit__(self, *args, **kwargs):
@@ -1295,8 +1291,6 @@ class FractalSlurmSSHExecutor(SlurmExecutor):
1295
1291
  (released under the MIT licence)
1296
1292
  """
1297
1293
 
1298
- from cfut.slurm import STATES_FINISHED
1299
-
1300
1294
  logger.debug(
1301
1295
  f"[FractalSlurmSSHExecutor._jobs_finished] START ({job_ids=})"
1302
1296
  )
@@ -1387,6 +1381,6 @@ class FractalSlurmSSHExecutor(SlurmExecutor):
1387
1381
  t_end_handshake = time.perf_counter()
1388
1382
  logger.info(
1389
1383
  "[FractalSlurmSSHExecutor.ssh_handshake] END"
1390
- f" - elapsed: {t_end_handshake-t_start_handshake:.3f} s"
1384
+ f" - elapsed: {t_end_handshake - t_start_handshake:.3f} s"
1391
1385
  )
1392
1386
  return remote_versions
@@ -1,3 +0,0 @@
1
- from .executor import SlurmExecutor
2
-
3
- __all__ = ["SlurmExecutor"]
@@ -1,8 +1,7 @@
1
1
  from subprocess import run # nosec
2
2
 
3
- from cfut.slurm import STATES_FINISHED
4
-
5
3
  from ......logger import set_logger
4
+ from ..._job_states import STATES_FINISHED
6
5
 
7
6
 
8
7
  logger = set_logger(__name__)
@@ -1,12 +1,10 @@
1
1
  import os
2
+ import threading
2
3
  import time
3
4
  import traceback
4
5
  from itertools import count
5
- from typing import Callable
6
6
  from typing import Optional
7
7
 
8
- from cfut import FileWaitThread
9
-
10
8
  from ......logger import set_logger
11
9
  from ._check_jobs_status import _jobs_finished
12
10
  from fractal_server.app.runner.exceptions import JobExecutionError
@@ -14,33 +12,43 @@ from fractal_server.app.runner.exceptions import JobExecutionError
14
12
  logger = set_logger(__name__)
15
13
 
16
14
 
17
- class FractalFileWaitThread(FileWaitThread):
15
+ class FractalSlurmSudoWaitThread(threading.Thread):
18
16
  """
19
- Overrides the original clusterfutures.FileWaitThread, so that:
20
-
21
- 1. Each jobid in the waiting list is associated to a tuple of filenames,
22
- rather than a single one.
23
- 2. In the `check` method, we avoid output-file existence checks (which
24
- would require `sudo -u user ls` calls), and we rather check for the
25
- existence of the shutdown file. All the logic to check whether a job is
26
- complete is deferred to the `cfut.slurm.jobs_finished` function.
27
- 3. There are additional attributes (`slurm_user`, `shutdown_file` and
28
- `shutdown_callback`).
29
-
30
- This class is copied from clusterfutures 0.5. Original Copyright: 2022
31
- Adrian Sampson, released under the MIT licence
32
-
33
- Note: in principle we could avoid the definition of
34
- `FractalFileWaitThread`, and pack all this code in
35
- `FractalSlurmWaitThread`.
17
+ Thread that monitors a pool of SLURM jobs
18
+
19
+ This class is a custom re-implementation of the waiting thread class from:
20
+
21
+ > clusterfutures <https://github.com/sampsyo/clusterfutures>
22
+ > Original Copyright
23
+ > Copyright 2021 Adrian Sampson <asampson@cs.washington.edu>
24
+ > License: MIT
25
+
26
+ Attributes:
27
+ slurm_user:
28
+ shutdown_file:
29
+ shutdown_callback:
30
+ slurm_poll_interval:
31
+ waiting:
32
+ shutdown:
33
+ lock:
36
34
  """
37
35
 
38
36
  slurm_user: str
39
37
  shutdown_file: Optional[str] = None
40
- shutdown_callback: Callable
41
-
42
- def __init__(self, *args, **kwargs):
43
- super().__init__(*args, **kwargs)
38
+ shutdown_callback: callable
39
+ slurm_poll_interval: int = 30
40
+ waiting: dict[tuple[str, ...], str]
41
+ shutdown: bool
42
+ _lock: threading.Lock
43
+
44
+ def __init__(self, callback: callable, interval=1):
45
+ threading.Thread.__init__(self, daemon=True)
46
+ self.callback = callback
47
+ self.interval = interval
48
+ self.waiting = {}
49
+ self._lock = threading.Lock() # To protect the .waiting dict
50
+ self.shutdown = False
51
+ self.active_job_ids = []
44
52
 
45
53
  def wait(
46
54
  self,
@@ -61,10 +69,10 @@ class FractalFileWaitThread(FileWaitThread):
61
69
  error_msg = "Cannot call `wait` method after executor shutdown."
62
70
  logger.warning(error_msg)
63
71
  raise JobExecutionError(info=error_msg)
64
- with self.lock:
72
+ with self._lock:
65
73
  self.waiting[filenames] = jobid
66
74
 
67
- def check(self, i):
75
+ def check_shutdown(self, i):
68
76
  """
69
77
  Do one shutdown-file-existence check.
70
78
 
@@ -99,30 +107,12 @@ class FractalFileWaitThread(FileWaitThread):
99
107
  if self.shutdown:
100
108
  self.shutdown_callback()
101
109
  return
102
- with self.lock:
110
+ with self._lock:
103
111
  self.check(i)
104
112
  time.sleep(self.interval)
105
113
 
106
-
107
- class FractalSlurmWaitThread(FractalFileWaitThread):
108
- """
109
- Replaces the original clusterfutures.SlurmWaitThread, to inherit from
110
- FractalFileWaitThread instead of FileWaitThread.
111
-
112
- The function is copied from clusterfutures 0.5. Original Copyright: 2022
113
- Adrian Sampson, released under the MIT licence
114
-
115
- **Note**: if `self.interval != 1` then this should be modified, but for
116
- `clusterfutures` v0.5 `self.interval` is indeed equal to `1`.
117
-
118
- Changed from clusterfutures:
119
- * Rename `id_to_filename` to `id_to_filenames`
120
- """
121
-
122
- slurm_poll_interval = 30
123
-
124
114
  def check(self, i):
125
- super().check(i)
115
+ self.check_shutdown(i)
126
116
  if i % (self.slurm_poll_interval // self.interval) == 0:
127
117
  try:
128
118
  finished_jobs = _jobs_finished(self.waiting.values())
@@ -1,21 +1,12 @@
1
- # This adapts clusterfutures <https://github.com/sampsyo/clusterfutures>
2
- # Original Copyright
3
- # Copyright 2021 Adrian Sampson <asampson@cs.washington.edu>
4
- # License: MIT
5
- #
6
- # Modified by:
7
- # Jacopo Nespolo <jacopo.nespolo@exact-lab.it>
8
- # Tommaso Comparin <tommaso.comparin@exact-lab.it>
9
- # Marco Franzon <marco.franzon@exact-lab.it>
10
- #
11
- # Copyright 2022 (C) Friedrich Miescher Institute for Biomedical Research and
12
- # University of Zurich
13
1
  import json
14
2
  import math
15
3
  import shlex
16
4
  import subprocess # nosec
17
5
  import sys
6
+ import threading
18
7
  import time
8
+ import uuid
9
+ from concurrent.futures import Executor
19
10
  from concurrent.futures import Future
20
11
  from concurrent.futures import InvalidStateError
21
12
  from copy import copy
@@ -27,8 +18,6 @@ from typing import Optional
27
18
  from typing import Sequence
28
19
 
29
20
  import cloudpickle
30
- from cfut import SlurmExecutor
31
- from cfut.util import random_string
32
21
 
33
22
  from ......config import get_settings
34
23
  from ......logger import set_logger
@@ -43,7 +32,7 @@ from .._batching import heuristics
43
32
  from ..utils_executors import get_pickle_file_path
44
33
  from ..utils_executors import get_slurm_file_path
45
34
  from ..utils_executors import get_slurm_script_file_path
46
- from ._executor_wait_thread import FractalSlurmWaitThread
35
+ from ._executor_wait_thread import FractalSlurmSudoWaitThread
47
36
  from ._subprocess_run_as_user import _glob_as_user
48
37
  from ._subprocess_run_as_user import _glob_as_user_strict
49
38
  from ._subprocess_run_as_user import _path_exists_as_user
@@ -180,9 +169,7 @@ class SlurmJob:
180
169
  )
181
170
  else:
182
171
  self.wftask_file_prefixes = wftask_file_prefixes
183
- self.workerids = tuple(
184
- random_string() for i in range(self.num_tasks_tot)
185
- )
172
+ self.workerids = tuple(uuid.uuid4() for i in range(self.num_tasks_tot))
186
173
  self.slurm_config = slurm_config
187
174
 
188
175
  def get_clean_output_pickle_files(self) -> tuple[str, ...]:
@@ -193,9 +180,17 @@ class SlurmJob:
193
180
  return tuple(str(f.as_posix()) for f in self.output_pickle_files)
194
181
 
195
182
 
196
- class FractalSlurmExecutor(SlurmExecutor):
183
+ class FractalSlurmSudoExecutor(Executor):
197
184
  """
198
- FractalSlurmExecutor (inherits from cfut.SlurmExecutor)
185
+ Executor to submit SLURM jobs as a different user, via `sudo -u`
186
+
187
+ This class is a custom re-implementation of the SLURM executor from
188
+
189
+ > clusterfutures <https://github.com/sampsyo/clusterfutures>
190
+ > Original Copyright
191
+ > Copyright 2021 Adrian Sampson <asampson@cs.washington.edu>
192
+ > License: MIT
193
+
199
194
 
200
195
  Attributes:
201
196
  slurm_user:
@@ -211,7 +206,7 @@ class FractalSlurmExecutor(SlurmExecutor):
211
206
  Dictionary with paths of slurm-related files for active jobs
212
207
  """
213
208
 
214
- wait_thread_cls = FractalSlurmWaitThread
209
+ wait_thread_cls = FractalSlurmSudoWaitThread
215
210
  slurm_user: str
216
211
  shutdown_file: str
217
212
  common_script_lines: list[str]
@@ -219,7 +214,7 @@ class FractalSlurmExecutor(SlurmExecutor):
219
214
  workflow_dir_local: Path
220
215
  workflow_dir_remote: Path
221
216
  map_jobid_to_slurm_files: dict[str, tuple[str, str, str]]
222
- slurm_account: Optional[str]
217
+ slurm_account: Optional[str] = None
223
218
  jobs: dict[str, tuple[Future, SlurmJob]]
224
219
 
225
220
  def __init__(
@@ -244,7 +239,13 @@ class FractalSlurmExecutor(SlurmExecutor):
244
239
  "Missing attribute FractalSlurmExecutor.slurm_user"
245
240
  )
246
241
 
247
- super().__init__(*args, **kwargs)
242
+ self.jobs = {}
243
+ self.job_outfiles = {}
244
+ self.jobs_lock = threading.Lock()
245
+ self.jobs_empty_cond = threading.Condition(self.jobs_lock)
246
+
247
+ self.wait_thread = self.wait_thread_cls(self._completion)
248
+ self.wait_thread.start()
248
249
 
249
250
  # Assign `wait_thread.shutdown_callback` early, since it may be called
250
251
  # from within `_stop_and_join_wait_thread` (e.g. if an exception is
@@ -1239,7 +1240,7 @@ class FractalSlurmExecutor(SlurmExecutor):
1239
1240
  logger.debug("Executor shutdown: end")
1240
1241
 
1241
1242
  def _stop_and_join_wait_thread(self):
1242
- self.wait_thread.stop()
1243
+ self.wait_thread.shutdown = True
1243
1244
  self.wait_thread.join()
1244
1245
 
1245
1246
  def __exit__(self, *args, **kwargs):
@@ -31,9 +31,6 @@ from ..executors.slurm.sudo._subprocess_run_as_user import _mkdir_as_user
31
31
  from ..filenames import WORKFLOW_LOG_FILENAME
32
32
  from ..task_files import task_subfolder_name
33
33
  from ._local import process_workflow as local_process_workflow
34
- from ._local_experimental import (
35
- process_workflow as local_experimental_process_workflow,
36
- )
37
34
  from ._slurm_ssh import process_workflow as slurm_ssh_process_workflow
38
35
  from ._slurm_sudo import process_workflow as slurm_sudo_process_workflow
39
36
  from .handle_failed_job import mark_last_wftask_as_failed
@@ -45,7 +42,6 @@ _backends = {}
45
42
  _backends["local"] = local_process_workflow
46
43
  _backends["slurm"] = slurm_sudo_process_workflow
47
44
  _backends["slurm_ssh"] = slurm_ssh_process_workflow
48
- _backends["local_experimental"] = local_experimental_process_workflow
49
45
 
50
46
 
51
47
  def fail_job(
@@ -184,8 +180,6 @@ def submit_workflow(
184
180
  # Define and create WORKFLOW_DIR_REMOTE
185
181
  if FRACTAL_RUNNER_BACKEND == "local":
186
182
  WORKFLOW_DIR_REMOTE = WORKFLOW_DIR_LOCAL
187
- elif FRACTAL_RUNNER_BACKEND == "local_experimental":
188
- WORKFLOW_DIR_REMOTE = WORKFLOW_DIR_LOCAL
189
183
  elif FRACTAL_RUNNER_BACKEND == "slurm":
190
184
  WORKFLOW_DIR_REMOTE = (
191
185
  Path(user_cache_dir) / WORKFLOW_DIR_LOCAL.name
@@ -287,9 +281,6 @@ def submit_workflow(
287
281
  if FRACTAL_RUNNER_BACKEND == "local":
288
282
  process_workflow = local_process_workflow
289
283
  backend_specific_kwargs = {}
290
- elif FRACTAL_RUNNER_BACKEND == "local_experimental":
291
- process_workflow = local_experimental_process_workflow
292
- backend_specific_kwargs = {}
293
284
  elif FRACTAL_RUNNER_BACKEND == "slurm":
294
285
  process_workflow = slurm_sudo_process_workflow
295
286
  backend_specific_kwargs = dict(
@@ -17,8 +17,8 @@ from typing import Literal
17
17
  from typing import Optional
18
18
 
19
19
  from pydantic import BaseModel
20
- from pydantic import Extra
21
- from pydantic.error_wrappers import ValidationError
20
+ from pydantic import ConfigDict
21
+ from pydantic import ValidationError
22
22
 
23
23
  from .....config import get_settings
24
24
  from .....syringe import Inject
@@ -33,7 +33,7 @@ class LocalBackendConfigError(ValueError):
33
33
  pass
34
34
 
35
35
 
36
- class LocalBackendConfig(BaseModel, extra=Extra.forbid):
36
+ class LocalBackendConfig(BaseModel):
37
37
  """
38
38
  Specifications of the local-backend configuration
39
39
 
@@ -44,7 +44,8 @@ class LocalBackendConfig(BaseModel, extra=Extra.forbid):
44
44
  start at the same time.
45
45
  """
46
46
 
47
- parallel_tasks_per_job: Optional[int]
47
+ model_config = ConfigDict(extra="forbid")
48
+ parallel_tasks_per_job: Optional[int] = None
48
49
 
49
50
 
50
51
  def get_default_local_backend_config():