fractal-server 2.16.6__py3-none-any.whl → 2.17.0a0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111)
  1. fractal_server/__init__.py +1 -1
  2. fractal_server/__main__.py +129 -22
  3. fractal_server/app/db/__init__.py +9 -11
  4. fractal_server/app/models/security.py +7 -3
  5. fractal_server/app/models/user_settings.py +0 -4
  6. fractal_server/app/models/v2/__init__.py +4 -0
  7. fractal_server/app/models/v2/profile.py +16 -0
  8. fractal_server/app/models/v2/project.py +3 -0
  9. fractal_server/app/models/v2/resource.py +130 -0
  10. fractal_server/app/models/v2/task_group.py +3 -0
  11. fractal_server/app/routes/admin/v2/__init__.py +4 -0
  12. fractal_server/app/routes/admin/v2/_aux_functions.py +55 -0
  13. fractal_server/app/routes/admin/v2/profile.py +86 -0
  14. fractal_server/app/routes/admin/v2/resource.py +229 -0
  15. fractal_server/app/routes/admin/v2/task_group_lifecycle.py +48 -82
  16. fractal_server/app/routes/api/__init__.py +26 -7
  17. fractal_server/app/routes/api/v2/_aux_functions.py +27 -1
  18. fractal_server/app/routes/api/v2/_aux_functions_history.py +2 -2
  19. fractal_server/app/routes/api/v2/_aux_functions_task_lifecycle.py +3 -3
  20. fractal_server/app/routes/api/v2/_aux_functions_tasks.py +7 -7
  21. fractal_server/app/routes/api/v2/project.py +5 -1
  22. fractal_server/app/routes/api/v2/submit.py +32 -24
  23. fractal_server/app/routes/api/v2/task.py +5 -0
  24. fractal_server/app/routes/api/v2/task_collection.py +36 -47
  25. fractal_server/app/routes/api/v2/task_collection_custom.py +11 -5
  26. fractal_server/app/routes/api/v2/task_collection_pixi.py +34 -40
  27. fractal_server/app/routes/api/v2/task_group_lifecycle.py +39 -82
  28. fractal_server/app/routes/auth/_aux_auth.py +3 -3
  29. fractal_server/app/routes/auth/current_user.py +45 -7
  30. fractal_server/app/routes/auth/oauth.py +1 -1
  31. fractal_server/app/routes/auth/users.py +9 -0
  32. fractal_server/app/routes/aux/_runner.py +2 -1
  33. fractal_server/app/routes/aux/validate_user_profile.py +62 -0
  34. fractal_server/app/routes/aux/validate_user_settings.py +12 -9
  35. fractal_server/app/schemas/user.py +20 -13
  36. fractal_server/app/schemas/user_settings.py +0 -4
  37. fractal_server/app/schemas/v2/__init__.py +11 -0
  38. fractal_server/app/schemas/v2/profile.py +72 -0
  39. fractal_server/app/schemas/v2/resource.py +117 -0
  40. fractal_server/app/security/__init__.py +6 -13
  41. fractal_server/app/security/signup_email.py +2 -2
  42. fractal_server/app/user_settings.py +2 -12
  43. fractal_server/config/__init__.py +23 -0
  44. fractal_server/config/_database.py +58 -0
  45. fractal_server/config/_email.py +170 -0
  46. fractal_server/config/_init_data.py +27 -0
  47. fractal_server/config/_main.py +216 -0
  48. fractal_server/config/_settings_config.py +7 -0
  49. fractal_server/images/tools.py +3 -3
  50. fractal_server/logger.py +3 -3
  51. fractal_server/main.py +14 -21
  52. fractal_server/migrations/versions/90f6508c6379_drop_useroauth_username.py +36 -0
  53. fractal_server/migrations/versions/a80ac5a352bf_resource_profile.py +195 -0
  54. fractal_server/runner/config/__init__.py +2 -0
  55. fractal_server/runner/config/_local.py +21 -0
  56. fractal_server/runner/config/_slurm.py +128 -0
  57. fractal_server/runner/config/slurm_mem_to_MB.py +63 -0
  58. fractal_server/runner/exceptions.py +4 -0
  59. fractal_server/runner/executors/base_runner.py +17 -7
  60. fractal_server/runner/executors/local/get_local_config.py +21 -86
  61. fractal_server/runner/executors/local/runner.py +48 -5
  62. fractal_server/runner/executors/slurm_common/_batching.py +2 -2
  63. fractal_server/runner/executors/slurm_common/base_slurm_runner.py +59 -25
  64. fractal_server/runner/executors/slurm_common/get_slurm_config.py +38 -54
  65. fractal_server/runner/executors/slurm_common/remote.py +1 -1
  66. fractal_server/runner/executors/slurm_common/{_slurm_config.py → slurm_config.py} +3 -254
  67. fractal_server/runner/executors/slurm_common/slurm_job_task_models.py +1 -1
  68. fractal_server/runner/executors/slurm_ssh/runner.py +12 -14
  69. fractal_server/runner/executors/slurm_sudo/_subprocess_run_as_user.py +2 -2
  70. fractal_server/runner/executors/slurm_sudo/runner.py +12 -12
  71. fractal_server/runner/v2/_local.py +36 -21
  72. fractal_server/runner/v2/_slurm_ssh.py +40 -4
  73. fractal_server/runner/v2/_slurm_sudo.py +41 -11
  74. fractal_server/runner/v2/db_tools.py +1 -1
  75. fractal_server/runner/v2/runner.py +3 -11
  76. fractal_server/runner/v2/runner_functions.py +42 -28
  77. fractal_server/runner/v2/submit_workflow.py +87 -108
  78. fractal_server/runner/versions.py +8 -3
  79. fractal_server/ssh/_fabric.py +6 -6
  80. fractal_server/tasks/config/__init__.py +3 -0
  81. fractal_server/tasks/config/_pixi.py +127 -0
  82. fractal_server/tasks/config/_python.py +51 -0
  83. fractal_server/tasks/v2/local/_utils.py +7 -7
  84. fractal_server/tasks/v2/local/collect.py +13 -5
  85. fractal_server/tasks/v2/local/collect_pixi.py +26 -10
  86. fractal_server/tasks/v2/local/deactivate.py +7 -1
  87. fractal_server/tasks/v2/local/deactivate_pixi.py +5 -1
  88. fractal_server/tasks/v2/local/delete.py +4 -0
  89. fractal_server/tasks/v2/local/reactivate.py +13 -5
  90. fractal_server/tasks/v2/local/reactivate_pixi.py +27 -9
  91. fractal_server/tasks/v2/ssh/_pixi_slurm_ssh.py +11 -10
  92. fractal_server/tasks/v2/ssh/_utils.py +6 -7
  93. fractal_server/tasks/v2/ssh/collect.py +19 -12
  94. fractal_server/tasks/v2/ssh/collect_pixi.py +34 -16
  95. fractal_server/tasks/v2/ssh/deactivate.py +12 -8
  96. fractal_server/tasks/v2/ssh/deactivate_pixi.py +14 -10
  97. fractal_server/tasks/v2/ssh/delete.py +12 -9
  98. fractal_server/tasks/v2/ssh/reactivate.py +18 -12
  99. fractal_server/tasks/v2/ssh/reactivate_pixi.py +36 -17
  100. fractal_server/tasks/v2/templates/4_pip_show.sh +4 -6
  101. fractal_server/tasks/v2/utils_database.py +2 -2
  102. fractal_server/tasks/v2/utils_python_interpreter.py +8 -16
  103. fractal_server/tasks/v2/utils_templates.py +7 -10
  104. fractal_server/utils.py +1 -1
  105. {fractal_server-2.16.6.dist-info → fractal_server-2.17.0a0.dist-info}/METADATA +1 -1
  106. {fractal_server-2.16.6.dist-info → fractal_server-2.17.0a0.dist-info}/RECORD +110 -88
  107. fractal_server/config.py +0 -906
  108. /fractal_server/{runner → app}/shutdown.py +0 -0
  109. {fractal_server-2.16.6.dist-info → fractal_server-2.17.0a0.dist-info}/WHEEL +0 -0
  110. {fractal_server-2.16.6.dist-info → fractal_server-2.17.0a0.dist-info}/entry_points.txt +0 -0
  111. {fractal_server-2.16.6.dist-info → fractal_server-2.17.0a0.dist-info}/licenses/LICENSE +0 -0
@@ -9,17 +9,17 @@ from typing import Literal
9
9
  from pydantic import BaseModel
10
10
  from pydantic import ConfigDict
11
11
 
12
- from ..slurm_common._slurm_config import SlurmConfig
13
12
  from ..slurm_common.slurm_job_task_models import SlurmJob
14
13
  from ..slurm_common.slurm_job_task_models import SlurmTask
15
14
  from ._job_states import STATES_FINISHED
15
+ from .slurm_config import SlurmConfig
16
16
  from fractal_server import __VERSION__
17
17
  from fractal_server.app.db import get_sync_db
18
18
  from fractal_server.app.models.v2 import AccountingRecordSlurm
19
19
  from fractal_server.app.schemas.v2 import HistoryUnitStatus
20
20
  from fractal_server.app.schemas.v2 import TaskType
21
- from fractal_server.config import get_settings
22
21
  from fractal_server.logger import set_logger
22
+ from fractal_server.runner.config import JobRunnerConfigSLURM
23
23
  from fractal_server.runner.exceptions import JobExecutionError
24
24
  from fractal_server.runner.exceptions import TaskExecutionError
25
25
  from fractal_server.runner.executors.base_runner import BaseRunner
@@ -31,7 +31,6 @@ from fractal_server.runner.v2.db_tools import (
31
31
  bulk_update_status_of_history_unit,
32
32
  )
33
33
  from fractal_server.runner.v2.db_tools import update_status_of_history_unit
34
- from fractal_server.syringe import Inject
35
34
 
36
35
  SHUTDOWN_ERROR_MESSAGE = "Failed due to job-execution shutdown."
37
36
  SHUTDOWN_EXCEPTION = JobExecutionError(SHUTDOWN_ERROR_MESSAGE)
@@ -77,16 +76,18 @@ class BaseSlurmRunner(BaseRunner):
77
76
  python_worker_interpreter: str
78
77
  slurm_runner_type: Literal["ssh", "sudo"]
79
78
  slurm_account: str | None = None
79
+ shared_config: JobRunnerConfigSLURM
80
80
 
81
81
  def __init__(
82
82
  self,
83
+ *,
83
84
  root_dir_local: Path,
84
85
  root_dir_remote: Path,
85
86
  slurm_runner_type: Literal["ssh", "sudo"],
86
87
  python_worker_interpreter: str,
88
+ poll_interval: int,
87
89
  common_script_lines: list[str] | None = None,
88
- user_cache_dir: str | None = None,
89
- poll_interval: int | None = None,
90
+ user_cache_dir: str | None = None, # FIXME: make required?
90
91
  slurm_account: str | None = None,
91
92
  ):
92
93
  self.slurm_runner_type = slurm_runner_type
@@ -98,11 +99,7 @@ class BaseSlurmRunner(BaseRunner):
98
99
  self.python_worker_interpreter = python_worker_interpreter
99
100
  self.slurm_account = slurm_account
100
101
 
101
- settings = Inject(get_settings)
102
-
103
- self.poll_interval = (
104
- poll_interval or settings.FRACTAL_SLURM_POLL_INTERVAL
105
- )
102
+ self.poll_interval = poll_interval
106
103
  self.poll_interval_internal = self.poll_interval / 10.0
107
104
 
108
105
  self.check_fractal_server_versions()
@@ -134,12 +131,10 @@ class BaseSlurmRunner(BaseRunner):
134
131
  def _run_remote_cmd(self, cmd: str) -> str:
135
132
  raise NotImplementedError("Implement in child class.")
136
133
 
137
- def run_squeue(self, *, job_ids: list[str], **kwargs) -> str:
134
+ def run_squeue(self, *, job_ids: list[str]) -> str:
138
135
  raise NotImplementedError("Implement in child class.")
139
136
 
140
- def _is_squeue_error_recoverable(
141
- self, exception: BaseException
142
- ) -> Literal[True]:
137
+ def _is_squeue_error_recoverable(self, exception: BaseException) -> bool:
143
138
  """
144
139
  Determine whether a `squeue` error is considered recoverable.
145
140
 
@@ -245,7 +240,7 @@ class BaseSlurmRunner(BaseRunner):
245
240
  A new, up-to-date, `SlurmConfig` object.
246
241
  """
247
242
 
248
- new_slurm_config = slurm_config.model_copy()
243
+ new_slurm_config = slurm_config.model_copy(deep=True)
249
244
 
250
245
  # Include SLURM account in `slurm_config`.
251
246
  if self.slurm_account is not None:
@@ -473,7 +468,7 @@ class BaseSlurmRunner(BaseRunner):
473
468
  *,
474
469
  task: SlurmTask,
475
470
  was_job_scancelled: bool = False,
476
- ) -> tuple[Any, Exception]:
471
+ ) -> tuple[Any, Exception | None]:
477
472
  try:
478
473
  with open(task.output_file_local) as f:
479
474
  output = json.load(f)
@@ -566,6 +561,10 @@ class BaseSlurmRunner(BaseRunner):
566
561
  def job_ids(self) -> list[str]:
567
562
  return list(self.jobs.keys())
568
563
 
564
+ @property
565
+ def job_ids_int(self) -> list[int]:
566
+ return list(map(int, self.jobs.keys()))
567
+
569
568
  def wait_and_check_shutdown(self) -> list[str]:
570
569
  """
571
570
  Wait at most `self.poll_interval`, while also checking for shutdown.
@@ -602,6 +601,7 @@ class BaseSlurmRunner(BaseRunner):
602
601
 
603
602
  def submit(
604
603
  self,
604
+ *,
605
605
  base_command: str,
606
606
  workflow_task_order: int,
607
607
  workflow_task_id: int,
@@ -612,7 +612,23 @@ class BaseSlurmRunner(BaseRunner):
612
612
  config: SlurmConfig,
613
613
  task_type: SubmitTaskType,
614
614
  user_id: int,
615
- ) -> tuple[Any, Exception]:
615
+ ) -> tuple[Any, Exception | None]:
616
+ """
617
+ Run a single fractal task.
618
+
619
+ Args:
620
+ base_command:
621
+ workflow_task_order:
622
+ workflow_task_id:
623
+ task_name:
624
+ parameters: Dictionary of parameters.
625
+ history_unit_id:
626
+ Database ID of the corresponding `HistoryUnit` entry.
627
+ task_type: Task type.
628
+ task_files: `TaskFiles` object.
629
+ config: Runner-specific parameters.
630
+ user_id:
631
+ """
616
632
  logger.debug("[submit] START")
617
633
 
618
634
  # Always refresh `executor_error_log` before starting a task
@@ -687,7 +703,7 @@ class BaseSlurmRunner(BaseRunner):
687
703
 
688
704
  create_accounting_record_slurm(
689
705
  user_id=user_id,
690
- slurm_job_ids=self.job_ids,
706
+ slurm_job_ids=self.job_ids_int,
691
707
  )
692
708
 
693
709
  # Retrieval phase
@@ -757,11 +773,12 @@ class BaseSlurmRunner(BaseRunner):
757
773
 
758
774
  def multisubmit(
759
775
  self,
776
+ *,
760
777
  base_command: str,
761
778
  workflow_task_order: int,
762
779
  workflow_task_id: int,
763
780
  task_name: str,
764
- list_parameters: list[dict],
781
+ list_parameters: list[dict[str, Any]],
765
782
  history_unit_ids: list[int],
766
783
  list_task_files: list[TaskFiles],
767
784
  task_type: MultisubmitTaskType,
@@ -769,9 +786,26 @@ class BaseSlurmRunner(BaseRunner):
769
786
  user_id: int,
770
787
  ) -> tuple[dict[int, Any], dict[int, BaseException]]:
771
788
  """
789
+ Run a parallel fractal task.
790
+
772
791
  Note: `list_parameters`, `list_task_files` and `history_unit_ids`
773
792
  have the same size. For parallel tasks, this is also the number of
774
793
  input images, while for compound tasks these can differ.
794
+
795
+ Args:
796
+ base_command:
797
+ workflow_task_order:
798
+ workflow_task_id:
799
+ task_name:
800
+ list_parameters:
801
+ List of dictionaries of parameters (each one must include
802
+ `zarr_urls` key).
803
+ history_unit_ids:
804
+ Database IDs of the corresponding `HistoryUnit` entries.
805
+ list_task_files: `TaskFiles` objects.
806
+ task_type: Task type.
807
+ config: Runner-specific parameters.
808
+ user_id:
775
809
  """
776
810
 
777
811
  # Always refresh `executor_error_log` before starting a task
@@ -779,6 +813,9 @@ class BaseSlurmRunner(BaseRunner):
779
813
 
780
814
  config = self._enrich_slurm_config(config)
781
815
 
816
+ results: dict[int, Any] = {}
817
+ exceptions: dict[int, BaseException] = {}
818
+
782
819
  logger.debug(f"[multisubmit] START, {len(list_parameters)=}")
783
820
  try:
784
821
  if self.is_shutdown():
@@ -789,8 +826,8 @@ class BaseSlurmRunner(BaseRunner):
789
826
  status=HistoryUnitStatus.FAILED,
790
827
  db_sync=db,
791
828
  )
792
- results: dict[int, Any] = {}
793
- exceptions: dict[int, BaseException] = {
829
+ results = {}
830
+ exceptions = {
794
831
  ind: SHUTDOWN_EXCEPTION
795
832
  for ind in range(len(list_parameters))
796
833
  }
@@ -812,9 +849,6 @@ class BaseSlurmRunner(BaseRunner):
812
849
  self._mkdir_local_folder(workdir_local.as_posix())
813
850
  self._mkdir_remote_folder(folder=workdir_remote.as_posix())
814
851
 
815
- results: dict[int, Any] = {}
816
- exceptions: dict[int, BaseException] = {}
817
-
818
852
  # NOTE: chunking has already taken place in `get_slurm_config`,
819
853
  # so that `config.tasks_per_job` is now set.
820
854
 
@@ -889,7 +923,7 @@ class BaseSlurmRunner(BaseRunner):
889
923
 
890
924
  create_accounting_record_slurm(
891
925
  user_id=user_id,
892
- slurm_job_ids=self.job_ids,
926
+ slurm_job_ids=self.job_ids_int,
893
927
  )
894
928
 
895
929
  except Exception as e:
@@ -1,50 +1,42 @@
1
- from pathlib import Path
2
1
  from typing import Literal
3
2
 
4
3
  from ._batching import heuristics
5
- from ._slurm_config import _parse_mem_value
6
- from ._slurm_config import load_slurm_config_file
7
- from ._slurm_config import logger
8
- from ._slurm_config import SlurmConfig
9
- from ._slurm_config import SlurmConfigError
4
+ from .slurm_config import logger
5
+ from .slurm_config import SlurmConfig
10
6
  from fractal_server.app.models.v2 import WorkflowTaskV2
7
+ from fractal_server.runner.config import JobRunnerConfigSLURM
8
+ from fractal_server.runner.config.slurm_mem_to_MB import slurm_mem_to_MB
9
+ from fractal_server.runner.exceptions import SlurmConfigError
11
10
  from fractal_server.string_tools import interpret_as_bool
12
11
 
13
12
 
14
- def get_slurm_config_internal(
13
+ def _get_slurm_config_internal(
14
+ shared_config: JobRunnerConfigSLURM,
15
15
  wftask: WorkflowTaskV2,
16
16
  which_type: Literal["non_parallel", "parallel"],
17
- config_path: Path | None = None,
18
17
  ) -> SlurmConfig:
19
18
  """
20
- Prepare a `SlurmConfig` configuration object
21
19
 
22
- The argument `which_type` determines whether we use `wftask.meta_parallel`
23
- or `wftask.meta_non_parallel`. In the following description, let us assume
24
- that `which_type="parallel"`.
20
+ Prepare a specific `SlurmConfig` configuration.
25
21
 
26
- The sources for `SlurmConfig` attributes, in increasing priority order, are
22
+ The base configuration is the runner-level `shared_config` object, based
23
+ on `resource.jobs_runner_config` (note that GPU-specific properties take
24
+ priority, when `needs_gpu=True`). We then incorporate attributes from
25
+ `wftask.meta_{non_parallel,parallel}` - with higher priority.
27
26
 
28
- 1. The general content of the Fractal SLURM configuration file.
29
- 2. The GPU-specific content of the Fractal SLURM configuration file, if
30
- appropriate.
31
- 3. Properties in `wftask.meta_parallel` (which typically include those in
32
- `wftask.task.meta_parallel`). Note that `wftask.meta_parallel` may be
33
- `None`.
34
-
35
- Arguments:
27
+ Args:
28
+ shared_config:
29
+ Configuration object based on `resource.jobs_runner_config`.
36
30
  wftask:
37
- WorkflowTask for which the SLURM configuration is is to be
38
- prepared.
39
- config_path:
40
- Path of a Fractal SLURM configuration file; if `None`, use
41
- `FRACTAL_SLURM_CONFIG_FILE` variable from settings.
31
+ WorkflowTaskV2 for which the backend configuration should
32
+ be prepared.
42
33
  which_type:
43
- Determines whether to use `meta_parallel` or `meta_non_parallel`.
34
+ Whether we should look at the non-parallel or parallel part
35
+ of `wftask`.
36
+ tot_tasks: Not used here, only present as a common interface.
44
37
 
45
38
  Returns:
46
- slurm_config:
47
- The SlurmConfig object
39
+ A ready-to-use `SlurmConfig` object.
48
40
  """
49
41
 
50
42
  if which_type == "non_parallel":
@@ -60,25 +52,19 @@ def get_slurm_config_internal(
60
52
  f"[get_slurm_config] WorkflowTask meta attribute: {wftask_meta=}"
61
53
  )
62
54
 
63
- # Incorporate slurm_env.default_slurm_config
64
- slurm_env = load_slurm_config_file(config_path=config_path)
65
- slurm_dict = slurm_env.default_slurm_config.model_dump(
55
+ # Start from `shared_config`
56
+ slurm_dict = shared_config.default_slurm_config.model_dump(
66
57
  exclude_unset=True, exclude={"mem"}
67
58
  )
68
- if slurm_env.default_slurm_config.mem:
69
- slurm_dict["mem_per_task_MB"] = slurm_env.default_slurm_config.mem
59
+ if shared_config.default_slurm_config.mem:
60
+ slurm_dict["mem_per_task_MB"] = shared_config.default_slurm_config.mem
70
61
 
71
62
  # Incorporate slurm_env.batching_config
72
- for key, value in slurm_env.batching_config.model_dump().items():
63
+ for key, value in shared_config.batching_config.model_dump().items():
73
64
  slurm_dict[key] = value
74
65
 
75
66
  # Incorporate slurm_env.user_local_exports
76
- slurm_dict["user_local_exports"] = slurm_env.user_local_exports
77
-
78
- logger.debug(
79
- "[get_slurm_config] Fractal SLURM configuration file: "
80
- f"{slurm_env.model_dump()=}"
81
- )
67
+ slurm_dict["user_local_exports"] = shared_config.user_local_exports
82
68
 
83
69
  # GPU-related options
84
70
  # Notes about priority:
@@ -92,12 +78,12 @@ def get_slurm_config_internal(
92
78
  needs_gpu = False
93
79
  logger.debug(f"[get_slurm_config] {needs_gpu=}")
94
80
  if needs_gpu:
95
- for key, value in slurm_env.gpu_slurm_config.model_dump(
81
+ for key, value in shared_config.gpu_slurm_config.model_dump(
96
82
  exclude_unset=True, exclude={"mem"}
97
83
  ).items():
98
84
  slurm_dict[key] = value
99
- if slurm_env.gpu_slurm_config.mem:
100
- slurm_dict["mem_per_task_MB"] = slurm_env.gpu_slurm_config.mem
85
+ if shared_config.gpu_slurm_config.mem:
86
+ slurm_dict["mem_per_task_MB"] = shared_config.gpu_slurm_config.mem
101
87
 
102
88
  # Number of CPUs per task, for multithreading
103
89
  if wftask_meta is not None and "cpus_per_task" in wftask_meta:
@@ -107,7 +93,7 @@ def get_slurm_config_internal(
107
93
  # Required memory per task, in MB
108
94
  if wftask_meta is not None and "mem" in wftask_meta:
109
95
  raw_mem = wftask_meta["mem"]
110
- mem_per_task_MB = _parse_mem_value(raw_mem)
96
+ mem_per_task_MB = slurm_mem_to_MB(raw_mem)
111
97
  slurm_dict["mem_per_task_MB"] = mem_per_task_MB
112
98
 
113
99
  # Job name
@@ -144,8 +130,7 @@ def get_slurm_config_internal(
144
130
  extra_lines = slurm_dict.get("extra_lines", []) + extra_lines
145
131
  if len(set(extra_lines)) != len(extra_lines):
146
132
  logger.debug(
147
- "[get_slurm_config] Removing repeated elements from "
148
- f"{extra_lines=}."
133
+ f"[get_slurm_config] Removing repeated elements in {extra_lines=}."
149
134
  )
150
135
  extra_lines = list(set(extra_lines))
151
136
  slurm_dict["extra_lines"] = extra_lines
@@ -164,8 +149,7 @@ def get_slurm_config_internal(
164
149
 
165
150
  # Put everything together
166
151
  logger.debug(
167
- "[get_slurm_config] Now create a SlurmConfig object based on "
168
- f"{slurm_dict=}"
152
+ f"[get_slurm_config] Create SlurmConfig object based on {slurm_dict=}"
169
153
  )
170
154
  slurm_config = SlurmConfig(**slurm_dict)
171
155
 
@@ -173,15 +157,15 @@ def get_slurm_config_internal(
173
157
 
174
158
 
175
159
  def get_slurm_config(
160
+ shared_config: JobRunnerConfigSLURM,
176
161
  wftask: WorkflowTaskV2,
177
162
  which_type: Literal["non_parallel", "parallel"],
178
- config_path: Path | None = None,
179
163
  tot_tasks: int = 1,
180
164
  ) -> SlurmConfig:
181
- config = get_slurm_config_internal(
182
- wftask,
183
- which_type,
184
- config_path,
165
+ config = _get_slurm_config_internal(
166
+ shared_config=shared_config,
167
+ wftask=wftask,
168
+ which_type=which_type,
185
169
  )
186
170
 
187
171
  # Set/validate parameters for task batching
@@ -23,7 +23,7 @@ def worker(
23
23
  """
24
24
  Execute a job, possibly on a remote node.
25
25
 
26
- Arguments:
26
+ Args:
27
27
  in_fname: Absolute path to the input file (must be readable).
28
28
  out_fname: Absolute path of the output file (must be writeable).
29
29
  """
@@ -1,208 +1,17 @@
1
- # Copyright 2022 (C) Friedrich Miescher Institute for Biomedical Research and
2
- # University of Zurich
3
- #
4
- # Original authors:
5
- # Jacopo Nespolo <jacopo.nespolo@exact-lab.it>
6
- # Tommaso Comparin <tommaso.comparin@exact-lab.it>
7
- #
8
- # This file is part of Fractal and was originally developed by eXact lab S.r.l.
9
- # <exact-lab.it> under contract with Liberali Lab from the Friedrich Miescher
10
- # Institute for Biomedical Research and Pelkmans Lab from the University of
11
- # Zurich.
12
1
  """
13
2
  Submodule to handle the SLURM configuration for a WorkflowTask
14
3
  """
15
- import json
16
4
  from pathlib import Path
17
5
 
18
6
  from pydantic import BaseModel
19
7
  from pydantic import ConfigDict
20
8
  from pydantic import Field
21
- from pydantic import ValidationError
22
9
 
23
- from fractal_server.config import get_settings
24
10
  from fractal_server.logger import set_logger
25
- from fractal_server.syringe import Inject
26
11
 
27
12
  logger = set_logger(__name__)
28
13
 
29
14
 
30
- class SlurmConfigError(ValueError):
31
- """
32
- Slurm configuration error
33
- """
34
-
35
- pass
36
-
37
-
38
- class _SlurmConfigSet(BaseModel):
39
- """
40
- Options that can be set in `FRACTAL_SLURM_CONFIG_FILE` for the default/gpu
41
- SLURM config. Only used as part of `SlurmConfigFile`.
42
-
43
- Attributes:
44
- partition:
45
- cpus_per_task:
46
- mem:
47
- See `_parse_mem_value` for details on allowed values.
48
- constraint:
49
- gres:
50
- time:
51
- exclude:
52
- nodelist:
53
- account:
54
- extra_lines:
55
- """
56
-
57
- model_config = ConfigDict(extra="forbid")
58
-
59
- partition: str | None = None
60
- cpus_per_task: int | None = None
61
- mem: int | str | None = None
62
- constraint: str | None = None
63
- gres: str | None = None
64
- exclude: str | None = None
65
- nodelist: str | None = None
66
- time: str | None = None
67
- account: str | None = None
68
- extra_lines: list[str] | None = None
69
- gpus: str | None = None
70
-
71
-
72
- class _BatchingConfigSet(BaseModel):
73
- """
74
- Options that can be set in `FRACTAL_SLURM_CONFIG_FILE` to configure the
75
- batching strategy (that is, how to combine several tasks in a single SLURM
76
- job). Only used as part of `SlurmConfigFile`.
77
-
78
- Attributes:
79
- target_cpus_per_job:
80
- max_cpus_per_job:
81
- target_mem_per_job:
82
- (see `_parse_mem_value` for details on allowed values)
83
- max_mem_per_job:
84
- (see `_parse_mem_value` for details on allowed values)
85
- target_num_jobs:
86
- max_num_jobs:
87
- """
88
-
89
- model_config = ConfigDict(extra="forbid")
90
-
91
- target_cpus_per_job: int
92
- max_cpus_per_job: int
93
- target_mem_per_job: int | str
94
- max_mem_per_job: int | str
95
- target_num_jobs: int
96
- max_num_jobs: int
97
-
98
-
99
- class SlurmConfigFile(BaseModel):
100
- """
101
- Specifications for the content of `FRACTAL_SLURM_CONFIG_FILE`
102
-
103
- This must be a JSON file, and a valid example is
104
- ```JSON
105
- {
106
- "default_slurm_config": {
107
- "partition": "main",
108
- "cpus_per_task": 1
109
- },
110
- "gpu_slurm_config": {
111
- "partition": "gpu",
112
- "extra_lines": ["#SBATCH --gres=gpu:v100:1"]
113
- },
114
- "batching_config": {
115
- "target_cpus_per_job": 1,
116
- "max_cpus_per_job": 1,
117
- "target_mem_per_job": 200,
118
- "max_mem_per_job": 500,
119
- "target_num_jobs": 2,
120
- "max_num_jobs": 4
121
- },
122
- "user_local_exports": {
123
- "CELLPOSE_LOCAL_MODELS_PATH": "CELLPOSE_LOCAL_MODELS_PATH",
124
- "NAPARI_CONFIG": "napari_config.json"
125
- }
126
- }
127
- ```
128
-
129
- See `_SlurmConfigSet` and `_BatchingConfigSet` for more details.
130
-
131
- Attributes:
132
- default_slurm_config:
133
- Common default options for all tasks.
134
- gpu_slurm_config:
135
- Default configuration for all GPU tasks.
136
- batching_config:
137
- Configuration of the batching strategy.
138
- user_local_exports:
139
- Key-value pairs to be included as `export`-ed variables in SLURM
140
- submission script, after prepending values with the user's cache
141
- directory.
142
- """
143
-
144
- model_config = ConfigDict(extra="forbid")
145
-
146
- default_slurm_config: _SlurmConfigSet
147
- gpu_slurm_config: _SlurmConfigSet | None = None
148
- batching_config: _BatchingConfigSet
149
- user_local_exports: dict[str, str] | None = None
150
-
151
-
152
- def load_slurm_config_file(
153
- config_path: Path | None = None,
154
- ) -> SlurmConfigFile:
155
- """
156
- Load a SLURM configuration file and validate its content with
157
- `SlurmConfigFile`.
158
-
159
- Arguments:
160
- config_path:
161
- """
162
-
163
- if not config_path:
164
- settings = Inject(get_settings)
165
- config_path = settings.FRACTAL_SLURM_CONFIG_FILE
166
-
167
- # Load file
168
- logger.debug(f"[get_slurm_config] Now loading {config_path=}")
169
- try:
170
- with config_path.open("r") as f:
171
- slurm_env = json.load(f)
172
- except Exception as e:
173
- raise SlurmConfigError(
174
- f"Error while loading {config_path=}. "
175
- f"Original error:\n{str(e)}"
176
- )
177
-
178
- # Validate file content
179
- logger.debug(f"[load_slurm_config_file] Now validating {config_path=}")
180
- logger.debug(f"[load_slurm_config_file] {slurm_env=}")
181
- try:
182
- obj = SlurmConfigFile(**slurm_env)
183
- except ValidationError as e:
184
- raise SlurmConfigError(
185
- f"Error while loading {config_path=}. "
186
- f"Original error:\n{str(e)}"
187
- )
188
-
189
- # Convert memory to MB units, in all relevant attributes
190
- if obj.default_slurm_config.mem:
191
- obj.default_slurm_config.mem = _parse_mem_value(
192
- obj.default_slurm_config.mem
193
- )
194
- if obj.gpu_slurm_config and obj.gpu_slurm_config.mem:
195
- obj.gpu_slurm_config.mem = _parse_mem_value(obj.gpu_slurm_config.mem)
196
- obj.batching_config.target_mem_per_job = _parse_mem_value(
197
- obj.batching_config.target_mem_per_job
198
- )
199
- obj.batching_config.max_mem_per_job = _parse_mem_value(
200
- obj.batching_config.max_mem_per_job
201
- )
202
-
203
- return obj
204
-
205
-
206
15
  class SlurmConfig(BaseModel):
207
16
  """
208
17
  Abstraction for SLURM parameters
@@ -210,9 +19,7 @@ class SlurmConfig(BaseModel):
210
19
  **NOTE**: `SlurmConfig` objects are created internally in `fractal-server`,
211
20
  and they are not meant to be initialized by the user; the same holds for
212
21
  `SlurmConfig` attributes (e.g. `mem_per_task_MB`), which are not meant to
213
- be part of the `FRACTAL_SLURM_CONFIG_FILE` JSON file (details on the
214
- expected file content are defined in
215
- [`SlurmConfigFile`](#fractal_server.runner._slurm._slurm_config.SlurmConfigFile)).
22
+ be part of the superuser-defined `resource.jobs_runner_config` JSON field.
216
23
 
217
24
  Part of the attributes map directly to some of the SLURM attributes (see
218
25
  https://slurm.schedmd.com/sbatch.html), e.g. `partition`. Other attributes
@@ -313,7 +120,7 @@ class SlurmConfig(BaseModel):
313
120
  2. Lines starting with `self.prefix`;
314
121
  3. Other lines.
315
122
 
316
- Arguments:
123
+ Args:
317
124
  script_lines:
318
125
  """
319
126
 
@@ -335,7 +142,7 @@ class SlurmConfig(BaseModel):
335
142
  Compile `SlurmConfig` object into the preamble of a SLURM submission
336
143
  script.
337
144
 
338
- Arguments:
145
+ Args:
339
146
  remote_export_dir:
340
147
  Base directory for exports defined in
341
148
  `self.user_local_exports`.
@@ -411,61 +218,3 @@ class SlurmConfig(BaseModel):
411
218
  @property
412
219
  def batch_size(self) -> int:
413
220
  return self.tasks_per_job
414
-
415
-
416
- def _parse_mem_value(raw_mem: str | int) -> int:
417
- """
418
- Convert a memory-specification string into an integer (in MB units), or
419
- simply return the input if it is already an integer.
420
-
421
- Supported units are `"M", "G", "T"`, with `"M"` being the default; some
422
- parsing examples are: `"10M" -> 10000`, `"3G" -> 3000000`.
423
-
424
- Arguments:
425
- raw_mem:
426
- A string (e.g. `"100M"`) or an integer (in MB).
427
-
428
- Returns:
429
- Integer value of memory in MB units.
430
- """
431
-
432
- info = f"[_parse_mem_value] {raw_mem=}"
433
- error_msg = (
434
- f"{info}, invalid specification of memory requirements "
435
- "(valid examples: 93, 71M, 93G, 71T)."
436
- )
437
-
438
- # Handle integer argument
439
- if type(raw_mem) is int:
440
- return raw_mem
441
-
442
- # Handle string argument
443
- if not raw_mem[0].isdigit(): # fail e.g. for raw_mem="M100"
444
- logger.error(error_msg)
445
- raise SlurmConfigError(error_msg)
446
- if raw_mem.isdigit():
447
- mem_MB = int(raw_mem)
448
- elif raw_mem.endswith("M"):
449
- stripped_raw_mem = raw_mem.strip("M")
450
- if not stripped_raw_mem.isdigit():
451
- logger.error(error_msg)
452
- raise SlurmConfigError(error_msg)
453
- mem_MB = int(stripped_raw_mem)
454
- elif raw_mem.endswith("G"):
455
- stripped_raw_mem = raw_mem.strip("G")
456
- if not stripped_raw_mem.isdigit():
457
- logger.error(error_msg)
458
- raise SlurmConfigError(error_msg)
459
- mem_MB = int(stripped_raw_mem) * 10**3
460
- elif raw_mem.endswith("T"):
461
- stripped_raw_mem = raw_mem.strip("T")
462
- if not stripped_raw_mem.isdigit():
463
- logger.error(error_msg)
464
- raise SlurmConfigError(error_msg)
465
- mem_MB = int(stripped_raw_mem) * 10**6
466
- else:
467
- logger.error(error_msg)
468
- raise SlurmConfigError(error_msg)
469
-
470
- logger.debug(f"{info}, return {mem_MB}")
471
- return mem_MB