fractal-server 2.16.6__py3-none-any.whl → 2.17.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fractal_server/__init__.py +1 -1
- fractal_server/__main__.py +178 -52
- fractal_server/app/db/__init__.py +9 -11
- fractal_server/app/models/security.py +30 -22
- fractal_server/app/models/user_settings.py +5 -4
- fractal_server/app/models/v2/__init__.py +4 -0
- fractal_server/app/models/v2/profile.py +16 -0
- fractal_server/app/models/v2/project.py +5 -0
- fractal_server/app/models/v2/resource.py +130 -0
- fractal_server/app/models/v2/task_group.py +4 -0
- fractal_server/app/routes/admin/v2/__init__.py +4 -0
- fractal_server/app/routes/admin/v2/_aux_functions.py +55 -0
- fractal_server/app/routes/admin/v2/accounting.py +3 -3
- fractal_server/app/routes/admin/v2/impersonate.py +2 -2
- fractal_server/app/routes/admin/v2/job.py +51 -15
- fractal_server/app/routes/admin/v2/profile.py +100 -0
- fractal_server/app/routes/admin/v2/project.py +2 -2
- fractal_server/app/routes/admin/v2/resource.py +222 -0
- fractal_server/app/routes/admin/v2/task.py +59 -32
- fractal_server/app/routes/admin/v2/task_group.py +17 -12
- fractal_server/app/routes/admin/v2/task_group_lifecycle.py +52 -86
- fractal_server/app/routes/api/__init__.py +45 -8
- fractal_server/app/routes/api/v2/_aux_functions.py +17 -1
- fractal_server/app/routes/api/v2/_aux_functions_history.py +2 -2
- fractal_server/app/routes/api/v2/_aux_functions_task_lifecycle.py +3 -3
- fractal_server/app/routes/api/v2/_aux_functions_tasks.py +55 -19
- fractal_server/app/routes/api/v2/_aux_task_group_disambiguation.py +21 -17
- fractal_server/app/routes/api/v2/dataset.py +10 -19
- fractal_server/app/routes/api/v2/history.py +8 -8
- fractal_server/app/routes/api/v2/images.py +5 -5
- fractal_server/app/routes/api/v2/job.py +8 -8
- fractal_server/app/routes/api/v2/pre_submission_checks.py +3 -3
- fractal_server/app/routes/api/v2/project.py +15 -7
- fractal_server/app/routes/api/v2/status_legacy.py +2 -2
- fractal_server/app/routes/api/v2/submit.py +49 -42
- fractal_server/app/routes/api/v2/task.py +26 -8
- fractal_server/app/routes/api/v2/task_collection.py +39 -50
- fractal_server/app/routes/api/v2/task_collection_custom.py +10 -6
- fractal_server/app/routes/api/v2/task_collection_pixi.py +34 -42
- fractal_server/app/routes/api/v2/task_group.py +19 -9
- fractal_server/app/routes/api/v2/task_group_lifecycle.py +43 -86
- fractal_server/app/routes/api/v2/task_version_update.py +3 -3
- fractal_server/app/routes/api/v2/workflow.py +9 -9
- fractal_server/app/routes/api/v2/workflow_import.py +25 -13
- fractal_server/app/routes/api/v2/workflowtask.py +5 -5
- fractal_server/app/routes/auth/__init__.py +34 -5
- fractal_server/app/routes/auth/_aux_auth.py +39 -20
- fractal_server/app/routes/auth/current_user.py +56 -67
- fractal_server/app/routes/auth/group.py +29 -46
- fractal_server/app/routes/auth/oauth.py +55 -38
- fractal_server/app/routes/auth/register.py +2 -2
- fractal_server/app/routes/auth/router.py +4 -2
- fractal_server/app/routes/auth/users.py +29 -53
- fractal_server/app/routes/aux/_runner.py +2 -1
- fractal_server/app/routes/aux/validate_user_profile.py +62 -0
- fractal_server/app/schemas/__init__.py +0 -1
- fractal_server/app/schemas/user.py +43 -13
- fractal_server/app/schemas/user_group.py +2 -1
- fractal_server/app/schemas/v2/__init__.py +12 -0
- fractal_server/app/schemas/v2/profile.py +78 -0
- fractal_server/app/schemas/v2/resource.py +137 -0
- fractal_server/app/schemas/v2/task_collection.py +11 -3
- fractal_server/app/schemas/v2/task_group.py +5 -0
- fractal_server/app/security/__init__.py +174 -75
- fractal_server/app/security/signup_email.py +52 -34
- fractal_server/config/__init__.py +27 -0
- fractal_server/config/_data.py +68 -0
- fractal_server/config/_database.py +59 -0
- fractal_server/config/_email.py +133 -0
- fractal_server/config/_main.py +78 -0
- fractal_server/config/_oauth.py +69 -0
- fractal_server/config/_settings_config.py +7 -0
- fractal_server/data_migrations/2_17_0.py +339 -0
- fractal_server/images/tools.py +3 -3
- fractal_server/logger.py +3 -3
- fractal_server/main.py +17 -23
- fractal_server/migrations/naming_convention.py +1 -1
- fractal_server/migrations/versions/83bc2ad3ffcc_2_17_0.py +195 -0
- fractal_server/runner/config/__init__.py +2 -0
- fractal_server/runner/config/_local.py +21 -0
- fractal_server/runner/config/_slurm.py +129 -0
- fractal_server/runner/config/slurm_mem_to_MB.py +63 -0
- fractal_server/runner/exceptions.py +4 -0
- fractal_server/runner/executors/base_runner.py +17 -7
- fractal_server/runner/executors/local/get_local_config.py +21 -86
- fractal_server/runner/executors/local/runner.py +48 -5
- fractal_server/runner/executors/slurm_common/_batching.py +2 -2
- fractal_server/runner/executors/slurm_common/base_slurm_runner.py +60 -26
- fractal_server/runner/executors/slurm_common/get_slurm_config.py +39 -55
- fractal_server/runner/executors/slurm_common/remote.py +1 -1
- fractal_server/runner/executors/slurm_common/slurm_config.py +214 -0
- fractal_server/runner/executors/slurm_common/slurm_job_task_models.py +1 -1
- fractal_server/runner/executors/slurm_ssh/runner.py +12 -14
- fractal_server/runner/executors/slurm_sudo/_subprocess_run_as_user.py +2 -2
- fractal_server/runner/executors/slurm_sudo/runner.py +12 -12
- fractal_server/runner/v2/_local.py +36 -21
- fractal_server/runner/v2/_slurm_ssh.py +41 -4
- fractal_server/runner/v2/_slurm_sudo.py +42 -12
- fractal_server/runner/v2/db_tools.py +1 -1
- fractal_server/runner/v2/runner.py +3 -11
- fractal_server/runner/v2/runner_functions.py +42 -28
- fractal_server/runner/v2/submit_workflow.py +88 -109
- fractal_server/runner/versions.py +8 -3
- fractal_server/ssh/_fabric.py +6 -6
- fractal_server/tasks/config/__init__.py +3 -0
- fractal_server/tasks/config/_pixi.py +127 -0
- fractal_server/tasks/config/_python.py +51 -0
- fractal_server/tasks/v2/local/_utils.py +7 -7
- fractal_server/tasks/v2/local/collect.py +13 -5
- fractal_server/tasks/v2/local/collect_pixi.py +26 -10
- fractal_server/tasks/v2/local/deactivate.py +7 -1
- fractal_server/tasks/v2/local/deactivate_pixi.py +5 -1
- fractal_server/tasks/v2/local/delete.py +5 -1
- fractal_server/tasks/v2/local/reactivate.py +13 -5
- fractal_server/tasks/v2/local/reactivate_pixi.py +27 -9
- fractal_server/tasks/v2/ssh/_pixi_slurm_ssh.py +11 -10
- fractal_server/tasks/v2/ssh/_utils.py +6 -7
- fractal_server/tasks/v2/ssh/collect.py +19 -12
- fractal_server/tasks/v2/ssh/collect_pixi.py +34 -16
- fractal_server/tasks/v2/ssh/deactivate.py +12 -8
- fractal_server/tasks/v2/ssh/deactivate_pixi.py +14 -10
- fractal_server/tasks/v2/ssh/delete.py +12 -9
- fractal_server/tasks/v2/ssh/reactivate.py +18 -12
- fractal_server/tasks/v2/ssh/reactivate_pixi.py +36 -17
- fractal_server/tasks/v2/templates/4_pip_show.sh +4 -6
- fractal_server/tasks/v2/utils_database.py +2 -2
- fractal_server/tasks/v2/utils_pixi.py +3 -0
- fractal_server/tasks/v2/utils_python_interpreter.py +8 -16
- fractal_server/tasks/v2/utils_templates.py +7 -10
- fractal_server/utils.py +1 -1
- {fractal_server-2.16.6.dist-info → fractal_server-2.17.0.dist-info}/METADATA +4 -6
- {fractal_server-2.16.6.dist-info → fractal_server-2.17.0.dist-info}/RECORD +136 -117
- fractal_server/app/routes/aux/validate_user_settings.py +0 -73
- fractal_server/app/schemas/user_settings.py +0 -67
- fractal_server/app/user_settings.py +0 -42
- fractal_server/config.py +0 -906
- fractal_server/data_migrations/2_14_10.py +0 -48
- fractal_server/runner/executors/slurm_common/_slurm_config.py +0 -471
- /fractal_server/{runner → app}/shutdown.py +0 -0
- {fractal_server-2.16.6.dist-info → fractal_server-2.17.0.dist-info}/WHEEL +0 -0
- {fractal_server-2.16.6.dist-info → fractal_server-2.17.0.dist-info}/entry_points.txt +0 -0
- {fractal_server-2.16.6.dist-info → fractal_server-2.17.0.dist-info}/licenses/LICENSE +0 -0
fractal_server/runner/executors/slurm_common/base_slurm_runner.py

```diff
@@ -9,17 +9,17 @@ from typing import Literal
 from pydantic import BaseModel
 from pydantic import ConfigDict
 
-from ..slurm_common._slurm_config import SlurmConfig
 from ..slurm_common.slurm_job_task_models import SlurmJob
 from ..slurm_common.slurm_job_task_models import SlurmTask
 from ._job_states import STATES_FINISHED
+from .slurm_config import SlurmConfig
 from fractal_server import __VERSION__
 from fractal_server.app.db import get_sync_db
 from fractal_server.app.models.v2 import AccountingRecordSlurm
 from fractal_server.app.schemas.v2 import HistoryUnitStatus
 from fractal_server.app.schemas.v2 import TaskType
-from fractal_server.config import get_settings
 from fractal_server.logger import set_logger
+from fractal_server.runner.config import JobRunnerConfigSLURM
 from fractal_server.runner.exceptions import JobExecutionError
 from fractal_server.runner.exceptions import TaskExecutionError
 from fractal_server.runner.executors.base_runner import BaseRunner
@@ -31,7 +31,6 @@ from fractal_server.runner.v2.db_tools import (
     bulk_update_status_of_history_unit,
 )
 from fractal_server.runner.v2.db_tools import update_status_of_history_unit
-from fractal_server.syringe import Inject
 
 SHUTDOWN_ERROR_MESSAGE = "Failed due to job-execution shutdown."
 SHUTDOWN_EXCEPTION = JobExecutionError(SHUTDOWN_ERROR_MESSAGE)
@@ -77,16 +76,18 @@ class BaseSlurmRunner(BaseRunner):
     python_worker_interpreter: str
     slurm_runner_type: Literal["ssh", "sudo"]
     slurm_account: str | None = None
+    shared_config: JobRunnerConfigSLURM
 
     def __init__(
         self,
+        *,
         root_dir_local: Path,
         root_dir_remote: Path,
         slurm_runner_type: Literal["ssh", "sudo"],
         python_worker_interpreter: str,
+        poll_interval: int,
         common_script_lines: list[str] | None = None,
-        user_cache_dir: str
-        poll_interval: int | None = None,
+        user_cache_dir: str,
         slurm_account: str | None = None,
     ):
         self.slurm_runner_type = slurm_runner_type
@@ -98,11 +99,7 @@ class BaseSlurmRunner(BaseRunner):
         self.python_worker_interpreter = python_worker_interpreter
         self.slurm_account = slurm_account
 
-
-
-        self.poll_interval = (
-            poll_interval or settings.FRACTAL_SLURM_POLL_INTERVAL
-        )
+        self.poll_interval = poll_interval
         self.poll_interval_internal = self.poll_interval / 10.0
 
         self.check_fractal_server_versions()
@@ -134,12 +131,10 @@ class BaseSlurmRunner(BaseRunner):
     def _run_remote_cmd(self, cmd: str) -> str:
         raise NotImplementedError("Implement in child class.")
 
-    def run_squeue(self, *, job_ids: list[str]
+    def run_squeue(self, *, job_ids: list[str]) -> str:
         raise NotImplementedError("Implement in child class.")
 
-    def _is_squeue_error_recoverable(
-        self, exception: BaseException
-    ) -> Literal[True]:
+    def _is_squeue_error_recoverable(self, exception: BaseException) -> bool:
         """
         Determine whether a `squeue` error is considered recoverable.
 
@@ -245,7 +240,7 @@ class BaseSlurmRunner(BaseRunner):
             A new, up-to-date, `SlurmConfig` object.
         """
 
-        new_slurm_config = slurm_config.model_copy()
+        new_slurm_config = slurm_config.model_copy(deep=True)
 
         # Include SLURM account in `slurm_config`.
         if self.slurm_account is not None:
@@ -257,7 +252,7 @@ class BaseSlurmRunner(BaseRunner):
                 f"Add {self.common_script_lines} to "
                 f"{new_slurm_config.extra_lines=}."
             )
-            current_extra_lines = new_slurm_config.extra_lines
+            current_extra_lines = new_slurm_config.extra_lines
             new_slurm_config.extra_lines = (
                 current_extra_lines + self.common_script_lines
             )
@@ -473,7 +468,7 @@ class BaseSlurmRunner(BaseRunner):
         *,
         task: SlurmTask,
         was_job_scancelled: bool = False,
-    ) -> tuple[Any, Exception]:
+    ) -> tuple[Any, Exception | None]:
         try:
             with open(task.output_file_local) as f:
                 output = json.load(f)
@@ -566,6 +561,10 @@ class BaseSlurmRunner(BaseRunner):
     def job_ids(self) -> list[str]:
         return list(self.jobs.keys())
 
+    @property
+    def job_ids_int(self) -> list[int]:
+        return list(map(int, self.jobs.keys()))
+
     def wait_and_check_shutdown(self) -> list[str]:
         """
         Wait at most `self.poll_interval`, while also checking for shutdown.
@@ -602,6 +601,7 @@ class BaseSlurmRunner(BaseRunner):
 
     def submit(
         self,
+        *,
         base_command: str,
         workflow_task_order: int,
         workflow_task_id: int,
@@ -612,7 +612,23 @@ class BaseSlurmRunner(BaseRunner):
         config: SlurmConfig,
         task_type: SubmitTaskType,
         user_id: int,
-    ) -> tuple[Any, Exception]:
+    ) -> tuple[Any, Exception | None]:
+        """
+        Run a single fractal task.
+
+        Args:
+            base_command:
+            workflow_task_order:
+            workflow_task_id:
+            task_name:
+            parameters: Dictionary of parameters.
+            history_unit_id:
+                Database ID of the corresponding `HistoryUnit` entry.
+            task_type: Task type.
+            task_files: `TaskFiles` object.
+            config: Runner-specific parameters.
+            user_id:
+        """
         logger.debug("[submit] START")
 
         # Always refresh `executor_error_log` before starting a task
@@ -687,7 +703,7 @@ class BaseSlurmRunner(BaseRunner):
 
             create_accounting_record_slurm(
                 user_id=user_id,
-                slurm_job_ids=self.
+                slurm_job_ids=self.job_ids_int,
             )
 
             # Retrieval phase
@@ -757,11 +773,12 @@ class BaseSlurmRunner(BaseRunner):
 
     def multisubmit(
         self,
+        *,
         base_command: str,
         workflow_task_order: int,
         workflow_task_id: int,
         task_name: str,
-        list_parameters: list[dict],
+        list_parameters: list[dict[str, Any]],
         history_unit_ids: list[int],
         list_task_files: list[TaskFiles],
         task_type: MultisubmitTaskType,
@@ -769,9 +786,26 @@ class BaseSlurmRunner(BaseRunner):
         user_id: int,
     ) -> tuple[dict[int, Any], dict[int, BaseException]]:
         """
+        Run a parallel fractal task.
+
         Note: `list_parameters`, `list_task_files` and `history_unit_ids`
         have the same size. For parallel tasks, this is also the number of
         input images, while for compound tasks these can differ.
+
+        Args:
+            base_command:
+            workflow_task_order:
+            workflow_task_id:
+            task_name:
+            list_parameters:
+                List of dictionaries of parameters (each one must include
+                `zarr_urls` key).
+            history_unit_ids:
+                Database IDs of the corresponding `HistoryUnit` entries.
+            list_task_files: `TaskFiles` objects.
+            task_type: Task type.
+            config: Runner-specific parameters.
+            user_id:
         """
 
         # Always refresh `executor_error_log` before starting a task
@@ -779,6 +813,9 @@ class BaseSlurmRunner(BaseRunner):
 
         config = self._enrich_slurm_config(config)
 
+        results: dict[int, Any] = {}
+        exceptions: dict[int, BaseException] = {}
+
         logger.debug(f"[multisubmit] START, {len(list_parameters)=}")
         try:
             if self.is_shutdown():
@@ -789,8 +826,8 @@ class BaseSlurmRunner(BaseRunner):
                         status=HistoryUnitStatus.FAILED,
                         db_sync=db,
                     )
-                results
-                exceptions
+                results = {}
+                exceptions = {
                     ind: SHUTDOWN_EXCEPTION
                     for ind in range(len(list_parameters))
                 }
@@ -812,9 +849,6 @@ class BaseSlurmRunner(BaseRunner):
         self._mkdir_local_folder(workdir_local.as_posix())
         self._mkdir_remote_folder(folder=workdir_remote.as_posix())
 
-        results: dict[int, Any] = {}
-        exceptions: dict[int, BaseException] = {}
-
         # NOTE: chunking has already taken place in `get_slurm_config`,
         # so that `config.tasks_per_job` is now set.
 
@@ -889,7 +923,7 @@ class BaseSlurmRunner(BaseRunner):
 
             create_accounting_record_slurm(
                 user_id=user_id,
-                slurm_job_ids=self.
+                slurm_job_ids=self.job_ids_int,
             )
 
         except Exception as e:
```
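The constructor hunks above make every `BaseSlurmRunner.__init__` argument keyword-only and turn `poll_interval` and `user_cache_dir` into required parameters, dropping the previous in-constructor fallback to the `FRACTAL_SLURM_POLL_INTERVAL` setting. A minimal toy sketch of the new calling convention follows; it is not the real class (which also performs extra setup such as `check_fractal_server_versions()`), and all values are hypothetical:

```python
from pathlib import Path
from typing import Literal


class ToyRunner:
    """Toy stand-in mirroring the keyword-only 2.17.0 signature."""

    def __init__(
        self,
        *,
        root_dir_local: Path,
        root_dir_remote: Path,
        slurm_runner_type: Literal["ssh", "sudo"],
        python_worker_interpreter: str,
        poll_interval: int,
        common_script_lines: list[str] | None = None,
        user_cache_dir: str,
        slurm_account: str | None = None,
    ):
        # `poll_interval` is now injected by the caller instead of being
        # read from global settings inside the constructor.
        self.poll_interval = poll_interval
        self.poll_interval_internal = poll_interval / 10.0


# All arguments must be passed by keyword; positional calls raise TypeError.
runner = ToyRunner(
    root_dir_local=Path("/tmp/job/local"),
    root_dir_remote=Path("/tmp/job/remote"),
    slurm_runner_type="sudo",
    python_worker_interpreter="/usr/bin/python3",
    poll_interval=5,
    user_cache_dir="/tmp/cache",
)
```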
fractal_server/runner/executors/slurm_common/get_slurm_config.py

```diff
@@ -1,50 +1,42 @@
-from pathlib import Path
 from typing import Literal
 
 from ._batching import heuristics
-from .
-from .
-from ._slurm_config import logger
-from ._slurm_config import SlurmConfig
-from ._slurm_config import SlurmConfigError
+from .slurm_config import logger
+from .slurm_config import SlurmConfig
 from fractal_server.app.models.v2 import WorkflowTaskV2
+from fractal_server.runner.config import JobRunnerConfigSLURM
+from fractal_server.runner.config.slurm_mem_to_MB import slurm_mem_to_MB
+from fractal_server.runner.exceptions import SlurmConfigError
 from fractal_server.string_tools import interpret_as_bool
 
 
-def
+def _get_slurm_config_internal(
+    shared_config: JobRunnerConfigSLURM,
     wftask: WorkflowTaskV2,
     which_type: Literal["non_parallel", "parallel"],
-    config_path: Path | None = None,
 ) -> SlurmConfig:
     """
-    Prepare a `SlurmConfig` configuration object
 
-
-    or `wftask.meta_non_parallel`. In the following description, let us assume
-    that `which_type="parallel"`.
+    Prepare a specific `SlurmConfig` configuration.
 
-    The
+    The base configuration is the runner-level `shared_config` object, based
+    on `resource.jobs_runner_config` (note that GPU-specific properties take
+    priority, when `needs_gpu=True`). We then incorporate attributes from
+    `wftask.meta_{non_parallel,parallel}` - with higher priority.
 
-
-
-
-    3. Properties in `wftask.meta_parallel` (which typically include those in
-    `wftask.task.meta_parallel`). Note that `wftask.meta_parallel` may be
-    `None`.
-
-    Arguments:
+    Args:
+        shared_config:
+            Configuration object based on `resource.jobs_runner_config`.
         wftask:
-
-            prepared.
-        config_path:
-            Path of a Fractal SLURM configuration file; if `None`, use
-            `FRACTAL_SLURM_CONFIG_FILE` variable from settings.
+            WorkflowTaskV2 for which the backend configuration should
+            be prepared.
         which_type:
-
+            Whether we should look at the non-parallel or parallel part
+            of `wftask`.
+        tot_tasks: Not used here, only present as a common interface.
 
     Returns:
-
-        The SlurmConfig object
+        A ready-to-use `SlurmConfig` object.
     """
 
     if which_type == "non_parallel":
@@ -60,25 +52,19 @@ def get_slurm_config_internal(
         f"[get_slurm_config] WorkflowTask meta attribute: {wftask_meta=}"
     )
 
-    #
-
-    slurm_dict = slurm_env.default_slurm_config.model_dump(
+    # Start from `shared_config`
+    slurm_dict = shared_config.default_slurm_config.model_dump(
         exclude_unset=True, exclude={"mem"}
     )
-    if
-        slurm_dict["mem_per_task_MB"] =
+    if shared_config.default_slurm_config.mem:
+        slurm_dict["mem_per_task_MB"] = shared_config.default_slurm_config.mem
 
     # Incorporate slurm_env.batching_config
-    for key, value in
+    for key, value in shared_config.batching_config.model_dump().items():
        slurm_dict[key] = value
 
     # Incorporate slurm_env.user_local_exports
-    slurm_dict["user_local_exports"] =
-
-    logger.debug(
-        "[get_slurm_config] Fractal SLURM configuration file: "
-        f"{slurm_env.model_dump()=}"
-    )
+    slurm_dict["user_local_exports"] = shared_config.user_local_exports
 
     # GPU-related options
     # Notes about priority:
@@ -91,13 +77,13 @@ def get_slurm_config_internal(
     else:
         needs_gpu = False
     logger.debug(f"[get_slurm_config] {needs_gpu=}")
-    if needs_gpu:
-        for key, value in
+    if needs_gpu and shared_config.gpu_slurm_config is not None:
+        for key, value in shared_config.gpu_slurm_config.model_dump(
             exclude_unset=True, exclude={"mem"}
         ).items():
             slurm_dict[key] = value
-        if
-            slurm_dict["mem_per_task_MB"] =
+        if shared_config.gpu_slurm_config.mem:
+            slurm_dict["mem_per_task_MB"] = shared_config.gpu_slurm_config.mem
 
     # Number of CPUs per task, for multithreading
     if wftask_meta is not None and "cpus_per_task" in wftask_meta:
@@ -107,7 +93,7 @@ def get_slurm_config_internal(
     # Required memory per task, in MB
     if wftask_meta is not None and "mem" in wftask_meta:
         raw_mem = wftask_meta["mem"]
-        mem_per_task_MB =
+        mem_per_task_MB = slurm_mem_to_MB(raw_mem)
         slurm_dict["mem_per_task_MB"] = mem_per_task_MB
 
     # Job name
```
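In the hunk above, parsing of `wftask_meta["mem"]` now goes through the new `fractal_server.runner.config.slurm_mem_to_MB` helper (a new module in this release, see the file list). The sketch below only illustrates the kind of conversion such a helper typically performs, assuming plain integers are already MB and that `M`/`G`/`T` suffixes follow SLURM conventions; it is not the actual implementation:

```python
import re


def mem_to_MB(raw_mem: int | str) -> int:
    """Convert a SLURM-style memory value (e.g. 64, "64M", "16G") into MB."""
    if isinstance(raw_mem, int):
        return raw_mem
    match = re.fullmatch(r"(\d+)([MGT]?)", raw_mem)
    if match is None:
        raise ValueError(f"Invalid memory value: {raw_mem!r}")
    value, unit = int(match.group(1)), match.group(2)
    factor = {"": 1, "M": 1, "G": 1024, "T": 1024**2}[unit]
    return value * factor


assert mem_to_MB("16G") == 16 * 1024
```

The remaining hunks of `get_slurm_config.py` follow.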
```diff
@@ -144,8 +130,7 @@ def get_slurm_config_internal(
     extra_lines = slurm_dict.get("extra_lines", []) + extra_lines
     if len(set(extra_lines)) != len(extra_lines):
         logger.debug(
-            "[get_slurm_config] Removing repeated elements
-            f"{extra_lines=}."
+            f"[get_slurm_config] Removing repeated elements in {extra_lines=}."
         )
         extra_lines = list(set(extra_lines))
     slurm_dict["extra_lines"] = extra_lines
@@ -164,8 +149,7 @@ def get_slurm_config_internal(
 
     # Put everything together
     logger.debug(
-        "[get_slurm_config]
-        f"{slurm_dict=}"
+        f"[get_slurm_config] Create SlurmConfig object based on {slurm_dict=}"
     )
     slurm_config = SlurmConfig(**slurm_dict)
 
@@ -173,15 +157,15 @@ def get_slurm_config_internal(
 
 
 def get_slurm_config(
+    shared_config: JobRunnerConfigSLURM,
     wftask: WorkflowTaskV2,
     which_type: Literal["non_parallel", "parallel"],
-    config_path: Path | None = None,
     tot_tasks: int = 1,
 ) -> SlurmConfig:
-    config =
-
-
-
+    config = _get_slurm_config_internal(
+        shared_config=shared_config,
+        wftask=wftask,
+        which_type=which_type,
     )
 
     # Set/validate parameters for task batching
```
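The hunks above rework `get_slurm_config` so that the per-task configuration is layered from three sources, in increasing priority: the runner-level `shared_config` (built from `resource.jobs_runner_config`), its GPU-specific section when `needs_gpu=True`, and finally `wftask.meta_{non_parallel,parallel}`. A toy sketch of that priority order, using plain dictionaries instead of the real `JobRunnerConfigSLURM`/`WorkflowTaskV2` objects (all values below are hypothetical):

```python
# Hypothetical values; only the merge order mirrors the diff above.
default_slurm_config = {"partition": "cpu", "cpus_per_task": 1, "mem_per_task_MB": 4000}
gpu_slurm_config = {"partition": "gpu", "gres": "gpu:1", "mem_per_task_MB": 16000}
wftask_meta = {"needs_gpu": True, "cpus_per_task": 8, "mem": 32000}

slurm_dict = dict(default_slurm_config)            # 1. shared defaults
if wftask_meta.get("needs_gpu") and gpu_slurm_config is not None:
    slurm_dict.update(gpu_slurm_config)            # 2. GPU section wins over defaults
if "cpus_per_task" in wftask_meta:                 # 3. wftask.meta_* has top priority
    slurm_dict["cpus_per_task"] = wftask_meta["cpus_per_task"]
if "mem" in wftask_meta:
    slurm_dict["mem_per_task_MB"] = wftask_meta["mem"]

print(slurm_dict)
# {'partition': 'gpu', 'cpus_per_task': 8, 'mem_per_task_MB': 32000, 'gres': 'gpu:1'}
```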
fractal_server/runner/executors/slurm_common/slurm_config.py (new file)

```diff
@@ -0,0 +1,214 @@
+"""
+Submodule to handle the SLURM configuration for a WorkflowTask
+"""
+from pathlib import Path
+
+from pydantic import BaseModel
+from pydantic import ConfigDict
+from pydantic import Field
+
+from fractal_server.logger import set_logger
+
+logger = set_logger(__name__)
+
+
+class SlurmConfig(BaseModel):
+    """
+    Abstraction for SLURM parameters
+
+    **NOTE**: `SlurmConfig` objects are created internally in `fractal-server`,
+    and they are not meant to be initialized by the user; the same holds for
+    `SlurmConfig` attributes (e.g. `mem_per_task_MB`), which are not meant to
+    be part of the superuser-defined `resource.jobs_runner_config` JSON field.
+
+    Part of the attributes map directly to some of the SLURM attributes (see
+    https://slurm.schedmd.com/sbatch.html), e.g. `partition`. Other attributes
+    are metaparameters which are needed in fractal-server to combine multiple
+    tasks in the same SLURM job (e.g. `parallel_tasks_per_job` or
+    `max_num_jobs`).
+
+    Attributes:
+        partition: Corresponds to SLURM option.
+        cpus_per_task: Corresponds to SLURM option.
+        mem_per_task_MB: Corresponds to `mem` SLURM option.
+        job_name: Corresponds to `name` SLURM option.
+        constraint: Corresponds to SLURM option.
+        gres: Corresponds to SLURM option.
+        account: Corresponds to SLURM option.
+        gpus: Corresponds to SLURM option.
+        time: Corresponds to SLURM option (WARNING: not fully supported).
+        nodelist: Corresponds to SLURM option.
+        exclude: Corresponds to SLURM option.
+        prefix: Prefix of configuration lines in SLURM submission scripts.
+        shebang_line: Shebang line for SLURM submission scripts.
+        extra_lines: Additional lines to include in SLURM submission scripts.
+        tasks_per_job: Number of tasks for each SLURM job.
+        parallel_tasks_per_job: Number of tasks to run in parallel for
+            each SLURM job.
+        target_cpus_per_job: Optimal number of CPUs to be requested in each
+            SLURM job.
+        max_cpus_per_job: Maximum number of CPUs that can be requested in each
+            SLURM job.
+        target_mem_per_job: Optimal amount of memory (in MB) to be requested in
+            each SLURM job.
+        max_mem_per_job: Maximum amount of memory (in MB) that can be requested
+            in each SLURM job.
+        target_num_jobs: Optimal number of SLURM jobs for a given WorkflowTask.
+        max_num_jobs: Maximum number of SLURM jobs for a given WorkflowTask.
+        user_local_exports:
+            Key-value pairs to be included as `export`-ed variables in SLURM
+            submission script, after prepending values with the user's cache
+            directory.
+    """
+
+    model_config = ConfigDict(extra="forbid")
+
+    # Required SLURM parameters (note that the integer attributes are those
+    # that will need to scale up with the number of parallel tasks per job)
+    partition: str
+    cpus_per_task: int
+    mem_per_task_MB: int
+    prefix: str = "#SBATCH"
+    shebang_line: str = "#!/bin/sh"
+
+    # Optional SLURM parameters
+    job_name: str | None = None
+    constraint: str | None = None
+    gres: str | None = None
+    gpus: str | None = None
+    time: str | None = None
+    account: str | None = None
+    nodelist: str | None = None
+    exclude: str | None = None
+
+    # Free-field attribute for extra lines to be added to the SLURM job
+    # preamble
+    extra_lines: list[str] = Field(default_factory=list)
+
+    # Variables that will be `export`ed in the SLURM submission script
+    user_local_exports: dict[str, str] = Field(default_factory=dict)
+
+    # Metaparameters needed to combine multiple tasks in each SLURM job
+    tasks_per_job: int | None = None
+    parallel_tasks_per_job: int | None = None
+    target_cpus_per_job: int
+    max_cpus_per_job: int
+    target_mem_per_job: int
+    max_mem_per_job: int
+    target_num_jobs: int
+    max_num_jobs: int
+
+    def _sorted_extra_lines(self) -> list[str]:
+        """
+        Return a copy of `self.extra_lines`, where lines starting with
+        `self.prefix` are listed first.
+        """
+
+        def _no_prefix(_line):
+            if _line.startswith(self.prefix):
+                return 0
+            else:
+                return 1
+
+        return sorted(self.extra_lines, key=_no_prefix)
+
+    def sort_script_lines(self, script_lines: list[str]) -> list[str]:
+        """
+        Return a copy of `script_lines`, where lines are sorted as in:
+
+        1. `self.shebang_line` (if present);
+        2. Lines starting with `self.prefix`;
+        3. Other lines.
+
+        Args:
+            script_lines:
+        """
+
+        def _sorting_function(_line):
+            if _line == self.shebang_line:
+                return 0
+            elif _line.startswith(self.prefix):
+                return 1
+            else:
+                return 2
+
+        return sorted(script_lines, key=_sorting_function)
+
+    def to_sbatch_preamble(
+        self,
+        remote_export_dir: str,
+    ) -> list[str]:
+        """
+        Compile `SlurmConfig` object into the preamble of a SLURM submission
+        script.
+
+        Args:
+            remote_export_dir:
+                Base directory for exports defined in
+                `self.user_local_exports`.
+        """
+        if self.parallel_tasks_per_job is None:
+            raise ValueError(
+                "SlurmConfig.sbatch_preamble requires that "
+                f"{self.parallel_tasks_per_job=} is not None."
+            )
+        if len(self.extra_lines) != len(set(self.extra_lines)):
+            raise ValueError(f"{self.extra_lines=} contains repetitions")
+
+        mem_per_job_MB = self.parallel_tasks_per_job * self.mem_per_task_MB
+        lines = [
+            self.shebang_line,
+            f"{self.prefix} --partition={self.partition}",
+            f"{self.prefix} --ntasks={self.parallel_tasks_per_job}",
+            f"{self.prefix} --cpus-per-task={self.cpus_per_task}",
+            f"{self.prefix} --mem={mem_per_job_MB}M",
+        ]
+        for key in [
+            "job_name",
+            "constraint",
+            "gres",
+            "gpus",
+            "time",
+            "account",
+            "exclude",
+            "nodelist",
+        ]:
+            value = getattr(self, key)
+            if value is not None:
+                # Handle the `time` parameter
+                if key == "time" and self.parallel_tasks_per_job > 1:
+                    # NOTE: see issue #1632
+                    logger.warning(
+                        f"`time` SLURM parameter is set to {self.time}, "
+                        "but this does not take into account the number of "
+                        f"SLURM tasks ({self.parallel_tasks_per_job})."
+                    )
+                option = key.replace("_", "-")
+                lines.append(f"{self.prefix} --{option}={value}")
+
+        for line in self._sorted_extra_lines():
+            lines.append(line)
+
+        if self.user_local_exports:
+            for key, value in self.user_local_exports.items():
+                tmp_value = str(Path(remote_export_dir) / value)
+                lines.append(f"export {key}={tmp_value}")
+
+        """
+        FIXME export SRUN_CPUS_PER_TASK
+        # From https://slurm.schedmd.com/sbatch.html: Beginning with 22.05,
+        # srun will not inherit the --cpus-per-task value requested by salloc
+        # or sbatch. It must be requested again with the call to srun or set
+        # with the SRUN_CPUS_PER_TASK environment variable if desired for the
+        # task(s).
+        if config.cpus_per_task:
+            #additional_setup_lines.append(
+            f"export SRUN_CPUS_PER_TASK={config.cpus_per_task}"
+            )
+        """
+
+        return lines
+
+    @property
+    def batch_size(self) -> int:
+        return self.tasks_per_job
```
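Since `SlurmConfig` now lives in this new `slurm_config.py` module, here is a short usage sketch based directly on the code above. Field values are hypothetical, and in fractal-server these objects are built internally rather than by users:

```python
from fractal_server.runner.executors.slurm_common.slurm_config import SlurmConfig

config = SlurmConfig(
    partition="main",
    cpus_per_task=2,
    mem_per_task_MB=8000,
    target_cpus_per_job=8,
    max_cpus_per_job=16,
    target_mem_per_job=32000,
    max_mem_per_job=64000,
    target_num_jobs=5,
    max_num_jobs=50,
    parallel_tasks_per_job=4,
    job_name="my-task",
)
print("\n".join(config.to_sbatch_preamble(remote_export_dir="/home/user/cache")))
# #!/bin/sh
# #SBATCH --partition=main
# #SBATCH --ntasks=4
# #SBATCH --cpus-per-task=2
# #SBATCH --mem=32000M
# #SBATCH --job-name=my-task
```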
fractal_server/runner/executors/slurm_common/slurm_job_task_models.py

```diff
@@ -112,7 +112,7 @@ class SlurmJob(BaseModel):
         return self.slurm_stderr_remote_path.as_posix()
 
     @property
-    def slurm_stdout_local_path(self) ->
+    def slurm_stdout_local_path(self) -> Path:
         return (
             self.workdir_local
             / f"{self.prefix}-slurm-{self.slurm_job_id_placeholder}.out"
```