fractal-server 2.15.6__py3-none-any.whl → 2.15.8__py3-none-any.whl
This diff represents the changes between two publicly released package versions, as they appear in their public registry, and is provided for informational purposes only.
- fractal_server/__init__.py +1 -1
- fractal_server/app/models/security.py +1 -2
- fractal_server/app/routes/admin/v2/job.py +1 -1
- fractal_server/app/routes/admin/v2/task_group_lifecycle.py +2 -3
- fractal_server/app/routes/api/v2/_aux_functions_history.py +3 -2
- fractal_server/app/routes/api/v2/_aux_functions_task_version_update.py +1 -1
- fractal_server/app/routes/api/v2/_aux_functions_tasks.py +19 -10
- fractal_server/app/routes/api/v2/submit.py +22 -12
- fractal_server/app/routes/api/v2/task.py +1 -1
- fractal_server/app/routes/api/v2/task_group_lifecycle.py +1 -1
- fractal_server/app/runner/executors/base_runner.py +5 -4
- fractal_server/app/runner/executors/slurm_common/_slurm_config.py +1 -6
- fractal_server/app/runner/executors/slurm_common/base_slurm_runner.py +89 -65
- fractal_server/app/runner/executors/slurm_ssh/runner.py +59 -4
- fractal_server/app/runner/executors/slurm_ssh/tar_commands.py +1 -2
- fractal_server/app/runner/executors/slurm_sudo/runner.py +1 -1
- fractal_server/app/runner/v2/runner.py +1 -1
- fractal_server/app/runner/v2/runner_functions.py +5 -5
- fractal_server/app/schemas/v2/task_collection.py +3 -1
- fractal_server/config.py +70 -25
- fractal_server/ssh/_fabric.py +74 -79
- fractal_server/tasks/v2/ssh/_utils.py +1 -1
- fractal_server/tasks/v2/utils_pixi.py +6 -2
- fractal_server/tasks/v2/utils_python_interpreter.py +1 -1
- {fractal_server-2.15.6.dist-info → fractal_server-2.15.8.dist-info}/METADATA +4 -3
- {fractal_server-2.15.6.dist-info → fractal_server-2.15.8.dist-info}/RECORD +29 -29
- {fractal_server-2.15.6.dist-info → fractal_server-2.15.8.dist-info}/WHEEL +1 -1
- {fractal_server-2.15.6.dist-info → fractal_server-2.15.8.dist-info}/LICENSE +0 -0
- {fractal_server-2.15.6.dist-info → fractal_server-2.15.8.dist-info}/entry_points.txt +0 -0
fractal_server/__init__.py
CHANGED
@@ -1 +1 @@
-__VERSION__ = "2.15.6"
+__VERSION__ = "2.15.8"

fractal_server/app/routes/admin/v2/job.py
CHANGED
@@ -109,7 +109,7 @@ async def view_job(
 
 @router.get("/{job_id}/", response_model=JobReadV2)
 async def view_single_job(
-    job_id: int
+    job_id: int,
     show_tmp_logs: bool = False,
     user: UserOAuth = Depends(current_active_superuser),
     db: AsyncSession = Depends(get_async_db),

fractal_server/app/routes/admin/v2/task_group_lifecycle.py
CHANGED
@@ -25,7 +25,6 @@ from fractal_server.app.routes.aux.validate_user_settings import (
 from fractal_server.app.schemas.v2 import TaskGroupActivityActionV2
 from fractal_server.app.schemas.v2 import TaskGroupActivityStatusV2
 from fractal_server.app.schemas.v2 import TaskGroupActivityV2Read
-from fractal_server.app.schemas.v2 import TaskGroupReadV2
 from fractal_server.app.schemas.v2 import TaskGroupV2OriginEnum
 from fractal_server.config import get_settings
 from fractal_server.logger import set_logger
@@ -52,7 +51,7 @@ async def deactivate_task_group(
     response: Response,
     superuser: UserOAuth = Depends(current_active_superuser),
     db: AsyncSession = Depends(get_async_db),
-) -> TaskGroupReadV2:
+) -> TaskGroupActivityV2Read:
     """
     Deactivate task-group venv
     """
@@ -157,7 +156,7 @@ async def reactivate_task_group(
     response: Response,
     superuser: UserOAuth = Depends(current_active_superuser),
     db: AsyncSession = Depends(get_async_db),
-) -> TaskGroupReadV2:
+) -> TaskGroupActivityV2Read:
     """
     Deactivate task-group venv
     """

fractal_server/app/routes/api/v2/_aux_functions_history.py
CHANGED
@@ -102,8 +102,9 @@ async def _verify_workflow_and_dataset_access(
     Verify user access to a dataset/workflow pair.
 
     Args:
+        project_id:
+        workflow_id:
         dataset_id:
-        workflow_task_id:
         user_id:
         db:
     """
@@ -148,7 +149,7 @@ async def get_wftask_check_owner(
     Args:
         project_id:
         dataset_id:
-
+        workflowtask_id:
         user_id:
         db:
     """

fractal_server/app/routes/api/v2/_aux_functions_task_version_update.py
CHANGED
@@ -6,7 +6,7 @@ def get_new_workflow_task_meta(
     old_workflow_task_meta: dict | None,
     old_task_meta: dict | None,
     new_task_meta: dict | None,
-) -> dict[str, Any]:
+) -> dict[str, Any] | None:
     """
     Prepare new meta field based on old/new tasks and old workflow task.
     """

fractal_server/app/routes/api/v2/_aux_functions_tasks.py
CHANGED
@@ -231,11 +231,14 @@ async def _get_collection_task_group_activity_status_message(
     )
     task_group_activity_list = res.scalars().all()
     if len(task_group_activity_list) > 1:
-        msg = (
-            "\nWarning: "
+        msg_short = (
             "Expected only one TaskGroupActivityV2 associated to TaskGroup "
             f"{task_group_id}, found {len(task_group_activity_list)} "
             f"(IDs: {[tga.id for tga in task_group_activity_list]})."
+        )
+        logger.error(f"UnreachableBranchError: {msg_short}")
+        msg = (
+            f"\nWarning: {msg_short}\n"
             "Warning: this should have not happened, please contact an admin."
         )
     elif len(task_group_activity_list) == 1:
@@ -268,13 +271,16 @@ async def _verify_non_duplication_user_constraint(
     if duplicate:
         user = await db.get(UserOAuth, user_id)
         if len(duplicate) > 1:
+            error_msg = (
+                f"User '{user.email}' already owns {len(duplicate)} task "
+                f"groups with name='{pkg_name}' and {version=} "
+                f"(IDs: {[group.id for group in duplicate]})."
+            )
+            logger.error(f"UnreachableBranchError: {error_msg}")
             raise HTTPException(
                 status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
                 detail=(
-                    "Invalid state:\n"
-                    f"User '{user.email}' already owns {len(duplicate)} task "
-                    f"groups with name='{pkg_name}' and {version=} "
-                    f"(IDs: {[group.id for group in duplicate]}).\n"
+                    f"Invalid state: {error_msg}\n"
                     "This should have not happened: please contact an admin."
                 ),
             )
@@ -310,13 +316,16 @@ async def _verify_non_duplication_group_constraint(
     if duplicate:
         user_group = await db.get(UserGroup, user_group_id)
         if len(duplicate) > 1:
+            error_msg = (
+                f"UserGroup '{user_group.name}' already owns "
+                f"{len(duplicate)} task groups with name='{pkg_name}' and "
+                f"{version=} (IDs: {[group.id for group in duplicate]}).\n"
+            )
+            logger.error(error_msg)
             raise HTTPException(
                 status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
                 detail=(
-                    "Invalid state:\n"
-                    f"UserGroup '{user_group.name}' already owns "
-                    f"{len(duplicate)} task groups with name='{pkg_name}' and "
-                    f"{version=} (IDs: {[group.id for group in duplicate]}).\n"
+                    f"Invalid state:\n{error_msg}"
                     "This should have not happened: please contact an admin."
                 ),
             )

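Note: the three hunks above apply one pattern: build the human-readable message once, log it server-side with an UnreachableBranchError marker, then reuse the same text in the HTTP error detail. A minimal framework-free sketch of the pattern (the function name and the ValueError stand-in are illustrative, not fractal-server API):

    import logging

    logger = logging.getLogger(__name__)


    def fail_on_duplicates(duplicate_ids: list[int]) -> None:
        # Build the message once ...
        error_msg = f"Found {len(duplicate_ids)} duplicates (IDs: {duplicate_ids})."
        # ... log it for operators, flagged as an unreachable branch ...
        logger.error(f"UnreachableBranchError: {error_msg}")
        # ... and reuse the same text in the user-facing error.
        raise ValueError(f"Invalid state: {error_msg}")
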
fractal_server/app/routes/api/v2/submit.py
CHANGED
@@ -156,6 +156,28 @@ async def apply_workflow(
     if len(user_settings.slurm_accounts) > 0:
         job_create.slurm_account = user_settings.slurm_accounts[0]
 
+    # User appropriate FractalSSH object
+    if settings.FRACTAL_RUNNER_BACKEND == "slurm_ssh":
+        ssh_config = dict(
+            user=user_settings.ssh_username,
+            host=user_settings.ssh_host,
+            key_path=user_settings.ssh_private_key_path,
+        )
+        fractal_ssh_list = request.app.state.fractal_ssh_list
+        try:
+            fractal_ssh = fractal_ssh_list.get(**ssh_config)
+        except Exception as e:
+            logger.error(
+                "Could not get a valid SSH connection in the submit endpoint. "
+                f"Original error: '{str(e)}'."
+            )
+            raise HTTPException(
+                status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
+                detail="Error in setting up the SSH connection.",
+            )
+    else:
+        fractal_ssh = None
+
     # Add new Job object to DB
     job = JobV2(
         project_id=project_id,
@@ -219,18 +241,6 @@ async def apply_workflow(
     await db.merge(job)
     await db.commit()
 
-    # User appropriate FractalSSH object
-    if settings.FRACTAL_RUNNER_BACKEND == "slurm_ssh":
-        ssh_config = dict(
-            user=user_settings.ssh_username,
-            host=user_settings.ssh_host,
-            key_path=user_settings.ssh_private_key_path,
-        )
-        fractal_ssh_list = request.app.state.fractal_ssh_list
-        fractal_ssh = fractal_ssh_list.get(**ssh_config)
-    else:
-        fractal_ssh = None
-
     # Expunge user settings from db, to use in background task
     db.expunge(user_settings)
 

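Note: the two submit.py hunks are a single move plus error handling. In 2.15.6 the FractalSSH object was looked up only after the JobV2 row had been committed, so a failed lookup could leave a job record behind; in 2.15.8 the connection is acquired first and failures are mapped to a 422 before anything is written. A framework-free sketch of the reordering (pool and db are hypothetical stand-ins for fractal_ssh_list and the database session):

    def submit_job(backend: str, pool, db):
        # 1) Acquire the external resource first: a failure here cannot
        #    leave a half-created job behind, because nothing was written yet.
        if backend == "slurm_ssh":
            try:
                # Stand-in for `fractal_ssh_list.get(**ssh_config)`
                connection = pool.get()
            except Exception as e:
                # Stand-in for the HTTP 422 raised by the endpoint
                raise RuntimeError("Error in setting up the SSH connection.") from e
        else:
            connection = None

        # 2) Only now create and commit the job record.
        return db.create_job(connection=connection)
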
fractal_server/app/routes/api/v2/task.py
CHANGED
@@ -69,7 +69,7 @@ async def get_list_task(
         stm = stm.where(TaskV2.authors.icontains(author))
 
     res = await db.execute(stm)
-    task_list = res.scalars().all()
+    task_list = list(res.scalars().all())
    await db.close()
    if args_schema is False:
        for task in task_list:

fractal_server/app/runner/executors/base_runner.py
CHANGED
@@ -91,14 +91,15 @@ class BaseRunner:
         workflow_task_order:
         workflow_task_id:
         task_name:
-
-
+        list_parameters:
+            List of dictionaries of parameters (each one must include
+            `zarr_urls` key).
         history_unit_ids:
             Database IDs of the corresponding `HistoryUnit` entries.
+        list_task_files: `TaskFiles` objects.
         task_type: Task type.
-        task_files: `TaskFiles` object.
         config: Runner-specific parameters.
-        user_id
+        user_id:
     """
     raise NotImplementedError()
 

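Note: the corrected docstring pins down the shape of list_parameters. An illustrative value (paths and IDs are made up; only the mandatory zarr_urls key comes from the docstring):

    # One dict per task invocation; each must include `zarr_urls`.
    list_parameters = [
        {"zarr_urls": ["/data/plate.zarr/A/01/0"]},
        {"zarr_urls": ["/data/plate.zarr/A/02/0"]},
    ]
    history_unit_ids = [101, 102]  # one `HistoryUnit` DB id per entry
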
fractal_server/app/runner/executors/slurm_common/_slurm_config.py
CHANGED
@@ -66,7 +66,6 @@ class _SlurmConfigSet(BaseModel):
     time: str | None = None
     account: str | None = None
     extra_lines: list[str] | None = None
-    pre_submission_commands: list[str] | None = None
     gpus: str | None = None
 
 
@@ -213,7 +212,7 @@ class SlurmConfig(BaseModel):
     `SlurmConfig` attributes (e.g. `mem_per_task_MB`), which are not meant to
     be part of the `FRACTAL_SLURM_CONFIG_FILE` JSON file (details on the
     expected file content are defined in
-    [`SlurmConfigFile`](
+    [`SlurmConfigFile`](#fractal_server.app.runner._slurm._slurm_config.SlurmConfigFile)).
 
     Part of the attributes map directly to some of the SLURM attributes (see
     https://slurm.schedmd.com/sbatch.html), e.g. `partition`. Other attributes
@@ -253,8 +252,6 @@ class SlurmConfig(BaseModel):
         Key-value pairs to be included as `export`-ed variables in SLURM
         submission script, after prepending values with the user's cache
         directory.
-        pre_submission_commands: List of commands to be prepended to the sbatch
-            command.
     """
 
     model_config = ConfigDict(extra="forbid")
@@ -294,8 +291,6 @@ class SlurmConfig(BaseModel):
     target_num_jobs: int
     max_num_jobs: int
 
-    pre_submission_commands: list[str] = Field(default_factory=list)
-
     def _sorted_extra_lines(self) -> list[str]:
         """
         Return a copy of `self.extra_lines`, where lines starting with

fractal_server/app/runner/executors/slurm_common/base_slurm_runner.py
CHANGED
@@ -137,7 +137,9 @@ class BaseSlurmRunner(BaseRunner):
     def run_squeue(self, *, job_ids: list[str], **kwargs) -> str:
         raise NotImplementedError("Implement in child class.")
 
-    def _is_squeue_error_recoverable(
+    def _is_squeue_error_recoverable(
+        self, exception: BaseException
+    ) -> Literal[True]:
         """
         Determine whether a `squeue` error is considered recoverable.
 
@@ -262,14 +264,25 @@
 
         return new_slurm_config
 
-    def _submit_single_sbatch(
+    def _prepare_single_slurm_job(
         self,
         *,
         base_command: str,
         slurm_job: SlurmJob,
         slurm_config: SlurmConfig,
     ) -> str:
-
+        """
+        Prepare submission script locally.
+
+        Args:
+            base_command: Base of task executable command.
+            slurm_job: `SlurmJob` object
+            slurm_config: Configuration for SLURM job
+
+        Returns:
+            Command to submit the SLURM job.
+        """
+        logger.debug("[_prepare_single_slurm_job] START")
 
         for task in slurm_job.tasks:
             # Write input file
@@ -299,24 +312,10 @@
                 json.dump(task.parameters, f, indent=2)
 
             logger.debug(
-                f"[_submit_single_sbatch] Written {task.input_file_local=}"
+                "[_prepare_single_slurm_job] Written "
+                f"{task.input_file_local=}"
             )
 
-            if self.slurm_runner_type == "ssh":
-                # Send input file (only relevant for SSH)
-                self.fractal_ssh.send_file(
-                    local=task.input_file_local,
-                    remote=task.input_file_remote,
-                )
-                self.fractal_ssh.send_file(
-                    local=task.task_files.args_file_local,
-                    remote=task.task_files.args_file_remote,
-                )
-                logger.debug(
-                    "[_submit_single_sbatch] Transferred "
-                    f"{task.input_file_local=}"
-                )
-
         # Prepare commands to be included in SLURM submission script
         cmdlines = []
         for task in slurm_job.tasks:
@@ -353,7 +352,7 @@
             ]
         )
         script_lines = slurm_config.sort_script_lines(script_lines)
-        logger.debug(script_lines)
+        logger.debug(f"[_prepare_single_slurm_job] {script_lines=}")
 
         # Always print output of `uname -n` and `pwd`
         script_lines.append('\necho "Hostname: $(uname -n)"')
@@ -373,61 +372,64 @@
                 f"--mem={mem_per_task_MB}MB "
                 f"{cmd} &"
             )
-        script_lines.append("wait\n")
-        script = "\n".join(script_lines)
+        script_lines.append("wait\n\n")
         script_lines.append(
             'echo "End time: $(date +"%Y-%m-%dT%H:%M:%S%z")"'
         )
+        script = "\n".join(script_lines)
 
         # Write submission script
         with open(slurm_job.slurm_submission_script_local, "w") as f:
             f.write(script)
         logger.debug(
-            "[_submit_single_sbatch] Written "
+            "[_prepare_single_slurm_job] Written "
             f"{slurm_job.slurm_submission_script_local=}"
         )
 
         if self.slurm_runner_type == "ssh":
-            self.fractal_ssh.send_file(
-                local=slurm_job.slurm_submission_script_local,
-                remote=slurm_job.slurm_submission_script_remote,
-            )
             submit_command = (
-                "sbatch --parsable "
-                f"{slurm_job.slurm_submission_script_remote}"
+                f"sbatch --parsable {slurm_job.slurm_submission_script_remote}"
             )
         else:
             submit_command = (
-                "sbatch --parsable "
-                f"{slurm_job.slurm_submission_script_local}"
+                f"sbatch --parsable {slurm_job.slurm_submission_script_local}"
             )
-        (25 removed lines, old 404-428, not rendered in this diff view)
+        logger.debug("[_prepare_single_slurm_job] END")
+        return submit_command
+
+    def _send_many_job_inputs(
+        self, *, workdir_local: Path, workdir_remote: Path
+    ) -> None:
+        """
+        Placeholder method.
+
+        This method is intentionally left unimplemented in the base class.
+        Subclasses must override it to provide the logic for transferring
+        input data.
+        """
+        pass
+
+    def _submit_single_sbatch(
+        self,
+        *,
+        submit_command: str,
+        slurm_job: SlurmJob,
+    ) -> None:
+        """
+        Run `sbatch` and add the `slurm_job` to `self.jobs`.
+
+        Args:
+            submit_command:
+                The SLURM submission command prepared in
+                `self._prepare_single_slurm_job`.
+            slurm_job: The `SlurmJob` object.
+        """
+
+        logger.debug("[_submit_single_sbatch] START")
 
         # Submit SLURM job and retrieve job ID
+        logger.debug(f"[_submit_single_sbatch] Now run {submit_command=}")
+        sbatch_stdout = self._run_remote_cmd(submit_command)
         logger.info(f"[_submit_single_sbatch] {sbatch_stdout=}")
         stdout = sbatch_stdout.strip("\n")
         submitted_job_id = int(stdout)
@@ -623,11 +625,19 @@
             )
 
             config.parallel_tasks_per_job = 1
-            self._submit_single_sbatch(
+            submit_command = self._prepare_single_slurm_job(
                 base_command=base_command,
                 slurm_job=slurm_job,
                 slurm_config=config,
             )
+            self._send_many_job_inputs(
+                workdir_local=workdir_local,
+                workdir_remote=workdir_remote,
+            )
+            self._submit_single_sbatch(
+                submit_command=submit_command,
+                slurm_job=slurm_job,
+            )
         logger.debug(f"[submit] END submission phase, {self.job_ids=}")
 
         create_accounting_record_slurm(
@@ -726,8 +736,8 @@
                 status=HistoryUnitStatus.FAILED,
                 db_sync=db,
             )
-        results = {}
-        exceptions = {
+        results: dict[int, Any] = {}
+        exceptions: dict[int, BaseException] = {
             ind: SHUTDOWN_EXCEPTION
             for ind in range(len(list_parameters))
         }
@@ -801,13 +811,25 @@
                 )
             )
 
-
-        logger.debug("[multisubmit] Transfer files and submit jobs.")
+        submit_commands = []
         for slurm_job in jobs_to_submit:
+            submit_commands.append(
+                self._prepare_single_slurm_job(
+                    base_command=base_command,
+                    slurm_job=slurm_job,
+                    slurm_config=config,
+                )
+            )
+        self._send_many_job_inputs(
+            workdir_local=workdir_local,
+            workdir_remote=workdir_remote,
+        )
+        for slurm_job, submit_command in zip(
+            jobs_to_submit, submit_commands
+        ):
             self._submit_single_sbatch(
-                base_command=base_command,
+                submit_command=submit_command,
                 slurm_job=slurm_job,
-                slurm_config=config,
             )
 
         logger.info(f"[multisubmit] END submission phase, {self.job_ids=}")
@@ -830,8 +852,10 @@
                     status=HistoryUnitStatus.FAILED,
                     db_sync=db,
                 )
-            results = {}
-            exceptions = {ind: e for ind in range(len(list_parameters))}
+            results: dict[int, Any] = {}
+            exceptions: dict[int, BaseException] = {
+                ind: e for ind in range(len(list_parameters))
+            }
             return results, exceptions
 
         # Retrieval phase

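Note: together with the submit hunk above, this refactor splits each submission into three phases: prepare all sbatch scripts locally, transfer inputs once, then submit each job. A runnable toy sketch of the control flow (the three helpers are stand-ins for _prepare_single_slurm_job, _send_many_job_inputs, and _submit_single_sbatch, not the actual runner code):

    from pathlib import Path


    def prepare(job: str) -> str:
        # Stand-in for `_prepare_single_slurm_job`: write the script locally,
        # return the submission command.
        return f"sbatch --parsable /scripts/{job}.sh"


    def send_inputs(local: Path, remote: Path) -> None:
        # Stand-in for `_send_many_job_inputs`: one bulk transfer
        # (a no-op in the base class, a tar upload in the SSH runner).
        print(f"transfer {local} -> {remote} (once, for all jobs)")


    def run_sbatch(command: str) -> None:
        # Stand-in for `_submit_single_sbatch`: actually run `sbatch`.
        print(f"run: {command}")


    jobs_to_submit = ["job0", "job1"]
    commands = [prepare(job) for job in jobs_to_submit]   # phase 1: prepare
    send_inputs(Path("/tmp/wd"), Path("/remote/wd"))      # phase 2: transfer
    for command in commands:                              # phase 3: submit
        run_sbatch(command)
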
fractal_server/app/runner/executors/slurm_ssh/runner.py
CHANGED
@@ -166,12 +166,69 @@ class SlurmSSHRunner(BaseSlurmRunner):
         stdout = self.fractal_ssh.run_command(cmd=cmd)
         return stdout
 
+    def _send_many_job_inputs(
+        self, *, workdir_local: Path, workdir_remote: Path
+    ) -> None:
+        """
+        Compress, transfer, and extract a local working directory onto a remote
+        host.
+
+        This method creates a temporary `.tar.gz` archive of the given
+        `workdir_local`, transfers it to the remote machine via the configured
+        SSH connection, extracts it into `workdir_remote`, and removes the
+        temporary archive from both local and remote filesystems.
+        """
+
+        logger.debug("[_send_many_job_inputs] START")
+
+        tar_path_local = workdir_local.with_suffix(".tar.gz")
+        tar_name = Path(tar_path_local).name
+        tar_path_remote = workdir_remote.parent / tar_name
+
+        tar_compression_cmd = get_tar_compression_cmd(
+            subfolder_path=workdir_local, filelist_path=None
+        )
+        _, tar_extraction_cmd = get_tar_extraction_cmd(
+            archive_path=tar_path_remote
+        )
+        rm_tar_cmd = f"rm {tar_path_remote.as_posix()}"
+
+        try:
+            run_subprocess(tar_compression_cmd, logger_name=logger.name)
+            logger.debug(
+                "[_send_many_job_inputs] "
+                f"{workdir_local=} compressed to {tar_path_local=}."
+            )
+            self.fractal_ssh.send_file(
+                local=tar_path_local.as_posix(),
+                remote=tar_path_remote.as_posix(),
+            )
+            logger.debug(
+                "[_send_many_job_inputs] "
+                f"{tar_path_local=} sent via SSH to {tar_path_remote=}."
+            )
+            self.fractal_ssh.run_command(cmd=tar_extraction_cmd)
+            logger.debug(
+                "[_send_many_job_inputs] "
+                f"{tar_path_remote=} extracted to {workdir_remote=}."
+            )
+            self.fractal_ssh.run_command(cmd=rm_tar_cmd)
+            logger.debug(
+                "[_send_many_job_inputs] "
+                f"{tar_path_remote=} removed from remote server."
+            )
+        except Exception as e:
+            raise e
+        finally:
+            Path(tar_path_local).unlink(missing_ok=True)
+            logger.debug(f"[_send_many_job_inputs] {tar_path_local=} removed.")
+
+        logger.debug("[_send_many_job_inputs] END.")
+
     def run_squeue(
         self,
         *,
         job_ids: list[str],
-        base_interval: float = 2.0,
-        max_attempts: int = 7,
     ) -> str:
         """
         Run `squeue` for a set of SLURM job IDs.
@@ -205,8 +262,6 @@ class SlurmSSHRunner(BaseSlurmRunner):
         try:
             stdout = self.fractal_ssh.run_command(
                 cmd=cmd,
-                base_interval=base_interval,
-                max_attempts=max_attempts,
             )
             return stdout
         except FractalSSHCommandError as e:

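Note: _send_many_job_inputs replaces the per-file send_file calls removed from base_slurm_runner.py with a single archive round-trip. A standard-library-only sketch of the local half of that idea (shutil.make_archive stands in for get_tar_compression_cmd plus run_subprocess; the remote half would extract and delete the archive, as in the method above):

    import shutil
    from pathlib import Path


    def pack_workdir(workdir_local: Path) -> Path:
        # Create `<workdir_local>.tar.gz` next to the directory and
        # return its path.
        archive = shutil.make_archive(
            base_name=str(workdir_local),
            format="gztar",
            root_dir=str(workdir_local.parent),
            base_dir=workdir_local.name,
        )
        return Path(archive)
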
fractal_server/app/runner/executors/slurm_ssh/tar_commands.py
CHANGED
@@ -7,7 +7,7 @@ from pathlib import Path
 def get_tar_compression_cmd(
     subfolder_path: Path,
     filelist_path: Path | None,
-) ->
+) -> str:
     """
     Prepare command to compress e.g. `/path/dir` into `/path/dir.tar.gz`.
 
@@ -17,7 +17,6 @@ def get_tar_compression_cmd(
     Args:
         subfolder_path: Absolute path to the folder to compress.
         filelist_path: If set, to be used in the `--files-from` option.
-        expected_tarfile: If set, it should match to the returned one.
 
     Returns:
         tar command

fractal_server/app/runner/v2/runner.py
CHANGED
@@ -47,7 +47,7 @@ def _remove_status_from_attributes(
     Drop attribute `IMAGE_STATUS_KEY` from all images.
     """
     images_copy = deepcopy(images)
-    [img["attributes"].pop(IMAGE_STATUS_KEY) for img in images_copy]
+    [img["attributes"].pop(IMAGE_STATUS_KEY, None) for img in images_copy]
     return images_copy
 
 

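Note: the `, None` default is the entire fix: dict.pop(key) raises KeyError when the key is missing, so images without the status attribute used to break this cleanup. A two-line demonstration:

    img = {"attributes": {}}  # image lacking the status attribute
    img["attributes"].pop("status", None)  # returns None, no error
    # img["attributes"].pop("status")  # without the default: KeyError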