fractal-server 1.4.6__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (139)
  1. fractal_server/__init__.py +1 -1
  2. fractal_server/app/db/__init__.py +0 -1
  3. fractal_server/app/models/__init__.py +6 -8
  4. fractal_server/app/models/linkuserproject.py +9 -0
  5. fractal_server/app/models/security.py +6 -0
  6. fractal_server/app/models/v1/__init__.py +12 -0
  7. fractal_server/app/models/{dataset.py → v1/dataset.py} +5 -5
  8. fractal_server/app/models/{job.py → v1/job.py} +5 -5
  9. fractal_server/app/models/{project.py → v1/project.py} +5 -5
  10. fractal_server/app/models/{state.py → v1/state.py} +2 -2
  11. fractal_server/app/models/{task.py → v1/task.py} +7 -2
  12. fractal_server/app/models/{workflow.py → v1/workflow.py} +5 -5
  13. fractal_server/app/models/v2/__init__.py +22 -0
  14. fractal_server/app/models/v2/collection_state.py +21 -0
  15. fractal_server/app/models/v2/dataset.py +54 -0
  16. fractal_server/app/models/v2/job.py +51 -0
  17. fractal_server/app/models/v2/project.py +30 -0
  18. fractal_server/app/models/v2/task.py +93 -0
  19. fractal_server/app/models/v2/workflow.py +35 -0
  20. fractal_server/app/models/v2/workflowtask.py +49 -0
  21. fractal_server/app/routes/admin/__init__.py +0 -0
  22. fractal_server/app/routes/{admin.py → admin/v1.py} +42 -42
  23. fractal_server/app/routes/admin/v2.py +309 -0
  24. fractal_server/app/routes/api/v1/__init__.py +7 -7
  25. fractal_server/app/routes/api/v1/_aux_functions.py +8 -8
  26. fractal_server/app/routes/api/v1/dataset.py +48 -41
  27. fractal_server/app/routes/api/v1/job.py +14 -14
  28. fractal_server/app/routes/api/v1/project.py +30 -27
  29. fractal_server/app/routes/api/v1/task.py +26 -16
  30. fractal_server/app/routes/api/v1/task_collection.py +28 -16
  31. fractal_server/app/routes/api/v1/workflow.py +28 -28
  32. fractal_server/app/routes/api/v1/workflowtask.py +11 -11
  33. fractal_server/app/routes/api/v2/__init__.py +34 -0
  34. fractal_server/app/routes/api/v2/_aux_functions.py +502 -0
  35. fractal_server/app/routes/api/v2/dataset.py +293 -0
  36. fractal_server/app/routes/api/v2/images.py +279 -0
  37. fractal_server/app/routes/api/v2/job.py +200 -0
  38. fractal_server/app/routes/api/v2/project.py +186 -0
  39. fractal_server/app/routes/api/v2/status.py +150 -0
  40. fractal_server/app/routes/api/v2/submit.py +210 -0
  41. fractal_server/app/routes/api/v2/task.py +222 -0
  42. fractal_server/app/routes/api/v2/task_collection.py +239 -0
  43. fractal_server/app/routes/api/v2/task_legacy.py +59 -0
  44. fractal_server/app/routes/api/v2/workflow.py +380 -0
  45. fractal_server/app/routes/api/v2/workflowtask.py +265 -0
  46. fractal_server/app/routes/aux/_job.py +2 -2
  47. fractal_server/app/runner/__init__.py +0 -379
  48. fractal_server/app/runner/async_wrap.py +27 -0
  49. fractal_server/app/runner/components.py +5 -0
  50. fractal_server/app/runner/exceptions.py +129 -0
  51. fractal_server/app/runner/executors/__init__.py +0 -0
  52. fractal_server/app/runner/executors/slurm/__init__.py +3 -0
  53. fractal_server/app/runner/{_slurm → executors/slurm}/_batching.py +1 -1
  54. fractal_server/app/runner/executors/slurm/_check_jobs_status.py +72 -0
  55. fractal_server/app/runner/{_slurm → executors/slurm}/_executor_wait_thread.py +3 -4
  56. fractal_server/app/runner/{_slurm → executors/slurm}/_slurm_config.py +3 -152
  57. fractal_server/app/runner/{_slurm → executors/slurm}/_subprocess_run_as_user.py +42 -1
  58. fractal_server/app/runner/{_slurm → executors/slurm}/executor.py +46 -27
  59. fractal_server/app/runner/filenames.py +6 -0
  60. fractal_server/app/runner/set_start_and_last_task_index.py +39 -0
  61. fractal_server/app/runner/task_files.py +103 -0
  62. fractal_server/app/runner/v1/__init__.py +366 -0
  63. fractal_server/app/runner/{_common.py → v1/_common.py} +56 -111
  64. fractal_server/app/runner/{_local → v1/_local}/__init__.py +5 -4
  65. fractal_server/app/runner/{_local → v1/_local}/_local_config.py +6 -7
  66. fractal_server/app/runner/{_local → v1/_local}/_submit_setup.py +1 -5
  67. fractal_server/app/runner/v1/_slurm/__init__.py +312 -0
  68. fractal_server/app/runner/{_slurm → v1/_slurm}/_submit_setup.py +5 -11
  69. fractal_server/app/runner/v1/_slurm/get_slurm_config.py +163 -0
  70. fractal_server/app/runner/v1/common.py +117 -0
  71. fractal_server/app/runner/{handle_failed_job.py → v1/handle_failed_job.py} +8 -8
  72. fractal_server/app/runner/v2/__init__.py +336 -0
  73. fractal_server/app/runner/v2/_local/__init__.py +162 -0
  74. fractal_server/app/runner/v2/_local/_local_config.py +118 -0
  75. fractal_server/app/runner/v2/_local/_submit_setup.py +52 -0
  76. fractal_server/app/runner/v2/_local/executor.py +100 -0
  77. fractal_server/app/runner/{_slurm → v2/_slurm}/__init__.py +38 -47
  78. fractal_server/app/runner/v2/_slurm/_submit_setup.py +82 -0
  79. fractal_server/app/runner/v2/_slurm/get_slurm_config.py +182 -0
  80. fractal_server/app/runner/v2/deduplicate_list.py +23 -0
  81. fractal_server/app/runner/v2/handle_failed_job.py +165 -0
  82. fractal_server/app/runner/v2/merge_outputs.py +38 -0
  83. fractal_server/app/runner/v2/runner.py +343 -0
  84. fractal_server/app/runner/v2/runner_functions.py +374 -0
  85. fractal_server/app/runner/v2/runner_functions_low_level.py +130 -0
  86. fractal_server/app/runner/v2/task_interface.py +62 -0
  87. fractal_server/app/runner/v2/v1_compat.py +31 -0
  88. fractal_server/app/schemas/__init__.py +1 -42
  89. fractal_server/app/schemas/_validators.py +28 -5
  90. fractal_server/app/schemas/v1/__init__.py +36 -0
  91. fractal_server/app/schemas/{applyworkflow.py → v1/applyworkflow.py} +18 -18
  92. fractal_server/app/schemas/{dataset.py → v1/dataset.py} +30 -30
  93. fractal_server/app/schemas/{dumps.py → v1/dumps.py} +8 -8
  94. fractal_server/app/schemas/{manifest.py → v1/manifest.py} +5 -5
  95. fractal_server/app/schemas/{project.py → v1/project.py} +9 -9
  96. fractal_server/app/schemas/{task.py → v1/task.py} +12 -12
  97. fractal_server/app/schemas/{task_collection.py → v1/task_collection.py} +7 -7
  98. fractal_server/app/schemas/{workflow.py → v1/workflow.py} +38 -38
  99. fractal_server/app/schemas/v2/__init__.py +37 -0
  100. fractal_server/app/schemas/v2/dataset.py +126 -0
  101. fractal_server/app/schemas/v2/dumps.py +87 -0
  102. fractal_server/app/schemas/v2/job.py +114 -0
  103. fractal_server/app/schemas/v2/manifest.py +159 -0
  104. fractal_server/app/schemas/v2/project.py +34 -0
  105. fractal_server/app/schemas/v2/status.py +16 -0
  106. fractal_server/app/schemas/v2/task.py +151 -0
  107. fractal_server/app/schemas/v2/task_collection.py +109 -0
  108. fractal_server/app/schemas/v2/workflow.py +79 -0
  109. fractal_server/app/schemas/v2/workflowtask.py +208 -0
  110. fractal_server/config.py +13 -10
  111. fractal_server/images/__init__.py +4 -0
  112. fractal_server/images/models.py +136 -0
  113. fractal_server/images/tools.py +84 -0
  114. fractal_server/main.py +11 -3
  115. fractal_server/migrations/env.py +0 -2
  116. fractal_server/migrations/versions/5bf02391cfef_v2.py +245 -0
  117. fractal_server/tasks/__init__.py +0 -5
  118. fractal_server/tasks/endpoint_operations.py +13 -19
  119. fractal_server/tasks/utils.py +35 -0
  120. fractal_server/tasks/{_TaskCollectPip.py → v1/_TaskCollectPip.py} +3 -3
  121. fractal_server/tasks/v1/__init__.py +0 -0
  122. fractal_server/tasks/{background_operations.py → v1/background_operations.py} +20 -52
  123. fractal_server/tasks/v1/get_collection_data.py +14 -0
  124. fractal_server/tasks/v2/_TaskCollectPip.py +103 -0
  125. fractal_server/tasks/v2/__init__.py +0 -0
  126. fractal_server/tasks/v2/background_operations.py +381 -0
  127. fractal_server/tasks/v2/get_collection_data.py +14 -0
  128. fractal_server/urls.py +13 -0
  129. {fractal_server-1.4.6.dist-info → fractal_server-2.0.0.dist-info}/METADATA +11 -12
  130. fractal_server-2.0.0.dist-info/RECORD +169 -0
  131. fractal_server/app/runner/_slurm/.gitignore +0 -2
  132. fractal_server/app/runner/common.py +0 -307
  133. fractal_server/app/schemas/json_schemas/manifest.json +0 -81
  134. fractal_server-1.4.6.dist-info/RECORD +0 -97
  135. /fractal_server/app/runner/{_slurm → executors/slurm}/remote.py +0 -0
  136. /fractal_server/app/runner/{_local → v1/_local}/executor.py +0 -0
  137. {fractal_server-1.4.6.dist-info → fractal_server-2.0.0.dist-info}/LICENSE +0 -0
  138. {fractal_server-1.4.6.dist-info → fractal_server-2.0.0.dist-info}/WHEEL +0 -0
  139. {fractal_server-1.4.6.dist-info → fractal_server-2.0.0.dist-info}/entry_points.txt +0 -0
fractal_server/app/routes/aux/_job.py
@@ -3,8 +3,8 @@ from pathlib import Path
  from zipfile import ZIP_DEFLATED
  from zipfile import ZipFile

- from ...models import ApplyWorkflow
- from ...runner._common import SHUTDOWN_FILENAME
+ from ...models.v1 import ApplyWorkflow
+ from ...runner.filenames import SHUTDOWN_FILENAME


  def _write_shutdown_file(*, job: ApplyWorkflow):
fractal_server/app/runner/__init__.py
@@ -1,379 +0,0 @@
- # Copyright 2022 (C) Friedrich Miescher Institute for Biomedical Research and
- # University of Zurich
- #
- # Original authors:
- # Jacopo Nespolo <jacopo.nespolo@exact-lab.it>
- # Tommaso Comparin <tommaso.comparin@exact-lab.it>
- # Marco Franzon <marco.franzon@exact-lab.it>
- #
- # This file is part of Fractal and was originally developed by eXact lab S.r.l.
- # <exact-lab.it> under contract with Liberali Lab from the Friedrich Miescher
- # Institute for Biomedical Research and Pelkmans Lab from the University of
- # Zurich.
- """
- Runner backend subsystem root
-
- This module is the single entry point to the runner backend subsystem. Other
- subystems should only import this module and not its submodules or the
- individual backends.
- """
- import os
- import traceback
- from pathlib import Path
- from typing import Optional
-
- from ... import __VERSION__
- from ...config import get_settings
- from ...logger import set_logger
- from ...syringe import Inject
- from ...utils import get_timestamp
- from ..db import DB
- from ..models import ApplyWorkflow
- from ..models import Dataset
- from ..models import Workflow
- from ..models import WorkflowTask
- from ..schemas import JobStatusType
- from ._common import WORKFLOW_LOG_FILENAME
- from ._local import process_workflow as local_process_workflow
- from .common import close_job_logger
- from .common import JobExecutionError
- from .common import TaskExecutionError
- from .common import validate_workflow_compatibility # noqa: F401
- from .handle_failed_job import assemble_history_failed_job
- from .handle_failed_job import assemble_meta_failed_job
-
-
- _backends = {}
- _backend_errors: dict[str, Exception] = {}
- _backends["local"] = local_process_workflow
-
- try:
-     from ._slurm import process_workflow as slurm_process_workflow
-
-     _backends["slurm"] = slurm_process_workflow
- except ModuleNotFoundError as e:
-     _backend_errors["slurm"] = e
-
-
- def get_process_workflow():
-     settings = Inject(get_settings)
-     try:
-         process_workflow = _backends[settings.FRACTAL_RUNNER_BACKEND]
-     except KeyError:
-         raise _backend_errors.get(
-             settings.FRACTAL_RUNNER_BACKEND,
-             RuntimeError(
-                 "Unknown error during collection of backend "
-                 f"`{settings.FRACTAL_RUNNER_BACKEND}`"
-             ),
-         )
-     return process_workflow
-
-
- async def submit_workflow(
-     *,
-     workflow_id: int,
-     input_dataset_id: int,
-     output_dataset_id: int,
-     job_id: int,
-     worker_init: Optional[str] = None,
-     slurm_user: Optional[str] = None,
-     user_cache_dir: Optional[str] = None,
- ) -> None:
-     """
-     Prepares a workflow and applies it to a dataset
-
-     This function wraps the process_workflow one, which is different for each
-     backend (e.g. local or slurm backend).
-
-     Args:
-         workflow_id:
-             ID of the workflow being applied
-         input_dataset_id:
-             Input dataset ID
-         output_dataset_id:
-             ID of the destination dataset of the workflow.
-         job_id:
-             Id of the job record which stores the state for the current
-             workflow application.
-         worker_init:
-             Custom executor parameters that get parsed before the execution of
-             each task.
-         user_cache_dir:
-             Cache directory (namely a path where the user can write); for the
-             slurm backend, this is used as a base directory for
-             `job.working_dir_user`.
-         slurm_user:
-             The username to impersonate for the workflow execution, for the
-             slurm backend.
-     """
-     with next(DB.get_sync_db()) as db_sync:
-
-         job: ApplyWorkflow = db_sync.get(ApplyWorkflow, job_id)
-         if not job:
-             raise ValueError(f"Cannot fetch job {job_id} from database")
-
-         input_dataset: Dataset = db_sync.get(Dataset, input_dataset_id)
-         output_dataset: Dataset = db_sync.get(Dataset, output_dataset_id)
-         workflow: Workflow = db_sync.get(Workflow, workflow_id)
-         if not (input_dataset and output_dataset and workflow):
-             log_msg = ""
-             if not input_dataset:
-                 log_msg += (
-                     f"Cannot fetch input_dataset {input_dataset_id} "
-                     "from database\n"
-                 )
-             if not output_dataset:
-                 log_msg += (
-                     f"Cannot fetch output_dataset {output_dataset_id} "
-                     "from database\n"
-                 )
-             if not workflow:
-                 log_msg += (
-                     f"Cannot fetch workflow {workflow_id} from database\n"
-                 )
-             job.status = JobStatusType.FAILED
-             job.end_timestamp = get_timestamp()
-             job.log = log_msg
-             db_sync.merge(job)
-             db_sync.commit()
-             db_sync.close()
-             return
-
-         # Select backend
-         settings = Inject(get_settings)
-         FRACTAL_RUNNER_BACKEND = settings.FRACTAL_RUNNER_BACKEND
-         process_workflow = get_process_workflow()
-
-         # Prepare some of process_workflow arguments
-         input_paths = input_dataset.paths
-         output_path = output_dataset.paths[0]
-
-         # Define and create server-side working folder
-         project_id = workflow.project_id
-         timestamp_string = get_timestamp().strftime("%Y%m%d_%H%M%S")
-         WORKFLOW_DIR = (
-             settings.FRACTAL_RUNNER_WORKING_BASE_DIR
-             / (
-                 f"proj_{project_id:07d}_wf_{workflow_id:07d}_job_{job_id:07d}"
-                 f"_{timestamp_string}"
-             )
-         ).resolve()
-
-         if WORKFLOW_DIR.exists():
-             raise RuntimeError(f"Workflow dir {WORKFLOW_DIR} already exists.")
-
-         # Create WORKFLOW_DIR with 755 permissions
-         original_umask = os.umask(0)
-         WORKFLOW_DIR.mkdir(parents=True, mode=0o755)
-         os.umask(original_umask)
-
-         # Define and create user-side working folder, if needed
-         if FRACTAL_RUNNER_BACKEND == "local":
-             WORKFLOW_DIR_USER = WORKFLOW_DIR
-         elif FRACTAL_RUNNER_BACKEND == "slurm":
-
-             from ._slurm._subprocess_run_as_user import _mkdir_as_user
-
-             WORKFLOW_DIR_USER = (
-                 Path(user_cache_dir) / f"{WORKFLOW_DIR.name}"
-             ).resolve()
-             _mkdir_as_user(folder=str(WORKFLOW_DIR_USER), user=slurm_user)
-         else:
-             raise ValueError(f"{FRACTAL_RUNNER_BACKEND=} not supported")
-
-         # Update db
-         job.working_dir = WORKFLOW_DIR.as_posix()
-         job.working_dir_user = WORKFLOW_DIR_USER.as_posix()
-         db_sync.merge(job)
-         db_sync.commit()
-
-         # After Session.commit() is called, either explicitly or when using a
-         # context manager, all objects associated with the Session are expired.
-         # https://docs.sqlalchemy.org/en/14/orm/
-         # session_basics.html#opening-and-closing-a-session
-         # https://docs.sqlalchemy.org/en/14/orm/
-         # session_state_management.html#refreshing-expiring
-
-         # See issue #928:
-         # https://github.com/fractal-analytics-platform/
-         # fractal-server/issues/928
-
-         db_sync.refresh(input_dataset)
-         db_sync.refresh(output_dataset)
-         db_sync.refresh(workflow)
-
-         # Write logs
-         logger_name = f"WF{workflow_id}_job{job_id}"
-         log_file_path = WORKFLOW_DIR / WORKFLOW_LOG_FILENAME
-         logger = set_logger(
-             logger_name=logger_name,
-             log_file_path=log_file_path,
-         )
-         logger.info(
-             f'Start execution of workflow "{workflow.name}"; '
-             f"more logs at {str(log_file_path)}"
-         )
-         logger.debug(f"fractal_server.__VERSION__: {__VERSION__}")
-         logger.debug(f"FRACTAL_RUNNER_BACKEND: {FRACTAL_RUNNER_BACKEND}")
-         logger.debug(f"slurm_user: {slurm_user}")
-         logger.debug(f"slurm_account: {job.slurm_account}")
-         logger.debug(f"worker_init: {worker_init}")
-         logger.debug(f"input metadata: {input_dataset.meta}")
-         logger.debug(f"input_paths: {input_paths}")
-         logger.debug(f"output_path: {output_path}")
-         logger.debug(f"job.id: {job.id}")
-         logger.debug(f"job.working_dir: {job.working_dir}")
-         logger.debug(f"job.working_dir_user: {job.working_dir_user}")
-         logger.debug(f"job.first_task_index: {job.first_task_index}")
-         logger.debug(f"job.last_task_index: {job.last_task_index}")
-         logger.debug(f'START workflow "{workflow.name}"')
-
-     try:
-         # "The Session.close() method does not prevent the Session from being
-         # used again. The Session itself does not actually have a distinct
-         # “closed” state; it merely means the Session will release all database
-         # connections and ORM objects."
-         # (https://docs.sqlalchemy.org/en/20/orm/session_api.html#sqlalchemy.orm.Session.close).
-         #
-         # We close the session before the (possibly long) process_workflow
-         # call, to make sure all DB connections are released. The reason why we
-         # are not using a context manager within the try block is that we also
-         # need access to db_sync in the except branches.
-         db_sync = next(DB.get_sync_db())
-         db_sync.close()
-
-         output_dataset_meta_hist = await process_workflow(
-             workflow=workflow,
-             input_paths=input_paths,
-             output_path=output_path,
-             input_metadata=input_dataset.meta,
-             input_history=input_dataset.history,
-             slurm_user=slurm_user,
-             slurm_account=job.slurm_account,
-             user_cache_dir=user_cache_dir,
-             workflow_dir=WORKFLOW_DIR,
-             workflow_dir_user=WORKFLOW_DIR_USER,
-             logger_name=logger_name,
-             worker_init=worker_init,
-             first_task_index=job.first_task_index,
-             last_task_index=job.last_task_index,
-         )
-
-         logger.info(
-             f'End execution of workflow "{workflow.name}"; '
-             f"more logs at {str(log_file_path)}"
-         )
-         logger.debug(f'END workflow "{workflow.name}"')
-
-         # Replace output_dataset.meta and output_dataset.history with their
-         # up-to-date versions, obtained within process_workflow
-         output_dataset.history = output_dataset_meta_hist.pop("history")
-         output_dataset.meta = output_dataset_meta_hist.pop("metadata")
-
-         db_sync.merge(output_dataset)
-
-         # Update job DB entry
-         job.status = JobStatusType.DONE
-         job.end_timestamp = get_timestamp()
-         with log_file_path.open("r") as f:
-             logs = f.read()
-         job.log = logs
-         db_sync.merge(job)
-         close_job_logger(logger)
-         db_sync.commit()
-
-     except TaskExecutionError as e:
-
-         logger.debug(f'FAILED workflow "{workflow.name}", TaskExecutionError.')
-         logger.info(f'Workflow "{workflow.name}" failed (TaskExecutionError).')
-
-         # Assemble output_dataset.meta based on the last successful task, i.e.
-         # based on METADATA_FILENAME
-         output_dataset.meta = assemble_meta_failed_job(job, output_dataset)
-
-         # Assemble new history and assign it to output_dataset.meta
-         failed_wftask = db_sync.get(WorkflowTask, e.workflow_task_id)
-         output_dataset.history = assemble_history_failed_job(
-             job,
-             output_dataset,
-             workflow,
-             logger,
-             failed_wftask=failed_wftask,
-         )
-
-         db_sync.merge(output_dataset)
-
-         job.status = JobStatusType.FAILED
-         job.end_timestamp = get_timestamp()
-
-         exception_args_string = "\n".join(e.args)
-         job.log = (
-             f"TASK ERROR: "
-             f"Task name: {e.task_name}, "
-             f"position in Workflow: {e.workflow_task_order=}\n"
-             f"TRACEBACK:\n{exception_args_string}"
-         )
-         db_sync.merge(job)
-         close_job_logger(logger)
-         db_sync.commit()
-
-     except JobExecutionError as e:
-
-         logger.debug(f'FAILED workflow "{workflow.name}", JobExecutionError.')
-         logger.info(f'Workflow "{workflow.name}" failed (JobExecutionError).')
-
-         # Assemble output_dataset.meta based on the last successful task, i.e.
-         # based on METADATA_FILENAME
-         output_dataset.meta = assemble_meta_failed_job(job, output_dataset)
-
-         # Assemble new history and assign it to output_dataset.meta
-         output_dataset.history = assemble_history_failed_job(
-             job,
-             output_dataset,
-             workflow,
-             logger,
-         )
-
-         db_sync.merge(output_dataset)
-
-         job.status = JobStatusType.FAILED
-         job.end_timestamp = get_timestamp()
-         error = e.assemble_error()
-         job.log = f"JOB ERROR in Fractal job {job.id}:\nTRACEBACK:\n{error}"
-         db_sync.merge(job)
-         close_job_logger(logger)
-         db_sync.commit()
-
-     except Exception:
-
-         logger.debug(f'FAILED workflow "{workflow.name}", unknown error.')
-         logger.info(f'Workflow "{workflow.name}" failed (unkwnon error).')
-
-         current_traceback = traceback.format_exc()
-
-         # Assemble output_dataset.meta based on the last successful task, i.e.
-         # based on METADATA_FILENAME
-         output_dataset.meta = assemble_meta_failed_job(job, output_dataset)
-
-         # Assemble new history and assign it to output_dataset.meta
-         output_dataset.history = assemble_history_failed_job(
-             job,
-             output_dataset,
-             workflow,
-             logger,
-         )
-
-         db_sync.merge(output_dataset)
-
-         job.status = JobStatusType.FAILED
-         job.end_timestamp = get_timestamp()
-         job.log = (
-             f"UNKNOWN ERROR in Fractal job {job.id}\n"
-             f"TRACEBACK:\n{current_traceback}"
-         )
-         db_sync.merge(job)
-         close_job_logger(logger)
-         db_sync.commit()
-     finally:
-         db_sync.close()
fractal_server/app/runner/async_wrap.py
@@ -0,0 +1,27 @@
+ import asyncio
+ from functools import partial
+ from functools import wraps
+ from typing import Callable
+
+
+ def async_wrap(func: Callable) -> Callable:
+     """
+     Wrap a synchronous callable in an async task
+
+     Ref: [issue #140](https://github.com/fractal-analytics-platform/fractal-server/issues/140)
+     and [this StackOverflow answer](https://stackoverflow.com/q/43241221/19085332).
+
+     Returns:
+         async_wrapper:
+             A factory that allows wrapping a blocking callable within a
+             coroutine.
+     """ # noqa: E501
+
+     @wraps(func)
+     async def async_wrapper(*args, loop=None, executor=None, **kwargs):
+         if loop is None:
+             loop = asyncio.get_event_loop()
+         pfunc = partial(func, *args, **kwargs)
+         return await loop.run_in_executor(executor, pfunc)
+
+     return async_wrapper
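A minimal usage sketch for the new async_wrap helper (illustrative only, not part of the diff; the blocking function and its argument are made up):

import asyncio
import time

from fractal_server.app.runner.async_wrap import async_wrap


def blocking_step(seconds: float) -> str:
    # Stand-in for a long-running synchronous operation
    time.sleep(seconds)
    return f"done after {seconds}s"


async def main() -> None:
    # async_wrap(func) returns a coroutine factory; awaiting it runs the
    # blocking callable in the default thread-pool executor, so the event
    # loop is not blocked while it runs
    result = await async_wrap(blocking_step)(0.1)
    print(result)


asyncio.run(main())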
fractal_server/app/runner/components.py
@@ -0,0 +1,5 @@
+ def _index_to_component(ind: int) -> str:
+     return f"{ind:07d}"
+
+
+ _COMPONENT_KEY_ = "__FRACTAL_PARALLEL_COMPONENT__"
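A tiny illustration of the new component helpers (not part of the diff):

from fractal_server.app.runner.components import _COMPONENT_KEY_
from fractal_server.app.runner.components import _index_to_component

# Zero-padded labels identify parallel units of work within a task
print(_index_to_component(3))  # "0000003"
print({_COMPONENT_KEY_: _index_to_component(3)})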
fractal_server/app/runner/exceptions.py
@@ -0,0 +1,129 @@
+ import os
+ from typing import Optional
+
+
+ class TaskExecutionError(RuntimeError):
+     """
+     Forwards errors occurred during the execution of a task
+
+     This error wraps and forwards errors occurred during the execution of
+     tasks, when the exit code is larger than 0 (i.e. the error took place
+     within the task). This error also adds information that is useful to track
+     down and debug the failing task within a workflow.
+
+     Attributes:
+         workflow_task_id:
+             ID of the workflow task that failed.
+         workflow_task_order:
+             Order of the task within the workflow.
+         task_name:
+             Human readable name of the failing task.
+     """
+
+     workflow_task_id: Optional[int] = None
+     workflow_task_order: Optional[int] = None
+     task_name: Optional[str] = None
+
+     def __init__(
+         self,
+         *args,
+         workflow_task_id: Optional[int] = None,
+         workflow_task_order: Optional[int] = None,
+         task_name: Optional[str] = None,
+     ):
+         super().__init__(*args)
+         self.workflow_task_id = workflow_task_id
+         self.workflow_task_order = workflow_task_order
+         self.task_name = task_name
+
+
+ class JobExecutionError(RuntimeError):
+     """
+     Forwards errors in the execution of a task that are due to external factors
+
+     This error wraps and forwards errors occurred during the execution of
+     tasks, but related to external factors like:
+
+     1. A negative exit code (e.g. because the task received a TERM or KILL
+        signal);
+     2. An error on the executor side (e.g. the SLURM executor could not
+        find the pickled file with task output).
+
+     This error also adds information that is useful to track down and debug the
+     failing task within a workflow.
+
+     Attributes:
+         info:
+             A free field for additional information
+         cmd_file:
+             Path to the file of the command that was executed (e.g. a SLURM
+             submission script).
+         stdout_file:
+             Path to the file with the command stdout
+         stderr_file:
+             Path to the file with the command stderr
+     """
+
+     cmd_file: Optional[str] = None
+     stdout_file: Optional[str] = None
+     stderr_file: Optional[str] = None
+     info: Optional[str] = None
+
+     def __init__(
+         self,
+         *args,
+         cmd_file: Optional[str] = None,
+         stdout_file: Optional[str] = None,
+         stderr_file: Optional[str] = None,
+         info: Optional[str] = None,
+     ):
+         super().__init__(*args)
+         self.cmd_file = cmd_file
+         self.stdout_file = stdout_file
+         self.stderr_file = stderr_file
+         self.info = info
+
+     def _read_file(self, filepath: str) -> str:
+         """
+         Return the content of a text file, and handle the cases where it is
+         empty or missing
+         """
+         if os.path.exists(filepath):
+             with open(filepath, "r") as f:
+                 content = f.read()
+             if content:
+                 return f"Content of {filepath}:\n{content}"
+             else:
+                 return f"File {filepath} is empty\n"
+         else:
+             return f"File {filepath} is missing\n"
+
+     def assemble_error(self) -> str:
+         """
+         Read the files that are specified in attributes, and combine them in an
+         error message.
+         """
+         if self.cmd_file:
+             content = self._read_file(self.cmd_file)
+             cmd_content = f"COMMAND:\n{content}\n\n"
+         else:
+             cmd_content = ""
+         if self.stdout_file:
+             content = self._read_file(self.stdout_file)
+             out_content = f"STDOUT:\n{content}\n\n"
+         else:
+             out_content = ""
+         if self.stderr_file:
+             content = self._read_file(self.stderr_file)
+             err_content = f"STDERR:\n{content}\n\n"
+         else:
+             err_content = ""
+
+         content = f"{cmd_content}{out_content}{err_content}"
+         if self.info:
+             content = f"{content}ADDITIONAL INFO:\n{self.info}\n\n"
+
+         if not content:
+             content = str(self)
+         message = f"JobExecutionError\n\n{content}"
+         return message
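A short sketch of how the two new exception types can be instantiated and reported (illustrative only; the task name and file paths are hypothetical):

from fractal_server.app.runner.exceptions import JobExecutionError
from fractal_server.app.runner.exceptions import TaskExecutionError

# Task-level failure: the task exited with a positive return code
task_err = TaskExecutionError(
    "Traceback (most recent call last): ...",
    workflow_task_id=42,
    workflow_task_order=3,
    task_name="my_task",
)
print(task_err.task_name, task_err.workflow_task_order)

# Job-level failure: the error is external to the task; assemble_error()
# combines the command/stdout/stderr files (missing files are reported
# as such) into a single message
job_err = JobExecutionError(
    cmd_file="/tmp/job/submit.sbatch",
    stderr_file="/tmp/job/slurm.err",
    info="SLURM job was cancelled",
)
print(job_err.assemble_error())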
File without changes
fractal_server/app/runner/executors/slurm/__init__.py
@@ -0,0 +1,3 @@
+ from .executor import SlurmExecutor
+
+ __all__ = ["SlurmExecutor"]
fractal_server/app/runner/{_slurm → executors/slurm}/_batching.py
@@ -14,7 +14,7 @@ Submodule to determine the number of total/parallel tasks per SLURM job.
  import math
  from typing import Optional

- from ....logger import set_logger
+ from .....logger import set_logger

  logger = set_logger(__name__)

fractal_server/app/runner/executors/slurm/_check_jobs_status.py
@@ -0,0 +1,72 @@
+ from subprocess import run # nosec
+
+ from cfut.slurm import STATES_FINISHED
+
+ from .....logger import set_logger
+
+
+ logger = set_logger(__name__)
+
+
+ def run_squeue(job_ids):
+     res = run( # nosec
+         [
+             "squeue",
+             "--noheader",
+             "--format=%i %T",
+             "--jobs",
+             ",".join([str(j) for j in job_ids]),
+             "--states=all",
+         ],
+         capture_output=True,
+         encoding="utf-8",
+         check=False,
+     )
+     if res.returncode != 0:
+         logger.warning(
+             f"squeue command with {job_ids}"
+             f" failed with:\n{res.stderr=}\n{res.stdout=}"
+         )
+
+     return res
+
+
+ def _jobs_finished(job_ids) -> set[str]:
+     """
+     Check which ones of the given Slurm jobs already finished
+
+     The function is based on the `_jobs_finished` function from
+     clusterfutures (version 0.5).
+     Original Copyright: 2022 Adrian Sampson
+     (released under the MIT licence)
+     """
+
+     # If there is no Slurm job to check, return right away
+     if not job_ids:
+         return set()
+     id_to_state = dict()
+
+     res = run_squeue(job_ids)
+     if res.returncode == 0:
+         id_to_state = {
+             out.split()[0]: out.split()[1] for out in res.stdout.splitlines()
+         }
+     else:
+         id_to_state = dict()
+         for j in job_ids:
+             res = run_squeue([j])
+             if res.returncode != 0:
+                 logger.info(f"Job {j} not found. Marked it as completed")
+                 id_to_state.update({str(j): "COMPLETED"})
+             else:
+                 id_to_state.update(
+                     {res.stdout.split()[0]: res.stdout.split()[1]}
+                 )
+
+     # Finished jobs only stay in squeue for a few mins (configurable). If
+     # a job ID isn't there, we'll assume it's finished.
+     return {
+         j
+         for j in job_ids
+         if id_to_state.get(j, "COMPLETED") in STATES_FINISHED
+     }
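A usage sketch for the new status-check helper (illustrative only; it assumes a host where squeue is available, and the job IDs are hypothetical):

from fractal_server.app.runner.executors.slurm._check_jobs_status import (
    _jobs_finished,
)

# Poll a set of SLURM job IDs; IDs that squeue reports in a finished
# state, or that squeue no longer knows about, are returned as completed
pending = {"123456", "123457"}
done = _jobs_finished(pending)
pending -= done
print(f"still waiting for: {pending}")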
fractal_server/app/runner/{_slurm → executors/slurm}/_executor_wait_thread.py
@@ -6,10 +6,9 @@ from typing import Callable
  from typing import Optional

  from cfut import FileWaitThread
- from cfut import slurm
-
- from ....logger import set_logger

+ from .....logger import set_logger
+ from ._check_jobs_status import _jobs_finished

  logger = set_logger(__name__)

@@ -121,7 +120,7 @@ class FractalSlurmWaitThread(FractalFileWaitThread):
          super().check(i)
          if i % (self.slurm_poll_interval // self.interval) == 0:
              try:
-                 finished_jobs = slurm.jobs_finished(self.waiting.values())
+                 finished_jobs = _jobs_finished(self.waiting.values())
              except Exception:
                  # Don't abandon completion checking if jobs_finished errors
                  traceback.print_exc()