fractal-server 2.10.6__py3-none-any.whl → 2.11.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fractal_server/__init__.py +1 -1
- fractal_server/app/models/v2/dataset.py +9 -6
- fractal_server/app/models/v2/job.py +5 -0
- fractal_server/app/models/v2/workflowtask.py +5 -8
- fractal_server/app/routes/api/v1/dataset.py +2 -2
- fractal_server/app/routes/api/v2/_aux_functions.py +3 -10
- fractal_server/app/routes/api/v2/_aux_functions_tasks.py +21 -0
- fractal_server/app/routes/api/v2/images.py +30 -7
- fractal_server/app/routes/api/v2/job.py +14 -1
- fractal_server/app/routes/api/v2/status.py +20 -20
- fractal_server/app/routes/api/v2/submit.py +11 -4
- fractal_server/app/routes/api/v2/workflow.py +95 -0
- fractal_server/app/routes/api/v2/workflow_import.py +8 -0
- fractal_server/app/routes/api/v2/workflowtask.py +45 -26
- fractal_server/app/runner/{async_wrap.py → async_wrap_v1.py} +1 -1
- fractal_server/app/runner/executors/slurm/_slurm_config.py +1 -1
- fractal_server/app/runner/executors/slurm/ssh/executor.py +2 -2
- fractal_server/app/runner/filenames.py +2 -4
- fractal_server/app/runner/v1/_common.py +4 -4
- fractal_server/app/runner/v1/_local/__init__.py +2 -2
- fractal_server/app/runner/v1/_slurm/__init__.py +2 -2
- fractal_server/app/runner/v1/handle_failed_job.py +4 -4
- fractal_server/app/runner/v2/__init__.py +12 -66
- fractal_server/app/runner/v2/_local/__init__.py +17 -47
- fractal_server/app/runner/v2/_local_experimental/__init__.py +27 -61
- fractal_server/app/runner/v2/_slurm_ssh/__init__.py +26 -65
- fractal_server/app/runner/v2/_slurm_sudo/__init__.py +24 -66
- fractal_server/app/runner/v2/handle_failed_job.py +31 -130
- fractal_server/app/runner/v2/merge_outputs.py +6 -17
- fractal_server/app/runner/v2/runner.py +51 -89
- fractal_server/app/runner/v2/task_interface.py +0 -2
- fractal_server/app/schemas/_filter_validators.py +43 -0
- fractal_server/app/schemas/_validators.py +13 -2
- fractal_server/app/schemas/v2/dataset.py +85 -12
- fractal_server/app/schemas/v2/dumps.py +6 -8
- fractal_server/app/schemas/v2/job.py +14 -0
- fractal_server/app/schemas/v2/task.py +9 -9
- fractal_server/app/schemas/v2/task_group.py +2 -2
- fractal_server/app/schemas/v2/workflowtask.py +69 -20
- fractal_server/data_migrations/2_11_0.py +168 -0
- fractal_server/images/__init__.py +0 -1
- fractal_server/images/models.py +12 -35
- fractal_server/images/tools.py +53 -14
- fractal_server/migrations/versions/db09233ad13a_split_filters_and_keep_old_columns.py +96 -0
- fractal_server/utils.py +9 -7
- {fractal_server-2.10.6.dist-info → fractal_server-2.11.0.dist-info}/METADATA +1 -1
- {fractal_server-2.10.6.dist-info → fractal_server-2.11.0.dist-info}/RECORD +50 -47
- {fractal_server-2.10.6.dist-info → fractal_server-2.11.0.dist-info}/LICENSE +0 -0
- {fractal_server-2.10.6.dist-info → fractal_server-2.11.0.dist-info}/WHEEL +0 -0
- {fractal_server-2.10.6.dist-info → fractal_server-2.11.0.dist-info}/entry_points.txt +0 -0
fractal_server/app/runner/v2/_local_experimental/__init__.py

```diff
@@ -4,60 +4,16 @@ from typing import Optional

 from ....models.v2 import DatasetV2
 from ....models.v2 import WorkflowV2
-from ...async_wrap import async_wrap
 from ...exceptions import JobExecutionError
 from ...filenames import SHUTDOWN_FILENAME
 from ...set_start_and_last_task_index import set_start_and_last_task_index
 from ..runner import execute_tasks_v2
 from ._submit_setup import _local_submit_setup
 from .executor import FractalProcessPoolExecutor
+from fractal_server.images.models import AttributeFiltersType


-def _process_workflow(
-    *,
-    workflow: WorkflowV2,
-    dataset: DatasetV2,
-    logger_name: str,
-    workflow_dir_local: Path,
-    first_task_index: int,
-    last_task_index: int,
-) -> dict:
-    """
-    Internal processing routine
-
-    Schedules the workflow using a `FractalProcessPoolExecutor`.
-
-    Cf.
-    [process_workflow][fractal_server.app.runner.v2._local_experimental.process_workflow]
-    for the call signature.
-    """
-    with FractalProcessPoolExecutor(
-        shutdown_file=workflow_dir_local / SHUTDOWN_FILENAME
-    ) as executor:
-        try:
-            new_dataset_attributes = execute_tasks_v2(
-                wf_task_list=workflow.task_list[
-                    first_task_index : (last_task_index + 1)  # noqa
-                ],
-                dataset=dataset,
-                executor=executor,
-                workflow_dir_local=workflow_dir_local,
-                workflow_dir_remote=workflow_dir_local,
-                logger_name=logger_name,
-                submit_setup_call=_local_submit_setup,
-            )
-        except BrokenProcessPool as e:
-            raise JobExecutionError(
-                info=(
-                    "Job failed with BrokenProcessPool error, likely due to "
-                    f"an executor shutdown.\nOriginal error:\n{e.args[0]}"
-                )
-            )
-
-    return new_dataset_attributes
-
-
-async def process_workflow(
+def process_workflow(
     *,
     workflow: WorkflowV2,
     dataset: DatasetV2,
@@ -66,12 +22,13 @@ async def process_workflow
     first_task_index: Optional[int] = None,
     last_task_index: Optional[int] = None,
     logger_name: str,
+    job_attribute_filters: AttributeFiltersType,
     # Slurm-specific
     user_cache_dir: Optional[str] = None,
     slurm_user: Optional[str] = None,
     slurm_account: Optional[str] = None,
     worker_init: Optional[str] = None,
-) -> dict:
+) -> None:
     """
     Run a workflow

@@ -123,11 +80,6 @@ async def process_workflow
             (positive exit codes).
         JobExecutionError: wrapper for errors raised by the tasks' executors
             (negative exit codes).
-
-    Returns:
-        output_dataset_metadata:
-            The updated metadata for the dataset, as returned by the last task
-            of the workflow
     """

     if workflow_dir_remote and (workflow_dir_remote != workflow_dir_local):
@@ -144,12 +96,26 @@
         last_task_index=last_task_index,
     )

-    new_dataset_attributes = await async_wrap(_process_workflow)(
-        workflow=workflow,
-        dataset=dataset,
-        logger_name=logger_name,
-        workflow_dir_local=workflow_dir_local,
-        first_task_index=first_task_index,
-        last_task_index=last_task_index,
-    )
-    return new_dataset_attributes
+    with FractalProcessPoolExecutor(
+        shutdown_file=workflow_dir_local / SHUTDOWN_FILENAME
+    ) as executor:
+        try:
+            execute_tasks_v2(
+                wf_task_list=workflow.task_list[
+                    first_task_index : (last_task_index + 1)
+                ],
+                dataset=dataset,
+                executor=executor,
+                workflow_dir_local=workflow_dir_local,
+                workflow_dir_remote=workflow_dir_local,
+                logger_name=logger_name,
+                submit_setup_call=_local_submit_setup,
+                job_attribute_filters=job_attribute_filters,
+            )
+        except BrokenProcessPool as e:
+            raise JobExecutionError(
+                info=(
+                    "Job failed with BrokenProcessPool error, likely due to "
+                    f"an executor shutdown.\nOriginal error:\n{e.args[0]}"
+                )
+            )
```
fractal_server/app/runner/v2/_slurm_ssh/__init__.py

```diff
@@ -17,48 +17,51 @@ This backend runs fractal workflows in a SLURM cluster using Clusterfutures
 Executor objects.
 """
 from pathlib import Path
-from typing import Any
 from typing import Optional
-from typing import Union

 from .....ssh._fabric import FractalSSH
 from ....models.v2 import DatasetV2
 from ....models.v2 import WorkflowV2
-from ...async_wrap import async_wrap
 from ...exceptions import JobExecutionError
 from ...executors.slurm.ssh.executor import FractalSlurmSSHExecutor
 from ...set_start_and_last_task_index import set_start_and_last_task_index
 from ..runner import execute_tasks_v2
 from ._submit_setup import _slurm_submit_setup
+from fractal_server.images.models import AttributeFiltersType
 from fractal_server.logger import set_logger

-
 logger = set_logger(__name__)


-def _process_workflow(
+def process_workflow(
     *,
     workflow: WorkflowV2,
     dataset: DatasetV2,
-    logger_name: str,
     workflow_dir_local: Path,
-    workflow_dir_remote: Path,
-    first_task_index: int,
-    last_task_index: int,
+    workflow_dir_remote: Optional[Path] = None,
+    first_task_index: Optional[int] = None,
+    last_task_index: Optional[int] = None,
+    logger_name: str,
+    job_attribute_filters: AttributeFiltersType,
     fractal_ssh: FractalSSH,
-    worker_init: Optional[Union[str, list[str]]] = None,
-) -> dict[str, Any]:
+    worker_init: Optional[str] = None,
+    # Not used
+    user_cache_dir: Optional[str] = None,
+    slurm_user: Optional[str] = None,
+    slurm_account: Optional[str] = None,
+) -> None:
     """
-
-
-    This function initialises the a FractalSlurmExecutor, setting logging,
-    workflow working dir and user to impersonate. It then schedules the
-    workflow tasks and returns the new dataset attributes
-
-    Returns:
-        new_dataset_attributes:
+    Process workflow (SLURM backend public interface)
     """

+    # Set values of first_task_index and last_task_index
+    num_tasks = len(workflow.task_list)
+    first_task_index, last_task_index = set_start_and_last_task_index(
+        num_tasks,
+        first_task_index=first_task_index,
+        last_task_index=last_task_index,
+    )
+
     if isinstance(worker_init, str):
         worker_init = worker_init.split("\n")

@@ -80,57 +83,15 @@ def _process_workflow(
         workflow_dir_remote=workflow_dir_remote,
         common_script_lines=worker_init,
     ) as executor:
-        new_dataset_attributes = execute_tasks_v2(
+        execute_tasks_v2(
             wf_task_list=workflow.task_list[
-                first_task_index : (last_task_index + 1)
-            ],
+                first_task_index : (last_task_index + 1)
+            ],
             dataset=dataset,
             executor=executor,
             workflow_dir_local=workflow_dir_local,
             workflow_dir_remote=workflow_dir_remote,
             logger_name=logger_name,
             submit_setup_call=_slurm_submit_setup,
+            job_attribute_filters=job_attribute_filters,
         )
-    return new_dataset_attributes
-
-
-async def process_workflow(
-    *,
-    workflow: WorkflowV2,
-    dataset: DatasetV2,
-    workflow_dir_local: Path,
-    workflow_dir_remote: Optional[Path] = None,
-    first_task_index: Optional[int] = None,
-    last_task_index: Optional[int] = None,
-    logger_name: str,
-    # Not used
-    fractal_ssh: FractalSSH,
-    user_cache_dir: Optional[str] = None,
-    slurm_user: Optional[str] = None,
-    slurm_account: Optional[str] = None,
-    worker_init: Optional[str] = None,
-) -> dict:
-    """
-    Process workflow (SLURM backend public interface)
-    """
-
-    # Set values of first_task_index and last_task_index
-    num_tasks = len(workflow.task_list)
-    first_task_index, last_task_index = set_start_and_last_task_index(
-        num_tasks,
-        first_task_index=first_task_index,
-        last_task_index=last_task_index,
-    )
-
-    new_dataset_attributes = await async_wrap(_process_workflow)(
-        workflow=workflow,
-        dataset=dataset,
-        logger_name=logger_name,
-        workflow_dir_local=workflow_dir_local,
-        workflow_dir_remote=workflow_dir_remote,
-        first_task_index=first_task_index,
-        last_task_index=last_task_index,
-        worker_init=worker_init,
-        fractal_ssh=fractal_ssh,
-    )
-    return new_dataset_attributes
```
fractal_server/app/runner/v2/_slurm_sudo/__init__.py

```diff
@@ -17,44 +17,45 @@ This backend runs fractal workflows in a SLURM cluster using Clusterfutures
 Executor objects.
 """
 from pathlib import Path
-from typing import Any
 from typing import Optional
-from typing import Union

 from ....models.v2 import DatasetV2
 from ....models.v2 import WorkflowV2
-from ...async_wrap import async_wrap
 from ...executors.slurm.sudo.executor import FractalSlurmExecutor
 from ...set_start_and_last_task_index import set_start_and_last_task_index
 from ..runner import execute_tasks_v2
 from ._submit_setup import _slurm_submit_setup
+from fractal_server.images.models import AttributeFiltersType


-def _process_workflow(
+def process_workflow(
     *,
     workflow: WorkflowV2,
     dataset: DatasetV2,
-    logger_name: str,
     workflow_dir_local: Path,
-    workflow_dir_remote: Path,
-    first_task_index: int,
-    last_task_index: int,
+    workflow_dir_remote: Optional[Path] = None,
+    first_task_index: Optional[int] = None,
+    last_task_index: Optional[int] = None,
+    logger_name: str,
+    job_attribute_filters: AttributeFiltersType,
+    # Slurm-specific
+    user_cache_dir: Optional[str] = None,
     slurm_user: Optional[str] = None,
     slurm_account: Optional[str] = None,
-    user_cache_dir: Optional[str] = None,
-    worker_init: Optional[Union[str, list[str]]] = None,
-) -> dict[str, Any]:
+    worker_init: Optional[str] = None,
+) -> None:
     """
-
-
-    This function initialises the a FractalSlurmExecutor, setting logging,
-    workflow working dir and user to impersonate. It then schedules the
-    workflow tasks and returns the new dataset attributes
-
-    Returns:
-        new_dataset_attributes:
+    Process workflow (SLURM backend public interface).
     """

+    # Set values of first_task_index and last_task_index
+    num_tasks = len(workflow.task_list)
+    first_task_index, last_task_index = set_start_and_last_task_index(
+        num_tasks,
+        first_task_index=first_task_index,
+        last_task_index=last_task_index,
+    )
+
     if not slurm_user:
         raise RuntimeError(
             "slurm_user argument is required, for slurm backend"
@@ -73,58 +74,15 @@ def _process_workflow(
         common_script_lines=worker_init,
         slurm_account=slurm_account,
     ) as executor:
-        new_dataset_attributes = execute_tasks_v2(
+        execute_tasks_v2(
             wf_task_list=workflow.task_list[
-                first_task_index : (last_task_index + 1)
-            ],
+                first_task_index : (last_task_index + 1)
+            ],
             dataset=dataset,
             executor=executor,
             workflow_dir_local=workflow_dir_local,
             workflow_dir_remote=workflow_dir_remote,
             logger_name=logger_name,
             submit_setup_call=_slurm_submit_setup,
+            job_attribute_filters=job_attribute_filters,
         )
-    return new_dataset_attributes
-
-
-async def process_workflow(
-    *,
-    workflow: WorkflowV2,
-    dataset: DatasetV2,
-    workflow_dir_local: Path,
-    workflow_dir_remote: Optional[Path] = None,
-    first_task_index: Optional[int] = None,
-    last_task_index: Optional[int] = None,
-    logger_name: str,
-    # Slurm-specific
-    user_cache_dir: Optional[str] = None,
-    slurm_user: Optional[str] = None,
-    slurm_account: Optional[str] = None,
-    worker_init: Optional[str] = None,
-) -> dict:
-    """
-    Process workflow (SLURM backend public interface).
-    """
-
-    # Set values of first_task_index and last_task_index
-    num_tasks = len(workflow.task_list)
-    first_task_index, last_task_index = set_start_and_last_task_index(
-        num_tasks,
-        first_task_index=first_task_index,
-        last_task_index=last_task_index,
-    )
-
-    new_dataset_attributes = await async_wrap(_process_workflow)(
-        workflow=workflow,
-        dataset=dataset,
-        logger_name=logger_name,
-        workflow_dir_local=workflow_dir_local,
-        workflow_dir_remote=workflow_dir_remote,
-        first_task_index=first_task_index,
-        last_task_index=last_task_index,
-        user_cache_dir=user_cache_dir,
-        slurm_user=slurm_user,
-        slurm_account=slurm_account,
-        worker_init=worker_init,
-    )
-    return new_dataset_attributes
```
fractal_server/app/runner/v2/handle_failed_job.py

```diff
@@ -12,147 +12,48 @@
 """
 Helper functions to handle Dataset history.
 """
-import json
 import logging
-from pathlib import Path
-from typing import Any
-from typing import Optional
+
+from sqlalchemy.orm.attributes import flag_modified

 from ...models.v2 import DatasetV2
-from ...models.v2 import JobV2
-from ...models.v2 import WorkflowTaskV2
-from ...models.v2 import WorkflowV2
 from ...schemas.v2 import WorkflowTaskStatusTypeV2
-from ..filenames import FILTERS_FILENAME
-from ..filenames import HISTORY_FILENAME
-from ..filenames import IMAGES_FILENAME
+from fractal_server.app.db import get_sync_db


-def assemble_history_failed_job(
-    job: JobV2,
-    dataset: DatasetV2,
-    workflow: WorkflowV2,
-    logger_name: Optional[str] = None,
-    failed_wftask: Optional[WorkflowTaskV2] = None,
-) -> list[dict[str, Any]]:
+def mark_last_wftask_as_failed(
+    dataset_id: int,
+    logger_name: str,
+) -> None:
     """
-
+    Edit dataset history, by marking last item as failed.

     Args:
-        job:
-            The failed `JobV2` object.
-        dataset:
-            The `DatasetV2` object associated to `job`.
-        workflow:
-            The `WorkflowV2` object associated to `job`.
+        dataset: The `DatasetV2` object
         logger_name: A logger name.
-        failed_wftask:
-            If set, append it to `history` during step 3; if `None`, infer
-            it by comparing the job task list and the one in
-            `HISTORY_FILENAME`.
-
-    Returns:
-        The new value of `history`, to be merged into
-        `dataset.meta`.
     """

     logger = logging.getLogger(logger_name)
-
-
-
-
-
-
-
-
-    # Part 2: Extend history based on temporary-file contents
-    tmp_history_file = Path(job.working_dir) / HISTORY_FILENAME
-    try:
-        with tmp_history_file.open("r") as f:
-            tmp_file_history = json.load(f)
-            new_history.extend(tmp_file_history)
-    except FileNotFoundError:
-        tmp_file_history = []
-
-    # Part 3/A: Identify failed task, if needed
-    if failed_wftask is None:
-        job_wftasks = workflow.task_list[
-            job.first_task_index : (job.last_task_index + 1)  # noqa
-        ]
-        tmp_file_wftasks = [
-            history_item["workflowtask"] for history_item in tmp_file_history
-        ]
-        if len(job_wftasks) <= len(tmp_file_wftasks):
-            n_tasks_job = len(job_wftasks)
-            n_tasks_tmp = len(tmp_file_wftasks)
-            logger.error(
-                "Cannot identify the failed task based on job task list "
-                f"(length {n_tasks_job}) and temporary-file task list "
-                f"(length {n_tasks_tmp})."
+    with next(get_sync_db()) as db:
+        db_dataset = db.get(DatasetV2, dataset_id)
+        if len(db_dataset.history) == 0:
+            logger.warning(
+                f"History for {dataset_id=} is empty. Likely reason: the job "
+                "failed before its first task was marked as SUBMITTED. "
+                "Continue."
             )
-
-
-
-
-
-
-
-
-
-
-
-
-        )
-
-
-    return new_history
-
-
-def assemble_images_failed_job(job: JobV2) -> Optional[dict[str, Any]]:
-    """
-    Assemble `DatasetV2.images` for a failed workflow-execution.
-
-    Assemble new value of `images` based on the last successful task, i.e.
-    based on the content of the temporary `IMAGES_FILENAME` file. If the file
-    is missing, return `None`.
-
-    Argumentss:
-        job:
-            The failed `JobV2` object.
-
-    Returns:
-        The new value of `dataset.images`, or `None` if `IMAGES_FILENAME`
-        is missing.
-    """
-    tmp_file = Path(job.working_dir) / IMAGES_FILENAME
-    try:
-        with tmp_file.open("r") as f:
-            new_images = json.load(f)
-            return new_images
-    except FileNotFoundError:
-        return None
-
-
-def assemble_filters_failed_job(job: JobV2) -> Optional[dict[str, Any]]:
-    """
-    Assemble `DatasetV2.filters` for a failed workflow-execution.
-
-    Assemble new value of `filters` based on the last successful task, i.e.
-    based on the content of the temporary `FILTERS_FILENAME` file. If the file
-    is missing, return `None`.
-
-    Argumentss:
-        job:
-            The failed `JobV2` object.
-
-    Returns:
-        The new value of `dataset.filters`, or `None` if `FILTERS_FILENAME`
-        is missing.
-    """
-    tmp_file = Path(job.working_dir) / FILTERS_FILENAME
-    try:
-        with tmp_file.open("r") as f:
-            new_filters = json.load(f)
-            return new_filters
-    except FileNotFoundError:
-        return None
+            return
+        workflowtask_id = db_dataset.history[-1]["workflowtask"]["id"]
+        last_item_status = db_dataset.history[-1]["status"]
+        if last_item_status != WorkflowTaskStatusTypeV2.SUBMITTED:
+            logger.warning(
+                "Unexpected branch: "
+                f"Last history item, for {workflowtask_id=}, "
+                f"has status {last_item_status}. Skip."
+            )
+            return
+        logger.info(f"Setting history item for {workflowtask_id=} to failed.")
+        db_dataset.history[-1]["status"] = WorkflowTaskStatusTypeV2.FAILED
+        flag_modified(db_dataset, "history")
+        db.merge(db_dataset)
+        db.commit()
```
fractal_server/app/runner/v2/merge_outputs.py

```diff
@@ -1,38 +1,27 @@
-from copy import copy
-
 from fractal_server.app.runner.v2.deduplicate_list import deduplicate_list
 from fractal_server.app.runner.v2.task_interface import TaskOutput


 def merge_outputs(task_outputs: list[TaskOutput]) -> TaskOutput:

+    if len(task_outputs) == 0:
+        return TaskOutput()
+
     final_image_list_updates = []
     final_image_list_removals = []
-    last_new_filters = None

-    for ind, task_output in enumerate(task_outputs):
+    for task_output in task_outputs:

         final_image_list_updates.extend(task_output.image_list_updates)
         final_image_list_removals.extend(task_output.image_list_removals)

-
-        current_new_filters = task_output.filters
-        if ind == 0:
-            last_new_filters = copy(current_new_filters)
-        if current_new_filters != last_new_filters:
-            raise ValueError(f"{current_new_filters=} but {last_new_filters=}")
-        last_new_filters = copy(current_new_filters)
-
+    # Note: the ordering of `image_list_removals` is not guaranteed
     final_image_list_updates = deduplicate_list(final_image_list_updates)
-
-    additional_args = {}
-    if last_new_filters is not None:
-        additional_args["filters"] = last_new_filters
+    final_image_list_removals = list(set(final_image_list_removals))

     final_output = TaskOutput(
         image_list_updates=final_image_list_updates,
         image_list_removals=final_image_list_removals,
-        **additional_args,
     )

     return final_output
```