fractal-server 2.13.1__py3-none-any.whl → 2.14.0a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fractal_server/__init__.py +1 -1
- fractal_server/app/history/__init__.py +4 -0
- fractal_server/app/history/image_updates.py +142 -0
- fractal_server/app/history/status_enum.py +16 -0
- fractal_server/app/models/v2/__init__.py +5 -1
- fractal_server/app/models/v2/history.py +53 -0
- fractal_server/app/routes/api/v2/__init__.py +2 -2
- fractal_server/app/routes/api/v2/_aux_functions.py +78 -0
- fractal_server/app/routes/api/v2/dataset.py +12 -9
- fractal_server/app/routes/api/v2/history.py +247 -0
- fractal_server/app/routes/api/v2/project.py +25 -0
- fractal_server/app/routes/api/v2/workflow.py +18 -3
- fractal_server/app/routes/api/v2/workflowtask.py +22 -0
- fractal_server/app/runner/executors/base_runner.py +114 -0
- fractal_server/app/runner/{v2/_local → executors/local}/_local_config.py +3 -3
- fractal_server/app/runner/executors/local/_submit_setup.py +54 -0
- fractal_server/app/runner/executors/local/runner.py +200 -0
- fractal_server/app/runner/executors/{slurm → slurm_common}/_batching.py +1 -1
- fractal_server/app/runner/executors/{slurm → slurm_common}/_slurm_config.py +3 -3
- fractal_server/app/runner/{v2/_slurm_ssh → executors/slurm_common}/_submit_setup.py +13 -12
- fractal_server/app/runner/{v2/_slurm_common → executors/slurm_common}/get_slurm_config.py +9 -15
- fractal_server/app/runner/executors/{slurm/ssh → slurm_ssh}/_executor_wait_thread.py +1 -1
- fractal_server/app/runner/executors/{slurm/ssh → slurm_ssh}/_slurm_job.py +1 -1
- fractal_server/app/runner/executors/{slurm/ssh → slurm_ssh}/executor.py +13 -14
- fractal_server/app/runner/executors/{slurm/sudo → slurm_sudo}/_check_jobs_status.py +11 -9
- fractal_server/app/runner/executors/{slurm/sudo → slurm_sudo}/_executor_wait_thread.py +3 -3
- fractal_server/app/runner/executors/{slurm/sudo → slurm_sudo}/_subprocess_run_as_user.py +2 -68
- fractal_server/app/runner/executors/slurm_sudo/runner.py +632 -0
- fractal_server/app/runner/task_files.py +70 -96
- fractal_server/app/runner/v2/__init__.py +5 -19
- fractal_server/app/runner/v2/_local.py +84 -0
- fractal_server/app/runner/v2/{_slurm_ssh/__init__.py → _slurm_ssh.py} +10 -13
- fractal_server/app/runner/v2/{_slurm_sudo/__init__.py → _slurm_sudo.py} +10 -12
- fractal_server/app/runner/v2/runner.py +93 -28
- fractal_server/app/runner/v2/runner_functions.py +85 -62
- fractal_server/app/runner/v2/runner_functions_low_level.py +20 -20
- fractal_server/app/schemas/v2/dataset.py +0 -17
- fractal_server/app/schemas/v2/history.py +23 -0
- fractal_server/config.py +2 -2
- fractal_server/migrations/versions/8223fcef886c_image_status.py +63 -0
- fractal_server/migrations/versions/87cd72a537a2_add_historyitem_table.py +68 -0
- {fractal_server-2.13.1.dist-info → fractal_server-2.14.0a1.dist-info}/METADATA +1 -1
- {fractal_server-2.13.1.dist-info → fractal_server-2.14.0a1.dist-info}/RECORD +53 -47
- fractal_server/app/routes/api/v2/status.py +0 -168
- fractal_server/app/runner/executors/slurm/sudo/executor.py +0 -1281
- fractal_server/app/runner/v2/_local/__init__.py +0 -132
- fractal_server/app/runner/v2/_local/_submit_setup.py +0 -52
- fractal_server/app/runner/v2/_local/executor.py +0 -100
- fractal_server/app/runner/v2/_slurm_sudo/_submit_setup.py +0 -83
- fractal_server/app/runner/v2/handle_failed_job.py +0 -59
- /fractal_server/app/runner/executors/{slurm → local}/__init__.py +0 -0
- /fractal_server/app/runner/executors/{slurm/ssh → slurm_common}/__init__.py +0 -0
- /fractal_server/app/runner/executors/{_job_states.py → slurm_common/_job_states.py} +0 -0
- /fractal_server/app/runner/executors/{slurm → slurm_common}/remote.py +0 -0
- /fractal_server/app/runner/executors/{slurm → slurm_common}/utils_executors.py +0 -0
- /fractal_server/app/runner/executors/{slurm/sudo → slurm_ssh}/__init__.py +0 -0
- /fractal_server/app/runner/{v2/_slurm_common → executors/slurm_sudo}/__init__.py +0 -0
- {fractal_server-2.13.1.dist-info → fractal_server-2.14.0a1.dist-info}/LICENSE +0 -0
- {fractal_server-2.13.1.dist-info → fractal_server-2.14.0a1.dist-info}/WHEEL +0 -0
- {fractal_server-2.13.1.dist-info → fractal_server-2.14.0a1.dist-info}/entry_points.txt +0 -0
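Taken together, the renames above split the old `executors/slurm/{ssh,sudo}` tree into `slurm_common`, `slurm_ssh`, and `slurm_sudo`, move the local backend under `executors/local`, and introduce history tracking (`HistoryItemV2`, `ImageStatus`, the new `history.py` routes, and two migrations). Downstream code importing the old executor paths has to follow the renames. A minimal before/after sketch; `SlurmConfig` is an assumed export of the renamed `_slurm_config.py` module:

# fractal-server 2.13.1 (old layout):
# from fractal_server.app.runner.executors.slurm._slurm_config import SlurmConfig

# fractal-server 2.14.0a1 (new layout):
from fractal_server.app.runner.executors.slurm_common._slurm_config import (
    SlurmConfig,
)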
fractal_server/app/runner/task_files.py

@@ -2,6 +2,8 @@ from pathlib import Path
 from typing import Optional
 from typing import Union
 
+from pydantic import BaseModel
+
 from fractal_server.string_tools import sanitize_string
 
 
@@ -17,108 +19,80 @@ def task_subfolder_name(order: Union[int, str], task_name: str) -> str:
     return f"{order}_{task_name_slug}"
 
 
-class TaskFiles:
+class TaskFiles(BaseModel):
     """
-    Group all file paths pertaining to a task
-
-    Attributes:
-        workflow_dir_local:
-            Server-owned directory to store all task-execution-related relevant
-            files. Note: users cannot write directly to this folder.
-        workflow_dir_remote:
-            User-side directory with the same scope as `workflow_dir_local`,
-            and where a user can write.
-        subfolder_name:
-            Name of task-specific subfolder
-        remote_subfolder:
-            Path to user-side task-specific subfolder
-        task_name:
-            Name of the task
-        task_order:
-            Positional order of the task within a workflow.
-        component:
-            Specific component to run the task for (relevant for tasks to be
-            executed in parallel over many components).
-        file_prefix:
-            Prefix for all task-related files.
-        args:
-            Path for input json file.
-        metadiff:
-            Path for output json file with metadata update.
-        out:
-            Path for task-execution stdout.
-        err:
-            Path for task-execution stderr.
+    Group all file paths pertaining to a task FIXME
     """
 
-    workflow_dir_local: Path
-    workflow_dir_remote: Path
-    subfolder_name: str
-    remote_subfolder: Path
+    # Parent directory
+    root_dir_local: Path
+    root_dir_remote: Path
+
+    # Per-wftask
     task_name: str
-    task_order: Optional[int] = None
+    task_order: int
+
+    # Per-single-component
     component: Optional[str] = None
 
-    […]
-        workflow_dir_local: Path,
-        workflow_dir_remote: Path,
-        task_name: str,
-        task_order: Optional[int] = None,
-        component: Optional[str] = None,
-    ):
-        self.workflow_dir_local = workflow_dir_local
-        self.workflow_dir_remote = workflow_dir_remote
-        self.task_order = task_order
-        self.task_name = task_name
-        self.component = component
-
-        if self.component is not None:
-            component_safe = sanitize_string(str(self.component))
-            component_safe = f"_par_{component_safe}"
-        else:
-            component_safe = ""
-
-        if self.task_order is not None:
-            order = str(self.task_order)
-        else:
-            order = "0"
-        self.file_prefix = f"{order}{component_safe}"
-        self.subfolder_name = task_subfolder_name(
-            order=order, task_name=self.task_name
-        )
-        self.remote_subfolder = self.workflow_dir_remote / self.subfolder_name
-        self.args = self.remote_subfolder / f"{self.file_prefix}.args.json"
-        self.out = self.remote_subfolder / f"{self.file_prefix}.out"
-        self.err = self.remote_subfolder / f"{self.file_prefix}.err"
-        self.log = self.remote_subfolder / f"{self.file_prefix}.log"
-        self.metadiff = (
-            self.remote_subfolder / f"{self.file_prefix}.metadiff.json"
+    def _check_component(self):
+        if self.component is None:
+            raise ValueError("`component` cannot be None")
+
+    @property
+    def subfolder_name(self) -> str:
+        order = str(self.task_order or 0)
+        return task_subfolder_name(
+            order=order,
+            task_name=self.task_name,
         )
 
+    @property
+    def wftask_subfolder_remote(self) -> Path:
+        return self.root_dir_remote / self.subfolder_name
 
-    […]
+    @property
+    def wftask_subfolder_local(self) -> Path:
+        return self.root_dir_local / self.subfolder_name
+
+    @property
+    def log_file_local(self) -> str:
+        self._check_component()
+        return (
+            self.wftask_subfolder_local / f"{self.component}-log.txt"
+        ).as_posix()
+
+    @property
+    def log_file_remote(self) -> str:
+        self._check_component()
+        return (
+            self.wftask_subfolder_remote / f"{self.component}-log.txt"
+        ).as_posix()
+
+    @property
+    def args_file_local(self) -> str:
+        self._check_component()
+        return (
+            self.wftask_subfolder_local / f"{self.component}-args.json"
+        ).as_posix()
+
+    @property
+    def args_file_remote(self) -> str:
+        self._check_component()
+        return (
+            self.wftask_subfolder_remote / f"{self.component}-args.json"
+        ).as_posix()
+
+    @property
+    def metadiff_file_local(self) -> str:
+        self._check_component()
+        return (
+            self.wftask_subfolder_local / f"{self.component}-metadiff.json"
+        ).as_posix()
+
+    @property
+    def metadiff_file_remote(self) -> str:
+        self._check_component()
+        return (
+            self.wftask_subfolder_remote / f"{self.component}-metadiff.json"
+        ).as_posix()
fractal_server/app/runner/v2/__init__.py

@@ -27,13 +27,12 @@ from ...models.v2 import WorkflowV2
 from ...schemas.v2 import JobStatusTypeV2
 from ..exceptions import JobExecutionError
 from ..exceptions import TaskExecutionError
-from ..executors.slurm.sudo._subprocess_run_as_user import _mkdir_as_user
+from ..executors.slurm_sudo._subprocess_run_as_user import _mkdir_as_user
 from ..filenames import WORKFLOW_LOG_FILENAME
 from ..task_files import task_subfolder_name
 from ._local import process_workflow as local_process_workflow
 from ._slurm_ssh import process_workflow as slurm_ssh_process_workflow
 from ._slurm_sudo import process_workflow as slurm_sudo_process_workflow
-from .handle_failed_job import mark_last_wftask_as_failed
 from fractal_server import __VERSION__
 from fractal_server.app.models import UserSettings
 
@@ -201,7 +200,7 @@ def submit_workflow(
             f"{settings.FRACTAL_RUNNER_BACKEND}."
         )
 
-        # Create all tasks subfolders
+        # Create all tasks subfolders # FIXME: do this with Runner
         for order in range(job.first_task_index, job.last_task_index + 1):
             this_wftask = workflow.task_list[order]
             task_name = this_wftask.task.name
@@ -219,10 +218,7 @@ def submit_workflow(
                     folder=str(WORKFLOW_DIR_REMOTE / subfolder_name),
                     user=slurm_user,
                 )
-
-                # Create local subfolder (with standard permission set)
-                (WORKFLOW_DIR_LOCAL / subfolder_name).mkdir()
-                logger.info("Skip remote-subfolder creation")
+
     except Exception as e:
         error_type = type(e).__name__
         fail_job(
@@ -345,10 +341,6 @@ def submit_workflow(
         logger.debug(f'FAILED workflow "{workflow.name}", TaskExecutionError.')
        logger.info(f'Workflow "{workflow.name}" failed (TaskExecutionError).')
 
-        mark_last_wftask_as_failed(
-            dataset_id=dataset_id,
-            logger_name=logger_name,
-        )
         exception_args_string = "\n".join(e.args)
         log_msg = (
             f"TASK ERROR: "
@@ -361,10 +353,7 @@ def submit_workflow(
     except JobExecutionError as e:
         logger.debug(f'FAILED workflow "{workflow.name}", JobExecutionError.')
         logger.info(f'Workflow "{workflow.name}" failed (JobExecutionError).')
-        mark_last_wftask_as_failed(
-            dataset_id=dataset_id,
-            logger_name=logger_name,
-        )
+
         fail_job(
             db=db_sync,
             job=job,
@@ -378,10 +367,7 @@ def submit_workflow(
     except Exception:
         logger.debug(f'FAILED workflow "{workflow.name}", unknown error.')
         logger.info(f'Workflow "{workflow.name}" failed (unkwnon error).')
-        mark_last_wftask_as_failed(
-            dataset_id=dataset_id,
-            logger_name=logger_name,
-        )
+
         current_traceback = traceback.format_exc()
         fail_job(
             db=db_sync,
fractal_server/app/runner/v2/_local.py (new file)

@@ -0,0 +1,84 @@
+from pathlib import Path
+from typing import Optional
+
+from ...models.v2 import DatasetV2
+from ...models.v2 import WorkflowV2
+from ..executors.local._submit_setup import _local_submit_setup
+from ..executors.local.runner import LocalRunner
+from ..set_start_and_last_task_index import set_start_and_last_task_index
+from .runner import execute_tasks_v2
+from fractal_server.images.models import AttributeFiltersType
+
+
+def process_workflow(
+    *,
+    workflow: WorkflowV2,
+    dataset: DatasetV2,
+    workflow_dir_local: Path,
+    workflow_dir_remote: Optional[Path] = None,
+    first_task_index: Optional[int] = None,
+    last_task_index: Optional[int] = None,
+    logger_name: str,
+    job_attribute_filters: AttributeFiltersType,
+    user_id: int,
+    **kwargs,
+) -> None:
+    """
+    Run a workflow through
+
+    Args:
+        workflow:
+            The workflow to be run
+        dataset:
+            Initial dataset.
+        workflow_dir_local:
+            Working directory for this run.
+        workflow_dir_remote:
+            Working directory for this run, on the user side. This argument is
+            present for compatibility with the standard backend interface, but
+            for the `local` backend it cannot be different from
+            `workflow_dir_local`.
+        first_task_index:
+            Positional index of the first task to execute; if `None`, start
+            from `0`.
+        last_task_index:
+            Positional index of the last task to execute; if `None`, proceed
+            until the last task.
+        logger_name: Logger name
+        user_id:
+
+    Raises:
+        TaskExecutionError: wrapper for errors raised during tasks' execution
+            (positive exit codes).
+        JobExecutionError: wrapper for errors raised by the tasks' executors
+            (negative exit codes).
+    """
+
+    if workflow_dir_remote and (workflow_dir_remote != workflow_dir_local):
+        raise NotImplementedError(
+            "Local backend does not support different directories "
+            f"{workflow_dir_local=} and {workflow_dir_remote=}"
+        )
+
+    # Set values of first_task_index and last_task_index
+    num_tasks = len(workflow.task_list)
+    first_task_index, last_task_index = set_start_and_last_task_index(
+        num_tasks,
+        first_task_index=first_task_index,
+        last_task_index=last_task_index,
+    )
+
+    with LocalRunner(root_dir_local=workflow_dir_local) as runner:
+        execute_tasks_v2(
+            wf_task_list=workflow.task_list[
+                first_task_index : (last_task_index + 1)
+            ],
+            dataset=dataset,
+            runner=runner,
+            workflow_dir_local=workflow_dir_local,
+            workflow_dir_remote=workflow_dir_local,
+            logger_name=logger_name,
+            submit_setup_call=_local_submit_setup,
+            job_attribute_filters=job_attribute_filters,
+            user_id=user_id,
+        )
fractal_server/app/runner/v2/_slurm_ssh.py (renamed from _slurm_ssh/__init__.py)

@@ -19,14 +19,14 @@ Executor objects.
 from pathlib import Path
 from typing import Optional
 
-from […]
-from […]
-from […]
-from […]
-from […]
-from […]
-from ..[…]
-from .[…]
+from ....ssh._fabric import FractalSSH
+from ...models.v2 import DatasetV2
+from ...models.v2 import WorkflowV2
+from ..exceptions import JobExecutionError
+from ..executors.slurm_common._submit_setup import _slurm_submit_setup
+from ..executors.slurm_ssh.executor import FractalSlurmSSHExecutor
+from ..set_start_and_last_task_index import set_start_and_last_task_index
+from .runner import execute_tasks_v2
 from fractal_server.images.models import AttributeFiltersType
 from fractal_server.logger import set_logger
 
@@ -46,10 +46,7 @@ def process_workflow(
     fractal_ssh: FractalSSH,
     worker_init: Optional[str] = None,
     user_id: int,
-    # […]
-    user_cache_dir: Optional[str] = None,
-    slurm_user: Optional[str] = None,
-    slurm_account: Optional[str] = None,
+    **kwargs, # not used
 ) -> None:
     """
     Process workflow (SLURM backend public interface)
@@ -89,7 +86,7 @@ def process_workflow(
                 first_task_index : (last_task_index + 1)
             ],
             dataset=dataset,
-            executor=executor,
+            runner=executor,
            workflow_dir_local=workflow_dir_local,
            workflow_dir_remote=workflow_dir_remote,
            logger_name=logger_name,
fractal_server/app/runner/v2/_slurm_sudo.py (renamed from _slurm_sudo/__init__.py)

@@ -19,12 +19,12 @@ Executor objects.
 from pathlib import Path
 from typing import Optional
 
-from […]
-from […]
-from […]
-from […]
-from ..[…]
-from .[…]
+from ...models.v2 import DatasetV2
+from ...models.v2 import WorkflowV2
+from ..executors.slurm_common._submit_setup import _slurm_submit_setup
+from ..executors.slurm_sudo.runner import RunnerSlurmSudo
+from ..set_start_and_last_task_index import set_start_and_last_task_index
+from .runner import execute_tasks_v2
 from fractal_server.images.models import AttributeFiltersType
 
 
@@ -65,13 +65,11 @@ def process_workflow(
     if isinstance(worker_init, str):
         worker_init = worker_init.split("\n")
 
-    with […]
-        debug=True,
-        keep_logs=True,
+    with RunnerSlurmSudo(
         slurm_user=slurm_user,
         user_cache_dir=user_cache_dir,
-        […]
-        […]
+        root_dir_local=workflow_dir_local,
+        root_dir_remote=workflow_dir_remote,
         common_script_lines=worker_init,
         slurm_account=slurm_account,
     ) as executor:
@@ -80,7 +78,7 @@ def process_workflow(
                 first_task_index : (last_task_index + 1)
             ],
            dataset=dataset,
-            executor=executor,
+            runner=executor,
            workflow_dir_local=workflow_dir_local,
            workflow_dir_remote=workflow_dir_remote,
            logger_name=logger_name,
fractal_server/app/runner/v2/runner.py

@@ -1,5 +1,5 @@
+import json
 import logging
-from concurrent.futures import ThreadPoolExecutor
 from copy import copy
 from copy import deepcopy
 from pathlib import Path
@@ -18,11 +18,14 @@ from .runner_functions import run_v2_task_non_parallel
 from .runner_functions import run_v2_task_parallel
 from .task_interface import TaskOutput
 from fractal_server.app.db import get_sync_db
+from fractal_server.app.history.status_enum import HistoryItemImageStatus
 from fractal_server.app.models.v2 import AccountingRecord
 from fractal_server.app.models.v2 import DatasetV2
+from fractal_server.app.models.v2 import HistoryItemV2
+from fractal_server.app.models.v2 import ImageStatus
+from fractal_server.app.models.v2 import TaskGroupV2
 from fractal_server.app.models.v2 import WorkflowTaskV2
-from fractal_server.app.[…]
-from fractal_server.app.schemas.v2.workflowtask import WorkflowTaskStatusTypeV2
+from fractal_server.app.runner.executors.base_runner import BaseRunner
 from fractal_server.images.models import AttributeFiltersType
 from fractal_server.images.tools import merge_type_filters
 
@@ -31,7 +34,7 @@ def execute_tasks_v2(
     *,
     wf_task_list: list[WorkflowTaskV2],
     dataset: DatasetV2,
-    executor: ThreadPoolExecutor,
+    runner: BaseRunner,
     user_id: int,
     workflow_dir_local: Path,
     workflow_dir_remote: Optional[Path] = None,
@@ -43,8 +46,8 @@ def execute_tasks_v2(
 
     if not workflow_dir_local.exists():
         logger.warning(
-            f"Now creating {workflow_dir_local}, "
-            "[…]"
+            f"Now creating {workflow_dir_local}, but it "
+            "should have already happened."
         )
         workflow_dir_local.mkdir()
 
@@ -60,66 +63,116 @@ def execute_tasks_v2(
 
         # PRE TASK EXECUTION
 
-        # […]
+        # Filter images by types and attributes (in two steps)
         type_filters = copy(current_dataset_type_filters)
         type_filters_patch = merge_type_filters(
             task_input_types=task.input_types,
             wftask_type_filters=wftask.type_filters,
         )
         type_filters.update(type_filters_patch)
-        filtered_images = filter_image_list(
+        type_filtered_images = filter_image_list(
             images=tmp_images,
             type_filters=type_filters,
+            attribute_filters=None,
+        )
+        filtered_images = filter_image_list(
+            images=type_filtered_images,
+            type_filters=None,
             attribute_filters=job_attribute_filters,
         )
 
-        # […]
+        # Create history item
         with next(get_sync_db()) as db:
-            […]
+            workflowtask_dump = dict(
+                **wftask.model_dump(exclude={"task"}),
+                task=wftask.task.model_dump(),
+            )
+            # Exclude timestamps since they'd need to be serialized properly
+            task_group = db.get(TaskGroupV2, wftask.task.taskgroupv2_id)
+            task_group_dump = task_group.model_dump(
+                exclude={
+                    "timestamp_created",
+                    "timestamp_last_used",
+                }
+            )
+            parameters_hash = str(
+                hash(
+                    json.dumps(
+                        [workflowtask_dump, task_group_dump],
+                        sort_keys=True,
+                        indent=None,
+                    ).encode("utf-8")
+                )
+            )
+            images = {
+                image["zarr_url"]: HistoryItemImageStatus.SUBMITTED
+                for image in filtered_images
+            }
+            history_item = HistoryItemV2(
+                dataset_id=dataset.id,
+                workflowtask_id=wftask.id,
+                workflowtask_dump=workflowtask_dump,
+                task_group_dump=task_group_dump,
+                parameters_hash=parameters_hash,
+                num_available_images=len(type_filtered_images),
+                num_current_images=len(filtered_images),
+                images=images,
+            )
+            db.add(history_item)
+            for image in filtered_images:
+                db.merge(
+                    ImageStatus(
+                        zarr_url=image["zarr_url"],
+                        workflowtask_id=wftask.id,
+                        dataset_id=dataset.id,
+                        parameters_hash=parameters_hash,
+                        status=HistoryItemImageStatus.SUBMITTED,
+                        logfile="/placeholder",
+                    )
+                )
             db.commit()
+            db.refresh(history_item)
+            history_item_id = history_item.id
+
         # TASK EXECUTION (V2)
         if task.type == "non_parallel":
-            current_task_output, num_tasks = run_v2_task_non_parallel(
+            (
+                current_task_output,
+                num_tasks,
+                exceptions,
+            ) = run_v2_task_non_parallel(
                 images=filtered_images,
                 zarr_dir=zarr_dir,
                 wftask=wftask,
                 task=task,
                 workflow_dir_local=workflow_dir_local,
                 workflow_dir_remote=workflow_dir_remote,
-                executor=executor,
+                executor=runner,
                 submit_setup_call=submit_setup_call,
+                history_item_id=history_item_id,
             )
         elif task.type == "parallel":
-            current_task_output, num_tasks = run_v2_task_parallel(
+            current_task_output, num_tasks, exceptions = run_v2_task_parallel(
                 images=filtered_images,
                 wftask=wftask,
                 task=task,
                 workflow_dir_local=workflow_dir_local,
                 workflow_dir_remote=workflow_dir_remote,
-                executor=executor,
+                executor=runner,
                 submit_setup_call=submit_setup_call,
+                history_item_id=history_item_id,
             )
         elif task.type == "compound":
-            current_task_output, num_tasks = run_v2_task_compound(
+            current_task_output, num_tasks, exceptions = run_v2_task_compound(
                 images=filtered_images,
                 zarr_dir=zarr_dir,
                 wftask=wftask,
                 task=task,
                 workflow_dir_local=workflow_dir_local,
                 workflow_dir_remote=workflow_dir_remote,
-                executor=executor,
+                executor=runner,
                 submit_setup_call=submit_setup_call,
+                history_item_id=history_item_id,
             )
         else:
             raise ValueError(f"Unexpected error: Invalid {task.type=}.")
@@ -145,6 +198,8 @@ def execute_tasks_v2(
         # Update image list
         num_new_images = 0
         current_task_output.check_zarr_urls_are_unique()
+        # FIXME: Introduce for loop over task outputs, and processe them sequentially
+        # each failure should lead to an update of the specific image status
         for image_obj in current_task_output.image_list_updates:
             image = image_obj.model_dump()
             # Edit existing image
@@ -270,7 +325,6 @@ def execute_tasks_v2(
         # information
         with next(get_sync_db()) as db:
             db_dataset = db.get(DatasetV2, dataset.id)
-            db_dataset.history[-1]["status"] = WorkflowTaskStatusTypeV2.DONE
             db_dataset.type_filters = current_dataset_type_filters
             db_dataset.images = tmp_images
             for attribute_name in [
@@ -291,4 +345,15 @@ def execute_tasks_v2(
             db.add(record)
             db.commit()
 
+        if exceptions != {}:
+            logger.error(
+                f'END {wftask.order}-th task (name="{task_name}") '
+                "- ERROR."
+            )
+            # Raise first error
+            for key, value in exceptions.items():
+                raise JobExecutionError(
+                    info=(f"An error occurred.\nOriginal error:\n{value}")
+                )
+
         logger.debug(f'END {wftask.order}-th task (name="{task_name}")')