fractal-server 2.14.0a10__py3-none-any.whl → 2.14.0a11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30)
  1. fractal_server/__init__.py +1 -1
  2. fractal_server/app/routes/api/v2/submit.py +1 -1
  3. fractal_server/app/runner/components.py +0 -3
  4. fractal_server/app/runner/exceptions.py +4 -0
  5. fractal_server/app/runner/executors/base_runner.py +16 -17
  6. fractal_server/app/runner/executors/local/{_local_config.py → get_local_config.py} +0 -7
  7. fractal_server/app/runner/executors/local/runner.py +117 -58
  8. fractal_server/app/runner/executors/slurm_common/_check_jobs_status.py +4 -0
  9. fractal_server/app/runner/executors/slurm_ssh/executor.py +7 -5
  10. fractal_server/app/runner/executors/slurm_ssh/runner.py +6 -10
  11. fractal_server/app/runner/executors/slurm_sudo/runner.py +201 -96
  12. fractal_server/app/runner/task_files.py +8 -0
  13. fractal_server/app/runner/v2/__init__.py +0 -366
  14. fractal_server/app/runner/v2/_local.py +2 -2
  15. fractal_server/app/runner/v2/_slurm_ssh.py +2 -2
  16. fractal_server/app/runner/v2/_slurm_sudo.py +2 -2
  17. fractal_server/app/runner/v2/db_tools.py +87 -0
  18. fractal_server/app/runner/v2/runner.py +77 -81
  19. fractal_server/app/runner/v2/runner_functions.py +274 -436
  20. fractal_server/app/runner/v2/runner_functions_low_level.py +37 -39
  21. fractal_server/app/runner/v2/submit_workflow.py +366 -0
  22. fractal_server/app/runner/v2/task_interface.py +31 -0
  23. {fractal_server-2.14.0a10.dist-info → fractal_server-2.14.0a11.dist-info}/METADATA +1 -1
  24. {fractal_server-2.14.0a10.dist-info → fractal_server-2.14.0a11.dist-info}/RECORD +27 -28
  25. fractal_server/app/runner/executors/local/_submit_setup.py +0 -46
  26. fractal_server/app/runner/executors/slurm_common/_submit_setup.py +0 -84
  27. fractal_server/app/runner/v2/_db_tools.py +0 -48
  28. {fractal_server-2.14.0a10.dist-info → fractal_server-2.14.0a11.dist-info}/LICENSE +0 -0
  29. {fractal_server-2.14.0a10.dist-info → fractal_server-2.14.0a11.dist-info}/WHEEL +0 -0
  30. {fractal_server-2.14.0a10.dist-info → fractal_server-2.14.0a11.dist-info}/entry_points.txt +0 -0
@@ -1 +1 @@
- __VERSION__ = "2.14.0a10"
+ __VERSION__ = "2.14.0a11"
@@ -30,7 +30,7 @@ from fractal_server.app.routes.aux.validate_user_settings import (
  from fractal_server.app.runner.set_start_and_last_task_index import (
      set_start_and_last_task_index,
  )
- from fractal_server.app.runner.v2 import submit_workflow
+ from fractal_server.app.runner.v2.submit_workflow import submit_workflow
  from fractal_server.app.schemas.v2 import JobCreateV2
  from fractal_server.app.schemas.v2 import JobReadV2
  from fractal_server.app.schemas.v2 import JobStatusTypeV2
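For downstream code, only the import path changes: `submit_workflow` moved out of the `fractal_server.app.runner.v2` package `__init__` (files 13 and 21 in the list above) into its own module. A minimal before/after sketch:

    # Old (2.14.0a10), re-exported from the package __init__:
    #   from fractal_server.app.runner.v2 import submit_workflow

    # New (2.14.0a11), from the dedicated module:
    from fractal_server.app.runner.v2.submit_workflow import submit_workflow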
@@ -1,5 +1,2 @@
  def _index_to_component(ind: int) -> str:
      return f"{ind:07d}"
-
-
- _COMPONENT_KEY_ = "__FRACTAL_PARALLEL_COMPONENT__"
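With `_COMPONENT_KEY_` gone, only the zero-padding helper survives in `components.py`; for instance:

    from fractal_server.app.runner.components import _index_to_component

    assert _index_to_component(3) == "0000003"  # f"{3:07d}"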
@@ -37,6 +37,10 @@ class TaskExecutionError(RuntimeError):
          self.task_name = task_name


+ class TaskOutputValidationError(ValueError):
+     pass
+
+
  class JobExecutionError(RuntimeError):
      """
      Forwards errors in the execution of a task that are due to external factors
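The new `TaskOutputValidationError` is a plain `ValueError` subclass; `task_interface.py` (+31 lines in this release) is a plausible consumer. A hedged sketch of the intended usage, with a hypothetical `TaskOutput` model and `parse_task_output` helper that are not part of this diff:

    from pydantic import BaseModel, ValidationError

    from fractal_server.app.runner.exceptions import TaskOutputValidationError


    class TaskOutput(BaseModel):  # hypothetical stand-in model
        image_list_updates: list[dict] = []


    def parse_task_output(raw: dict) -> TaskOutput:
        # Re-raise Pydantic validation failures as the runner-specific
        # error, so callers can tell malformed task output apart from
        # generic task/job execution failures.
        try:
            return TaskOutput(**raw)
        except ValidationError as e:
            raise TaskOutputValidationError(str(e))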
@@ -1,6 +1,6 @@
  from typing import Any

- from fractal_server.app.runner.components import _COMPONENT_KEY_
+ from fractal_server.app.runner.task_files import TaskFiles
  from fractal_server.app.schemas.v2.task import TaskTypeType


@@ -29,9 +29,10 @@ class BaseRunner(object):
          self,
          func: callable,
          parameters: dict[str, Any],
-         history_item_id: int,
+         history_unit_id: int,
+         task_files: TaskFiles,
          task_type: TaskTypeType,
-         **kwargs,
+         config: Any,
      ) -> tuple[Any, BaseException]:
          """
          Run a single fractal task.
@@ -45,7 +46,7 @@
              history_item_id:
                  Database ID of the corresponding `HistoryItemV2` entry.
              task_type: Task type.
-             kwargs: Runner-specific parameters.
+             config: Runner-specific parameters.
          """
          raise NotImplementedError()

@@ -53,9 +54,10 @@
          self,
          func: callable,
          list_parameters: list[dict[str, Any]],
-         history_item_id: int,
+         history_unit_ids: list[int],
+         list_task_files: list[TaskFiles],
          task_type: TaskTypeType,
-         **kwargs,
+         config: Any,
      ) -> tuple[dict[int, Any], dict[int, BaseException]]:
          """
          Run a parallel fractal task.
@@ -70,7 +72,7 @@
              history_item_id:
                  Database ID of the corresponding `HistoryItemV2` entry.
              task_type: Task type.
-             kwargs: Runner-specific parameters.
+             config: Runner-specific parameters.
          """
          raise NotImplementedError()

@@ -101,15 +103,11 @@
                  f"Forbidden 'zarr_urls' key in {list(parameters.keys())}"
              )

-         if _COMPONENT_KEY_ not in parameters.keys():
-             raise ValueError(
-                 f"No '{_COMPONENT_KEY_}' key in in {list(parameters.keys())}"
-             )
-
      def validate_multisubmit_parameters(
          self,
          list_parameters: list[dict[str, Any]],
          task_type: TaskTypeType,
+         list_task_files: list[TaskFiles],
      ) -> None:
          """
          Validate parameters for `multi_submit` method
@@ -121,6 +119,12 @@
          if task_type not in TASK_TYPES_MULTISUBMIT:
              raise ValueError(f"Invalid {task_type=} for `multisubmit`.")

+         subfolders = set(
+             task_file.wftask_subfolder_local for task_file in list_task_files
+         )
+         if len(subfolders) != 1:
+             raise ValueError(f"More than one subfolders: {subfolders}.")
+
          if not isinstance(list_parameters, list):
              raise ValueError("`parameters` must be a list.")

@@ -131,11 +135,6 @@
                  raise ValueError(
                      f"No 'zarr_url' key in in {list(single_kwargs.keys())}"
                  )
-             if _COMPONENT_KEY_ not in single_kwargs.keys():
-                 raise ValueError(
-                     f"No '{_COMPONENT_KEY_}' key "
-                     f"in {list(single_kwargs.keys())}"
-                 )
          if task_type == "parallel":
              zarr_urls = [kwargs["zarr_url"] for kwargs in list_parameters]
              if len(zarr_urls) != len(set(zarr_urls)):
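Taken together, the `BaseRunner` changes replace the old `history_item_id` plus `**kwargs` interface with explicit `history_unit_id(s)`, `task_files`, and `config` arguments, and drop the `_COMPONENT_KEY_` bookkeeping. A hedged sketch of the new call shapes; `runner`, `run_task`, `parameters`, `task_files`, and `backend_config` are assumed to exist and are not defined in this diff:

    # Single (non-parallel) task:
    result, exception = runner.submit(
        func=run_task,
        parameters=parameters,
        history_unit_id=42,          # was: history_item_id
        task_files=task_files,       # new explicit TaskFiles argument
        task_type="non_parallel",
        config=backend_config,       # was: **kwargs
    )

    # Parallel task, one HistoryUnit and one TaskFiles per element:
    results, exceptions = runner.multisubmit(
        func=run_task,
        list_parameters=list_parameters,
        history_unit_ids=history_unit_ids,
        list_task_files=list_task_files,
        task_type="parallel",
        config=backend_config,
    )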
@@ -48,13 +48,6 @@ class LocalBackendConfig(BaseModel):
      parallel_tasks_per_job: Optional[int] = None


- def get_default_local_backend_config():
-     """
-     Return a default `LocalBackendConfig` configuration object
-     """
-     return LocalBackendConfig(parallel_tasks_per_job=None)
-
-
  def get_local_backend_config(
      wftask: WorkflowTaskV2,
      which_type: Literal["non_parallel", "parallel"],
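With `get_default_local_backend_config()` removed (and `_local_config.py` renamed to `get_local_config.py`, file 6 above), a default config is now a direct instantiation:

    from fractal_server.app.runner.executors.local.get_local_config import (
        LocalBackendConfig,
    )

    # Equivalent to the removed helper; `parallel_tasks_per_job=None` makes
    # multisubmit fall back to running all elements in a single chunk.
    config = LocalBackendConfig(parallel_tasks_per_job=None)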
@@ -2,14 +2,14 @@ from concurrent.futures import Future
  from concurrent.futures import ThreadPoolExecutor
  from pathlib import Path
  from typing import Any
- from typing import Optional
+ from typing import Literal

- from ._local_config import get_default_local_backend_config
- from ._local_config import LocalBackendConfig
- from fractal_server.app.runner.components import _COMPONENT_KEY_
+ from .get_local_config import LocalBackendConfig
+ from fractal_server.app.db import get_sync_db
  from fractal_server.app.runner.executors.base_runner import BaseRunner
  from fractal_server.app.runner.task_files import TaskFiles
- from fractal_server.app.schemas.v2.task import TaskTypeType
+ from fractal_server.app.runner.v2.db_tools import update_status_of_history_unit
+ from fractal_server.app.schemas.v2 import HistoryUnitStatus
  from fractal_server.logger import set_logger

  logger = set_logger(__name__)
@@ -49,67 +49,106 @@ class LocalRunner(BaseRunner):
          self,
          func: callable,
          parameters: dict[str, Any],
+         history_unit_id: int,
          task_files: TaskFiles,
-         task_type: TaskTypeType,
-         local_backend_config: Optional[LocalBackendConfig] = None,
+         task_type: Literal[
+             "non_parallel",
+             "converter_non_parallel",
+             "compound",
+             "converter_compound",
+         ],
+         config: LocalBackendConfig,
      ) -> tuple[Any, Exception]:
          logger.debug("[submit] START")

-         current_task_files = TaskFiles(
-             **task_files.model_dump(
-                 exclude={"component"},
-             ),
-             component=parameters[_COMPONENT_KEY_],
-         )
-
          self.validate_submit_parameters(parameters, task_type=task_type)
-         workdir_local = current_task_files.wftask_subfolder_local
+         workdir_local = task_files.wftask_subfolder_local
          workdir_local.mkdir()

          # SUBMISSION PHASE
-         future = self.executor.submit(func, parameters=parameters)
+         future = self.executor.submit(
+             func,
+             parameters=parameters,
+             remote_files=task_files.remote_files_dict,
+         )

          # RETRIEVAL PHASE
-         try:
-             result = future.result()
-             logger.debug(f"[submit] END {result=}")
-             return result, None
-         except Exception as e:
-             exception = e
-             logger.debug(f"[submit] END {exception=}")
-             return None, exception
+         with next(get_sync_db()) as db:
+             try:
+                 result = future.result()
+                 logger.debug("[submit] END with result")
+                 if task_type not in ["compound", "converter_compound"]:
+                     update_status_of_history_unit(
+                         history_unit_id=history_unit_id,
+                         status=HistoryUnitStatus.DONE,
+                         db_sync=db,
+                     )
+                 return result, None
+             except Exception as e:
+                 exception = e
+                 logger.debug("[submit] END with exception")
+                 update_status_of_history_unit(
+                     history_unit_id=history_unit_id,
+                     status=HistoryUnitStatus.FAILED,
+                     db_sync=db,
+                 )
+
+                 return None, exception

      def multisubmit(
          self,
          func: callable,
          list_parameters: list[dict],
-         task_files: TaskFiles,
-         task_type: TaskTypeType,
-         local_backend_config: Optional[LocalBackendConfig] = None,
+         history_unit_ids: list[int],
+         list_task_files: list[TaskFiles],
+         task_type: Literal["parallel", "compound", "converter_compound"],
+         config: LocalBackendConfig,
      ):
+         """
+         Note:
+
+         1. The number of sruns and futures is equal to `len(list_parameters)`.
+         2. The number of `HistoryUnit`s is equal to `len(history_unit_ids)`.
+         3. For compound tasks, these two numbers are not the same.
+
+         For this reason, we defer database updates to the caller function,
+         when we are in one of the "compound" cases
+
+         """
+         # FIXME: De-duplicate this check
+         if task_type in ["compound", "converter_compound"]:
+             if len(history_unit_ids) != 1:
+                 raise NotImplementedError(
+                     "We are breaking the assumption that compound/multisubmit "
+                     "is associated to a single HistoryUnit. This is not "
+                     "supported."
+                 )
+         elif task_type == "parallel" and len(history_unit_ids) != len(
+             list_parameters
+         ):
+             raise ValueError(
+                 f"{len(history_unit_ids)=} differs from "
+                 f"{len(list_parameters)=}."
+             )
+
          logger.debug(f"[multisubmit] START, {len(list_parameters)=}")

          self.validate_multisubmit_parameters(
              list_parameters=list_parameters,
              task_type=task_type,
+             list_task_files=list_task_files,
          )

-         workdir_local = task_files.wftask_subfolder_local
-         if task_type not in ["compound", "converter_compound"]:
+         workdir_local = list_task_files[0].wftask_subfolder_local
+         if task_type == "parallel":
              workdir_local.mkdir()

-         # Get local_backend_config
-         if local_backend_config is None:
-             local_backend_config = get_default_local_backend_config()
-
          # Set `n_elements` and `parallel_tasks_per_job`
          n_elements = len(list_parameters)
-         parallel_tasks_per_job = local_backend_config.parallel_tasks_per_job
+         parallel_tasks_per_job = config.parallel_tasks_per_job
          if parallel_tasks_per_job is None:
              parallel_tasks_per_job = n_elements

-         original_task_files = task_files
-
          # Execute tasks, in chunks of size `parallel_tasks_per_job`
          results: dict[int, Any] = {}
          exceptions: dict[int, BaseException] = {}
@@ -119,37 +158,57 @@ class LocalRunner(BaseRunner):
              ]

              active_futures: dict[int, Future] = {}
-             active_task_files: dict[int, TaskFiles] = {}
              for ind_within_chunk, kwargs in enumerate(list_parameters_chunk):
                  positional_index = ind_chunk + ind_within_chunk
-                 component = kwargs[_COMPONENT_KEY_]
-                 future = self.executor.submit(func, parameters=kwargs)
-                 active_futures[positional_index] = future
-                 active_task_files[positional_index] = TaskFiles(
-                     **original_task_files.model_dump(exclude={"component"}),
-                     component=component,
+                 future = self.executor.submit(
+                     func,
+                     parameters=kwargs,
+                     remote_files=list_task_files[
+                         positional_index
+                     ].remote_files_dict,
                  )
+                 active_futures[positional_index] = future

              while active_futures:
                  # FIXME: add shutdown detection
                  # if file exists: cancel all futures, and raise
                  finished_futures = [
-                     keyval
-                     for keyval in active_futures.items()
-                     if not keyval[1].running()
+                     index_and_future
+                     for index_and_future in active_futures.items()
+                     if not index_and_future[1].running()
                  ]
-                 for positional_index, fut in finished_futures:
-                     active_futures.pop(positional_index)
-                     # current_task_files = active_task_files.pop(
-                     #     positional_index
-                     # )
-                     zarr_url = list_parameters[positional_index]["zarr_url"]
-                     try:
-                         results[positional_index] = fut.result()
-                         print(f"Mark {zarr_url=} as done, {kwargs}")
-                     except Exception as e:
-                         print(f"Mark {zarr_url=} as failed, {kwargs} - {e}")
-                         exceptions[positional_index] = e
+                 if len(finished_futures) == 0:
+                     continue
+
+                 with next(get_sync_db()) as db:
+                     for positional_index, fut in finished_futures:
+                         active_futures.pop(positional_index)
+                         if task_type == "parallel":
+                             current_history_unit_id = history_unit_ids[
+                                 positional_index
+                             ]
+
+                         try:
+                             results[positional_index] = fut.result()
+                             if task_type == "parallel":
+                                 update_status_of_history_unit(
+                                     history_unit_id=current_history_unit_id,
+                                     status=HistoryUnitStatus.DONE,
+                                     db_sync=db,
+                                 )
+
+                         except Exception as e:
+                             exceptions[positional_index] = e
+                             if task_type == "parallel":
+                                 update_status_of_history_unit(
+                                     history_unit_id=current_history_unit_id,
+                                     status=HistoryUnitStatus.FAILED,
+                                     db_sync=db,
+                                 )
+
+                             # FIXME: what should happen here? Option 1: stop
+                             # all existing tasks and shutdown runner (for the
+                             # compound-task case)

          logger.debug(f"[multisubmit] END, {results=}, {exceptions=}")

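A hedged usage sketch of the reworked `LocalRunner.multisubmit` for a "parallel" task; `runner`, `run_single_task`, and `list_task_files` are illustrative placeholders rather than code from this release:

    from fractal_server.app.runner.executors.local.get_local_config import (
        LocalBackendConfig,
    )

    list_parameters = [{"zarr_url": f"/data/plate.zarr/{i}"} for i in range(4)]
    results, exceptions = runner.multisubmit(
        func=run_single_task,
        list_parameters=list_parameters,
        history_unit_ids=[101, 102, 103, 104],  # one per "parallel" element
        list_task_files=list_task_files,        # one TaskFiles per element
        task_type="parallel",
        # Elements run in chunks of two; each HistoryUnit row is set to DONE
        # or FAILED as its future completes.
        config=LocalBackendConfig(parallel_tasks_per_job=2),
    )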
@@ -32,6 +32,10 @@ def run_squeue(job_ids: list[str]) -> subprocess.CompletedProcess:
      return res


+ def are_all_jobs_on_squeue(job_ids: list[str]) -> bool:
+     pass
+
+
  def get_finished_jobs(job_ids: list[str]) -> set[str]:
      """
      Check which ones of the given Slurm jobs already finished
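The new `are_all_jobs_on_squeue` ships as a stub (its body is `pass`). A possible implementation sketch, assuming `run_squeue` captures text output with one job ID as the first token of each line; this is not the released code:

    def are_all_jobs_on_squeue(job_ids: list[str]) -> bool:
        # Compare requested job IDs against those currently listed by squeue.
        res = run_squeue(job_ids)
        listed = {
            line.split()[0]
            for line in res.stdout.splitlines()
            if line.strip()
        }
        return set(job_ids) <= listed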
@@ -24,7 +24,6 @@ from ..slurm_common.utils_executors import get_pickle_file_path
  from ..slurm_common.utils_executors import get_slurm_file_path
  from ..slurm_common.utils_executors import get_slurm_script_file_path
  from ._executor_wait_thread import FractalSlurmSSHWaitThread
- from fractal_server.app.runner.components import _COMPONENT_KEY_
  from fractal_server.app.runner.compress_folder import compress_folder
  from fractal_server.app.runner.exceptions import JobExecutionError
  from fractal_server.app.runner.exceptions import TaskExecutionError
@@ -526,10 +525,13 @@ class FractalSlurmSSHExecutor(Executor):
          # `component = {"zarr_url": "/something", "param": 1}``). The
          # try/except covers the case of e.g. `executor.map([1, 2])`,
          # which is useful for testing.
-         try:
-             actual_component = component.get(_COMPONENT_KEY_, None)
-         except AttributeError:
-             actual_component = str(component)
+
+         # FIXME: the use of _COMPONENT_KEY_ is now deprecated
+         # try:
+         #     actual_component = component.get(_COMPONENT_KEY_, None)
+         # except AttributeError:
+         #     actual_component = str(component)
+         actual_component = "FAKE_INVALID_VALUE_FIXME"

          _task_file_paths = TaskFiles(
              root_dir_local=task_files.workflow_dir_local,
@@ -13,7 +13,6 @@ from pydantic import ConfigDict

  from ._check_job_status_ssh import get_finished_jobs_ssh
  from fractal_server import __VERSION__
- from fractal_server.app.runner.components import _COMPONENT_KEY_
  from fractal_server.app.runner.exceptions import JobExecutionError
  from fractal_server.app.runner.exceptions import TaskExecutionError
  from fractal_server.app.runner.executors.base_runner import BaseRunner
@@ -31,11 +30,6 @@ from fractal_server.logger import set_logger
  from fractal_server.ssh._fabric import FractalSSH
  from fractal_server.syringe import Inject

- # from fractal_server.app.history import ImageStatus
- # from fractal_server.app.history import update_all_images
- # from fractal_server.app.history import update_single_image
- # from fractal_server.app.history import update_single_image_logfile
-

  logger = set_logger(__name__)

@@ -500,7 +494,9 @@ class RunnerSlurmSSH(BaseRunner):
              **task_files.model_dump(
                  exclude={"component"},
              ),
-             component=parameters[_COMPONENT_KEY_],
+             # FIXME _COMPONENT_KEY_ is deprecated
+             component="FIXME_INVALID_FAKE_VALUE",
+             # component=parameters[_COMPONENT_KEY_],
          )

          if self.jobs != {}:
@@ -546,8 +542,6 @@
              slurm_config=slurm_config,
          )

-         # LOGFILE = task_files.log_file_local
-
          # Retrieval phase
          while len(self.jobs) > 0:
              if self.is_shutdown():
@@ -638,7 +632,9 @@
              # TODO: replace with actual values
              tasks = []
              for ind_chunk, parameters in enumerate(chunk):
-                 component = parameters[_COMPONENT_KEY_]
+                 # FIXME: _COMPONENT_KEY_ is deprecated
+                 # component = parameters[_COMPONENT_KEY_]
+                 component = "INVALID_FAKE_VALUE_FIXME"
                  tasks.append(
                      SlurmTask(
                          index=(ind_batch * batch_size) + ind_chunk,
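The surviving `index` arithmetic gives each task a global position across batches; a quick check of the formula:

    # With batch_size=3, batch 0 yields indices 0-2 and batch 1 yields 3-5.
    batch_size = 3
    indices = [
        (ind_batch * batch_size) + ind_chunk
        for ind_batch in range(2)
        for ind_chunk in range(batch_size)
    ]
    assert indices == [0, 1, 2, 3, 4, 5]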