PyPI - fractal-server - Versions diffs - 2.13.0__py3-none-any.whl → 2.14.0a0__py3-none-any.whl - Mend

fractal-server 2.13.0__py3-none-any.whl → 2.14.0a0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (73) hide show

fractal_server/app/runner/executors/local/runner.py ADDED Viewed

@@ -0,0 +1,200 @@
+from concurrent.futures import ThreadPoolExecutor
+from pathlib import Path
+from typing import Any
+from typing import Optional
+from ._local_config import get_default_local_backend_config
+from ._local_config import LocalBackendConfig
+from fractal_server.app.history import HistoryItemImageStatus
+from fractal_server.app.history import update_all_images
+from fractal_server.app.history import update_single_image
+from fractal_server.app.history import update_single_image_logfile
+from fractal_server.app.runner.components import _COMPONENT_KEY_
+from fractal_server.app.runner.executors.base_runner import BaseRunner
+from fractal_server.app.runner.task_files import TaskFiles
+from fractal_server.logger import set_logger
+# Module-level logger, obtained from `set_logger` with this module's name.
+logger = set_logger(__name__)
+class LocalRunner(BaseRunner):
+    executor: ThreadPoolExecutor
+    root_dir_local: Path
+    def __init__(
+        self,
+        root_dir_local: Path,
+    ):
+        self.root_dir_local = root_dir_local
+        self.root_dir_local.mkdir(parents=True, exist_ok=True)
+        self.executor = ThreadPoolExecutor()
+        logger.debug("Create LocalRunner")
+    def __enter__(self):
+        logger.debug("Enter LocalRunner")
+        return self
+    def shutdown(self):
+        logger.debug("Now shut LocalRunner.executor down")
+        self.executor.shutdown(
+            wait=False,
+            cancel_futures=True,
+        )
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        logger.debug("Exit LocalRunner")
+        self.shutdown()
+        return self.executor.__exit__(exc_type, exc_val, exc_tb)
+    def submit(
+        self,
+        func: callable,
+        parameters: dict[str, Any],
+        history_item_id: int,
+        task_files: TaskFiles,
+        in_compound_task: bool = False,
+        **kwargs,
+    ) -> tuple[Any, Exception]:
+        logger.debug("[submit] START")
+        current_task_files = TaskFiles(
+            **task_files.model_dump(
+                exclude={"component"},
+            ),
+            component=parameters[_COMPONENT_KEY_],
+        )
+        self.validate_submit_parameters(parameters)
+        workdir_local = current_task_files.wftask_subfolder_local
+        workdir_local.mkdir()
+        # SUBMISSION PHASE
+        future = self.executor.submit(func, parameters=parameters)
+        # RETRIEVAL PHASE
+        try:
+            result = future.result()
+            if not in_compound_task:
+                update_all_images(
+                    history_item_id=history_item_id,
+                    status=HistoryItemImageStatus.DONE,
+                    logfile=current_task_files.log_file_local,
+                )
+            logger.debug(f"[submit] END {result=}")
+            return result, None
+        except Exception as e:
+            exception = e
+            update_all_images(
+                history_item_id=history_item_id,
+                status=HistoryItemImageStatus.FAILED,
+                logfile=current_task_files.log_file_local,
+            )
+            logger.debug(f"[submit] END {exception=}")
+            return None, exception
+    def multisubmit(
+        self,
+        func: callable,
+        list_parameters: list[dict],
+        history_item_id: int,
+        task_files: TaskFiles,
+        in_compound_task: bool = False,
+        local_backend_config: Optional[LocalBackendConfig] = None,
+        **kwargs,
+    ):
+        logger.debug(f"[multisubmit] START, {len(list_parameters)=}")
+        self.validate_multisubmit_parameters(
+            list_parameters=list_parameters,
+            in_compound_task=in_compound_task,
+        )
+        workdir_local = task_files.wftask_subfolder_local
+        if not in_compound_task:
+            workdir_local.mkdir()
+        # Get local_backend_config
+        if local_backend_config is None:
+            local_backend_config = get_default_local_backend_config()
+        # Set `n_elements` and `parallel_tasks_per_job`
+        n_elements = len(list_parameters)
+        parallel_tasks_per_job = local_backend_config.parallel_tasks_per_job
+        if parallel_tasks_per_job is None:
+            parallel_tasks_per_job = n_elements
+        original_task_files = task_files
+        # Execute tasks, in chunks of size `parallel_tasks_per_job`
+        results = {}
+        exceptions = {}
+        for ind_chunk in range(0, n_elements, parallel_tasks_per_job):
+            list_parameters_chunk = list_parameters[
+                ind_chunk : ind_chunk + parallel_tasks_per_job
+            ]
+            from concurrent.futures import Future
+            active_futures: dict[int, Future] = {}
+            active_task_files: dict[int, TaskFiles] = {}
+            for ind_within_chunk, kwargs in enumerate(list_parameters_chunk):
+                positional_index = ind_chunk + ind_within_chunk
+                component = kwargs[_COMPONENT_KEY_]
+                future = self.executor.submit(func, parameters=kwargs)
+                active_futures[positional_index] = future
+                active_task_files[positional_index] = TaskFiles(
+                    **original_task_files.model_dump(exclude={"component"}),
+                    component=component,
+                )
+            while active_futures:
+                # FIXME: add shutdown detection
+                # if file exists: cancel all futures, and raise
+                finished_futures = [
+                    keyval
+                    for keyval in active_futures.items()
+                    if not keyval[1].running()
+                ]
+                for positional_index, fut in finished_futures:
+                    active_futures.pop(positional_index)
+                    current_task_files = active_task_files.pop(
+                        positional_index
+                    )
+                    zarr_url = list_parameters[positional_index]["zarr_url"]
+                    if not in_compound_task:
+                        update_single_image_logfile(
+                            history_item_id=history_item_id,
+                            zarr_url=zarr_url,
+                            logfile=current_task_files.log_file_local,
+                        )
+                    try:
+                        results[positional_index] = fut.result()
+                        print(f"Mark {zarr_url=} as done, {kwargs}")
+                        if not in_compound_task:
+                            update_single_image(
+                                history_item_id=history_item_id,
+                                zarr_url=zarr_url,
+                                status=HistoryItemImageStatus.DONE,
+                            )
+                    except Exception as e:
+                        print(f"Mark {zarr_url=} as failed, {kwargs} - {e}")
+                        exceptions[positional_index] = e
+                        if not in_compound_task:
+                            update_single_image(
+                                history_item_id=history_item_id,
+                                zarr_url=zarr_url,
+                                status=HistoryItemImageStatus.FAILED,
+                            )
+        if in_compound_task:
+            if exceptions == {}:
+                update_all_images(
+                    history_item_id=history_item_id,
+                    status=HistoryItemImageStatus.DONE,
+                )
+            else:
+                update_all_images(
+                    history_item_id=history_item_id,
+                    status=HistoryItemImageStatus.FAILED,
+                )
+        logger.debug(f"[multisubmit] END, {results=}, {exceptions=}")
+        return results, exceptions

fractal_server/app/runner/executors/{slurm → slurm_common}/_batching.py RENAMED Viewed

@@ -14,7 +14,7 @@ Submodule to determine the number of total/parallel tasks per SLURM job.
 import math
 from typing import Optional
-from .....logger import set_logger
+from fractal_server.logger import set_logger
 logger = set_logger(__name__)

fractal_server/app/runner/executors/{slurm → slurm_common}/_slurm_config.py RENAMED Viewed

@@ -22,9 +22,9 @@ from pydantic import ConfigDict
 from pydantic import Field
 from pydantic import ValidationError
-from .....config import get_settings
-from .....logger import set_logger
-from .....syringe import Inject
+from fractal_server.config import get_settings
+from fractal_server.logger import set_logger
+from fractal_server.syringe import Inject
 logger = set_logger(__name__)

fractal_server/app/runner/{v2/_slurm_ssh → executors/slurm_common}/_submit_setup.py RENAMED Viewed

@@ -14,11 +14,12 @@ Submodule to define _slurm_submit_setup, which is also the reference
 implementation of `submit_setup_call`.
 """
 from pathlib import Path
+from typing import Any
 from typing import Literal
-from ...task_files import get_task_file_paths
+from ...task_files import TaskFiles
 from fractal_server.app.models.v2 import WorkflowTaskV2
-from fractal_server.app.runner.v2._slurm_common.get_slurm_config import (
+from fractal_server.app.runner.executors.slurm_common.get_slurm_config import (
     get_slurm_config,
 )
@@ -26,14 +27,16 @@ from fractal_server.app.runner.v2._slurm_common.get_slurm_config import (
 def _slurm_submit_setup(
     *,
     wftask: WorkflowTaskV2,
-    workflow_dir_local: Path,
-    workflow_dir_remote: Path,
+    root_dir_local: Path,
+    root_dir_remote: Path,
     which_type: Literal["non_parallel", "parallel"],
-) -> dict[str, object]:
+) -> dict[str, Any]:
     """
-    Collect WorfklowTask-specific configuration parameters from different
+    Collect WorkflowTask-specific configuration parameters from different
     sources, and inject them for execution.
+    FIXME
     Here goes all the logic for reading attributes from the appropriate sources
     and transforming them into an appropriate `SlurmConfig` object (encoding
     SLURM configuration) and `TaskFiles` object (with details e.g. about file
@@ -68,16 +71,14 @@ def _slurm_submit_setup(
     )
     # Get TaskFiles object
-    task_files = get_task_file_paths(
-        workflow_dir_local=workflow_dir_local,
-        workflow_dir_remote=workflow_dir_remote,
+    task_files = TaskFiles(
+        root_dir_local=root_dir_local,
+        root_dir_remote=root_dir_remote,
         task_order=wftask.order,
         task_name=wftask.task.name,
     )
-    # Prepare and return output dictionary
-    submit_setup_dict = dict(
+    return dict(
         slurm_config=slurm_config,
         task_files=task_files,
     )
-    return submit_setup_dict

fractal_server/app/runner/{v2/_slurm_common → executors/slurm_common}/get_slurm_config.py RENAMED Viewed

@@ -2,18 +2,12 @@ from pathlib import Path
 from typing import Literal
 from typing import Optional
+from ._slurm_config import _parse_mem_value
+from ._slurm_config import load_slurm_config_file
+from ._slurm_config import logger
+from ._slurm_config import SlurmConfig
+from ._slurm_config import SlurmConfigError
 from fractal_server.app.models.v2 import WorkflowTaskV2
-from fractal_server.app.runner.executors.slurm._slurm_config import (
-    _parse_mem_value,
-)
-from fractal_server.app.runner.executors.slurm._slurm_config import (
-    load_slurm_config_file,
-)
-from fractal_server.app.runner.executors.slurm._slurm_config import logger
-from fractal_server.app.runner.executors.slurm._slurm_config import SlurmConfig
-from fractal_server.app.runner.executors.slurm._slurm_config import (
-    SlurmConfigError,
-)
 def get_slurm_config(
@@ -142,8 +136,8 @@ def get_slurm_config(
     extra_lines = slurm_dict.get("extra_lines", []) + extra_lines
     if len(set(extra_lines)) != len(extra_lines):
         logger.debug(
-            "[get_slurm_config] Removing repeated elements "
-            f"from {extra_lines=}."
+            "[get_slurm_config] Removing repeated elements from "
+            f"{extra_lines=}."
         )
         extra_lines = list(set(extra_lines))
     slurm_dict["extra_lines"] = extra_lines
@@ -162,8 +156,8 @@ def get_slurm_config(
     # Put everything together
     logger.debug(
-        "[get_slurm_config] Now create a SlurmConfig object based "
-        f"on {slurm_dict=}"
+        "[get_slurm_config] Now create a SlurmConfig object based on "
+        f"{slurm_dict=}"
     )
     slurm_config = SlurmConfig(**slurm_dict)

fractal_server/app/runner/executors/{slurm/ssh → slurm_ssh}/_executor_wait_thread.py RENAMED Viewed

@@ -4,8 +4,8 @@ import time
 import traceback
 from itertools import count
-from ......logger import set_logger
 from fractal_server.app.runner.exceptions import JobExecutionError
+from fractal_server.logger import set_logger
 logger = set_logger(__name__)

fractal_server/app/runner/executors/{slurm/ssh → slurm_ssh}/_slurm_job.py RENAMED Viewed

@@ -2,7 +2,7 @@ import uuid
 from pathlib import Path
 from typing import Optional
-from fractal_server.app.runner.executors.slurm._slurm_config import (
+from fractal_server.app.runner.executors.slurm_common._slurm_config import (
     SlurmConfig,
 )

fractal_server/app/runner/executors/{slurm/ssh → slurm_ssh}/executor.py RENAMED Viewed

@@ -15,22 +15,21 @@ from typing import Sequence
 import cloudpickle
-from ....filenames import SHUTDOWN_FILENAME
-from ....task_files import get_task_file_paths
-from ....task_files import TaskFiles
-from ....versions import get_versions
-from ..._job_states import STATES_FINISHED
-from ...slurm._slurm_config import SlurmConfig
-from .._batching import heuristics
-from ..utils_executors import get_pickle_file_path
-from ..utils_executors import get_slurm_file_path
-from ..utils_executors import get_slurm_script_file_path
+from ...filenames import SHUTDOWN_FILENAME
+from ...task_files import TaskFiles
+from ...versions import get_versions
+from ..slurm_common._batching import heuristics
+from ..slurm_common._job_states import STATES_FINISHED
+from ..slurm_common._slurm_config import SlurmConfig
+from ..slurm_common.utils_executors import get_pickle_file_path
+from ..slurm_common.utils_executors import get_slurm_file_path
+from ..slurm_common.utils_executors import get_slurm_script_file_path
 from ._executor_wait_thread import FractalSlurmSSHWaitThread
 from fractal_server.app.runner.components import _COMPONENT_KEY_
 from fractal_server.app.runner.compress_folder import compress_folder
 from fractal_server.app.runner.exceptions import JobExecutionError
 from fractal_server.app.runner.exceptions import TaskExecutionError
-from fractal_server.app.runner.executors.slurm.ssh._slurm_job import SlurmJob
+from fractal_server.app.runner.executors.slurm_ssh._slurm_job import SlurmJob
 from fractal_server.app.runner.extract_archive import extract_archive
 from fractal_server.config import get_settings
 from fractal_server.logger import set_logger
@@ -533,9 +532,9 @@ class FractalSlurmSSHExecutor(Executor):
                 except AttributeError:
                     actual_component = str(component)
-                _task_file_paths = get_task_file_paths(
-                    workflow_dir_local=task_files.workflow_dir_local,
-                    workflow_dir_remote=task_files.workflow_dir_remote,
+                _task_file_paths = TaskFiles(
+                    root_dir_local=task_files.workflow_dir_local,
+                    root_dir_remote=task_files.workflow_dir_remote,
                     task_name=task_files.task_name,
                     task_order=task_files.task_order,
                     component=actual_component,

fractal_server/app/runner/executors/{slurm/sudo → slurm_sudo}/_check_jobs_status.py RENAMED Viewed

@@ -1,18 +1,20 @@
-from subprocess import run  # nosec
+import subprocess  # nosec
-from ......logger import set_logger
-from ..._job_states import STATES_FINISHED
+from fractal_server.app.runner.executors.slurm_common._job_states import (
+    STATES_FINISHED,
+)
+from fractal_server.logger import set_logger
 logger = set_logger(__name__)
-def run_squeue(job_ids):
-    res = run(  # nosec
+def run_squeue(job_ids: list[str]) -> subprocess.CompletedProcess:
+    res = subprocess.run(  # nosec
         [
             "squeue",
             "--noheader",
-            "--format=%i %T",
+            "--format='%i %T'",
             "--jobs",
             ",".join([str(j) for j in job_ids]),
             "--states=all",
@@ -23,14 +25,14 @@ def run_squeue(job_ids):
     )
     if res.returncode != 0:
         logger.warning(
-            f"squeue command with {job_ids}"
-            f" failed with:\n{res.stderr=}\n{res.stdout=}"
+            f"squeue command with {job_ids} failed with:"
+            f"\n{res.stderr=}\n{res.stdout=}"
         )
     return res
-def _jobs_finished(job_ids) -> set[str]:
+def get_finished_jobs(job_ids: list[str]) -> set[str]:
     """
     Check which ones of the given Slurm jobs already finished

fractal_server/app/runner/executors/{slurm/sudo → slurm_sudo}/_executor_wait_thread.py RENAMED Viewed

@@ -5,9 +5,9 @@ import traceback
 from itertools import count
 from typing import Optional
-from ......logger import set_logger
-from ._check_jobs_status import _jobs_finished
+from ._check_jobs_status import get_finished_jobs
 from fractal_server.app.runner.exceptions import JobExecutionError
+from fractal_server.logger import set_logger
 logger = set_logger(__name__)
@@ -115,7 +115,7 @@ class FractalSlurmSudoWaitThread(threading.Thread):
         self.check_shutdown(i)
         if i % (self.slurm_poll_interval // self.interval) == 0:
             try:
-                finished_jobs = _jobs_finished(self.waiting.values())
+                finished_jobs = get_finished_jobs(self.waiting.values())
             except Exception:
                 # Don't abandon completion checking if jobs_finished errors
                 traceback.print_exc()

fractal_server/app/runner/executors/{slurm/sudo → slurm_sudo}/_subprocess_run_as_user.py RENAMED Viewed

@@ -19,7 +19,7 @@ import shlex
 import subprocess  # nosec
 from typing import Optional
-from ......logger import set_logger
+from fractal_server.logger import set_logger
 from fractal_server.string_tools import validate_cmd
 logger = set_logger(__name__)
@@ -65,10 +65,7 @@ def _run_command_as_user(
     if check and not res.returncode == 0:
         raise RuntimeError(
-            f"{cmd=}\n\n"
-            f"{res.returncode=}\n\n"
-            f"{res.stdout=}\n\n"
-            f"{res.stderr=}\n"
+            f"{cmd=}\n\n{res.returncode=}\n\n{res.stdout=}\n\n{res.stderr=}\n"
         )
     return res
@@ -93,69 +90,6 @@ def _mkdir_as_user(*, folder: str, user: str) -> None:
     _run_command_as_user(cmd=cmd, user=user, check=True)
-def _glob_as_user(
-    *, folder: str, user: str, startswith: Optional[str] = None
-) -> list[str]:
-    """
-    Run `ls` in a folder (as a user) and filter results
-    Execute `ls` on a folder (impersonating a user, if `user` is not `None`)
-    and select results that start with `startswith` (if not `None`).
-    Arguments:
-        folder: Absolute path to the folder
-        user: If not `None`, the user to be impersonated via `sudo -u`
-        startswith: If not `None`, this is used to filter output of `ls`.
-    """
-    res = _run_command_as_user(cmd=f"ls {folder}", user=user, check=True)
-    output = res.stdout.split()
-    if startswith:
-        output = [f for f in output if f.startswith(startswith)]
-    return output
-def _glob_as_user_strict(
-    *,
-    folder: str,
-    user: str,
-    startswith: str,
-) -> list[str]:
-    """
-    Run `ls` in a folder (as a user) and filter results
-    Execute `ls` on a folder (impersonating a user, if `user` is not `None`)
-    and select results that comply with a set of rules. They all start with
-    `startswith` (if not `None`), and they match one of the known filename
-    patterns. See details in
-    https://github.com/fractal-analytics-platform/fractal-server/issues/1240
-    Arguments:
-        folder: Absolute path to the folder
-        user: If not `None`, the user to be impersonated via `sudo -u`
-        startswith: If not `None`, this is used to filter output of `ls`.
-    """
-    res = _run_command_as_user(cmd=f"ls {folder}", user=user, check=True)
-    output = res.stdout.split()
-    new_output = []
-    known_filenames = [
-        f"{startswith}{suffix}"
-        for suffix in [".args.json", ".metadiff.json", ".err", ".out", ".log"]
-    ]
-    for filename in output:
-        if filename in known_filenames:
-            new_output.append(filename)
-        elif filename.startswith(f"{startswith}_out_") and filename.endswith(
-            ".pickle"
-        ):
-            new_output.append(filename)
-    return new_output
 def _path_exists_as_user(*, path: str, user: Optional[str] = None) -> bool:
     """
     Impersonate a user and check if `path` exists via `ls`

fractal-server 2.13.0__py3-none-any.whl → 2.14.0a0__py3-none-any.whl

fractal-server 2.13.0__py3-none-any.whl → 2.14.0a0__py3-none-any.whl