PyPI - executorlib - Version diff - 1.5.3.tar.gz → 1.6.0.tar.gz - Mend

executorlib 1.5.3.tar.gz → 1.6.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (47)

{executorlib-1.5.3 → executorlib-1.6.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: executorlib
-Version: 1.5.3
+Version: 1.6.0
 Summary: Up-scale python functions for high performance computing (HPC) with executorlib.
 Project-URL: Homepage, https://github.com/pyiron/executorlib
 Project-URL: Documentation, https://executorlib.readthedocs.io
@@ -41,13 +41,12 @@ Classifier: Development Status :: 5 - Production/Stable
 Classifier: Intended Audience :: Science/Research
 Classifier: License :: OSI Approved :: BSD License
 Classifier: Operating System :: OS Independent
-Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: 3.13
 Classifier: Topic :: Scientific/Engineering :: Physics
-Requires-Python: <3.14,>=3.9
+Requires-Python: <3.14,>3.9
 Requires-Dist: cloudpickle<=3.1.1,>=2.0.0
 Requires-Dist: pyzmq<=27.0.0,>=25.0.0
 Provides-Extra: all

{executorlib-1.5.3 → executorlib-1.6.0}/executorlib/__init__.py RENAMED Viewed

@@ -36,4 +36,11 @@ __all__: list[str] = [
     "SlurmClusterExecutor",
 ]
+try:
+    from executorlib.task_scheduler.file.queue_spawner import terminate_tasks_in_cache
+    __all__ += ["terminate_tasks_in_cache"]
+except ImportError:
+    pass
 __version__ = _version.__version__

{executorlib-1.5.3 → executorlib-1.6.0}/executorlib/_version.py RENAMED Viewed

@@ -17,5 +17,5 @@ __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE
-__version__ = version = '1.5.3'
-__version_tuple__ = version_tuple = (1, 5, 3)
+__version__ = version = '1.6.0'
+__version_tuple__ = version_tuple = (1, 6, 0)

{executorlib-1.5.3 → executorlib-1.6.0}/executorlib/backend/cache_parallel.py RENAMED Viewed

@@ -4,6 +4,7 @@ import time
 import cloudpickle
+from executorlib.standalone.error import backend_write_error_file
 from executorlib.task_scheduler.file.backend import (
     backend_load_file,
     backend_write_file,
@@ -53,6 +54,10 @@ def main() -> None:
                 output={"error": error},
                 runtime=time.time() - time_start,
             )
+            backend_write_error_file(
+                error=error,
+                apply_dict=apply_dict,
+            )
     else:
         if mpi_rank_zero:
             backend_write_file(

{executorlib-1.5.3 → executorlib-1.6.0}/executorlib/backend/interactive_parallel.py RENAMED Viewed

@@ -6,6 +6,7 @@ from typing import Optional
 import cloudpickle
 import zmq
+from executorlib.standalone.error import backend_write_error_file
 from executorlib.standalone.interactive.backend import call_funct, parse_arguments
 from executorlib.standalone.interactive.communication import (
     interface_connect,
@@ -82,6 +83,10 @@ def main() -> None:
                         socket=socket,
                         result_dict={"error": error},
                     )
+                    backend_write_error_file(
+                        error=error,
+                        apply_dict=input_dict,
+                    )
             else:
                 # Send output
                 if mpi_rank_zero:

{executorlib-1.5.3 → executorlib-1.6.0}/executorlib/backend/interactive_serial.py RENAMED Viewed

@@ -2,6 +2,7 @@ import sys
 from os.path import abspath
 from typing import Optional
+from executorlib.standalone.error import backend_write_error_file
 from executorlib.standalone.interactive.backend import call_funct, parse_arguments
 from executorlib.standalone.interactive.communication import (
     interface_connect,
@@ -58,6 +59,10 @@ def main(argument_lst: Optional[list[str]] = None):
                     socket=socket,
                     result_dict={"error": error},
                 )
+                backend_write_error_file(
+                    error=error,
+                    apply_dict=input_dict,
+                )
             else:
                 # Send output
                 interface_send(socket=socket, result_dict={"result": output})

{executorlib-1.5.3 → executorlib-1.6.0}/executorlib/executor/flux.py RENAMED Viewed

@@ -41,6 +41,8 @@ class FluxJobExecutor(BaseExecutor):
                                                            Defaults to None.
                               - exclusive (bool): Whether to exclusively reserve the compute nodes, or allow sharing
                                                   compute notes. Defaults to False.
+                              - error_log_file (str): Name of the error log file to use for storing exceptions raised
+                                                      by the Python functions submitted to the Executor.
         flux_executor (flux.job.FluxExecutor): Flux Python interface to submit the workers to flux
         flux_executor_pmi_mode (str): PMI interface to use (OpenMPI v5 requires pmix) default is None (Flux only)
         flux_executor_nesting (bool): Provide hierarchically nested Flux job scheduler inside the submitted function.
@@ -126,6 +128,8 @@ class FluxJobExecutor(BaseExecutor):
                                                                Defaults to None.
                                   - exclusive (bool): Whether to exclusively reserve the compute nodes, or allow sharing
                                                       compute notes. Defaults to False.
+                                  - error_log_file (str): Name of the error log file to use for storing exceptions
+                                                          raised by the Python functions submitted to the Executor.
             flux_executor (flux.job.FluxExecutor): Flux Python interface to submit the workers to flux
             flux_executor_pmi_mode (str): PMI interface to use (OpenMPI v5 requires pmix) default is None (Flux only)
             flux_executor_nesting (bool): Provide hierarchically nested Flux job scheduler inside the submitted function.
@@ -229,6 +233,8 @@ class FluxClusterExecutor(BaseExecutor):
                               - openmpi_oversubscribe (bool): adds the `--oversubscribe` command line flag (OpenMPI and
                                                               SLURM only) - default False
                               - slurm_cmd_args (list): Additional command line arguments for the srun call (SLURM only)
+                              - error_log_file (str): Name of the error log file to use for storing exceptions raised
+                                                      by the Python functions submitted to the Executor.
         pysqa_config_directory (str, optional): path to the pysqa config directory (only for pysqa based backend).
         hostname_localhost (boolean): use localhost instead of the hostname to establish the zmq connection. In the
                                       context of an HPC cluster this essential to be able to communicate to an
@@ -308,6 +314,8 @@ class FluxClusterExecutor(BaseExecutor):
                                                                   and SLURM only) - default False
                                   - slurm_cmd_args (list): Additional command line arguments for the srun call (SLURM
                                                            only)
+                                  - error_log_file (str): Name of the error log file to use for storing exceptions
+                                                          raised by the Python functions submitted to the Executor.
             pysqa_config_directory (str, optional): path to the pysqa config directory (only for pysqa based backend).
             hostname_localhost (boolean): use localhost instead of the hostname to establish the zmq connection. In the
                                       context of an HPC cluster this essential to be able to communicate to an
@@ -424,6 +432,8 @@ def create_flux_executor(
                                                            Defaults to None.
                               - exclusive (bool): Whether to exclusively reserve the compute nodes, or allow sharing
                                                   compute notes. Defaults to False.
+                              - error_log_file (str): Name of the error log file to use for storing exceptions raised
+                                                      by the Python functions submitted to the Executor.
         flux_executor (flux.job.FluxExecutor): Flux Python interface to submit the workers to flux
         flux_executor_pmi_mode (str): PMI interface to use (OpenMPI v5 requires pmix) default is None (Flux only)
         flux_executor_nesting (bool): Provide hierarchically nested Flux job scheduler inside the submitted function.

{executorlib-1.5.3 → executorlib-1.6.0}/executorlib/executor/single.py RENAMED Viewed

@@ -39,6 +39,8 @@ class SingleNodeExecutor(BaseExecutor):
                               - openmpi_oversubscribe (bool): adds the `--oversubscribe` command line flag (OpenMPI and
                                                               SLURM only) - default False
                               - slurm_cmd_args (list): Additional command line arguments for the srun call (SLURM only)
+                              - error_log_file (str): Name of the error log file to use for storing exceptions raised
+                                                      by the Python functions submitted to the Executor.
         hostname_localhost (boolean): use localhost instead of the hostname to establish the zmq connection. In the
                                       context of an HPC cluster this essential to be able to communicate to an
                                       Executor running on a different compute node within the same allocation. And
@@ -116,6 +118,8 @@ class SingleNodeExecutor(BaseExecutor):
                                                                   and SLURM only) - default False
                                   - slurm_cmd_args (list): Additional command line arguments for the srun call (SLURM
                                                            only)
+                                  - error_log_file (str): Name of the error log file to use for storing exceptions
+                                                          raised by the Python functions submitted to the Executor.
             hostname_localhost (boolean): use localhost instead of the hostname to establish the zmq connection. In the
                                       context of an HPC cluster this essential to be able to communicate to an
                                       Executor running on a different compute node within the same allocation. And
@@ -202,6 +206,8 @@ class TestClusterExecutor(BaseExecutor):
                               - threads_per_core (int): number of OpenMP threads to be used for each function call
                               - gpus_per_core (int): number of GPUs per worker - defaults to 0
                               - cwd (str/None): current working directory where the parallel python task is executed
+                              - error_log_file (str): Name of the error log file to use for storing exceptions raised
+                                                      by the Python functions submitted to the Executor.
         hostname_localhost (boolean): use localhost instead of the hostname to establish the zmq connection. In the
                                       context of an HPC cluster this essential to be able to communicate to an
                                       Executor running on a different compute node within the same allocation. And
@@ -273,6 +279,8 @@ class TestClusterExecutor(BaseExecutor):
                                   - threads_per_core (int): number of OpenMP threads to be used for each function call
                                   - gpus_per_core (int): number of GPUs per worker - defaults to 0
                                   - cwd (str/None): current working directory where the parallel python task is executed
+                                  - error_log_file (str): Name of the error log file to use for storing exceptions
+                                                          raised by the Python functions submitted to the Executor.
             hostname_localhost (boolean): use localhost instead of the hostname to establish the zmq connection. In the
                                       context of an HPC cluster this essential to be able to communicate to an
                                       Executor running on a different compute node within the same allocation. And
@@ -381,6 +389,8 @@ def create_single_node_executor(
                                                               and SLURM only) - default False
                               - slurm_cmd_args (list): Additional command line arguments for the srun call (SLURM
                                                        only)
+                              - error_log_file (str): Name of the error log file to use for storing exceptions raised
+                                                      by the Python functions submitted to the Executor.
         hostname_localhost (boolean): use localhost instead of the hostname to establish the zmq connection. In the
                                   context of an HPC cluster this essential to be able to communicate to an
                                   Executor running on a different compute node within the same allocation. And

{executorlib-1.5.3 → executorlib-1.6.0}/executorlib/executor/slurm.py RENAMED Viewed

@@ -41,6 +41,8 @@ class SlurmClusterExecutor(BaseExecutor):
                               - openmpi_oversubscribe (bool): adds the `--oversubscribe` command line flag (OpenMPI and
                                                               SLURM only) - default False
                               - slurm_cmd_args (list): Additional command line arguments for the srun call (SLURM only)
+                              - error_log_file (str): Name of the error log file to use for storing exceptions raised
+                                                      by the Python functions submitted to the Executor.
         pysqa_config_directory (str, optional): path to the pysqa config directory (only for pysqa based backend).
         hostname_localhost (boolean): use localhost instead of the hostname to establish the zmq connection. In the
                                       context of an HPC cluster this essential to be able to communicate to an
@@ -120,6 +122,8 @@ class SlurmClusterExecutor(BaseExecutor):
                                                                   and SLURM only) - default False
                                   - slurm_cmd_args (list): Additional command line arguments for the srun call (SLURM
                                                            only)
+                                  - error_log_file (str): Name of the error log file to use for storing exceptions
+                                                          raised by the Python functions submitted to the Executor.
             pysqa_config_directory (str, optional): path to the pysqa config directory (only for pysqa based backend).
             hostname_localhost (boolean): use localhost instead of the hostname to establish the zmq connection. In the
                                       context of an HPC cluster this essential to be able to communicate to an
@@ -226,6 +230,8 @@ class SlurmJobExecutor(BaseExecutor):
                                                            Defaults to None.
                               - exclusive (bool): Whether to exclusively reserve the compute nodes, or allow sharing
                                                   compute notes. Defaults to False.
+                              - error_log_file (str): Name of the error log file to use for storing exceptions raised
+                                                      by the Python functions submitted to the Executor.
         hostname_localhost (boolean): use localhost instead of the hostname to establish the zmq connection. In the
                                       context of an HPC cluster this essential to be able to communicate to an
                                       Executor running on a different compute node within the same allocation. And
@@ -307,6 +313,8 @@ class SlurmJobExecutor(BaseExecutor):
                                                            Defaults to None.
                                   - exclusive (bool): Whether to exclusively reserve the compute nodes, or allow sharing
                                                       compute notes. Defaults to False.
+                                  - error_log_file (str): Name of the error log file to use for storing exceptions
+                                                          raised by the Python functions submitted to the Executor.
             hostname_localhost (boolean): use localhost instead of the hostname to establish the zmq connection. In the
                                       context of an HPC cluster this essential to be able to communicate to an
                                       Executor running on a different compute node within the same allocation. And
@@ -408,6 +416,8 @@ def create_slurm_executor(
                                                            Defaults to None.
                               - exclusive (bool): Whether to exclusively reserve the compute nodes, or allow sharing
                                                   compute notes. Defaults to False.
+                              - error_log_file (str): Name of the error log file to use for storing exceptions raised
+                                                      by the Python functions submitted to the Executor.
         hostname_localhost (boolean): use localhost instead of the hostname to establish the zmq connection. In the
                                   context of an HPC cluster this essential to be able to communicate to an
                                   Executor running on a different compute node within the same allocation. And

{executorlib-1.5.3 → executorlib-1.6.0}/executorlib/standalone/cache.py RENAMED Viewed

@@ -10,6 +10,7 @@ group_dict = {
     "error": "error",
     "runtime": "runtime",
     "queue_id": "queue_id",
+    "error_log_file": "error_log_file",
 }

executorlib-1.6.0/executorlib/standalone/error.py ADDED Viewed

@@ -0,0 +1,21 @@
+import traceback
+def backend_write_error_file(error: Exception, apply_dict: dict) -> None:
+    """
+    Write an error to a file if specified in the apply_dict.
+    Args:
+        error (Exception): The error to be written.
+        apply_dict (dict): Dictionary containing additional parameters.
+    Returns:
+        None
+    """
+    error_log_file = apply_dict.get("error_log_file")
+    if error_log_file is not None:
+        with open(error_log_file, "a") as f:
+            f.write("function: " + str(apply_dict["fn"]) + "\n")
+            f.write("args: " + str(apply_dict["args"]) + "\n")
+            f.write("kwargs: " + str(apply_dict["kwargs"]) + "\n")
+            traceback.print_exception(error, file=f)

{executorlib-1.5.3 → executorlib-1.6.0}/executorlib/task_scheduler/file/backend.py RENAMED Viewed

@@ -2,6 +2,7 @@ import os
 import time
 from typing import Any
+from executorlib.standalone.error import backend_write_error_file
 from executorlib.task_scheduler.file.hdf import dump, load
 from executorlib.task_scheduler.file.shared import FutureItem
@@ -77,6 +78,10 @@ def backend_execute_task_in_file(file_name: str) -> None:
         }
     except Exception as error:
         result = {"error": error}
+        backend_write_error_file(
+            error=error,
+            apply_dict=apply_dict,
+        )
     backend_write_file(
         file_name=file_name,

{executorlib-1.5.3 → executorlib-1.6.0}/executorlib/task_scheduler/file/hdf.py RENAMED Viewed

@@ -52,6 +52,10 @@ def load(file_name: str) -> dict:
             data_dict["kwargs"] = cloudpickle.loads(np.void(hdf["/input_kwargs"]))
         else:
             data_dict["kwargs"] = {}
+        if "error_log_file" in hdf:
+            data_dict["error_log_file"] = cloudpickle.loads(
+                np.void(hdf["/error_log_file"])
+            )
         return data_dict

{executorlib-1.5.3 → executorlib-1.6.0}/executorlib/task_scheduler/file/queue_spawner.py RENAMED Viewed

@@ -1,3 +1,4 @@
+import contextlib
 import os
 import subprocess
 from typing import Optional, Union
@@ -32,7 +33,7 @@ def execute_with_pysqa(
                                   cwd: None,
                               }
         config_directory (str, optional): path to the config directory.
-        backend (str, optional): name of the backend used to spawn tasks.
+        backend (str, optional): name of the backend used to spawn tasks ["slurm", "flux"].
     Returns:
         int: queuing system ID
@@ -101,7 +102,7 @@ def terminate_with_pysqa(
     Args:
         queue_id (int): Queuing system ID of the job to delete.
         config_directory (str, optional): path to the config directory.
-        backend (str, optional): name of the backend used to spawn tasks.
+        backend (str, optional): name of the backend used to spawn tasks ["slurm", "flux"].
     """
     qa = QueueAdapter(
         directory=config_directory,
@@ -110,7 +111,35 @@ def terminate_with_pysqa(
     )
     status = qa.get_status_of_job(process_id=queue_id)
     if status is not None and status not in ["finished", "error"]:
-        qa.delete_job(process_id=queue_id)
+        with contextlib.suppress(subprocess.CalledProcessError):
+            qa.delete_job(process_id=queue_id)
+def terminate_tasks_in_cache(
+    cache_directory: str,
+    config_directory: Optional[str] = None,
+    backend: Optional[str] = None,
+):
+    """
+    Delete all jobs stored in the cache directory from the queuing system
+    Args:
+        cache_directory (str): The directory to store cache files.
+        config_directory (str, optional): path to the config directory.
+        backend (str, optional): name of the backend used to spawn tasks ["slurm", "flux"].
+    """
+    hdf5_file_lst = []
+    for root, _, files in os.walk(cache_directory):
+        hdf5_file_lst += [os.path.join(root, f) for f in files if f[-5:] == "_i.h5"]
+    for f in hdf5_file_lst:
+        queue_id = get_queue_id(f)
+        if queue_id is not None:
+            terminate_with_pysqa(
+                queue_id=queue_id,
+                config_directory=config_directory,
+                backend=backend,
+            )
 def _pysqa_execute_command(

{executorlib-1.5.3 → executorlib-1.6.0}/executorlib/task_scheduler/file/shared.py RENAMED Viewed

@@ -126,6 +126,7 @@ def execute_tasks_h5(
             )
             cache_key = task_resource_dict.pop("cache_key", None)
             cache_directory = os.path.abspath(task_resource_dict.pop("cache_directory"))
+            error_log_file = task_resource_dict.pop("error_log_file", None)
             task_key, data_dict = serialize_funct_h5(
                 fn=task_dict["fn"],
                 fn_args=task_args,
@@ -133,6 +134,7 @@ def execute_tasks_h5(
                 resource_dict=task_resource_dict,
                 cache_key=cache_key,
             )
+            data_dict["error_log_file"] = error_log_file
             if task_key not in memory_dict:
                 if os.path.join(
                     cache_directory, task_key + "_o.h5"

{executorlib-1.5.3 → executorlib-1.6.0}/executorlib/task_scheduler/interactive/shared.py RENAMED Viewed

@@ -26,6 +26,7 @@ def execute_tasks(
     cache_key: Optional[str] = None,
     queue_join_on_shutdown: bool = True,
     log_obj_size: bool = False,
+    error_log_file: Optional[str] = None,
     **kwargs,
 ) -> None:
     """
@@ -70,6 +71,8 @@ def execute_tasks(
                 future_queue.join()
             break
         elif "fn" in task_dict and "future" in task_dict:
+            if error_log_file is not None:
+                task_dict["error_log_file"] = error_log_file
             if cache_directory is None:
                 _execute_task_without_cache(
                     interface=interface, task_dict=task_dict, future_queue=future_queue

{executorlib-1.5.3 → executorlib-1.6.0}/pyproject.toml RENAMED Viewed

@@ -16,14 +16,13 @@ authors = [
 readme = "README.md"
 license = { file = "LICENSE" }
 keywords = ["high performance computing", "hpc", "task scheduler", "slurm", "flux-framework", "executor"]
-requires-python = ">=3.9, <3.14"
+requires-python = ">3.9, <3.14"
 classifiers = [
     "Development Status :: 5 - Production/Stable",
     "Topic :: Scientific/Engineering :: Physics",
     "License :: OSI Approved :: BSD License",
     "Intended Audience :: Science/Research",
     "Operating System :: OS Independent",
-    "Programming Language :: Python :: 3.9",
     "Programming Language :: Python :: 3.10",
     "Programming Language :: Python :: 3.11",
     "Programming Language :: Python :: 3.12",