PyPI - executorlib - Versions diffs - 1.5.2__tar.gz → 1.5.3__tar.gz - Mend

executorlib 1.5.2.tar.gz → 1.5.3.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (46) hide show

{executorlib-1.5.2 → executorlib-1.5.3}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: executorlib
-Version: 1.5.2
+Version: 1.5.3
 Summary: Up-scale python functions for high performance computing (HPC) with executorlib.
 Project-URL: Homepage, https://github.com/pyiron/executorlib
 Project-URL: Documentation, https://executorlib.readthedocs.io

{executorlib-1.5.2 → executorlib-1.5.3}/executorlib/_version.py RENAMED Viewed

@@ -17,5 +17,5 @@ __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE
-__version__ = version = '1.5.2'
-__version_tuple__ = version_tuple = (1, 5, 2)
+__version__ = version = '1.5.3'
+__version_tuple__ = version_tuple = (1, 5, 3)

{executorlib-1.5.2 → executorlib-1.5.3}/executorlib/api.py RENAMED Viewed

@@ -5,6 +5,7 @@ only use the functionality in this API in combination with the user interface de
 functionality is considered internal and might change during minor releases.
 """
+from executorlib.executor.single import TestClusterExecutor
 from executorlib.standalone.command import get_command_path
 from executorlib.standalone.interactive.communication import (
     SocketInterface,
@@ -19,6 +20,7 @@ from executorlib.standalone.queue import cancel_items_in_queue
 from executorlib.standalone.serialize import cloudpickle_register
 __all__: list[str] = [
+    "TestClusterExecutor",
     "cancel_items_in_queue",
     "cloudpickle_register",
     "get_command_path",

{executorlib-1.5.2 → executorlib-1.5.3}/executorlib/executor/flux.py RENAMED Viewed

@@ -4,6 +4,7 @@ from executorlib.executor.base import BaseExecutor
 from executorlib.standalone.inputcheck import (
     check_command_line_argument_lst,
     check_init_function,
+    check_log_obj_size,
     check_oversubscribe,
     check_plot_dependency_graph,
     check_pmi,
@@ -246,6 +247,7 @@ class FluxClusterExecutor(BaseExecutor):
         plot_dependency_graph (bool): Plot the dependencies of multiple future objects without executing them. For
                                       debugging purposes and to get an overview of the specified dependencies.
         plot_dependency_graph_filename (str): Name of the file to store the plotted graph in.
+        log_obj_size (bool): Enable debug mode which reports the size of the communicated objects.
     Examples:
         ```
@@ -282,6 +284,7 @@ class FluxClusterExecutor(BaseExecutor):
         refresh_rate: float = 0.01,
         plot_dependency_graph: bool = False,
         plot_dependency_graph_filename: Optional[str] = None,
+        log_obj_size: bool = False,
     ):
         """
         The executorlib.FluxClusterExecutor leverages either the message passing interface (MPI), the SLURM workload
@@ -323,6 +326,7 @@ class FluxClusterExecutor(BaseExecutor):
             plot_dependency_graph (bool): Plot the dependencies of multiple future objects without executing them. For
                                           debugging purposes and to get an overview of the specified dependencies.
             plot_dependency_graph_filename (str): Name of the file to store the plotted graph in.
+            log_obj_size (bool): Enable debug mode which reports the size of the communicated objects.
         """
         default_resource_dict: dict = {
@@ -338,6 +342,7 @@ class FluxClusterExecutor(BaseExecutor):
         resource_dict.update(
             {k: v for k, v in default_resource_dict.items() if k not in resource_dict}
         )
+        check_log_obj_size(log_obj_size=log_obj_size)
         if not plot_dependency_graph:
             import pysqa  # noqa
@@ -348,7 +353,7 @@ class FluxClusterExecutor(BaseExecutor):
             super().__init__(
                 executor=create_file_executor(
                     max_workers=max_workers,
-                    backend="flux_submission",
+                    backend="flux",
                     max_cores=max_cores,
                     cache_directory=cache_directory,
                     resource_dict=resource_dict,

{executorlib-1.5.2 → executorlib-1.5.3}/executorlib/executor/single.py RENAMED Viewed

@@ -56,6 +56,7 @@ class SingleNodeExecutor(BaseExecutor):
         plot_dependency_graph (bool): Plot the dependencies of multiple future objects without executing them. For
                                       debugging purposes and to get an overview of the specified dependencies.
         plot_dependency_graph_filename (str): Name of the file to store the plotted graph in.
+        log_obj_size (bool): Enable debug mode which reports the size of the communicated objects.
     Examples:
         ```
@@ -184,6 +185,174 @@ class SingleNodeExecutor(BaseExecutor):
             )
+class TestClusterExecutor(BaseExecutor):
+    """
+    The executorlib.api.TestClusterExecutor is designed to test the file based communication used in the
+    SlurmClusterExecutor and the FluxClusterExecutor locally. It is not recommended for production use, rather use the
+    SingleNodeExecutor.
+    Args:
+        max_workers (int): for backwards compatibility with the standard library, max_workers also defines the number of
+                           cores which can be used in parallel - just like the max_cores parameter. Using max_cores is
+                           recommended, as computers have a limited number of compute cores.
+        cache_directory (str, optional): The directory to store cache files. Defaults to "executorlib_cache".
+        max_cores (int): defines the number of cores which can be used in parallel
+        resource_dict (dict): A dictionary of resources required by the task. With the following keys:
+                              - cores (int): number of MPI cores to be used for each function call
+                              - threads_per_core (int): number of OpenMP threads to be used for each function call
+                              - gpus_per_core (int): number of GPUs per worker - defaults to 0
+                              - cwd (str/None): current working directory where the parallel python task is executed
+        hostname_localhost (boolean): use localhost instead of the hostname to establish the zmq connection. In the
+                                      context of an HPC cluster this is essential to be able to communicate to an
+                                      Executor running on a different compute node within the same allocation. And
+                                      in principle any computer should be able to resolve that their own hostname
+                                      points to the same address as localhost. Still MacOS >= 12 seems to disable
+                                      this look up for security reasons. So on MacOS it is required to set this
+                                      option to true
+        block_allocation (boolean): To accelerate the submission of a series of python functions with the same resource
+                                    requirements, executorlib supports block allocation. In this case all resources have
+                                    to be defined on the executor, rather than during the submission of the individual
+                                    function.
+        init_function (None): optional function to preset arguments for functions which are submitted later
+        disable_dependencies (boolean): Disable resolving future objects during the submission.
+        refresh_rate (float): Set the refresh rate in seconds, how frequently the input queue is checked.
+        plot_dependency_graph (bool): Plot the dependencies of multiple future objects without executing them. For
+                                      debugging purposes and to get an overview of the specified dependencies.
+        plot_dependency_graph_filename (str): Name of the file to store the plotted graph in.
+        log_obj_size (bool): Enable debug mode which reports the size of the communicated objects.
+    Examples:
+        ```
+        >>> import numpy as np
+        >>> from executorlib.api import TestClusterExecutor
+        >>>
+        >>> def calc(i, j, k):
+        >>>     from mpi4py import MPI
+        >>>     size = MPI.COMM_WORLD.Get_size()
+        >>>     rank = MPI.COMM_WORLD.Get_rank()
+        >>>     return np.array([i, j, k]), size, rank
+        >>>
+        >>> def init_k():
+        >>>     return {"k": 3}
+        >>>
+        >>> with TestClusterExecutor(max_workers=2, init_function=init_k) as p:
+        >>>     fs = p.submit(calc, 2, j=4)
+        >>>     print(fs.result())
+        [(array([2, 4, 3]), 2, 0), (array([2, 4, 3]), 2, 1)]
+        ```
+    """
+    def __init__(
+        self,
+        max_workers: Optional[int] = None,
+        cache_directory: Optional[str] = None,
+        max_cores: Optional[int] = None,
+        resource_dict: Optional[dict] = None,
+        hostname_localhost: Optional[bool] = None,
+        block_allocation: bool = False,
+        init_function: Optional[Callable] = None,
+        disable_dependencies: bool = False,
+        refresh_rate: float = 0.01,
+        plot_dependency_graph: bool = False,
+        plot_dependency_graph_filename: Optional[str] = None,
+        log_obj_size: bool = False,
+    ):
+        """
+        The executorlib.api.TestClusterExecutor is designed to test the file based communication used in the
+        SlurmClusterExecutor and the FluxClusterExecutor locally. It is not recommended for production use, rather use
+        the SingleNodeExecutor.
+        Args:
+            max_workers (int): for backwards compatibility with the standard library, max_workers also defines the
+                               number of cores which can be used in parallel - just like the max_cores parameter. Using
+                               max_cores is recommended, as computers have a limited number of compute cores.
+            cache_directory (str, optional): The directory to store cache files. Defaults to "executorlib_cache".
+            max_cores (int): defines the number of cores which can be used in parallel
+            resource_dict (dict): A dictionary of resources required by the task. With the following keys:
+                                  - cores (int): number of MPI cores to be used for each function call
+                                  - threads_per_core (int): number of OpenMP threads to be used for each function call
+                                  - gpus_per_core (int): number of GPUs per worker - defaults to 0
+                                  - cwd (str/None): current working directory where the parallel python task is executed
+            hostname_localhost (boolean): use localhost instead of the hostname to establish the zmq connection. In the
+                                      context of an HPC cluster this is essential to be able to communicate to an
+                                      Executor running on a different compute node within the same allocation. And
+                                      in principle any computer should be able to resolve that their own hostname
+                                      points to the same address as localhost. Still MacOS >= 12 seems to disable
+                                      this look up for security reasons. So on MacOS it is required to set this
+                                      option to true
+            block_allocation (boolean): To accelerate the submission of a series of python functions with the same
+                                        resource requirements, executorlib supports block allocation. In this case all
+                                        resources have to be defined on the executor, rather than during the submission
+                                        of the individual function.
+            init_function (None): optional function to preset arguments for functions which are submitted later
+            disable_dependencies (boolean): Disable resolving future objects during the submission.
+            refresh_rate (float): Set the refresh rate in seconds, how frequently the input queue is checked.
+            plot_dependency_graph (bool): Plot the dependencies of multiple future objects without executing them. For
+                                          debugging purposes and to get an overview of the specified dependencies.
+            plot_dependency_graph_filename (str): Name of the file to store the plotted graph in.
+            log_obj_size (bool): Enable debug mode which reports the size of the communicated objects.
+        """
+        # Defaults applied to every key the caller did not set in resource_dict.
+        default_resource_dict: dict = {
+            "cores": 1,
+            "threads_per_core": 1,
+            "gpus_per_core": 0,
+            "cwd": None,
+            "openmpi_oversubscribe": False,
+        }
+        if resource_dict is None:
+            resource_dict = {}
+        # Only missing keys are filled in; a dictionary passed by the caller is updated in place.
+        resource_dict.update(
+            {k: v for k, v in default_resource_dict.items() if k not in resource_dict}
+        )
+        if not plot_dependency_graph:
+            # Execution path: the file based executor is created with execute_in_subprocess as its execute function.
+            # The imports are deferred so they are only resolved when this branch is taken.
+            from executorlib.task_scheduler.file.subprocess_spawner import (
+                execute_in_subprocess,
+            )
+            from executorlib.task_scheduler.file.task_scheduler import (
+                create_file_executor,
+            )
+            # NOTE(review): refresh_rate, plot_dependency_graph_filename and log_obj_size are not forwarded in this
+            # branch - confirm that ignoring them here is intended.
+            super().__init__(
+                executor=create_file_executor(
+                    max_workers=max_workers,
+                    backend=None,
+                    max_cores=max_cores,
+                    cache_directory=cache_directory,
+                    resource_dict=resource_dict,
+                    flux_executor=None,
+                    flux_executor_pmi_mode=None,
+                    flux_executor_nesting=False,
+                    flux_log_files=False,
+                    pysqa_config_directory=None,
+                    hostname_localhost=hostname_localhost,
+                    block_allocation=block_allocation,
+                    init_function=init_function,
+                    disable_dependencies=disable_dependencies,
+                    execute_function=execute_in_subprocess,
+                )
+            )
+        else:
+            # Plotting path: a single node executor is wrapped in the DependencyTaskScheduler, which receives the
+            # plot_dependency_graph settings and the refresh_rate.
+            super().__init__(
+                executor=DependencyTaskScheduler(
+                    executor=create_single_node_executor(
+                        max_workers=max_workers,
+                        cache_directory=cache_directory,
+                        max_cores=max_cores,
+                        resource_dict=resource_dict,
+                        hostname_localhost=hostname_localhost,
+                        block_allocation=block_allocation,
+                        init_function=init_function,
+                        log_obj_size=log_obj_size,
+                    ),
+                    max_cores=max_cores,
+                    refresh_rate=refresh_rate,
+                    plot_dependency_graph=plot_dependency_graph,
+                    plot_dependency_graph_filename=plot_dependency_graph_filename,
+                )
+            )
 def create_single_node_executor(
     max_workers: Optional[int] = None,
     max_cores: Optional[int] = None,

{executorlib-1.5.2 → executorlib-1.5.3}/executorlib/executor/slurm.py RENAMED Viewed

@@ -3,6 +3,7 @@ from typing import Callable, Optional, Union
 from executorlib.executor.base import BaseExecutor
 from executorlib.standalone.inputcheck import (
     check_init_function,
+    check_log_obj_size,
     check_plot_dependency_graph,
     check_refresh_rate,
     validate_number_of_cores,
@@ -58,6 +59,7 @@ class SlurmClusterExecutor(BaseExecutor):
         plot_dependency_graph (bool): Plot the dependencies of multiple future objects without executing them. For
                                       debugging purposes and to get an overview of the specified dependencies.
         plot_dependency_graph_filename (str): Name of the file to store the plotted graph in.
+        log_obj_size (bool): Enable debug mode which reports the size of the communicated objects.
     Examples:
         ```
@@ -94,6 +96,7 @@ class SlurmClusterExecutor(BaseExecutor):
         refresh_rate: float = 0.01,
         plot_dependency_graph: bool = False,
         plot_dependency_graph_filename: Optional[str] = None,
+        log_obj_size: bool = False,
     ):
         """
         The executorlib.SlurmClusterExecutor leverages either the message passing interface (MPI), the SLURM workload
@@ -135,6 +138,7 @@ class SlurmClusterExecutor(BaseExecutor):
             plot_dependency_graph (bool): Plot the dependencies of multiple future objects without executing them. For
                                           debugging purposes and to get an overview of the specified dependencies.
             plot_dependency_graph_filename (str): Name of the file to store the plotted graph in.
+            log_obj_size (bool): Enable debug mode which reports the size of the communicated objects.
         """
         default_resource_dict: dict = {
@@ -150,6 +154,7 @@ class SlurmClusterExecutor(BaseExecutor):
         resource_dict.update(
             {k: v for k, v in default_resource_dict.items() if k not in resource_dict}
         )
+        check_log_obj_size(log_obj_size=log_obj_size)
         if not plot_dependency_graph:
             import pysqa  # noqa
@@ -160,7 +165,7 @@ class SlurmClusterExecutor(BaseExecutor):
             super().__init__(
                 executor=create_file_executor(
                     max_workers=max_workers,
-                    backend="slurm_submission",
+                    backend="slurm",
                     max_cores=max_cores,
                     cache_directory=cache_directory,
                     resource_dict=resource_dict,

{executorlib-1.5.2 → executorlib-1.5.3}/executorlib/standalone/inputcheck.py RENAMED Viewed

@@ -194,7 +194,21 @@ def validate_number_of_cores(
 def check_file_exists(file_name: Optional[str]):
+    """
+    Check if file exists and raise a ValueError if it does not or file_name is None.
+    Args:
+        file_name (str, optional): path which is expected to exist on the file system.
+    Raises:
+        ValueError: if file_name is None or if os.path.exists() reports that the path does not exist.
+    """
     if file_name is None:
         raise ValueError("file_name is not set.")
     if not os.path.exists(file_name):
         raise ValueError("file_name is not written to the file system.")
+def check_log_obj_size(log_obj_size: bool) -> None:
+    """
+    Check if log_obj_size is True and raise a ValueError if it is.
+    Args:
+        log_obj_size (bool): flag requesting the debug mode which reports the size of the communicated objects.
+    Raises:
+        ValueError: if log_obj_size is True.
+    """
+    if log_obj_size:
+        # Adjacent string literals are joined without a separator, so the space between the two sentences has to be
+        # part of the first literal.
+        raise ValueError(
+            "log_obj_size is not supported for the executorlib.SlurmClusterExecutor and executorlib.FluxClusterExecutor. "
+            "Please use log_obj_size=False instead of log_obj_size=True."
+        )

{executorlib-1.5.2 → executorlib-1.5.3}/executorlib/task_scheduler/file/hdf.py RENAMED Viewed

@@ -101,7 +101,7 @@ def get_queue_id(file_name: Optional[str]) -> Optional[int]:
     Returns:
         int: queuing system id from the execution of the python function
     """
-    if file_name is not None:
+    if file_name is not None and os.path.exists(file_name):
         with h5py.File(file_name, "r") as hdf:
             if "queue_id" in hdf:
                 return cloudpickle.loads(np.void(hdf["/queue_id"]))

{executorlib-1.5.2 → executorlib-1.5.3}/executorlib/task_scheduler/file/queue_spawner.py RENAMED Viewed

@@ -10,9 +10,10 @@ from executorlib.task_scheduler.file.hdf import dump, get_queue_id
 def execute_with_pysqa(
     command: list,
+    file_name: str,
+    data_dict: dict,
     cache_directory: str,
     task_dependent_lst: Optional[list[int]] = None,
-    file_name: Optional[str] = None,
     resource_dict: Optional[dict] = None,
     config_directory: Optional[str] = None,
     backend: Optional[str] = None,
@@ -22,9 +23,10 @@ def execute_with_pysqa(
     Args:
         command (list): The command to be executed.
+        file_name (str): Name of the HDF5 file which contains the Python function
+        data_dict (dict): dictionary containing the python function to be executed {"fn": ..., "args": (), "kwargs": {}}
         cache_directory (str): The directory to store the HDF5 files.
         task_dependent_lst (list): A list of subprocesses that the current subprocess depends on. Defaults to [].
-        file_name (str): Name of the HDF5 file which contains the Python function
         resource_dict (dict): resource dictionary, which defines the resources used for the execution of the function.
                               Example resource dictionary: {
                                   cwd: None,
@@ -37,13 +39,20 @@ def execute_with_pysqa(
     """
     if task_dependent_lst is None:
         task_dependent_lst = []
-    check_file_exists(file_name=file_name)
-    queue_id = get_queue_id(file_name=file_name)
     qa = QueueAdapter(
         directory=config_directory,
         queue_type=backend,
         execute_command=_pysqa_execute_command,
     )
+    queue_id = get_queue_id(file_name=file_name)
+    if os.path.exists(file_name) and (
+        queue_id is None or qa.get_status_of_job(process_id=queue_id) is None
+    ):
+        os.remove(file_name)
+        dump(file_name=file_name, data_dict=data_dict)
+    elif not os.path.exists(file_name):
+        dump(file_name=file_name, data_dict=data_dict)
+    check_file_exists(file_name=file_name)
     if queue_id is None or qa.get_status_of_job(process_id=queue_id) is None:
         if resource_dict is None:
             resource_dict = {}
@@ -81,6 +90,29 @@ def execute_with_pysqa(
     return queue_id
+def terminate_with_pysqa(
+    queue_id: int,
+    config_directory: Optional[str] = None,
+    backend: Optional[str] = None,
+):
+    """
+    Delete job from queuing system
+    The job is only deleted when the queuing system reports a status for it and that status is neither "finished"
+    nor "error"; otherwise the function returns without calling delete_job().
+    Args:
+        queue_id (int): Queuing system ID of the job to delete.
+        config_directory (str, optional): path to the config directory.
+        backend (str, optional): name of the backend used to spawn tasks.
+    """
+    # A fresh QueueAdapter is created per call from the given configuration directory and backend.
+    qa = QueueAdapter(
+        directory=config_directory,
+        queue_type=backend,
+        execute_command=_pysqa_execute_command,
+    )
+    status = qa.get_status_of_job(process_id=queue_id)
+    # Skip jobs without a status (presumably no longer known to the queuing system - verify against pysqa) and jobs
+    # which already ended as "finished" or "error".
+    if status is not None and status not in ["finished", "error"]:
+        qa.delete_job(process_id=queue_id)
 def _pysqa_execute_command(
     commands: str,
     working_directory: Optional[str] = None,

{executorlib-1.5.2 → executorlib-1.5.3}/executorlib/task_scheduler/file/shared.py RENAMED Viewed

@@ -9,7 +9,8 @@ from typing import Any, Callable, Optional
 from executorlib.standalone.cache import get_cache_files
 from executorlib.standalone.command import get_command_path
 from executorlib.standalone.serialize import serialize_funct_h5
-from executorlib.task_scheduler.file.hdf import dump, get_output
+from executorlib.task_scheduler.file.hdf import get_output
+from executorlib.task_scheduler.file.subprocess_spawner import terminate_subprocess
 class FutureItem:
@@ -86,9 +87,30 @@ def execute_tasks_h5(
         with contextlib.suppress(queue.Empty):
             task_dict = future_queue.get_nowait()
         if task_dict is not None and "shutdown" in task_dict and task_dict["shutdown"]:
-            if terminate_function is not None:
+            if task_dict["wait"]:
+                while len(memory_dict) > 0:
+                    memory_dict = {
+                        key: _check_task_output(
+                            task_key=key,
+                            future_obj=value,
+                            cache_directory=cache_dir_dict[key],
+                        )
+                        for key, value in memory_dict.items()
+                        if not value.done()
+                    }
+            if (
+                terminate_function is not None
+                and terminate_function == terminate_subprocess
+            ):
                 for task in process_dict.values():
                     terminate_function(task=task)
+            elif terminate_function is not None:
+                for queue_id in process_dict.values():
+                    terminate_function(
+                        queue_id=queue_id,
+                        config_directory=pysqa_config_directory,
+                        backend=backend,
+                    )
             future_queue.task_done()
             future_queue.join()
             break
@@ -116,9 +138,6 @@ def execute_tasks_h5(
                     cache_directory, task_key + "_o.h5"
                 ) not in get_cache_files(cache_directory=cache_directory):
                     file_name = os.path.join(cache_directory, task_key + "_i.h5")
-                    if os.path.exists(file_name):
-                        os.remove(file_name)
-                    dump(file_name=file_name, data_dict=data_dict)
                     if not disable_dependencies:
                         task_dependent_lst = [
                             process_dict[k] for k in future_wait_key_lst
@@ -137,6 +156,7 @@ def execute_tasks_h5(
                             cores=task_resource_dict["cores"],
                         ),
                         file_name=file_name,
+                        data_dict=data_dict,
                         task_dependent_lst=task_dependent_lst,
                         resource_dict=task_resource_dict,
                         config_directory=pysqa_config_directory,

{executorlib-1.5.2 → executorlib-1.5.3}/executorlib/task_scheduler/file/subprocess_spawner.py RENAMED Viewed

@@ -1,33 +1,37 @@
+import os
 import subprocess
 import time
 from typing import Optional
 from executorlib.standalone.inputcheck import check_file_exists
+from executorlib.task_scheduler.file.hdf import dump
 def execute_in_subprocess(
     command: list,
+    file_name: str,
+    data_dict: dict,
+    cache_directory: Optional[str] = None,
     task_dependent_lst: Optional[list] = None,
-    file_name: Optional[str] = None,
     resource_dict: Optional[dict] = None,
     config_directory: Optional[str] = None,
     backend: Optional[str] = None,
-    cache_directory: Optional[str] = None,
 ) -> subprocess.Popen:
     """
     Execute a command in a subprocess.
     Args:
         command (list): The command to be executed.
-        task_dependent_lst (list): A list of subprocesses that the current subprocess depends on. Defaults to [].
         file_name (str): Name of the HDF5 file which contains the Python function
+        data_dict (dict): dictionary containing the python function to be executed {"fn": ..., "args": (), "kwargs": {}}
+        cache_directory (str): The directory to store the HDF5 files.
+        task_dependent_lst (list): A list of subprocesses that the current subprocess depends on. Defaults to [].
         resource_dict (dict): resource dictionary, which defines the resources used for the execution of the function.
                               Example resource dictionary: {
                                   cwd: None,
                               }
         config_directory (str, optional): path to the config directory.
         backend (str, optional): name of the backend used to spawn tasks.
-        cache_directory (str): The directory to store the HDF5 files.
     Returns:
         subprocess.Popen: The subprocess object.
@@ -35,6 +39,9 @@ def execute_in_subprocess(
     """
     if task_dependent_lst is None:
         task_dependent_lst = []
+    if os.path.exists(file_name):
+        os.remove(file_name)
+    dump(file_name=file_name, data_dict=data_dict)
     check_file_exists(file_name=file_name)
     while len(task_dependent_lst) > 0:
         task_dependent_lst = [

{executorlib-1.5.2 → executorlib-1.5.3}/executorlib/task_scheduler/file/task_scheduler.py RENAMED Viewed

@@ -17,10 +17,14 @@ from executorlib.task_scheduler.file.subprocess_spawner import (
 )
 try:
-    from executorlib.task_scheduler.file.queue_spawner import execute_with_pysqa
+    from executorlib.task_scheduler.file.queue_spawner import (
+        execute_with_pysqa,
+        terminate_with_pysqa,
+    )
 except ImportError:
     # If pysqa is not available fall back to executing tasks in a subprocess
     execute_with_pysqa = execute_in_subprocess  # type: ignore
+    terminate_with_pysqa = None  # type: ignore
 class FileTaskScheduler(TaskSchedulerBase):
@@ -58,8 +62,6 @@ class FileTaskScheduler(TaskSchedulerBase):
         resource_dict.update(
             {k: v for k, v in default_resource_dict.items() if k not in resource_dict}
         )
-        if execute_function == execute_in_subprocess and terminate_function is None:
-            terminate_function = terminate_subprocess
         self._process_kwargs = {
             "resource_dict": resource_dict,
             "future_queue": self._future_queue,
@@ -80,7 +82,7 @@ class FileTaskScheduler(TaskSchedulerBase):
 def create_file_executor(
     resource_dict: dict,
     max_workers: Optional[int] = None,
-    backend: str = "flux_submission",
+    backend: Optional[str] = None,
     max_cores: Optional[int] = None,
     cache_directory: Optional[str] = None,
     flux_executor=None,
@@ -92,6 +94,7 @@ def create_file_executor(
     block_allocation: bool = False,
     init_function: Optional[Callable] = None,
     disable_dependencies: bool = False,
+    execute_function: Callable = execute_with_pysqa,
 ):
     if block_allocation:
         raise ValueError(
@@ -109,9 +112,15 @@ def create_file_executor(
     check_executor(executor=flux_executor)
     check_nested_flux_executor(nested_flux_executor=flux_executor_nesting)
     check_flux_log_files(flux_log_files=flux_log_files)
+    if execute_function != execute_in_subprocess:
+        terminate_function = terminate_with_pysqa  # type: ignore
+    else:
+        terminate_function = terminate_subprocess  # type: ignore
     return FileTaskScheduler(
         resource_dict=resource_dict,
         pysqa_config_directory=pysqa_config_directory,
-        backend=backend.split("_submission")[0],
+        backend=backend,
         disable_dependencies=disable_dependencies,
+        execute_function=execute_function,
+        terminate_function=terminate_function,
     )