executorlib 0.0.8__tar.gz → 0.0.10__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {executorlib-0.0.8/executorlib.egg-info → executorlib-0.0.10}/PKG-INFO +2 -2
- {executorlib-0.0.8 → executorlib-0.0.10}/executorlib/__init__.py +15 -12
- {executorlib-0.0.8 → executorlib-0.0.10}/executorlib/_version.py +3 -3
- executorlib-0.0.10/executorlib/interactive/create.py +287 -0
- executorlib-0.0.10/executorlib/interactive/executor.py +132 -0
- {executorlib-0.0.8 → executorlib-0.0.10/executorlib.egg-info}/PKG-INFO +2 -2
- {executorlib-0.0.8 → executorlib-0.0.10}/executorlib.egg-info/SOURCES.txt +1 -0
- {executorlib-0.0.8 → executorlib-0.0.10}/executorlib.egg-info/requires.txt +1 -1
- {executorlib-0.0.8 → executorlib-0.0.10}/pyproject.toml +1 -1
- {executorlib-0.0.8 → executorlib-0.0.10}/tests/test_dependencies_executor.py +1 -1
- executorlib-0.0.8/executorlib/interactive/executor.py +0 -329
- {executorlib-0.0.8 → executorlib-0.0.10}/LICENSE +0 -0
- {executorlib-0.0.8 → executorlib-0.0.10}/MANIFEST.in +0 -0
- {executorlib-0.0.8 → executorlib-0.0.10}/README.md +0 -0
- {executorlib-0.0.8 → executorlib-0.0.10}/executorlib/backend/__init__.py +0 -0
- {executorlib-0.0.8 → executorlib-0.0.10}/executorlib/backend/cache_parallel.py +0 -0
- {executorlib-0.0.8 → executorlib-0.0.10}/executorlib/backend/cache_serial.py +0 -0
- {executorlib-0.0.8 → executorlib-0.0.10}/executorlib/backend/interactive_parallel.py +0 -0
- {executorlib-0.0.8 → executorlib-0.0.10}/executorlib/backend/interactive_serial.py +0 -0
- {executorlib-0.0.8 → executorlib-0.0.10}/executorlib/base/__init__.py +0 -0
- {executorlib-0.0.8 → executorlib-0.0.10}/executorlib/base/executor.py +0 -0
- {executorlib-0.0.8 → executorlib-0.0.10}/executorlib/cache/__init__.py +0 -0
- {executorlib-0.0.8 → executorlib-0.0.10}/executorlib/cache/backend.py +0 -0
- {executorlib-0.0.8 → executorlib-0.0.10}/executorlib/cache/executor.py +0 -0
- {executorlib-0.0.8 → executorlib-0.0.10}/executorlib/cache/queue_spawner.py +0 -0
- {executorlib-0.0.8 → executorlib-0.0.10}/executorlib/cache/shared.py +0 -0
- {executorlib-0.0.8 → executorlib-0.0.10}/executorlib/cache/subprocess_spawner.py +0 -0
- {executorlib-0.0.8 → executorlib-0.0.10}/executorlib/interactive/__init__.py +0 -0
- {executorlib-0.0.8 → executorlib-0.0.10}/executorlib/interactive/flux.py +0 -0
- {executorlib-0.0.8 → executorlib-0.0.10}/executorlib/interactive/shared.py +0 -0
- {executorlib-0.0.8 → executorlib-0.0.10}/executorlib/interactive/slurm.py +0 -0
- {executorlib-0.0.8 → executorlib-0.0.10}/executorlib/standalone/__init__.py +0 -0
- {executorlib-0.0.8 → executorlib-0.0.10}/executorlib/standalone/command.py +0 -0
- {executorlib-0.0.8 → executorlib-0.0.10}/executorlib/standalone/hdf.py +0 -0
- {executorlib-0.0.8 → executorlib-0.0.10}/executorlib/standalone/inputcheck.py +0 -0
- {executorlib-0.0.8 → executorlib-0.0.10}/executorlib/standalone/interactive/__init__.py +0 -0
- {executorlib-0.0.8 → executorlib-0.0.10}/executorlib/standalone/interactive/backend.py +0 -0
- {executorlib-0.0.8 → executorlib-0.0.10}/executorlib/standalone/interactive/communication.py +0 -0
- {executorlib-0.0.8 → executorlib-0.0.10}/executorlib/standalone/interactive/spawner.py +0 -0
- {executorlib-0.0.8 → executorlib-0.0.10}/executorlib/standalone/plot.py +0 -0
- {executorlib-0.0.8 → executorlib-0.0.10}/executorlib/standalone/queue.py +0 -0
- {executorlib-0.0.8 → executorlib-0.0.10}/executorlib/standalone/serialize.py +0 -0
- {executorlib-0.0.8 → executorlib-0.0.10}/executorlib/standalone/thread.py +0 -0
- {executorlib-0.0.8 → executorlib-0.0.10}/executorlib.egg-info/dependency_links.txt +0 -0
- {executorlib-0.0.8 → executorlib-0.0.10}/executorlib.egg-info/top_level.txt +0 -0
- {executorlib-0.0.8 → executorlib-0.0.10}/setup.cfg +0 -0
- {executorlib-0.0.8 → executorlib-0.0.10}/setup.py +0 -0
- {executorlib-0.0.8 → executorlib-0.0.10}/tests/test_backend_serial.py +0 -0
- {executorlib-0.0.8 → executorlib-0.0.10}/tests/test_cache_executor_interactive.py +0 -0
- {executorlib-0.0.8 → executorlib-0.0.10}/tests/test_cache_executor_mpi.py +0 -0
- {executorlib-0.0.8 → executorlib-0.0.10}/tests/test_cache_executor_pysqa_flux.py +0 -0
- {executorlib-0.0.8 → executorlib-0.0.10}/tests/test_cache_executor_serial.py +0 -0
- {executorlib-0.0.8 → executorlib-0.0.10}/tests/test_cache_hdf.py +0 -0
- {executorlib-0.0.8 → executorlib-0.0.10}/tests/test_cache_shared.py +0 -0
- {executorlib-0.0.8 → executorlib-0.0.10}/tests/test_executor_backend_flux.py +0 -0
- {executorlib-0.0.8 → executorlib-0.0.10}/tests/test_executor_backend_mpi.py +0 -0
- {executorlib-0.0.8 → executorlib-0.0.10}/tests/test_executor_backend_mpi_noblock.py +0 -0
- {executorlib-0.0.8 → executorlib-0.0.10}/tests/test_flux_executor.py +0 -0
- {executorlib-0.0.8 → executorlib-0.0.10}/tests/test_integration_pyiron_workflow.py +0 -0
- {executorlib-0.0.8 → executorlib-0.0.10}/tests/test_local_executor.py +0 -0
- {executorlib-0.0.8 → executorlib-0.0.10}/tests/test_local_executor_future.py +0 -0
- {executorlib-0.0.8 → executorlib-0.0.10}/tests/test_pysqa_subprocess.py +0 -0
- {executorlib-0.0.8 → executorlib-0.0.10}/tests/test_shared_backend.py +0 -0
- {executorlib-0.0.8 → executorlib-0.0.10}/tests/test_shared_communication.py +0 -0
- {executorlib-0.0.8 → executorlib-0.0.10}/tests/test_shared_executorbase.py +0 -0
- {executorlib-0.0.8 → executorlib-0.0.10}/tests/test_shared_input_check.py +0 -0
- {executorlib-0.0.8 → executorlib-0.0.10}/tests/test_shared_thread.py +0 -0
- {executorlib-0.0.8 → executorlib-0.0.10}/tests/test_shell_executor.py +0 -0
- {executorlib-0.0.8 → executorlib-0.0.10}/tests/test_shell_interactive.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: executorlib
|
|
3
|
-
Version: 0.0.8
|
|
3
|
+
Version: 0.0.10
|
|
4
4
|
Summary: Scale serial and MPI-parallel python functions over hundreds of compute nodes all from within a jupyter notebook or serial python process.
|
|
5
5
|
Author-email: Jan Janssen <janssen@lanl.gov>
|
|
6
6
|
License: BSD 3-Clause License
|
|
@@ -51,7 +51,7 @@ Requires-Python: <3.14,>=3.9
|
|
|
51
51
|
Description-Content-Type: text/markdown
|
|
52
52
|
License-File: LICENSE
|
|
53
53
|
Requires-Dist: cloudpickle<=3.1.1,>=2.0.0
|
|
54
|
-
Requires-Dist: pyzmq<=26.2.0,>=25.0.0
|
|
54
|
+
Requires-Dist: pyzmq<=26.2.1,>=25.0.0
|
|
55
55
|
Provides-Extra: cache
|
|
56
56
|
Requires-Dist: h5py<=3.12.1,>=3.6.0; extra == "cache"
|
|
57
57
|
Provides-Extra: graph
|
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
from typing import Callable, Optional
|
|
2
2
|
|
|
3
3
|
from executorlib._version import get_versions as _get_versions
|
|
4
|
+
from executorlib.interactive.create import create_executor as _create_executor
|
|
4
5
|
from executorlib.interactive.executor import (
|
|
5
6
|
ExecutorWithDependencies as _ExecutorWithDependencies,
|
|
6
7
|
)
|
|
7
|
-
from executorlib.interactive.executor import create_executor as _create_executor
|
|
8
8
|
from executorlib.standalone.inputcheck import (
|
|
9
9
|
check_plot_dependency_graph as _check_plot_dependency_graph,
|
|
10
10
|
)
|
|
@@ -212,18 +212,21 @@ class Executor:
|
|
|
212
212
|
elif not disable_dependencies:
|
|
213
213
|
_check_pysqa_config_directory(pysqa_config_directory=pysqa_config_directory)
|
|
214
214
|
return _ExecutorWithDependencies(
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
215
|
+
executor=_create_executor(
|
|
216
|
+
max_workers=max_workers,
|
|
217
|
+
backend=backend,
|
|
218
|
+
cache_directory=cache_directory,
|
|
219
|
+
max_cores=max_cores,
|
|
220
|
+
resource_dict=resource_dict,
|
|
221
|
+
flux_executor=flux_executor,
|
|
222
|
+
flux_executor_pmi_mode=flux_executor_pmi_mode,
|
|
223
|
+
flux_executor_nesting=flux_executor_nesting,
|
|
224
|
+
flux_log_files=flux_log_files,
|
|
225
|
+
hostname_localhost=hostname_localhost,
|
|
226
|
+
block_allocation=block_allocation,
|
|
227
|
+
init_function=init_function,
|
|
228
|
+
),
|
|
218
229
|
max_cores=max_cores,
|
|
219
|
-
resource_dict=resource_dict,
|
|
220
|
-
flux_executor=flux_executor,
|
|
221
|
-
flux_executor_pmi_mode=flux_executor_pmi_mode,
|
|
222
|
-
flux_executor_nesting=flux_executor_nesting,
|
|
223
|
-
flux_log_files=flux_log_files,
|
|
224
|
-
hostname_localhost=hostname_localhost,
|
|
225
|
-
block_allocation=block_allocation,
|
|
226
|
-
init_function=init_function,
|
|
227
230
|
refresh_rate=refresh_rate,
|
|
228
231
|
plot_dependency_graph=plot_dependency_graph,
|
|
229
232
|
plot_dependency_graph_filename=plot_dependency_graph_filename,
|
|
@@ -8,11 +8,11 @@ import json
|
|
|
8
8
|
|
|
9
9
|
version_json = '''
|
|
10
10
|
{
|
|
11
|
-
"date": "2025-
|
|
11
|
+
"date": "2025-02-01T14:41:16+0100",
|
|
12
12
|
"dirty": true,
|
|
13
13
|
"error": null,
|
|
14
|
-
"full-revisionid": "
|
|
15
|
-
"version": "0.0.8"
|
|
14
|
+
"full-revisionid": "2a5c109632ab691cd7e4309ca43a29354424b091",
|
|
15
|
+
"version": "0.0.10"
|
|
16
16
|
}
|
|
17
17
|
''' # END VERSION_JSON
|
|
18
18
|
|
|
@@ -0,0 +1,287 @@
|
|
|
1
|
+
from typing import Callable, Optional, Union
|
|
2
|
+
|
|
3
|
+
from executorlib.interactive.shared import (
|
|
4
|
+
InteractiveExecutor,
|
|
5
|
+
InteractiveStepExecutor,
|
|
6
|
+
)
|
|
7
|
+
from executorlib.interactive.slurm import SrunSpawner
|
|
8
|
+
from executorlib.interactive.slurm import (
|
|
9
|
+
validate_max_workers as validate_max_workers_slurm,
|
|
10
|
+
)
|
|
11
|
+
from executorlib.standalone.inputcheck import (
|
|
12
|
+
check_command_line_argument_lst,
|
|
13
|
+
check_executor,
|
|
14
|
+
check_flux_log_files,
|
|
15
|
+
check_gpus_per_worker,
|
|
16
|
+
check_init_function,
|
|
17
|
+
check_nested_flux_executor,
|
|
18
|
+
check_oversubscribe,
|
|
19
|
+
check_pmi,
|
|
20
|
+
validate_number_of_cores,
|
|
21
|
+
)
|
|
22
|
+
from executorlib.standalone.interactive.spawner import MpiExecSpawner
|
|
23
|
+
|
|
24
|
+
try: # The PyFluxExecutor requires flux-base to be installed.
|
|
25
|
+
from executorlib.interactive.flux import FluxPythonSpawner
|
|
26
|
+
from executorlib.interactive.flux import (
|
|
27
|
+
validate_max_workers as validate_max_workers_flux,
|
|
28
|
+
)
|
|
29
|
+
except ImportError:
|
|
30
|
+
pass
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def create_executor(
    max_workers: Optional[int] = None,
    backend: str = "local",
    max_cores: Optional[int] = None,
    cache_directory: Optional[str] = None,
    resource_dict: Optional[dict] = None,
    flux_executor=None,
    flux_executor_pmi_mode: Optional[str] = None,
    flux_executor_nesting: bool = False,
    flux_log_files: bool = False,
    hostname_localhost: Optional[bool] = None,
    block_allocation: bool = False,
    init_function: Optional[Callable] = None,
) -> Union[InteractiveStepExecutor, InteractiveExecutor]:
    """
    Create the interactive executor for the selected backend.

    The call is dispatched to create_flux_allocation_executor(), create_slurm_allocation_executor() or
    create_local_executor() depending on the backend. When a flux_executor is provided the backend is always
    switched to "flux_allocation", independent of the backend argument.

    Args:
        max_workers (int): for backwards compatibility with the standard library, max_workers also defines the number of
                           cores which can be used in parallel - just like the max_cores parameter. Using max_cores is
                           recommended, as computers have a limited number of compute cores.
        backend (str): Switch between the different backends "flux_allocation", "slurm_allocation" or "local". The
                       default is "local".
        max_cores (int): defines the number cores which can be used in parallel
        cache_directory (str, optional): The directory to store cache files. Defaults to None.
        resource_dict (dict, optional): A dictionary of resources required by the task. The dictionary is copied, the
                                        object provided by the caller is not modified. With the following keys:
                              - cores (int): number of MPI cores to be used for each function call
                              - threads_per_core (int): number of OpenMP threads to be used for each function call
                              - gpus_per_core (int): number of GPUs per worker - defaults to 0
                              - cwd (str/None): current working directory where the parallel python task is executed
                              - openmpi_oversubscribe (bool): adds the `--oversubscribe` command line flag (OpenMPI and
                                                              SLURM only) - default False
                              - slurm_cmd_args (list): Additional command line arguments for the srun call (SLURM only)
        flux_executor (flux.job.FluxExecutor): Flux Python interface to submit the workers to flux
        flux_executor_pmi_mode (str): PMI interface to use (OpenMPI v5 requires pmix) default is None (Flux only)
        flux_executor_nesting (bool): Provide hierarchically nested Flux job scheduler inside the submitted function.
        flux_log_files (bool, optional): Write flux stdout and stderr files. Defaults to False.
        hostname_localhost (boolean): use localhost instead of the hostname to establish the zmq connection. In the
                                      context of an HPC cluster this is essential to be able to communicate to an
                                      Executor running on a different compute node within the same allocation. And in
                                      principle any computer should be able to resolve that their own hostname points
                                      to the same address as localhost. Still MacOS >= 12 seems to disable this look up
                                      for security reasons. So on MacOS it is required to set this option to true
        block_allocation (boolean): To accelerate the submission of a series of python functions with the same
                                    resource requirements, executorlib supports block allocation. In this case all
                                    resources have to be defined on the executor, rather than during the submission
                                    of the individual function.
        init_function (None): optional function to preset arguments for functions which are submitted later

    Returns:
        Union[InteractiveStepExecutor, InteractiveExecutor]: the executor created by the backend specific function.

    Raises:
        ValueError: if the backend is not one of "flux_allocation", "slurm_allocation" or "local".
    """
    # Work on a private copy: the backend specific functions add and remove keys, which must neither leak into
    # the dictionary owned by the caller nor accumulate in a default argument shared between calls.
    resource_dict = {} if resource_dict is None else dict(resource_dict)

    # A flux executor object only makes sense for the flux backend, so it takes precedence over the backend name.
    if flux_executor is not None:
        backend = "flux_allocation"

    if backend == "flux_allocation":
        # create_flux_allocation_executor() validates init_function, the PMI mode, oversubscription and the
        # command line arguments itself, in the same order, so these checks are not repeated here.
        return create_flux_allocation_executor(
            max_workers=max_workers,
            max_cores=max_cores,
            cache_directory=cache_directory,
            resource_dict=resource_dict,
            flux_executor=flux_executor,
            flux_executor_pmi_mode=flux_executor_pmi_mode,
            flux_executor_nesting=flux_executor_nesting,
            flux_log_files=flux_log_files,
            hostname_localhost=hostname_localhost,
            block_allocation=block_allocation,
            init_function=init_function,
        )

    if backend in ("slurm_allocation", "local"):
        # The flux specific options are validated against the non-flux backends.
        check_pmi(backend=backend, pmi=flux_executor_pmi_mode)
        check_executor(executor=flux_executor)
        check_nested_flux_executor(nested_flux_executor=flux_executor_nesting)
        check_flux_log_files(flux_log_files=flux_log_files)
        if backend == "slurm_allocation":
            create_function = create_slurm_allocation_executor
        else:
            create_function = create_local_executor
        return create_function(
            max_workers=max_workers,
            max_cores=max_cores,
            cache_directory=cache_directory,
            resource_dict=resource_dict,
            hostname_localhost=hostname_localhost,
            block_allocation=block_allocation,
            init_function=init_function,
        )

    raise ValueError(
        "The supported backends are slurm_allocation, slurm_submission, flux_allocation, flux_submission and local."
    )
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
def create_flux_allocation_executor(
    max_workers: Optional[int] = None,
    max_cores: Optional[int] = None,
    cache_directory: Optional[str] = None,
    resource_dict: Optional[dict] = None,
    flux_executor=None,
    flux_executor_pmi_mode: Optional[str] = None,
    flux_executor_nesting: bool = False,
    flux_log_files: bool = False,
    hostname_localhost: Optional[bool] = None,
    block_allocation: bool = False,
    init_function: Optional[Callable] = None,
) -> Union[InteractiveStepExecutor, InteractiveExecutor]:
    """
    Create an executor which starts its workers with the FluxPythonSpawner.

    Args:
        max_workers (int, optional): number of workers, validated together with max_cores when block_allocation
                                     is enabled
        max_cores (int, optional): number of cores which can be used in parallel
        cache_directory (str, optional): stored in the executor arguments under the key "cache_directory"
        resource_dict (dict, optional): resources required by the task. The dictionary is copied, so the object
                                        provided by the caller is not modified. The keys "openmpi_oversubscribe"
                                        and "slurm_cmd_args" are validated and then removed.
        flux_executor: stored in the executor arguments under the key "flux_executor"
        flux_executor_pmi_mode (str, optional): PMI mode, validated with check_pmi() for the flux_allocation backend
        flux_executor_nesting (bool): stored in the executor arguments under the key "flux_executor_nesting"
        flux_log_files (bool): stored in the executor arguments under the key "flux_log_files"
        hostname_localhost (bool, optional): stored in the executor arguments under the key "hostname_localhost"
        block_allocation (bool): select the InteractiveExecutor (True) or the InteractiveStepExecutor (False)
        init_function (Callable, optional): only forwarded to the workers when block_allocation is enabled

    Returns:
        Union[InteractiveStepExecutor, InteractiveExecutor]: InteractiveExecutor if block_allocation is enabled,
        otherwise InteractiveStepExecutor.
    """
    check_init_function(block_allocation=block_allocation, init_function=init_function)
    check_pmi(backend="flux_allocation", pmi=flux_executor_pmi_mode)
    # Copy before modifying: keys are added and removed below, which must not affect the dictionary of the caller
    # or a default argument shared between calls.
    resource_dict = {} if resource_dict is None else dict(resource_dict)
    cores_per_worker = resource_dict.get("cores", 1)
    resource_dict["cache_directory"] = cache_directory
    resource_dict["hostname_localhost"] = hostname_localhost
    check_oversubscribe(oversubscribe=resource_dict.get("openmpi_oversubscribe", False))
    check_command_line_argument_lst(
        command_line_argument_lst=resource_dict.get("slurm_cmd_args", [])
    )
    # These options are only validated for this backend and not handed on to the spawner.
    resource_dict.pop("openmpi_oversubscribe", None)
    resource_dict.pop("slurm_cmd_args", None)
    resource_dict["flux_executor"] = flux_executor
    resource_dict["flux_executor_pmi_mode"] = flux_executor_pmi_mode
    resource_dict["flux_executor_nesting"] = flux_executor_nesting
    resource_dict["flux_log_files"] = flux_log_files
    if not block_allocation:
        return InteractiveStepExecutor(
            max_cores=max_cores,
            max_workers=max_workers,
            executor_kwargs=resource_dict,
            spawner=FluxPythonSpawner,
        )
    resource_dict["init_function"] = init_function
    max_workers = validate_number_of_cores(
        max_cores=max_cores,
        max_workers=max_workers,
        cores_per_worker=cores_per_worker,
        set_local_cores=False,
    )
    validate_max_workers_flux(
        max_workers=max_workers,
        cores=cores_per_worker,
        threads_per_core=resource_dict.get("threads_per_core", 1),
    )
    return InteractiveExecutor(
        max_workers=max_workers,
        executor_kwargs=resource_dict,
        spawner=FluxPythonSpawner,
    )
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
def create_slurm_allocation_executor(
    max_workers: Optional[int] = None,
    max_cores: Optional[int] = None,
    cache_directory: Optional[str] = None,
    resource_dict: Optional[dict] = None,
    hostname_localhost: Optional[bool] = None,
    block_allocation: bool = False,
    init_function: Optional[Callable] = None,
) -> Union[InteractiveStepExecutor, InteractiveExecutor]:
    """
    Create an executor which starts its workers with the SrunSpawner.

    Args:
        max_workers (int, optional): number of workers, validated together with max_cores when block_allocation
                                     is enabled
        max_cores (int, optional): number of cores which can be used in parallel
        cache_directory (str, optional): stored in the executor arguments under the key "cache_directory"
        resource_dict (dict, optional): resources required by the task. The dictionary is copied, so the object
                                        provided by the caller is not modified.
        hostname_localhost (bool, optional): stored in the executor arguments under the key "hostname_localhost"
        block_allocation (bool): select the InteractiveExecutor (True) or the InteractiveStepExecutor (False)
        init_function (Callable, optional): only forwarded to the workers when block_allocation is enabled

    Returns:
        Union[InteractiveStepExecutor, InteractiveExecutor]: InteractiveExecutor if block_allocation is enabled,
        otherwise InteractiveStepExecutor.
    """
    check_init_function(block_allocation=block_allocation, init_function=init_function)
    # Copy before modifying: keys are added below, which must not affect the dictionary of the caller or a
    # default argument shared between calls.
    resource_dict = {} if resource_dict is None else dict(resource_dict)
    cores_per_worker = resource_dict.get("cores", 1)
    resource_dict["cache_directory"] = cache_directory
    resource_dict["hostname_localhost"] = hostname_localhost
    if not block_allocation:
        return InteractiveStepExecutor(
            max_cores=max_cores,
            max_workers=max_workers,
            executor_kwargs=resource_dict,
            spawner=SrunSpawner,
        )
    resource_dict["init_function"] = init_function
    max_workers = validate_number_of_cores(
        max_cores=max_cores,
        max_workers=max_workers,
        cores_per_worker=cores_per_worker,
        set_local_cores=False,
    )
    validate_max_workers_slurm(
        max_workers=max_workers,
        cores=cores_per_worker,
        threads_per_core=resource_dict.get("threads_per_core", 1),
    )
    return InteractiveExecutor(
        max_workers=max_workers,
        executor_kwargs=resource_dict,
        spawner=SrunSpawner,
    )
|
|
243
|
+
|
|
244
|
+
|
|
245
|
+
def create_local_executor(
    max_workers: Optional[int] = None,
    max_cores: Optional[int] = None,
    cache_directory: Optional[str] = None,
    resource_dict: Optional[dict] = None,
    hostname_localhost: Optional[bool] = None,
    block_allocation: bool = False,
    init_function: Optional[Callable] = None,
) -> Union[InteractiveStepExecutor, InteractiveExecutor]:
    """
    Create an executor which starts its workers with the MpiExecSpawner.

    Args:
        max_workers (int, optional): number of workers, validated together with max_cores when block_allocation
                                     is enabled
        max_cores (int, optional): number of cores which can be used in parallel
        cache_directory (str, optional): stored in the executor arguments under the key "cache_directory"
        resource_dict (dict, optional): resources required by the task. The dictionary is copied, so the object
                                        provided by the caller is not modified. The keys "gpus_per_core" and
                                        "slurm_cmd_args" are validated and then removed, "threads_per_core" is
                                        removed without validation.
        hostname_localhost (bool, optional): stored in the executor arguments under the key "hostname_localhost"
        block_allocation (bool): select the InteractiveExecutor (True) or the InteractiveStepExecutor (False)
        init_function (Callable, optional): only forwarded to the workers when block_allocation is enabled

    Returns:
        Union[InteractiveStepExecutor, InteractiveExecutor]: InteractiveExecutor if block_allocation is enabled,
        otherwise InteractiveStepExecutor.
    """
    check_init_function(block_allocation=block_allocation, init_function=init_function)
    # Copy before modifying: keys are added and removed below, which must not affect the dictionary of the caller
    # or a default argument shared between calls.
    resource_dict = {} if resource_dict is None else dict(resource_dict)
    cores_per_worker = resource_dict.get("cores", 1)
    resource_dict["cache_directory"] = cache_directory
    resource_dict["hostname_localhost"] = hostname_localhost

    check_gpus_per_worker(gpus_per_worker=resource_dict.get("gpus_per_core", 0))
    check_command_line_argument_lst(
        command_line_argument_lst=resource_dict.get("slurm_cmd_args", [])
    )
    # These options are not handed on to the spawner of the local backend.
    for unused_key in ("threads_per_core", "gpus_per_core", "slurm_cmd_args"):
        resource_dict.pop(unused_key, None)
    if not block_allocation:
        return InteractiveStepExecutor(
            max_cores=max_cores,
            max_workers=max_workers,
            executor_kwargs=resource_dict,
            spawner=MpiExecSpawner,
        )
    resource_dict["init_function"] = init_function
    return InteractiveExecutor(
        max_workers=validate_number_of_cores(
            max_cores=max_cores,
            max_workers=max_workers,
            cores_per_worker=cores_per_worker,
            set_local_cores=True,
        ),
        executor_kwargs=resource_dict,
        spawner=MpiExecSpawner,
    )
|
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
from concurrent.futures import Future
|
|
2
|
+
from typing import Any, Callable, Dict, Optional
|
|
3
|
+
|
|
4
|
+
from executorlib.base.executor import ExecutorBase
|
|
5
|
+
from executorlib.interactive.shared import execute_tasks_with_dependencies
|
|
6
|
+
from executorlib.standalone.plot import (
|
|
7
|
+
draw,
|
|
8
|
+
generate_nodes_and_edges,
|
|
9
|
+
generate_task_hash,
|
|
10
|
+
)
|
|
11
|
+
from executorlib.standalone.thread import RaisingThread
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class ExecutorWithDependencies(ExecutorBase):
    """
    ExecutorWithDependencies is a class that extends ExecutorBase and provides functionality for executing tasks with
    dependencies.

    Args:
        executor (ExecutorBase): The executor the tasks are handed to. The executor and its _future_queue are passed to
                                 execute_tasks_with_dependencies(), which runs in a background thread.
        max_cores (int, optional): Forwarded to ExecutorBase.__init__(). Defaults to None.
        refresh_rate (float, optional): The refresh rate for updating the executor queue. Defaults to 0.01.
        plot_dependency_graph (bool, optional): Whether to generate and plot the dependency graph. Defaults to False.
        plot_dependency_graph_filename (str, optional): Name of the file to store the plotted graph in. Providing a
                                                        file name enables the generation of the dependency graph.

    Attributes:
        _future_hash_dict (Dict[str, Future]): A dictionary mapping task hash to future object.
        _task_hash_dict (Dict[str, Dict]): A dictionary mapping task hash to task dictionary.
        _generate_dependency_graph (bool): Whether to generate the dependency graph.
        _plot_dependency_graph_filename (str): Name of the file to store the plotted graph in.

    """

    def __init__(
        self,
        executor: ExecutorBase,
        max_cores: Optional[int] = None,
        refresh_rate: float = 0.01,
        plot_dependency_graph: bool = False,
        plot_dependency_graph_filename: Optional[str] = None,
    ) -> None:
        super().__init__(max_cores=max_cores)
        self._set_process(
            RaisingThread(
                target=execute_tasks_with_dependencies,
                kwargs={
                    # Executor Arguments
                    "future_queue": self._future_queue,
                    "executor_queue": executor._future_queue,
                    "executor": executor,
                    "refresh_rate": refresh_rate,
                },
            )
        )
        self._future_hash_dict: dict = {}
        self._task_hash_dict: dict = {}
        self._plot_dependency_graph_filename = plot_dependency_graph_filename
        # A file name implies that the graph is requested, independent of the plot_dependency_graph flag.
        self._generate_dependency_graph = (
            plot_dependency_graph or plot_dependency_graph_filename is not None
        )

    def _invert_future_hash_dict(self) -> dict:
        """Return the mapping from future object to task hash, the inverse of _future_hash_dict."""
        return {
            future: task_hash for task_hash, future in self._future_hash_dict.items()
        }

    def submit(  # type: ignore
        self,
        fn: Callable[..., Any],
        *args: Any,
        resource_dict: Optional[Dict[str, Any]] = None,
        **kwargs: Any,
    ) -> Future:
        """
        Submits a task to the executor.

        When the dependency graph is generated the task is not executed. It is only recorded for the graph and the
        returned future is already resolved with the result None.

        Args:
            fn (Callable): The function to be executed.
            *args: Variable length argument list.
            resource_dict (dict, optional): A dictionary of resources required by the task. Defaults to an empty
                                            dictionary.
            **kwargs: Arbitrary keyword arguments.

        Returns:
            Future: A future object representing the result of the task.

        """
        # A fresh dictionary per call avoids sharing one default object between all submitted tasks.
        if resource_dict is None:
            resource_dict = {}
        if not self._generate_dependency_graph:
            return super().submit(fn, *args, resource_dict=resource_dict, **kwargs)
        f: Future = Future()
        f.set_result(None)
        task_dict = {
            "fn": fn,
            "args": args,
            "kwargs": kwargs,
            "future": f,
            "resource_dict": resource_dict,
        }
        task_hash = generate_task_hash(
            task_dict=task_dict,
            future_hash_inverse_dict=self._invert_future_hash_dict(),
        )
        self._future_hash_dict[task_hash] = f
        self._task_hash_dict[task_hash] = task_dict
        return f

    def __exit__(
        self,
        exc_type: Any,
        exc_val: Any,
        exc_tb: Any,
    ) -> None:
        """
        Exit method called when exiting the context manager.

        After the exit of the parent class the dependency graph is drawn, if its generation is enabled.

        Args:
            exc_type: The type of the exception.
            exc_val: The exception instance.
            exc_tb: The traceback object.

        """
        super().__exit__(exc_type=exc_type, exc_val=exc_val, exc_tb=exc_tb)  # type: ignore
        if self._generate_dependency_graph:
            node_lst, edge_lst = generate_nodes_and_edges(
                task_hash_dict=self._task_hash_dict,
                future_hash_inverse_dict=self._invert_future_hash_dict(),
            )
            # The result of draw() is deliberately not returned: a true value returned from __exit__ would
            # suppress an exception raised inside the with block.
            draw(
                node_lst=node_lst,
                edge_lst=edge_lst,
                filename=self._plot_dependency_graph_filename,
            )
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: executorlib
|
|
3
|
-
Version: 0.0.8
|
|
3
|
+
Version: 0.0.10
|
|
4
4
|
Summary: Scale serial and MPI-parallel python functions over hundreds of compute nodes all from within a jupyter notebook or serial python process.
|
|
5
5
|
Author-email: Jan Janssen <janssen@lanl.gov>
|
|
6
6
|
License: BSD 3-Clause License
|
|
@@ -51,7 +51,7 @@ Requires-Python: <3.14,>=3.9
|
|
|
51
51
|
Description-Content-Type: text/markdown
|
|
52
52
|
License-File: LICENSE
|
|
53
53
|
Requires-Dist: cloudpickle<=3.1.1,>=2.0.0
|
|
54
|
-
Requires-Dist: pyzmq<=26.2.0,>=25.0.0
|
|
54
|
+
Requires-Dist: pyzmq<=26.2.1,>=25.0.0
|
|
55
55
|
Provides-Extra: cache
|
|
56
56
|
Requires-Dist: h5py<=3.12.1,>=3.6.0; extra == "cache"
|
|
57
57
|
Provides-Extra: graph
|
|
@@ -24,6 +24,7 @@ executorlib/cache/queue_spawner.py
|
|
|
24
24
|
executorlib/cache/shared.py
|
|
25
25
|
executorlib/cache/subprocess_spawner.py
|
|
26
26
|
executorlib/interactive/__init__.py
|
|
27
|
+
executorlib/interactive/create.py
|
|
27
28
|
executorlib/interactive/executor.py
|
|
28
29
|
executorlib/interactive/flux.py
|
|
29
30
|
executorlib/interactive/shared.py
|
|
@@ -5,7 +5,7 @@ from time import sleep
|
|
|
5
5
|
from queue import Queue
|
|
6
6
|
|
|
7
7
|
from executorlib import Executor
|
|
8
|
-
from executorlib.interactive.executor import create_executor
|
|
8
|
+
from executorlib.interactive.create import create_executor
|
|
9
9
|
from executorlib.interactive.shared import execute_tasks_with_dependencies
|
|
10
10
|
from executorlib.standalone.plot import generate_nodes_and_edges
|
|
11
11
|
from executorlib.standalone.serialize import cloudpickle_register
|
|
@@ -1,329 +0,0 @@
|
|
|
1
|
-
from concurrent.futures import Future
|
|
2
|
-
from typing import Any, Callable, Dict, Optional
|
|
3
|
-
|
|
4
|
-
from executorlib.base.executor import ExecutorBase
|
|
5
|
-
from executorlib.interactive.shared import (
|
|
6
|
-
InteractiveExecutor,
|
|
7
|
-
InteractiveStepExecutor,
|
|
8
|
-
execute_tasks_with_dependencies,
|
|
9
|
-
)
|
|
10
|
-
from executorlib.interactive.slurm import SrunSpawner
|
|
11
|
-
from executorlib.interactive.slurm import (
|
|
12
|
-
validate_max_workers as validate_max_workers_slurm,
|
|
13
|
-
)
|
|
14
|
-
from executorlib.standalone.inputcheck import (
|
|
15
|
-
check_command_line_argument_lst,
|
|
16
|
-
check_executor,
|
|
17
|
-
check_flux_log_files,
|
|
18
|
-
check_gpus_per_worker,
|
|
19
|
-
check_init_function,
|
|
20
|
-
check_nested_flux_executor,
|
|
21
|
-
check_oversubscribe,
|
|
22
|
-
check_pmi,
|
|
23
|
-
validate_number_of_cores,
|
|
24
|
-
)
|
|
25
|
-
from executorlib.standalone.interactive.spawner import MpiExecSpawner
|
|
26
|
-
from executorlib.standalone.plot import (
|
|
27
|
-
draw,
|
|
28
|
-
generate_nodes_and_edges,
|
|
29
|
-
generate_task_hash,
|
|
30
|
-
)
|
|
31
|
-
from executorlib.standalone.thread import RaisingThread
|
|
32
|
-
|
|
33
|
-
try: # The PyFluxExecutor requires flux-base to be installed.
|
|
34
|
-
from executorlib.interactive.flux import FluxPythonSpawner
|
|
35
|
-
from executorlib.interactive.flux import (
|
|
36
|
-
validate_max_workers as validate_max_workers_flux,
|
|
37
|
-
)
|
|
38
|
-
except ImportError:
|
|
39
|
-
pass
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
class ExecutorWithDependencies(ExecutorBase):
|
|
43
|
-
"""
|
|
44
|
-
ExecutorWithDependencies is a class that extends ExecutorBase and provides functionality for executing tasks with
|
|
45
|
-
dependencies.
|
|
46
|
-
|
|
47
|
-
Args:
|
|
48
|
-
refresh_rate (float, optional): The refresh rate for updating the executor queue. Defaults to 0.01.
|
|
49
|
-
plot_dependency_graph (bool, optional): Whether to generate and plot the dependency graph. Defaults to False.
|
|
50
|
-
plot_dependency_graph_filename (str): Name of the file to store the plotted graph in.
|
|
51
|
-
*args: Variable length argument list.
|
|
52
|
-
**kwargs: Arbitrary keyword arguments.
|
|
53
|
-
|
|
54
|
-
Attributes:
|
|
55
|
-
_future_hash_dict (Dict[str, Future]): A dictionary mapping task hash to future object.
|
|
56
|
-
_task_hash_dict (Dict[str, Dict]): A dictionary mapping task hash to task dictionary.
|
|
57
|
-
_generate_dependency_graph (bool): Whether to generate the dependency graph.
|
|
58
|
-
_generate_dependency_graph (str): Name of the file to store the plotted graph in.
|
|
59
|
-
|
|
60
|
-
"""
|
|
61
|
-
|
|
62
|
-
def __init__(
|
|
63
|
-
self,
|
|
64
|
-
*args: Any,
|
|
65
|
-
refresh_rate: float = 0.01,
|
|
66
|
-
plot_dependency_graph: bool = False,
|
|
67
|
-
plot_dependency_graph_filename: Optional[str] = None,
|
|
68
|
-
**kwargs: Any,
|
|
69
|
-
) -> None:
|
|
70
|
-
super().__init__(max_cores=kwargs.get("max_cores", None))
|
|
71
|
-
executor = create_executor(*args, **kwargs)
|
|
72
|
-
self._set_process(
|
|
73
|
-
RaisingThread(
|
|
74
|
-
target=execute_tasks_with_dependencies,
|
|
75
|
-
kwargs={
|
|
76
|
-
# Executor Arguments
|
|
77
|
-
"future_queue": self._future_queue,
|
|
78
|
-
"executor_queue": executor._future_queue,
|
|
79
|
-
"executor": executor,
|
|
80
|
-
"refresh_rate": refresh_rate,
|
|
81
|
-
},
|
|
82
|
-
)
|
|
83
|
-
)
|
|
84
|
-
self._future_hash_dict: dict = {}
|
|
85
|
-
self._task_hash_dict: dict = {}
|
|
86
|
-
self._plot_dependency_graph_filename = plot_dependency_graph_filename
|
|
87
|
-
if plot_dependency_graph_filename is None:
|
|
88
|
-
self._generate_dependency_graph = plot_dependency_graph
|
|
89
|
-
else:
|
|
90
|
-
self._generate_dependency_graph = True
|
|
91
|
-
|
|
92
|
-
def submit( # type: ignore
|
|
93
|
-
self,
|
|
94
|
-
fn: Callable[..., Any],
|
|
95
|
-
*args: Any,
|
|
96
|
-
resource_dict: Dict[str, Any] = {},
|
|
97
|
-
**kwargs: Any,
|
|
98
|
-
) -> Future:
|
|
99
|
-
"""
|
|
100
|
-
Submits a task to the executor.
|
|
101
|
-
|
|
102
|
-
Args:
|
|
103
|
-
fn (Callable): The function to be executed.
|
|
104
|
-
*args: Variable length argument list.
|
|
105
|
-
resource_dict (dict, optional): A dictionary of resources required by the task. Defaults to {}.
|
|
106
|
-
**kwargs: Arbitrary keyword arguments.
|
|
107
|
-
|
|
108
|
-
Returns:
|
|
109
|
-
Future: A future object representing the result of the task.
|
|
110
|
-
|
|
111
|
-
"""
|
|
112
|
-
if not self._generate_dependency_graph:
|
|
113
|
-
f = super().submit(fn, *args, resource_dict=resource_dict, **kwargs)
|
|
114
|
-
else:
|
|
115
|
-
f = Future()
|
|
116
|
-
f.set_result(None)
|
|
117
|
-
task_dict = {
|
|
118
|
-
"fn": fn,
|
|
119
|
-
"args": args,
|
|
120
|
-
"kwargs": kwargs,
|
|
121
|
-
"future": f,
|
|
122
|
-
"resource_dict": resource_dict,
|
|
123
|
-
}
|
|
124
|
-
task_hash = generate_task_hash(
|
|
125
|
-
task_dict=task_dict,
|
|
126
|
-
future_hash_inverse_dict={
|
|
127
|
-
v: k for k, v in self._future_hash_dict.items()
|
|
128
|
-
},
|
|
129
|
-
)
|
|
130
|
-
self._future_hash_dict[task_hash] = f
|
|
131
|
-
self._task_hash_dict[task_hash] = task_dict
|
|
132
|
-
return f
|
|
133
|
-
|
|
134
|
-
def __exit__(
|
|
135
|
-
self,
|
|
136
|
-
exc_type: Any,
|
|
137
|
-
exc_val: Any,
|
|
138
|
-
exc_tb: Any,
|
|
139
|
-
) -> None:
|
|
140
|
-
"""
|
|
141
|
-
Exit method called when exiting the context manager.
|
|
142
|
-
|
|
143
|
-
Args:
|
|
144
|
-
exc_type: The type of the exception.
|
|
145
|
-
exc_val: The exception instance.
|
|
146
|
-
exc_tb: The traceback object.
|
|
147
|
-
|
|
148
|
-
"""
|
|
149
|
-
super().__exit__(exc_type=exc_type, exc_val=exc_val, exc_tb=exc_tb) # type: ignore
|
|
150
|
-
if self._generate_dependency_graph:
|
|
151
|
-
node_lst, edge_lst = generate_nodes_and_edges(
|
|
152
|
-
task_hash_dict=self._task_hash_dict,
|
|
153
|
-
future_hash_inverse_dict={
|
|
154
|
-
v: k for k, v in self._future_hash_dict.items()
|
|
155
|
-
},
|
|
156
|
-
)
|
|
157
|
-
return draw(
|
|
158
|
-
node_lst=node_lst,
|
|
159
|
-
edge_lst=edge_lst,
|
|
160
|
-
filename=self._plot_dependency_graph_filename,
|
|
161
|
-
)
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
def create_executor(
|
|
165
|
-
max_workers: Optional[int] = None,
|
|
166
|
-
backend: str = "local",
|
|
167
|
-
max_cores: Optional[int] = None,
|
|
168
|
-
cache_directory: Optional[str] = None,
|
|
169
|
-
resource_dict: dict = {},
|
|
170
|
-
flux_executor=None,
|
|
171
|
-
flux_executor_pmi_mode: Optional[str] = None,
|
|
172
|
-
flux_executor_nesting: bool = False,
|
|
173
|
-
flux_log_files: bool = False,
|
|
174
|
-
hostname_localhost: Optional[bool] = None,
|
|
175
|
-
block_allocation: bool = False,
|
|
176
|
-
init_function: Optional[Callable] = None,
|
|
177
|
-
):
|
|
178
|
-
"""
|
|
179
|
-
Instead of returning a executorlib.Executor object this function returns either a executorlib.mpi.PyMPIExecutor,
|
|
180
|
-
executorlib.slurm.PySlurmExecutor or executorlib.flux.PyFluxExecutor depending on which backend is available. The
|
|
181
|
-
executorlib.flux.PyFluxExecutor is the preferred choice while the executorlib.mpi.PyMPIExecutor is primarily used
|
|
182
|
-
for development and testing. The executorlib.flux.PyFluxExecutor requires flux-base from the flux-framework to be
|
|
183
|
-
installed and in addition flux-sched to enable GPU scheduling. Finally, the executorlib.slurm.PySlurmExecutor
|
|
184
|
-
requires the SLURM workload manager to be installed on the system.
|
|
185
|
-
|
|
186
|
-
Args:
|
|
187
|
-
max_workers (int): for backwards compatibility with the standard library, max_workers also defines the number of
|
|
188
|
-
cores which can be used in parallel - just like the max_cores parameter. Using max_cores is
|
|
189
|
-
recommended, as computers have a limited number of compute cores.
|
|
190
|
-
backend (str): Switch between the different backends "flux", "local" or "slurm". The default is "local".
|
|
191
|
-
max_cores (int): defines the number cores which can be used in parallel
|
|
192
|
-
cache_directory (str, optional): The directory to store cache files. Defaults to "cache".
|
|
193
|
-
resource_dict (dict): A dictionary of resources required by the task. With the following keys:
|
|
194
|
-
- cores (int): number of MPI cores to be used for each function call
|
|
195
|
-
- threads_per_core (int): number of OpenMP threads to be used for each function call
|
|
196
|
-
- gpus_per_core (int): number of GPUs per worker - defaults to 0
|
|
197
|
-
- cwd (str/None): current working directory where the parallel python task is executed
|
|
198
|
-
- openmpi_oversubscribe (bool): adds the `--oversubscribe` command line flag (OpenMPI and
|
|
199
|
-
SLURM only) - default False
|
|
200
|
-
- slurm_cmd_args (list): Additional command line arguments for the srun call (SLURM only)
|
|
201
|
-
flux_executor (flux.job.FluxExecutor): Flux Python interface to submit the workers to flux
|
|
202
|
-
flux_executor_pmi_mode (str): PMI interface to use (OpenMPI v5 requires pmix) default is None (Flux only)
|
|
203
|
-
flux_executor_nesting (bool): Provide hierarchically nested Flux job scheduler inside the submitted function.
|
|
204
|
-
flux_log_files (bool, optional): Write flux stdout and stderr files. Defaults to False.
|
|
205
|
-
hostname_localhost (boolean): use localhost instead of the hostname to establish the zmq connection. In the
|
|
206
|
-
context of an HPC cluster this essential to be able to communicate to an Executor
|
|
207
|
-
running on a different compute node within the same allocation. And in principle
|
|
208
|
-
any computer should be able to resolve that their own hostname points to the same
|
|
209
|
-
address as localhost. Still MacOS >= 12 seems to disable this look up for security
|
|
210
|
-
reasons. So on MacOS it is required to set this option to true
|
|
211
|
-
block_allocation (boolean): To accelerate the submission of a series of python functions with the same
|
|
212
|
-
resource requirements, executorlib supports block allocation. In this case all
|
|
213
|
-
resources have to be defined on the executor, rather than during the submission
|
|
214
|
-
of the individual function.
|
|
215
|
-
init_function (None): optional function to preset arguments for functions which are submitted later
|
|
216
|
-
"""
|
|
217
|
-
check_init_function(block_allocation=block_allocation, init_function=init_function)
|
|
218
|
-
if flux_executor is not None and backend != "flux_allocation":
|
|
219
|
-
backend = "flux_allocation"
|
|
220
|
-
check_pmi(backend=backend, pmi=flux_executor_pmi_mode)
|
|
221
|
-
cores_per_worker = resource_dict.get("cores", 1)
|
|
222
|
-
resource_dict["cache_directory"] = cache_directory
|
|
223
|
-
resource_dict["hostname_localhost"] = hostname_localhost
|
|
224
|
-
if backend == "flux_allocation":
|
|
225
|
-
check_oversubscribe(
|
|
226
|
-
oversubscribe=resource_dict.get("openmpi_oversubscribe", False)
|
|
227
|
-
)
|
|
228
|
-
check_command_line_argument_lst(
|
|
229
|
-
command_line_argument_lst=resource_dict.get("slurm_cmd_args", [])
|
|
230
|
-
)
|
|
231
|
-
if "openmpi_oversubscribe" in resource_dict.keys():
|
|
232
|
-
del resource_dict["openmpi_oversubscribe"]
|
|
233
|
-
if "slurm_cmd_args" in resource_dict.keys():
|
|
234
|
-
del resource_dict["slurm_cmd_args"]
|
|
235
|
-
resource_dict["flux_executor"] = flux_executor
|
|
236
|
-
resource_dict["flux_executor_pmi_mode"] = flux_executor_pmi_mode
|
|
237
|
-
resource_dict["flux_executor_nesting"] = flux_executor_nesting
|
|
238
|
-
resource_dict["flux_log_files"] = flux_log_files
|
|
239
|
-
if block_allocation:
|
|
240
|
-
resource_dict["init_function"] = init_function
|
|
241
|
-
max_workers = validate_number_of_cores(
|
|
242
|
-
max_cores=max_cores,
|
|
243
|
-
max_workers=max_workers,
|
|
244
|
-
cores_per_worker=cores_per_worker,
|
|
245
|
-
set_local_cores=False,
|
|
246
|
-
)
|
|
247
|
-
validate_max_workers_flux(
|
|
248
|
-
max_workers=max_workers,
|
|
249
|
-
cores=cores_per_worker,
|
|
250
|
-
threads_per_core=resource_dict.get("threads_per_core", 1),
|
|
251
|
-
)
|
|
252
|
-
return InteractiveExecutor(
|
|
253
|
-
max_workers=max_workers,
|
|
254
|
-
executor_kwargs=resource_dict,
|
|
255
|
-
spawner=FluxPythonSpawner,
|
|
256
|
-
)
|
|
257
|
-
else:
|
|
258
|
-
return InteractiveStepExecutor(
|
|
259
|
-
max_cores=max_cores,
|
|
260
|
-
max_workers=max_workers,
|
|
261
|
-
executor_kwargs=resource_dict,
|
|
262
|
-
spawner=FluxPythonSpawner,
|
|
263
|
-
)
|
|
264
|
-
elif backend == "slurm_allocation":
|
|
265
|
-
check_executor(executor=flux_executor)
|
|
266
|
-
check_nested_flux_executor(nested_flux_executor=flux_executor_nesting)
|
|
267
|
-
check_flux_log_files(flux_log_files=flux_log_files)
|
|
268
|
-
if block_allocation:
|
|
269
|
-
resource_dict["init_function"] = init_function
|
|
270
|
-
max_workers = validate_number_of_cores(
|
|
271
|
-
max_cores=max_cores,
|
|
272
|
-
max_workers=max_workers,
|
|
273
|
-
cores_per_worker=cores_per_worker,
|
|
274
|
-
set_local_cores=False,
|
|
275
|
-
)
|
|
276
|
-
validate_max_workers_slurm(
|
|
277
|
-
max_workers=max_workers,
|
|
278
|
-
cores=cores_per_worker,
|
|
279
|
-
threads_per_core=resource_dict.get("threads_per_core", 1),
|
|
280
|
-
)
|
|
281
|
-
return InteractiveExecutor(
|
|
282
|
-
max_workers=max_workers,
|
|
283
|
-
executor_kwargs=resource_dict,
|
|
284
|
-
spawner=SrunSpawner,
|
|
285
|
-
)
|
|
286
|
-
else:
|
|
287
|
-
return InteractiveStepExecutor(
|
|
288
|
-
max_cores=max_cores,
|
|
289
|
-
max_workers=max_workers,
|
|
290
|
-
executor_kwargs=resource_dict,
|
|
291
|
-
spawner=SrunSpawner,
|
|
292
|
-
)
|
|
293
|
-
elif backend == "local":
|
|
294
|
-
check_executor(executor=flux_executor)
|
|
295
|
-
check_nested_flux_executor(nested_flux_executor=flux_executor_nesting)
|
|
296
|
-
check_flux_log_files(flux_log_files=flux_log_files)
|
|
297
|
-
check_gpus_per_worker(gpus_per_worker=resource_dict.get("gpus_per_core", 0))
|
|
298
|
-
check_command_line_argument_lst(
|
|
299
|
-
command_line_argument_lst=resource_dict.get("slurm_cmd_args", [])
|
|
300
|
-
)
|
|
301
|
-
if "threads_per_core" in resource_dict.keys():
|
|
302
|
-
del resource_dict["threads_per_core"]
|
|
303
|
-
if "gpus_per_core" in resource_dict.keys():
|
|
304
|
-
del resource_dict["gpus_per_core"]
|
|
305
|
-
if "slurm_cmd_args" in resource_dict.keys():
|
|
306
|
-
del resource_dict["slurm_cmd_args"]
|
|
307
|
-
if block_allocation:
|
|
308
|
-
resource_dict["init_function"] = init_function
|
|
309
|
-
return InteractiveExecutor(
|
|
310
|
-
max_workers=validate_number_of_cores(
|
|
311
|
-
max_cores=max_cores,
|
|
312
|
-
max_workers=max_workers,
|
|
313
|
-
cores_per_worker=cores_per_worker,
|
|
314
|
-
set_local_cores=True,
|
|
315
|
-
),
|
|
316
|
-
executor_kwargs=resource_dict,
|
|
317
|
-
spawner=MpiExecSpawner,
|
|
318
|
-
)
|
|
319
|
-
else:
|
|
320
|
-
return InteractiveStepExecutor(
|
|
321
|
-
max_cores=max_cores,
|
|
322
|
-
max_workers=max_workers,
|
|
323
|
-
executor_kwargs=resource_dict,
|
|
324
|
-
spawner=MpiExecSpawner,
|
|
325
|
-
)
|
|
326
|
-
else:
|
|
327
|
-
raise ValueError(
|
|
328
|
-
"The supported backends are slurm_allocation, slurm_submission, flux_allocation, flux_submission and local."
|
|
329
|
-
)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{executorlib-0.0.8 → executorlib-0.0.10}/executorlib/standalone/interactive/communication.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|