executorlib 0.0.11__tar.gz → 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {executorlib-0.0.11/executorlib.egg-info → executorlib-0.2.0}/PKG-INFO +41 -41
- {executorlib-0.0.11 → executorlib-0.2.0}/README.md +35 -35
- executorlib-0.2.0/executorlib/__init__.py +19 -0
- {executorlib-0.0.11 → executorlib-0.2.0}/executorlib/_version.py +3 -3
- {executorlib-0.0.11 → executorlib-0.2.0}/executorlib/interactive/flux.py +10 -4
- {executorlib-0.0.11 → executorlib-0.2.0}/executorlib/interactive/shared.py +4 -0
- {executorlib-0.0.11 → executorlib-0.2.0}/executorlib/interactive/slurm.py +17 -1
- executorlib-0.2.0/executorlib/interfaces/flux.py +515 -0
- executorlib-0.0.11/executorlib/__init__.py → executorlib-0.2.0/executorlib/interfaces/single.py +102 -79
- executorlib-0.2.0/executorlib/interfaces/slurm.py +470 -0
- executorlib-0.2.0/executorlib/standalone/interactive/__init__.py +0 -0
- {executorlib-0.0.11 → executorlib-0.2.0}/executorlib/standalone/plot.py +27 -3
- {executorlib-0.0.11 → executorlib-0.2.0/executorlib.egg-info}/PKG-INFO +41 -41
- {executorlib-0.0.11 → executorlib-0.2.0}/executorlib.egg-info/SOURCES.txt +6 -1
- {executorlib-0.0.11 → executorlib-0.2.0}/executorlib.egg-info/requires.txt +4 -4
- {executorlib-0.0.11 → executorlib-0.2.0}/pyproject.toml +3 -3
- {executorlib-0.0.11 → executorlib-0.2.0}/tests/test_cache_executor_interactive.py +2 -2
- {executorlib-0.0.11 → executorlib-0.2.0}/tests/test_cache_executor_pysqa_flux.py +3 -4
- executorlib-0.2.0/tests/test_dependencies_executor.py +169 -0
- {executorlib-0.0.11 → executorlib-0.2.0}/tests/test_executor_backend_flux.py +24 -18
- {executorlib-0.0.11 → executorlib-0.2.0}/tests/test_executor_backend_mpi.py +9 -14
- {executorlib-0.0.11 → executorlib-0.2.0}/tests/test_executor_backend_mpi_noblock.py +7 -13
- {executorlib-0.0.11 → executorlib-0.2.0}/tests/test_flux_executor.py +1 -1
- {executorlib-0.0.11 → executorlib-0.2.0}/tests/test_integration_pyiron_workflow.py +7 -7
- executorlib-0.2.0/tests/test_plot_dependency.py +291 -0
- executorlib-0.2.0/tests/test_plot_dependency_flux.py +179 -0
- {executorlib-0.0.11 → executorlib-0.2.0}/tests/test_pysqa_subprocess.py +16 -0
- {executorlib-0.0.11 → executorlib-0.2.0}/tests/test_shell_executor.py +5 -5
- {executorlib-0.0.11 → executorlib-0.2.0}/tests/test_shell_interactive.py +2 -2
- executorlib-0.0.11/executorlib/interactive/create.py +0 -295
- executorlib-0.0.11/tests/test_dependencies_executor.py +0 -259
- {executorlib-0.0.11 → executorlib-0.2.0}/LICENSE +0 -0
- {executorlib-0.0.11 → executorlib-0.2.0}/MANIFEST.in +0 -0
- {executorlib-0.0.11 → executorlib-0.2.0}/executorlib/backend/__init__.py +0 -0
- {executorlib-0.0.11 → executorlib-0.2.0}/executorlib/backend/cache_parallel.py +0 -0
- {executorlib-0.0.11 → executorlib-0.2.0}/executorlib/backend/cache_serial.py +0 -0
- {executorlib-0.0.11 → executorlib-0.2.0}/executorlib/backend/interactive_parallel.py +0 -0
- {executorlib-0.0.11 → executorlib-0.2.0}/executorlib/backend/interactive_serial.py +0 -0
- {executorlib-0.0.11 → executorlib-0.2.0}/executorlib/base/__init__.py +0 -0
- {executorlib-0.0.11 → executorlib-0.2.0}/executorlib/base/executor.py +0 -0
- {executorlib-0.0.11 → executorlib-0.2.0}/executorlib/cache/__init__.py +0 -0
- {executorlib-0.0.11 → executorlib-0.2.0}/executorlib/cache/backend.py +0 -0
- {executorlib-0.0.11 → executorlib-0.2.0}/executorlib/cache/executor.py +0 -0
- {executorlib-0.0.11 → executorlib-0.2.0}/executorlib/cache/queue_spawner.py +0 -0
- {executorlib-0.0.11 → executorlib-0.2.0}/executorlib/cache/shared.py +0 -0
- {executorlib-0.0.11 → executorlib-0.2.0}/executorlib/cache/subprocess_spawner.py +0 -0
- {executorlib-0.0.11 → executorlib-0.2.0}/executorlib/interactive/__init__.py +0 -0
- {executorlib-0.0.11 → executorlib-0.2.0}/executorlib/interactive/executor.py +0 -0
- {executorlib-0.0.11/executorlib/standalone/interactive → executorlib-0.2.0/executorlib/interfaces}/__init__.py +0 -0
- {executorlib-0.0.11 → executorlib-0.2.0}/executorlib/standalone/__init__.py +0 -0
- {executorlib-0.0.11 → executorlib-0.2.0}/executorlib/standalone/command.py +0 -0
- {executorlib-0.0.11 → executorlib-0.2.0}/executorlib/standalone/hdf.py +0 -0
- {executorlib-0.0.11 → executorlib-0.2.0}/executorlib/standalone/inputcheck.py +0 -0
- {executorlib-0.0.11 → executorlib-0.2.0}/executorlib/standalone/interactive/backend.py +0 -0
- {executorlib-0.0.11 → executorlib-0.2.0}/executorlib/standalone/interactive/communication.py +0 -0
- {executorlib-0.0.11 → executorlib-0.2.0}/executorlib/standalone/interactive/spawner.py +0 -0
- {executorlib-0.0.11 → executorlib-0.2.0}/executorlib/standalone/queue.py +0 -0
- {executorlib-0.0.11 → executorlib-0.2.0}/executorlib/standalone/serialize.py +0 -0
- {executorlib-0.0.11 → executorlib-0.2.0}/executorlib/standalone/thread.py +0 -0
- {executorlib-0.0.11 → executorlib-0.2.0}/executorlib.egg-info/dependency_links.txt +0 -0
- {executorlib-0.0.11 → executorlib-0.2.0}/executorlib.egg-info/top_level.txt +0 -0
- {executorlib-0.0.11 → executorlib-0.2.0}/setup.cfg +0 -0
- {executorlib-0.0.11 → executorlib-0.2.0}/setup.py +0 -0
- {executorlib-0.0.11 → executorlib-0.2.0}/tests/test_backend_serial.py +0 -0
- {executorlib-0.0.11 → executorlib-0.2.0}/tests/test_cache_executor_mpi.py +0 -0
- {executorlib-0.0.11 → executorlib-0.2.0}/tests/test_cache_executor_serial.py +0 -0
- {executorlib-0.0.11 → executorlib-0.2.0}/tests/test_cache_hdf.py +0 -0
- {executorlib-0.0.11 → executorlib-0.2.0}/tests/test_cache_shared.py +0 -0
- {executorlib-0.0.11 → executorlib-0.2.0}/tests/test_local_executor.py +0 -0
- {executorlib-0.0.11 → executorlib-0.2.0}/tests/test_local_executor_future.py +0 -0
- {executorlib-0.0.11 → executorlib-0.2.0}/tests/test_shared_backend.py +0 -0
- {executorlib-0.0.11 → executorlib-0.2.0}/tests/test_shared_communication.py +0 -0
- {executorlib-0.0.11 → executorlib-0.2.0}/tests/test_shared_executorbase.py +0 -0
- {executorlib-0.0.11 → executorlib-0.2.0}/tests/test_shared_input_check.py +0 -0
- {executorlib-0.0.11 → executorlib-0.2.0}/tests/test_shared_thread.py +0 -0
PKG-INFO

````diff
@@ -1,7 +1,7 @@
 Metadata-Version: 2.1
 Name: executorlib
-Version: 0.0
-Summary:
+Version: 0.2.0
+Summary: Up-scale python functions for high performance computing (HPC) with executorlib.
 Author-email: Jan Janssen <janssen@lanl.gov>
 License: BSD 3-Clause License
 
@@ -36,7 +36,7 @@ License: BSD 3-Clause License
 Project-URL: Homepage, https://github.com/pyiron/executorlib
 Project-URL: Documentation, https://executorlib.readthedocs.io
 Project-URL: Repository, https://github.com/pyiron/executorlib
-Keywords:
+Keywords: high performance computing,hpc,task scheduler,slurm,flux-framework,executor
 Classifier: Development Status :: 5 - Production/Stable
 Classifier: Topic :: Scientific/Engineering :: Physics
 Classifier: License :: OSI Approved :: BSD License
@@ -63,9 +63,9 @@ Requires-Dist: networkx<=3.4.2,>=2.8.8; extra == "graphnotebook"
 Requires-Dist: ipython<=8.32.0,>=7.33.0; extra == "graphnotebook"
 Provides-Extra: mpi
 Requires-Dist: mpi4py<=4.0.1,>=3.1.4; extra == "mpi"
-Provides-Extra:
-Requires-Dist: pysqa==0.2.3; extra == "
-Requires-Dist: h5py<=3.12.1,>=3.6.0; extra == "
+Provides-Extra: cluster
+Requires-Dist: pysqa==0.2.3; extra == "cluster"
+Requires-Dist: h5py<=3.12.1,>=3.6.0; extra == "cluster"
 Provides-Extra: all
 Requires-Dist: mpi4py<=4.0.1,>=3.1.4; extra == "all"
 Requires-Dist: pysqa==0.2.3; extra == "all"
@@ -75,8 +75,8 @@ Requires-Dist: networkx<=3.4.2,>=2.8.8; extra == "all"
 Requires-Dist: ipython<=8.32.0,>=7.33.0; extra == "all"
 
 # executorlib
-[](https://github.com/pyiron/executorlib/actions/workflows/pipeline.yml)
+[](https://codecov.io/gh/pyiron/executorlib)
 [](https://mybinder.org/v2/gh/pyiron/executorlib/HEAD?labpath=notebooks%2Fexamples.ipynb)
 
 Up-scale python functions for high performance computing (HPC) with executorlib.
@@ -99,17 +99,17 @@ with the [ProcessPoolExecutor](https://docs.python.org/3/library/concurrent.futu
 [ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor) for parallel
 execution of Python functions on a single computer. executorlib extends this functionality to distribute Python
 functions over multiple computers within a high performance computing (HPC) cluster. This can be either achieved by
-submitting each function as individual job to the HPC job scheduler
-or by requesting a
-[HPC
-development process executorlib also provides a
-to use the executorlib functionality on a
-
+submitting each function as individual job to the HPC job scheduler with an [HPC Cluster Executor](https://executorlib.readthedocs.io/en/latest/2-hpc-cluster.html) -
+or by requesting a job from the HPC cluster and then distribute the Python functions within this job with an
+[HPC Job Executor](https://executorlib.readthedocs.io/en/latest/3-hpc-job.html). Finally, to accelerate the
+development process executorlib also provides a [Single Node Executor](https://executorlib.readthedocs.io/en/latest/1-single-node.html) -
+to use the executorlib functionality on a laptop, workstation or single compute node for testing. Starting with the
+[Single Node Executor](https://executorlib.readthedocs.io/en/latest/1-single-node.html):
 ```python
-from executorlib import
+from executorlib import SingleNodeExecutor
 
 
-with
+with SingleNodeExecutor() as exe:
     future_lst = [exe.submit(sum, [i, i]) for i in range(1, 5)]
     print([f.result() for f in future_lst])
 ```
@@ -117,7 +117,7 @@ In the same way executorlib can also execute Python functions which use addition
 CPU cores, CPU threads or GPUs. For example if the Python function internally uses the Message Passing Interface (MPI)
 via the [mpi4py](https://mpi4py.readthedocs.io) Python libary:
 ```python
-from executorlib import
+from executorlib import SingleNodeExecutor
 
 
 def calc(i):
@@ -128,7 +128,7 @@ def calc(i):
     return i, size, rank
 
 
-with
+with SingleNodeExecutor() as exe:
     fs = exe.submit(calc, 3, resource_dict={"cores": 2})
     print(fs.result())
 ```
@@ -142,11 +142,11 @@ This flexibility to assign computing resources on a per-function-call basis simp
 Only the part of the Python functions which benefit from parallel execution are implemented as MPI parallel Python
 funtions, while the rest of the program remains serial.
 
-The same function can be submitted to the [SLURM](https://slurm.schedmd.com)
-
-the rapid prototyping and up-scaling of HPC Python programs.
+The same function can be submitted to the [SLURM](https://slurm.schedmd.com) job scheduler by replacing the
+`SingleNodeExecutor` with the `SlurmClusterExecutor`. The rest of the example remains the same, which highlights how
+executorlib accelerates the rapid prototyping and up-scaling of HPC Python programs.
 ```python
-from executorlib import
+from executorlib import SlurmClusterExecutor
 
 
 def calc(i):
@@ -157,21 +157,21 @@ def calc(i):
     return i, size, rank
 
 
-with
+with SlurmClusterExecutor() as exe:
     fs = exe.submit(calc, 3, resource_dict={"cores": 2})
     print(fs.result())
 ```
 In this case the [Python simple queuing system adapter (pysqa)](https://pysqa.readthedocs.io) is used to submit the
 `calc()` function to the [SLURM](https://slurm.schedmd.com) job scheduler and request an allocation with two CPU cores
-for the execution of the function - [HPC
+for the execution of the function - [HPC Cluster Executor](https://executorlib.readthedocs.io/en/latest/2-hpc-cluster.html). In the background the [sbatch](https://slurm.schedmd.com/sbatch.html)
 command is used to request the allocation to execute the Python function.
 
-Within a given [SLURM](https://slurm.schedmd.com)
+Within a given [SLURM](https://slurm.schedmd.com) job executorlib can also be used to assign a subset of the
 available computing resources to execute a given Python function. In terms of the [SLURM](https://slurm.schedmd.com)
 commands, this functionality internally uses the [srun](https://slurm.schedmd.com/srun.html) command to receive a subset
 of the resources of a given queuing system allocation.
 ```python
-from executorlib import
+from executorlib import SlurmJobExecutor
 
 
 def calc(i):
@@ -182,7 +182,7 @@ def calc(i):
     return i, size, rank
 
 
-with
+with SlurmJobExecutor() as exe:
     fs = exe.submit(calc, 3, resource_dict={"cores": 2})
     print(fs.result())
 ```
@@ -192,7 +192,7 @@ In addition, to support for [SLURM](https://slurm.schedmd.com) executorlib also
 to address the needs for the up-coming generation of Exascale computers. Still even on traditional HPC clusters the
 hierarchical approach of the [flux](http://flux-framework.org) is beneficial to distribute hundreds of tasks within a
 given allocation. Even when [SLURM](https://slurm.schedmd.com) is used as primary job scheduler of your HPC, it is
-recommended to use [SLURM with flux](https://executorlib.readthedocs.io/en/latest/3-hpc-
+recommended to use [SLURM with flux](https://executorlib.readthedocs.io/en/latest/3-hpc-job.html#slurm-with-flux)
 as hierarchical job scheduler within the allocations.
 
 ## Documentation
@@ -200,21 +200,21 @@ as hierarchical job scheduler within the allocations.
 * [Minimal](https://executorlib.readthedocs.io/en/latest/installation.html#minimal)
 * [MPI Support](https://executorlib.readthedocs.io/en/latest/installation.html#mpi-support)
 * [Caching](https://executorlib.readthedocs.io/en/latest/installation.html#caching)
-* [HPC
-* [HPC
+* [HPC Cluster Executor](https://executorlib.readthedocs.io/en/latest/installation.html#hpc-cluster-executor)
+* [HPC Job Executor](https://executorlib.readthedocs.io/en/latest/installation.html#hpc-job-executor)
 * [Visualisation](https://executorlib.readthedocs.io/en/latest/installation.html#visualisation)
 * [For Developers](https://executorlib.readthedocs.io/en/latest/installation.html#for-developers)
-* [
-* [Basic Functionality](https://executorlib.readthedocs.io/en/latest/1-
-* [Parallel Functions](https://executorlib.readthedocs.io/en/latest/1-
-* [Performance Optimization](https://executorlib.readthedocs.io/en/latest/1-
-* [HPC
-* [SLURM](https://executorlib.readthedocs.io/en/latest/2-hpc-
-* [Flux](https://executorlib.readthedocs.io/en/latest/2-hpc-
-* [HPC
-* [SLURM](https://executorlib.readthedocs.io/en/latest/3-hpc-
-* [SLURM with Flux](https://executorlib.readthedocs.io/en/latest/3-hpc-
-* [Flux](https://executorlib.readthedocs.io/en/latest/3-hpc-
+* [Single Node Executor](https://executorlib.readthedocs.io/en/latest/1-single-node.html)
+* [Basic Functionality](https://executorlib.readthedocs.io/en/latest/1-single-node.html#basic-functionality)
+* [Parallel Functions](https://executorlib.readthedocs.io/en/latest/1-single-node.html#parallel-functions)
+* [Performance Optimization](https://executorlib.readthedocs.io/en/latest/1-single-node.html#performance-optimization)
+* [HPC Cluster Executor](https://executorlib.readthedocs.io/en/latest/2-hpc-cluster.html)
+* [SLURM](https://executorlib.readthedocs.io/en/latest/2-hpc-cluster.html#slurm)
+* [Flux](https://executorlib.readthedocs.io/en/latest/2-hpc-cluster.html#flux)
+* [HPC Job Executor](https://executorlib.readthedocs.io/en/latest/3-hpc-job.html)
+* [SLURM](https://executorlib.readthedocs.io/en/latest/3-hpc-job.html#slurm)
+* [SLURM with Flux](https://executorlib.readthedocs.io/en/latest/3-hpc-job.html#slurm-with-flux)
+* [Flux](https://executorlib.readthedocs.io/en/latest/3-hpc-job.html#flux)
 * [Trouble Shooting](https://executorlib.readthedocs.io/en/latest/trouble_shooting.html)
 * [Filesystem Usage](https://executorlib.readthedocs.io/en/latest/trouble_shooting.html#filesystem-usage)
 * [Firewall Issues](https://executorlib.readthedocs.io/en/latest/trouble_shooting.html#firewall-issues)
````
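Apart from the new summary and keywords, the packaging change above is the renamed optional dependency group: queuing-system support is now published as the `cluster` extra, pinning `pysqa==0.2.3` and `h5py`. A small hedged check, assuming executorlib 0.2.0 is installed in the current environment, to confirm the extra's markers from the installed metadata:

```python
# Hedged sketch: inspect the installed executorlib 0.2.0 metadata and list the
# requirements gated behind the renamed "cluster" extra (pysqa and h5py pins).
from importlib.metadata import metadata, requires

print(metadata("executorlib")["Version"])  # expected: 0.2.0
cluster_reqs = [r for r in requires("executorlib") if 'extra == "cluster"' in r]
print(cluster_reqs)
# expected: ['pysqa==0.2.3; extra == "cluster"', 'h5py<=3.12.1,>=3.6.0; extra == "cluster"']
```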
README.md

````diff
@@ -1,6 +1,6 @@
 # executorlib
-[](https://github.com/pyiron/executorlib/actions/workflows/pipeline.yml)
+[](https://codecov.io/gh/pyiron/executorlib)
 [](https://mybinder.org/v2/gh/pyiron/executorlib/HEAD?labpath=notebooks%2Fexamples.ipynb)
 
 Up-scale python functions for high performance computing (HPC) with executorlib.
@@ -23,17 +23,17 @@ with the [ProcessPoolExecutor](https://docs.python.org/3/library/concurrent.futu
 [ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor) for parallel
 execution of Python functions on a single computer. executorlib extends this functionality to distribute Python
 functions over multiple computers within a high performance computing (HPC) cluster. This can be either achieved by
-submitting each function as individual job to the HPC job scheduler
-or by requesting a
-[HPC
-development process executorlib also provides a
-to use the executorlib functionality on a
-
+submitting each function as individual job to the HPC job scheduler with an [HPC Cluster Executor](https://executorlib.readthedocs.io/en/latest/2-hpc-cluster.html) -
+or by requesting a job from the HPC cluster and then distribute the Python functions within this job with an
+[HPC Job Executor](https://executorlib.readthedocs.io/en/latest/3-hpc-job.html). Finally, to accelerate the
+development process executorlib also provides a [Single Node Executor](https://executorlib.readthedocs.io/en/latest/1-single-node.html) -
+to use the executorlib functionality on a laptop, workstation or single compute node for testing. Starting with the
+[Single Node Executor](https://executorlib.readthedocs.io/en/latest/1-single-node.html):
 ```python
-from executorlib import
+from executorlib import SingleNodeExecutor
 
 
-with
+with SingleNodeExecutor() as exe:
     future_lst = [exe.submit(sum, [i, i]) for i in range(1, 5)]
     print([f.result() for f in future_lst])
 ```
@@ -41,7 +41,7 @@ In the same way executorlib can also execute Python functions which use addition
 CPU cores, CPU threads or GPUs. For example if the Python function internally uses the Message Passing Interface (MPI)
 via the [mpi4py](https://mpi4py.readthedocs.io) Python libary:
 ```python
-from executorlib import
+from executorlib import SingleNodeExecutor
 
 
 def calc(i):
@@ -52,7 +52,7 @@ def calc(i):
     return i, size, rank
 
 
-with
+with SingleNodeExecutor() as exe:
     fs = exe.submit(calc, 3, resource_dict={"cores": 2})
     print(fs.result())
 ```
@@ -66,11 +66,11 @@ This flexibility to assign computing resources on a per-function-call basis simp
 Only the part of the Python functions which benefit from parallel execution are implemented as MPI parallel Python
 funtions, while the rest of the program remains serial.
 
-The same function can be submitted to the [SLURM](https://slurm.schedmd.com)
-
-the rapid prototyping and up-scaling of HPC Python programs.
+The same function can be submitted to the [SLURM](https://slurm.schedmd.com) job scheduler by replacing the
+`SingleNodeExecutor` with the `SlurmClusterExecutor`. The rest of the example remains the same, which highlights how
+executorlib accelerates the rapid prototyping and up-scaling of HPC Python programs.
 ```python
-from executorlib import
+from executorlib import SlurmClusterExecutor
 
 
 def calc(i):
@@ -81,21 +81,21 @@ def calc(i):
     return i, size, rank
 
 
-with
+with SlurmClusterExecutor() as exe:
     fs = exe.submit(calc, 3, resource_dict={"cores": 2})
     print(fs.result())
 ```
 In this case the [Python simple queuing system adapter (pysqa)](https://pysqa.readthedocs.io) is used to submit the
 `calc()` function to the [SLURM](https://slurm.schedmd.com) job scheduler and request an allocation with two CPU cores
-for the execution of the function - [HPC
+for the execution of the function - [HPC Cluster Executor](https://executorlib.readthedocs.io/en/latest/2-hpc-cluster.html). In the background the [sbatch](https://slurm.schedmd.com/sbatch.html)
 command is used to request the allocation to execute the Python function.
 
-Within a given [SLURM](https://slurm.schedmd.com)
+Within a given [SLURM](https://slurm.schedmd.com) job executorlib can also be used to assign a subset of the
 available computing resources to execute a given Python function. In terms of the [SLURM](https://slurm.schedmd.com)
 commands, this functionality internally uses the [srun](https://slurm.schedmd.com/srun.html) command to receive a subset
 of the resources of a given queuing system allocation.
 ```python
-from executorlib import
+from executorlib import SlurmJobExecutor
 
 
 def calc(i):
@@ -106,7 +106,7 @@ def calc(i):
     return i, size, rank
 
 
-with
+with SlurmJobExecutor() as exe:
     fs = exe.submit(calc, 3, resource_dict={"cores": 2})
     print(fs.result())
 ```
@@ -116,7 +116,7 @@ In addition, to support for [SLURM](https://slurm.schedmd.com) executorlib also
 to address the needs for the up-coming generation of Exascale computers. Still even on traditional HPC clusters the
 hierarchical approach of the [flux](http://flux-framework.org) is beneficial to distribute hundreds of tasks within a
 given allocation. Even when [SLURM](https://slurm.schedmd.com) is used as primary job scheduler of your HPC, it is
-recommended to use [SLURM with flux](https://executorlib.readthedocs.io/en/latest/3-hpc-
+recommended to use [SLURM with flux](https://executorlib.readthedocs.io/en/latest/3-hpc-job.html#slurm-with-flux)
 as hierarchical job scheduler within the allocations.
 
 ## Documentation
@@ -124,21 +124,21 @@ as hierarchical job scheduler within the allocations.
 * [Minimal](https://executorlib.readthedocs.io/en/latest/installation.html#minimal)
 * [MPI Support](https://executorlib.readthedocs.io/en/latest/installation.html#mpi-support)
 * [Caching](https://executorlib.readthedocs.io/en/latest/installation.html#caching)
-* [HPC
-* [HPC
+* [HPC Cluster Executor](https://executorlib.readthedocs.io/en/latest/installation.html#hpc-cluster-executor)
+* [HPC Job Executor](https://executorlib.readthedocs.io/en/latest/installation.html#hpc-job-executor)
 * [Visualisation](https://executorlib.readthedocs.io/en/latest/installation.html#visualisation)
 * [For Developers](https://executorlib.readthedocs.io/en/latest/installation.html#for-developers)
-* [
-* [Basic Functionality](https://executorlib.readthedocs.io/en/latest/1-
-* [Parallel Functions](https://executorlib.readthedocs.io/en/latest/1-
-* [Performance Optimization](https://executorlib.readthedocs.io/en/latest/1-
-* [HPC
-* [SLURM](https://executorlib.readthedocs.io/en/latest/2-hpc-
-* [Flux](https://executorlib.readthedocs.io/en/latest/2-hpc-
-* [HPC
-* [SLURM](https://executorlib.readthedocs.io/en/latest/3-hpc-
-* [SLURM with Flux](https://executorlib.readthedocs.io/en/latest/3-hpc-
-* [Flux](https://executorlib.readthedocs.io/en/latest/3-hpc-
+* [Single Node Executor](https://executorlib.readthedocs.io/en/latest/1-single-node.html)
+* [Basic Functionality](https://executorlib.readthedocs.io/en/latest/1-single-node.html#basic-functionality)
+* [Parallel Functions](https://executorlib.readthedocs.io/en/latest/1-single-node.html#parallel-functions)
+* [Performance Optimization](https://executorlib.readthedocs.io/en/latest/1-single-node.html#performance-optimization)
+* [HPC Cluster Executor](https://executorlib.readthedocs.io/en/latest/2-hpc-cluster.html)
+* [SLURM](https://executorlib.readthedocs.io/en/latest/2-hpc-cluster.html#slurm)
+* [Flux](https://executorlib.readthedocs.io/en/latest/2-hpc-cluster.html#flux)
+* [HPC Job Executor](https://executorlib.readthedocs.io/en/latest/3-hpc-job.html)
+* [SLURM](https://executorlib.readthedocs.io/en/latest/3-hpc-job.html#slurm)
+* [SLURM with Flux](https://executorlib.readthedocs.io/en/latest/3-hpc-job.html#slurm-with-flux)
+* [Flux](https://executorlib.readthedocs.io/en/latest/3-hpc-job.html#flux)
 * [Trouble Shooting](https://executorlib.readthedocs.io/en/latest/trouble_shooting.html)
 * [Filesystem Usage](https://executorlib.readthedocs.io/en/latest/trouble_shooting.html#filesystem-usage)
 * [Firewall Issues](https://executorlib.readthedocs.io/en/latest/trouble_shooting.html#firewall-issues)
````
executorlib/__init__.py (new file in 0.2.0)

```diff
@@ -0,0 +1,19 @@
+from executorlib._version import get_versions as _get_versions
+from executorlib.interfaces.flux import (
+    FluxClusterExecutor,
+    FluxJobExecutor,
+)
+from executorlib.interfaces.single import SingleNodeExecutor
+from executorlib.interfaces.slurm import (
+    SlurmClusterExecutor,
+    SlurmJobExecutor,
+)
+
+__version__ = _get_versions()["version"]
+__all__: list = [
+    "FluxJobExecutor",
+    "FluxClusterExecutor",
+    "SingleNodeExecutor",
+    "SlurmJobExecutor",
+    "SlurmClusterExecutor",
+]
```
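The new `executorlib/__init__.py` re-exports five executor classes from the new `executorlib.interfaces` subpackage; the previous top-level `__init__.py` has moved to `executorlib/interfaces/single.py` (see the file list above). A minimal sketch, assuming executorlib 0.2.0 is importable, of the new public surface:

```python
# Minimal sketch of the 0.2.0 top-level API defined by executorlib/__init__.py.
import executorlib

print(executorlib.__version__)  # expected: 0.2.0
print(executorlib.__all__)
# ['FluxJobExecutor', 'FluxClusterExecutor', 'SingleNodeExecutor',
#  'SlurmJobExecutor', 'SlurmClusterExecutor']

# The same classes can also be imported directly from the interfaces subpackage:
from executorlib.interfaces.single import SingleNodeExecutor  # noqa: F401
```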
executorlib/_version.py

```diff
@@ -8,11 +8,11 @@ import json
 
 version_json = '''
 {
- "date": "2025-02-
+ "date": "2025-02-11T12:47:28+0100",
 "dirty": true,
 "error": null,
- "full-revisionid": "
- "version": "0.0
+ "full-revisionid": "0ffd31288952f78be3b0c810eac1890759634d35",
+ "version": "0.2.0"
 }
 ''' # END VERSION_JSON
 
```
executorlib/interactive/flux.py

```diff
@@ -29,6 +29,8 @@ class FluxPythonSpawner(BaseSpawner):
         cores (int, optional): The number of cores. Defaults to 1.
         threads_per_core (int, optional): The number of threads per base. Defaults to 1.
         gpus_per_core (int, optional): The number of GPUs per base. Defaults to 0.
+        num_nodes (int, optional): The number of compute nodes to use for executing the task. Defaults to None.
+        exclusive (bool): Whether to exclusively reserve the compute nodes, or allow sharing compute notes. Defaults to False.
         openmpi_oversubscribe (bool, optional): Whether to oversubscribe. Defaults to False.
         flux_executor (flux.job.FluxExecutor, optional): The FluxExecutor instance. Defaults to None.
         flux_executor_pmi_mode (str, optional): The PMI option. Defaults to None.
@@ -42,6 +44,8 @@ class FluxPythonSpawner(BaseSpawner):
         cores: int = 1,
         threads_per_core: int = 1,
         gpus_per_core: int = 0,
+        num_nodes: Optional[int] = None,
+        exclusive: bool = False,
         openmpi_oversubscribe: bool = False,
         flux_executor: Optional[flux.job.FluxExecutor] = None,
         flux_executor_pmi_mode: Optional[str] = None,
@@ -55,6 +59,8 @@ class FluxPythonSpawner(BaseSpawner):
         )
         self._threads_per_core = threads_per_core
         self._gpus_per_core = gpus_per_core
+        self._num_nodes = num_nodes
+        self._exclusive = exclusive
         self._flux_executor = flux_executor
         self._flux_executor_pmi_mode = flux_executor_pmi_mode
         self._flux_executor_nesting = flux_executor_nesting
@@ -85,8 +91,8 @@ class FluxPythonSpawner(BaseSpawner):
                 num_tasks=self._cores,
                 cores_per_task=self._threads_per_core,
                 gpus_per_task=self._gpus_per_core,
-                num_nodes=
-                exclusive=
+                num_nodes=self._num_nodes,
+                exclusive=self._exclusive,
             )
         else:
             jobspec = flux.job.JobspecV1.from_nest_command(
@@ -94,8 +100,8 @@ class FluxPythonSpawner(BaseSpawner):
                 num_slots=self._cores,
                 cores_per_slot=self._threads_per_core,
                 gpus_per_slot=self._gpus_per_core,
-                num_nodes=
-                exclusive=
+                num_nodes=self._num_nodes,
+                exclusive=self._exclusive,
             )
         jobspec.environment = dict(os.environ)
         if self._flux_executor_pmi_mode is not None:
```
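The `FluxPythonSpawner` hunks thread two new options, `num_nodes` and `exclusive`, into the flux jobspec instead of the previously hard-coded values. A hedged sketch of the underlying call, assuming the flux-core Python bindings are installed; the command list is a hypothetical placeholder, not the executorlib backend invocation:

```python
# Hedged sketch (requires the flux-core Python bindings to run).
import os
import flux.job

jobspec = flux.job.JobspecV1.from_command(
    command=["python", "my_task.py"],  # hypothetical task command
    num_tasks=4,                       # maps to cores in FluxPythonSpawner
    cores_per_task=1,                  # threads_per_core
    gpus_per_task=0,                   # gpus_per_core
    num_nodes=2,                       # new knob in 0.2.0: spread tasks over two nodes
    exclusive=True,                    # new knob in 0.2.0: reserve the nodes exclusively
)
jobspec.environment = dict(os.environ)  # mirrored from the spawner code above
```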
executorlib/interactive/shared.py

```diff
@@ -483,6 +483,8 @@ def _update_futures_in_input(args: tuple, kwargs: dict) -> tuple[tuple, dict]:
             return arg.result()
         elif isinstance(arg, list):
             return [get_result(arg=el) for el in arg]
+        elif isinstance(arg, dict):
+            return {k: get_result(arg=v) for k, v in arg.items()}
         else:
             return arg
 
@@ -510,6 +512,8 @@ def _get_future_objects_from_input(task_dict: dict):
             future_lst.append(el)
         elif isinstance(el, list):
             find_future_in_list(lst=el)
+        elif isinstance(el, dict):
+            find_future_in_list(lst=el.values())
 
     find_future_in_list(lst=task_dict["args"])
     find_future_in_list(lst=task_dict["kwargs"].values())
```
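The two `shared.py` hunks extend executorlib's dependency handling so that `Future` objects nested inside dictionaries, not only inside lists, are detected and replaced by their results before a task runs. A standalone sketch of the resolution step the first hunk adds; this is an illustration, not the executorlib implementation itself:

```python
# Standalone sketch: resolve Future objects nested in lists and (new in 0.2.0) dicts.
from concurrent.futures import Future


def get_result(arg):
    if isinstance(arg, Future):
        return arg.result()
    elif isinstance(arg, list):
        return [get_result(arg=el) for el in arg]
    elif isinstance(arg, dict):  # behaviour added by the hunk above
        return {k: get_result(arg=v) for k, v in arg.items()}
    else:
        return arg


f = Future()
f.set_result(7)
print(get_result({"a": f, "b": [f, 1]}))  # {'a': 7, 'b': [7, 1]}
```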
executorlib/interactive/slurm.py

```diff
@@ -27,6 +27,8 @@ class SrunSpawner(SubprocessSpawner):
         cores: int = 1,
         threads_per_core: int = 1,
         gpus_per_core: int = 0,
+        num_nodes: Optional[int] = None,
+        exclusive: bool = False,
         openmpi_oversubscribe: bool = False,
         slurm_cmd_args: Optional[list[str]] = None,
     ):
@@ -38,6 +40,8 @@ class SrunSpawner(SubprocessSpawner):
            cores (int, optional): The number of cores to use. Defaults to 1.
            threads_per_core (int, optional): The number of threads per core. Defaults to 1.
            gpus_per_core (int, optional): The number of GPUs per core. Defaults to 0.
+           num_nodes (int, optional): The number of compute nodes to use for executing the task. Defaults to None.
+           exclusive (bool): Whether to exclusively reserve the compute nodes, or allow sharing compute notes. Defaults to False.
            openmpi_oversubscribe (bool, optional): Whether to oversubscribe the cores. Defaults to False.
            slurm_cmd_args (list[str], optional): Additional command line arguments. Defaults to [].
        """
@@ -49,6 +53,8 @@ class SrunSpawner(SubprocessSpawner):
         )
         self._gpus_per_core = gpus_per_core
         self._slurm_cmd_args = slurm_cmd_args
+        self._num_nodes = num_nodes
+        self._exclusive = exclusive
 
     def generate_command(self, command_lst: list[str]) -> list[str]:
         """
@@ -65,6 +71,8 @@ class SrunSpawner(SubprocessSpawner):
             cwd=self._cwd,
             threads_per_core=self._threads_per_core,
             gpus_per_core=self._gpus_per_core,
+            num_nodes=self._num_nodes,
+            exclusive=self._exclusive,
             openmpi_oversubscribe=self._openmpi_oversubscribe,
             slurm_cmd_args=self._slurm_cmd_args,
         )
@@ -78,6 +86,8 @@ def generate_slurm_command(
     cwd: Optional[str],
     threads_per_core: int = 1,
     gpus_per_core: int = 0,
+    num_nodes: Optional[int] = None,
+    exclusive: bool = False,
     openmpi_oversubscribe: bool = False,
     slurm_cmd_args: Optional[list[str]] = None,
 ) -> list[str]:
@@ -89,6 +99,8 @@ def generate_slurm_command(
         cwd (str): The current working directory.
         threads_per_core (int, optional): The number of threads per core. Defaults to 1.
         gpus_per_core (int, optional): The number of GPUs per core. Defaults to 0.
+        num_nodes (int, optional): The number of compute nodes to use for executing the task. Defaults to None.
+        exclusive (bool): Whether to exclusively reserve the compute nodes, or allow sharing compute notes. Defaults to False.
         openmpi_oversubscribe (bool, optional): Whether to oversubscribe the cores. Defaults to False.
         slurm_cmd_args (list[str], optional): Additional command line arguments. Defaults to [].
 
@@ -98,10 +110,14 @@ def generate_slurm_command(
     command_prepend_lst = [SLURM_COMMAND, "-n", str(cores)]
     if cwd is not None:
         command_prepend_lst += ["-D", cwd]
+    if num_nodes is not None:
+        command_prepend_lst += ["-N", str(num_nodes)]
     if threads_per_core > 1:
-        command_prepend_lst += ["--cpus-per-task" + str(threads_per_core)]
+        command_prepend_lst += ["--cpus-per-task=" + str(threads_per_core)]
     if gpus_per_core > 0:
         command_prepend_lst += ["--gpus-per-task=" + str(gpus_per_core)]
+    if exclusive:
+        command_prepend_lst += ["--exact"]
     if openmpi_oversubscribe:
         command_prepend_lst += ["--oversubscribe"]
     if slurm_cmd_args is not None and len(slurm_cmd_args) > 0:
```
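Taken together, the `slurm.py` hunks add the same `num_nodes` and `exclusive` options to the `SrunSpawner` and to `generate_slurm_command()`, mapping them to `srun -N <nodes>` and `srun --exact`, and fix the missing `=` in the `--cpus-per-task` flag. A standalone sketch of the resulting command assembly; the helper name `build_srun_prefix` is hypothetical, the real function is `generate_slurm_command()`:

```python
# Standalone sketch of the srun prefix assembled after this change.
from typing import Optional

SLURM_COMMAND = "srun"


def build_srun_prefix(
    cores: int,
    cwd: Optional[str] = None,
    threads_per_core: int = 1,
    gpus_per_core: int = 0,
    num_nodes: Optional[int] = None,
    exclusive: bool = False,
) -> list[str]:
    cmd = [SLURM_COMMAND, "-n", str(cores)]
    if cwd is not None:
        cmd += ["-D", cwd]
    if num_nodes is not None:          # new in 0.2.0: pin the node count
        cmd += ["-N", str(num_nodes)]
    if threads_per_core > 1:           # the "=" was missing before 0.2.0
        cmd += ["--cpus-per-task=" + str(threads_per_core)]
    if gpus_per_core > 0:
        cmd += ["--gpus-per-task=" + str(gpus_per_core)]
    if exclusive:                      # new in 0.2.0: do not share the allocated resources
        cmd += ["--exact"]
    return cmd


print(build_srun_prefix(cores=4, num_nodes=2, exclusive=True))
# ['srun', '-n', '4', '-N', '2', '--exact']
```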