executorlib 1.7.3__tar.gz → 1.8.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {executorlib-1.7.3 → executorlib-1.8.0}/PKG-INFO +9 -8
- {executorlib-1.7.3 → executorlib-1.8.0}/README.md +1 -0
- {executorlib-1.7.3 → executorlib-1.8.0}/pyproject.toml +8 -8
- {executorlib-1.7.3 → executorlib-1.8.0}/src/executorlib/_version.py +2 -2
- {executorlib-1.7.3 → executorlib-1.8.0}/src/executorlib/executor/base.py +37 -0
- {executorlib-1.7.3 → executorlib-1.8.0}/src/executorlib/executor/flux.py +21 -0
- {executorlib-1.7.3 → executorlib-1.8.0}/src/executorlib/executor/single.py +21 -0
- {executorlib-1.7.3 → executorlib-1.8.0}/src/executorlib/executor/slurm.py +21 -0
- {executorlib-1.7.3 → executorlib-1.8.0}/src/executorlib/standalone/hdf.py +7 -0
- {executorlib-1.7.3 → executorlib-1.8.0}/src/executorlib/standalone/inputcheck.py +12 -0
- {executorlib-1.7.3 → executorlib-1.8.0}/src/executorlib/standalone/interactive/spawner.py +6 -0
- {executorlib-1.7.3 → executorlib-1.8.0}/src/executorlib/standalone/serialize.py +8 -1
- {executorlib-1.7.3 → executorlib-1.8.0}/src/executorlib/task_scheduler/base.py +40 -1
- {executorlib-1.7.3 → executorlib-1.8.0}/src/executorlib/task_scheduler/file/shared.py +29 -13
- {executorlib-1.7.3 → executorlib-1.8.0}/src/executorlib/task_scheduler/file/task_scheduler.py +5 -0
- {executorlib-1.7.3 → executorlib-1.8.0}/src/executorlib/task_scheduler/interactive/blockallocation.py +2 -2
- {executorlib-1.7.3 → executorlib-1.8.0}/src/executorlib/task_scheduler/interactive/dependency.py +17 -6
- {executorlib-1.7.3 → executorlib-1.8.0}/src/executorlib/task_scheduler/interactive/dependency_plot.py +167 -15
- {executorlib-1.7.3 → executorlib-1.8.0}/src/executorlib/task_scheduler/interactive/spawner_flux.py +8 -4
- {executorlib-1.7.3 → executorlib-1.8.0}/src/executorlib/task_scheduler/interactive/spawner_pysqa.py +3 -0
- {executorlib-1.7.3 → executorlib-1.8.0}/src/executorlib/task_scheduler/interactive/spawner_slurm.py +3 -0
- {executorlib-1.7.3 → executorlib-1.8.0}/.gitignore +0 -0
- {executorlib-1.7.3 → executorlib-1.8.0}/LICENSE +0 -0
- {executorlib-1.7.3 → executorlib-1.8.0}/src/executorlib/__init__.py +0 -0
- {executorlib-1.7.3 → executorlib-1.8.0}/src/executorlib/api.py +0 -0
- {executorlib-1.7.3 → executorlib-1.8.0}/src/executorlib/backend/__init__.py +0 -0
- {executorlib-1.7.3 → executorlib-1.8.0}/src/executorlib/backend/cache_parallel.py +0 -0
- {executorlib-1.7.3 → executorlib-1.8.0}/src/executorlib/backend/cache_serial.py +0 -0
- {executorlib-1.7.3 → executorlib-1.8.0}/src/executorlib/backend/interactive_parallel.py +0 -0
- {executorlib-1.7.3 → executorlib-1.8.0}/src/executorlib/backend/interactive_serial.py +0 -0
- {executorlib-1.7.3 → executorlib-1.8.0}/src/executorlib/executor/__init__.py +0 -0
- {executorlib-1.7.3 → executorlib-1.8.0}/src/executorlib/standalone/__init__.py +0 -0
- {executorlib-1.7.3 → executorlib-1.8.0}/src/executorlib/standalone/batched.py +0 -0
- {executorlib-1.7.3 → executorlib-1.8.0}/src/executorlib/standalone/command.py +0 -0
- {executorlib-1.7.3 → executorlib-1.8.0}/src/executorlib/standalone/error.py +0 -0
- {executorlib-1.7.3 → executorlib-1.8.0}/src/executorlib/standalone/interactive/__init__.py +0 -0
- {executorlib-1.7.3 → executorlib-1.8.0}/src/executorlib/standalone/interactive/arguments.py +0 -0
- {executorlib-1.7.3 → executorlib-1.8.0}/src/executorlib/standalone/interactive/backend.py +0 -0
- {executorlib-1.7.3 → executorlib-1.8.0}/src/executorlib/standalone/interactive/communication.py +0 -0
- {executorlib-1.7.3 → executorlib-1.8.0}/src/executorlib/standalone/queue.py +0 -0
- {executorlib-1.7.3 → executorlib-1.8.0}/src/executorlib/standalone/scheduler.py +0 -0
- {executorlib-1.7.3 → executorlib-1.8.0}/src/executorlib/standalone/select.py +0 -0
- {executorlib-1.7.3 → executorlib-1.8.0}/src/executorlib/task_scheduler/__init__.py +0 -0
- {executorlib-1.7.3 → executorlib-1.8.0}/src/executorlib/task_scheduler/file/__init__.py +0 -0
- {executorlib-1.7.3 → executorlib-1.8.0}/src/executorlib/task_scheduler/file/backend.py +0 -0
- {executorlib-1.7.3 → executorlib-1.8.0}/src/executorlib/task_scheduler/file/spawner_pysqa.py +0 -0
- {executorlib-1.7.3 → executorlib-1.8.0}/src/executorlib/task_scheduler/file/spawner_subprocess.py +0 -0
- {executorlib-1.7.3 → executorlib-1.8.0}/src/executorlib/task_scheduler/interactive/__init__.py +0 -0
- {executorlib-1.7.3 → executorlib-1.8.0}/src/executorlib/task_scheduler/interactive/onetoone.py +0 -0
- {executorlib-1.7.3 → executorlib-1.8.0}/src/executorlib/task_scheduler/interactive/shared.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: executorlib
|
|
3
|
-
Version: 1.7.3
|
|
3
|
+
Version: 1.8.0
|
|
4
4
|
Summary: Up-scale python functions for high performance computing (HPC) with executorlib.
|
|
5
5
|
Project-URL: Homepage, https://github.com/pyiron/executorlib
|
|
6
6
|
Project-URL: Documentation, https://executorlib.readthedocs.io
|
|
@@ -51,22 +51,22 @@ Requires-Dist: cloudpickle<=3.1.2,>=2.0.0
|
|
|
51
51
|
Requires-Dist: pyzmq<=27.1.0,>=25.0.0
|
|
52
52
|
Provides-Extra: all
|
|
53
53
|
Requires-Dist: h5py<=3.15.1,>=3.6.0; extra == 'all'
|
|
54
|
-
Requires-Dist: ipython<=9.0
|
|
54
|
+
Requires-Dist: ipython<=9.9.0,>=7.33.0; extra == 'all'
|
|
55
55
|
Requires-Dist: mpi4py<=4.1.1,>=3.1.4; extra == 'all'
|
|
56
|
-
Requires-Dist: networkx<=3.
|
|
56
|
+
Requires-Dist: networkx<=3.6.1,>=2.8.8; extra == 'all'
|
|
57
57
|
Requires-Dist: pygraphviz<=1.14,>=1.10; extra == 'all'
|
|
58
|
-
Requires-Dist: pysqa==0.3.
|
|
58
|
+
Requires-Dist: pysqa==0.3.4; extra == 'all'
|
|
59
59
|
Provides-Extra: cache
|
|
60
60
|
Requires-Dist: h5py<=3.15.1,>=3.6.0; extra == 'cache'
|
|
61
61
|
Provides-Extra: cluster
|
|
62
62
|
Requires-Dist: h5py<=3.15.1,>=3.6.0; extra == 'cluster'
|
|
63
|
-
Requires-Dist: pysqa==0.3.
|
|
63
|
+
Requires-Dist: pysqa==0.3.4; extra == 'cluster'
|
|
64
64
|
Provides-Extra: graph
|
|
65
|
-
Requires-Dist: networkx<=3.
|
|
65
|
+
Requires-Dist: networkx<=3.6.1,>=2.8.8; extra == 'graph'
|
|
66
66
|
Requires-Dist: pygraphviz<=1.14,>=1.10; extra == 'graph'
|
|
67
67
|
Provides-Extra: graphnotebook
|
|
68
|
-
Requires-Dist: ipython<=9.0
|
|
69
|
-
Requires-Dist: networkx<=3.
|
|
68
|
+
Requires-Dist: ipython<=9.9.0,>=7.33.0; extra == 'graphnotebook'
|
|
69
|
+
Requires-Dist: networkx<=3.6.1,>=2.8.8; extra == 'graphnotebook'
|
|
70
70
|
Requires-Dist: pygraphviz<=1.14,>=1.10; extra == 'graphnotebook'
|
|
71
71
|
Provides-Extra: mpi
|
|
72
72
|
Requires-Dist: mpi4py<=4.1.1,>=3.1.4; extra == 'mpi'
|
|
@@ -208,6 +208,7 @@ as hierarchical job scheduler within the allocations.
|
|
|
208
208
|
* [Basic Functionality](https://executorlib.readthedocs.io/en/latest/1-single-node.html#basic-functionality)
|
|
209
209
|
* [Parallel Functions](https://executorlib.readthedocs.io/en/latest/1-single-node.html#parallel-functions)
|
|
210
210
|
* [Performance Optimization](https://executorlib.readthedocs.io/en/latest/1-single-node.html#performance-optimization)
|
|
211
|
+
* [Advanced Scheduling](https://executorlib.readthedocs.io/en/latest/1-single-node.html#advanced-scheduling)
|
|
211
212
|
* [Testing and Debugging](https://executorlib.readthedocs.io/en/latest/1-single-node.html#testing-and-debugging)
|
|
212
213
|
* [HPC Cluster Executor](https://executorlib.readthedocs.io/en/latest/2-hpc-cluster.html)
|
|
213
214
|
* [SLURM](https://executorlib.readthedocs.io/en/latest/2-hpc-cluster.html#slurm)
|
|
@@ -134,6 +134,7 @@ as hierarchical job scheduler within the allocations.
|
|
|
134
134
|
* [Basic Functionality](https://executorlib.readthedocs.io/en/latest/1-single-node.html#basic-functionality)
|
|
135
135
|
* [Parallel Functions](https://executorlib.readthedocs.io/en/latest/1-single-node.html#parallel-functions)
|
|
136
136
|
* [Performance Optimization](https://executorlib.readthedocs.io/en/latest/1-single-node.html#performance-optimization)
|
|
137
|
+
* [Advanced Scheduling](https://executorlib.readthedocs.io/en/latest/1-single-node.html#advanced-scheduling)
|
|
137
138
|
* [Testing and Debugging](https://executorlib.readthedocs.io/en/latest/1-single-node.html#testing-and-debugging)
|
|
138
139
|
* [HPC Cluster Executor](https://executorlib.readthedocs.io/en/latest/2-hpc-cluster.html)
|
|
139
140
|
* [SLURM](https://executorlib.readthedocs.io/en/latest/2-hpc-cluster.html#slurm)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[build-system]
|
|
2
2
|
requires = [
|
|
3
|
-
"hatchling
|
|
3
|
+
"hatchling>=1.27.0,<=1.28.0",
|
|
4
4
|
"hatch-vcs==0.5.0",
|
|
5
5
|
"cloudpickle>=2.0.0,<=3.1.2",
|
|
6
6
|
"pyzmq>=25.0.0,<=27.1.0",
|
|
@@ -43,25 +43,25 @@ Repository = "https://github.com/pyiron/executorlib"
|
|
|
43
43
|
cache = ["h5py>=3.6.0,<=3.15.1"]
|
|
44
44
|
graph = [
|
|
45
45
|
"pygraphviz>=1.10,<=1.14",
|
|
46
|
-
"networkx>=2.8.8,<=3.
|
|
46
|
+
"networkx>=2.8.8,<=3.6.1",
|
|
47
47
|
]
|
|
48
48
|
graphnotebook = [
|
|
49
49
|
"pygraphviz>=1.10,<=1.14",
|
|
50
|
-
"networkx>=2.8.8,<=3.
|
|
51
|
-
"ipython>=7.33.0,<=9.0
|
|
50
|
+
"networkx>=2.8.8,<=3.6.1",
|
|
51
|
+
"ipython>=7.33.0,<=9.9.0",
|
|
52
52
|
]
|
|
53
53
|
mpi = ["mpi4py>=3.1.4,<=4.1.1"]
|
|
54
54
|
cluster = [
|
|
55
|
-
"pysqa==0.3.
|
|
55
|
+
"pysqa==0.3.4",
|
|
56
56
|
"h5py>=3.6.0,<=3.15.1",
|
|
57
57
|
]
|
|
58
58
|
all = [
|
|
59
59
|
"mpi4py>=3.1.4,<=4.1.1",
|
|
60
|
-
"pysqa==0.3.
|
|
60
|
+
"pysqa==0.3.4",
|
|
61
61
|
"h5py>=3.6.0,<=3.15.1",
|
|
62
62
|
"pygraphviz>=1.10,<=1.14",
|
|
63
|
-
"networkx>=2.8.8,<=3.
|
|
64
|
-
"ipython>=7.33.0,<=9.0
|
|
63
|
+
"networkx>=2.8.8,<=3.6.1",
|
|
64
|
+
"ipython>=7.33.0,<=9.9.0",
|
|
65
65
|
]
|
|
66
66
|
|
|
67
67
|
[tool.ruff]
|
|
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
|
|
|
28
28
|
commit_id: COMMIT_ID
|
|
29
29
|
__commit_id__: COMMIT_ID
|
|
30
30
|
|
|
31
|
-
__version__ = version = '1.7.3'
|
|
32
|
-
__version_tuple__ = version_tuple = (1, 7, 3)
|
|
31
|
+
__version__ = version = '1.8.0'
|
|
32
|
+
__version_tuple__ = version_tuple = (1, 8, 0)
|
|
33
33
|
|
|
34
34
|
__commit_id__ = commit_id = None
|
|
@@ -107,6 +107,43 @@ class BaseExecutor(FutureExecutor, ABC):
|
|
|
107
107
|
else:
|
|
108
108
|
raise RuntimeError("cannot schedule new futures after shutdown")
|
|
109
109
|
|
|
110
|
+
def map(
|
|
111
|
+
self,
|
|
112
|
+
fn: Callable,
|
|
113
|
+
*iterables,
|
|
114
|
+
timeout: Optional[float] = None,
|
|
115
|
+
chunksize: int = 1,
|
|
116
|
+
):
|
|
117
|
+
"""Returns an iterator equivalent to map(fn, iter).
|
|
118
|
+
|
|
119
|
+
Args:
|
|
120
|
+
fn: A callable that will take as many arguments as there are
|
|
121
|
+
passed iterables.
|
|
122
|
+
timeout: The maximum number of seconds to wait. If None, then there
|
|
123
|
+
is no limit on the wait time.
|
|
124
|
+
chunksize: The size of the chunks the iterable will be broken into
|
|
125
|
+
before being passed to a child process. This argument is only
|
|
126
|
+
used by ProcessPoolExecutor; it is ignored by
|
|
127
|
+
ThreadPoolExecutor.
|
|
128
|
+
|
|
129
|
+
Returns:
|
|
130
|
+
An iterator equivalent to: map(func, *iterables) but the calls may
|
|
131
|
+
be evaluated out-of-order.
|
|
132
|
+
|
|
133
|
+
Raises:
|
|
134
|
+
TimeoutError: If the entire result iterator could not be generated
|
|
135
|
+
before the given timeout.
|
|
136
|
+
Exception: If fn(*args) raises for any values.
|
|
137
|
+
"""
|
|
138
|
+
if self._is_active:
|
|
139
|
+
return self._task_scheduler.map(
|
|
140
|
+
*([fn] + list(iterables)),
|
|
141
|
+
timeout=timeout,
|
|
142
|
+
chunksize=chunksize,
|
|
143
|
+
)
|
|
144
|
+
else:
|
|
145
|
+
raise RuntimeError("cannot schedule new futures after shutdown")
|
|
146
|
+
|
|
110
147
|
def shutdown(self, wait: bool = True, *, cancel_futures: bool = False):
|
|
111
148
|
"""
|
|
112
149
|
Clean-up the resources associated with the Executor.
|
|
@@ -9,6 +9,7 @@ from executorlib.standalone.inputcheck import (
|
|
|
9
9
|
check_plot_dependency_graph,
|
|
10
10
|
check_pmi,
|
|
11
11
|
check_refresh_rate,
|
|
12
|
+
check_wait_on_shutdown,
|
|
12
13
|
validate_number_of_cores,
|
|
13
14
|
)
|
|
14
15
|
from executorlib.task_scheduler.interactive.blockallocation import (
|
|
@@ -65,7 +66,9 @@ class FluxJobExecutor(BaseExecutor):
|
|
|
65
66
|
plot_dependency_graph (bool): Plot the dependencies of multiple future objects without executing them. For
|
|
66
67
|
debugging purposes and to get an overview of the specified dependencies.
|
|
67
68
|
plot_dependency_graph_filename (str): Name of the file to store the plotted graph in.
|
|
69
|
+
export_workflow_filename (str): Name of the file to store the exported workflow graph in.
|
|
68
70
|
log_obj_size (bool): Enable debug mode which reports the size of the communicated objects.
|
|
71
|
+
wait (bool): Whether to wait for the completion of all tasks before shutting down the executor.
|
|
69
72
|
|
|
70
73
|
Examples:
|
|
71
74
|
```
|
|
@@ -105,7 +108,9 @@ class FluxJobExecutor(BaseExecutor):
|
|
|
105
108
|
refresh_rate: float = 0.01,
|
|
106
109
|
plot_dependency_graph: bool = False,
|
|
107
110
|
plot_dependency_graph_filename: Optional[str] = None,
|
|
111
|
+
export_workflow_filename: Optional[str] = None,
|
|
108
112
|
log_obj_size: bool = False,
|
|
113
|
+
wait: bool = True,
|
|
109
114
|
):
|
|
110
115
|
"""
|
|
111
116
|
The executorlib.FluxJobExecutor leverages either the message passing interface (MPI), the SLURM workload manager
|
|
@@ -152,7 +157,9 @@ class FluxJobExecutor(BaseExecutor):
|
|
|
152
157
|
plot_dependency_graph (bool): Plot the dependencies of multiple future objects without executing them. For
|
|
153
158
|
debugging purposes and to get an overview of the specified dependencies.
|
|
154
159
|
plot_dependency_graph_filename (str): Name of the file to store the plotted graph in.
|
|
160
|
+
export_workflow_filename (str): Name of the file to store the exported workflow graph in.
|
|
155
161
|
log_obj_size (bool): Enable debug mode which reports the size of the communicated objects.
|
|
162
|
+
wait (bool): Whether to wait for the completion of all tasks before shutting down the executor.
|
|
156
163
|
|
|
157
164
|
"""
|
|
158
165
|
default_resource_dict: dict = {
|
|
@@ -184,11 +191,13 @@ class FluxJobExecutor(BaseExecutor):
|
|
|
184
191
|
block_allocation=block_allocation,
|
|
185
192
|
init_function=init_function,
|
|
186
193
|
log_obj_size=log_obj_size,
|
|
194
|
+
wait=wait,
|
|
187
195
|
),
|
|
188
196
|
max_cores=max_cores,
|
|
189
197
|
refresh_rate=refresh_rate,
|
|
190
198
|
plot_dependency_graph=plot_dependency_graph,
|
|
191
199
|
plot_dependency_graph_filename=plot_dependency_graph_filename,
|
|
200
|
+
export_workflow_filename=export_workflow_filename,
|
|
192
201
|
)
|
|
193
202
|
)
|
|
194
203
|
else:
|
|
@@ -208,6 +217,7 @@ class FluxJobExecutor(BaseExecutor):
|
|
|
208
217
|
block_allocation=block_allocation,
|
|
209
218
|
init_function=init_function,
|
|
210
219
|
log_obj_size=log_obj_size,
|
|
220
|
+
wait=wait,
|
|
211
221
|
)
|
|
212
222
|
)
|
|
213
223
|
|
|
@@ -255,7 +265,9 @@ class FluxClusterExecutor(BaseExecutor):
|
|
|
255
265
|
plot_dependency_graph (bool): Plot the dependencies of multiple future objects without executing them. For
|
|
256
266
|
debugging purposes and to get an overview of the specified dependencies.
|
|
257
267
|
plot_dependency_graph_filename (str): Name of the file to store the plotted graph in.
|
|
268
|
+
export_workflow_filename (str): Name of the file to store the exported workflow graph in.
|
|
258
269
|
log_obj_size (bool): Enable debug mode which reports the size of the communicated objects.
|
|
270
|
+
wait (bool): Whether to wait for the completion of all tasks before shutting down the executor.
|
|
259
271
|
|
|
260
272
|
Examples:
|
|
261
273
|
```
|
|
@@ -293,7 +305,9 @@ class FluxClusterExecutor(BaseExecutor):
|
|
|
293
305
|
refresh_rate: float = 0.01,
|
|
294
306
|
plot_dependency_graph: bool = False,
|
|
295
307
|
plot_dependency_graph_filename: Optional[str] = None,
|
|
308
|
+
export_workflow_filename: Optional[str] = None,
|
|
296
309
|
log_obj_size: bool = False,
|
|
310
|
+
wait: bool = True,
|
|
297
311
|
):
|
|
298
312
|
"""
|
|
299
313
|
The executorlib.FluxClusterExecutor leverages either the message passing interface (MPI), the SLURM workload
|
|
@@ -338,7 +352,9 @@ class FluxClusterExecutor(BaseExecutor):
|
|
|
338
352
|
plot_dependency_graph (bool): Plot the dependencies of multiple future objects without executing them. For
|
|
339
353
|
debugging purposes and to get an overview of the specified dependencies.
|
|
340
354
|
plot_dependency_graph_filename (str): Name of the file to store the plotted graph in.
|
|
355
|
+
export_workflow_filename (str): Name of the file to store the exported workflow graph in.
|
|
341
356
|
log_obj_size (bool): Enable debug mode which reports the size of the communicated objects.
|
|
357
|
+
wait (bool): Whether to wait for the completion of all tasks before shutting down the executor.
|
|
342
358
|
|
|
343
359
|
"""
|
|
344
360
|
default_resource_dict: dict = {
|
|
@@ -398,6 +414,7 @@ class FluxClusterExecutor(BaseExecutor):
|
|
|
398
414
|
block_allocation=block_allocation,
|
|
399
415
|
init_function=init_function,
|
|
400
416
|
disable_dependencies=disable_dependencies,
|
|
417
|
+
wait=wait,
|
|
401
418
|
)
|
|
402
419
|
)
|
|
403
420
|
else:
|
|
@@ -420,6 +437,7 @@ class FluxClusterExecutor(BaseExecutor):
|
|
|
420
437
|
refresh_rate=refresh_rate,
|
|
421
438
|
plot_dependency_graph=plot_dependency_graph,
|
|
422
439
|
plot_dependency_graph_filename=plot_dependency_graph_filename,
|
|
440
|
+
export_workflow_filename=export_workflow_filename,
|
|
423
441
|
)
|
|
424
442
|
)
|
|
425
443
|
|
|
@@ -437,6 +455,7 @@ def create_flux_executor(
|
|
|
437
455
|
block_allocation: bool = False,
|
|
438
456
|
init_function: Optional[Callable] = None,
|
|
439
457
|
log_obj_size: bool = False,
|
|
458
|
+
wait: bool = True,
|
|
440
459
|
) -> Union[OneProcessTaskScheduler, BlockAllocationTaskScheduler]:
|
|
441
460
|
"""
|
|
442
461
|
Create a flux executor
|
|
@@ -475,6 +494,7 @@ def create_flux_executor(
|
|
|
475
494
|
of the individual function.
|
|
476
495
|
init_function (None): optional function to preset arguments for functions which are submitted later
|
|
477
496
|
log_obj_size (bool): Enable debug mode which reports the size of the communicated objects.
|
|
497
|
+
wait (bool): Whether to wait for the completion of all tasks before shutting down the executor.
|
|
478
498
|
|
|
479
499
|
Returns:
|
|
480
500
|
InteractiveStepExecutor/ InteractiveExecutor
|
|
@@ -496,6 +516,7 @@ def create_flux_executor(
|
|
|
496
516
|
check_command_line_argument_lst(
|
|
497
517
|
command_line_argument_lst=resource_dict.get("slurm_cmd_args", [])
|
|
498
518
|
)
|
|
519
|
+
check_wait_on_shutdown(wait_on_shutdown=wait)
|
|
499
520
|
if "openmpi_oversubscribe" in resource_dict:
|
|
500
521
|
del resource_dict["openmpi_oversubscribe"]
|
|
501
522
|
if "slurm_cmd_args" in resource_dict:
|
|
@@ -7,6 +7,7 @@ from executorlib.standalone.inputcheck import (
|
|
|
7
7
|
check_init_function,
|
|
8
8
|
check_plot_dependency_graph,
|
|
9
9
|
check_refresh_rate,
|
|
10
|
+
check_wait_on_shutdown,
|
|
10
11
|
validate_number_of_cores,
|
|
11
12
|
)
|
|
12
13
|
from executorlib.standalone.interactive.spawner import MpiExecSpawner
|
|
@@ -58,7 +59,9 @@ class SingleNodeExecutor(BaseExecutor):
|
|
|
58
59
|
plot_dependency_graph (bool): Plot the dependencies of multiple future objects without executing them. For
|
|
59
60
|
debugging purposes and to get an overview of the specified dependencies.
|
|
60
61
|
plot_dependency_graph_filename (str): Name of the file to store the plotted graph in.
|
|
62
|
+
export_workflow_filename (str): Name of the file to store the exported workflow graph in.
|
|
61
63
|
log_obj_size (bool): Enable debug mode which reports the size of the communicated objects.
|
|
64
|
+
wait (bool): Whether to wait for the completion of all tasks before shutting down the executor.
|
|
62
65
|
|
|
63
66
|
Examples:
|
|
64
67
|
```
|
|
@@ -94,7 +97,9 @@ class SingleNodeExecutor(BaseExecutor):
|
|
|
94
97
|
refresh_rate: float = 0.01,
|
|
95
98
|
plot_dependency_graph: bool = False,
|
|
96
99
|
plot_dependency_graph_filename: Optional[str] = None,
|
|
100
|
+
export_workflow_filename: Optional[str] = None,
|
|
97
101
|
log_obj_size: bool = False,
|
|
102
|
+
wait: bool = True,
|
|
98
103
|
):
|
|
99
104
|
"""
|
|
100
105
|
The executorlib.SingleNodeExecutor leverages either the message passing interface (MPI), the SLURM workload
|
|
@@ -138,7 +143,9 @@ class SingleNodeExecutor(BaseExecutor):
|
|
|
138
143
|
plot_dependency_graph (bool): Plot the dependencies of multiple future objects without executing them. For
|
|
139
144
|
debugging purposes and to get an overview of the specified dependencies.
|
|
140
145
|
plot_dependency_graph_filename (str): Name of the file to store the plotted graph in.
|
|
146
|
+
export_workflow_filename (str): Name of the file to store the exported workflow graph in.
|
|
141
147
|
log_obj_size (bool): Enable debug mode which reports the size of the communicated objects.
|
|
148
|
+
wait (bool): Whether to wait for the completion of all tasks before shutting down the executor.
|
|
142
149
|
|
|
143
150
|
"""
|
|
144
151
|
default_resource_dict: dict = {
|
|
@@ -166,11 +173,13 @@ class SingleNodeExecutor(BaseExecutor):
|
|
|
166
173
|
block_allocation=block_allocation,
|
|
167
174
|
init_function=init_function,
|
|
168
175
|
log_obj_size=log_obj_size,
|
|
176
|
+
wait=wait,
|
|
169
177
|
),
|
|
170
178
|
max_cores=max_cores,
|
|
171
179
|
refresh_rate=refresh_rate,
|
|
172
180
|
plot_dependency_graph=plot_dependency_graph,
|
|
173
181
|
plot_dependency_graph_filename=plot_dependency_graph_filename,
|
|
182
|
+
export_workflow_filename=export_workflow_filename,
|
|
174
183
|
)
|
|
175
184
|
)
|
|
176
185
|
else:
|
|
@@ -186,6 +195,7 @@ class SingleNodeExecutor(BaseExecutor):
|
|
|
186
195
|
block_allocation=block_allocation,
|
|
187
196
|
init_function=init_function,
|
|
188
197
|
log_obj_size=log_obj_size,
|
|
198
|
+
wait=wait,
|
|
189
199
|
)
|
|
190
200
|
)
|
|
191
201
|
|
|
@@ -226,7 +236,9 @@ class TestClusterExecutor(BaseExecutor):
|
|
|
226
236
|
plot_dependency_graph (bool): Plot the dependencies of multiple future objects without executing them. For
|
|
227
237
|
debugging purposes and to get an overview of the specified dependencies.
|
|
228
238
|
plot_dependency_graph_filename (str): Name of the file to store the plotted graph in.
|
|
239
|
+
export_workflow_filename (str): Name of the file to store the exported workflow graph in.
|
|
229
240
|
log_obj_size (bool): Enable debug mode which reports the size of the communicated objects.
|
|
241
|
+
wait (bool): Whether to wait for the completion of all tasks before shutting down the executor.
|
|
230
242
|
|
|
231
243
|
Examples:
|
|
232
244
|
```
|
|
@@ -262,7 +274,9 @@ class TestClusterExecutor(BaseExecutor):
|
|
|
262
274
|
refresh_rate: float = 0.01,
|
|
263
275
|
plot_dependency_graph: bool = False,
|
|
264
276
|
plot_dependency_graph_filename: Optional[str] = None,
|
|
277
|
+
export_workflow_filename: Optional[str] = None,
|
|
265
278
|
log_obj_size: bool = False,
|
|
279
|
+
wait: bool = True,
|
|
266
280
|
):
|
|
267
281
|
"""
|
|
268
282
|
The executorlib.api.TestClusterExecutor is designed to test the file based communication used in the
|
|
@@ -299,7 +313,9 @@ class TestClusterExecutor(BaseExecutor):
|
|
|
299
313
|
plot_dependency_graph (bool): Plot the dependencies of multiple future objects without executing them. For
|
|
300
314
|
debugging purposes and to get an overview of the specified dependencies.
|
|
301
315
|
plot_dependency_graph_filename (str): Name of the file to store the plotted graph in.
|
|
316
|
+
export_workflow_filename (str): Name of the file to store the exported workflow graph in.
|
|
302
317
|
log_obj_size (bool): Enable debug mode which reports the size of the communicated objects.
|
|
318
|
+
wait (bool): Whether to wait for the completion of all tasks before shutting down the executor.
|
|
303
319
|
|
|
304
320
|
"""
|
|
305
321
|
default_resource_dict: dict = {
|
|
@@ -339,6 +355,7 @@ class TestClusterExecutor(BaseExecutor):
|
|
|
339
355
|
init_function=init_function,
|
|
340
356
|
disable_dependencies=disable_dependencies,
|
|
341
357
|
execute_function=execute_in_subprocess,
|
|
358
|
+
wait=wait,
|
|
342
359
|
)
|
|
343
360
|
)
|
|
344
361
|
else:
|
|
@@ -358,6 +375,7 @@ class TestClusterExecutor(BaseExecutor):
|
|
|
358
375
|
refresh_rate=refresh_rate,
|
|
359
376
|
plot_dependency_graph=plot_dependency_graph,
|
|
360
377
|
plot_dependency_graph_filename=plot_dependency_graph_filename,
|
|
378
|
+
export_workflow_filename=export_workflow_filename,
|
|
361
379
|
)
|
|
362
380
|
)
|
|
363
381
|
|
|
@@ -371,6 +389,7 @@ def create_single_node_executor(
|
|
|
371
389
|
block_allocation: bool = False,
|
|
372
390
|
init_function: Optional[Callable] = None,
|
|
373
391
|
log_obj_size: bool = False,
|
|
392
|
+
wait: bool = True,
|
|
374
393
|
) -> Union[OneProcessTaskScheduler, BlockAllocationTaskScheduler]:
|
|
375
394
|
"""
|
|
376
395
|
Create a single node executor
|
|
@@ -405,6 +424,7 @@ def create_single_node_executor(
|
|
|
405
424
|
of the individual function.
|
|
406
425
|
init_function (None): optional function to preset arguments for functions which are submitted later
|
|
407
426
|
log_obj_size (bool): Enable debug mode which reports the size of the communicated objects.
|
|
427
|
+
wait (bool): Whether to wait for the completion of all tasks before shutting down the executor.
|
|
408
428
|
|
|
409
429
|
Returns:
|
|
410
430
|
InteractiveStepExecutor/ InteractiveExecutor
|
|
@@ -421,6 +441,7 @@ def create_single_node_executor(
|
|
|
421
441
|
check_command_line_argument_lst(
|
|
422
442
|
command_line_argument_lst=resource_dict.get("slurm_cmd_args", [])
|
|
423
443
|
)
|
|
444
|
+
check_wait_on_shutdown(wait_on_shutdown=wait)
|
|
424
445
|
if "threads_per_core" in resource_dict:
|
|
425
446
|
del resource_dict["threads_per_core"]
|
|
426
447
|
if "gpus_per_core" in resource_dict:
|
|
@@ -6,6 +6,7 @@ from executorlib.standalone.inputcheck import (
|
|
|
6
6
|
check_log_obj_size,
|
|
7
7
|
check_plot_dependency_graph,
|
|
8
8
|
check_refresh_rate,
|
|
9
|
+
check_wait_on_shutdown,
|
|
9
10
|
validate_number_of_cores,
|
|
10
11
|
)
|
|
11
12
|
from executorlib.task_scheduler.interactive.blockallocation import (
|
|
@@ -63,7 +64,9 @@ class SlurmClusterExecutor(BaseExecutor):
|
|
|
63
64
|
plot_dependency_graph (bool): Plot the dependencies of multiple future objects without executing them. For
|
|
64
65
|
debugging purposes and to get an overview of the specified dependencies.
|
|
65
66
|
plot_dependency_graph_filename (str): Name of the file to store the plotted graph in.
|
|
67
|
+
export_workflow_filename (str): Name of the file to store the exported workflow graph in.
|
|
66
68
|
log_obj_size (bool): Enable debug mode which reports the size of the communicated objects.
|
|
69
|
+
wait (bool): Whether to wait for the completion of all tasks before shutting down the executor.
|
|
67
70
|
|
|
68
71
|
Examples:
|
|
69
72
|
```
|
|
@@ -101,7 +104,9 @@ class SlurmClusterExecutor(BaseExecutor):
|
|
|
101
104
|
refresh_rate: float = 0.01,
|
|
102
105
|
plot_dependency_graph: bool = False,
|
|
103
106
|
plot_dependency_graph_filename: Optional[str] = None,
|
|
107
|
+
export_workflow_filename: Optional[str] = None,
|
|
104
108
|
log_obj_size: bool = False,
|
|
109
|
+
wait: bool = True,
|
|
105
110
|
):
|
|
106
111
|
"""
|
|
107
112
|
The executorlib.SlurmClusterExecutor leverages either the message passing interface (MPI), the SLURM workload
|
|
@@ -146,7 +151,9 @@ class SlurmClusterExecutor(BaseExecutor):
|
|
|
146
151
|
plot_dependency_graph (bool): Plot the dependencies of multiple future objects without executing them. For
|
|
147
152
|
debugging purposes and to get an overview of the specified dependencies.
|
|
148
153
|
plot_dependency_graph_filename (str): Name of the file to store the plotted graph in.
|
|
154
|
+
export_workflow_filename (str): Name of the file to store the exported workflow graph in.
|
|
149
155
|
log_obj_size (bool): Enable debug mode which reports the size of the communicated objects.
|
|
156
|
+
wait (bool): Whether to wait for the completion of all tasks before shutting down the executor.
|
|
150
157
|
|
|
151
158
|
"""
|
|
152
159
|
default_resource_dict: dict = {
|
|
@@ -207,6 +214,7 @@ class SlurmClusterExecutor(BaseExecutor):
|
|
|
207
214
|
block_allocation=block_allocation,
|
|
208
215
|
init_function=init_function,
|
|
209
216
|
disable_dependencies=disable_dependencies,
|
|
217
|
+
wait=wait,
|
|
210
218
|
)
|
|
211
219
|
)
|
|
212
220
|
else:
|
|
@@ -225,6 +233,7 @@ class SlurmClusterExecutor(BaseExecutor):
|
|
|
225
233
|
refresh_rate=refresh_rate,
|
|
226
234
|
plot_dependency_graph=plot_dependency_graph,
|
|
227
235
|
plot_dependency_graph_filename=plot_dependency_graph_filename,
|
|
236
|
+
export_workflow_filename=export_workflow_filename,
|
|
228
237
|
)
|
|
229
238
|
)
|
|
230
239
|
|
|
@@ -275,7 +284,9 @@ class SlurmJobExecutor(BaseExecutor):
|
|
|
275
284
|
plot_dependency_graph (bool): Plot the dependencies of multiple future objects without executing them. For
|
|
276
285
|
debugging purposes and to get an overview of the specified dependencies.
|
|
277
286
|
plot_dependency_graph_filename (str): Name of the file to store the plotted graph in.
|
|
287
|
+
export_workflow_filename (str): Name of the file to store the exported workflow graph in.
|
|
278
288
|
log_obj_size (bool): Enable debug mode which reports the size of the communicated objects.
|
|
289
|
+
wait (bool): Whether to wait for the completion of all tasks before shutting down the executor.
|
|
279
290
|
|
|
280
291
|
Examples:
|
|
281
292
|
```
|
|
@@ -312,7 +323,9 @@ class SlurmJobExecutor(BaseExecutor):
|
|
|
312
323
|
refresh_rate: float = 0.01,
|
|
313
324
|
plot_dependency_graph: bool = False,
|
|
314
325
|
plot_dependency_graph_filename: Optional[str] = None,
|
|
326
|
+
export_workflow_filename: Optional[str] = None,
|
|
315
327
|
log_obj_size: bool = False,
|
|
328
|
+
wait: bool = True,
|
|
316
329
|
):
|
|
317
330
|
"""
|
|
318
331
|
The executorlib.SlurmJobExecutor leverages either the message passing interface (MPI), the SLURM workload
|
|
@@ -360,7 +373,9 @@ class SlurmJobExecutor(BaseExecutor):
|
|
|
360
373
|
plot_dependency_graph (bool): Plot the dependencies of multiple future objects without executing them. For
|
|
361
374
|
debugging purposes and to get an overview of the specified dependencies.
|
|
362
375
|
plot_dependency_graph_filename (str): Name of the file to store the plotted graph in.
|
|
376
|
+
export_workflow_filename (str): Name of the file to store the exported workflow graph in.
|
|
363
377
|
log_obj_size (bool): Enable debug mode which reports the size of the communicated objects.
|
|
378
|
+
wait (bool): Whether to wait for the completion of all tasks before shutting down the executor.
|
|
364
379
|
|
|
365
380
|
"""
|
|
366
381
|
default_resource_dict: dict = {
|
|
@@ -389,11 +404,13 @@ class SlurmJobExecutor(BaseExecutor):
|
|
|
389
404
|
block_allocation=block_allocation,
|
|
390
405
|
init_function=init_function,
|
|
391
406
|
log_obj_size=log_obj_size,
|
|
407
|
+
wait=wait,
|
|
392
408
|
),
|
|
393
409
|
max_cores=max_cores,
|
|
394
410
|
refresh_rate=refresh_rate,
|
|
395
411
|
plot_dependency_graph=plot_dependency_graph,
|
|
396
412
|
plot_dependency_graph_filename=plot_dependency_graph_filename,
|
|
413
|
+
export_workflow_filename=export_workflow_filename,
|
|
397
414
|
)
|
|
398
415
|
)
|
|
399
416
|
else:
|
|
@@ -410,6 +427,7 @@ class SlurmJobExecutor(BaseExecutor):
|
|
|
410
427
|
block_allocation=block_allocation,
|
|
411
428
|
init_function=init_function,
|
|
412
429
|
log_obj_size=log_obj_size,
|
|
430
|
+
wait=wait,
|
|
413
431
|
)
|
|
414
432
|
)
|
|
415
433
|
|
|
@@ -424,6 +442,7 @@ def create_slurm_executor(
|
|
|
424
442
|
block_allocation: bool = False,
|
|
425
443
|
init_function: Optional[Callable] = None,
|
|
426
444
|
log_obj_size: bool = False,
|
|
445
|
+
wait: bool = True,
|
|
427
446
|
) -> Union[OneProcessTaskScheduler, BlockAllocationTaskScheduler]:
|
|
428
447
|
"""
|
|
429
448
|
Create a SLURM executor
|
|
@@ -463,6 +482,7 @@ def create_slurm_executor(
|
|
|
463
482
|
of the individual function.
|
|
464
483
|
init_function (None): optional function to preset arguments for functions which are submitted later
|
|
465
484
|
log_obj_size (bool): Enable debug mode which reports the size of the communicated objects.
|
|
485
|
+
wait (bool): Whether to wait for the completion of all tasks before shutting down the executor.
|
|
466
486
|
|
|
467
487
|
Returns:
|
|
468
488
|
InteractiveStepExecutor/ InteractiveExecutor
|
|
@@ -475,6 +495,7 @@ def create_slurm_executor(
|
|
|
475
495
|
resource_dict["log_obj_size"] = log_obj_size
|
|
476
496
|
resource_dict["pmi_mode"] = pmi_mode
|
|
477
497
|
check_init_function(block_allocation=block_allocation, init_function=init_function)
|
|
498
|
+
check_wait_on_shutdown(wait_on_shutdown=wait)
|
|
478
499
|
if block_allocation:
|
|
479
500
|
resource_dict["init_function"] = init_function
|
|
480
501
|
max_workers = validate_number_of_cores(
|
|
@@ -11,6 +11,7 @@ group_dict = {
|
|
|
11
11
|
"kwargs": "input_kwargs",
|
|
12
12
|
"output": "output",
|
|
13
13
|
"error": "error",
|
|
14
|
+
"resource_dict": "resource_dict",
|
|
14
15
|
"runtime": "runtime",
|
|
15
16
|
"queue_id": "queue_id",
|
|
16
17
|
"error_log_file": "error_log_file",
|
|
@@ -61,6 +62,12 @@ def load(file_name: str) -> dict:
|
|
|
61
62
|
data_dict["kwargs"] = cloudpickle.loads(np.void(hdf["/input_kwargs"]))
|
|
62
63
|
else:
|
|
63
64
|
data_dict["kwargs"] = {}
|
|
65
|
+
if "resource_dict" in hdf:
|
|
66
|
+
data_dict["resource_dict"] = cloudpickle.loads(
|
|
67
|
+
np.void(hdf["/resource_dict"])
|
|
68
|
+
)
|
|
69
|
+
else:
|
|
70
|
+
data_dict["resource_dict"] = {}
|
|
64
71
|
if "error_log_file" in hdf:
|
|
65
72
|
data_dict["error_log_file"] = cloudpickle.loads(
|
|
66
73
|
np.void(hdf["/error_log_file"])
|
|
@@ -17,6 +17,18 @@ def check_oversubscribe(oversubscribe: bool) -> None:
|
|
|
17
17
|
)
|
|
18
18
|
|
|
19
19
|
|
|
20
|
+
def check_wait_on_shutdown(
|
|
21
|
+
wait_on_shutdown: bool,
|
|
22
|
+
) -> None:
|
|
23
|
+
"""
|
|
24
|
+
Check if wait_on_shutdown is False and raise a ValueError if it is.
|
|
25
|
+
"""
|
|
26
|
+
if not wait_on_shutdown:
|
|
27
|
+
raise ValueError(
|
|
28
|
+
"The wait_on_shutdown parameter is only supported for the executorlib.FluxClusterExecutor and executorlib.SlurmClusterExecutor."
|
|
29
|
+
)
|
|
30
|
+
|
|
31
|
+
|
|
20
32
|
def check_command_line_argument_lst(command_line_argument_lst: list[str]) -> None:
|
|
21
33
|
"""
|
|
22
34
|
Check if command_line_argument_lst is not empty and raise a ValueError if it is.
|
|
@@ -11,6 +11,7 @@ class BaseSpawner(ABC):
|
|
|
11
11
|
self,
|
|
12
12
|
cwd: Optional[str] = None,
|
|
13
13
|
cores: int = 1,
|
|
14
|
+
worker_id: int = 0,
|
|
14
15
|
openmpi_oversubscribe: bool = False,
|
|
15
16
|
):
|
|
16
17
|
"""
|
|
@@ -20,9 +21,11 @@ class BaseSpawner(ABC):
|
|
|
20
21
|
cwd (str): The current working directory.
|
|
21
22
|
cores (int, optional): The number of cores to use. Defaults to 1.
|
|
22
23
|
openmpi_oversubscribe (bool, optional): Whether to oversubscribe the cores. Defaults to False.
|
|
24
|
+
worker_id (int): The worker ID. Defaults to 0.
|
|
23
25
|
"""
|
|
24
26
|
self._cwd = cwd
|
|
25
27
|
self._cores = cores
|
|
28
|
+
self._worker_id = worker_id
|
|
26
29
|
self._openmpi_oversubscribe = openmpi_oversubscribe
|
|
27
30
|
|
|
28
31
|
@abstractmethod
|
|
@@ -69,6 +72,7 @@ class SubprocessSpawner(BaseSpawner):
|
|
|
69
72
|
self,
|
|
70
73
|
cwd: Optional[str] = None,
|
|
71
74
|
cores: int = 1,
|
|
75
|
+
worker_id: int = 0,
|
|
72
76
|
openmpi_oversubscribe: bool = False,
|
|
73
77
|
threads_per_core: int = 1,
|
|
74
78
|
):
|
|
@@ -79,11 +83,13 @@ class SubprocessSpawner(BaseSpawner):
|
|
|
79
83
|
cwd (str, optional): The current working directory. Defaults to None.
|
|
80
84
|
cores (int, optional): The number of cores to use. Defaults to 1.
|
|
81
85
|
threads_per_core (int, optional): The number of threads per core. Defaults to 1.
|
|
86
|
+
worker_id (int): The worker ID. Defaults to 0.
|
|
82
87
|
openmpi_oversubscribe (bool, optional): Whether to oversubscribe the cores. Defaults to False.
|
|
83
88
|
"""
|
|
84
89
|
super().__init__(
|
|
85
90
|
cwd=cwd,
|
|
86
91
|
cores=cores,
|
|
92
|
+
worker_id=worker_id,
|
|
87
93
|
openmpi_oversubscribe=openmpi_oversubscribe,
|
|
88
94
|
)
|
|
89
95
|
self._process: Optional[subprocess.Popen] = None
|
|
@@ -75,7 +75,7 @@ def serialize_funct(
|
|
|
75
75
|
"kwargs": fn_kwargs,
|
|
76
76
|
}
|
|
77
77
|
)
|
|
78
|
-
task_key = fn
|
|
78
|
+
task_key = _get_function_name(fn=fn) + _get_hash(binary=binary_all)
|
|
79
79
|
data = {
|
|
80
80
|
"fn": fn,
|
|
81
81
|
"args": fn_args,
|
|
@@ -99,3 +99,10 @@ def _get_hash(binary: bytes) -> str:
|
|
|
99
99
|
# Remove specification of jupyter kernel from hash to be deterministic
|
|
100
100
|
binary_no_ipykernel = re.sub(b"(?<=/ipykernel_)(.*)(?=/)", b"", binary)
|
|
101
101
|
return str(hashlib.md5(binary_no_ipykernel).hexdigest())
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def _get_function_name(fn: Callable) -> str:
|
|
105
|
+
if hasattr(fn, "__name__"):
|
|
106
|
+
return fn.__name__
|
|
107
|
+
else:
|
|
108
|
+
return str(fn.__class__).split("'")[-2].split(".")[-1]
|