executorlib 1.7.4__tar.gz → 1.8.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {executorlib-1.7.4 → executorlib-1.8.1}/PKG-INFO +3 -3
- {executorlib-1.7.4 → executorlib-1.8.1}/pyproject.toml +2 -2
- {executorlib-1.7.4 → executorlib-1.8.1}/src/executorlib/_version.py +2 -2
- {executorlib-1.7.4 → executorlib-1.8.1}/src/executorlib/executor/flux.py +13 -0
- {executorlib-1.7.4 → executorlib-1.8.1}/src/executorlib/executor/single.py +13 -0
- {executorlib-1.7.4 → executorlib-1.8.1}/src/executorlib/executor/slurm.py +13 -0
- {executorlib-1.7.4 → executorlib-1.8.1}/src/executorlib/standalone/hdf.py +7 -0
- {executorlib-1.7.4 → executorlib-1.8.1}/src/executorlib/standalone/inputcheck.py +12 -0
- {executorlib-1.7.4 → executorlib-1.8.1}/src/executorlib/standalone/interactive/spawner.py +19 -0
- {executorlib-1.7.4 → executorlib-1.8.1}/src/executorlib/task_scheduler/base.py +3 -1
- {executorlib-1.7.4 → executorlib-1.8.1}/src/executorlib/task_scheduler/file/shared.py +105 -30
- {executorlib-1.7.4 → executorlib-1.8.1}/src/executorlib/task_scheduler/file/spawner_pysqa.py +4 -0
- {executorlib-1.7.4 → executorlib-1.8.1}/src/executorlib/task_scheduler/file/spawner_subprocess.py +18 -3
- {executorlib-1.7.4 → executorlib-1.8.1}/src/executorlib/task_scheduler/file/task_scheduler.py +5 -0
- {executorlib-1.7.4 → executorlib-1.8.1}/src/executorlib/task_scheduler/interactive/blockallocation.py +2 -2
- {executorlib-1.7.4 → executorlib-1.8.1}/src/executorlib/task_scheduler/interactive/dependency_plot.py +35 -2
- {executorlib-1.7.4 → executorlib-1.8.1}/src/executorlib/task_scheduler/interactive/spawner_flux.py +13 -5
- {executorlib-1.7.4 → executorlib-1.8.1}/src/executorlib/task_scheduler/interactive/spawner_pysqa.py +8 -1
- {executorlib-1.7.4 → executorlib-1.8.1}/src/executorlib/task_scheduler/interactive/spawner_slurm.py +3 -0
- {executorlib-1.7.4 → executorlib-1.8.1}/.gitignore +0 -0
- {executorlib-1.7.4 → executorlib-1.8.1}/LICENSE +0 -0
- {executorlib-1.7.4 → executorlib-1.8.1}/README.md +0 -0
- {executorlib-1.7.4 → executorlib-1.8.1}/src/executorlib/__init__.py +0 -0
- {executorlib-1.7.4 → executorlib-1.8.1}/src/executorlib/api.py +0 -0
- {executorlib-1.7.4 → executorlib-1.8.1}/src/executorlib/backend/__init__.py +0 -0
- {executorlib-1.7.4 → executorlib-1.8.1}/src/executorlib/backend/cache_parallel.py +0 -0
- {executorlib-1.7.4 → executorlib-1.8.1}/src/executorlib/backend/cache_serial.py +0 -0
- {executorlib-1.7.4 → executorlib-1.8.1}/src/executorlib/backend/interactive_parallel.py +0 -0
- {executorlib-1.7.4 → executorlib-1.8.1}/src/executorlib/backend/interactive_serial.py +0 -0
- {executorlib-1.7.4 → executorlib-1.8.1}/src/executorlib/executor/__init__.py +0 -0
- {executorlib-1.7.4 → executorlib-1.8.1}/src/executorlib/executor/base.py +0 -0
- {executorlib-1.7.4 → executorlib-1.8.1}/src/executorlib/standalone/__init__.py +0 -0
- {executorlib-1.7.4 → executorlib-1.8.1}/src/executorlib/standalone/batched.py +0 -0
- {executorlib-1.7.4 → executorlib-1.8.1}/src/executorlib/standalone/command.py +0 -0
- {executorlib-1.7.4 → executorlib-1.8.1}/src/executorlib/standalone/error.py +0 -0
- {executorlib-1.7.4 → executorlib-1.8.1}/src/executorlib/standalone/interactive/__init__.py +0 -0
- {executorlib-1.7.4 → executorlib-1.8.1}/src/executorlib/standalone/interactive/arguments.py +0 -0
- {executorlib-1.7.4 → executorlib-1.8.1}/src/executorlib/standalone/interactive/backend.py +0 -0
- {executorlib-1.7.4 → executorlib-1.8.1}/src/executorlib/standalone/interactive/communication.py +0 -0
- {executorlib-1.7.4 → executorlib-1.8.1}/src/executorlib/standalone/queue.py +0 -0
- {executorlib-1.7.4 → executorlib-1.8.1}/src/executorlib/standalone/scheduler.py +0 -0
- {executorlib-1.7.4 → executorlib-1.8.1}/src/executorlib/standalone/select.py +0 -0
- {executorlib-1.7.4 → executorlib-1.8.1}/src/executorlib/standalone/serialize.py +0 -0
- {executorlib-1.7.4 → executorlib-1.8.1}/src/executorlib/task_scheduler/__init__.py +0 -0
- {executorlib-1.7.4 → executorlib-1.8.1}/src/executorlib/task_scheduler/file/__init__.py +0 -0
- {executorlib-1.7.4 → executorlib-1.8.1}/src/executorlib/task_scheduler/file/backend.py +0 -0
- {executorlib-1.7.4 → executorlib-1.8.1}/src/executorlib/task_scheduler/interactive/__init__.py +0 -0
- {executorlib-1.7.4 → executorlib-1.8.1}/src/executorlib/task_scheduler/interactive/dependency.py +0 -0
- {executorlib-1.7.4 → executorlib-1.8.1}/src/executorlib/task_scheduler/interactive/onetoone.py +0 -0
- {executorlib-1.7.4 → executorlib-1.8.1}/src/executorlib/task_scheduler/interactive/shared.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: executorlib
|
|
3
|
-
Version: 1.7.4
|
|
3
|
+
Version: 1.8.1
|
|
4
4
|
Summary: Up-scale python functions for high performance computing (HPC) with executorlib.
|
|
5
5
|
Project-URL: Homepage, https://github.com/pyiron/executorlib
|
|
6
6
|
Project-URL: Documentation, https://executorlib.readthedocs.io
|
|
@@ -55,12 +55,12 @@ Requires-Dist: ipython<=9.9.0,>=7.33.0; extra == 'all'
|
|
|
55
55
|
Requires-Dist: mpi4py<=4.1.1,>=3.1.4; extra == 'all'
|
|
56
56
|
Requires-Dist: networkx<=3.6.1,>=2.8.8; extra == 'all'
|
|
57
57
|
Requires-Dist: pygraphviz<=1.14,>=1.10; extra == 'all'
|
|
58
|
-
Requires-Dist: pysqa==0.3.
|
|
58
|
+
Requires-Dist: pysqa==0.3.4; extra == 'all'
|
|
59
59
|
Provides-Extra: cache
|
|
60
60
|
Requires-Dist: h5py<=3.15.1,>=3.6.0; extra == 'cache'
|
|
61
61
|
Provides-Extra: cluster
|
|
62
62
|
Requires-Dist: h5py<=3.15.1,>=3.6.0; extra == 'cluster'
|
|
63
|
-
Requires-Dist: pysqa==0.3.
|
|
63
|
+
Requires-Dist: pysqa==0.3.4; extra == 'cluster'
|
|
64
64
|
Provides-Extra: graph
|
|
65
65
|
Requires-Dist: networkx<=3.6.1,>=2.8.8; extra == 'graph'
|
|
66
66
|
Requires-Dist: pygraphviz<=1.14,>=1.10; extra == 'graph'
|
|
@@ -52,12 +52,12 @@ graphnotebook = [
|
|
|
52
52
|
]
|
|
53
53
|
mpi = ["mpi4py>=3.1.4,<=4.1.1"]
|
|
54
54
|
cluster = [
|
|
55
|
-
"pysqa==0.3.
|
|
55
|
+
"pysqa==0.3.4",
|
|
56
56
|
"h5py>=3.6.0,<=3.15.1",
|
|
57
57
|
]
|
|
58
58
|
all = [
|
|
59
59
|
"mpi4py>=3.1.4,<=4.1.1",
|
|
60
|
-
"pysqa==0.3.
|
|
60
|
+
"pysqa==0.3.4",
|
|
61
61
|
"h5py>=3.6.0,<=3.15.1",
|
|
62
62
|
"pygraphviz>=1.10,<=1.14",
|
|
63
63
|
"networkx>=2.8.8,<=3.6.1",
|
|
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
|
|
|
28
28
|
commit_id: COMMIT_ID
|
|
29
29
|
__commit_id__: COMMIT_ID
|
|
30
30
|
|
|
31
|
-
__version__ = version = '1.7.4'
|
|
32
|
-
__version_tuple__ = version_tuple = (1, 7, 4)
|
|
31
|
+
__version__ = version = '1.8.1'
|
|
32
|
+
__version_tuple__ = version_tuple = (1, 8, 1)
|
|
33
33
|
|
|
34
34
|
__commit_id__ = commit_id = None
|
|
@@ -9,6 +9,7 @@ from executorlib.standalone.inputcheck import (
|
|
|
9
9
|
check_plot_dependency_graph,
|
|
10
10
|
check_pmi,
|
|
11
11
|
check_refresh_rate,
|
|
12
|
+
check_wait_on_shutdown,
|
|
12
13
|
validate_number_of_cores,
|
|
13
14
|
)
|
|
14
15
|
from executorlib.task_scheduler.interactive.blockallocation import (
|
|
@@ -67,6 +68,7 @@ class FluxJobExecutor(BaseExecutor):
|
|
|
67
68
|
plot_dependency_graph_filename (str): Name of the file to store the plotted graph in.
|
|
68
69
|
export_workflow_filename (str): Name of the file to store the exported workflow graph in.
|
|
69
70
|
log_obj_size (bool): Enable debug mode which reports the size of the communicated objects.
|
|
71
|
+
wait (bool): Whether to wait for the completion of all tasks before shutting down the executor.
|
|
70
72
|
|
|
71
73
|
Examples:
|
|
72
74
|
```
|
|
@@ -108,6 +110,7 @@ class FluxJobExecutor(BaseExecutor):
|
|
|
108
110
|
plot_dependency_graph_filename: Optional[str] = None,
|
|
109
111
|
export_workflow_filename: Optional[str] = None,
|
|
110
112
|
log_obj_size: bool = False,
|
|
113
|
+
wait: bool = True,
|
|
111
114
|
):
|
|
112
115
|
"""
|
|
113
116
|
The executorlib.FluxJobExecutor leverages either the message passing interface (MPI), the SLURM workload manager
|
|
@@ -156,6 +159,7 @@ class FluxJobExecutor(BaseExecutor):
|
|
|
156
159
|
plot_dependency_graph_filename (str): Name of the file to store the plotted graph in.
|
|
157
160
|
export_workflow_filename (str): Name of the file to store the exported workflow graph in.
|
|
158
161
|
log_obj_size (bool): Enable debug mode which reports the size of the communicated objects.
|
|
162
|
+
wait (bool): Whether to wait for the completion of all tasks before shutting down the executor.
|
|
159
163
|
|
|
160
164
|
"""
|
|
161
165
|
default_resource_dict: dict = {
|
|
@@ -187,6 +191,7 @@ class FluxJobExecutor(BaseExecutor):
|
|
|
187
191
|
block_allocation=block_allocation,
|
|
188
192
|
init_function=init_function,
|
|
189
193
|
log_obj_size=log_obj_size,
|
|
194
|
+
wait=wait,
|
|
190
195
|
),
|
|
191
196
|
max_cores=max_cores,
|
|
192
197
|
refresh_rate=refresh_rate,
|
|
@@ -212,6 +217,7 @@ class FluxJobExecutor(BaseExecutor):
|
|
|
212
217
|
block_allocation=block_allocation,
|
|
213
218
|
init_function=init_function,
|
|
214
219
|
log_obj_size=log_obj_size,
|
|
220
|
+
wait=wait,
|
|
215
221
|
)
|
|
216
222
|
)
|
|
217
223
|
|
|
@@ -261,6 +267,7 @@ class FluxClusterExecutor(BaseExecutor):
|
|
|
261
267
|
plot_dependency_graph_filename (str): Name of the file to store the plotted graph in.
|
|
262
268
|
export_workflow_filename (str): Name of the file to store the exported workflow graph in.
|
|
263
269
|
log_obj_size (bool): Enable debug mode which reports the size of the communicated objects.
|
|
270
|
+
wait (bool): Whether to wait for the completion of all tasks before shutting down the executor.
|
|
264
271
|
|
|
265
272
|
Examples:
|
|
266
273
|
```
|
|
@@ -300,6 +307,7 @@ class FluxClusterExecutor(BaseExecutor):
|
|
|
300
307
|
plot_dependency_graph_filename: Optional[str] = None,
|
|
301
308
|
export_workflow_filename: Optional[str] = None,
|
|
302
309
|
log_obj_size: bool = False,
|
|
310
|
+
wait: bool = True,
|
|
303
311
|
):
|
|
304
312
|
"""
|
|
305
313
|
The executorlib.FluxClusterExecutor leverages either the message passing interface (MPI), the SLURM workload
|
|
@@ -346,6 +354,7 @@ class FluxClusterExecutor(BaseExecutor):
|
|
|
346
354
|
plot_dependency_graph_filename (str): Name of the file to store the plotted graph in.
|
|
347
355
|
export_workflow_filename (str): Name of the file to store the exported workflow graph in.
|
|
348
356
|
log_obj_size (bool): Enable debug mode which reports the size of the communicated objects.
|
|
357
|
+
wait (bool): Whether to wait for the completion of all tasks before shutting down the executor.
|
|
349
358
|
|
|
350
359
|
"""
|
|
351
360
|
default_resource_dict: dict = {
|
|
@@ -405,6 +414,7 @@ class FluxClusterExecutor(BaseExecutor):
|
|
|
405
414
|
block_allocation=block_allocation,
|
|
406
415
|
init_function=init_function,
|
|
407
416
|
disable_dependencies=disable_dependencies,
|
|
417
|
+
wait=wait,
|
|
408
418
|
)
|
|
409
419
|
)
|
|
410
420
|
else:
|
|
@@ -445,6 +455,7 @@ def create_flux_executor(
|
|
|
445
455
|
block_allocation: bool = False,
|
|
446
456
|
init_function: Optional[Callable] = None,
|
|
447
457
|
log_obj_size: bool = False,
|
|
458
|
+
wait: bool = True,
|
|
448
459
|
) -> Union[OneProcessTaskScheduler, BlockAllocationTaskScheduler]:
|
|
449
460
|
"""
|
|
450
461
|
Create a flux executor
|
|
@@ -483,6 +494,7 @@ def create_flux_executor(
|
|
|
483
494
|
of the individual function.
|
|
484
495
|
init_function (None): optional function to preset arguments for functions which are submitted later
|
|
485
496
|
log_obj_size (bool): Enable debug mode which reports the size of the communicated objects.
|
|
497
|
+
wait (bool): Whether to wait for the completion of all tasks before shutting down the executor.
|
|
486
498
|
|
|
487
499
|
Returns:
|
|
488
500
|
InteractiveStepExecutor/ InteractiveExecutor
|
|
@@ -504,6 +516,7 @@ def create_flux_executor(
|
|
|
504
516
|
check_command_line_argument_lst(
|
|
505
517
|
command_line_argument_lst=resource_dict.get("slurm_cmd_args", [])
|
|
506
518
|
)
|
|
519
|
+
check_wait_on_shutdown(wait_on_shutdown=wait)
|
|
507
520
|
if "openmpi_oversubscribe" in resource_dict:
|
|
508
521
|
del resource_dict["openmpi_oversubscribe"]
|
|
509
522
|
if "slurm_cmd_args" in resource_dict:
|
|
@@ -7,6 +7,7 @@ from executorlib.standalone.inputcheck import (
|
|
|
7
7
|
check_init_function,
|
|
8
8
|
check_plot_dependency_graph,
|
|
9
9
|
check_refresh_rate,
|
|
10
|
+
check_wait_on_shutdown,
|
|
10
11
|
validate_number_of_cores,
|
|
11
12
|
)
|
|
12
13
|
from executorlib.standalone.interactive.spawner import MpiExecSpawner
|
|
@@ -60,6 +61,7 @@ class SingleNodeExecutor(BaseExecutor):
|
|
|
60
61
|
plot_dependency_graph_filename (str): Name of the file to store the plotted graph in.
|
|
61
62
|
export_workflow_filename (str): Name of the file to store the exported workflow graph in.
|
|
62
63
|
log_obj_size (bool): Enable debug mode which reports the size of the communicated objects.
|
|
64
|
+
wait (bool): Whether to wait for the completion of all tasks before shutting down the executor.
|
|
63
65
|
|
|
64
66
|
Examples:
|
|
65
67
|
```
|
|
@@ -97,6 +99,7 @@ class SingleNodeExecutor(BaseExecutor):
|
|
|
97
99
|
plot_dependency_graph_filename: Optional[str] = None,
|
|
98
100
|
export_workflow_filename: Optional[str] = None,
|
|
99
101
|
log_obj_size: bool = False,
|
|
102
|
+
wait: bool = True,
|
|
100
103
|
):
|
|
101
104
|
"""
|
|
102
105
|
The executorlib.SingleNodeExecutor leverages either the message passing interface (MPI), the SLURM workload
|
|
@@ -142,6 +145,7 @@ class SingleNodeExecutor(BaseExecutor):
|
|
|
142
145
|
plot_dependency_graph_filename (str): Name of the file to store the plotted graph in.
|
|
143
146
|
export_workflow_filename (str): Name of the file to store the exported workflow graph in.
|
|
144
147
|
log_obj_size (bool): Enable debug mode which reports the size of the communicated objects.
|
|
148
|
+
wait (bool): Whether to wait for the completion of all tasks before shutting down the executor.
|
|
145
149
|
|
|
146
150
|
"""
|
|
147
151
|
default_resource_dict: dict = {
|
|
@@ -169,6 +173,7 @@ class SingleNodeExecutor(BaseExecutor):
|
|
|
169
173
|
block_allocation=block_allocation,
|
|
170
174
|
init_function=init_function,
|
|
171
175
|
log_obj_size=log_obj_size,
|
|
176
|
+
wait=wait,
|
|
172
177
|
),
|
|
173
178
|
max_cores=max_cores,
|
|
174
179
|
refresh_rate=refresh_rate,
|
|
@@ -190,6 +195,7 @@ class SingleNodeExecutor(BaseExecutor):
|
|
|
190
195
|
block_allocation=block_allocation,
|
|
191
196
|
init_function=init_function,
|
|
192
197
|
log_obj_size=log_obj_size,
|
|
198
|
+
wait=wait,
|
|
193
199
|
)
|
|
194
200
|
)
|
|
195
201
|
|
|
@@ -232,6 +238,7 @@ class TestClusterExecutor(BaseExecutor):
|
|
|
232
238
|
plot_dependency_graph_filename (str): Name of the file to store the plotted graph in.
|
|
233
239
|
export_workflow_filename (str): Name of the file to store the exported workflow graph in.
|
|
234
240
|
log_obj_size (bool): Enable debug mode which reports the size of the communicated objects.
|
|
241
|
+
wait (bool): Whether to wait for the completion of all tasks before shutting down the executor.
|
|
235
242
|
|
|
236
243
|
Examples:
|
|
237
244
|
```
|
|
@@ -269,6 +276,7 @@ class TestClusterExecutor(BaseExecutor):
|
|
|
269
276
|
plot_dependency_graph_filename: Optional[str] = None,
|
|
270
277
|
export_workflow_filename: Optional[str] = None,
|
|
271
278
|
log_obj_size: bool = False,
|
|
279
|
+
wait: bool = True,
|
|
272
280
|
):
|
|
273
281
|
"""
|
|
274
282
|
The executorlib.api.TestClusterExecutor is designed to test the file based communication used in the
|
|
@@ -307,6 +315,7 @@ class TestClusterExecutor(BaseExecutor):
|
|
|
307
315
|
plot_dependency_graph_filename (str): Name of the file to store the plotted graph in.
|
|
308
316
|
export_workflow_filename (str): Name of the file to store the exported workflow graph in.
|
|
309
317
|
log_obj_size (bool): Enable debug mode which reports the size of the communicated objects.
|
|
318
|
+
wait (bool): Whether to wait for the completion of all tasks before shutting down the executor.
|
|
310
319
|
|
|
311
320
|
"""
|
|
312
321
|
default_resource_dict: dict = {
|
|
@@ -346,6 +355,7 @@ class TestClusterExecutor(BaseExecutor):
|
|
|
346
355
|
init_function=init_function,
|
|
347
356
|
disable_dependencies=disable_dependencies,
|
|
348
357
|
execute_function=execute_in_subprocess,
|
|
358
|
+
wait=wait,
|
|
349
359
|
)
|
|
350
360
|
)
|
|
351
361
|
else:
|
|
@@ -379,6 +389,7 @@ def create_single_node_executor(
|
|
|
379
389
|
block_allocation: bool = False,
|
|
380
390
|
init_function: Optional[Callable] = None,
|
|
381
391
|
log_obj_size: bool = False,
|
|
392
|
+
wait: bool = True,
|
|
382
393
|
) -> Union[OneProcessTaskScheduler, BlockAllocationTaskScheduler]:
|
|
383
394
|
"""
|
|
384
395
|
Create a single node executor
|
|
@@ -413,6 +424,7 @@ def create_single_node_executor(
|
|
|
413
424
|
of the individual function.
|
|
414
425
|
init_function (None): optional function to preset arguments for functions which are submitted later
|
|
415
426
|
log_obj_size (bool): Enable debug mode which reports the size of the communicated objects.
|
|
427
|
+
wait (bool): Whether to wait for the completion of all tasks before shutting down the executor.
|
|
416
428
|
|
|
417
429
|
Returns:
|
|
418
430
|
InteractiveStepExecutor/ InteractiveExecutor
|
|
@@ -429,6 +441,7 @@ def create_single_node_executor(
|
|
|
429
441
|
check_command_line_argument_lst(
|
|
430
442
|
command_line_argument_lst=resource_dict.get("slurm_cmd_args", [])
|
|
431
443
|
)
|
|
444
|
+
check_wait_on_shutdown(wait_on_shutdown=wait)
|
|
432
445
|
if "threads_per_core" in resource_dict:
|
|
433
446
|
del resource_dict["threads_per_core"]
|
|
434
447
|
if "gpus_per_core" in resource_dict:
|
|
@@ -6,6 +6,7 @@ from executorlib.standalone.inputcheck import (
|
|
|
6
6
|
check_log_obj_size,
|
|
7
7
|
check_plot_dependency_graph,
|
|
8
8
|
check_refresh_rate,
|
|
9
|
+
check_wait_on_shutdown,
|
|
9
10
|
validate_number_of_cores,
|
|
10
11
|
)
|
|
11
12
|
from executorlib.task_scheduler.interactive.blockallocation import (
|
|
@@ -65,6 +66,7 @@ class SlurmClusterExecutor(BaseExecutor):
|
|
|
65
66
|
plot_dependency_graph_filename (str): Name of the file to store the plotted graph in.
|
|
66
67
|
export_workflow_filename (str): Name of the file to store the exported workflow graph in.
|
|
67
68
|
log_obj_size (bool): Enable debug mode which reports the size of the communicated objects.
|
|
69
|
+
wait (bool): Whether to wait for the completion of all tasks before shutting down the executor.
|
|
68
70
|
|
|
69
71
|
Examples:
|
|
70
72
|
```
|
|
@@ -104,6 +106,7 @@ class SlurmClusterExecutor(BaseExecutor):
|
|
|
104
106
|
plot_dependency_graph_filename: Optional[str] = None,
|
|
105
107
|
export_workflow_filename: Optional[str] = None,
|
|
106
108
|
log_obj_size: bool = False,
|
|
109
|
+
wait: bool = True,
|
|
107
110
|
):
|
|
108
111
|
"""
|
|
109
112
|
The executorlib.SlurmClusterExecutor leverages either the message passing interface (MPI), the SLURM workload
|
|
@@ -150,6 +153,7 @@ class SlurmClusterExecutor(BaseExecutor):
|
|
|
150
153
|
plot_dependency_graph_filename (str): Name of the file to store the plotted graph in.
|
|
151
154
|
export_workflow_filename (str): Name of the file to store the exported workflow graph in.
|
|
152
155
|
log_obj_size (bool): Enable debug mode which reports the size of the communicated objects.
|
|
156
|
+
wait (bool): Whether to wait for the completion of all tasks before shutting down the executor.
|
|
153
157
|
|
|
154
158
|
"""
|
|
155
159
|
default_resource_dict: dict = {
|
|
@@ -210,6 +214,7 @@ class SlurmClusterExecutor(BaseExecutor):
|
|
|
210
214
|
block_allocation=block_allocation,
|
|
211
215
|
init_function=init_function,
|
|
212
216
|
disable_dependencies=disable_dependencies,
|
|
217
|
+
wait=wait,
|
|
213
218
|
)
|
|
214
219
|
)
|
|
215
220
|
else:
|
|
@@ -281,6 +286,7 @@ class SlurmJobExecutor(BaseExecutor):
|
|
|
281
286
|
plot_dependency_graph_filename (str): Name of the file to store the plotted graph in.
|
|
282
287
|
export_workflow_filename (str): Name of the file to store the exported workflow graph in.
|
|
283
288
|
log_obj_size (bool): Enable debug mode which reports the size of the communicated objects.
|
|
289
|
+
wait (bool): Whether to wait for the completion of all tasks before shutting down the executor.
|
|
284
290
|
|
|
285
291
|
Examples:
|
|
286
292
|
```
|
|
@@ -319,6 +325,7 @@ class SlurmJobExecutor(BaseExecutor):
|
|
|
319
325
|
plot_dependency_graph_filename: Optional[str] = None,
|
|
320
326
|
export_workflow_filename: Optional[str] = None,
|
|
321
327
|
log_obj_size: bool = False,
|
|
328
|
+
wait: bool = True,
|
|
322
329
|
):
|
|
323
330
|
"""
|
|
324
331
|
The executorlib.SlurmJobExecutor leverages either the message passing interface (MPI), the SLURM workload
|
|
@@ -368,6 +375,7 @@ class SlurmJobExecutor(BaseExecutor):
|
|
|
368
375
|
plot_dependency_graph_filename (str): Name of the file to store the plotted graph in.
|
|
369
376
|
export_workflow_filename (str): Name of the file to store the exported workflow graph in.
|
|
370
377
|
log_obj_size (bool): Enable debug mode which reports the size of the communicated objects.
|
|
378
|
+
wait (bool): Whether to wait for the completion of all tasks before shutting down the executor.
|
|
371
379
|
|
|
372
380
|
"""
|
|
373
381
|
default_resource_dict: dict = {
|
|
@@ -396,6 +404,7 @@ class SlurmJobExecutor(BaseExecutor):
|
|
|
396
404
|
block_allocation=block_allocation,
|
|
397
405
|
init_function=init_function,
|
|
398
406
|
log_obj_size=log_obj_size,
|
|
407
|
+
wait=wait,
|
|
399
408
|
),
|
|
400
409
|
max_cores=max_cores,
|
|
401
410
|
refresh_rate=refresh_rate,
|
|
@@ -418,6 +427,7 @@ class SlurmJobExecutor(BaseExecutor):
|
|
|
418
427
|
block_allocation=block_allocation,
|
|
419
428
|
init_function=init_function,
|
|
420
429
|
log_obj_size=log_obj_size,
|
|
430
|
+
wait=wait,
|
|
421
431
|
)
|
|
422
432
|
)
|
|
423
433
|
|
|
@@ -432,6 +442,7 @@ def create_slurm_executor(
|
|
|
432
442
|
block_allocation: bool = False,
|
|
433
443
|
init_function: Optional[Callable] = None,
|
|
434
444
|
log_obj_size: bool = False,
|
|
445
|
+
wait: bool = True,
|
|
435
446
|
) -> Union[OneProcessTaskScheduler, BlockAllocationTaskScheduler]:
|
|
436
447
|
"""
|
|
437
448
|
Create a SLURM executor
|
|
@@ -471,6 +482,7 @@ def create_slurm_executor(
|
|
|
471
482
|
of the individual function.
|
|
472
483
|
init_function (None): optional function to preset arguments for functions which are submitted later
|
|
473
484
|
log_obj_size (bool): Enable debug mode which reports the size of the communicated objects.
|
|
485
|
+
wait (bool): Whether to wait for the completion of all tasks before shutting down the executor.
|
|
474
486
|
|
|
475
487
|
Returns:
|
|
476
488
|
InteractiveStepExecutor/ InteractiveExecutor
|
|
@@ -483,6 +495,7 @@ def create_slurm_executor(
|
|
|
483
495
|
resource_dict["log_obj_size"] = log_obj_size
|
|
484
496
|
resource_dict["pmi_mode"] = pmi_mode
|
|
485
497
|
check_init_function(block_allocation=block_allocation, init_function=init_function)
|
|
498
|
+
check_wait_on_shutdown(wait_on_shutdown=wait)
|
|
486
499
|
if block_allocation:
|
|
487
500
|
resource_dict["init_function"] = init_function
|
|
488
501
|
max_workers = validate_number_of_cores(
|
|
@@ -11,6 +11,7 @@ group_dict = {
|
|
|
11
11
|
"kwargs": "input_kwargs",
|
|
12
12
|
"output": "output",
|
|
13
13
|
"error": "error",
|
|
14
|
+
"resource_dict": "resource_dict",
|
|
14
15
|
"runtime": "runtime",
|
|
15
16
|
"queue_id": "queue_id",
|
|
16
17
|
"error_log_file": "error_log_file",
|
|
@@ -61,6 +62,12 @@ def load(file_name: str) -> dict:
|
|
|
61
62
|
data_dict["kwargs"] = cloudpickle.loads(np.void(hdf["/input_kwargs"]))
|
|
62
63
|
else:
|
|
63
64
|
data_dict["kwargs"] = {}
|
|
65
|
+
if "resource_dict" in hdf:
|
|
66
|
+
data_dict["resource_dict"] = cloudpickle.loads(
|
|
67
|
+
np.void(hdf["/resource_dict"])
|
|
68
|
+
)
|
|
69
|
+
else:
|
|
70
|
+
data_dict["resource_dict"] = {}
|
|
64
71
|
if "error_log_file" in hdf:
|
|
65
72
|
data_dict["error_log_file"] = cloudpickle.loads(
|
|
66
73
|
np.void(hdf["/error_log_file"])
|
|
@@ -17,6 +17,18 @@ def check_oversubscribe(oversubscribe: bool) -> None:
|
|
|
17
17
|
)
|
|
18
18
|
|
|
19
19
|
|
|
20
|
+
def check_wait_on_shutdown(
|
|
21
|
+
wait_on_shutdown: bool,
|
|
22
|
+
) -> None:
|
|
23
|
+
"""
|
|
24
|
+
Check if wait_on_shutdown is False and raise a ValueError if it is.
|
|
25
|
+
"""
|
|
26
|
+
if not wait_on_shutdown:
|
|
27
|
+
raise ValueError(
|
|
28
|
+
"The wait_on_shutdown parameter is only supported for the executorlib.FluxClusterExecutor and executorlib.SlurmClusterExecutor."
|
|
29
|
+
)
|
|
30
|
+
|
|
31
|
+
|
|
20
32
|
def check_command_line_argument_lst(command_line_argument_lst: list[str]) -> None:
|
|
21
33
|
"""
|
|
22
34
|
Check if command_line_argument_lst is not empty and raise a ValueError if it is.
|
|
@@ -11,6 +11,7 @@ class BaseSpawner(ABC):
|
|
|
11
11
|
self,
|
|
12
12
|
cwd: Optional[str] = None,
|
|
13
13
|
cores: int = 1,
|
|
14
|
+
worker_id: int = 0,
|
|
14
15
|
openmpi_oversubscribe: bool = False,
|
|
15
16
|
):
|
|
16
17
|
"""
|
|
@@ -20,9 +21,11 @@ class BaseSpawner(ABC):
|
|
|
20
21
|
cwd (str): The current working directory.
|
|
21
22
|
cores (int, optional): The number of cores to use. Defaults to 1.
|
|
22
23
|
openmpi_oversubscribe (bool, optional): Whether to oversubscribe the cores. Defaults to False.
|
|
24
|
+
worker_id (int): The worker ID. Defaults to 0.
|
|
23
25
|
"""
|
|
24
26
|
self._cwd = cwd
|
|
25
27
|
self._cores = cores
|
|
28
|
+
self._worker_id = worker_id
|
|
26
29
|
self._openmpi_oversubscribe = openmpi_oversubscribe
|
|
27
30
|
|
|
28
31
|
@abstractmethod
|
|
@@ -69,6 +72,7 @@ class SubprocessSpawner(BaseSpawner):
|
|
|
69
72
|
self,
|
|
70
73
|
cwd: Optional[str] = None,
|
|
71
74
|
cores: int = 1,
|
|
75
|
+
worker_id: int = 0,
|
|
72
76
|
openmpi_oversubscribe: bool = False,
|
|
73
77
|
threads_per_core: int = 1,
|
|
74
78
|
):
|
|
@@ -79,11 +83,13 @@ class SubprocessSpawner(BaseSpawner):
|
|
|
79
83
|
cwd (str, optional): The current working directory. Defaults to None.
|
|
80
84
|
cores (int, optional): The number of cores to use. Defaults to 1.
|
|
81
85
|
threads_per_core (int, optional): The number of threads per core. Defaults to 1.
|
|
86
|
+
worker_id (int): The worker ID. Defaults to 0.
|
|
82
87
|
openmpi_oversubscribe (bool, optional): Whether to oversubscribe the cores. Defaults to False.
|
|
83
88
|
"""
|
|
84
89
|
super().__init__(
|
|
85
90
|
cwd=cwd,
|
|
86
91
|
cores=cores,
|
|
92
|
+
worker_id=worker_id,
|
|
87
93
|
openmpi_oversubscribe=openmpi_oversubscribe,
|
|
88
94
|
)
|
|
89
95
|
self._process: Optional[subprocess.Popen] = None
|
|
@@ -106,6 +112,7 @@ class SubprocessSpawner(BaseSpawner):
|
|
|
106
112
|
"""
|
|
107
113
|
if self._cwd is not None:
|
|
108
114
|
os.makedirs(self._cwd, exist_ok=True)
|
|
115
|
+
set_current_directory_in_environment()
|
|
109
116
|
self._process = subprocess.Popen(
|
|
110
117
|
args=self.generate_command(command_lst=command_lst),
|
|
111
118
|
cwd=self._cwd,
|
|
@@ -189,3 +196,15 @@ def generate_mpiexec_command(
|
|
|
189
196
|
if openmpi_oversubscribe:
|
|
190
197
|
command_prepend_lst += ["--oversubscribe"]
|
|
191
198
|
return command_prepend_lst
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
def set_current_directory_in_environment():
|
|
202
|
+
"""
|
|
203
|
+
Add the current directory to the PYTHONPATH to be able to access local Python modules.
|
|
204
|
+
"""
|
|
205
|
+
environment = os.environ
|
|
206
|
+
current_path = os.getcwd()
|
|
207
|
+
if "PYTHONPATH" in environment and current_path not in environment["PYTHONPATH"]:
|
|
208
|
+
environment["PYTHONPATH"] = os.getcwd() + ":" + environment["PYTHONPATH"]
|
|
209
|
+
elif "PYTHONPATH" not in environment:
|
|
210
|
+
environment["PYTHONPATH"] = os.getcwd()
|
|
@@ -198,7 +198,9 @@ class TaskSchedulerBase(FutureExecutor):
|
|
|
198
198
|
if cancel_futures and self._future_queue is not None:
|
|
199
199
|
cancel_items_in_queue(que=self._future_queue)
|
|
200
200
|
if self._process is not None and self._future_queue is not None:
|
|
201
|
-
self._future_queue.put(
|
|
201
|
+
self._future_queue.put(
|
|
202
|
+
{"shutdown": True, "wait": wait, "cancel_futures": cancel_futures}
|
|
203
|
+
)
|
|
202
204
|
if wait and isinstance(self._process, Thread):
|
|
203
205
|
self._process.join()
|
|
204
206
|
self._future_queue.join()
|
|
@@ -57,6 +57,7 @@ def execute_tasks_h5(
|
|
|
57
57
|
backend: Optional[str] = None,
|
|
58
58
|
disable_dependencies: bool = False,
|
|
59
59
|
pmi_mode: Optional[str] = None,
|
|
60
|
+
wait: bool = True,
|
|
60
61
|
) -> None:
|
|
61
62
|
"""
|
|
62
63
|
Execute tasks stored in a queue using HDF5 files.
|
|
@@ -72,6 +73,7 @@ def execute_tasks_h5(
|
|
|
72
73
|
backend (str, optional): name of the backend used to spawn tasks.
|
|
73
74
|
disable_dependencies (boolean): Disable resolving future objects during the submission.
|
|
74
75
|
pmi_mode (str): PMI interface to use (OpenMPI v5 requires pmix) default is None (Flux only)
|
|
76
|
+
wait (bool): Whether to wait for the completion of all tasks before shutting down the executor.
|
|
75
77
|
|
|
76
78
|
Returns:
|
|
77
79
|
None
|
|
@@ -86,30 +88,35 @@ def execute_tasks_h5(
|
|
|
86
88
|
with contextlib.suppress(queue.Empty):
|
|
87
89
|
task_dict = future_queue.get_nowait()
|
|
88
90
|
if task_dict is not None and "shutdown" in task_dict and task_dict["shutdown"]:
|
|
89
|
-
if task_dict["wait"]:
|
|
91
|
+
if task_dict["wait"] and wait:
|
|
90
92
|
while len(memory_dict) > 0:
|
|
91
|
-
memory_dict =
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
for key, value in memory_dict.items()
|
|
98
|
-
if not value.done()
|
|
99
|
-
}
|
|
100
|
-
if (
|
|
101
|
-
terminate_function is not None
|
|
102
|
-
and terminate_function == terminate_subprocess
|
|
103
|
-
):
|
|
104
|
-
for task in process_dict.values():
|
|
105
|
-
terminate_function(task=task)
|
|
106
|
-
elif terminate_function is not None:
|
|
107
|
-
for queue_id in process_dict.values():
|
|
108
|
-
terminate_function(
|
|
109
|
-
queue_id=queue_id,
|
|
110
|
-
config_directory=pysqa_config_directory,
|
|
93
|
+
memory_dict = _refresh_memory_dict(
|
|
94
|
+
memory_dict=memory_dict,
|
|
95
|
+
cache_dir_dict=cache_dir_dict,
|
|
96
|
+
process_dict=process_dict,
|
|
97
|
+
terminate_function=terminate_function,
|
|
98
|
+
pysqa_config_directory=pysqa_config_directory,
|
|
111
99
|
backend=backend,
|
|
112
100
|
)
|
|
101
|
+
if not task_dict["cancel_futures"] and wait:
|
|
102
|
+
_cancel_processes(
|
|
103
|
+
process_dict=process_dict,
|
|
104
|
+
terminate_function=terminate_function,
|
|
105
|
+
pysqa_config_directory=pysqa_config_directory,
|
|
106
|
+
backend=backend,
|
|
107
|
+
)
|
|
108
|
+
else:
|
|
109
|
+
memory_dict = _refresh_memory_dict(
|
|
110
|
+
memory_dict=memory_dict,
|
|
111
|
+
cache_dir_dict=cache_dir_dict,
|
|
112
|
+
process_dict=process_dict,
|
|
113
|
+
terminate_function=terminate_function,
|
|
114
|
+
pysqa_config_directory=pysqa_config_directory,
|
|
115
|
+
backend=backend,
|
|
116
|
+
)
|
|
117
|
+
for value in memory_dict.values():
|
|
118
|
+
if not value.done():
|
|
119
|
+
value.cancel()
|
|
113
120
|
future_queue.task_done()
|
|
114
121
|
future_queue.join()
|
|
115
122
|
break
|
|
@@ -177,15 +184,14 @@ def execute_tasks_h5(
|
|
|
177
184
|
cache_dir_dict[task_key] = cache_directory
|
|
178
185
|
future_queue.task_done()
|
|
179
186
|
else:
|
|
180
|
-
memory_dict =
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
}
|
|
187
|
+
memory_dict = _refresh_memory_dict(
|
|
188
|
+
memory_dict=memory_dict,
|
|
189
|
+
cache_dir_dict=cache_dir_dict,
|
|
190
|
+
process_dict=process_dict,
|
|
191
|
+
terminate_function=terminate_function,
|
|
192
|
+
pysqa_config_directory=pysqa_config_directory,
|
|
193
|
+
backend=backend,
|
|
194
|
+
)
|
|
189
195
|
|
|
190
196
|
|
|
191
197
|
def _check_task_output(
|
|
@@ -259,3 +265,72 @@ def _convert_args_and_kwargs(
|
|
|
259
265
|
else:
|
|
260
266
|
task_kwargs[key] = arg
|
|
261
267
|
return task_args, task_kwargs, future_wait_key_lst
|
|
268
|
+
|
|
269
|
+
|
|
270
|
+
def _refresh_memory_dict(
|
|
271
|
+
memory_dict: dict,
|
|
272
|
+
cache_dir_dict: dict,
|
|
273
|
+
process_dict: dict,
|
|
274
|
+
terminate_function: Optional[Callable] = None,
|
|
275
|
+
pysqa_config_directory: Optional[str] = None,
|
|
276
|
+
backend: Optional[str] = None,
|
|
277
|
+
) -> dict:
|
|
278
|
+
"""
|
|
279
|
+
Refresh memory dictionary
|
|
280
|
+
|
|
281
|
+
Args:
|
|
282
|
+
memory_dict (dict): dictionary with task keys and future objects
|
|
283
|
+
cache_dir_dict (dict): dictionary with task keys and cache directories
|
|
284
|
+
process_dict (dict): dictionary with task keys and process reference.
|
|
285
|
+
terminate_function (callable): The function to terminate the tasks.
|
|
286
|
+
pysqa_config_directory (str): path to the pysqa config directory (only for pysqa based backend).
|
|
287
|
+
backend (str): name of the backend used to spawn tasks.
|
|
288
|
+
|
|
289
|
+
Returns:
|
|
290
|
+
dict: Updated memory dictionary
|
|
291
|
+
"""
|
|
292
|
+
cancelled_lst = [
|
|
293
|
+
key for key, value in memory_dict.items() if value.done() and value.cancelled()
|
|
294
|
+
]
|
|
295
|
+
_cancel_processes(
|
|
296
|
+
process_dict={k: v for k, v in process_dict.items() if k in cancelled_lst},
|
|
297
|
+
terminate_function=terminate_function,
|
|
298
|
+
pysqa_config_directory=pysqa_config_directory,
|
|
299
|
+
backend=backend,
|
|
300
|
+
)
|
|
301
|
+
return {
|
|
302
|
+
key: _check_task_output(
|
|
303
|
+
task_key=key,
|
|
304
|
+
future_obj=value,
|
|
305
|
+
cache_directory=cache_dir_dict[key],
|
|
306
|
+
)
|
|
307
|
+
for key, value in memory_dict.items()
|
|
308
|
+
if not value.done()
|
|
309
|
+
}
|
|
310
|
+
|
|
311
|
+
|
|
312
|
+
def _cancel_processes(
|
|
313
|
+
process_dict: dict,
|
|
314
|
+
terminate_function: Optional[Callable] = None,
|
|
315
|
+
pysqa_config_directory: Optional[str] = None,
|
|
316
|
+
backend: Optional[str] = None,
|
|
317
|
+
):
|
|
318
|
+
"""
|
|
319
|
+
Cancel processes
|
|
320
|
+
|
|
321
|
+
Args:
|
|
322
|
+
process_dict (dict): dictionary with task keys and process reference.
|
|
323
|
+
terminate_function (callable): The function to terminate the tasks.
|
|
324
|
+
pysqa_config_directory (str): path to the pysqa config directory (only for pysqa based backend).
|
|
325
|
+
backend (str): name of the backend used to spawn tasks.
|
|
326
|
+
"""
|
|
327
|
+
if terminate_function is not None and terminate_function == terminate_subprocess:
|
|
328
|
+
for task in process_dict.values():
|
|
329
|
+
terminate_function(task=task)
|
|
330
|
+
elif terminate_function is not None and backend is not None:
|
|
331
|
+
for queue_id in process_dict.values():
|
|
332
|
+
terminate_function(
|
|
333
|
+
queue_id=queue_id,
|
|
334
|
+
config_directory=pysqa_config_directory,
|
|
335
|
+
backend=backend,
|
|
336
|
+
)
|
{executorlib-1.7.4 → executorlib-1.8.1}/src/executorlib/task_scheduler/file/spawner_pysqa.py
RENAMED
|
@@ -5,6 +5,9 @@ from pysqa import QueueAdapter
|
|
|
5
5
|
|
|
6
6
|
from executorlib.standalone.hdf import dump, get_queue_id
|
|
7
7
|
from executorlib.standalone.inputcheck import check_file_exists
|
|
8
|
+
from executorlib.standalone.interactive.spawner import (
|
|
9
|
+
set_current_directory_in_environment,
|
|
10
|
+
)
|
|
8
11
|
from executorlib.standalone.scheduler import pysqa_execute_command, terminate_with_pysqa
|
|
9
12
|
|
|
10
13
|
|
|
@@ -85,6 +88,7 @@ def execute_with_pysqa(
|
|
|
85
88
|
os.path.dirname(os.path.abspath(cwd))
|
|
86
89
|
)
|
|
87
90
|
submit_kwargs.update(resource_dict)
|
|
91
|
+
set_current_directory_in_environment()
|
|
88
92
|
queue_id = qa.submit_job(**submit_kwargs)
|
|
89
93
|
dump(file_name=file_name, data_dict={"queue_id": queue_id})
|
|
90
94
|
return queue_id
|
{executorlib-1.7.4 → executorlib-1.8.1}/src/executorlib/task_scheduler/file/spawner_subprocess.py
RENAMED
|
@@ -5,6 +5,9 @@ from typing import Optional
|
|
|
5
5
|
|
|
6
6
|
from executorlib.standalone.hdf import dump
|
|
7
7
|
from executorlib.standalone.inputcheck import check_file_exists
|
|
8
|
+
from executorlib.standalone.interactive.spawner import (
|
|
9
|
+
set_current_directory_in_environment,
|
|
10
|
+
)
|
|
8
11
|
|
|
9
12
|
|
|
10
13
|
def execute_in_subprocess(
|
|
@@ -53,11 +56,12 @@ def execute_in_subprocess(
|
|
|
53
56
|
)
|
|
54
57
|
if backend is not None:
|
|
55
58
|
raise ValueError("backend parameter is not supported for subprocess spawner.")
|
|
56
|
-
|
|
57
|
-
resource_dict
|
|
58
|
-
|
|
59
|
+
cwd = _get_working_directory(
|
|
60
|
+
cache_directory=cache_directory, resource_dict=resource_dict
|
|
61
|
+
)
|
|
59
62
|
if cwd is not None:
|
|
60
63
|
os.makedirs(cwd, exist_ok=True)
|
|
64
|
+
set_current_directory_in_environment()
|
|
61
65
|
return subprocess.Popen(command, universal_newlines=True, cwd=cwd)
|
|
62
66
|
|
|
63
67
|
|
|
@@ -71,3 +75,14 @@ def terminate_subprocess(task):
|
|
|
71
75
|
task.terminate()
|
|
72
76
|
while task.poll() is None:
|
|
73
77
|
time.sleep(0.1)
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def _get_working_directory(
|
|
81
|
+
cache_directory: Optional[str] = None, resource_dict: Optional[dict] = None
|
|
82
|
+
):
|
|
83
|
+
if resource_dict is None:
|
|
84
|
+
resource_dict = {}
|
|
85
|
+
if "cwd" in resource_dict and resource_dict["cwd"] is not None:
|
|
86
|
+
return resource_dict["cwd"]
|
|
87
|
+
else:
|
|
88
|
+
return cache_directory
|
{executorlib-1.7.4 → executorlib-1.8.1}/src/executorlib/task_scheduler/file/task_scheduler.py
RENAMED
|
@@ -35,6 +35,7 @@ class FileTaskScheduler(TaskSchedulerBase):
|
|
|
35
35
|
backend: Optional[str] = None,
|
|
36
36
|
disable_dependencies: bool = False,
|
|
37
37
|
pmi_mode: Optional[str] = None,
|
|
38
|
+
wait: bool = True,
|
|
38
39
|
):
|
|
39
40
|
"""
|
|
40
41
|
Initialize the FileExecutor.
|
|
@@ -50,6 +51,7 @@ class FileTaskScheduler(TaskSchedulerBase):
|
|
|
50
51
|
backend (str, optional): name of the backend used to spawn tasks.
|
|
51
52
|
disable_dependencies (boolean): Disable resolving future objects during the submission.
|
|
52
53
|
pmi_mode (str): PMI interface to use (OpenMPI v5 requires pmix) default is None
|
|
54
|
+
wait (bool): Whether to wait for the completion of all tasks before shutting down the executor.
|
|
53
55
|
"""
|
|
54
56
|
super().__init__(max_cores=None)
|
|
55
57
|
default_resource_dict = {
|
|
@@ -73,6 +75,7 @@ class FileTaskScheduler(TaskSchedulerBase):
|
|
|
73
75
|
"backend": backend,
|
|
74
76
|
"disable_dependencies": disable_dependencies,
|
|
75
77
|
"pmi_mode": pmi_mode,
|
|
78
|
+
"wait": wait,
|
|
76
79
|
}
|
|
77
80
|
self._set_process(
|
|
78
81
|
Thread(
|
|
@@ -98,6 +101,7 @@ def create_file_executor(
|
|
|
98
101
|
init_function: Optional[Callable] = None,
|
|
99
102
|
disable_dependencies: bool = False,
|
|
100
103
|
execute_function: Callable = execute_with_pysqa,
|
|
104
|
+
wait: bool = True,
|
|
101
105
|
):
|
|
102
106
|
if block_allocation:
|
|
103
107
|
raise ValueError(
|
|
@@ -128,4 +132,5 @@ def create_file_executor(
|
|
|
128
132
|
execute_function=execute_function,
|
|
129
133
|
terminate_function=terminate_function,
|
|
130
134
|
pmi_mode=pmi_mode,
|
|
135
|
+
wait=wait,
|
|
131
136
|
)
|
|
@@ -208,7 +208,7 @@ def _execute_multiple_tasks(
|
|
|
208
208
|
queue_join_on_shutdown: bool = True,
|
|
209
209
|
log_obj_size: bool = False,
|
|
210
210
|
error_log_file: Optional[str] = None,
|
|
211
|
-
worker_id:
|
|
211
|
+
worker_id: int = 0,
|
|
212
212
|
stop_function: Optional[Callable] = None,
|
|
213
213
|
restart_limit: int = 0,
|
|
214
214
|
**kwargs,
|
|
@@ -244,7 +244,7 @@ def _execute_multiple_tasks(
|
|
|
244
244
|
command_lst=get_interactive_execute_command(
|
|
245
245
|
cores=cores,
|
|
246
246
|
),
|
|
247
|
-
connections=spawner(cores=cores, **kwargs),
|
|
247
|
+
connections=spawner(cores=cores, worker_id=worker_id, **kwargs),
|
|
248
248
|
hostname_localhost=hostname_localhost,
|
|
249
249
|
log_obj_size=log_obj_size,
|
|
250
250
|
worker_id=worker_id,
|
|
@@ -5,7 +5,6 @@ from concurrent.futures import Future
|
|
|
5
5
|
from typing import Optional
|
|
6
6
|
|
|
7
7
|
import cloudpickle
|
|
8
|
-
import numpy as np
|
|
9
8
|
|
|
10
9
|
from executorlib.standalone.select import FutureSelector
|
|
11
10
|
|
|
@@ -219,7 +218,11 @@ def plot_dependency_graph_function(
|
|
|
219
218
|
graph = nx.DiGraph()
|
|
220
219
|
for node in node_lst:
|
|
221
220
|
if node["type"] == "input":
|
|
222
|
-
graph.add_node(
|
|
221
|
+
graph.add_node(
|
|
222
|
+
node["id"],
|
|
223
|
+
label=_short_object_name(node=node["value"]),
|
|
224
|
+
shape=node["shape"],
|
|
225
|
+
)
|
|
223
226
|
else:
|
|
224
227
|
graph.add_node(node["id"], label=str(node["name"]), shape=node["shape"])
|
|
225
228
|
for edge in edge_lst:
|
|
@@ -245,6 +248,8 @@ def export_dependency_graph_function(
|
|
|
245
248
|
edge_lst (list): List of edges.
|
|
246
249
|
file_name (str): Name of the file to store the exported graph in.
|
|
247
250
|
"""
|
|
251
|
+
import numpy as np
|
|
252
|
+
|
|
248
253
|
pwd_nodes_lst = []
|
|
249
254
|
for n in node_lst:
|
|
250
255
|
if n["type"] == "function":
|
|
@@ -305,3 +310,31 @@ def export_dependency_graph_function(
|
|
|
305
310
|
}
|
|
306
311
|
with open(file_name, "w") as f:
|
|
307
312
|
json.dump(pwd_dict, f, indent=4)
|
|
313
|
+
|
|
314
|
+
|
|
315
|
+
def _short_object_name(node):
|
|
316
|
+
node_value_str = str(node)
|
|
317
|
+
if isinstance(node, tuple):
|
|
318
|
+
short_name = str(tuple(_short_object_name(node=el) for el in node))
|
|
319
|
+
elif isinstance(node, list):
|
|
320
|
+
short_name = str([_short_object_name(node=el) for el in node])
|
|
321
|
+
elif isinstance(node, dict):
|
|
322
|
+
short_name = str(
|
|
323
|
+
{
|
|
324
|
+
_short_object_name(node=key): _short_object_name(node=value)
|
|
325
|
+
for key, value in node.items()
|
|
326
|
+
}
|
|
327
|
+
)
|
|
328
|
+
elif "object at" in node_value_str:
|
|
329
|
+
short_name = node_value_str[1:-1].split(maxsplit=1)[0].split(".")[-1] + "()"
|
|
330
|
+
elif "<function" in node_value_str:
|
|
331
|
+
short_name = node_value_str.split()[1] + "()"
|
|
332
|
+
elif "\n" in node_value_str:
|
|
333
|
+
short_name = str(type(node)).split("'")[1].split(".")[-1] + "()"
|
|
334
|
+
elif "(" in node_value_str and ")" in node_value_str:
|
|
335
|
+
short_name = node_value_str.split("(", maxsplit=1)[0] + "()"
|
|
336
|
+
elif len(node_value_str) > 20:
|
|
337
|
+
short_name = node_value_str[:21] + "..."
|
|
338
|
+
else:
|
|
339
|
+
short_name = node_value_str
|
|
340
|
+
return short_name
|
{executorlib-1.7.4 → executorlib-1.8.1}/src/executorlib/task_scheduler/interactive/spawner_flux.py
RENAMED
|
@@ -5,7 +5,10 @@ from typing import Callable, Optional
|
|
|
5
5
|
import flux
|
|
6
6
|
import flux.job
|
|
7
7
|
|
|
8
|
-
from executorlib.standalone.interactive.spawner import
|
|
8
|
+
from executorlib.standalone.interactive.spawner import (
|
|
9
|
+
BaseSpawner,
|
|
10
|
+
set_current_directory_in_environment,
|
|
11
|
+
)
|
|
9
12
|
|
|
10
13
|
|
|
11
14
|
def validate_max_workers(max_workers: int, cores: int, threads_per_core: int):
|
|
@@ -31,6 +34,7 @@ class FluxPythonSpawner(BaseSpawner):
|
|
|
31
34
|
threads_per_core (int, optional): The number of threads per base. Defaults to 1.
|
|
32
35
|
gpus_per_core (int, optional): The number of GPUs per base. Defaults to 0.
|
|
33
36
|
num_nodes (int, optional): The number of compute nodes to use for executing the task. Defaults to None.
|
|
37
|
+
worker_id (int): The worker ID. Defaults to 0.
|
|
34
38
|
exclusive (bool): Whether to exclusively reserve the compute nodes, or allow sharing compute notes. Defaults to
|
|
35
39
|
False.
|
|
36
40
|
openmpi_oversubscribe (bool, optional): Whether to oversubscribe. Defaults to False.
|
|
@@ -49,6 +53,7 @@ class FluxPythonSpawner(BaseSpawner):
|
|
|
49
53
|
threads_per_core: int = 1,
|
|
50
54
|
gpus_per_core: int = 0,
|
|
51
55
|
num_nodes: Optional[int] = None,
|
|
56
|
+
worker_id: int = 0,
|
|
52
57
|
exclusive: bool = False,
|
|
53
58
|
priority: Optional[int] = None,
|
|
54
59
|
openmpi_oversubscribe: bool = False,
|
|
@@ -60,6 +65,7 @@ class FluxPythonSpawner(BaseSpawner):
|
|
|
60
65
|
super().__init__(
|
|
61
66
|
cwd=cwd,
|
|
62
67
|
cores=cores,
|
|
68
|
+
worker_id=worker_id,
|
|
63
69
|
openmpi_oversubscribe=openmpi_oversubscribe,
|
|
64
70
|
)
|
|
65
71
|
self._threads_per_core = threads_per_core
|
|
@@ -115,18 +121,20 @@ class FluxPythonSpawner(BaseSpawner):
|
|
|
115
121
|
num_nodes=self._num_nodes,
|
|
116
122
|
exclusive=self._exclusive,
|
|
117
123
|
)
|
|
124
|
+
set_current_directory_in_environment()
|
|
118
125
|
jobspec.environment = dict(os.environ)
|
|
119
126
|
if self._pmi_mode is not None:
|
|
120
127
|
jobspec.setattr_shell_option("pmi", self._pmi_mode)
|
|
121
128
|
if self._cwd is not None:
|
|
122
129
|
jobspec.cwd = self._cwd
|
|
123
130
|
os.makedirs(self._cwd, exist_ok=True)
|
|
131
|
+
file_prefix = "flux_" + str(self._worker_id)
|
|
124
132
|
if self._flux_log_files and self._cwd is not None:
|
|
125
|
-
jobspec.stderr = os.path.join(self._cwd, "
|
|
126
|
-
jobspec.stdout = os.path.join(self._cwd, "
|
|
133
|
+
jobspec.stderr = os.path.join(self._cwd, file_prefix + ".err")
|
|
134
|
+
jobspec.stdout = os.path.join(self._cwd, file_prefix + ".out")
|
|
127
135
|
elif self._flux_log_files:
|
|
128
|
-
jobspec.stderr = os.path.abspath("
|
|
129
|
-
jobspec.stdout = os.path.abspath("
|
|
136
|
+
jobspec.stderr = os.path.abspath(file_prefix + ".err")
|
|
137
|
+
jobspec.stdout = os.path.abspath(file_prefix + ".out")
|
|
130
138
|
if self._priority is not None:
|
|
131
139
|
self._future = self._flux_executor.submit(
|
|
132
140
|
jobspec=jobspec, urgency=self._priority
|
{executorlib-1.7.4 → executorlib-1.8.1}/src/executorlib/task_scheduler/interactive/spawner_pysqa.py
RENAMED
|
@@ -6,7 +6,10 @@ from typing import Callable, Optional
|
|
|
6
6
|
from pysqa import QueueAdapter
|
|
7
7
|
|
|
8
8
|
from executorlib.standalone.inputcheck import validate_number_of_cores
|
|
9
|
-
from executorlib.standalone.interactive.spawner import
|
|
9
|
+
from executorlib.standalone.interactive.spawner import (
|
|
10
|
+
BaseSpawner,
|
|
11
|
+
set_current_directory_in_environment,
|
|
12
|
+
)
|
|
10
13
|
from executorlib.standalone.scheduler import pysqa_execute_command, terminate_with_pysqa
|
|
11
14
|
from executorlib.task_scheduler.interactive.blockallocation import (
|
|
12
15
|
BlockAllocationTaskScheduler,
|
|
@@ -21,6 +24,7 @@ class PysqaSpawner(BaseSpawner):
|
|
|
21
24
|
threads_per_core: int = 1,
|
|
22
25
|
gpus_per_core: int = 0,
|
|
23
26
|
num_nodes: Optional[int] = None,
|
|
27
|
+
worker_id: int = 0,
|
|
24
28
|
exclusive: bool = False,
|
|
25
29
|
openmpi_oversubscribe: bool = False,
|
|
26
30
|
slurm_cmd_args: Optional[list[str]] = None,
|
|
@@ -38,6 +42,7 @@ class PysqaSpawner(BaseSpawner):
|
|
|
38
42
|
threads_per_core (int): The number of threads per core. Defaults to 1.
|
|
39
43
|
gpus_per_core (int): number of GPUs per worker - defaults to 0
|
|
40
44
|
num_nodes (int, optional): The number of compute nodes to use for executing the task. Defaults to None.
|
|
45
|
+
worker_id (int): The worker ID. Defaults to 0.
|
|
41
46
|
exclusive (bool): Whether to exclusively reserve the compute nodes, or allow sharing compute notes. Defaults
|
|
42
47
|
to False.
|
|
43
48
|
openmpi_oversubscribe (bool): Whether to oversubscribe the cores. Defaults to False.
|
|
@@ -49,6 +54,7 @@ class PysqaSpawner(BaseSpawner):
|
|
|
49
54
|
super().__init__(
|
|
50
55
|
cwd=cwd,
|
|
51
56
|
cores=cores,
|
|
57
|
+
worker_id=worker_id,
|
|
52
58
|
openmpi_oversubscribe=openmpi_oversubscribe,
|
|
53
59
|
)
|
|
54
60
|
self._threads_per_core = threads_per_core
|
|
@@ -180,6 +186,7 @@ class PysqaSpawner(BaseSpawner):
|
|
|
180
186
|
working_directory = os.path.join(self._cwd, hash)
|
|
181
187
|
else:
|
|
182
188
|
working_directory = os.path.abspath(hash)
|
|
189
|
+
set_current_directory_in_environment()
|
|
183
190
|
return queue_adapter.submit_job(
|
|
184
191
|
command=" ".join(self.generate_command(command_lst=command_lst)),
|
|
185
192
|
working_directory=working_directory,
|
{executorlib-1.7.4 → executorlib-1.8.1}/src/executorlib/task_scheduler/interactive/spawner_slurm.py
RENAMED
|
@@ -27,6 +27,7 @@ class SrunSpawner(SubprocessSpawner):
|
|
|
27
27
|
threads_per_core: int = 1,
|
|
28
28
|
gpus_per_core: int = 0,
|
|
29
29
|
num_nodes: Optional[int] = None,
|
|
30
|
+
worker_id: int = 0,
|
|
30
31
|
exclusive: bool = False,
|
|
31
32
|
openmpi_oversubscribe: bool = False,
|
|
32
33
|
slurm_cmd_args: Optional[list[str]] = None,
|
|
@@ -41,6 +42,7 @@ class SrunSpawner(SubprocessSpawner):
|
|
|
41
42
|
threads_per_core (int, optional): The number of threads per core. Defaults to 1.
|
|
42
43
|
gpus_per_core (int, optional): The number of GPUs per core. Defaults to 0.
|
|
43
44
|
num_nodes (int, optional): The number of compute nodes to use for executing the task. Defaults to None.
|
|
45
|
+
worker_id (int): The worker ID. Defaults to 0.
|
|
44
46
|
exclusive (bool): Whether to exclusively reserve the compute nodes, or allow sharing compute notes. Defaults to False.
|
|
45
47
|
openmpi_oversubscribe (bool, optional): Whether to oversubscribe the cores. Defaults to False.
|
|
46
48
|
slurm_cmd_args (list[str], optional): Additional command line arguments. Defaults to [].
|
|
@@ -49,6 +51,7 @@ class SrunSpawner(SubprocessSpawner):
|
|
|
49
51
|
super().__init__(
|
|
50
52
|
cwd=cwd,
|
|
51
53
|
cores=cores,
|
|
54
|
+
worker_id=worker_id,
|
|
52
55
|
openmpi_oversubscribe=openmpi_oversubscribe,
|
|
53
56
|
threads_per_core=threads_per_core,
|
|
54
57
|
)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{executorlib-1.7.4 → executorlib-1.8.1}/src/executorlib/standalone/interactive/communication.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{executorlib-1.7.4 → executorlib-1.8.1}/src/executorlib/task_scheduler/interactive/__init__.py
RENAMED
|
File without changes
|
{executorlib-1.7.4 → executorlib-1.8.1}/src/executorlib/task_scheduler/interactive/dependency.py
RENAMED
|
File without changes
|
{executorlib-1.7.4 → executorlib-1.8.1}/src/executorlib/task_scheduler/interactive/onetoone.py
RENAMED
|
File without changes
|
{executorlib-1.7.4 → executorlib-1.8.1}/src/executorlib/task_scheduler/interactive/shared.py
RENAMED
|
File without changes
|