executorlib 1.5.3__tar.gz → 1.6.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {executorlib-1.5.3 → executorlib-1.6.0}/PKG-INFO +2 -3
- {executorlib-1.5.3 → executorlib-1.6.0}/executorlib/__init__.py +7 -0
- {executorlib-1.5.3 → executorlib-1.6.0}/executorlib/_version.py +2 -2
- {executorlib-1.5.3 → executorlib-1.6.0}/executorlib/backend/cache_parallel.py +5 -0
- {executorlib-1.5.3 → executorlib-1.6.0}/executorlib/backend/interactive_parallel.py +5 -0
- {executorlib-1.5.3 → executorlib-1.6.0}/executorlib/backend/interactive_serial.py +5 -0
- {executorlib-1.5.3 → executorlib-1.6.0}/executorlib/executor/flux.py +10 -0
- {executorlib-1.5.3 → executorlib-1.6.0}/executorlib/executor/single.py +10 -0
- {executorlib-1.5.3 → executorlib-1.6.0}/executorlib/executor/slurm.py +10 -0
- {executorlib-1.5.3 → executorlib-1.6.0}/executorlib/standalone/cache.py +1 -0
- executorlib-1.6.0/executorlib/standalone/error.py +21 -0
- {executorlib-1.5.3 → executorlib-1.6.0}/executorlib/task_scheduler/file/backend.py +5 -0
- {executorlib-1.5.3 → executorlib-1.6.0}/executorlib/task_scheduler/file/hdf.py +4 -0
- {executorlib-1.5.3 → executorlib-1.6.0}/executorlib/task_scheduler/file/queue_spawner.py +32 -3
- {executorlib-1.5.3 → executorlib-1.6.0}/executorlib/task_scheduler/file/shared.py +2 -0
- {executorlib-1.5.3 → executorlib-1.6.0}/executorlib/task_scheduler/interactive/shared.py +3 -0
- {executorlib-1.5.3 → executorlib-1.6.0}/pyproject.toml +1 -2
- {executorlib-1.5.3 → executorlib-1.6.0}/.gitignore +0 -0
- {executorlib-1.5.3 → executorlib-1.6.0}/LICENSE +0 -0
- {executorlib-1.5.3 → executorlib-1.6.0}/README.md +0 -0
- {executorlib-1.5.3 → executorlib-1.6.0}/executorlib/api.py +0 -0
- {executorlib-1.5.3 → executorlib-1.6.0}/executorlib/backend/__init__.py +0 -0
- {executorlib-1.5.3 → executorlib-1.6.0}/executorlib/backend/cache_serial.py +0 -0
- {executorlib-1.5.3 → executorlib-1.6.0}/executorlib/executor/__init__.py +0 -0
- {executorlib-1.5.3 → executorlib-1.6.0}/executorlib/executor/base.py +0 -0
- {executorlib-1.5.3 → executorlib-1.6.0}/executorlib/standalone/__init__.py +0 -0
- {executorlib-1.5.3 → executorlib-1.6.0}/executorlib/standalone/command.py +0 -0
- {executorlib-1.5.3 → executorlib-1.6.0}/executorlib/standalone/inputcheck.py +0 -0
- {executorlib-1.5.3 → executorlib-1.6.0}/executorlib/standalone/interactive/__init__.py +0 -0
- {executorlib-1.5.3 → executorlib-1.6.0}/executorlib/standalone/interactive/arguments.py +0 -0
- {executorlib-1.5.3 → executorlib-1.6.0}/executorlib/standalone/interactive/backend.py +0 -0
- {executorlib-1.5.3 → executorlib-1.6.0}/executorlib/standalone/interactive/communication.py +0 -0
- {executorlib-1.5.3 → executorlib-1.6.0}/executorlib/standalone/interactive/spawner.py +0 -0
- {executorlib-1.5.3 → executorlib-1.6.0}/executorlib/standalone/plot.py +0 -0
- {executorlib-1.5.3 → executorlib-1.6.0}/executorlib/standalone/queue.py +0 -0
- {executorlib-1.5.3 → executorlib-1.6.0}/executorlib/standalone/serialize.py +0 -0
- {executorlib-1.5.3 → executorlib-1.6.0}/executorlib/task_scheduler/__init__.py +0 -0
- {executorlib-1.5.3 → executorlib-1.6.0}/executorlib/task_scheduler/base.py +0 -0
- {executorlib-1.5.3 → executorlib-1.6.0}/executorlib/task_scheduler/file/__init__.py +0 -0
- {executorlib-1.5.3 → executorlib-1.6.0}/executorlib/task_scheduler/file/subprocess_spawner.py +0 -0
- {executorlib-1.5.3 → executorlib-1.6.0}/executorlib/task_scheduler/file/task_scheduler.py +0 -0
- {executorlib-1.5.3 → executorlib-1.6.0}/executorlib/task_scheduler/interactive/__init__.py +0 -0
- {executorlib-1.5.3 → executorlib-1.6.0}/executorlib/task_scheduler/interactive/blockallocation.py +0 -0
- {executorlib-1.5.3 → executorlib-1.6.0}/executorlib/task_scheduler/interactive/dependency.py +0 -0
- {executorlib-1.5.3 → executorlib-1.6.0}/executorlib/task_scheduler/interactive/fluxspawner.py +0 -0
- {executorlib-1.5.3 → executorlib-1.6.0}/executorlib/task_scheduler/interactive/onetoone.py +0 -0
- {executorlib-1.5.3 → executorlib-1.6.0}/executorlib/task_scheduler/interactive/slurmspawner.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: executorlib
|
|
3
|
-
Version: 1.5.3
|
|
3
|
+
Version: 1.6.0
|
|
4
4
|
Summary: Up-scale python functions for high performance computing (HPC) with executorlib.
|
|
5
5
|
Project-URL: Homepage, https://github.com/pyiron/executorlib
|
|
6
6
|
Project-URL: Documentation, https://executorlib.readthedocs.io
|
|
@@ -41,13 +41,12 @@ Classifier: Development Status :: 5 - Production/Stable
|
|
|
41
41
|
Classifier: Intended Audience :: Science/Research
|
|
42
42
|
Classifier: License :: OSI Approved :: BSD License
|
|
43
43
|
Classifier: Operating System :: OS Independent
|
|
44
|
-
Classifier: Programming Language :: Python :: 3.9
|
|
45
44
|
Classifier: Programming Language :: Python :: 3.10
|
|
46
45
|
Classifier: Programming Language :: Python :: 3.11
|
|
47
46
|
Classifier: Programming Language :: Python :: 3.12
|
|
48
47
|
Classifier: Programming Language :: Python :: 3.13
|
|
49
48
|
Classifier: Topic :: Scientific/Engineering :: Physics
|
|
50
|
-
Requires-Python: <3.14
|
|
49
|
+
Requires-Python: <3.14,>3.9
|
|
51
50
|
Requires-Dist: cloudpickle<=3.1.1,>=2.0.0
|
|
52
51
|
Requires-Dist: pyzmq<=27.0.0,>=25.0.0
|
|
53
52
|
Provides-Extra: all
|
|
@@ -36,4 +36,11 @@ __all__: list[str] = [
|
|
|
36
36
|
"SlurmClusterExecutor",
|
|
37
37
|
]
|
|
38
38
|
|
|
39
|
+
try:
|
|
40
|
+
from executorlib.task_scheduler.file.queue_spawner import terminate_tasks_in_cache
|
|
41
|
+
|
|
42
|
+
__all__ += ["terminate_tasks_in_cache"]
|
|
43
|
+
except ImportError:
|
|
44
|
+
pass
|
|
45
|
+
|
|
39
46
|
__version__ = _version.__version__
|
|
@@ -4,6 +4,7 @@ import time
|
|
|
4
4
|
|
|
5
5
|
import cloudpickle
|
|
6
6
|
|
|
7
|
+
from executorlib.standalone.error import backend_write_error_file
|
|
7
8
|
from executorlib.task_scheduler.file.backend import (
|
|
8
9
|
backend_load_file,
|
|
9
10
|
backend_write_file,
|
|
@@ -53,6 +54,10 @@ def main() -> None:
|
|
|
53
54
|
output={"error": error},
|
|
54
55
|
runtime=time.time() - time_start,
|
|
55
56
|
)
|
|
57
|
+
backend_write_error_file(
|
|
58
|
+
error=error,
|
|
59
|
+
apply_dict=apply_dict,
|
|
60
|
+
)
|
|
56
61
|
else:
|
|
57
62
|
if mpi_rank_zero:
|
|
58
63
|
backend_write_file(
|
|
@@ -6,6 +6,7 @@ from typing import Optional
|
|
|
6
6
|
import cloudpickle
|
|
7
7
|
import zmq
|
|
8
8
|
|
|
9
|
+
from executorlib.standalone.error import backend_write_error_file
|
|
9
10
|
from executorlib.standalone.interactive.backend import call_funct, parse_arguments
|
|
10
11
|
from executorlib.standalone.interactive.communication import (
|
|
11
12
|
interface_connect,
|
|
@@ -82,6 +83,10 @@ def main() -> None:
|
|
|
82
83
|
socket=socket,
|
|
83
84
|
result_dict={"error": error},
|
|
84
85
|
)
|
|
86
|
+
backend_write_error_file(
|
|
87
|
+
error=error,
|
|
88
|
+
apply_dict=input_dict,
|
|
89
|
+
)
|
|
85
90
|
else:
|
|
86
91
|
# Send output
|
|
87
92
|
if mpi_rank_zero:
|
|
@@ -2,6 +2,7 @@ import sys
|
|
|
2
2
|
from os.path import abspath
|
|
3
3
|
from typing import Optional
|
|
4
4
|
|
|
5
|
+
from executorlib.standalone.error import backend_write_error_file
|
|
5
6
|
from executorlib.standalone.interactive.backend import call_funct, parse_arguments
|
|
6
7
|
from executorlib.standalone.interactive.communication import (
|
|
7
8
|
interface_connect,
|
|
@@ -58,6 +59,10 @@ def main(argument_lst: Optional[list[str]] = None):
|
|
|
58
59
|
socket=socket,
|
|
59
60
|
result_dict={"error": error},
|
|
60
61
|
)
|
|
62
|
+
backend_write_error_file(
|
|
63
|
+
error=error,
|
|
64
|
+
apply_dict=input_dict,
|
|
65
|
+
)
|
|
61
66
|
else:
|
|
62
67
|
# Send output
|
|
63
68
|
interface_send(socket=socket, result_dict={"result": output})
|
|
@@ -41,6 +41,8 @@ class FluxJobExecutor(BaseExecutor):
|
|
|
41
41
|
Defaults to None.
|
|
42
42
|
- exclusive (bool): Whether to exclusively reserve the compute nodes, or allow sharing
|
|
43
43
|
compute notes. Defaults to False.
|
|
44
|
+
- error_log_file (str): Name of the error log file to use for storing exceptions raised
|
|
45
|
+
by the Python functions submitted to the Executor.
|
|
44
46
|
flux_executor (flux.job.FluxExecutor): Flux Python interface to submit the workers to flux
|
|
45
47
|
flux_executor_pmi_mode (str): PMI interface to use (OpenMPI v5 requires pmix) default is None (Flux only)
|
|
46
48
|
flux_executor_nesting (bool): Provide hierarchically nested Flux job scheduler inside the submitted function.
|
|
@@ -126,6 +128,8 @@ class FluxJobExecutor(BaseExecutor):
|
|
|
126
128
|
Defaults to None.
|
|
127
129
|
- exclusive (bool): Whether to exclusively reserve the compute nodes, or allow sharing
|
|
128
130
|
compute notes. Defaults to False.
|
|
131
|
+
- error_log_file (str): Name of the error log file to use for storing exceptions
|
|
132
|
+
raised by the Python functions submitted to the Executor.
|
|
129
133
|
flux_executor (flux.job.FluxExecutor): Flux Python interface to submit the workers to flux
|
|
130
134
|
flux_executor_pmi_mode (str): PMI interface to use (OpenMPI v5 requires pmix) default is None (Flux only)
|
|
131
135
|
flux_executor_nesting (bool): Provide hierarchically nested Flux job scheduler inside the submitted function.
|
|
@@ -229,6 +233,8 @@ class FluxClusterExecutor(BaseExecutor):
|
|
|
229
233
|
- openmpi_oversubscribe (bool): adds the `--oversubscribe` command line flag (OpenMPI and
|
|
230
234
|
SLURM only) - default False
|
|
231
235
|
- slurm_cmd_args (list): Additional command line arguments for the srun call (SLURM only)
|
|
236
|
+
- error_log_file (str): Name of the error log file to use for storing exceptions raised
|
|
237
|
+
by the Python functions submitted to the Executor.
|
|
232
238
|
pysqa_config_directory (str, optional): path to the pysqa config directory (only for pysqa based backend).
|
|
233
239
|
hostname_localhost (boolean): use localhost instead of the hostname to establish the zmq connection. In the
|
|
234
240
|
context of an HPC cluster this essential to be able to communicate to an
|
|
@@ -308,6 +314,8 @@ class FluxClusterExecutor(BaseExecutor):
|
|
|
308
314
|
and SLURM only) - default False
|
|
309
315
|
- slurm_cmd_args (list): Additional command line arguments for the srun call (SLURM
|
|
310
316
|
only)
|
|
317
|
+
- error_log_file (str): Name of the error log file to use for storing exceptions
|
|
318
|
+
raised by the Python functions submitted to the Executor.
|
|
311
319
|
pysqa_config_directory (str, optional): path to the pysqa config directory (only for pysqa based backend).
|
|
312
320
|
hostname_localhost (boolean): use localhost instead of the hostname to establish the zmq connection. In the
|
|
313
321
|
context of an HPC cluster this essential to be able to communicate to an
|
|
@@ -424,6 +432,8 @@ def create_flux_executor(
|
|
|
424
432
|
Defaults to None.
|
|
425
433
|
- exclusive (bool): Whether to exclusively reserve the compute nodes, or allow sharing
|
|
426
434
|
compute notes. Defaults to False.
|
|
435
|
+
- error_log_file (str): Name of the error log file to use for storing exceptions raised
|
|
436
|
+
by the Python functions submitted to the Executor.
|
|
427
437
|
flux_executor (flux.job.FluxExecutor): Flux Python interface to submit the workers to flux
|
|
428
438
|
flux_executor_pmi_mode (str): PMI interface to use (OpenMPI v5 requires pmix) default is None (Flux only)
|
|
429
439
|
flux_executor_nesting (bool): Provide hierarchically nested Flux job scheduler inside the submitted function.
|
|
@@ -39,6 +39,8 @@ class SingleNodeExecutor(BaseExecutor):
|
|
|
39
39
|
- openmpi_oversubscribe (bool): adds the `--oversubscribe` command line flag (OpenMPI and
|
|
40
40
|
SLURM only) - default False
|
|
41
41
|
- slurm_cmd_args (list): Additional command line arguments for the srun call (SLURM only)
|
|
42
|
+
- error_log_file (str): Name of the error log file to use for storing exceptions raised
|
|
43
|
+
by the Python functions submitted to the Executor.
|
|
42
44
|
hostname_localhost (boolean): use localhost instead of the hostname to establish the zmq connection. In the
|
|
43
45
|
context of an HPC cluster this essential to be able to communicate to an
|
|
44
46
|
Executor running on a different compute node within the same allocation. And
|
|
@@ -116,6 +118,8 @@ class SingleNodeExecutor(BaseExecutor):
|
|
|
116
118
|
and SLURM only) - default False
|
|
117
119
|
- slurm_cmd_args (list): Additional command line arguments for the srun call (SLURM
|
|
118
120
|
only)
|
|
121
|
+
- error_log_file (str): Name of the error log file to use for storing exceptions
|
|
122
|
+
raised by the Python functions submitted to the Executor.
|
|
119
123
|
hostname_localhost (boolean): use localhost instead of the hostname to establish the zmq connection. In the
|
|
120
124
|
context of an HPC cluster this essential to be able to communicate to an
|
|
121
125
|
Executor running on a different compute node within the same allocation. And
|
|
@@ -202,6 +206,8 @@ class TestClusterExecutor(BaseExecutor):
|
|
|
202
206
|
- threads_per_core (int): number of OpenMP threads to be used for each function call
|
|
203
207
|
- gpus_per_core (int): number of GPUs per worker - defaults to 0
|
|
204
208
|
- cwd (str/None): current working directory where the parallel python task is executed
|
|
209
|
+
- error_log_file (str): Name of the error log file to use for storing exceptions raised
|
|
210
|
+
by the Python functions submitted to the Executor.
|
|
205
211
|
hostname_localhost (boolean): use localhost instead of the hostname to establish the zmq connection. In the
|
|
206
212
|
context of an HPC cluster this essential to be able to communicate to an
|
|
207
213
|
Executor running on a different compute node within the same allocation. And
|
|
@@ -273,6 +279,8 @@ class TestClusterExecutor(BaseExecutor):
|
|
|
273
279
|
- threads_per_core (int): number of OpenMP threads to be used for each function call
|
|
274
280
|
- gpus_per_core (int): number of GPUs per worker - defaults to 0
|
|
275
281
|
- cwd (str/None): current working directory where the parallel python task is executed
|
|
282
|
+
- error_log_file (str): Name of the error log file to use for storing exceptions
|
|
283
|
+
raised by the Python functions submitted to the Executor.
|
|
276
284
|
hostname_localhost (boolean): use localhost instead of the hostname to establish the zmq connection. In the
|
|
277
285
|
context of an HPC cluster this essential to be able to communicate to an
|
|
278
286
|
Executor running on a different compute node within the same allocation. And
|
|
@@ -381,6 +389,8 @@ def create_single_node_executor(
|
|
|
381
389
|
and SLURM only) - default False
|
|
382
390
|
- slurm_cmd_args (list): Additional command line arguments for the srun call (SLURM
|
|
383
391
|
only)
|
|
392
|
+
- error_log_file (str): Name of the error log file to use for storing exceptions raised
|
|
393
|
+
by the Python functions submitted to the Executor.
|
|
384
394
|
hostname_localhost (boolean): use localhost instead of the hostname to establish the zmq connection. In the
|
|
385
395
|
context of an HPC cluster this essential to be able to communicate to an
|
|
386
396
|
Executor running on a different compute node within the same allocation. And
|
|
@@ -41,6 +41,8 @@ class SlurmClusterExecutor(BaseExecutor):
|
|
|
41
41
|
- openmpi_oversubscribe (bool): adds the `--oversubscribe` command line flag (OpenMPI and
|
|
42
42
|
SLURM only) - default False
|
|
43
43
|
- slurm_cmd_args (list): Additional command line arguments for the srun call (SLURM only)
|
|
44
|
+
- error_log_file (str): Name of the error log file to use for storing exceptions raised
|
|
45
|
+
by the Python functions submitted to the Executor.
|
|
44
46
|
pysqa_config_directory (str, optional): path to the pysqa config directory (only for pysqa based backend).
|
|
45
47
|
hostname_localhost (boolean): use localhost instead of the hostname to establish the zmq connection. In the
|
|
46
48
|
context of an HPC cluster this essential to be able to communicate to an
|
|
@@ -120,6 +122,8 @@ class SlurmClusterExecutor(BaseExecutor):
|
|
|
120
122
|
and SLURM only) - default False
|
|
121
123
|
- slurm_cmd_args (list): Additional command line arguments for the srun call (SLURM
|
|
122
124
|
only)
|
|
125
|
+
- error_log_file (str): Name of the error log file to use for storing exceptions
|
|
126
|
+
raised by the Python functions submitted to the Executor.
|
|
123
127
|
pysqa_config_directory (str, optional): path to the pysqa config directory (only for pysqa based backend).
|
|
124
128
|
hostname_localhost (boolean): use localhost instead of the hostname to establish the zmq connection. In the
|
|
125
129
|
context of an HPC cluster this essential to be able to communicate to an
|
|
@@ -226,6 +230,8 @@ class SlurmJobExecutor(BaseExecutor):
|
|
|
226
230
|
Defaults to None.
|
|
227
231
|
- exclusive (bool): Whether to exclusively reserve the compute nodes, or allow sharing
|
|
228
232
|
compute notes. Defaults to False.
|
|
233
|
+
- error_log_file (str): Name of the error log file to use for storing exceptions raised
|
|
234
|
+
by the Python functions submitted to the Executor.
|
|
229
235
|
hostname_localhost (boolean): use localhost instead of the hostname to establish the zmq connection. In the
|
|
230
236
|
context of an HPC cluster this essential to be able to communicate to an
|
|
231
237
|
Executor running on a different compute node within the same allocation. And
|
|
@@ -307,6 +313,8 @@ class SlurmJobExecutor(BaseExecutor):
|
|
|
307
313
|
Defaults to None.
|
|
308
314
|
- exclusive (bool): Whether to exclusively reserve the compute nodes, or allow sharing
|
|
309
315
|
compute notes. Defaults to False.
|
|
316
|
+
- error_log_file (str): Name of the error log file to use for storing exceptions
|
|
317
|
+
raised by the Python functions submitted to the Executor.
|
|
310
318
|
hostname_localhost (boolean): use localhost instead of the hostname to establish the zmq connection. In the
|
|
311
319
|
context of an HPC cluster this essential to be able to communicate to an
|
|
312
320
|
Executor running on a different compute node within the same allocation. And
|
|
@@ -408,6 +416,8 @@ def create_slurm_executor(
|
|
|
408
416
|
Defaults to None.
|
|
409
417
|
- exclusive (bool): Whether to exclusively reserve the compute nodes, or allow sharing
|
|
410
418
|
compute notes. Defaults to False.
|
|
419
|
+
- error_log_file (str): Name of the error log file to use for storing exceptions raised
|
|
420
|
+
by the Python functions submitted to the Executor.
|
|
411
421
|
hostname_localhost (boolean): use localhost instead of the hostname to establish the zmq connection. In the
|
|
412
422
|
context of an HPC cluster this essential to be able to communicate to an
|
|
413
423
|
Executor running on a different compute node within the same allocation. And
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
import traceback
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def backend_write_error_file(error: Exception, apply_dict: dict) -> None:
|
|
5
|
+
"""
|
|
6
|
+
Write an error to a file if specified in the apply_dict.
|
|
7
|
+
|
|
8
|
+
Args:
|
|
9
|
+
error (Exception): The error to be written.
|
|
10
|
+
apply_dict (dict): Dictionary containing additional parameters.
|
|
11
|
+
|
|
12
|
+
Returns:
|
|
13
|
+
None
|
|
14
|
+
"""
|
|
15
|
+
error_log_file = apply_dict.get("error_log_file")
|
|
16
|
+
if error_log_file is not None:
|
|
17
|
+
with open(error_log_file, "a") as f:
|
|
18
|
+
f.write("function: " + str(apply_dict["fn"]) + "\n")
|
|
19
|
+
f.write("args: " + str(apply_dict["args"]) + "\n")
|
|
20
|
+
f.write("kwargs: " + str(apply_dict["kwargs"]) + "\n")
|
|
21
|
+
traceback.print_exception(error, file=f)
|
|
@@ -2,6 +2,7 @@ import os
|
|
|
2
2
|
import time
|
|
3
3
|
from typing import Any
|
|
4
4
|
|
|
5
|
+
from executorlib.standalone.error import backend_write_error_file
|
|
5
6
|
from executorlib.task_scheduler.file.hdf import dump, load
|
|
6
7
|
from executorlib.task_scheduler.file.shared import FutureItem
|
|
7
8
|
|
|
@@ -77,6 +78,10 @@ def backend_execute_task_in_file(file_name: str) -> None:
|
|
|
77
78
|
}
|
|
78
79
|
except Exception as error:
|
|
79
80
|
result = {"error": error}
|
|
81
|
+
backend_write_error_file(
|
|
82
|
+
error=error,
|
|
83
|
+
apply_dict=apply_dict,
|
|
84
|
+
)
|
|
80
85
|
|
|
81
86
|
backend_write_file(
|
|
82
87
|
file_name=file_name,
|
|
@@ -52,6 +52,10 @@ def load(file_name: str) -> dict:
|
|
|
52
52
|
data_dict["kwargs"] = cloudpickle.loads(np.void(hdf["/input_kwargs"]))
|
|
53
53
|
else:
|
|
54
54
|
data_dict["kwargs"] = {}
|
|
55
|
+
if "error_log_file" in hdf:
|
|
56
|
+
data_dict["error_log_file"] = cloudpickle.loads(
|
|
57
|
+
np.void(hdf["/error_log_file"])
|
|
58
|
+
)
|
|
55
59
|
return data_dict
|
|
56
60
|
|
|
57
61
|
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import contextlib
|
|
1
2
|
import os
|
|
2
3
|
import subprocess
|
|
3
4
|
from typing import Optional, Union
|
|
@@ -32,7 +33,7 @@ def execute_with_pysqa(
|
|
|
32
33
|
cwd: None,
|
|
33
34
|
}
|
|
34
35
|
config_directory (str, optional): path to the config directory.
|
|
35
|
-
backend (str, optional): name of the backend used to spawn tasks.
|
|
36
|
+
backend (str, optional): name of the backend used to spawn tasks ["slurm", "flux"].
|
|
36
37
|
|
|
37
38
|
Returns:
|
|
38
39
|
int: queuing system ID
|
|
@@ -101,7 +102,7 @@ def terminate_with_pysqa(
|
|
|
101
102
|
Args:
|
|
102
103
|
queue_id (int): Queuing system ID of the job to delete.
|
|
103
104
|
config_directory (str, optional): path to the config directory.
|
|
104
|
-
backend (str, optional): name of the backend used to spawn tasks.
|
|
105
|
+
backend (str, optional): name of the backend used to spawn tasks ["slurm", "flux"].
|
|
105
106
|
"""
|
|
106
107
|
qa = QueueAdapter(
|
|
107
108
|
directory=config_directory,
|
|
@@ -110,7 +111,35 @@ def terminate_with_pysqa(
|
|
|
110
111
|
)
|
|
111
112
|
status = qa.get_status_of_job(process_id=queue_id)
|
|
112
113
|
if status is not None and status not in ["finished", "error"]:
|
|
113
|
-
|
|
114
|
+
with contextlib.suppress(subprocess.CalledProcessError):
|
|
115
|
+
qa.delete_job(process_id=queue_id)
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def terminate_tasks_in_cache(
|
|
119
|
+
cache_directory: str,
|
|
120
|
+
config_directory: Optional[str] = None,
|
|
121
|
+
backend: Optional[str] = None,
|
|
122
|
+
):
|
|
123
|
+
"""
|
|
124
|
+
Delete all jobs stored in the cache directory from the queuing system
|
|
125
|
+
|
|
126
|
+
Args:
|
|
127
|
+
cache_directory (str): The directory to store cache files.
|
|
128
|
+
config_directory (str, optional): path to the config directory.
|
|
129
|
+
backend (str, optional): name of the backend used to spawn tasks ["slurm", "flux"].
|
|
130
|
+
"""
|
|
131
|
+
hdf5_file_lst = []
|
|
132
|
+
for root, _, files in os.walk(cache_directory):
|
|
133
|
+
hdf5_file_lst += [os.path.join(root, f) for f in files if f[-5:] == "_i.h5"]
|
|
134
|
+
|
|
135
|
+
for f in hdf5_file_lst:
|
|
136
|
+
queue_id = get_queue_id(f)
|
|
137
|
+
if queue_id is not None:
|
|
138
|
+
terminate_with_pysqa(
|
|
139
|
+
queue_id=queue_id,
|
|
140
|
+
config_directory=config_directory,
|
|
141
|
+
backend=backend,
|
|
142
|
+
)
|
|
114
143
|
|
|
115
144
|
|
|
116
145
|
def _pysqa_execute_command(
|
|
@@ -126,6 +126,7 @@ def execute_tasks_h5(
|
|
|
126
126
|
)
|
|
127
127
|
cache_key = task_resource_dict.pop("cache_key", None)
|
|
128
128
|
cache_directory = os.path.abspath(task_resource_dict.pop("cache_directory"))
|
|
129
|
+
error_log_file = task_resource_dict.pop("error_log_file", None)
|
|
129
130
|
task_key, data_dict = serialize_funct_h5(
|
|
130
131
|
fn=task_dict["fn"],
|
|
131
132
|
fn_args=task_args,
|
|
@@ -133,6 +134,7 @@ def execute_tasks_h5(
|
|
|
133
134
|
resource_dict=task_resource_dict,
|
|
134
135
|
cache_key=cache_key,
|
|
135
136
|
)
|
|
137
|
+
data_dict["error_log_file"] = error_log_file
|
|
136
138
|
if task_key not in memory_dict:
|
|
137
139
|
if os.path.join(
|
|
138
140
|
cache_directory, task_key + "_o.h5"
|
|
@@ -26,6 +26,7 @@ def execute_tasks(
|
|
|
26
26
|
cache_key: Optional[str] = None,
|
|
27
27
|
queue_join_on_shutdown: bool = True,
|
|
28
28
|
log_obj_size: bool = False,
|
|
29
|
+
error_log_file: Optional[str] = None,
|
|
29
30
|
**kwargs,
|
|
30
31
|
) -> None:
|
|
31
32
|
"""
|
|
@@ -70,6 +71,8 @@ def execute_tasks(
|
|
|
70
71
|
future_queue.join()
|
|
71
72
|
break
|
|
72
73
|
elif "fn" in task_dict and "future" in task_dict:
|
|
74
|
+
if error_log_file is not None:
|
|
75
|
+
task_dict["error_log_file"] = error_log_file
|
|
73
76
|
if cache_directory is None:
|
|
74
77
|
_execute_task_without_cache(
|
|
75
78
|
interface=interface, task_dict=task_dict, future_queue=future_queue
|
|
@@ -16,14 +16,13 @@ authors = [
|
|
|
16
16
|
readme = "README.md"
|
|
17
17
|
license = { file = "LICENSE" }
|
|
18
18
|
keywords = ["high performance computing", "hpc", "task scheduler", "slurm", "flux-framework", "executor"]
|
|
19
|
-
requires-python = ">=3.9, <3.14"
|
|
19
|
+
requires-python = ">3.9, <3.14"
|
|
20
20
|
classifiers = [
|
|
21
21
|
"Development Status :: 5 - Production/Stable",
|
|
22
22
|
"Topic :: Scientific/Engineering :: Physics",
|
|
23
23
|
"License :: OSI Approved :: BSD License",
|
|
24
24
|
"Intended Audience :: Science/Research",
|
|
25
25
|
"Operating System :: OS Independent",
|
|
26
|
-
"Programming Language :: Python :: 3.9",
|
|
27
26
|
"Programming Language :: Python :: 3.10",
|
|
28
27
|
"Programming Language :: Python :: 3.11",
|
|
29
28
|
"Programming Language :: Python :: 3.12",
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{executorlib-1.5.3 → executorlib-1.6.0}/executorlib/task_scheduler/file/subprocess_spawner.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{executorlib-1.5.3 → executorlib-1.6.0}/executorlib/task_scheduler/interactive/blockallocation.py
RENAMED
|
File without changes
|
{executorlib-1.5.3 → executorlib-1.6.0}/executorlib/task_scheduler/interactive/dependency.py
RENAMED
|
File without changes
|
{executorlib-1.5.3 → executorlib-1.6.0}/executorlib/task_scheduler/interactive/fluxspawner.py
RENAMED
|
File without changes
|
|
File without changes
|
{executorlib-1.5.3 → executorlib-1.6.0}/executorlib/task_scheduler/interactive/slurmspawner.py
RENAMED
|
File without changes
|