executorlib 1.6.0.tar.gz → 1.6.2.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. {executorlib-1.6.0 → executorlib-1.6.2}/PKG-INFO +5 -4
  2. {executorlib-1.6.0 → executorlib-1.6.2}/README.md +1 -0
  3. {executorlib-1.6.0 → executorlib-1.6.2}/executorlib/__init__.py +42 -10
  4. {executorlib-1.6.0 → executorlib-1.6.2}/executorlib/_version.py +16 -3
  5. {executorlib-1.6.0 → executorlib-1.6.2}/executorlib/backend/interactive_parallel.py +2 -2
  6. {executorlib-1.6.0 → executorlib-1.6.2}/executorlib/backend/interactive_serial.py +2 -2
  7. {executorlib-1.6.0 → executorlib-1.6.2}/executorlib/executor/base.py +27 -10
  8. {executorlib-1.6.0 → executorlib-1.6.2}/executorlib/executor/flux.py +14 -11
  9. {executorlib-1.6.0 → executorlib-1.6.2}/executorlib/executor/single.py +1 -1
  10. {executorlib-1.6.0 → executorlib-1.6.2}/executorlib/executor/slurm.py +12 -1
  11. executorlib-1.6.2/executorlib/standalone/batched.py +27 -0
  12. executorlib-1.6.2/executorlib/standalone/command.py +114 -0
  13. {executorlib-1.6.0/executorlib/task_scheduler/file → executorlib-1.6.2/executorlib/standalone}/hdf.py +61 -1
  14. {executorlib-1.6.0 → executorlib-1.6.2}/executorlib/standalone/inputcheck.py +11 -4
  15. {executorlib-1.6.0 → executorlib-1.6.2}/executorlib/standalone/interactive/backend.py +2 -1
  16. {executorlib-1.6.0 → executorlib-1.6.2}/executorlib/standalone/interactive/communication.py +5 -0
  17. {executorlib-1.6.0 → executorlib-1.6.2}/executorlib/standalone/interactive/spawner.py +4 -1
  18. {executorlib-1.6.0 → executorlib-1.6.2}/executorlib/standalone/plot.py +7 -3
  19. executorlib-1.6.2/executorlib/standalone/scheduler.py +65 -0
  20. {executorlib-1.6.0 → executorlib-1.6.2}/executorlib/standalone/serialize.py +11 -11
  21. executorlib-1.6.2/executorlib/standalone/slurm_command.py +51 -0
  22. {executorlib-1.6.0 → executorlib-1.6.2}/executorlib/task_scheduler/base.py +27 -10
  23. {executorlib-1.6.0 → executorlib-1.6.2}/executorlib/task_scheduler/file/backend.py +1 -1
  24. {executorlib-1.6.0 → executorlib-1.6.2}/executorlib/task_scheduler/file/queue_spawner.py +5 -66
  25. {executorlib-1.6.0 → executorlib-1.6.2}/executorlib/task_scheduler/file/shared.py +13 -35
  26. {executorlib-1.6.0 → executorlib-1.6.2}/executorlib/task_scheduler/file/subprocess_spawner.py +3 -1
  27. {executorlib-1.6.0 → executorlib-1.6.2}/executorlib/task_scheduler/file/task_scheduler.py +12 -7
  28. {executorlib-1.6.0 → executorlib-1.6.2}/executorlib/task_scheduler/interactive/blockallocation.py +12 -12
  29. {executorlib-1.6.0 → executorlib-1.6.2}/executorlib/task_scheduler/interactive/dependency.py +56 -7
  30. {executorlib-1.6.0 → executorlib-1.6.2}/executorlib/task_scheduler/interactive/fluxspawner.py +6 -5
  31. {executorlib-1.6.0 → executorlib-1.6.2}/executorlib/task_scheduler/interactive/shared.py +11 -32
  32. {executorlib-1.6.0 → executorlib-1.6.2}/executorlib/task_scheduler/interactive/slurmspawner.py +16 -57
  33. {executorlib-1.6.0 → executorlib-1.6.2}/pyproject.toml +8 -4
  34. executorlib-1.6.0/executorlib/standalone/cache.py +0 -57
  35. executorlib-1.6.0/executorlib/standalone/command.py +0 -14
  36. {executorlib-1.6.0 → executorlib-1.6.2}/.gitignore +0 -0
  37. {executorlib-1.6.0 → executorlib-1.6.2}/LICENSE +0 -0
  38. {executorlib-1.6.0 → executorlib-1.6.2}/executorlib/api.py +0 -0
  39. {executorlib-1.6.0 → executorlib-1.6.2}/executorlib/backend/__init__.py +0 -0
  40. {executorlib-1.6.0 → executorlib-1.6.2}/executorlib/backend/cache_parallel.py +0 -0
  41. {executorlib-1.6.0 → executorlib-1.6.2}/executorlib/backend/cache_serial.py +0 -0
  42. {executorlib-1.6.0 → executorlib-1.6.2}/executorlib/executor/__init__.py +0 -0
  43. {executorlib-1.6.0 → executorlib-1.6.2}/executorlib/standalone/__init__.py +0 -0
  44. {executorlib-1.6.0 → executorlib-1.6.2}/executorlib/standalone/error.py +0 -0
  45. {executorlib-1.6.0 → executorlib-1.6.2}/executorlib/standalone/interactive/__init__.py +0 -0
  46. {executorlib-1.6.0 → executorlib-1.6.2}/executorlib/standalone/interactive/arguments.py +0 -0
  47. {executorlib-1.6.0 → executorlib-1.6.2}/executorlib/standalone/queue.py +0 -0
  48. {executorlib-1.6.0 → executorlib-1.6.2}/executorlib/task_scheduler/__init__.py +0 -0
  49. {executorlib-1.6.0 → executorlib-1.6.2}/executorlib/task_scheduler/file/__init__.py +0 -0
  50. {executorlib-1.6.0 → executorlib-1.6.2}/executorlib/task_scheduler/interactive/__init__.py +0 -0
  51. {executorlib-1.6.0 → executorlib-1.6.2}/executorlib/task_scheduler/interactive/onetoone.py +0 -0
{executorlib-1.6.0 → executorlib-1.6.2}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: executorlib
-Version: 1.6.0
+Version: 1.6.2
 Summary: Up-scale python functions for high performance computing (HPC) with executorlib.
 Project-URL: Homepage, https://github.com/pyiron/executorlib
 Project-URL: Documentation, https://executorlib.readthedocs.io
@@ -48,19 +48,19 @@ Classifier: Programming Language :: Python :: 3.13
 Classifier: Topic :: Scientific/Engineering :: Physics
 Requires-Python: <3.14,>3.9
 Requires-Dist: cloudpickle<=3.1.1,>=2.0.0
-Requires-Dist: pyzmq<=27.0.0,>=25.0.0
+Requires-Dist: pyzmq<=27.0.2,>=25.0.0
 Provides-Extra: all
 Requires-Dist: h5py<=3.14.0,>=3.6.0; extra == 'all'
 Requires-Dist: ipython<=9.0.2,>=7.33.0; extra == 'all'
 Requires-Dist: mpi4py<=4.0.1,>=3.1.4; extra == 'all'
 Requires-Dist: networkx<=3.4.2,>=2.8.8; extra == 'all'
 Requires-Dist: pygraphviz<=1.14,>=1.10; extra == 'all'
-Requires-Dist: pysqa==0.2.7; extra == 'all'
+Requires-Dist: pysqa==0.3.1; extra == 'all'
 Provides-Extra: cache
 Requires-Dist: h5py<=3.14.0,>=3.6.0; extra == 'cache'
 Provides-Extra: cluster
 Requires-Dist: h5py<=3.14.0,>=3.6.0; extra == 'cluster'
-Requires-Dist: pysqa==0.2.7; extra == 'cluster'
+Requires-Dist: pysqa==0.3.1; extra == 'cluster'
 Provides-Extra: graph
 Requires-Dist: networkx<=3.4.2,>=2.8.8; extra == 'graph'
 Requires-Dist: pygraphviz<=1.14,>=1.10; extra == 'graph'
@@ -208,6 +208,7 @@ as hierarchical job scheduler within the allocations.
 * [Basic Functionality](https://executorlib.readthedocs.io/en/latest/1-single-node.html#basic-functionality)
 * [Parallel Functions](https://executorlib.readthedocs.io/en/latest/1-single-node.html#parallel-functions)
 * [Performance Optimization](https://executorlib.readthedocs.io/en/latest/1-single-node.html#performance-optimization)
+* [Testing and Debugging](https://executorlib.readthedocs.io/en/latest/1-single-node.html#testing-and-debugging)
 * [HPC Cluster Executor](https://executorlib.readthedocs.io/en/latest/2-hpc-cluster.html)
 * [SLURM](https://executorlib.readthedocs.io/en/latest/2-hpc-cluster.html#slurm)
 * [Flux](https://executorlib.readthedocs.io/en/latest/2-hpc-cluster.html#flux)
{executorlib-1.6.0 → executorlib-1.6.2}/README.md

@@ -134,6 +134,7 @@ as hierarchical job scheduler within the allocations.
 * [Basic Functionality](https://executorlib.readthedocs.io/en/latest/1-single-node.html#basic-functionality)
 * [Parallel Functions](https://executorlib.readthedocs.io/en/latest/1-single-node.html#parallel-functions)
 * [Performance Optimization](https://executorlib.readthedocs.io/en/latest/1-single-node.html#performance-optimization)
+* [Testing and Debugging](https://executorlib.readthedocs.io/en/latest/1-single-node.html#testing-and-debugging)
 * [HPC Cluster Executor](https://executorlib.readthedocs.io/en/latest/2-hpc-cluster.html)
 * [SLURM](https://executorlib.readthedocs.io/en/latest/2-hpc-cluster.html#slurm)
 * [Flux](https://executorlib.readthedocs.io/en/latest/2-hpc-cluster.html#flux)
{executorlib-1.6.0 → executorlib-1.6.2}/executorlib/__init__.py

@@ -12,6 +12,9 @@ Finally, the get_cache_data() function allows users to cache the content of thei
 pandas.DataFrame.
 """

+from typing import Optional
+
+import executorlib._version
 from executorlib.executor.base import BaseExecutor
 from executorlib.executor.flux import (
     FluxClusterExecutor,
@@ -22,12 +25,48 @@ from executorlib.executor.slurm import (
     SlurmClusterExecutor,
     SlurmJobExecutor,
 )
-from executorlib.standalone.cache import get_cache_data

-from . import _version
+
+def get_cache_data(cache_directory: str) -> list[dict]:
+    """
+    Collect all HDF5 files in the cache directory
+
+    Args:
+        cache_directory (str): The directory to store cache files.
+
+    Returns:
+        list[dict]: List of dictionaries, each representing one of the HDF5 files in the cache directory.
+    """
+    from executorlib.standalone.hdf import get_cache_data
+
+    return get_cache_data(cache_directory=cache_directory)
+
+
+def terminate_tasks_in_cache(
+    cache_directory: str,
+    config_directory: Optional[str] = None,
+    backend: Optional[str] = None,
+):
+    """
+    Delete all jobs stored in the cache directory from the queuing system
+
+    Args:
+        cache_directory (str): The directory to store cache files.
+        config_directory (str, optional): path to the config directory.
+        backend (str, optional): name of the backend used to spawn tasks ["slurm", "flux"].
+    """
+    from executorlib.task_scheduler.file.queue_spawner import terminate_tasks_in_cache
+
+    return terminate_tasks_in_cache(
+        cache_directory=cache_directory,
+        config_directory=config_directory,
+        backend=backend,
+    )
+

 __all__: list[str] = [
     "get_cache_data",
+    "terminate_tasks_in_cache",
     "BaseExecutor",
     "FluxJobExecutor",
     "FluxClusterExecutor",
@@ -36,11 +75,4 @@ __all__: list[str] = [
     "SlurmClusterExecutor",
 ]

-try:
-    from executorlib.task_scheduler.file.queue_spawner import terminate_tasks_in_cache
-
-    __all__ += ["terminate_tasks_in_cache"]
-except ImportError:
-    pass
-
-__version__ = _version.__version__
+__version__ = executorlib._version.__version__
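The practical effect of this refactor: get_cache_data() and terminate_tasks_in_cache() are now always importable from the top-level executorlib namespace, with the optional h5py and pysqa dependencies loaded lazily inside the wrapper bodies instead of guarding the import with try/except. A minimal usage sketch; the executor choice, cache directory, and submitted function are illustrative assumptions, not taken from the diff:

from executorlib import SlurmClusterExecutor, get_cache_data, terminate_tasks_in_cache

# Submit a task whose inputs and outputs are cached as HDF5 files in ./cache.
with SlurmClusterExecutor(cache_directory="./cache") as exe:
    print(exe.submit(sum, [1, 2, 3]).result())

# Each dictionary describes one HDF5 file in the cache directory.
for entry in get_cache_data(cache_directory="./cache"):
    print(entry)

# Delete jobs belonging to this cache that are still known to the queuing system.
terminate_tasks_in_cache(cache_directory="./cache", backend="slurm")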
{executorlib-1.6.0 → executorlib-1.6.2}/executorlib/_version.py

@@ -1,7 +1,14 @@
 # file generated by setuptools-scm
 # don't change, don't track in version control

-__all__ = ["__version__", "__version_tuple__", "version", "version_tuple"]
+__all__ = [
+    "__version__",
+    "__version_tuple__",
+    "version",
+    "version_tuple",
+    "__commit_id__",
+    "commit_id",
+]

 TYPE_CHECKING = False
 if TYPE_CHECKING:
@@ -9,13 +16,19 @@ if TYPE_CHECKING:
     from typing import Union

     VERSION_TUPLE = Tuple[Union[int, str], ...]
+    COMMIT_ID = Union[str, None]
 else:
     VERSION_TUPLE = object
+    COMMIT_ID = object

 version: str
 __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE
+commit_id: COMMIT_ID
+__commit_id__: COMMIT_ID

-__version__ = version = '1.6.0'
-__version_tuple__ = version_tuple = (1, 6, 0)
+__version__ = version = '1.6.2'
+__version_tuple__ = version_tuple = (1, 6, 2)
+
+__commit_id__ = commit_id = None
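The regenerated setuptools-scm template additionally exposes a commit identifier alongside the version; for a release built from the sdist it is simply None. A quick check (hypothetical interactive session):

import executorlib

print(executorlib.__version__)             # '1.6.2'
print(executorlib._version.__commit_id__)  # None for this sdist build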
{executorlib-1.6.0 → executorlib-1.6.2}/executorlib/backend/interactive_parallel.py

@@ -43,7 +43,7 @@ def main() -> None:
         host=argument_dict["host"], port=argument_dict["zmqport"]
     )

-    memory = None
+    memory = {"executorlib_worker_id": int(argument_dict["worker_id"])}

     # required for flux interface - otherwise the current path is not included in the python path
     cwd = abspath(".")
@@ -97,7 +97,7 @@ def main() -> None:
             and "args" in input_dict
             and "kwargs" in input_dict
         ):
-            memory = call_funct(input_dict=input_dict, funct=None)
+            memory.update(call_funct(input_dict=input_dict, funct=None, memory=memory))


 if __name__ == "__main__":
{executorlib-1.6.0 → executorlib-1.6.2}/executorlib/backend/interactive_serial.py

@@ -29,7 +29,7 @@ def main(argument_lst: Optional[list[str]] = None):
         host=argument_dict["host"], port=argument_dict["zmqport"]
     )

-    memory = None
+    memory = {"executorlib_worker_id": int(argument_dict["worker_id"])}

     # required for flux interface - otherwise the current path is not included in the python path
     cwd = abspath(".")
@@ -72,7 +72,7 @@ def main(argument_lst: Optional[list[str]] = None):
             and "args" in input_dict
             and "kwargs" in input_dict
         ):
-            memory = call_funct(input_dict=input_dict, funct=None)
+            memory.update(call_funct(input_dict=input_dict, funct=None, memory=memory))


 if __name__ == "__main__":
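Both interactive backends now seed the worker memory with an executorlib_worker_id entry derived from the worker_id command line argument, and merge the results of call_funct into that dictionary instead of overwriting it. Assuming the block-allocation convention that worker-memory keys are injected into submitted functions as matching keyword arguments (an assumption suggested by the call_funct(..., memory=memory) signature, not spelled out in this diff), a task could identify the worker it runs on roughly like this:

from executorlib import SlurmJobExecutor  # illustrative executor choice

def report_worker(task, executorlib_worker_id=None):
    # executorlib_worker_id is assumed to be filled in from the worker memory
    return task, executorlib_worker_id

with SlurmJobExecutor(max_workers=2, block_allocation=True) as exe:
    print([exe.submit(report_worker, i).result() for i in range(4)])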
{executorlib-1.6.0 → executorlib-1.6.2}/executorlib/executor/base.py

@@ -50,6 +50,23 @@ class BaseExecutor(FutureExecutor, ABC):
         """
         return self._task_scheduler.future_queue

+    def batched(
+        self,
+        iterable: list[Future],
+        n: int,
+    ) -> list[Future]:
+        """
+        Batch futures from the iterable into tuples of length n. The last batch may be shorter than n.
+
+        Args:
+            iterable (list): list of future objects to batch based on which future objects finish first
+            n (int): batch size
+
+        Returns:
+            list[Future]: list of future objects one for each batch
+        """
+        return self._task_scheduler.batched(iterable=iterable, n=n)
+
     def submit(  # type: ignore
         self,
         fn: Callable,
@@ -68,16 +85,16 @@ class BaseExecutor(FutureExecutor, ABC):
             fn (callable): function to submit for execution
             args: arguments for the submitted function
             kwargs: keyword arguments for the submitted function
-            resource_dict (dict): resource dictionary, which defines the resources used for the execution of the
-                function. Example resource dictionary: {
-                    cores: 1,
-                    threads_per_core: 1,
-                    gpus_per_worker: 0,
-                    oversubscribe: False,
-                    cwd: None,
-                    executor: None,
-                    hostname_localhost: False,
-                }
+            resource_dict (dict): A dictionary of resources required by the task. With the following keys:
+                - cores (int): number of MPI cores to be used for each function call
+                - threads_per_core (int): number of OpenMP threads to be used for each function call
+                - gpus_per_core (int): number of GPUs per worker - defaults to 0
+                - cwd (str/None): current working directory where the parallel python task is executed
+                - openmpi_oversubscribe (bool): adds the `--oversubscribe` command line flag (OpenMPI and
+                  SLURM only) - default False
+                - slurm_cmd_args (list): Additional command line arguments for the srun call (SLURM only)
+                - error_log_file (str): Name of the error log file to use for storing exceptions raised
+                  by the Python functions submitted to the Executor.

         Returns:
             Future: A Future representing the given call.
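The new batched() method groups futures by completion order rather than submission order: each returned future resolves to a list of up to n results, and the last batch may be shorter. A sketch of the intended call pattern; the executor and workload are illustrative:

from executorlib import SlurmJobExecutor  # any BaseExecutor subclass

with SlurmJobExecutor(max_workers=4) as exe:
    futures = [exe.submit(pow, i, 2) for i in range(10)]
    # Expect three batch futures resolving to 4 + 4 + 2 results,
    # filled in whatever order the underlying tasks finish.
    for batch in exe.batched(iterable=futures, n=4):
        print(batch.result())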
{executorlib-1.6.0 → executorlib-1.6.2}/executorlib/executor/flux.py

@@ -43,8 +43,8 @@ class FluxJobExecutor(BaseExecutor):
              compute nodes. Defaults to False.
            - error_log_file (str): Name of the error log file to use for storing exceptions raised
              by the Python functions submitted to the Executor.
+        pmi_mode (str): PMI interface to use (OpenMPI v5 requires pmix) default is None
         flux_executor (flux.job.FluxExecutor): Flux Python interface to submit the workers to flux
-        flux_executor_pmi_mode (str): PMI interface to use (OpenMPI v5 requires pmix) default is None (Flux only)
         flux_executor_nesting (bool): Provide hierarchically nested Flux job scheduler inside the submitted function.
         flux_log_files (bool, optional): Write flux stdout and stderr files. Defaults to False.
         hostname_localhost (boolean): use localhost instead of the hostname to establish the zmq connection. In the
@@ -93,8 +93,8 @@ class FluxJobExecutor(BaseExecutor):
         cache_directory: Optional[str] = None,
         max_cores: Optional[int] = None,
         resource_dict: Optional[dict] = None,
+        pmi_mode: Optional[str] = None,
         flux_executor=None,
-        flux_executor_pmi_mode: Optional[str] = None,
         flux_executor_nesting: bool = False,
         flux_log_files: bool = False,
         hostname_localhost: Optional[bool] = None,
@@ -130,8 +130,8 @@ class FluxJobExecutor(BaseExecutor):
              compute nodes. Defaults to False.
            - error_log_file (str): Name of the error log file to use for storing exceptions
              raised by the Python functions submitted to the Executor.
+        pmi_mode (str): PMI interface to use (OpenMPI v5 requires pmix) default is None
         flux_executor (flux.job.FluxExecutor): Flux Python interface to submit the workers to flux
-        flux_executor_pmi_mode (str): PMI interface to use (OpenMPI v5 requires pmix) default is None (Flux only)
         flux_executor_nesting (bool): Provide hierarchically nested Flux job scheduler inside the submitted function.
         flux_log_files (bool, optional): Write flux stdout and stderr files. Defaults to False.
         hostname_localhost (boolean): use localhost instead of the hostname to establish the zmq connection. In the
@@ -175,8 +175,8 @@ class FluxJobExecutor(BaseExecutor):
                 cache_directory=cache_directory,
                 max_cores=max_cores,
                 resource_dict=resource_dict,
+                pmi_mode=pmi_mode,
                 flux_executor=flux_executor,
-                flux_executor_pmi_mode=flux_executor_pmi_mode,
                 flux_executor_nesting=flux_executor_nesting,
                 flux_log_files=flux_log_files,
                 hostname_localhost=hostname_localhost,
@@ -199,8 +199,8 @@ class FluxJobExecutor(BaseExecutor):
                 cache_directory=cache_directory,
                 max_cores=max_cores,
                 resource_dict=resource_dict,
+                pmi_mode=pmi_mode,
                 flux_executor=flux_executor,
-                flux_executor_pmi_mode=flux_executor_pmi_mode,
                 flux_executor_nesting=flux_executor_nesting,
                 flux_log_files=flux_log_files,
                 hostname_localhost=hostname_localhost,
@@ -236,6 +236,7 @@ class FluxClusterExecutor(BaseExecutor):
            - error_log_file (str): Name of the error log file to use for storing exceptions raised
              by the Python functions submitted to the Executor.
         pysqa_config_directory (str, optional): path to the pysqa config directory (only for pysqa based backend).
+        pmi_mode (str): PMI interface to use (OpenMPI v5 requires pmix) default is None
         hostname_localhost (boolean): use localhost instead of the hostname to establish the zmq connection. In the
             context of an HPC cluster this is essential to be able to communicate to an
             Executor running on a different compute node within the same allocation. And
@@ -283,6 +284,7 @@ class FluxClusterExecutor(BaseExecutor):
         max_cores: Optional[int] = None,
         resource_dict: Optional[dict] = None,
         pysqa_config_directory: Optional[str] = None,
+        pmi_mode: Optional[str] = None,
         hostname_localhost: Optional[bool] = None,
         block_allocation: bool = False,
         init_function: Optional[Callable] = None,
@@ -317,6 +319,7 @@ class FluxClusterExecutor(BaseExecutor):
            - error_log_file (str): Name of the error log file to use for storing exceptions
              raised by the Python functions submitted to the Executor.
         pysqa_config_directory (str, optional): path to the pysqa config directory (only for pysqa based backend).
+        pmi_mode (str): PMI interface to use (OpenMPI v5 requires pmix) default is None
         hostname_localhost (boolean): use localhost instead of the hostname to establish the zmq connection. In the
             context of an HPC cluster this is essential to be able to communicate to an
             Executor running on a different compute node within the same allocation. And
@@ -366,7 +369,7 @@ class FluxClusterExecutor(BaseExecutor):
                 cache_directory=cache_directory,
                 resource_dict=resource_dict,
                 flux_executor=None,
-                flux_executor_pmi_mode=None,
+                pmi_mode=pmi_mode,
                 flux_executor_nesting=False,
                 flux_log_files=False,
                 pysqa_config_directory=pysqa_config_directory,
@@ -384,8 +387,8 @@ class FluxClusterExecutor(BaseExecutor):
                 cache_directory=cache_directory,
                 max_cores=max_cores,
                 resource_dict=resource_dict,
+                pmi_mode=None,
                 flux_executor=None,
-                flux_executor_pmi_mode=None,
                 flux_executor_nesting=False,
                 flux_log_files=False,
                 hostname_localhost=hostname_localhost,
@@ -405,8 +408,8 @@ def create_flux_executor(
     max_cores: Optional[int] = None,
     cache_directory: Optional[str] = None,
     resource_dict: Optional[dict] = None,
+    pmi_mode: Optional[str] = None,
     flux_executor=None,
-    flux_executor_pmi_mode: Optional[str] = None,
     flux_executor_nesting: bool = False,
     flux_log_files: bool = False,
     hostname_localhost: Optional[bool] = None,
@@ -434,8 +437,8 @@ def create_flux_executor(
              compute nodes. Defaults to False.
            - error_log_file (str): Name of the error log file to use for storing exceptions raised
              by the Python functions submitted to the Executor.
+        pmi_mode (str): PMI interface to use (OpenMPI v5 requires pmix) default is None
         flux_executor (flux.job.FluxExecutor): Flux Python interface to submit the workers to flux
-        flux_executor_pmi_mode (str): PMI interface to use (OpenMPI v5 requires pmix) default is None (Flux only)
         flux_executor_nesting (bool): Provide hierarchically nested Flux job scheduler inside the submitted function.
         flux_log_files (bool, optional): Write flux stdout and stderr files. Defaults to False.
         hostname_localhost (boolean): use localhost instead of the hostname to establish the zmq connection. In the
@@ -467,7 +470,7 @@ def create_flux_executor(
     resource_dict["hostname_localhost"] = hostname_localhost
     resource_dict["log_obj_size"] = log_obj_size
     check_init_function(block_allocation=block_allocation, init_function=init_function)
-    check_pmi(backend="flux_allocation", pmi=flux_executor_pmi_mode)
+    check_pmi(backend="flux_allocation", pmi=pmi_mode)
     check_oversubscribe(oversubscribe=resource_dict.get("openmpi_oversubscribe", False))
     check_command_line_argument_lst(
         command_line_argument_lst=resource_dict.get("slurm_cmd_args", [])
@@ -476,8 +479,8 @@ def create_flux_executor(
         del resource_dict["openmpi_oversubscribe"]
     if "slurm_cmd_args" in resource_dict:
         del resource_dict["slurm_cmd_args"]
+    resource_dict["pmi_mode"] = pmi_mode
     resource_dict["flux_executor"] = flux_executor
-    resource_dict["flux_executor_pmi_mode"] = flux_executor_pmi_mode
     resource_dict["flux_executor_nesting"] = flux_executor_nesting
     resource_dict["flux_log_files"] = flux_log_files
     if block_allocation:
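Taken together, the flux.py hunks rename the Flux-specific flux_executor_pmi_mode keyword to the backend-agnostic pmi_mode, which FluxClusterExecutor now also forwards to the pysqa-based submission path. A 1.6.0 call site would migrate roughly like this (values illustrative):

from executorlib import FluxJobExecutor

# executorlib 1.6.0: FluxJobExecutor(flux_executor_pmi_mode="pmix", ...)
# executorlib 1.6.2: the renamed, backend-agnostic keyword
with FluxJobExecutor(max_workers=2, pmi_mode="pmix") as exe:
    print(exe.submit(sum, [1, 2, 3]).result())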
{executorlib-1.6.0 → executorlib-1.6.2}/executorlib/executor/single.py

@@ -329,7 +329,7 @@ class TestClusterExecutor(BaseExecutor):
                 cache_directory=cache_directory,
                 resource_dict=resource_dict,
                 flux_executor=None,
-                flux_executor_pmi_mode=None,
+                pmi_mode=None,
                 flux_executor_nesting=False,
                 flux_log_files=False,
                 pysqa_config_directory=None,
{executorlib-1.6.0 → executorlib-1.6.2}/executorlib/executor/slurm.py

@@ -44,6 +44,7 @@ class SlurmClusterExecutor(BaseExecutor):
            - error_log_file (str): Name of the error log file to use for storing exceptions raised
              by the Python functions submitted to the Executor.
         pysqa_config_directory (str, optional): path to the pysqa config directory (only for pysqa based backend).
+        pmi_mode (str): PMI interface to use (OpenMPI v5 requires pmix) default is None
         hostname_localhost (boolean): use localhost instead of the hostname to establish the zmq connection. In the
             context of an HPC cluster this is essential to be able to communicate to an
             Executor running on a different compute node within the same allocation. And
@@ -91,6 +92,7 @@ class SlurmClusterExecutor(BaseExecutor):
         max_cores: Optional[int] = None,
         resource_dict: Optional[dict] = None,
         pysqa_config_directory: Optional[str] = None,
+        pmi_mode: Optional[str] = None,
         hostname_localhost: Optional[bool] = None,
         block_allocation: bool = False,
         init_function: Optional[Callable] = None,
@@ -125,6 +127,7 @@ class SlurmClusterExecutor(BaseExecutor):
            - error_log_file (str): Name of the error log file to use for storing exceptions
              raised by the Python functions submitted to the Executor.
         pysqa_config_directory (str, optional): path to the pysqa config directory (only for pysqa based backend).
+        pmi_mode (str): PMI interface to use (OpenMPI v5 requires pmix) default is None
         hostname_localhost (boolean): use localhost instead of the hostname to establish the zmq connection. In the
             context of an HPC cluster this is essential to be able to communicate to an
             Executor running on a different compute node within the same allocation. And
@@ -173,8 +176,8 @@ class SlurmClusterExecutor(BaseExecutor):
                 max_cores=max_cores,
                 cache_directory=cache_directory,
                 resource_dict=resource_dict,
+                pmi_mode=pmi_mode,
                 flux_executor=None,
-                flux_executor_pmi_mode=None,
                 flux_executor_nesting=False,
                 flux_log_files=False,
                 pysqa_config_directory=pysqa_config_directory,
@@ -232,6 +235,7 @@ class SlurmJobExecutor(BaseExecutor):
              compute nodes. Defaults to False.
            - error_log_file (str): Name of the error log file to use for storing exceptions raised
              by the Python functions submitted to the Executor.
+        pmi_mode (str): PMI interface to use (OpenMPI v5 requires pmix) default is None
         hostname_localhost (boolean): use localhost instead of the hostname to establish the zmq connection. In the
             context of an HPC cluster this is essential to be able to communicate to an
             Executor running on a different compute node within the same allocation. And
@@ -278,6 +282,7 @@ class SlurmJobExecutor(BaseExecutor):
         cache_directory: Optional[str] = None,
         max_cores: Optional[int] = None,
         resource_dict: Optional[dict] = None,
+        pmi_mode: Optional[str] = None,
         hostname_localhost: Optional[bool] = None,
         block_allocation: bool = False,
         init_function: Optional[Callable] = None,
@@ -315,6 +320,7 @@ class SlurmJobExecutor(BaseExecutor):
              compute nodes. Defaults to False.
            - error_log_file (str): Name of the error log file to use for storing exceptions
              raised by the Python functions submitted to the Executor.
+        pmi_mode (str): PMI interface to use (OpenMPI v5 requires pmix) default is None
         hostname_localhost (boolean): use localhost instead of the hostname to establish the zmq connection. In the
             context of an HPC cluster this is essential to be able to communicate to an
             Executor running on a different compute node within the same allocation. And
@@ -356,6 +362,7 @@ class SlurmJobExecutor(BaseExecutor):
                 cache_directory=cache_directory,
                 max_cores=max_cores,
                 resource_dict=resource_dict,
+                pmi_mode=pmi_mode,
                 hostname_localhost=hostname_localhost,
                 block_allocation=block_allocation,
                 init_function=init_function,
@@ -376,6 +383,7 @@ class SlurmJobExecutor(BaseExecutor):
                 cache_directory=cache_directory,
                 max_cores=max_cores,
                 resource_dict=resource_dict,
+                pmi_mode=pmi_mode,
                 hostname_localhost=hostname_localhost,
                 block_allocation=block_allocation,
                 init_function=init_function,
@@ -389,6 +397,7 @@ def create_slurm_executor(
     max_cores: Optional[int] = None,
     cache_directory: Optional[str] = None,
     resource_dict: Optional[dict] = None,
+    pmi_mode: Optional[str] = None,
     hostname_localhost: Optional[bool] = None,
     block_allocation: bool = False,
     init_function: Optional[Callable] = None,
@@ -418,6 +427,7 @@ def create_slurm_executor(
              compute nodes. Defaults to False.
            - error_log_file (str): Name of the error log file to use for storing exceptions raised
              by the Python functions submitted to the Executor.
+        pmi_mode (str): PMI interface to use (OpenMPI v5 requires pmix) default is None
         hostname_localhost (boolean): use localhost instead of the hostname to establish the zmq connection. In the
             context of an HPC cluster this is essential to be able to communicate to an
             Executor running on a different compute node within the same allocation. And
@@ -441,6 +451,7 @@ def create_slurm_executor(
     resource_dict["cache_directory"] = cache_directory
     resource_dict["hostname_localhost"] = hostname_localhost
     resource_dict["log_obj_size"] = log_obj_size
+    resource_dict["pmi_mode"] = pmi_mode
     check_init_function(block_allocation=block_allocation, init_function=init_function)
     if block_allocation:
         resource_dict["init_function"] = init_function
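slurm.py gains the same pmi_mode keyword; for SLURM it ends up as an --mpi=<pmi_mode> flag on the srun call (see get_cache_execute_command in the new standalone/command.py below). An illustrative sketch, with cache location and resources assumed:

from executorlib import SlurmClusterExecutor

with SlurmClusterExecutor(
    cache_directory="./cache",   # assumed cache location
    resource_dict={"cores": 2},  # two MPI ranks per task
    pmi_mode="pmix",             # forwarded to srun as --mpi=pmix
) as exe:
    future = exe.submit(sum, [1, 2, 3])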
executorlib-1.6.2/executorlib/standalone/batched.py (new file)

@@ -0,0 +1,27 @@
+from concurrent.futures import Future
+
+
+def batched_futures(lst: list[Future], skip_lst: list[list], n: int) -> list[list]:
+    """
+    Batch n completed future objects. If fewer than n not-yet-batched futures have completed and the end of the list
+    has not been reached yet, an empty list is returned. Once n future objects that are not included in the skip_lst
+    are done, they are returned as a batch.
+
+    Args:
+        lst (list): list of all future objects
+        skip_lst (list): list of previous batches of future objects
+        n (int): batch size
+
+    Returns:
+        list: results of the batched futures
+    """
+    skipped_elements_lst = [item for items in skip_lst for item in items]
+
+    done_lst = []
+    n_expected = min(n, len(lst) - len(skipped_elements_lst))
+    for v in lst:
+        if v.done() and v.result() not in skipped_elements_lst:
+            done_lst.append(v.result())
+            if len(done_lst) == n_expected:
+                return done_lst
+    return []
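batched_futures() is a polling helper: it returns a full batch of results once n not-yet-batched futures are done (or once only the shorter remainder is left), and an empty list otherwise. Note that membership in skip_lst is checked on result values, not on the future objects themselves. A self-contained illustration with hand-resolved futures, no executor involved:

from concurrent.futures import Future

from executorlib.standalone.batched import batched_futures

futures = []
for i in range(5):
    f = Future()
    f.set_result(i)  # mark the future as done with result i
    futures.append(f)

first = batched_futures(lst=futures, skip_lst=[], n=2)              # [0, 1]
second = batched_futures(lst=futures, skip_lst=[first], n=2)        # [2, 3]
last = batched_futures(lst=futures, skip_lst=[first, second], n=2)  # [4]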
executorlib-1.6.2/executorlib/standalone/command.py (new file)

@@ -0,0 +1,114 @@
+import importlib.util
+import os
+import sys
+from typing import Optional
+
+
+def get_command_path(executable: str) -> str:
+    """
+    Get path of the backend executable script
+
+    Args:
+        executable (str): Name of the backend executable script, either mpiexec.py or serial.py
+
+    Returns:
+        str: absolute path to the executable script
+    """
+    return os.path.abspath(os.path.join(__file__, "..", "..", "backend", executable))
+
+
+def get_cache_execute_command(
+    file_name: str,
+    cores: int = 1,
+    backend: Optional[str] = None,
+    exclusive: bool = False,
+    openmpi_oversubscribe: bool = False,
+    pmi_mode: Optional[str] = None,
+) -> list:
+    """
+    Get command to call backend as a list of two strings
+
+    Args:
+        file_name (str): The name of the file.
+        cores (int, optional): Number of cores used to execute the task. Defaults to 1.
+        backend (str, optional): name of the backend used to spawn tasks ["slurm", "flux"].
+        exclusive (bool): Whether to exclusively reserve the compute nodes, or allow sharing compute nodes. Defaults to False.
+        openmpi_oversubscribe (bool, optional): Whether to oversubscribe the cores. Defaults to False.
+        pmi_mode (str): PMI interface to use (OpenMPI v5 requires pmix) default is None
+
+    Returns:
+        list[str]: List of strings containing the python executable path and the backend script to execute
+    """
+    command_lst = [sys.executable]
+    if cores > 1 and importlib.util.find_spec("mpi4py") is not None:
+        if backend is None:
+            command_lst = (
+                ["mpiexec", "-n", str(cores)]
+                + command_lst
+                + [get_command_path(executable="cache_parallel.py"), file_name]
+            )
+        elif backend == "slurm":
+            command_prepend = ["srun", "-n", str(cores)]
+            if pmi_mode is not None:
+                command_prepend += ["--mpi=" + pmi_mode]
+            if openmpi_oversubscribe:
+                command_prepend += ["--oversubscribe"]
+            if exclusive:
+                command_prepend += ["--exact"]
+            command_lst = (
+                command_prepend
+                + command_lst
+                + [get_command_path(executable="cache_parallel.py"), file_name]
+            )
+        elif backend == "flux":
+            flux_command = ["flux", "run"]
+            if pmi_mode is not None:
+                flux_command += ["-o", "pmi=" + pmi_mode]
+            if openmpi_oversubscribe:
+                raise ValueError(
+                    "The option openmpi_oversubscribe is not available with the flux backend."
+                )
+            if exclusive:
+                raise ValueError(
+                    "The option exclusive is not available with the flux backend."
+                )
+            command_lst = (
+                flux_command
+                + ["-n", str(cores)]
+                + command_lst
+                + [get_command_path(executable="cache_parallel.py"), file_name]
+            )
+        else:
+            raise ValueError(f"backend should be None, slurm or flux, not {backend}")
+    elif cores > 1:
+        raise ImportError(
+            "mpi4py is required for parallel calculations. Please install mpi4py."
+        )
+    else:
+        command_lst += [get_command_path(executable="cache_serial.py"), file_name]
+    return command_lst
+
+
+def get_interactive_execute_command(
+    cores: int,
+) -> list:
+    """
+    Get command to call backend as a list of two strings
+
+    Args:
+        cores (int): Number of cores used to execute the task, if it is greater than one use interactive_parallel.py
+            else interactive_serial.py
+
+    Returns:
+        list[str]: List of strings containing the python executable path and the backend script to execute
+    """
+    command_lst = [sys.executable]
+    if cores > 1 and importlib.util.find_spec("mpi4py") is not None:
+        command_lst += [get_command_path(executable="interactive_parallel.py")]
+    elif cores > 1:
+        raise ImportError(
+            "mpi4py is required for parallel calculations. Please install mpi4py."
+        )
+    else:
+        command_lst += [get_command_path(executable="interactive_serial.py")]
+    return command_lst
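These helpers appear to consolidate command construction that was previously spread across the removed standalone/command.py and the spawner modules. Under the logic above, the following calls should produce (file name illustrative):

from executorlib.standalone.command import get_cache_execute_command

# Serial task: the current interpreter plus the cache_serial.py backend script.
print(get_cache_execute_command(file_name="task.h5"))
# [sys.executable, '.../executorlib/backend/cache_serial.py', 'task.h5']

# Four MPI ranks through SLURM with PMIx (requires mpi4py to be installed):
print(get_cache_execute_command(file_name="task.h5", cores=4, backend="slurm", pmi_mode="pmix"))
# ['srun', '-n', '4', '--mpi=pmix', sys.executable, '.../backend/cache_parallel.py', 'task.h5']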