executorlib 0.0.8__tar.gz → 0.0.10__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. {executorlib-0.0.8/executorlib.egg-info → executorlib-0.0.10}/PKG-INFO +2 -2
  2. {executorlib-0.0.8 → executorlib-0.0.10}/executorlib/__init__.py +15 -12
  3. {executorlib-0.0.8 → executorlib-0.0.10}/executorlib/_version.py +3 -3
  4. executorlib-0.0.10/executorlib/interactive/create.py +287 -0
  5. executorlib-0.0.10/executorlib/interactive/executor.py +132 -0
  6. {executorlib-0.0.8 → executorlib-0.0.10/executorlib.egg-info}/PKG-INFO +2 -2
  7. {executorlib-0.0.8 → executorlib-0.0.10}/executorlib.egg-info/SOURCES.txt +1 -0
  8. {executorlib-0.0.8 → executorlib-0.0.10}/executorlib.egg-info/requires.txt +1 -1
  9. {executorlib-0.0.8 → executorlib-0.0.10}/pyproject.toml +1 -1
  10. {executorlib-0.0.8 → executorlib-0.0.10}/tests/test_dependencies_executor.py +1 -1
  11. executorlib-0.0.8/executorlib/interactive/executor.py +0 -329
  12. {executorlib-0.0.8 → executorlib-0.0.10}/LICENSE +0 -0
  13. {executorlib-0.0.8 → executorlib-0.0.10}/MANIFEST.in +0 -0
  14. {executorlib-0.0.8 → executorlib-0.0.10}/README.md +0 -0
  15. {executorlib-0.0.8 → executorlib-0.0.10}/executorlib/backend/__init__.py +0 -0
  16. {executorlib-0.0.8 → executorlib-0.0.10}/executorlib/backend/cache_parallel.py +0 -0
  17. {executorlib-0.0.8 → executorlib-0.0.10}/executorlib/backend/cache_serial.py +0 -0
  18. {executorlib-0.0.8 → executorlib-0.0.10}/executorlib/backend/interactive_parallel.py +0 -0
  19. {executorlib-0.0.8 → executorlib-0.0.10}/executorlib/backend/interactive_serial.py +0 -0
  20. {executorlib-0.0.8 → executorlib-0.0.10}/executorlib/base/__init__.py +0 -0
  21. {executorlib-0.0.8 → executorlib-0.0.10}/executorlib/base/executor.py +0 -0
  22. {executorlib-0.0.8 → executorlib-0.0.10}/executorlib/cache/__init__.py +0 -0
  23. {executorlib-0.0.8 → executorlib-0.0.10}/executorlib/cache/backend.py +0 -0
  24. {executorlib-0.0.8 → executorlib-0.0.10}/executorlib/cache/executor.py +0 -0
  25. {executorlib-0.0.8 → executorlib-0.0.10}/executorlib/cache/queue_spawner.py +0 -0
  26. {executorlib-0.0.8 → executorlib-0.0.10}/executorlib/cache/shared.py +0 -0
  27. {executorlib-0.0.8 → executorlib-0.0.10}/executorlib/cache/subprocess_spawner.py +0 -0
  28. {executorlib-0.0.8 → executorlib-0.0.10}/executorlib/interactive/__init__.py +0 -0
  29. {executorlib-0.0.8 → executorlib-0.0.10}/executorlib/interactive/flux.py +0 -0
  30. {executorlib-0.0.8 → executorlib-0.0.10}/executorlib/interactive/shared.py +0 -0
  31. {executorlib-0.0.8 → executorlib-0.0.10}/executorlib/interactive/slurm.py +0 -0
  32. {executorlib-0.0.8 → executorlib-0.0.10}/executorlib/standalone/__init__.py +0 -0
  33. {executorlib-0.0.8 → executorlib-0.0.10}/executorlib/standalone/command.py +0 -0
  34. {executorlib-0.0.8 → executorlib-0.0.10}/executorlib/standalone/hdf.py +0 -0
  35. {executorlib-0.0.8 → executorlib-0.0.10}/executorlib/standalone/inputcheck.py +0 -0
  36. {executorlib-0.0.8 → executorlib-0.0.10}/executorlib/standalone/interactive/__init__.py +0 -0
  37. {executorlib-0.0.8 → executorlib-0.0.10}/executorlib/standalone/interactive/backend.py +0 -0
  38. {executorlib-0.0.8 → executorlib-0.0.10}/executorlib/standalone/interactive/communication.py +0 -0
  39. {executorlib-0.0.8 → executorlib-0.0.10}/executorlib/standalone/interactive/spawner.py +0 -0
  40. {executorlib-0.0.8 → executorlib-0.0.10}/executorlib/standalone/plot.py +0 -0
  41. {executorlib-0.0.8 → executorlib-0.0.10}/executorlib/standalone/queue.py +0 -0
  42. {executorlib-0.0.8 → executorlib-0.0.10}/executorlib/standalone/serialize.py +0 -0
  43. {executorlib-0.0.8 → executorlib-0.0.10}/executorlib/standalone/thread.py +0 -0
  44. {executorlib-0.0.8 → executorlib-0.0.10}/executorlib.egg-info/dependency_links.txt +0 -0
  45. {executorlib-0.0.8 → executorlib-0.0.10}/executorlib.egg-info/top_level.txt +0 -0
  46. {executorlib-0.0.8 → executorlib-0.0.10}/setup.cfg +0 -0
  47. {executorlib-0.0.8 → executorlib-0.0.10}/setup.py +0 -0
  48. {executorlib-0.0.8 → executorlib-0.0.10}/tests/test_backend_serial.py +0 -0
  49. {executorlib-0.0.8 → executorlib-0.0.10}/tests/test_cache_executor_interactive.py +0 -0
  50. {executorlib-0.0.8 → executorlib-0.0.10}/tests/test_cache_executor_mpi.py +0 -0
  51. {executorlib-0.0.8 → executorlib-0.0.10}/tests/test_cache_executor_pysqa_flux.py +0 -0
  52. {executorlib-0.0.8 → executorlib-0.0.10}/tests/test_cache_executor_serial.py +0 -0
  53. {executorlib-0.0.8 → executorlib-0.0.10}/tests/test_cache_hdf.py +0 -0
  54. {executorlib-0.0.8 → executorlib-0.0.10}/tests/test_cache_shared.py +0 -0
  55. {executorlib-0.0.8 → executorlib-0.0.10}/tests/test_executor_backend_flux.py +0 -0
  56. {executorlib-0.0.8 → executorlib-0.0.10}/tests/test_executor_backend_mpi.py +0 -0
  57. {executorlib-0.0.8 → executorlib-0.0.10}/tests/test_executor_backend_mpi_noblock.py +0 -0
  58. {executorlib-0.0.8 → executorlib-0.0.10}/tests/test_flux_executor.py +0 -0
  59. {executorlib-0.0.8 → executorlib-0.0.10}/tests/test_integration_pyiron_workflow.py +0 -0
  60. {executorlib-0.0.8 → executorlib-0.0.10}/tests/test_local_executor.py +0 -0
  61. {executorlib-0.0.8 → executorlib-0.0.10}/tests/test_local_executor_future.py +0 -0
  62. {executorlib-0.0.8 → executorlib-0.0.10}/tests/test_pysqa_subprocess.py +0 -0
  63. {executorlib-0.0.8 → executorlib-0.0.10}/tests/test_shared_backend.py +0 -0
  64. {executorlib-0.0.8 → executorlib-0.0.10}/tests/test_shared_communication.py +0 -0
  65. {executorlib-0.0.8 → executorlib-0.0.10}/tests/test_shared_executorbase.py +0 -0
  66. {executorlib-0.0.8 → executorlib-0.0.10}/tests/test_shared_input_check.py +0 -0
  67. {executorlib-0.0.8 → executorlib-0.0.10}/tests/test_shared_thread.py +0 -0
  68. {executorlib-0.0.8 → executorlib-0.0.10}/tests/test_shell_executor.py +0 -0
  69. {executorlib-0.0.8 → executorlib-0.0.10}/tests/test_shell_interactive.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: executorlib
3
- Version: 0.0.8
3
+ Version: 0.0.10
4
4
  Summary: Scale serial and MPI-parallel python functions over hundreds of compute nodes all from within a jupyter notebook or serial python process.
5
5
  Author-email: Jan Janssen <janssen@lanl.gov>
6
6
  License: BSD 3-Clause License
@@ -51,7 +51,7 @@ Requires-Python: <3.14,>=3.9
51
51
  Description-Content-Type: text/markdown
52
52
  License-File: LICENSE
53
53
  Requires-Dist: cloudpickle<=3.1.1,>=2.0.0
54
- Requires-Dist: pyzmq<=26.2.0,>=25.0.0
54
+ Requires-Dist: pyzmq<=26.2.1,>=25.0.0
55
55
  Provides-Extra: cache
56
56
  Requires-Dist: h5py<=3.12.1,>=3.6.0; extra == "cache"
57
57
  Provides-Extra: graph
@@ -1,10 +1,10 @@
1
1
  from typing import Callable, Optional
2
2
 
3
3
  from executorlib._version import get_versions as _get_versions
4
+ from executorlib.interactive.create import create_executor as _create_executor
4
5
  from executorlib.interactive.executor import (
5
6
  ExecutorWithDependencies as _ExecutorWithDependencies,
6
7
  )
7
- from executorlib.interactive.executor import create_executor as _create_executor
8
8
  from executorlib.standalone.inputcheck import (
9
9
  check_plot_dependency_graph as _check_plot_dependency_graph,
10
10
  )
@@ -212,18 +212,21 @@ class Executor:
212
212
  elif not disable_dependencies:
213
213
  _check_pysqa_config_directory(pysqa_config_directory=pysqa_config_directory)
214
214
  return _ExecutorWithDependencies(
215
- max_workers=max_workers,
216
- backend=backend,
217
- cache_directory=cache_directory,
215
+ executor=_create_executor(
216
+ max_workers=max_workers,
217
+ backend=backend,
218
+ cache_directory=cache_directory,
219
+ max_cores=max_cores,
220
+ resource_dict=resource_dict,
221
+ flux_executor=flux_executor,
222
+ flux_executor_pmi_mode=flux_executor_pmi_mode,
223
+ flux_executor_nesting=flux_executor_nesting,
224
+ flux_log_files=flux_log_files,
225
+ hostname_localhost=hostname_localhost,
226
+ block_allocation=block_allocation,
227
+ init_function=init_function,
228
+ ),
218
229
  max_cores=max_cores,
219
- resource_dict=resource_dict,
220
- flux_executor=flux_executor,
221
- flux_executor_pmi_mode=flux_executor_pmi_mode,
222
- flux_executor_nesting=flux_executor_nesting,
223
- flux_log_files=flux_log_files,
224
- hostname_localhost=hostname_localhost,
225
- block_allocation=block_allocation,
226
- init_function=init_function,
227
230
  refresh_rate=refresh_rate,
228
231
  plot_dependency_graph=plot_dependency_graph,
229
232
  plot_dependency_graph_filename=plot_dependency_graph_filename,
@@ -8,11 +8,11 @@ import json
8
8
 
9
9
  version_json = '''
10
10
  {
11
- "date": "2025-01-15T15:37:07+0100",
11
+ "date": "2025-02-01T14:41:16+0100",
12
12
  "dirty": true,
13
13
  "error": null,
14
- "full-revisionid": "ca46b327d9dba74c8e57180646887464cca1a758",
15
- "version": "0.0.8"
14
+ "full-revisionid": "2a5c109632ab691cd7e4309ca43a29354424b091",
15
+ "version": "0.0.10"
16
16
  }
17
17
  ''' # END VERSION_JSON
18
18
 
@@ -0,0 +1,287 @@
1
+ from typing import Callable, Optional, Union
2
+
3
+ from executorlib.interactive.shared import (
4
+ InteractiveExecutor,
5
+ InteractiveStepExecutor,
6
+ )
7
+ from executorlib.interactive.slurm import SrunSpawner
8
+ from executorlib.interactive.slurm import (
9
+ validate_max_workers as validate_max_workers_slurm,
10
+ )
11
+ from executorlib.standalone.inputcheck import (
12
+ check_command_line_argument_lst,
13
+ check_executor,
14
+ check_flux_log_files,
15
+ check_gpus_per_worker,
16
+ check_init_function,
17
+ check_nested_flux_executor,
18
+ check_oversubscribe,
19
+ check_pmi,
20
+ validate_number_of_cores,
21
+ )
22
+ from executorlib.standalone.interactive.spawner import MpiExecSpawner
23
+
24
+ try: # The PyFluxExecutor requires flux-base to be installed.
25
+ from executorlib.interactive.flux import FluxPythonSpawner
26
+ from executorlib.interactive.flux import (
27
+ validate_max_workers as validate_max_workers_flux,
28
+ )
29
+ except ImportError:
30
+ pass
31
+
32
+
33
def create_executor(
    max_workers: Optional[int] = None,
    backend: str = "local",
    max_cores: Optional[int] = None,
    cache_directory: Optional[str] = None,
    resource_dict: Optional[dict] = None,
    flux_executor=None,
    flux_executor_pmi_mode: Optional[str] = None,
    flux_executor_nesting: bool = False,
    flux_log_files: bool = False,
    hostname_localhost: Optional[bool] = None,
    block_allocation: bool = False,
    init_function: Optional[Callable] = None,
) -> Union[InteractiveStepExecutor, InteractiveExecutor]:
    """
    Create the interactive executor which matches the selected backend.

    The call is dispatched to create_flux_allocation_executor(), create_slurm_allocation_executor() or
    create_local_executor(). Whenever a flux_executor is provided the backend is switched to "flux_allocation",
    independent of the backend which was requested.

    Args:
        max_workers (int): for backwards compatibility with the standard library, max_workers also defines the number of
                           cores which can be used in parallel - just like the max_cores parameter. Using max_cores is
                           recommended, as computers have a limited number of compute cores.
        backend (str): Switch between the backends "flux_allocation", "slurm_allocation" and "local". The default is
                       "local". Any other value raises a ValueError in this function.
        max_cores (int): defines the number of cores which can be used in parallel
        cache_directory (str, optional): The directory to store cache files. It is forwarded to the backend specific
                                         factory function.
        resource_dict (dict): A dictionary of resources required by the task. The dictionary is copied, so the object
                              provided by the caller is not modified. With the following keys:
                              - cores (int): number of MPI cores to be used for each function call
                              - threads_per_core (int): number of OpenMP threads to be used for each function call
                              - gpus_per_core (int): number of GPUs per worker - defaults to 0
                              - cwd (str/None): current working directory where the parallel python task is executed
                              - openmpi_oversubscribe (bool): adds the `--oversubscribe` command line flag (OpenMPI and
                                                              SLURM only) - default False
                              - slurm_cmd_args (list): Additional command line arguments for the srun call (SLURM only)
        flux_executor (flux.job.FluxExecutor): Flux Python interface to submit the workers to flux
        flux_executor_pmi_mode (str): PMI interface to use (OpenMPI v5 requires pmix) default is None (Flux only)
        flux_executor_nesting (bool): Provide hierarchically nested Flux job scheduler inside the submitted function.
        flux_log_files (bool, optional): Write flux stdout and stderr files. Defaults to False.
        hostname_localhost (boolean): use localhost instead of the hostname to establish the zmq connection. In the
                                      context of an HPC cluster this is essential to be able to communicate to an
                                      Executor running on a different compute node within the same allocation. And
                                      in principle any computer should be able to resolve that their own hostname
                                      points to the same address as localhost. Still MacOS >= 12 seems to disable
                                      this look up for security reasons. So on MacOS it is required to set this
                                      option to true
        block_allocation (boolean): To accelerate the submission of a series of python functions with the same
                                    resource requirements, executorlib supports block allocation. In this case all
                                    resources have to be defined on the executor, rather than during the submission
                                    of the individual function.
        init_function (None): optional function to preset arguments for functions which are submitted later

    Returns:
        Union[InteractiveStepExecutor, InteractiveExecutor]: the executor created by the backend specific factory.

    Raises:
        ValueError: if the backend is none of "flux_allocation", "slurm_allocation" or "local".
    """
    # Work on a private copy: the factories below add and remove keys, which must neither leak into the dictionary
    # of the caller nor into a default value shared between calls.
    resource_dict = {} if resource_dict is None else dict(resource_dict)
    if flux_executor is not None:
        # A flux executor object only makes sense for the flux backend, so it overrides the requested backend.
        backend = "flux_allocation"
    if backend == "flux_allocation":
        # create_flux_allocation_executor() runs the init_function, pmi, oversubscribe and command line argument
        # checks itself, so they are not repeated here.
        return create_flux_allocation_executor(
            max_workers=max_workers,
            max_cores=max_cores,
            cache_directory=cache_directory,
            resource_dict=resource_dict,
            flux_executor=flux_executor,
            flux_executor_pmi_mode=flux_executor_pmi_mode,
            flux_executor_nesting=flux_executor_nesting,
            flux_log_files=flux_log_files,
            hostname_localhost=hostname_localhost,
            block_allocation=block_allocation,
            init_function=init_function,
        )
    if backend in ("slurm_allocation", "local"):
        # Both backends share the same checks of the flux specific options before the factory is called.
        check_pmi(backend=backend, pmi=flux_executor_pmi_mode)
        check_executor(executor=flux_executor)
        check_nested_flux_executor(nested_flux_executor=flux_executor_nesting)
        check_flux_log_files(flux_log_files=flux_log_files)
        if backend == "slurm_allocation":
            factory = create_slurm_allocation_executor
        else:
            factory = create_local_executor
        return factory(
            max_workers=max_workers,
            max_cores=max_cores,
            cache_directory=cache_directory,
            resource_dict=resource_dict,
            hostname_localhost=hostname_localhost,
            block_allocation=block_allocation,
            init_function=init_function,
        )
    raise ValueError(
        "The supported backends are slurm_allocation, slurm_submission, flux_allocation, flux_submission and local."
    )
146
+
147
+
148
def create_flux_allocation_executor(
    max_workers: Optional[int] = None,
    max_cores: Optional[int] = None,
    cache_directory: Optional[str] = None,
    resource_dict: Optional[dict] = None,
    flux_executor=None,
    flux_executor_pmi_mode: Optional[str] = None,
    flux_executor_nesting: bool = False,
    flux_log_files: bool = False,
    hostname_localhost: Optional[bool] = None,
    block_allocation: bool = False,
    init_function: Optional[Callable] = None,
) -> Union[InteractiveStepExecutor, InteractiveExecutor]:
    """
    Create an executor which spawns its workers with the FluxPythonSpawner.

    Args:
        max_workers (int): number of workers, validated together with max_cores in case of block allocation
        max_cores (int): defines the number of cores which can be used in parallel
        cache_directory (str, optional): stored in the executor arguments under the key "cache_directory"
        resource_dict (dict): resources required by the task. The dictionary is copied, so the object provided by
                              the caller is not modified. The keys "openmpi_oversubscribe" and "slurm_cmd_args" are
                              checked and then removed from the copy.
        flux_executor (flux.job.FluxExecutor): stored in the executor arguments under the key "flux_executor"
        flux_executor_pmi_mode (str): stored in the executor arguments under the key "flux_executor_pmi_mode"
        flux_executor_nesting (bool): stored in the executor arguments under the key "flux_executor_nesting"
        flux_log_files (bool): stored in the executor arguments under the key "flux_log_files"
        hostname_localhost (boolean): stored in the executor arguments under the key "hostname_localhost"
        block_allocation (boolean): select the InteractiveExecutor (True) or the InteractiveStepExecutor (False)
        init_function (None): only added to the executor arguments in case of block allocation

    Returns:
        Union[InteractiveStepExecutor, InteractiveExecutor]: InteractiveExecutor in case of block allocation,
        otherwise InteractiveStepExecutor.

    Note:
        FluxPythonSpawner and validate_max_workers_flux are imported inside a try / except ImportError block at the
        top of the module, so they are only defined when that import succeeded.
    """
    check_init_function(block_allocation=block_allocation, init_function=init_function)
    check_pmi(backend="flux_allocation", pmi=flux_executor_pmi_mode)
    # Private copy, so neither the dictionary of the caller nor a shared default is modified below.
    executor_kwargs = {} if resource_dict is None else dict(resource_dict)
    cores_per_worker = executor_kwargs.get("cores", 1)
    executor_kwargs["cache_directory"] = cache_directory
    executor_kwargs["hostname_localhost"] = hostname_localhost
    check_oversubscribe(
        oversubscribe=executor_kwargs.get("openmpi_oversubscribe", False)
    )
    check_command_line_argument_lst(
        command_line_argument_lst=executor_kwargs.get("slurm_cmd_args", [])
    )
    # These two options are only validated for this backend and not forwarded to the spawner.
    executor_kwargs.pop("openmpi_oversubscribe", None)
    executor_kwargs.pop("slurm_cmd_args", None)
    executor_kwargs["flux_executor"] = flux_executor
    executor_kwargs["flux_executor_pmi_mode"] = flux_executor_pmi_mode
    executor_kwargs["flux_executor_nesting"] = flux_executor_nesting
    executor_kwargs["flux_log_files"] = flux_log_files
    if not block_allocation:
        return InteractiveStepExecutor(
            max_cores=max_cores,
            max_workers=max_workers,
            executor_kwargs=executor_kwargs,
            spawner=FluxPythonSpawner,
        )
    executor_kwargs["init_function"] = init_function
    max_workers = validate_number_of_cores(
        max_cores=max_cores,
        max_workers=max_workers,
        cores_per_worker=cores_per_worker,
        set_local_cores=False,
    )
    validate_max_workers_flux(
        max_workers=max_workers,
        cores=cores_per_worker,
        threads_per_core=executor_kwargs.get("threads_per_core", 1),
    )
    return InteractiveExecutor(
        max_workers=max_workers,
        executor_kwargs=executor_kwargs,
        spawner=FluxPythonSpawner,
    )
203
+
204
+
205
def create_slurm_allocation_executor(
    max_workers: Optional[int] = None,
    max_cores: Optional[int] = None,
    cache_directory: Optional[str] = None,
    resource_dict: Optional[dict] = None,
    hostname_localhost: Optional[bool] = None,
    block_allocation: bool = False,
    init_function: Optional[Callable] = None,
) -> Union[InteractiveStepExecutor, InteractiveExecutor]:
    """
    Create an executor which spawns its workers with the SrunSpawner.

    Args:
        max_workers (int): number of workers, validated together with max_cores in case of block allocation
        max_cores (int): defines the number of cores which can be used in parallel
        cache_directory (str, optional): stored in the executor arguments under the key "cache_directory"
        resource_dict (dict): resources required by the task. The dictionary is copied, so the object provided by
                              the caller is not modified.
        hostname_localhost (boolean): stored in the executor arguments under the key "hostname_localhost"
        block_allocation (boolean): select the InteractiveExecutor (True) or the InteractiveStepExecutor (False)
        init_function (None): only added to the executor arguments in case of block allocation

    Returns:
        Union[InteractiveStepExecutor, InteractiveExecutor]: InteractiveExecutor in case of block allocation,
        otherwise InteractiveStepExecutor.
    """
    check_init_function(block_allocation=block_allocation, init_function=init_function)
    # Private copy, so neither the dictionary of the caller nor a shared default is modified below.
    executor_kwargs = {} if resource_dict is None else dict(resource_dict)
    cores_per_worker = executor_kwargs.get("cores", 1)
    executor_kwargs["cache_directory"] = cache_directory
    executor_kwargs["hostname_localhost"] = hostname_localhost
    if not block_allocation:
        return InteractiveStepExecutor(
            max_cores=max_cores,
            max_workers=max_workers,
            executor_kwargs=executor_kwargs,
            spawner=SrunSpawner,
        )
    executor_kwargs["init_function"] = init_function
    max_workers = validate_number_of_cores(
        max_cores=max_cores,
        max_workers=max_workers,
        cores_per_worker=cores_per_worker,
        set_local_cores=False,
    )
    validate_max_workers_slurm(
        max_workers=max_workers,
        cores=cores_per_worker,
        threads_per_core=executor_kwargs.get("threads_per_core", 1),
    )
    return InteractiveExecutor(
        max_workers=max_workers,
        executor_kwargs=executor_kwargs,
        spawner=SrunSpawner,
    )
243
+
244
+
245
def create_local_executor(
    max_workers: Optional[int] = None,
    max_cores: Optional[int] = None,
    cache_directory: Optional[str] = None,
    resource_dict: Optional[dict] = None,
    hostname_localhost: Optional[bool] = None,
    block_allocation: bool = False,
    init_function: Optional[Callable] = None,
) -> Union[InteractiveStepExecutor, InteractiveExecutor]:
    """
    Create an executor which spawns its workers with the MpiExecSpawner.

    Args:
        max_workers (int): number of workers, validated together with max_cores in case of block allocation
        max_cores (int): defines the number of cores which can be used in parallel
        cache_directory (str, optional): stored in the executor arguments under the key "cache_directory"
        resource_dict (dict): resources required by the task. The dictionary is copied, so the object provided by
                              the caller is not modified. The keys "gpus_per_core" and "slurm_cmd_args" are checked
                              and then removed from the copy together with "threads_per_core".
        hostname_localhost (boolean): stored in the executor arguments under the key "hostname_localhost"
        block_allocation (boolean): select the InteractiveExecutor (True) or the InteractiveStepExecutor (False)
        init_function (None): only added to the executor arguments in case of block allocation

    Returns:
        Union[InteractiveStepExecutor, InteractiveExecutor]: InteractiveExecutor in case of block allocation,
        otherwise InteractiveStepExecutor.
    """
    check_init_function(block_allocation=block_allocation, init_function=init_function)
    # Private copy, so neither the dictionary of the caller nor a shared default is modified below.
    executor_kwargs = {} if resource_dict is None else dict(resource_dict)
    cores_per_worker = executor_kwargs.get("cores", 1)
    executor_kwargs["cache_directory"] = cache_directory
    executor_kwargs["hostname_localhost"] = hostname_localhost

    check_gpus_per_worker(gpus_per_worker=executor_kwargs.get("gpus_per_core", 0))
    check_command_line_argument_lst(
        command_line_argument_lst=executor_kwargs.get("slurm_cmd_args", [])
    )
    # These options are not forwarded to the spawner of the local backend.
    for unused_key in ("threads_per_core", "gpus_per_core", "slurm_cmd_args"):
        executor_kwargs.pop(unused_key, None)
    if not block_allocation:
        return InteractiveStepExecutor(
            max_cores=max_cores,
            max_workers=max_workers,
            executor_kwargs=executor_kwargs,
            spawner=MpiExecSpawner,
        )
    executor_kwargs["init_function"] = init_function
    return InteractiveExecutor(
        max_workers=validate_number_of_cores(
            max_cores=max_cores,
            max_workers=max_workers,
            cores_per_worker=cores_per_worker,
            set_local_cores=True,
        ),
        executor_kwargs=executor_kwargs,
        spawner=MpiExecSpawner,
    )
@@ -0,0 +1,132 @@
1
+ from concurrent.futures import Future
2
+ from typing import Any, Callable, Dict, Optional
3
+
4
+ from executorlib.base.executor import ExecutorBase
5
+ from executorlib.interactive.shared import execute_tasks_with_dependencies
6
+ from executorlib.standalone.plot import (
7
+ draw,
8
+ generate_nodes_and_edges,
9
+ generate_task_hash,
10
+ )
11
+ from executorlib.standalone.thread import RaisingThread
12
+
13
+
14
class ExecutorWithDependencies(ExecutorBase):
    """
    ExecutorWithDependencies is a class that extends ExecutorBase and provides functionality for executing tasks with
    dependencies.

    Args:
        executor (ExecutorBase): The executor which is handed to execute_tasks_with_dependencies() together with its
                                 future queue.
        max_cores (int, optional): Forwarded to the constructor of ExecutorBase.
        refresh_rate (float, optional): The refresh rate for updating the executor queue. Defaults to 0.01.
        plot_dependency_graph (bool, optional): Whether to generate and plot the dependency graph. Defaults to False.
        plot_dependency_graph_filename (str, optional): Name of the file to store the plotted graph in. Providing a
                                                        filename enables the dependency graph, independent of
                                                        plot_dependency_graph.

    Attributes:
        _future_hash_dict (Dict[str, Future]): A dictionary mapping task hash to future object.
        _task_hash_dict (Dict[str, Dict]): A dictionary mapping task hash to task dictionary.
        _generate_dependency_graph (bool): Whether to generate the dependency graph.
        _plot_dependency_graph_filename (Optional[str]): Name of the file to store the plotted graph in.

    """

    def __init__(
        self,
        executor: ExecutorBase,
        max_cores: Optional[int] = None,
        refresh_rate: float = 0.01,
        plot_dependency_graph: bool = False,
        plot_dependency_graph_filename: Optional[str] = None,
    ) -> None:
        super().__init__(max_cores=max_cores)
        self._set_process(
            RaisingThread(
                target=execute_tasks_with_dependencies,
                kwargs={
                    # Executor Arguments
                    "future_queue": self._future_queue,
                    "executor_queue": executor._future_queue,
                    "executor": executor,
                    "refresh_rate": refresh_rate,
                },
            )
        )
        self._future_hash_dict: dict = {}
        self._task_hash_dict: dict = {}
        self._plot_dependency_graph_filename = plot_dependency_graph_filename
        # A filename implies that the graph should be generated, even if plot_dependency_graph is False.
        if plot_dependency_graph_filename is None:
            self._generate_dependency_graph = plot_dependency_graph
        else:
            self._generate_dependency_graph = True

    def submit(  # type: ignore
        self,
        fn: Callable[..., Any],
        *args: Any,
        resource_dict: Optional[Dict[str, Any]] = None,
        **kwargs: Any,
    ) -> Future:
        """
        Submits a task to the executor.

        When the dependency graph is generated, the task is only recorded for plotting: the returned future is
        resolved to None right away and the task is not forwarded to ExecutorBase.submit().

        Args:
            fn (Callable): The function to be executed.
            *args: Variable length argument list.
            resource_dict (dict, optional): A dictionary of resources required by the task. Defaults to an empty
                                            dictionary.
            **kwargs: Arbitrary keyword arguments.

        Returns:
            Future: A future object representing the result of the task.

        """
        if resource_dict is None:
            # Fresh dictionary per call instead of a default object shared between all calls.
            resource_dict = {}
        if not self._generate_dependency_graph:
            return super().submit(fn, *args, resource_dict=resource_dict, **kwargs)
        f: Future = Future()
        f.set_result(None)
        task_dict = {
            "fn": fn,
            "args": args,
            "kwargs": kwargs,
            "future": f,
            "resource_dict": resource_dict,
        }
        task_hash = generate_task_hash(
            task_dict=task_dict,
            future_hash_inverse_dict={
                v: k for k, v in self._future_hash_dict.items()
            },
        )
        self._future_hash_dict[task_hash] = f
        self._task_hash_dict[task_hash] = task_dict
        return f

    def __exit__(
        self,
        exc_type: Any,
        exc_val: Any,
        exc_tb: Any,
    ) -> None:
        """
        Exit method called when exiting the context manager.

        After the exit method of ExecutorBase was called, the dependency graph is drawn when its generation is
        enabled.

        Args:
            exc_type: The type of the exception.
            exc_val: The exception instance.
            exc_tb: The traceback object.

        """
        super().__exit__(exc_type=exc_type, exc_val=exc_val, exc_tb=exc_tb)  # type: ignore
        if self._generate_dependency_graph:
            node_lst, edge_lst = generate_nodes_and_edges(
                task_hash_dict=self._task_hash_dict,
                future_hash_inverse_dict={
                    v: k for k, v in self._future_hash_dict.items()
                },
            )
            # The result of draw() is deliberately not returned: a truthy return value of __exit__() would
            # suppress an exception raised inside the with block.
            draw(
                node_lst=node_lst,
                edge_lst=edge_lst,
                filename=self._plot_dependency_graph_filename,
            )
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: executorlib
3
- Version: 0.0.8
3
+ Version: 0.0.10
4
4
  Summary: Scale serial and MPI-parallel python functions over hundreds of compute nodes all from within a jupyter notebook or serial python process.
5
5
  Author-email: Jan Janssen <janssen@lanl.gov>
6
6
  License: BSD 3-Clause License
@@ -51,7 +51,7 @@ Requires-Python: <3.14,>=3.9
51
51
  Description-Content-Type: text/markdown
52
52
  License-File: LICENSE
53
53
  Requires-Dist: cloudpickle<=3.1.1,>=2.0.0
54
- Requires-Dist: pyzmq<=26.2.0,>=25.0.0
54
+ Requires-Dist: pyzmq<=26.2.1,>=25.0.0
55
55
  Provides-Extra: cache
56
56
  Requires-Dist: h5py<=3.12.1,>=3.6.0; extra == "cache"
57
57
  Provides-Extra: graph
@@ -24,6 +24,7 @@ executorlib/cache/queue_spawner.py
24
24
  executorlib/cache/shared.py
25
25
  executorlib/cache/subprocess_spawner.py
26
26
  executorlib/interactive/__init__.py
27
+ executorlib/interactive/create.py
27
28
  executorlib/interactive/executor.py
28
29
  executorlib/interactive/flux.py
29
30
  executorlib/interactive/shared.py
@@ -1,5 +1,5 @@
1
1
  cloudpickle<=3.1.1,>=2.0.0
2
- pyzmq<=26.2.0,>=25.0.0
2
+ pyzmq<=26.2.1,>=25.0.0
3
3
 
4
4
  [all]
5
5
  mpi4py<=4.0.1,>=3.1.4
@@ -26,7 +26,7 @@ classifiers = [
26
26
  ]
27
27
  dependencies = [
28
28
  "cloudpickle>=2.0.0,<=3.1.1",
29
- "pyzmq>=25.0.0,<=26.2.0",
29
+ "pyzmq>=25.0.0,<=26.2.1",
30
30
  ]
31
31
  dynamic = ["version"]
32
32
 
@@ -5,7 +5,7 @@ from time import sleep
5
5
  from queue import Queue
6
6
 
7
7
  from executorlib import Executor
8
- from executorlib.interactive.executor import create_executor
8
+ from executorlib.interactive.create import create_executor
9
9
  from executorlib.interactive.shared import execute_tasks_with_dependencies
10
10
  from executorlib.standalone.plot import generate_nodes_and_edges
11
11
  from executorlib.standalone.serialize import cloudpickle_register
@@ -1,329 +0,0 @@
1
- from concurrent.futures import Future
2
- from typing import Any, Callable, Dict, Optional
3
-
4
- from executorlib.base.executor import ExecutorBase
5
- from executorlib.interactive.shared import (
6
- InteractiveExecutor,
7
- InteractiveStepExecutor,
8
- execute_tasks_with_dependencies,
9
- )
10
- from executorlib.interactive.slurm import SrunSpawner
11
- from executorlib.interactive.slurm import (
12
- validate_max_workers as validate_max_workers_slurm,
13
- )
14
- from executorlib.standalone.inputcheck import (
15
- check_command_line_argument_lst,
16
- check_executor,
17
- check_flux_log_files,
18
- check_gpus_per_worker,
19
- check_init_function,
20
- check_nested_flux_executor,
21
- check_oversubscribe,
22
- check_pmi,
23
- validate_number_of_cores,
24
- )
25
- from executorlib.standalone.interactive.spawner import MpiExecSpawner
26
- from executorlib.standalone.plot import (
27
- draw,
28
- generate_nodes_and_edges,
29
- generate_task_hash,
30
- )
31
- from executorlib.standalone.thread import RaisingThread
32
-
33
- try: # The PyFluxExecutor requires flux-base to be installed.
34
- from executorlib.interactive.flux import FluxPythonSpawner
35
- from executorlib.interactive.flux import (
36
- validate_max_workers as validate_max_workers_flux,
37
- )
38
- except ImportError:
39
- pass
40
-
41
-
42
- class ExecutorWithDependencies(ExecutorBase):
43
- """
44
- ExecutorWithDependencies is a class that extends ExecutorBase and provides functionality for executing tasks with
45
- dependencies.
46
-
47
- Args:
48
- refresh_rate (float, optional): The refresh rate for updating the executor queue. Defaults to 0.01.
49
- plot_dependency_graph (bool, optional): Whether to generate and plot the dependency graph. Defaults to False.
50
- plot_dependency_graph_filename (str): Name of the file to store the plotted graph in.
51
- *args: Variable length argument list.
52
- **kwargs: Arbitrary keyword arguments.
53
-
54
- Attributes:
55
- _future_hash_dict (Dict[str, Future]): A dictionary mapping task hash to future object.
56
- _task_hash_dict (Dict[str, Dict]): A dictionary mapping task hash to task dictionary.
57
- _generate_dependency_graph (bool): Whether to generate the dependency graph.
58
- _generate_dependency_graph (str): Name of the file to store the plotted graph in.
59
-
60
- """
61
-
62
- def __init__(
63
- self,
64
- *args: Any,
65
- refresh_rate: float = 0.01,
66
- plot_dependency_graph: bool = False,
67
- plot_dependency_graph_filename: Optional[str] = None,
68
- **kwargs: Any,
69
- ) -> None:
70
- super().__init__(max_cores=kwargs.get("max_cores", None))
71
- executor = create_executor(*args, **kwargs)
72
- self._set_process(
73
- RaisingThread(
74
- target=execute_tasks_with_dependencies,
75
- kwargs={
76
- # Executor Arguments
77
- "future_queue": self._future_queue,
78
- "executor_queue": executor._future_queue,
79
- "executor": executor,
80
- "refresh_rate": refresh_rate,
81
- },
82
- )
83
- )
84
- self._future_hash_dict: dict = {}
85
- self._task_hash_dict: dict = {}
86
- self._plot_dependency_graph_filename = plot_dependency_graph_filename
87
- if plot_dependency_graph_filename is None:
88
- self._generate_dependency_graph = plot_dependency_graph
89
- else:
90
- self._generate_dependency_graph = True
91
-
92
- def submit( # type: ignore
93
- self,
94
- fn: Callable[..., Any],
95
- *args: Any,
96
- resource_dict: Dict[str, Any] = {},
97
- **kwargs: Any,
98
- ) -> Future:
99
- """
100
- Submits a task to the executor.
101
-
102
- Args:
103
- fn (Callable): The function to be executed.
104
- *args: Variable length argument list.
105
- resource_dict (dict, optional): A dictionary of resources required by the task. Defaults to {}.
106
- **kwargs: Arbitrary keyword arguments.
107
-
108
- Returns:
109
- Future: A future object representing the result of the task.
110
-
111
- """
112
- if not self._generate_dependency_graph:
113
- f = super().submit(fn, *args, resource_dict=resource_dict, **kwargs)
114
- else:
115
- f = Future()
116
- f.set_result(None)
117
- task_dict = {
118
- "fn": fn,
119
- "args": args,
120
- "kwargs": kwargs,
121
- "future": f,
122
- "resource_dict": resource_dict,
123
- }
124
- task_hash = generate_task_hash(
125
- task_dict=task_dict,
126
- future_hash_inverse_dict={
127
- v: k for k, v in self._future_hash_dict.items()
128
- },
129
- )
130
- self._future_hash_dict[task_hash] = f
131
- self._task_hash_dict[task_hash] = task_dict
132
- return f
133
-
134
- def __exit__(
135
- self,
136
- exc_type: Any,
137
- exc_val: Any,
138
- exc_tb: Any,
139
- ) -> None:
140
- """
141
- Exit method called when exiting the context manager.
142
-
143
- Args:
144
- exc_type: The type of the exception.
145
- exc_val: The exception instance.
146
- exc_tb: The traceback object.
147
-
148
- """
149
- super().__exit__(exc_type=exc_type, exc_val=exc_val, exc_tb=exc_tb) # type: ignore
150
- if self._generate_dependency_graph:
151
- node_lst, edge_lst = generate_nodes_and_edges(
152
- task_hash_dict=self._task_hash_dict,
153
- future_hash_inverse_dict={
154
- v: k for k, v in self._future_hash_dict.items()
155
- },
156
- )
157
- return draw(
158
- node_lst=node_lst,
159
- edge_lst=edge_lst,
160
- filename=self._plot_dependency_graph_filename,
161
- )
162
-
163
-
164
- def create_executor(
165
- max_workers: Optional[int] = None,
166
- backend: str = "local",
167
- max_cores: Optional[int] = None,
168
- cache_directory: Optional[str] = None,
169
- resource_dict: dict = {},
170
- flux_executor=None,
171
- flux_executor_pmi_mode: Optional[str] = None,
172
- flux_executor_nesting: bool = False,
173
- flux_log_files: bool = False,
174
- hostname_localhost: Optional[bool] = None,
175
- block_allocation: bool = False,
176
- init_function: Optional[Callable] = None,
177
- ):
178
- """
179
- Instead of returning a executorlib.Executor object this function returns either a executorlib.mpi.PyMPIExecutor,
180
- executorlib.slurm.PySlurmExecutor or executorlib.flux.PyFluxExecutor depending on which backend is available. The
181
- executorlib.flux.PyFluxExecutor is the preferred choice while the executorlib.mpi.PyMPIExecutor is primarily used
182
- for development and testing. The executorlib.flux.PyFluxExecutor requires flux-base from the flux-framework to be
183
- installed and in addition flux-sched to enable GPU scheduling. Finally, the executorlib.slurm.PySlurmExecutor
184
- requires the SLURM workload manager to be installed on the system.
185
-
186
- Args:
187
- max_workers (int): for backwards compatibility with the standard library, max_workers also defines the number of
188
- cores which can be used in parallel - just like the max_cores parameter. Using max_cores is
189
- recommended, as computers have a limited number of compute cores.
190
- backend (str): Switch between the different backends "flux", "local" or "slurm". The default is "local".
191
- max_cores (int): defines the number cores which can be used in parallel
192
- cache_directory (str, optional): The directory to store cache files. Defaults to "cache".
193
- resource_dict (dict): A dictionary of resources required by the task. With the following keys:
194
- - cores (int): number of MPI cores to be used for each function call
195
- - threads_per_core (int): number of OpenMP threads to be used for each function call
196
- - gpus_per_core (int): number of GPUs per worker - defaults to 0
197
- - cwd (str/None): current working directory where the parallel python task is executed
198
- - openmpi_oversubscribe (bool): adds the `--oversubscribe` command line flag (OpenMPI and
199
- SLURM only) - default False
200
- - slurm_cmd_args (list): Additional command line arguments for the srun call (SLURM only)
201
- flux_executor (flux.job.FluxExecutor): Flux Python interface to submit the workers to flux
202
- flux_executor_pmi_mode (str): PMI interface to use (OpenMPI v5 requires pmix) default is None (Flux only)
203
- flux_executor_nesting (bool): Provide hierarchically nested Flux job scheduler inside the submitted function.
204
- flux_log_files (bool, optional): Write flux stdout and stderr files. Defaults to False.
205
- hostname_localhost (boolean): use localhost instead of the hostname to establish the zmq connection. In the
206
- context of an HPC cluster this essential to be able to communicate to an Executor
207
- running on a different compute node within the same allocation. And in principle
208
- any computer should be able to resolve that their own hostname points to the same
209
- address as localhost. Still MacOS >= 12 seems to disable this look up for security
210
- reasons. So on MacOS it is required to set this option to true
211
- block_allocation (boolean): To accelerate the submission of a series of python functions with the same
212
- resource requirements, executorlib supports block allocation. In this case all
213
- resources have to be defined on the executor, rather than during the submission
214
- of the individual function.
215
- init_function (None): optional function to preset arguments for functions which are submitted later
216
- """
217
- check_init_function(block_allocation=block_allocation, init_function=init_function)
218
- if flux_executor is not None and backend != "flux_allocation":
219
- backend = "flux_allocation"
220
- check_pmi(backend=backend, pmi=flux_executor_pmi_mode)
221
- cores_per_worker = resource_dict.get("cores", 1)
222
- resource_dict["cache_directory"] = cache_directory
223
- resource_dict["hostname_localhost"] = hostname_localhost
224
- if backend == "flux_allocation":
225
- check_oversubscribe(
226
- oversubscribe=resource_dict.get("openmpi_oversubscribe", False)
227
- )
228
- check_command_line_argument_lst(
229
- command_line_argument_lst=resource_dict.get("slurm_cmd_args", [])
230
- )
231
- if "openmpi_oversubscribe" in resource_dict.keys():
232
- del resource_dict["openmpi_oversubscribe"]
233
- if "slurm_cmd_args" in resource_dict.keys():
234
- del resource_dict["slurm_cmd_args"]
235
- resource_dict["flux_executor"] = flux_executor
236
- resource_dict["flux_executor_pmi_mode"] = flux_executor_pmi_mode
237
- resource_dict["flux_executor_nesting"] = flux_executor_nesting
238
- resource_dict["flux_log_files"] = flux_log_files
239
- if block_allocation:
240
- resource_dict["init_function"] = init_function
241
- max_workers = validate_number_of_cores(
242
- max_cores=max_cores,
243
- max_workers=max_workers,
244
- cores_per_worker=cores_per_worker,
245
- set_local_cores=False,
246
- )
247
- validate_max_workers_flux(
248
- max_workers=max_workers,
249
- cores=cores_per_worker,
250
- threads_per_core=resource_dict.get("threads_per_core", 1),
251
- )
252
- return InteractiveExecutor(
253
- max_workers=max_workers,
254
- executor_kwargs=resource_dict,
255
- spawner=FluxPythonSpawner,
256
- )
257
- else:
258
- return InteractiveStepExecutor(
259
- max_cores=max_cores,
260
- max_workers=max_workers,
261
- executor_kwargs=resource_dict,
262
- spawner=FluxPythonSpawner,
263
- )
264
- elif backend == "slurm_allocation":
265
- check_executor(executor=flux_executor)
266
- check_nested_flux_executor(nested_flux_executor=flux_executor_nesting)
267
- check_flux_log_files(flux_log_files=flux_log_files)
268
- if block_allocation:
269
- resource_dict["init_function"] = init_function
270
- max_workers = validate_number_of_cores(
271
- max_cores=max_cores,
272
- max_workers=max_workers,
273
- cores_per_worker=cores_per_worker,
274
- set_local_cores=False,
275
- )
276
- validate_max_workers_slurm(
277
- max_workers=max_workers,
278
- cores=cores_per_worker,
279
- threads_per_core=resource_dict.get("threads_per_core", 1),
280
- )
281
- return InteractiveExecutor(
282
- max_workers=max_workers,
283
- executor_kwargs=resource_dict,
284
- spawner=SrunSpawner,
285
- )
286
- else:
287
- return InteractiveStepExecutor(
288
- max_cores=max_cores,
289
- max_workers=max_workers,
290
- executor_kwargs=resource_dict,
291
- spawner=SrunSpawner,
292
- )
293
- elif backend == "local":
294
- check_executor(executor=flux_executor)
295
- check_nested_flux_executor(nested_flux_executor=flux_executor_nesting)
296
- check_flux_log_files(flux_log_files=flux_log_files)
297
- check_gpus_per_worker(gpus_per_worker=resource_dict.get("gpus_per_core", 0))
298
- check_command_line_argument_lst(
299
- command_line_argument_lst=resource_dict.get("slurm_cmd_args", [])
300
- )
301
- if "threads_per_core" in resource_dict.keys():
302
- del resource_dict["threads_per_core"]
303
- if "gpus_per_core" in resource_dict.keys():
304
- del resource_dict["gpus_per_core"]
305
- if "slurm_cmd_args" in resource_dict.keys():
306
- del resource_dict["slurm_cmd_args"]
307
- if block_allocation:
308
- resource_dict["init_function"] = init_function
309
- return InteractiveExecutor(
310
- max_workers=validate_number_of_cores(
311
- max_cores=max_cores,
312
- max_workers=max_workers,
313
- cores_per_worker=cores_per_worker,
314
- set_local_cores=True,
315
- ),
316
- executor_kwargs=resource_dict,
317
- spawner=MpiExecSpawner,
318
- )
319
- else:
320
- return InteractiveStepExecutor(
321
- max_cores=max_cores,
322
- max_workers=max_workers,
323
- executor_kwargs=resource_dict,
324
- spawner=MpiExecSpawner,
325
- )
326
- else:
327
- raise ValueError(
328
- "The supported backends are slurm_allocation, slurm_submission, flux_allocation, flux_submission and local."
329
- )
File without changes
File without changes
File without changes
File without changes
File without changes