executorlib 0.0.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. executorlib/__init__.py +248 -0
  2. executorlib/_version.py +716 -0
  3. executorlib/backend/__init__.py +0 -0
  4. executorlib/backend/cache_parallel.py +57 -0
  5. executorlib/backend/cache_serial.py +6 -0
  6. executorlib/backend/interactive_parallel.py +99 -0
  7. executorlib/backend/interactive_serial.py +74 -0
  8. executorlib/base/__init__.py +0 -0
  9. executorlib/base/executor.py +167 -0
  10. executorlib/cache/__init__.py +0 -0
  11. executorlib/cache/backend.py +75 -0
  12. executorlib/cache/executor.py +121 -0
  13. executorlib/cache/queue_spawner.py +109 -0
  14. executorlib/cache/shared.py +249 -0
  15. executorlib/cache/subprocess_spawner.py +65 -0
  16. executorlib/interactive/__init__.py +0 -0
  17. executorlib/interactive/executor.py +329 -0
  18. executorlib/interactive/flux.py +135 -0
  19. executorlib/interactive/shared.py +657 -0
  20. executorlib/interactive/slurm.py +109 -0
  21. executorlib/standalone/__init__.py +21 -0
  22. executorlib/standalone/command.py +14 -0
  23. executorlib/standalone/hdf.py +116 -0
  24. executorlib/standalone/inputcheck.py +201 -0
  25. executorlib/standalone/interactive/__init__.py +0 -0
  26. executorlib/standalone/interactive/backend.py +98 -0
  27. executorlib/standalone/interactive/communication.py +213 -0
  28. executorlib/standalone/interactive/spawner.py +174 -0
  29. executorlib/standalone/plot.py +134 -0
  30. executorlib/standalone/queue.py +19 -0
  31. executorlib/standalone/serialize.py +82 -0
  32. executorlib/standalone/thread.py +42 -0
  33. executorlib-0.0.8.dist-info/LICENSE +29 -0
  34. executorlib-0.0.8.dist-info/METADATA +230 -0
  35. executorlib-0.0.8.dist-info/RECORD +37 -0
  36. executorlib-0.0.8.dist-info/WHEEL +5 -0
  37. executorlib-0.0.8.dist-info/top_level.txt +1 -0
@@ -0,0 +1,329 @@
1
+ from concurrent.futures import Future
2
+ from typing import Any, Callable, Dict, Optional
3
+
4
+ from executorlib.base.executor import ExecutorBase
5
+ from executorlib.interactive.shared import (
6
+ InteractiveExecutor,
7
+ InteractiveStepExecutor,
8
+ execute_tasks_with_dependencies,
9
+ )
10
+ from executorlib.interactive.slurm import SrunSpawner
11
+ from executorlib.interactive.slurm import (
12
+ validate_max_workers as validate_max_workers_slurm,
13
+ )
14
+ from executorlib.standalone.inputcheck import (
15
+ check_command_line_argument_lst,
16
+ check_executor,
17
+ check_flux_log_files,
18
+ check_gpus_per_worker,
19
+ check_init_function,
20
+ check_nested_flux_executor,
21
+ check_oversubscribe,
22
+ check_pmi,
23
+ validate_number_of_cores,
24
+ )
25
+ from executorlib.standalone.interactive.spawner import MpiExecSpawner
26
+ from executorlib.standalone.plot import (
27
+ draw,
28
+ generate_nodes_and_edges,
29
+ generate_task_hash,
30
+ )
31
+ from executorlib.standalone.thread import RaisingThread
32
+
33
+ try: # The PyFluxExecutor requires flux-base to be installed.
34
+ from executorlib.interactive.flux import FluxPythonSpawner
35
+ from executorlib.interactive.flux import (
36
+ validate_max_workers as validate_max_workers_flux,
37
+ )
38
+ except ImportError:
39
+ pass
40
+
41
+
42
class ExecutorWithDependencies(ExecutorBase):
    """
    ExecutorWithDependencies is a class that extends ExecutorBase and provides functionality for executing tasks with
    dependencies.

    Args:
        refresh_rate (float, optional): The refresh rate for updating the executor queue. Defaults to 0.01.
        plot_dependency_graph (bool, optional): Whether to generate and plot the dependency graph. Defaults to False.
        plot_dependency_graph_filename (str): Name of the file to store the plotted graph in.
        *args: Variable length argument list.
        **kwargs: Arbitrary keyword arguments.

    Attributes:
        _future_hash_dict (Dict[str, Future]): A dictionary mapping task hash to future object.
        _task_hash_dict (Dict[str, Dict]): A dictionary mapping task hash to task dictionary.
        _generate_dependency_graph (bool): Whether to generate the dependency graph.
        _plot_dependency_graph_filename (str): Name of the file to store the plotted graph in.
    """

    def __init__(
        self,
        *args: Any,
        refresh_rate: float = 0.01,
        plot_dependency_graph: bool = False,
        plot_dependency_graph_filename: Optional[str] = None,
        **kwargs: Any,
    ) -> None:
        super().__init__(max_cores=kwargs.get("max_cores", None))
        executor = create_executor(*args, **kwargs)
        self._set_process(
            RaisingThread(
                target=execute_tasks_with_dependencies,
                kwargs={
                    # Executor Arguments
                    "future_queue": self._future_queue,
                    "executor_queue": executor._future_queue,
                    "executor": executor,
                    "refresh_rate": refresh_rate,
                },
            )
        )
        self._future_hash_dict: dict = {}
        self._task_hash_dict: dict = {}
        self._plot_dependency_graph_filename = plot_dependency_graph_filename
        # An explicit filename implies the user wants the graph, even when
        # plot_dependency_graph was left at its default of False.
        if plot_dependency_graph_filename is None:
            self._generate_dependency_graph = plot_dependency_graph
        else:
            self._generate_dependency_graph = True

    def submit(  # type: ignore
        self,
        fn: Callable[..., Any],
        *args: Any,
        resource_dict: Optional[Dict[str, Any]] = None,
        **kwargs: Any,
    ) -> Future:
        """
        Submits a task to the executor.

        Args:
            fn (Callable): The function to be executed.
            *args: Variable length argument list.
            resource_dict (dict, optional): A dictionary of resources required by the task.
                                            Defaults to None, which is treated as an empty dictionary.
            **kwargs: Arbitrary keyword arguments.

        Returns:
            Future: A future object representing the result of the task.
        """
        # None sentinel instead of a mutable {} default, which would be
        # shared between all calls of this method.
        if resource_dict is None:
            resource_dict = {}
        if not self._generate_dependency_graph:
            f = super().submit(fn, *args, resource_dict=resource_dict, **kwargs)
        else:
            # In plotting mode nothing is executed: record an already-resolved
            # dummy future so later submissions can still reference it when
            # wiring up the dependency graph.
            f = Future()
            f.set_result(None)
            task_dict = {
                "fn": fn,
                "args": args,
                "kwargs": kwargs,
                "future": f,
                "resource_dict": resource_dict,
            }
            task_hash = generate_task_hash(
                task_dict=task_dict,
                future_hash_inverse_dict={
                    v: k for k, v in self._future_hash_dict.items()
                },
            )
            self._future_hash_dict[task_hash] = f
            self._task_hash_dict[task_hash] = task_dict
        return f

    def __exit__(
        self,
        exc_type: Any,
        exc_val: Any,
        exc_tb: Any,
    ) -> None:
        """
        Exit method called when exiting the context manager.

        Args:
            exc_type: The type of the exception.
            exc_val: The exception instance.
            exc_tb: The traceback object.
        """
        super().__exit__(exc_type=exc_type, exc_val=exc_val, exc_tb=exc_tb)  # type: ignore
        if self._generate_dependency_graph:
            node_lst, edge_lst = generate_nodes_and_edges(
                task_hash_dict=self._task_hash_dict,
                future_hash_inverse_dict={
                    v: k for k, v in self._future_hash_dict.items()
                },
            )
            # Intentionally do not return the result of draw(): a truthy
            # return value from __exit__ would suppress in-flight exceptions.
            draw(
                node_lst=node_lst,
                edge_lst=edge_lst,
                filename=self._plot_dependency_graph_filename,
            )
162
+
163
+
164
def create_executor(
    max_workers: Optional[int] = None,
    backend: str = "local",
    max_cores: Optional[int] = None,
    cache_directory: Optional[str] = None,
    resource_dict: Optional[dict] = None,
    flux_executor=None,
    flux_executor_pmi_mode: Optional[str] = None,
    flux_executor_nesting: bool = False,
    flux_log_files: bool = False,
    hostname_localhost: Optional[bool] = None,
    block_allocation: bool = False,
    init_function: Optional[Callable] = None,
):
    """
    Instead of returning a executorlib.Executor object this function returns either a executorlib.mpi.PyMPIExecutor,
    executorlib.slurm.PySlurmExecutor or executorlib.flux.PyFluxExecutor depending on which backend is available. The
    executorlib.flux.PyFluxExecutor is the preferred choice while the executorlib.mpi.PyMPIExecutor is primarily used
    for development and testing. The executorlib.flux.PyFluxExecutor requires flux-base from the flux-framework to be
    installed and in addition flux-sched to enable GPU scheduling. Finally, the executorlib.slurm.PySlurmExecutor
    requires the SLURM workload manager to be installed on the system.

    Args:
        max_workers (int): for backwards compatibility with the standard library, max_workers also defines the number of
                           cores which can be used in parallel - just like the max_cores parameter. Using max_cores is
                           recommended, as computers have a limited number of compute cores.
        backend (str): Switch between the different backends "flux", "local" or "slurm". The default is "local".
        max_cores (int): defines the number cores which can be used in parallel
        cache_directory (str, optional): The directory to store cache files. Defaults to "cache".
        resource_dict (dict, optional): A dictionary of resources required by the task. Defaults to None, which is
            treated as an empty dictionary. With the following keys:
                              - cores (int): number of MPI cores to be used for each function call
                              - threads_per_core (int): number of OpenMP threads to be used for each function call
                              - gpus_per_core (int): number of GPUs per worker - defaults to 0
                              - cwd (str/None): current working directory where the parallel python task is executed
                              - openmpi_oversubscribe (bool): adds the `--oversubscribe` command line flag (OpenMPI and
                                                              SLURM only) - default False
                              - slurm_cmd_args (list): Additional command line arguments for the srun call (SLURM only)
        flux_executor (flux.job.FluxExecutor): Flux Python interface to submit the workers to flux
        flux_executor_pmi_mode (str): PMI interface to use (OpenMPI v5 requires pmix) default is None (Flux only)
        flux_executor_nesting (bool): Provide hierarchically nested Flux job scheduler inside the submitted function.
        flux_log_files (bool, optional): Write flux stdout and stderr files. Defaults to False.
        hostname_localhost (boolean): use localhost instead of the hostname to establish the zmq connection. In the
                                      context of an HPC cluster this essential to be able to communicate to an Executor
                                      running on a different compute node within the same allocation. And in principle
                                      any computer should be able to resolve that their own hostname points to the same
                                      address as localhost. Still MacOS >= 12 seems to disable this look up for security
                                      reasons. So on MacOS it is required to set this option to true
        block_allocation (boolean): To accelerate the submission of a series of python functions with the same
                                    resource requirements, executorlib supports block allocation. In this case all
                                    resources have to be defined on the executor, rather than during the submission
                                    of the individual function.
        init_function (None): optional function to preset arguments for functions which are submitted later
    """
    check_init_function(block_allocation=block_allocation, init_function=init_function)
    if flux_executor is not None and backend != "flux_allocation":
        backend = "flux_allocation"
    check_pmi(backend=backend, pmi=flux_executor_pmi_mode)
    # Work on a copy: previously the default was a shared mutable {} and the
    # function mutated it (and any caller-supplied dict) in place, leaking
    # backend-specific entries between calls.
    resource_dict = dict(resource_dict) if resource_dict is not None else {}
    cores_per_worker = resource_dict.get("cores", 1)
    resource_dict["cache_directory"] = cache_directory
    resource_dict["hostname_localhost"] = hostname_localhost
    if backend == "flux_allocation":
        check_oversubscribe(
            oversubscribe=resource_dict.get("openmpi_oversubscribe", False)
        )
        check_command_line_argument_lst(
            command_line_argument_lst=resource_dict.get("slurm_cmd_args", [])
        )
        # These keys are validated above but not understood by the Flux spawner.
        resource_dict.pop("openmpi_oversubscribe", None)
        resource_dict.pop("slurm_cmd_args", None)
        resource_dict["flux_executor"] = flux_executor
        resource_dict["flux_executor_pmi_mode"] = flux_executor_pmi_mode
        resource_dict["flux_executor_nesting"] = flux_executor_nesting
        resource_dict["flux_log_files"] = flux_log_files
        if block_allocation:
            resource_dict["init_function"] = init_function
            max_workers = validate_number_of_cores(
                max_cores=max_cores,
                max_workers=max_workers,
                cores_per_worker=cores_per_worker,
                set_local_cores=False,
            )
            validate_max_workers_flux(
                max_workers=max_workers,
                cores=cores_per_worker,
                threads_per_core=resource_dict.get("threads_per_core", 1),
            )
            return InteractiveExecutor(
                max_workers=max_workers,
                executor_kwargs=resource_dict,
                spawner=FluxPythonSpawner,
            )
        else:
            return InteractiveStepExecutor(
                max_cores=max_cores,
                max_workers=max_workers,
                executor_kwargs=resource_dict,
                spawner=FluxPythonSpawner,
            )
    elif backend == "slurm_allocation":
        check_executor(executor=flux_executor)
        check_nested_flux_executor(nested_flux_executor=flux_executor_nesting)
        check_flux_log_files(flux_log_files=flux_log_files)
        if block_allocation:
            resource_dict["init_function"] = init_function
            max_workers = validate_number_of_cores(
                max_cores=max_cores,
                max_workers=max_workers,
                cores_per_worker=cores_per_worker,
                set_local_cores=False,
            )
            validate_max_workers_slurm(
                max_workers=max_workers,
                cores=cores_per_worker,
                threads_per_core=resource_dict.get("threads_per_core", 1),
            )
            return InteractiveExecutor(
                max_workers=max_workers,
                executor_kwargs=resource_dict,
                spawner=SrunSpawner,
            )
        else:
            return InteractiveStepExecutor(
                max_cores=max_cores,
                max_workers=max_workers,
                executor_kwargs=resource_dict,
                spawner=SrunSpawner,
            )
    elif backend == "local":
        check_executor(executor=flux_executor)
        check_nested_flux_executor(nested_flux_executor=flux_executor_nesting)
        check_flux_log_files(flux_log_files=flux_log_files)
        check_gpus_per_worker(gpus_per_worker=resource_dict.get("gpus_per_core", 0))
        check_command_line_argument_lst(
            command_line_argument_lst=resource_dict.get("slurm_cmd_args", [])
        )
        # These keys are validated above but not understood by mpiexec.
        resource_dict.pop("threads_per_core", None)
        resource_dict.pop("gpus_per_core", None)
        resource_dict.pop("slurm_cmd_args", None)
        if block_allocation:
            resource_dict["init_function"] = init_function
            return InteractiveExecutor(
                max_workers=validate_number_of_cores(
                    max_cores=max_cores,
                    max_workers=max_workers,
                    cores_per_worker=cores_per_worker,
                    set_local_cores=True,
                ),
                executor_kwargs=resource_dict,
                spawner=MpiExecSpawner,
            )
        else:
            return InteractiveStepExecutor(
                max_cores=max_cores,
                max_workers=max_workers,
                executor_kwargs=resource_dict,
                spawner=MpiExecSpawner,
            )
    else:
        raise ValueError(
            "The supported backends are slurm_allocation, slurm_submission, flux_allocation, flux_submission and local."
        )
@@ -0,0 +1,135 @@
1
+ import os
2
+ from typing import Optional
3
+
4
+ import flux
5
+ import flux.job
6
+
7
+ from executorlib.standalone.interactive.spawner import BaseSpawner
8
+
9
+
10
def validate_max_workers(max_workers: int, cores: int, threads_per_core: int):
    """
    Verify that the current Flux allocation has enough cores for the request.

    Args:
        max_workers (int): number of workers to spawn
        cores (int): MPI cores per worker
        threads_per_core (int): OpenMP threads per core

    Raises:
        ValueError: if more cores are requested than are currently up in the allocation
    """
    requested = max_workers * cores * threads_per_core
    available = flux.resource.list.resource_list(flux.Flux()).get().up.ncores
    if available < requested:
        raise ValueError(
            f"The number of requested cores is larger than the available cores {available} < {requested}"
        )
21
+
22
+
23
class FluxPythonSpawner(BaseSpawner):
    """
    Spawner that launches the client process as a job inside a Flux allocation.

    Args:
        cwd (str, optional): The current working directory. Defaults to None.
        cores (int, optional): The number of cores. Defaults to 1.
        threads_per_core (int, optional): The number of threads per base. Defaults to 1.
        gpus_per_core (int, optional): The number of GPUs per base. Defaults to 0.
        openmpi_oversubscribe (bool, optional): Whether to oversubscribe. Defaults to False.
        flux_executor (flux.job.FluxExecutor, optional): The FluxExecutor instance; created lazily when None.
        flux_executor_pmi_mode (str, optional): The PMI option. Defaults to None.
        flux_executor_nesting (bool, optional): Whether to use nested FluxExecutor. Defaults to False.
        flux_log_files (bool, optional): Write flux stdout and stderr files. Defaults to False.
    """

    def __init__(
        self,
        cwd: Optional[str] = None,
        cores: int = 1,
        threads_per_core: int = 1,
        gpus_per_core: int = 0,
        openmpi_oversubscribe: bool = False,
        flux_executor: Optional[flux.job.FluxExecutor] = None,
        flux_executor_pmi_mode: Optional[str] = None,
        flux_executor_nesting: bool = False,
        flux_log_files: bool = False,
    ):
        super().__init__(
            cwd=cwd,
            cores=cores,
            openmpi_oversubscribe=openmpi_oversubscribe,
        )
        self._threads_per_core = threads_per_core
        self._gpus_per_core = gpus_per_core
        self._flux_executor = flux_executor
        self._flux_executor_pmi_mode = flux_executor_pmi_mode
        self._flux_executor_nesting = flux_executor_nesting
        self._flux_log_files = flux_log_files
        # Future of the currently submitted flux job; None until bootup().
        self._future = None

    def bootup(
        self,
        command_lst: list[str],
    ):
        """
        Boot up the client process to connect to the SocketInterface.

        Args:
            command_lst (list[str]): List of strings to start the client process.

        Raises:
            ValueError: If oversubscribing is requested, which the Flux adapter does not support.
        """
        if self._openmpi_oversubscribe:
            raise ValueError(
                "Oversubscribing is currently not supported for the Flux adapter."
            )
        # Create the executor lazily so that users may inject their own.
        if self._flux_executor is None:
            self._flux_executor = flux.job.FluxExecutor()
        if self._flux_executor_nesting:
            spec = flux.job.JobspecV1.from_nest_command(
                command=command_lst,
                num_slots=self._cores,
                cores_per_slot=self._threads_per_core,
                gpus_per_slot=self._gpus_per_core,
                num_nodes=None,
                exclusive=False,
            )
        else:
            spec = flux.job.JobspecV1.from_command(
                command=command_lst,
                num_tasks=self._cores,
                cores_per_task=self._threads_per_core,
                gpus_per_task=self._gpus_per_core,
                num_nodes=None,
                exclusive=False,
            )
        # Forward the full parent environment to the spawned job.
        spec.environment = dict(os.environ)
        if self._flux_executor_pmi_mode is not None:
            spec.setattr_shell_option("pmi", self._flux_executor_pmi_mode)
        if self._cwd is not None:
            spec.cwd = self._cwd
        if self._flux_log_files:
            # Place the log files next to the job's working directory when one
            # is set, otherwise in the current process working directory.
            log_dir = self._cwd if self._cwd is not None else os.path.abspath(".")
            spec.stderr = os.path.join(log_dir, "flux.err")
            spec.stdout = os.path.join(log_dir, "flux.out")
        self._future = self._flux_executor.submit(spec)

    def shutdown(self, wait: bool = True):
        """
        Shutdown the FluxPythonInterface.

        Args:
            wait (bool, optional): Whether to wait for the execution to complete. Defaults to True.
        """
        if self._future is None:
            return
        if self.poll():
            self._future.cancel()
        # Flux future objects update asynchronously and may still report
        # running right after cancel(), so block until execution completed.
        self._future.result()

    def poll(self):
        """
        Check if the FluxPythonInterface is running.

        Returns:
            bool: True if the interface is running, False otherwise.
        """
        future = self._future
        return future is not None and not future.done()