executorlib 0.0.1__tar.gz → 0.0.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. {executorlib-0.0.1/executorlib.egg-info → executorlib-0.0.3}/PKG-INFO +6 -9
  2. {executorlib-0.0.1 → executorlib-0.0.3}/executorlib/__init__.py +37 -45
  3. {executorlib-0.0.1 → executorlib-0.0.3}/executorlib/_version.py +3 -3
  4. {executorlib-0.0.1 → executorlib-0.0.3}/executorlib/backend/cache_parallel.py +15 -1
  5. {executorlib-0.0.1 → executorlib-0.0.3}/executorlib/backend/interactive_parallel.py +9 -1
  6. {executorlib-0.0.1 → executorlib-0.0.3}/executorlib/backend/interactive_serial.py +11 -2
  7. {executorlib-0.0.1 → executorlib-0.0.3}/executorlib/cache/executor.py +8 -0
  8. {executorlib-0.0.1 → executorlib-0.0.3}/executorlib/cache/hdf.py +9 -20
  9. {executorlib-0.0.1 → executorlib-0.0.3}/executorlib/cache/shared.py +130 -13
  10. {executorlib-0.0.1 → executorlib-0.0.3}/executorlib/interactive/__init__.py +39 -55
  11. {executorlib-0.0.1 → executorlib-0.0.3}/executorlib/interactive/backend.py +26 -4
  12. {executorlib-0.0.1 → executorlib-0.0.3}/executorlib/interactive/dependencies.py +56 -5
  13. {executorlib-0.0.1 → executorlib-0.0.3}/executorlib/interactive/executor.py +7 -7
  14. executorlib-0.0.3/executorlib/interactive/flux.py +111 -0
  15. {executorlib-0.0.1 → executorlib-0.0.3}/executorlib/shared/__init__.py +3 -3
  16. {executorlib-0.0.1 → executorlib-0.0.3}/executorlib/shared/communication.py +27 -20
  17. {executorlib-0.0.1 → executorlib-0.0.3}/executorlib/shared/executor.py +65 -41
  18. {executorlib-0.0.1 → executorlib-0.0.3}/executorlib/shared/inputcheck.py +59 -40
  19. {executorlib-0.0.1 → executorlib-0.0.3}/executorlib/shared/plot.py +46 -0
  20. executorlib-0.0.3/executorlib/shared/spawner.py +255 -0
  21. {executorlib-0.0.1 → executorlib-0.0.3}/executorlib/shared/thread.py +10 -2
  22. {executorlib-0.0.1 → executorlib-0.0.3}/executorlib/shell/executor.py +13 -31
  23. {executorlib-0.0.1 → executorlib-0.0.3}/executorlib/shell/interactive.py +7 -4
  24. {executorlib-0.0.1 → executorlib-0.0.3/executorlib.egg-info}/PKG-INFO +6 -9
  25. {executorlib-0.0.1 → executorlib-0.0.3}/executorlib.egg-info/SOURCES.txt +1 -2
  26. executorlib-0.0.3/executorlib.egg-info/requires.txt +14 -0
  27. {executorlib-0.0.1 → executorlib-0.0.3}/pyproject.toml +5 -7
  28. {executorlib-0.0.1 → executorlib-0.0.3}/tests/test_dependencies_executor.py +1 -1
  29. {executorlib-0.0.1 → executorlib-0.0.3}/tests/test_executor_backend_flux.py +8 -8
  30. {executorlib-0.0.1 → executorlib-0.0.3}/tests/test_flux_executor.py +29 -24
  31. {executorlib-0.0.1 → executorlib-0.0.3}/tests/test_local_executor.py +40 -40
  32. {executorlib-0.0.1 → executorlib-0.0.3}/tests/test_local_executor_future.py +5 -5
  33. {executorlib-0.0.1 → executorlib-0.0.3}/tests/test_shared_backend.py +10 -7
  34. {executorlib-0.0.1 → executorlib-0.0.3}/tests/test_shared_communication.py +3 -3
  35. {executorlib-0.0.1 → executorlib-0.0.3}/tests/test_shared_input_check.py +17 -7
  36. executorlib-0.0.1/executorlib/interactive/flux.py +0 -79
  37. executorlib-0.0.1/executorlib/shared/interface.py +0 -154
  38. executorlib-0.0.1/executorlib.egg-info/requires.txt +0 -18
  39. executorlib-0.0.1/tests/test_executor_conda.py +0 -76
  40. {executorlib-0.0.1 → executorlib-0.0.3}/LICENSE +0 -0
  41. {executorlib-0.0.1 → executorlib-0.0.3}/MANIFEST.in +0 -0
  42. {executorlib-0.0.1 → executorlib-0.0.3}/README.md +0 -0
  43. {executorlib-0.0.1 → executorlib-0.0.3}/executorlib/backend/__init__.py +0 -0
  44. {executorlib-0.0.1 → executorlib-0.0.3}/executorlib/backend/cache_serial.py +0 -0
  45. {executorlib-0.0.1 → executorlib-0.0.3}/executorlib/cache/__init__.py +0 -0
  46. {executorlib-0.0.1 → executorlib-0.0.3}/executorlib/shell/__init__.py +0 -0
  47. {executorlib-0.0.1 → executorlib-0.0.3}/executorlib.egg-info/dependency_links.txt +0 -0
  48. {executorlib-0.0.1 → executorlib-0.0.3}/executorlib.egg-info/top_level.txt +0 -0
  49. {executorlib-0.0.1 → executorlib-0.0.3}/setup.cfg +0 -0
  50. {executorlib-0.0.1 → executorlib-0.0.3}/setup.py +0 -0
  51. {executorlib-0.0.1 → executorlib-0.0.3}/tests/test_backend_serial.py +0 -0
  52. {executorlib-0.0.1 → executorlib-0.0.3}/tests/test_cache_executor_mpi.py +0 -0
  53. {executorlib-0.0.1 → executorlib-0.0.3}/tests/test_cache_executor_serial.py +0 -0
  54. {executorlib-0.0.1 → executorlib-0.0.3}/tests/test_cache_hdf.py +0 -0
  55. {executorlib-0.0.1 → executorlib-0.0.3}/tests/test_cache_shared.py +0 -0
  56. {executorlib-0.0.1 → executorlib-0.0.3}/tests/test_executor_backend_mpi.py +0 -0
  57. {executorlib-0.0.1 → executorlib-0.0.3}/tests/test_executor_backend_mpi_noblock.py +0 -0
  58. {executorlib-0.0.1 → executorlib-0.0.3}/tests/test_integration_pyiron_workflow.py +0 -0
  59. {executorlib-0.0.1 → executorlib-0.0.3}/tests/test_shared_executorbase.py +0 -0
  60. {executorlib-0.0.1 → executorlib-0.0.3}/tests/test_shared_thread.py +0 -0
  61. {executorlib-0.0.1 → executorlib-0.0.3}/tests/test_shell_executor.py +0 -0
  62. {executorlib-0.0.1 → executorlib-0.0.3}/tests/test_shell_interactive.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: executorlib
3
- Version: 0.0.1
3
+ Version: 0.0.3
4
4
  Summary: Scale serial and MPI-parallel python functions over hundreds of compute nodes all from within a jupyter notebook or serial python process.
5
5
  Author-email: Jan Janssen <janssen@lanl.gov>
6
6
  License: BSD 3-Clause License
@@ -50,19 +50,16 @@ Requires-Python: <3.13,>=3.9
50
50
  Description-Content-Type: text/markdown
51
51
  License-File: LICENSE
52
52
  Requires-Dist: cloudpickle<=3.0.0,>=2.0.0
53
- Requires-Dist: pyzmq<=26.0.3,>=25.0.0
54
- Provides-Extra: conda
55
- Requires-Dist: conda_subprocess<=0.0.4,>=0.0.3; extra == "conda"
53
+ Requires-Dist: pyzmq<=26.2.0,>=25.0.0
56
54
  Provides-Extra: mpi
57
- Requires-Dist: mpi4py<=3.1.6,>=3.1.4; extra == "mpi"
55
+ Requires-Dist: mpi4py<=4.0.0,>=3.1.4; extra == "mpi"
58
56
  Provides-Extra: hdf
59
57
  Requires-Dist: h5py<=3.11.0,>=3.6.0; extra == "hdf"
60
- Requires-Dist: h5io<=0.2.3,>=0.2.1; extra == "hdf"
61
58
  Provides-Extra: graph
62
- Requires-Dist: pygraphviz<=1.13,>=1.10; extra == "graph"
63
- Requires-Dist: matplotlib<=3.9.1,>=3.5.3; extra == "graph"
59
+ Requires-Dist: pygraphviz<=1.14,>=1.10; extra == "graph"
60
+ Requires-Dist: matplotlib<=3.9.2,>=3.5.3; extra == "graph"
64
61
  Requires-Dist: networkx<=3.3,>=2.8.8; extra == "graph"
65
- Requires-Dist: ipython<=8.26.0,>=7.33.0; extra == "graph"
62
+ Requires-Dist: ipython<=8.27.0,>=7.33.0; extra == "graph"
66
63
 
67
64
  # executorlib
68
65
  [![Unittests](https://github.com/pyiron/executorlib/actions/workflows/unittest-openmpi.yml/badge.svg)](https://github.com/pyiron/executorlib/actions/workflows/unittest-openmpi.yml)
@@ -40,15 +40,17 @@ class Executor:
40
40
  max_workers (int): for backwards compatibility with the standard library, max_workers also defines the number of
41
41
  cores which can be used in parallel - just like the max_cores parameter. Using max_cores is
42
42
  recommended, as computers have a limited number of compute cores.
43
+ backend (str): Switch between the different backends "flux", "local" or "slurm". The default is "local".
43
44
  max_cores (int): defines the number cores which can be used in parallel
44
45
  cores_per_worker (int): number of MPI cores to be used for each function call
45
46
  threads_per_core (int): number of OpenMP threads to be used for each function call
46
47
  gpus_per_worker (int): number of GPUs per worker - defaults to 0
47
- oversubscribe (bool): adds the `--oversubscribe` command line flag (OpenMPI and SLURM only) - default False
48
48
  cwd (str/None): current working directory where the parallel python task is executed
49
- conda_environment_name (str): name of the conda environment to initialize
50
- conda_environment_path (str): path of the conda environment to initialize
51
- executor (flux.job.FluxExecutor): Flux Python interface to submit the workers to flux
49
+ openmpi_oversubscribe (bool): adds the `--oversubscribe` command line flag (OpenMPI and SLURM only) - default False
50
+ slurm_cmd_args (list): Additional command line arguments for the srun call (SLURM only)
51
+ flux_executor (flux.job.FluxExecutor): Flux Python interface to submit the workers to flux
52
+ flux_executor_pmi_mode (str): PMI interface to use (OpenMPI v5 requires pmix) default is None (Flux only)
53
+ flux_executor_nesting (bool): Provide hierarchically nested Flux job scheduler inside the submitted function.
52
54
  hostname_localhost (boolean): use localhost instead of the hostname to establish the zmq connection. In the
53
55
  context of an HPC cluster this essential to be able to communicate to an
54
56
  Executor running on a different compute node within the same allocation. And
@@ -56,15 +58,11 @@ class Executor:
56
58
  points to the same address as localhost. Still MacOS >= 12 seems to disable
57
59
  this look up for security reasons. So on MacOS it is required to set this
58
60
  option to true
59
- backend (str): Switch between the different backends "flux", "local" or "slurm". Alternatively, when "auto"
60
- is selected (the default) the available backend is determined automatically.
61
61
  block_allocation (boolean): To accelerate the submission of a series of python functions with the same resource
62
62
  requirements, executorlib supports block allocation. In this case all resources have
63
63
  to be defined on the executor, rather than during the submission of the individual
64
64
  function.
65
65
  init_function (None): optional function to preset arguments for functions which are submitted later
66
- command_line_argument_lst (list): Additional command line arguments for the srun call (SLURM only)
67
- pmi (str): PMI interface to use (OpenMPI v5 requires pmix) default is None (Flux only)
68
66
  disable_dependencies (boolean): Disable resolving future objects during the submission.
69
67
  refresh_rate (float): Set the refresh rate in seconds, how frequently the input queue is checked.
70
68
  plot_dependency_graph (bool): Plot the dependencies of multiple future objects without executing them. For
@@ -94,21 +92,20 @@ class Executor:
94
92
  def __init__(
95
93
  self,
96
94
  max_workers: int = 1,
95
+ backend: str = "local",
97
96
  max_cores: int = 1,
98
97
  cores_per_worker: int = 1,
99
98
  threads_per_core: int = 1,
100
99
  gpus_per_worker: int = 0,
101
- oversubscribe: bool = False,
102
100
  cwd: Optional[str] = None,
103
- conda_environment_name: Optional[str] = None,
104
- conda_environment_path: Optional[str] = None,
105
- executor=None,
101
+ openmpi_oversubscribe: bool = False,
102
+ slurm_cmd_args: list[str] = [],
103
+ flux_executor=None,
104
+ flux_executor_pmi_mode: Optional[str] = None,
105
+ flux_executor_nesting: bool = False,
106
106
  hostname_localhost: bool = False,
107
- backend: str = "auto",
108
107
  block_allocation: bool = True,
109
108
  init_function: Optional[callable] = None,
110
- command_line_argument_lst: list[str] = [],
111
- pmi: Optional[str] = None,
112
109
  disable_dependencies: bool = False,
113
110
  refresh_rate: float = 0.01,
114
111
  plot_dependency_graph: bool = False,
@@ -119,21 +116,20 @@ class Executor:
119
116
  def __new__(
120
117
  cls,
121
118
  max_workers: int = 1,
119
+ backend: str = "local",
122
120
  max_cores: int = 1,
123
121
  cores_per_worker: int = 1,
124
122
  threads_per_core: int = 1,
125
123
  gpus_per_worker: int = 0,
126
- oversubscribe: bool = False,
127
124
  cwd: Optional[str] = None,
128
- conda_environment_name: Optional[str] = None,
129
- conda_environment_path: Optional[str] = None,
130
- executor=None,
125
+ openmpi_oversubscribe: bool = False,
126
+ slurm_cmd_args: list[str] = [],
127
+ flux_executor=None,
128
+ flux_executor_pmi_mode: Optional[str] = None,
129
+ flux_executor_nesting: bool = False,
131
130
  hostname_localhost: bool = False,
132
- backend: str = "auto",
133
- block_allocation: bool = False,
131
+ block_allocation: bool = True,
134
132
  init_function: Optional[callable] = None,
135
- command_line_argument_lst: list[str] = [],
136
- pmi: Optional[str] = None,
137
133
  disable_dependencies: bool = False,
138
134
  refresh_rate: float = 0.01,
139
135
  plot_dependency_graph: bool = False,
@@ -150,15 +146,17 @@ class Executor:
150
146
  max_workers (int): for backwards compatibility with the standard library, max_workers also defines the
151
147
  number of cores which can be used in parallel - just like the max_cores parameter. Using
152
148
  max_cores is recommended, as computers have a limited number of compute cores.
149
+ backend (str): Switch between the different backends "flux", "local" or "slurm". The default is "local".
153
150
  max_cores (int): defines the number cores which can be used in parallel
154
151
  cores_per_worker (int): number of MPI cores to be used for each function call
155
152
  threads_per_core (int): number of OpenMP threads to be used for each function call
156
153
  gpus_per_worker (int): number of GPUs per worker - defaults to 0
157
- oversubscribe (bool): adds the `--oversubscribe` command line flag (OpenMPI and SLURM only) - default False
154
+ openmpi_oversubscribe (bool): adds the `--oversubscribe` command line flag (OpenMPI and SLURM only) - default False
155
+ slurm_cmd_args (list): Additional command line arguments for the srun call (SLURM only)
158
156
  cwd (str/None): current working directory where the parallel python task is executed
159
- conda_environment_name (str): name of the conda environment to initialize
160
- conda_environment_path (str): path of the conda environment to initialize
161
- executor (flux.job.FluxExecutor): Flux Python interface to submit the workers to flux
157
+ flux_executor (flux.job.FluxExecutor): Flux Python interface to submit the workers to flux
158
+ flux_executor_pmi_mode (str): PMI interface to use (OpenMPI v5 requires pmix) default is None (Flux only)
159
+ flux_executor_nesting (bool): Provide hierarchically nested Flux job scheduler inside the submitted function.
162
160
  hostname_localhost (boolean): use localhost instead of the hostname to establish the zmq connection. In the
163
161
  context of an HPC cluster this essential to be able to communicate to an
164
162
  Executor running on a different compute node within the same allocation. And
@@ -166,15 +164,11 @@ class Executor:
166
164
  points to the same address as localhost. Still MacOS >= 12 seems to disable
167
165
  this look up for security reasons. So on MacOS it is required to set this
168
166
  option to true
169
- backend (str): Switch between the different backends "flux", "local" or "slurm". Alternatively, when "auto"
170
- is selected (the default) the available backend is determined automatically.
171
167
  block_allocation (boolean): To accelerate the submission of a series of python functions with the same
172
168
  resource requirements, executorlib supports block allocation. In this case all
173
169
  resources have to be defined on the executor, rather than during the submission
174
170
  of the individual function.
175
171
  init_function (None): optional function to preset arguments for functions which are submitted later
176
- command_line_argument_lst (list): Additional command line arguments for the srun call (SLURM only)
177
- pmi (str): PMI interface to use (OpenMPI v5 requires pmix) default is None (Flux only)
178
172
  disable_dependencies (boolean): Disable resolving future objects during the submission.
179
173
  refresh_rate (float): Set the refresh rate in seconds, how frequently the input queue is checked.
180
174
  plot_dependency_graph (bool): Plot the dependencies of multiple future objects without executing them. For
@@ -184,21 +178,20 @@ class Executor:
184
178
  if not disable_dependencies:
185
179
  return ExecutorWithDependencies(
186
180
  max_workers=max_workers,
181
+ backend=backend,
187
182
  max_cores=max_cores,
188
183
  cores_per_worker=cores_per_worker,
189
184
  threads_per_core=threads_per_core,
190
185
  gpus_per_worker=gpus_per_worker,
191
- oversubscribe=oversubscribe,
192
186
  cwd=cwd,
193
- conda_environment_name=conda_environment_name,
194
- conda_environment_path=conda_environment_path,
195
- executor=executor,
187
+ openmpi_oversubscribe=openmpi_oversubscribe,
188
+ slurm_cmd_args=slurm_cmd_args,
189
+ flux_executor=flux_executor,
190
+ flux_executor_pmi_mode=flux_executor_pmi_mode,
191
+ flux_executor_nesting=flux_executor_nesting,
196
192
  hostname_localhost=hostname_localhost,
197
- backend=backend,
198
193
  block_allocation=block_allocation,
199
194
  init_function=init_function,
200
- command_line_argument_lst=command_line_argument_lst,
201
- pmi=pmi,
202
195
  refresh_rate=refresh_rate,
203
196
  plot_dependency_graph=plot_dependency_graph,
204
197
  )
@@ -207,19 +200,18 @@ class Executor:
207
200
  _check_refresh_rate(refresh_rate=refresh_rate)
208
201
  return create_executor(
209
202
  max_workers=max_workers,
203
+ backend=backend,
210
204
  max_cores=max_cores,
211
205
  cores_per_worker=cores_per_worker,
212
206
  threads_per_core=threads_per_core,
213
207
  gpus_per_worker=gpus_per_worker,
214
- oversubscribe=oversubscribe,
215
208
  cwd=cwd,
216
- conda_environment_name=conda_environment_name,
217
- conda_environment_path=conda_environment_path,
218
- executor=executor,
209
+ openmpi_oversubscribe=openmpi_oversubscribe,
210
+ slurm_cmd_args=slurm_cmd_args,
211
+ flux_executor=flux_executor,
212
+ flux_executor_pmi_mode=flux_executor_pmi_mode,
213
+ flux_executor_nesting=flux_executor_nesting,
219
214
  hostname_localhost=hostname_localhost,
220
- backend=backend,
221
215
  block_allocation=block_allocation,
222
216
  init_function=init_function,
223
- command_line_argument_lst=command_line_argument_lst,
224
- pmi=pmi,
225
217
  )
@@ -8,11 +8,11 @@ import json
8
8
 
9
9
  version_json = '''
10
10
  {
11
- "date": "2024-07-14T21:06:50+0200",
11
+ "date": "2024-10-01T11:57:02+0200",
12
12
  "dirty": true,
13
13
  "error": null,
14
- "full-revisionid": "0a84450d76b7081b62a9d948bac56f905d013b5d",
15
- "version": "0.0.1"
14
+ "full-revisionid": "aea7b11eb2b5399d4a943914aee3107f64d7f6cf",
15
+ "version": "0.0.3"
16
16
  }
17
17
  ''' # END VERSION_JSON
18
18
 
@@ -6,7 +6,21 @@ import cloudpickle
6
6
  from executorlib.cache.shared import backend_load_file, backend_write_file
7
7
 
8
8
 
9
- def main():
9
+ def main() -> None:
10
+ """
11
+ Main function for executing the cache_parallel script.
12
+
13
+ This function uses MPI (Message Passing Interface) to distribute the execution of a function
14
+ across multiple processes. It loads a file, broadcasts the data to all processes, executes
15
+ the function, gathers the results (if there are multiple processes), and writes the output
16
+ to a file.
17
+
18
+ Args:
19
+ None
20
+
21
+ Returns:
22
+ None
23
+ """
10
24
  from mpi4py import MPI
11
25
 
12
26
  MPI.pickle.__init__(
@@ -13,7 +13,15 @@ from executorlib.shared.communication import (
13
13
  )
14
14
 
15
15
 
16
- def main():
16
+ def main() -> None:
17
+ """
18
+ Entry point of the program.
19
+
20
+ This function initializes MPI, sets up the necessary communication, and executes the requested functions.
21
+
22
+ Returns:
23
+ None
24
+ """
17
25
  from mpi4py import MPI
18
26
 
19
27
  MPI.pickle.__init__(
@@ -1,6 +1,6 @@
1
1
  import sys
2
2
  from os.path import abspath
3
- from typing import Optional
3
+ from typing import List, Optional
4
4
 
5
5
  from executorlib.interactive.backend import call_funct, parse_arguments
6
6
  from executorlib.shared.communication import (
@@ -11,7 +11,16 @@ from executorlib.shared.communication import (
11
11
  )
12
12
 
13
13
 
14
- def main(argument_lst: Optional[list[str]] = None):
14
+ def main(argument_lst: Optional[List[str]] = None):
15
+ """
16
+ The main function of the program.
17
+
18
+ Args:
19
+ argument_lst (Optional[List[str]]): List of command line arguments. If None, sys.argv will be used.
20
+
21
+ Returns:
22
+ None
23
+ """
15
24
  if argument_lst is None:
16
25
  argument_lst = sys.argv
17
26
  argument_dict = parse_arguments(argument_lst=argument_lst)
@@ -12,6 +12,14 @@ class FileExecutor(ExecutorBase):
12
12
  execute_function: callable = execute_in_subprocess,
13
13
  cores_per_worker: int = 1,
14
14
  ):
15
+ """
16
+ Initialize the FileExecutor.
17
+
18
+ Args:
19
+ cache_directory (str, optional): The directory to store cache files. Defaults to "cache".
20
+ execute_function (callable, optional): The function to execute tasks. Defaults to execute_in_subprocess.
21
+ cores_per_worker (int, optional): The number of CPU cores per worker. Defaults to 1.
22
+ """
15
23
  super().__init__()
16
24
  cache_directory_path = os.path.abspath(cache_directory)
17
25
  os.makedirs(cache_directory_path, exist_ok=True)
@@ -1,12 +1,11 @@
1
1
  from typing import Tuple
2
2
 
3
3
  import cloudpickle
4
- import h5io
5
4
  import h5py
6
5
  import numpy as np
7
6
 
8
7
 
9
- def dump(file_name: str, data_dict: dict):
8
+ def dump(file_name: str, data_dict: dict) -> None:
10
9
  """
11
10
  Dump data dictionary into HDF5 file
12
11
 
@@ -23,11 +22,9 @@ def dump(file_name: str, data_dict: dict):
23
22
  with h5py.File(file_name, "a") as fname:
24
23
  for data_key, data_value in data_dict.items():
25
24
  if data_key in group_dict.keys():
26
- h5io.write_hdf5(
27
- fname=fname,
25
+ fname.create_dataset(
26
+ name="/" + group_dict[data_key],
28
27
  data=np.void(cloudpickle.dumps(data_value)),
29
- overwrite="update",
30
- title=group_dict[data_key],
31
28
  )
32
29
 
33
30
 
@@ -44,21 +41,15 @@ def load(file_name: str) -> dict:
44
41
  with h5py.File(file_name, "r") as hdf:
45
42
  data_dict = {}
46
43
  if "function" in hdf:
47
- data_dict["fn"] = cloudpickle.loads(
48
- h5io.read_hdf5(fname=hdf, title="function", slash="ignore")
49
- )
44
+ data_dict["fn"] = cloudpickle.loads(np.void(hdf["/function"]))
50
45
  else:
51
- raise TypeError
46
+ raise TypeError("Function not found in HDF5 file.")
52
47
  if "input_args" in hdf:
53
- data_dict["args"] = cloudpickle.loads(
54
- h5io.read_hdf5(fname=hdf, title="input_args", slash="ignore")
55
- )
48
+ data_dict["args"] = cloudpickle.loads(np.void(hdf["/input_args"]))
56
49
  else:
57
50
  data_dict["args"] = ()
58
51
  if "input_kwargs" in hdf:
59
- data_dict["kwargs"] = cloudpickle.loads(
60
- h5io.read_hdf5(fname=hdf, title="input_kwargs", slash="ignore")
61
- )
52
+ data_dict["kwargs"] = cloudpickle.loads(np.void(hdf["/input_kwargs"]))
62
53
  else:
63
54
  data_dict["kwargs"] = {}
64
55
  return data_dict
@@ -72,12 +63,10 @@ def get_output(file_name: str) -> Tuple[bool, object]:
72
63
  file_name (str): file name of the HDF5 file as absolute path
73
64
 
74
65
  Returns:
75
- (bool, object): boolean flag if output is available and the output object itself
66
+ Tuple[bool, object]: boolean flag indicating if output is available and the output object itself
76
67
  """
77
68
  with h5py.File(file_name, "r") as hdf:
78
69
  if "output" in hdf:
79
- return True, cloudpickle.loads(
80
- h5io.read_hdf5(fname=hdf, title="output", slash="ignore")
81
- )
70
+ return True, cloudpickle.loads(np.void(hdf["/output"]))
82
71
  else:
83
72
  return False, None
@@ -6,7 +6,7 @@ import re
6
6
  import subprocess
7
7
  import sys
8
8
  from concurrent.futures import Future
9
- from typing import Tuple
9
+ from typing import Any, Tuple
10
10
 
11
11
  import cloudpickle
12
12
 
@@ -16,20 +16,51 @@ from executorlib.shared.executor import get_command_path
16
16
 
17
17
  class FutureItem:
18
18
  def __init__(self, file_name: str):
19
+ """
20
+ Initialize a FutureItem object.
21
+
22
+ Args:
23
+ file_name (str): The name of the file.
24
+
25
+ """
19
26
  self._file_name = file_name
20
27
 
21
- def result(self):
28
+ def result(self) -> str:
29
+ """
30
+ Get the result of the future item.
31
+
32
+ Returns:
33
+ str: The result of the future item.
34
+
35
+ """
22
36
  exec_flag, result = get_output(file_name=self._file_name)
23
37
  if exec_flag:
24
38
  return result
25
39
  else:
26
40
  return self.result()
27
41
 
28
- def done(self):
42
+ def done(self) -> bool:
43
+ """
44
+ Check if the future item is done.
45
+
46
+ Returns:
47
+ bool: True if the future item is done, False otherwise.
48
+
49
+ """
29
50
  return get_output(file_name=self._file_name)[0]
30
51
 
31
52
 
32
53
  def backend_load_file(file_name: str) -> dict:
54
+ """
55
+ Load the data from an HDF5 file and convert FutureItem objects to their results.
56
+
57
+ Args:
58
+ file_name (str): The name of the HDF5 file.
59
+
60
+ Returns:
61
+ dict: The loaded data from the file.
62
+
63
+ """
33
64
  apply_dict = load(file_name=file_name)
34
65
  apply_dict["args"] = [
35
66
  arg if not isinstance(arg, FutureItem) else arg.result()
@@ -42,7 +73,18 @@ def backend_load_file(file_name: str) -> dict:
42
73
  return apply_dict
43
74
 
44
75
 
45
- def backend_write_file(file_name: str, output):
76
+ def backend_write_file(file_name: str, output: Any) -> None:
77
+ """
78
+ Write the output to an HDF5 file.
79
+
80
+ Args:
81
+ file_name (str): The name of the HDF5 file.
82
+ output (Any): The output to be written.
83
+
84
+ Returns:
85
+ None
86
+
87
+ """
46
88
  file_name_out = os.path.splitext(file_name)[0]
47
89
  os.rename(file_name, file_name_out + ".h5ready")
48
90
  dump(file_name=file_name_out + ".h5ready", data_dict={"output": output})
@@ -52,6 +94,17 @@ def backend_write_file(file_name: str, output):
52
94
  def execute_in_subprocess(
53
95
  command: list, task_dependent_lst: list = []
54
96
  ) -> subprocess.Popen:
97
+ """
98
+ Execute a command in a subprocess.
99
+
100
+ Args:
101
+ command (list): The command to be executed.
102
+ task_dependent_lst (list, optional): A list of subprocesses that the current subprocess depends on. Defaults to [].
103
+
104
+ Returns:
105
+ subprocess.Popen: The subprocess object.
106
+
107
+ """
55
108
  while len(task_dependent_lst) > 0:
56
109
  task_dependent_lst = [
57
110
  task for task in task_dependent_lst if task.poll() is None
@@ -64,7 +117,20 @@ def execute_tasks_h5(
64
117
  cache_directory: str,
65
118
  cores_per_worker: int,
66
119
  execute_function: callable,
67
- ):
120
+ ) -> None:
121
+ """
122
+ Execute tasks stored in a queue using HDF5 files.
123
+
124
+ Args:
125
+ future_queue (queue.Queue): The queue containing the tasks.
126
+ cache_directory (str): The directory to store the HDF5 files.
127
+ cores_per_worker (int): The number of cores per worker.
128
+ execute_function (callable): The function to execute the tasks.
129
+
130
+ Returns:
131
+ None
132
+
133
+ """
68
134
  memory_dict, process_dict, file_name_dict = {}, {}, {}
69
135
  while True:
70
136
  task_dict = None
@@ -117,12 +183,15 @@ def execute_tasks_h5(
117
183
  }
118
184
 
119
185
 
120
- def execute_task_in_file(file_name: str):
186
+ def execute_task_in_file(file_name: str) -> None:
121
187
  """
122
- Execute the task stored in a given HDF5 file
188
+ Execute the task stored in a given HDF5 file.
123
189
 
124
190
  Args:
125
- file_name (str): file name of the HDF5 file as absolute path
191
+ file_name (str): The file name of the HDF5 file as an absolute path.
192
+
193
+ Returns:
194
+ None
126
195
  """
127
196
  apply_dict = backend_load_file(file_name=file_name)
128
197
  result = apply_dict["fn"].__call__(*apply_dict["args"], **apply_dict["kwargs"])
@@ -135,9 +204,11 @@ def execute_task_in_file(file_name: str):
135
204
  def _get_execute_command(file_name: str, cores: int = 1) -> list:
136
205
  """
137
206
  Get command to call backend as a list of two strings
207
+
138
208
  Args:
139
- file_name (str):
140
- cores (int): Number of cores used to execute the task, if it is greater than one use interactive_parallel.py else interactive_serial.py
209
+ file_name (str): The name of the file.
210
+ cores (int, optional): Number of cores used to execute the task. Defaults to 1.
211
+
141
212
  Returns:
142
213
  list[str]: List of strings containing the python executable path and the backend script to execute
143
214
  """
@@ -157,13 +228,35 @@ def _get_execute_command(file_name: str, cores: int = 1) -> list:
157
228
  return command_lst
158
229
 
159
230
 
160
- def _get_hash(binary: bytes):
231
+ def _get_hash(binary: bytes) -> str:
232
+ """
233
+ Get the hash of a binary.
234
+
235
+ Args:
236
+ binary (bytes): The binary to be hashed.
237
+
238
+ Returns:
239
+ str: The hash of the binary.
240
+
241
+ """
161
242
  # Remove specification of jupyter kernel from hash to be deterministic
162
243
  binary_no_ipykernel = re.sub(b"(?<=/ipykernel_)(.*)(?=/)", b"", binary)
163
244
  return str(hashlib.md5(binary_no_ipykernel).hexdigest())
164
245
 
165
246
 
166
- def _serialize_funct_h5(fn: callable, *args, **kwargs):
247
+ def _serialize_funct_h5(fn: callable, *args: Any, **kwargs: Any) -> Tuple[str, dict]:
248
+ """
249
+ Serialize a function and its arguments and keyword arguments into an HDF5 file.
250
+
251
+ Args:
252
+ fn (callable): The function to be serialized.
253
+ *args (Any): The arguments of the function.
254
+ **kwargs (Any): The keyword arguments of the function.
255
+
256
+ Returns:
257
+ Tuple[str, dict]: A tuple containing the task key and the serialized data.
258
+
259
+ """
167
260
  binary_all = cloudpickle.dumps({"fn": fn, "args": args, "kwargs": kwargs})
168
261
  task_key = fn.__name__ + _get_hash(binary=binary_all)
169
262
  data = {"fn": fn, "args": args, "kwargs": kwargs}
@@ -173,6 +266,18 @@ def _serialize_funct_h5(fn: callable, *args, **kwargs):
173
266
  def _check_task_output(
174
267
  task_key: str, future_obj: Future, cache_directory: str
175
268
  ) -> Future:
269
+ """
270
+ Check the output of a task and set the result of the future object if available.
271
+
272
+ Args:
273
+ task_key (str): The key of the task.
274
+ future_obj (Future): The future object associated with the task.
275
+ cache_directory (str): The directory where the HDF5 files are stored.
276
+
277
+ Returns:
278
+ Future: The updated future object.
279
+
280
+ """
176
281
  file_name = os.path.join(cache_directory, task_key + ".h5out")
177
282
  if not os.path.exists(file_name):
178
283
  return future_obj
@@ -184,7 +289,19 @@ def _check_task_output(
184
289
 
185
290
  def _convert_args_and_kwargs(
186
291
  task_dict: dict, memory_dict: dict, file_name_dict: dict
187
- ) -> Tuple:
292
+ ) -> Tuple[list, dict, list]:
293
+ """
294
+ Convert the arguments and keyword arguments in a task dictionary to the appropriate types.
295
+
296
+ Args:
297
+ task_dict (dict): The task dictionary containing the arguments and keyword arguments.
298
+ memory_dict (dict): The dictionary mapping future objects to their associated task keys.
299
+ file_name_dict (dict): The dictionary mapping task keys to their corresponding file names.
300
+
301
+ Returns:
302
+ Tuple[list, dict, list]: A tuple containing the converted arguments, converted keyword arguments, and a list of future wait keys.
303
+
304
+ """
188
305
  task_args = []
189
306
  task_kwargs = {}
190
307
  future_wait_key_lst = []