executorlib 1.5.2__tar.gz → 1.5.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. {executorlib-1.5.2 → executorlib-1.5.3}/PKG-INFO +1 -1
  2. {executorlib-1.5.2 → executorlib-1.5.3}/executorlib/_version.py +2 -2
  3. {executorlib-1.5.2 → executorlib-1.5.3}/executorlib/api.py +2 -0
  4. {executorlib-1.5.2 → executorlib-1.5.3}/executorlib/executor/flux.py +6 -1
  5. {executorlib-1.5.2 → executorlib-1.5.3}/executorlib/executor/single.py +169 -0
  6. {executorlib-1.5.2 → executorlib-1.5.3}/executorlib/executor/slurm.py +6 -1
  7. {executorlib-1.5.2 → executorlib-1.5.3}/executorlib/standalone/inputcheck.py +14 -0
  8. {executorlib-1.5.2 → executorlib-1.5.3}/executorlib/task_scheduler/file/hdf.py +1 -1
  9. {executorlib-1.5.2 → executorlib-1.5.3}/executorlib/task_scheduler/file/queue_spawner.py +36 -4
  10. {executorlib-1.5.2 → executorlib-1.5.3}/executorlib/task_scheduler/file/shared.py +25 -5
  11. {executorlib-1.5.2 → executorlib-1.5.3}/executorlib/task_scheduler/file/subprocess_spawner.py +11 -4
  12. {executorlib-1.5.2 → executorlib-1.5.3}/executorlib/task_scheduler/file/task_scheduler.py +14 -5
  13. {executorlib-1.5.2 → executorlib-1.5.3}/.gitignore +0 -0
  14. {executorlib-1.5.2 → executorlib-1.5.3}/LICENSE +0 -0
  15. {executorlib-1.5.2 → executorlib-1.5.3}/README.md +0 -0
  16. {executorlib-1.5.2 → executorlib-1.5.3}/executorlib/__init__.py +0 -0
  17. {executorlib-1.5.2 → executorlib-1.5.3}/executorlib/backend/__init__.py +0 -0
  18. {executorlib-1.5.2 → executorlib-1.5.3}/executorlib/backend/cache_parallel.py +0 -0
  19. {executorlib-1.5.2 → executorlib-1.5.3}/executorlib/backend/cache_serial.py +0 -0
  20. {executorlib-1.5.2 → executorlib-1.5.3}/executorlib/backend/interactive_parallel.py +0 -0
  21. {executorlib-1.5.2 → executorlib-1.5.3}/executorlib/backend/interactive_serial.py +0 -0
  22. {executorlib-1.5.2 → executorlib-1.5.3}/executorlib/executor/__init__.py +0 -0
  23. {executorlib-1.5.2 → executorlib-1.5.3}/executorlib/executor/base.py +0 -0
  24. {executorlib-1.5.2 → executorlib-1.5.3}/executorlib/standalone/__init__.py +0 -0
  25. {executorlib-1.5.2 → executorlib-1.5.3}/executorlib/standalone/cache.py +0 -0
  26. {executorlib-1.5.2 → executorlib-1.5.3}/executorlib/standalone/command.py +0 -0
  27. {executorlib-1.5.2 → executorlib-1.5.3}/executorlib/standalone/interactive/__init__.py +0 -0
  28. {executorlib-1.5.2 → executorlib-1.5.3}/executorlib/standalone/interactive/arguments.py +0 -0
  29. {executorlib-1.5.2 → executorlib-1.5.3}/executorlib/standalone/interactive/backend.py +0 -0
  30. {executorlib-1.5.2 → executorlib-1.5.3}/executorlib/standalone/interactive/communication.py +0 -0
  31. {executorlib-1.5.2 → executorlib-1.5.3}/executorlib/standalone/interactive/spawner.py +0 -0
  32. {executorlib-1.5.2 → executorlib-1.5.3}/executorlib/standalone/plot.py +0 -0
  33. {executorlib-1.5.2 → executorlib-1.5.3}/executorlib/standalone/queue.py +0 -0
  34. {executorlib-1.5.2 → executorlib-1.5.3}/executorlib/standalone/serialize.py +0 -0
  35. {executorlib-1.5.2 → executorlib-1.5.3}/executorlib/task_scheduler/__init__.py +0 -0
  36. {executorlib-1.5.2 → executorlib-1.5.3}/executorlib/task_scheduler/base.py +0 -0
  37. {executorlib-1.5.2 → executorlib-1.5.3}/executorlib/task_scheduler/file/__init__.py +0 -0
  38. {executorlib-1.5.2 → executorlib-1.5.3}/executorlib/task_scheduler/file/backend.py +0 -0
  39. {executorlib-1.5.2 → executorlib-1.5.3}/executorlib/task_scheduler/interactive/__init__.py +0 -0
  40. {executorlib-1.5.2 → executorlib-1.5.3}/executorlib/task_scheduler/interactive/blockallocation.py +0 -0
  41. {executorlib-1.5.2 → executorlib-1.5.3}/executorlib/task_scheduler/interactive/dependency.py +0 -0
  42. {executorlib-1.5.2 → executorlib-1.5.3}/executorlib/task_scheduler/interactive/fluxspawner.py +0 -0
  43. {executorlib-1.5.2 → executorlib-1.5.3}/executorlib/task_scheduler/interactive/onetoone.py +0 -0
  44. {executorlib-1.5.2 → executorlib-1.5.3}/executorlib/task_scheduler/interactive/shared.py +0 -0
  45. {executorlib-1.5.2 → executorlib-1.5.3}/executorlib/task_scheduler/interactive/slurmspawner.py +0 -0
  46. {executorlib-1.5.2 → executorlib-1.5.3}/pyproject.toml +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: executorlib
3
- Version: 1.5.2
3
+ Version: 1.5.3
4
4
  Summary: Up-scale python functions for high performance computing (HPC) with executorlib.
5
5
  Project-URL: Homepage, https://github.com/pyiron/executorlib
6
6
  Project-URL: Documentation, https://executorlib.readthedocs.io
@@ -17,5 +17,5 @@ __version__: str
17
17
  __version_tuple__: VERSION_TUPLE
18
18
  version_tuple: VERSION_TUPLE
19
19
 
20
- __version__ = version = '1.5.2'
21
- __version_tuple__ = version_tuple = (1, 5, 2)
20
+ __version__ = version = '1.5.3'
21
+ __version_tuple__ = version_tuple = (1, 5, 3)
@@ -5,6 +5,7 @@ only use the functionality in this API in combination with the user interface de
5
5
  functionality is considered internal and might change during minor releases.
6
6
  """
7
7
 
8
+ from executorlib.executor.single import TestClusterExecutor
8
9
  from executorlib.standalone.command import get_command_path
9
10
  from executorlib.standalone.interactive.communication import (
10
11
  SocketInterface,
@@ -19,6 +20,7 @@ from executorlib.standalone.queue import cancel_items_in_queue
19
20
  from executorlib.standalone.serialize import cloudpickle_register
20
21
 
21
22
  __all__: list[str] = [
23
+ "TestClusterExecutor",
22
24
  "cancel_items_in_queue",
23
25
  "cloudpickle_register",
24
26
  "get_command_path",
@@ -4,6 +4,7 @@ from executorlib.executor.base import BaseExecutor
4
4
  from executorlib.standalone.inputcheck import (
5
5
  check_command_line_argument_lst,
6
6
  check_init_function,
7
+ check_log_obj_size,
7
8
  check_oversubscribe,
8
9
  check_plot_dependency_graph,
9
10
  check_pmi,
@@ -246,6 +247,7 @@ class FluxClusterExecutor(BaseExecutor):
246
247
  plot_dependency_graph (bool): Plot the dependencies of multiple future objects without executing them. For
247
248
  debugging purposes and to get an overview of the specified dependencies.
248
249
  plot_dependency_graph_filename (str): Name of the file to store the plotted graph in.
250
+ log_obj_size (bool): Enable debug mode which reports the size of the communicated objects.
249
251
 
250
252
  Examples:
251
253
  ```
@@ -282,6 +284,7 @@ class FluxClusterExecutor(BaseExecutor):
282
284
  refresh_rate: float = 0.01,
283
285
  plot_dependency_graph: bool = False,
284
286
  plot_dependency_graph_filename: Optional[str] = None,
287
+ log_obj_size: bool = False,
285
288
  ):
286
289
  """
287
290
  The executorlib.FluxClusterExecutor leverages either the message passing interface (MPI), the SLURM workload
@@ -323,6 +326,7 @@ class FluxClusterExecutor(BaseExecutor):
323
326
  plot_dependency_graph (bool): Plot the dependencies of multiple future objects without executing them. For
324
327
  debugging purposes and to get an overview of the specified dependencies.
325
328
  plot_dependency_graph_filename (str): Name of the file to store the plotted graph in.
329
+ log_obj_size (bool): Enable debug mode which reports the size of the communicated objects.
326
330
 
327
331
  """
328
332
  default_resource_dict: dict = {
@@ -338,6 +342,7 @@ class FluxClusterExecutor(BaseExecutor):
338
342
  resource_dict.update(
339
343
  {k: v for k, v in default_resource_dict.items() if k not in resource_dict}
340
344
  )
345
+ check_log_obj_size(log_obj_size=log_obj_size)
341
346
  if not plot_dependency_graph:
342
347
  import pysqa # noqa
343
348
 
@@ -348,7 +353,7 @@ class FluxClusterExecutor(BaseExecutor):
348
353
  super().__init__(
349
354
  executor=create_file_executor(
350
355
  max_workers=max_workers,
351
- backend="flux_submission",
356
+ backend="flux",
352
357
  max_cores=max_cores,
353
358
  cache_directory=cache_directory,
354
359
  resource_dict=resource_dict,
@@ -56,6 +56,7 @@ class SingleNodeExecutor(BaseExecutor):
56
56
  plot_dependency_graph (bool): Plot the dependencies of multiple future objects without executing them. For
57
57
  debugging purposes and to get an overview of the specified dependencies.
58
58
  plot_dependency_graph_filename (str): Name of the file to store the plotted graph in.
59
+ log_obj_size (bool): Enable debug mode which reports the size of the communicated objects.
59
60
 
60
61
  Examples:
61
62
  ```
@@ -184,6 +185,174 @@ class SingleNodeExecutor(BaseExecutor):
184
185
  )
185
186
 
186
187
 
188
+ class TestClusterExecutor(BaseExecutor):
189
+ """
190
+ The executorlib.api.TestClusterExecutor is designed to test the file based communication used in the
191
+ SlurmClusterExecutor and the FluxClusterExecutor locally. It is not recommended for production use, rather use the
192
+ SingleNodeExecutor.
193
+
194
+ Args:
195
+ max_workers (int): for backwards compatibility with the standard library, max_workers also defines the number of
196
+ cores which can be used in parallel - just like the max_cores parameter. Using max_cores is
197
+ recommended, as computers have a limited number of compute cores.
198
+ cache_directory (str, optional): The directory to store cache files. Defaults to "executorlib_cache".
199
+ max_cores (int): defines the number of cores which can be used in parallel
200
+ resource_dict (dict): A dictionary of resources required by the task. With the following keys:
201
+ - cores (int): number of MPI cores to be used for each function call
202
+ - threads_per_core (int): number of OpenMP threads to be used for each function call
203
+ - gpus_per_core (int): number of GPUs per worker - defaults to 0
204
+ - cwd (str/None): current working directory where the parallel python task is executed
205
+ hostname_localhost (boolean): use localhost instead of the hostname to establish the zmq connection. In the
206
+ context of an HPC cluster this is essential to be able to communicate to an
207
+ Executor running on a different compute node within the same allocation. And
208
+ in principle any computer should be able to resolve that their own hostname
209
+ points to the same address as localhost. Still MacOS >= 12 seems to disable
210
+ this look up for security reasons. So on MacOS it is required to set this
211
+ option to true
212
+ block_allocation (boolean): To accelerate the submission of a series of python functions with the same resource
213
+ requirements, executorlib supports block allocation. In this case all resources have
214
+ to be defined on the executor, rather than during the submission of the individual
215
+ function.
216
+ init_function (None): optional function to preset arguments for functions which are submitted later
217
+ disable_dependencies (boolean): Disable resolving future objects during the submission.
218
+ refresh_rate (float): Set the refresh rate in seconds, how frequently the input queue is checked.
219
+ plot_dependency_graph (bool): Plot the dependencies of multiple future objects without executing them. For
220
+ debugging purposes and to get an overview of the specified dependencies.
221
+ plot_dependency_graph_filename (str): Name of the file to store the plotted graph in.
222
+ log_obj_size (bool): Enable debug mode which reports the size of the communicated objects.
223
+
224
+ Examples:
225
+ ```
226
+ >>> import numpy as np
227
+ >>> from executorlib.api import TestClusterExecutor
228
+ >>>
229
+ >>> def calc(i, j, k):
230
+ >>> from mpi4py import MPI
231
+ >>> size = MPI.COMM_WORLD.Get_size()
232
+ >>> rank = MPI.COMM_WORLD.Get_rank()
233
+ >>> return np.array([i, j, k]), size, rank
234
+ >>>
235
+ >>> def init_k():
236
+ >>> return {"k": 3}
237
+ >>>
238
+ >>> with TestClusterExecutor(max_workers=2, init_function=init_k) as p:
239
+ >>> fs = p.submit(calc, 2, j=4)
240
+ >>> print(fs.result())
241
+ [(array([2, 4, 3]), 2, 0), (array([2, 4, 3]), 2, 1)]
242
+ ```
243
+ """
244
+
245
+ def __init__(
246
+ self,
247
+ max_workers: Optional[int] = None,
248
+ cache_directory: Optional[str] = None,
249
+ max_cores: Optional[int] = None,
250
+ resource_dict: Optional[dict] = None,
251
+ hostname_localhost: Optional[bool] = None,
252
+ block_allocation: bool = False,
253
+ init_function: Optional[Callable] = None,
254
+ disable_dependencies: bool = False,
255
+ refresh_rate: float = 0.01,
256
+ plot_dependency_graph: bool = False,
257
+ plot_dependency_graph_filename: Optional[str] = None,
258
+ log_obj_size: bool = False,
259
+ ):
260
+ """
261
+ The executorlib.api.TestClusterExecutor is designed to test the file based communication used in the
262
+ SlurmClusterExecutor and the FluxClusterExecutor locally. It is not recommended for production use, rather use
263
+ the SingleNodeExecutor.
264
+
265
+ Args:
266
+ max_workers (int): for backwards compatibility with the standard library, max_workers also defines the
267
+ number of cores which can be used in parallel - just like the max_cores parameter. Using
268
+ max_cores is recommended, as computers have a limited number of compute cores.
269
+ cache_directory (str, optional): The directory to store cache files. Defaults to "executorlib_cache".
270
+ max_cores (int): defines the number of cores which can be used in parallel
271
+ resource_dict (dict): A dictionary of resources required by the task. With the following keys:
272
+ - cores (int): number of MPI cores to be used for each function call
273
+ - threads_per_core (int): number of OpenMP threads to be used for each function call
274
+ - gpus_per_core (int): number of GPUs per worker - defaults to 0
275
+ - cwd (str/None): current working directory where the parallel python task is executed
276
+ hostname_localhost (boolean): use localhost instead of the hostname to establish the zmq connection. In the
277
+ context of an HPC cluster this is essential to be able to communicate to an
278
+ Executor running on a different compute node within the same allocation. And
279
+ in principle any computer should be able to resolve that their own hostname
280
+ points to the same address as localhost. Still MacOS >= 12 seems to disable
281
+ this look up for security reasons. So on MacOS it is required to set this
282
+ option to true
283
+ block_allocation (boolean): To accelerate the submission of a series of python functions with the same
284
+ resource requirements, executorlib supports block allocation. In this case all
285
+ resources have to be defined on the executor, rather than during the submission
286
+ of the individual function.
287
+ init_function (None): optional function to preset arguments for functions which are submitted later
288
+ disable_dependencies (boolean): Disable resolving future objects during the submission.
289
+ refresh_rate (float): Set the refresh rate in seconds, how frequently the input queue is checked.
290
+ plot_dependency_graph (bool): Plot the dependencies of multiple future objects without executing them. For
291
+ debugging purposes and to get an overview of the specified dependencies.
292
+ plot_dependency_graph_filename (str): Name of the file to store the plotted graph in.
293
+ log_obj_size (bool): Enable debug mode which reports the size of the communicated objects.
294
+
295
+ """
296
+ default_resource_dict: dict = {
297
+ "cores": 1,
298
+ "threads_per_core": 1,
299
+ "gpus_per_core": 0,
300
+ "cwd": None,
301
+ "openmpi_oversubscribe": False,
302
+ }
303
+ if resource_dict is None:
304
+ resource_dict = {}
305
+ resource_dict.update(
306
+ {k: v for k, v in default_resource_dict.items() if k not in resource_dict}
307
+ )
308
+ if not plot_dependency_graph:
309
+ from executorlib.task_scheduler.file.subprocess_spawner import (
310
+ execute_in_subprocess,
311
+ )
312
+ from executorlib.task_scheduler.file.task_scheduler import (
313
+ create_file_executor,
314
+ )
315
+
316
+ super().__init__(
317
+ executor=create_file_executor(
318
+ max_workers=max_workers,
319
+ backend=None,
320
+ max_cores=max_cores,
321
+ cache_directory=cache_directory,
322
+ resource_dict=resource_dict,
323
+ flux_executor=None,
324
+ flux_executor_pmi_mode=None,
325
+ flux_executor_nesting=False,
326
+ flux_log_files=False,
327
+ pysqa_config_directory=None,
328
+ hostname_localhost=hostname_localhost,
329
+ block_allocation=block_allocation,
330
+ init_function=init_function,
331
+ disable_dependencies=disable_dependencies,
332
+ execute_function=execute_in_subprocess,
333
+ )
334
+ )
335
+ else:
336
+ super().__init__(
337
+ executor=DependencyTaskScheduler(
338
+ executor=create_single_node_executor(
339
+ max_workers=max_workers,
340
+ cache_directory=cache_directory,
341
+ max_cores=max_cores,
342
+ resource_dict=resource_dict,
343
+ hostname_localhost=hostname_localhost,
344
+ block_allocation=block_allocation,
345
+ init_function=init_function,
346
+ log_obj_size=log_obj_size,
347
+ ),
348
+ max_cores=max_cores,
349
+ refresh_rate=refresh_rate,
350
+ plot_dependency_graph=plot_dependency_graph,
351
+ plot_dependency_graph_filename=plot_dependency_graph_filename,
352
+ )
353
+ )
354
+
355
+
187
356
  def create_single_node_executor(
188
357
  max_workers: Optional[int] = None,
189
358
  max_cores: Optional[int] = None,
@@ -3,6 +3,7 @@ from typing import Callable, Optional, Union
3
3
  from executorlib.executor.base import BaseExecutor
4
4
  from executorlib.standalone.inputcheck import (
5
5
  check_init_function,
6
+ check_log_obj_size,
6
7
  check_plot_dependency_graph,
7
8
  check_refresh_rate,
8
9
  validate_number_of_cores,
@@ -58,6 +59,7 @@ class SlurmClusterExecutor(BaseExecutor):
58
59
  plot_dependency_graph (bool): Plot the dependencies of multiple future objects without executing them. For
59
60
  debugging purposes and to get an overview of the specified dependencies.
60
61
  plot_dependency_graph_filename (str): Name of the file to store the plotted graph in.
62
+ log_obj_size (bool): Enable debug mode which reports the size of the communicated objects.
61
63
 
62
64
  Examples:
63
65
  ```
@@ -94,6 +96,7 @@ class SlurmClusterExecutor(BaseExecutor):
94
96
  refresh_rate: float = 0.01,
95
97
  plot_dependency_graph: bool = False,
96
98
  plot_dependency_graph_filename: Optional[str] = None,
99
+ log_obj_size: bool = False,
97
100
  ):
98
101
  """
99
102
  The executorlib.SlurmClusterExecutor leverages either the message passing interface (MPI), the SLURM workload
@@ -135,6 +138,7 @@ class SlurmClusterExecutor(BaseExecutor):
135
138
  plot_dependency_graph (bool): Plot the dependencies of multiple future objects without executing them. For
136
139
  debugging purposes and to get an overview of the specified dependencies.
137
140
  plot_dependency_graph_filename (str): Name of the file to store the plotted graph in.
141
+ log_obj_size (bool): Enable debug mode which reports the size of the communicated objects.
138
142
 
139
143
  """
140
144
  default_resource_dict: dict = {
@@ -150,6 +154,7 @@ class SlurmClusterExecutor(BaseExecutor):
150
154
  resource_dict.update(
151
155
  {k: v for k, v in default_resource_dict.items() if k not in resource_dict}
152
156
  )
157
+ check_log_obj_size(log_obj_size=log_obj_size)
153
158
  if not plot_dependency_graph:
154
159
  import pysqa # noqa
155
160
 
@@ -160,7 +165,7 @@ class SlurmClusterExecutor(BaseExecutor):
160
165
  super().__init__(
161
166
  executor=create_file_executor(
162
167
  max_workers=max_workers,
163
- backend="slurm_submission",
168
+ backend="slurm",
164
169
  max_cores=max_cores,
165
170
  cache_directory=cache_directory,
166
171
  resource_dict=resource_dict,
@@ -194,7 +194,21 @@ def validate_number_of_cores(
194
194
 
195
195
 
196
196
  def check_file_exists(file_name: Optional[str]):
197
+ """
198
+ Check if file exists and raise a ValueError if it does not or file_name is None.
199
+ """
197
200
  if file_name is None:
198
201
  raise ValueError("file_name is not set.")
199
202
  if not os.path.exists(file_name):
200
203
  raise ValueError("file_name is not written to the file system.")
204
+
205
+
206
+ def check_log_obj_size(log_obj_size: bool) -> None:
207
+ """
208
+ Check if log_obj_size is True and raise a ValueError if it is.
209
+ """
210
+ if log_obj_size:
211
+ raise ValueError(
212
+ "log_obj_size is not supported for the executorlib.SlurmClusterExecutor and executorlib.FluxClusterExecutor."
213
+ "Please use log_obj_size=False instead of log_obj_size=True."
214
+ )
@@ -101,7 +101,7 @@ def get_queue_id(file_name: Optional[str]) -> Optional[int]:
101
101
  Returns:
102
102
  int: queuing system id from the execution of the python function
103
103
  """
104
- if file_name is not None:
104
+ if file_name is not None and os.path.exists(file_name):
105
105
  with h5py.File(file_name, "r") as hdf:
106
106
  if "queue_id" in hdf:
107
107
  return cloudpickle.loads(np.void(hdf["/queue_id"]))
@@ -10,9 +10,10 @@ from executorlib.task_scheduler.file.hdf import dump, get_queue_id
10
10
 
11
11
  def execute_with_pysqa(
12
12
  command: list,
13
+ file_name: str,
14
+ data_dict: dict,
13
15
  cache_directory: str,
14
16
  task_dependent_lst: Optional[list[int]] = None,
15
- file_name: Optional[str] = None,
16
17
  resource_dict: Optional[dict] = None,
17
18
  config_directory: Optional[str] = None,
18
19
  backend: Optional[str] = None,
@@ -22,9 +23,10 @@ def execute_with_pysqa(
22
23
 
23
24
  Args:
24
25
  command (list): The command to be executed.
26
+ file_name (str): Name of the HDF5 file which contains the Python function
27
+ data_dict (dict): dictionary containing the python function to be executed {"fn": ..., "args": (), "kwargs": {}}
25
28
  cache_directory (str): The directory to store the HDF5 files.
26
29
  task_dependent_lst (list): A list of subprocesses that the current subprocess depends on. Defaults to [].
27
- file_name (str): Name of the HDF5 file which contains the Python function
28
30
  resource_dict (dict): resource dictionary, which defines the resources used for the execution of the function.
29
31
  Example resource dictionary: {
30
32
  cwd: None,
@@ -37,13 +39,20 @@ def execute_with_pysqa(
37
39
  """
38
40
  if task_dependent_lst is None:
39
41
  task_dependent_lst = []
40
- check_file_exists(file_name=file_name)
41
- queue_id = get_queue_id(file_name=file_name)
42
42
  qa = QueueAdapter(
43
43
  directory=config_directory,
44
44
  queue_type=backend,
45
45
  execute_command=_pysqa_execute_command,
46
46
  )
47
+ queue_id = get_queue_id(file_name=file_name)
48
+ if os.path.exists(file_name) and (
49
+ queue_id is None or qa.get_status_of_job(process_id=queue_id) is None
50
+ ):
51
+ os.remove(file_name)
52
+ dump(file_name=file_name, data_dict=data_dict)
53
+ elif not os.path.exists(file_name):
54
+ dump(file_name=file_name, data_dict=data_dict)
55
+ check_file_exists(file_name=file_name)
47
56
  if queue_id is None or qa.get_status_of_job(process_id=queue_id) is None:
48
57
  if resource_dict is None:
49
58
  resource_dict = {}
@@ -81,6 +90,29 @@ def execute_with_pysqa(
81
90
  return queue_id
82
91
 
83
92
 
93
+ def terminate_with_pysqa(
94
+ queue_id: int,
95
+ config_directory: Optional[str] = None,
96
+ backend: Optional[str] = None,
97
+ ):
98
+ """
99
+ Delete job from queuing system
100
+
101
+ Args:
102
+ queue_id (int): Queuing system ID of the job to delete.
103
+ config_directory (str, optional): path to the config directory.
104
+ backend (str, optional): name of the backend used to spawn tasks.
105
+ """
106
+ qa = QueueAdapter(
107
+ directory=config_directory,
108
+ queue_type=backend,
109
+ execute_command=_pysqa_execute_command,
110
+ )
111
+ status = qa.get_status_of_job(process_id=queue_id)
112
+ if status is not None and status not in ["finished", "error"]:
113
+ qa.delete_job(process_id=queue_id)
114
+
115
+
84
116
  def _pysqa_execute_command(
85
117
  commands: str,
86
118
  working_directory: Optional[str] = None,
@@ -9,7 +9,8 @@ from typing import Any, Callable, Optional
9
9
  from executorlib.standalone.cache import get_cache_files
10
10
  from executorlib.standalone.command import get_command_path
11
11
  from executorlib.standalone.serialize import serialize_funct_h5
12
- from executorlib.task_scheduler.file.hdf import dump, get_output
12
+ from executorlib.task_scheduler.file.hdf import get_output
13
+ from executorlib.task_scheduler.file.subprocess_spawner import terminate_subprocess
13
14
 
14
15
 
15
16
  class FutureItem:
@@ -86,9 +87,30 @@ def execute_tasks_h5(
86
87
  with contextlib.suppress(queue.Empty):
87
88
  task_dict = future_queue.get_nowait()
88
89
  if task_dict is not None and "shutdown" in task_dict and task_dict["shutdown"]:
89
- if terminate_function is not None:
90
+ if task_dict["wait"]:
91
+ while len(memory_dict) > 0:
92
+ memory_dict = {
93
+ key: _check_task_output(
94
+ task_key=key,
95
+ future_obj=value,
96
+ cache_directory=cache_dir_dict[key],
97
+ )
98
+ for key, value in memory_dict.items()
99
+ if not value.done()
100
+ }
101
+ if (
102
+ terminate_function is not None
103
+ and terminate_function == terminate_subprocess
104
+ ):
90
105
  for task in process_dict.values():
91
106
  terminate_function(task=task)
107
+ elif terminate_function is not None:
108
+ for queue_id in process_dict.values():
109
+ terminate_function(
110
+ queue_id=queue_id,
111
+ config_directory=pysqa_config_directory,
112
+ backend=backend,
113
+ )
92
114
  future_queue.task_done()
93
115
  future_queue.join()
94
116
  break
@@ -116,9 +138,6 @@ def execute_tasks_h5(
116
138
  cache_directory, task_key + "_o.h5"
117
139
  ) not in get_cache_files(cache_directory=cache_directory):
118
140
  file_name = os.path.join(cache_directory, task_key + "_i.h5")
119
- if os.path.exists(file_name):
120
- os.remove(file_name)
121
- dump(file_name=file_name, data_dict=data_dict)
122
141
  if not disable_dependencies:
123
142
  task_dependent_lst = [
124
143
  process_dict[k] for k in future_wait_key_lst
@@ -137,6 +156,7 @@ def execute_tasks_h5(
137
156
  cores=task_resource_dict["cores"],
138
157
  ),
139
158
  file_name=file_name,
159
+ data_dict=data_dict,
140
160
  task_dependent_lst=task_dependent_lst,
141
161
  resource_dict=task_resource_dict,
142
162
  config_directory=pysqa_config_directory,
@@ -1,33 +1,37 @@
1
+ import os
1
2
  import subprocess
2
3
  import time
3
4
  from typing import Optional
4
5
 
5
6
  from executorlib.standalone.inputcheck import check_file_exists
7
+ from executorlib.task_scheduler.file.hdf import dump
6
8
 
7
9
 
8
10
  def execute_in_subprocess(
9
11
  command: list,
12
+ file_name: str,
13
+ data_dict: dict,
14
+ cache_directory: Optional[str] = None,
10
15
  task_dependent_lst: Optional[list] = None,
11
- file_name: Optional[str] = None,
12
16
  resource_dict: Optional[dict] = None,
13
17
  config_directory: Optional[str] = None,
14
18
  backend: Optional[str] = None,
15
- cache_directory: Optional[str] = None,
16
19
  ) -> subprocess.Popen:
17
20
  """
18
21
  Execute a command in a subprocess.
19
22
 
20
23
  Args:
21
24
  command (list): The command to be executed.
22
- task_dependent_lst (list): A list of subprocesses that the current subprocess depends on. Defaults to [].
23
25
  file_name (str): Name of the HDF5 file which contains the Python function
26
+ data_dict (dict): dictionary containing the python function to be executed {"fn": ..., "args": (), "kwargs": {}}
27
+ cache_directory (str): The directory to store the HDF5 files.
28
+ task_dependent_lst (list): A list of subprocesses that the current subprocess depends on. Defaults to [].
24
29
  resource_dict (dict): resource dictionary, which defines the resources used for the execution of the function.
25
30
  Example resource dictionary: {
26
31
  cwd: None,
27
32
  }
28
33
  config_directory (str, optional): path to the config directory.
29
34
  backend (str, optional): name of the backend used to spawn tasks.
30
- cache_directory (str): The directory to store the HDF5 files.
31
35
 
32
36
  Returns:
33
37
  subprocess.Popen: The subprocess object.
@@ -35,6 +39,9 @@ def execute_in_subprocess(
35
39
  """
36
40
  if task_dependent_lst is None:
37
41
  task_dependent_lst = []
42
+ if os.path.exists(file_name):
43
+ os.remove(file_name)
44
+ dump(file_name=file_name, data_dict=data_dict)
38
45
  check_file_exists(file_name=file_name)
39
46
  while len(task_dependent_lst) > 0:
40
47
  task_dependent_lst = [
@@ -17,10 +17,14 @@ from executorlib.task_scheduler.file.subprocess_spawner import (
17
17
  )
18
18
 
19
19
  try:
20
- from executorlib.task_scheduler.file.queue_spawner import execute_with_pysqa
20
+ from executorlib.task_scheduler.file.queue_spawner import (
21
+ execute_with_pysqa,
22
+ terminate_with_pysqa,
23
+ )
21
24
  except ImportError:
22
25
  # If pysqa is not available fall back to executing tasks in a subprocess
23
26
  execute_with_pysqa = execute_in_subprocess # type: ignore
27
+ terminate_with_pysqa = None # type: ignore
24
28
 
25
29
 
26
30
  class FileTaskScheduler(TaskSchedulerBase):
@@ -58,8 +62,6 @@ class FileTaskScheduler(TaskSchedulerBase):
58
62
  resource_dict.update(
59
63
  {k: v for k, v in default_resource_dict.items() if k not in resource_dict}
60
64
  )
61
- if execute_function == execute_in_subprocess and terminate_function is None:
62
- terminate_function = terminate_subprocess
63
65
  self._process_kwargs = {
64
66
  "resource_dict": resource_dict,
65
67
  "future_queue": self._future_queue,
@@ -80,7 +82,7 @@ class FileTaskScheduler(TaskSchedulerBase):
80
82
  def create_file_executor(
81
83
  resource_dict: dict,
82
84
  max_workers: Optional[int] = None,
83
- backend: str = "flux_submission",
85
+ backend: Optional[str] = None,
84
86
  max_cores: Optional[int] = None,
85
87
  cache_directory: Optional[str] = None,
86
88
  flux_executor=None,
@@ -92,6 +94,7 @@ def create_file_executor(
92
94
  block_allocation: bool = False,
93
95
  init_function: Optional[Callable] = None,
94
96
  disable_dependencies: bool = False,
97
+ execute_function: Callable = execute_with_pysqa,
95
98
  ):
96
99
  if block_allocation:
97
100
  raise ValueError(
@@ -109,9 +112,15 @@ def create_file_executor(
109
112
  check_executor(executor=flux_executor)
110
113
  check_nested_flux_executor(nested_flux_executor=flux_executor_nesting)
111
114
  check_flux_log_files(flux_log_files=flux_log_files)
115
+ if execute_function != execute_in_subprocess:
116
+ terminate_function = terminate_with_pysqa # type: ignore
117
+ else:
118
+ terminate_function = terminate_subprocess # type: ignore
112
119
  return FileTaskScheduler(
113
120
  resource_dict=resource_dict,
114
121
  pysqa_config_directory=pysqa_config_directory,
115
- backend=backend.split("_submission")[0],
122
+ backend=backend,
116
123
  disable_dependencies=disable_dependencies,
124
+ execute_function=execute_function,
125
+ terminate_function=terminate_function,
117
126
  )
File without changes
File without changes
File without changes
File without changes