executorlib 1.6.2__tar.gz → 1.7.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. {executorlib-1.6.2 → executorlib-1.7.0}/PKG-INFO +1 -1
  2. {executorlib-1.6.2 → executorlib-1.7.0}/executorlib/__init__.py +1 -1
  3. {executorlib-1.6.2 → executorlib-1.7.0}/executorlib/_version.py +2 -2
  4. {executorlib-1.6.2 → executorlib-1.7.0}/executorlib/backend/interactive_parallel.py +17 -1
  5. {executorlib-1.6.2 → executorlib-1.7.0}/executorlib/backend/interactive_serial.py +15 -1
  6. {executorlib-1.6.2 → executorlib-1.7.0}/executorlib/executor/flux.py +42 -21
  7. {executorlib-1.6.2 → executorlib-1.7.0}/executorlib/executor/single.py +2 -1
  8. {executorlib-1.6.2 → executorlib-1.7.0}/executorlib/executor/slurm.py +43 -21
  9. {executorlib-1.6.2 → executorlib-1.7.0}/executorlib/standalone/command.py +50 -0
  10. {executorlib-1.6.2 → executorlib-1.7.0}/executorlib/standalone/interactive/communication.py +63 -14
  11. {executorlib-1.6.2 → executorlib-1.7.0}/executorlib/standalone/interactive/spawner.py +14 -3
  12. {executorlib-1.6.2 → executorlib-1.7.0}/executorlib/task_scheduler/file/shared.py +1 -1
  13. {executorlib-1.6.2 → executorlib-1.7.0}/executorlib/task_scheduler/file/task_scheduler.py +2 -2
  14. executorlib-1.7.0/executorlib/task_scheduler/interactive/blockallocation.py +313 -0
  15. {executorlib-1.6.2 → executorlib-1.7.0}/executorlib/task_scheduler/interactive/onetoone.py +93 -35
  16. executorlib-1.7.0/executorlib/task_scheduler/interactive/shared.py +159 -0
  17. executorlib-1.6.2/executorlib/task_scheduler/interactive/fluxspawner.py → executorlib-1.7.0/executorlib/task_scheduler/interactive/spawner_flux.py +9 -2
  18. executorlib-1.7.0/executorlib/task_scheduler/interactive/spawner_pysqa.py +248 -0
  19. executorlib-1.6.2/executorlib/task_scheduler/interactive/slurmspawner.py → executorlib-1.7.0/executorlib/task_scheduler/interactive/spawner_slurm.py +1 -1
  20. executorlib-1.6.2/executorlib/standalone/slurm_command.py +0 -51
  21. executorlib-1.6.2/executorlib/task_scheduler/interactive/blockallocation.py +0 -177
  22. executorlib-1.6.2/executorlib/task_scheduler/interactive/shared.py +0 -172
  23. {executorlib-1.6.2 → executorlib-1.7.0}/.gitignore +0 -0
  24. {executorlib-1.6.2 → executorlib-1.7.0}/LICENSE +0 -0
  25. {executorlib-1.6.2 → executorlib-1.7.0}/README.md +0 -0
  26. {executorlib-1.6.2 → executorlib-1.7.0}/executorlib/api.py +0 -0
  27. {executorlib-1.6.2 → executorlib-1.7.0}/executorlib/backend/__init__.py +0 -0
  28. {executorlib-1.6.2 → executorlib-1.7.0}/executorlib/backend/cache_parallel.py +0 -0
  29. {executorlib-1.6.2 → executorlib-1.7.0}/executorlib/backend/cache_serial.py +0 -0
  30. {executorlib-1.6.2 → executorlib-1.7.0}/executorlib/executor/__init__.py +0 -0
  31. {executorlib-1.6.2 → executorlib-1.7.0}/executorlib/executor/base.py +0 -0
  32. {executorlib-1.6.2 → executorlib-1.7.0}/executorlib/standalone/__init__.py +0 -0
  33. {executorlib-1.6.2 → executorlib-1.7.0}/executorlib/standalone/batched.py +0 -0
  34. {executorlib-1.6.2 → executorlib-1.7.0}/executorlib/standalone/error.py +0 -0
  35. {executorlib-1.6.2 → executorlib-1.7.0}/executorlib/standalone/hdf.py +0 -0
  36. {executorlib-1.6.2 → executorlib-1.7.0}/executorlib/standalone/inputcheck.py +0 -0
  37. {executorlib-1.6.2 → executorlib-1.7.0}/executorlib/standalone/interactive/__init__.py +0 -0
  38. {executorlib-1.6.2 → executorlib-1.7.0}/executorlib/standalone/interactive/arguments.py +0 -0
  39. {executorlib-1.6.2 → executorlib-1.7.0}/executorlib/standalone/interactive/backend.py +0 -0
  40. {executorlib-1.6.2 → executorlib-1.7.0}/executorlib/standalone/plot.py +0 -0
  41. {executorlib-1.6.2 → executorlib-1.7.0}/executorlib/standalone/queue.py +0 -0
  42. {executorlib-1.6.2 → executorlib-1.7.0}/executorlib/standalone/scheduler.py +0 -0
  43. {executorlib-1.6.2 → executorlib-1.7.0}/executorlib/standalone/serialize.py +0 -0
  44. {executorlib-1.6.2 → executorlib-1.7.0}/executorlib/task_scheduler/__init__.py +0 -0
  45. {executorlib-1.6.2 → executorlib-1.7.0}/executorlib/task_scheduler/base.py +0 -0
  46. {executorlib-1.6.2 → executorlib-1.7.0}/executorlib/task_scheduler/file/__init__.py +0 -0
  47. {executorlib-1.6.2 → executorlib-1.7.0}/executorlib/task_scheduler/file/backend.py +0 -0
  48. /executorlib-1.6.2/executorlib/task_scheduler/file/queue_spawner.py → /executorlib-1.7.0/executorlib/task_scheduler/file/spawner_pysqa.py +0 -0
  49. /executorlib-1.6.2/executorlib/task_scheduler/file/subprocess_spawner.py → /executorlib-1.7.0/executorlib/task_scheduler/file/spawner_subprocess.py +0 -0
  50. {executorlib-1.6.2 → executorlib-1.7.0}/executorlib/task_scheduler/interactive/__init__.py +0 -0
  51. {executorlib-1.6.2 → executorlib-1.7.0}/executorlib/task_scheduler/interactive/dependency.py +0 -0
  52. {executorlib-1.6.2 → executorlib-1.7.0}/pyproject.toml +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: executorlib
3
- Version: 1.6.2
3
+ Version: 1.7.0
4
4
  Summary: Up-scale python functions for high performance computing (HPC) with executorlib.
5
5
  Project-URL: Homepage, https://github.com/pyiron/executorlib
6
6
  Project-URL: Documentation, https://executorlib.readthedocs.io
@@ -55,7 +55,7 @@ def terminate_tasks_in_cache(
55
55
  config_directory (str, optional): path to the config directory.
56
56
  backend (str, optional): name of the backend used to spawn tasks ["slurm", "flux"].
57
57
  """
58
- from executorlib.task_scheduler.file.queue_spawner import terminate_tasks_in_cache
58
+ from executorlib.task_scheduler.file.spawner_pysqa import terminate_tasks_in_cache
59
59
 
60
60
  return terminate_tasks_in_cache(
61
61
  cache_directory=cache_directory,
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
28
28
  commit_id: COMMIT_ID
29
29
  __commit_id__: COMMIT_ID
30
30
 
31
- __version__ = version = '1.6.2'
32
- __version_tuple__ = version_tuple = (1, 6, 2)
31
+ __version__ = version = '1.7.0'
32
+ __version_tuple__ = version_tuple = (1, 7, 0)
33
33
 
34
34
  __commit_id__ = commit_id = None
@@ -97,7 +97,23 @@ def main() -> None:
97
97
  and "args" in input_dict
98
98
  and "kwargs" in input_dict
99
99
  ):
100
- memory.update(call_funct(input_dict=input_dict, funct=None, memory=memory))
100
+ try:
101
+ memory.update(
102
+ call_funct(input_dict=input_dict, funct=None, memory=memory)
103
+ )
104
+ except Exception as error:
105
+ if mpi_rank_zero:
106
+ interface_send(
107
+ socket=socket,
108
+ result_dict={"error": error},
109
+ )
110
+ backend_write_error_file(
111
+ error=error,
112
+ apply_dict=input_dict,
113
+ )
114
+ else:
115
+ if mpi_rank_zero:
116
+ interface_send(socket=socket, result_dict={"result": True})
101
117
 
102
118
 
103
119
  if __name__ == "__main__":
@@ -72,7 +72,21 @@ def main(argument_lst: Optional[list[str]] = None):
72
72
  and "args" in input_dict
73
73
  and "kwargs" in input_dict
74
74
  ):
75
- memory.update(call_funct(input_dict=input_dict, funct=None, memory=memory))
75
+ try:
76
+ memory.update(
77
+ call_funct(input_dict=input_dict, funct=None, memory=memory)
78
+ )
79
+ except Exception as error:
80
+ interface_send(
81
+ socket=socket,
82
+ result_dict={"error": error},
83
+ )
84
+ backend_write_error_file(
85
+ error=error,
86
+ apply_dict=input_dict,
87
+ )
88
+ else:
89
+ interface_send(socket=socket, result_dict={"result": True})
76
90
 
77
91
 
78
92
  if __name__ == "__main__":
@@ -43,6 +43,7 @@ class FluxJobExecutor(BaseExecutor):
43
43
  compute nodes. Defaults to False.
44
44
  - error_log_file (str): Name of the error log file to use for storing exceptions raised
45
45
  by the Python functions submitted to the Executor.
46
+ - restart_limit (int): The maximum number of restarting worker processes. Default: 0
46
47
  pmi_mode (str): PMI interface to use (OpenMPI v5 requires pmix) default is None
47
48
  flux_executor (flux.job.FluxExecutor): Flux Python interface to submit the workers to flux
48
49
  flux_executor_nesting (bool): Provide hierarchically nested Flux job scheduler inside the submitted function.
@@ -357,28 +358,48 @@ class FluxClusterExecutor(BaseExecutor):
357
358
  if not plot_dependency_graph:
358
359
  import pysqa # noqa
359
360
 
360
- from executorlib.task_scheduler.file.task_scheduler import (
361
- create_file_executor,
362
- )
361
+ if block_allocation:
362
+ from executorlib.task_scheduler.interactive.spawner_pysqa import (
363
+ create_pysqa_block_allocation_scheduler,
364
+ )
363
365
 
364
- super().__init__(
365
- executor=create_file_executor(
366
- max_workers=max_workers,
367
- backend="flux",
368
- max_cores=max_cores,
369
- cache_directory=cache_directory,
370
- resource_dict=resource_dict,
371
- flux_executor=None,
372
- pmi_mode=pmi_mode,
373
- flux_executor_nesting=False,
374
- flux_log_files=False,
375
- pysqa_config_directory=pysqa_config_directory,
376
- hostname_localhost=hostname_localhost,
377
- block_allocation=block_allocation,
378
- init_function=init_function,
379
- disable_dependencies=disable_dependencies,
366
+ super().__init__(
367
+ executor=create_pysqa_block_allocation_scheduler(
368
+ max_cores=max_cores,
369
+ cache_directory=cache_directory,
370
+ hostname_localhost=hostname_localhost,
371
+ log_obj_size=log_obj_size,
372
+ pmi_mode=pmi_mode,
373
+ init_function=init_function,
374
+ max_workers=max_workers,
375
+ resource_dict=resource_dict,
376
+ pysqa_config_directory=pysqa_config_directory,
377
+ backend="flux",
378
+ )
379
+ )
380
+ else:
381
+ from executorlib.task_scheduler.file.task_scheduler import (
382
+ create_file_executor,
383
+ )
384
+
385
+ super().__init__(
386
+ executor=create_file_executor(
387
+ max_workers=max_workers,
388
+ backend="flux",
389
+ max_cores=max_cores,
390
+ cache_directory=cache_directory,
391
+ resource_dict=resource_dict,
392
+ flux_executor=None,
393
+ pmi_mode=pmi_mode,
394
+ flux_executor_nesting=False,
395
+ flux_log_files=False,
396
+ pysqa_config_directory=pysqa_config_directory,
397
+ hostname_localhost=hostname_localhost,
398
+ block_allocation=block_allocation,
399
+ init_function=init_function,
400
+ disable_dependencies=disable_dependencies,
401
+ )
380
402
  )
381
- )
382
403
  else:
383
404
  super().__init__(
384
405
  executor=DependencyTaskScheduler(
@@ -458,7 +479,7 @@ def create_flux_executor(
458
479
  Returns:
459
480
  InteractiveStepExecutor/ InteractiveExecutor
460
481
  """
461
- from executorlib.task_scheduler.interactive.fluxspawner import (
482
+ from executorlib.task_scheduler.interactive.spawner_flux import (
462
483
  FluxPythonSpawner,
463
484
  validate_max_workers,
464
485
  )
@@ -120,6 +120,7 @@ class SingleNodeExecutor(BaseExecutor):
120
120
  only)
121
121
  - error_log_file (str): Name of the error log file to use for storing exceptions
122
122
  raised by the Python functions submitted to the Executor.
123
+ - restart_limit (int): The maximum number of restarting worker processes. Default: 0
123
124
  hostname_localhost (boolean): use localhost instead of the hostname to establish the zmq connection. In the
124
125
  context of an HPC cluster this is essential to be able to communicate to an
125
126
  Executor running on a different compute node within the same allocation. And
@@ -314,7 +315,7 @@ class TestClusterExecutor(BaseExecutor):
314
315
  {k: v for k, v in default_resource_dict.items() if k not in resource_dict}
315
316
  )
316
317
  if not plot_dependency_graph:
317
- from executorlib.task_scheduler.file.subprocess_spawner import (
318
+ from executorlib.task_scheduler.file.spawner_subprocess import (
318
319
  execute_in_subprocess,
319
320
  )
320
321
  from executorlib.task_scheduler.file.task_scheduler import (
@@ -13,7 +13,7 @@ from executorlib.task_scheduler.interactive.blockallocation import (
13
13
  )
14
14
  from executorlib.task_scheduler.interactive.dependency import DependencyTaskScheduler
15
15
  from executorlib.task_scheduler.interactive.onetoone import OneProcessTaskScheduler
16
- from executorlib.task_scheduler.interactive.slurmspawner import (
16
+ from executorlib.task_scheduler.interactive.spawner_slurm import (
17
17
  SrunSpawner,
18
18
  validate_max_workers,
19
19
  )
@@ -43,6 +43,7 @@ class SlurmClusterExecutor(BaseExecutor):
43
43
  - slurm_cmd_args (list): Additional command line arguments for the srun call (SLURM only)
44
44
  - error_log_file (str): Name of the error log file to use for storing exceptions raised
45
45
  by the Python functions submitted to the Executor.
46
+ - restart_limit (int): The maximum number of restarting worker processes. Default: 0
46
47
  pysqa_config_directory (str, optional): path to the pysqa config directory (only for pysqa based backend).
47
48
  pmi_mode (str): PMI interface to use (OpenMPI v5 requires pmix) default is None
48
49
  hostname_localhost (boolean): use localhost instead of the hostname to establish the zmq connection. In the
@@ -165,28 +166,49 @@ class SlurmClusterExecutor(BaseExecutor):
165
166
  if not plot_dependency_graph:
166
167
  import pysqa # noqa
167
168
 
168
- from executorlib.task_scheduler.file.task_scheduler import (
169
- create_file_executor,
170
- )
169
+ if block_allocation:
170
+ from executorlib.task_scheduler.interactive.spawner_pysqa import (
171
+ create_pysqa_block_allocation_scheduler,
172
+ )
171
173
 
172
- super().__init__(
173
- executor=create_file_executor(
174
- max_workers=max_workers,
175
- backend="slurm",
176
- max_cores=max_cores,
177
- cache_directory=cache_directory,
178
- resource_dict=resource_dict,
179
- pmi_mode=pmi_mode,
180
- flux_executor=None,
181
- flux_executor_nesting=False,
182
- flux_log_files=False,
183
- pysqa_config_directory=pysqa_config_directory,
184
- hostname_localhost=hostname_localhost,
185
- block_allocation=block_allocation,
186
- init_function=init_function,
187
- disable_dependencies=disable_dependencies,
174
+ super().__init__(
175
+ executor=create_pysqa_block_allocation_scheduler(
176
+ max_cores=max_cores,
177
+ cache_directory=cache_directory,
178
+ hostname_localhost=hostname_localhost,
179
+ log_obj_size=log_obj_size,
180
+ pmi_mode=pmi_mode,
181
+ init_function=init_function,
182
+ max_workers=max_workers,
183
+ resource_dict=resource_dict,
184
+ pysqa_config_directory=pysqa_config_directory,
185
+ backend="slurm",
186
+ ),
187
+ )
188
+
189
+ else:
190
+ from executorlib.task_scheduler.file.task_scheduler import (
191
+ create_file_executor,
192
+ )
193
+
194
+ super().__init__(
195
+ executor=create_file_executor(
196
+ max_workers=max_workers,
197
+ backend="slurm",
198
+ max_cores=max_cores,
199
+ cache_directory=cache_directory,
200
+ resource_dict=resource_dict,
201
+ pmi_mode=pmi_mode,
202
+ flux_executor=None,
203
+ flux_executor_nesting=False,
204
+ flux_log_files=False,
205
+ pysqa_config_directory=pysqa_config_directory,
206
+ hostname_localhost=hostname_localhost,
207
+ block_allocation=block_allocation,
208
+ init_function=init_function,
209
+ disable_dependencies=disable_dependencies,
210
+ )
188
211
  )
189
- )
190
212
  else:
191
213
  super().__init__(
192
214
  executor=DependencyTaskScheduler(
@@ -3,6 +3,8 @@ import os
3
3
  import sys
4
4
  from typing import Optional
5
5
 
6
+ SLURM_COMMAND = "srun"
7
+
6
8
 
7
9
  def get_command_path(executable: str) -> str:
8
10
  """
@@ -112,3 +114,51 @@ def get_interactive_execute_command(
112
114
  else:
113
115
  command_lst += [get_command_path(executable="interactive_serial.py")]
114
116
  return command_lst
117
+
118
+
119
+ def generate_slurm_command(
120
+ cores: int,
121
+ cwd: Optional[str],
122
+ threads_per_core: int = 1,
123
+ gpus_per_core: int = 0,
124
+ num_nodes: Optional[int] = None,
125
+ exclusive: bool = False,
126
+ openmpi_oversubscribe: bool = False,
127
+ slurm_cmd_args: Optional[list[str]] = None,
128
+ pmi_mode: Optional[str] = None,
129
+ ) -> list[str]:
130
+ """
131
+ Generate the command list for the SLURM interface.
132
+
133
+ Args:
134
+ cores (int): The number of cores.
135
+ cwd (str): The current working directory.
136
+ threads_per_core (int, optional): The number of threads per core. Defaults to 1.
137
+ gpus_per_core (int, optional): The number of GPUs per core. Defaults to 0.
138
+ num_nodes (int, optional): The number of compute nodes to use for executing the task. Defaults to None.
139
+ exclusive (bool): Whether to exclusively reserve the compute nodes, or allow sharing compute nodes. Defaults to False.
140
+ openmpi_oversubscribe (bool, optional): Whether to oversubscribe the cores. Defaults to False.
141
+ slurm_cmd_args (list[str], optional): Additional command line arguments. Defaults to None.
142
+ pmi_mode (str): PMI interface to use (OpenMPI v5 requires pmix) default is None
143
+
144
+ Returns:
145
+ list[str]: The generated command list.
146
+ """
147
+ command_prepend_lst = [SLURM_COMMAND, "-n", str(cores)]
148
+ if cwd is not None:
149
+ command_prepend_lst += ["-D", cwd]
150
+ if pmi_mode is not None:
151
+ command_prepend_lst += ["--mpi=" + pmi_mode]
152
+ if num_nodes is not None:
153
+ command_prepend_lst += ["-N", str(num_nodes)]
154
+ if threads_per_core > 1:
155
+ command_prepend_lst += ["--cpus-per-task=" + str(threads_per_core)]
156
+ if gpus_per_core > 0:
157
+ command_prepend_lst += ["--gpus-per-task=" + str(gpus_per_core)]
158
+ if exclusive:
159
+ command_prepend_lst += ["--exact"]
160
+ if openmpi_oversubscribe:
161
+ command_prepend_lst += ["--oversubscribe"]
162
+ if slurm_cmd_args is not None and len(slurm_cmd_args) > 0:
163
+ command_prepend_lst += slurm_cmd_args
164
+ return command_prepend_lst
@@ -1,12 +1,16 @@
1
1
  import logging
2
2
  import sys
3
3
  from socket import gethostname
4
- from typing import Optional
4
+ from typing import Any, Callable, Optional
5
5
 
6
6
  import cloudpickle
7
7
  import zmq
8
8
 
9
9
 
10
+ class ExecutorlibSocketError(RuntimeError):
11
+ pass
12
+
13
+
10
14
  class SocketInterface:
11
15
  """
12
16
  The SocketInterface is an abstraction layer on top of the zero message queue.
@@ -14,23 +18,41 @@ class SocketInterface:
14
18
  Args:
15
19
  spawner (executorlib.shared.spawner.BaseSpawner): Interface for starting the parallel process
16
20
  log_obj_size (boolean): Enable debug mode which reports the size of the communicated objects.
21
+ time_out_ms (int): Time out for waiting for a message on socket in milliseconds.
17
22
  """
18
23
 
19
- def __init__(self, spawner=None, log_obj_size=False):
24
+ def __init__(
25
+ self, spawner=None, log_obj_size: bool = False, time_out_ms: int = 1000
26
+ ):
20
27
  """
21
28
  Initialize the SocketInterface.
22
29
 
23
30
  Args:
24
31
  spawner (executorlib.shared.spawner.BaseSpawner): Interface for starting the parallel process
32
+ log_obj_size (boolean): Enable debug mode which reports the size of the communicated objects.
33
+ time_out_ms (int): Time out for waiting for a message on socket in milliseconds.
25
34
  """
26
35
  self._context = zmq.Context()
27
36
  self._socket = self._context.socket(zmq.PAIR)
37
+ self._poller = zmq.Poller()
38
+ self._poller.register(self._socket, zmq.POLLIN)
28
39
  self._process = None
40
+ self._time_out_ms = time_out_ms
41
+ self._logger: Optional[logging.Logger] = None
29
42
  if log_obj_size:
30
43
  self._logger = logging.getLogger("executorlib")
31
- else:
32
- self._logger = None
33
44
  self._spawner = spawner
45
+ self._command_lst: list[str] = []
46
+ self._booted_sucessfully: bool = False
47
+ self._stop_function: Optional[Callable] = None
48
+
49
+ @property
50
+ def status(self) -> bool:
51
+ return self._booted_sucessfully
52
+
53
+ @status.setter
54
+ def status(self, status: bool):
55
+ self._booted_sucessfully = status
34
56
 
35
57
  def send_dict(self, input_dict: dict):
36
58
  """
@@ -52,7 +74,14 @@ class SocketInterface:
52
74
  Returns:
53
75
  dict: dictionary with response received from the connected client
54
76
  """
55
- data = self._socket.recv()
77
+ response_lst: list[tuple[Any, int]] = []
78
+ while len(response_lst) == 0:
79
+ response_lst = self._poller.poll(self._time_out_ms)
80
+ if not self._spawner.poll():
81
+ raise ExecutorlibSocketError(
82
+ "SocketInterface crashed during execution."
83
+ )
84
+ data = self._socket.recv(zmq.NOBLOCK)
56
85
  if self._logger is not None:
57
86
  self._logger.warning(
58
87
  "Received dictionary of size: " + str(sys.getsizeof(data))
@@ -69,7 +98,7 @@ class SocketInterface:
69
98
 
70
99
  Args:
71
100
  input_dict (dict): dictionary of commands to be communicated. The key "shutdown" is reserved to stop the
72
- connected client from listening.
101
+ connected client from listening.
73
102
 
74
103
  Returns:
75
104
  dict: dictionary with response received from the connected client
@@ -89,17 +118,30 @@ class SocketInterface:
89
118
 
90
119
  def bootup(
91
120
  self,
92
- command_lst: list[str],
121
+ command_lst: Optional[list[str]] = None,
122
+ stop_function: Optional[Callable] = None,
93
123
  ):
94
124
  """
95
125
  Boot up the client process to connect to the SocketInterface.
96
126
 
97
127
  Args:
98
128
  command_lst (list): list of strings to start the client process
129
+ stop_function (Callable): Function to stop the interface.
99
130
  """
100
- self._spawner.bootup(
101
- command_lst=command_lst,
102
- )
131
+ if command_lst is not None:
132
+ self._command_lst = command_lst
133
+ if stop_function is not None:
134
+ self._stop_function = stop_function
135
+ if len(self._command_lst) == 0:
136
+ raise ValueError("No command defined to boot up SocketInterface.")
137
+ if not self._spawner.bootup(
138
+ command_lst=self._command_lst,
139
+ stop_function=self._stop_function,
140
+ ):
141
+ self._reset_socket()
142
+ self._booted_sucessfully = False
143
+ else:
144
+ self._booted_sucessfully = True
103
145
 
104
146
  def shutdown(self, wait: bool = True):
105
147
  """
@@ -114,6 +156,13 @@ class SocketInterface:
114
156
  input_dict={"shutdown": True, "wait": wait}
115
157
  )
116
158
  self._spawner.shutdown(wait=wait)
159
+ self._reset_socket()
160
+ return result
161
+
162
+ def _reset_socket(self):
163
+ """
164
+ Reset the socket and context of the SocketInterface instance.
165
+ """
117
166
  if self._socket is not None:
118
167
  self._socket.close()
119
168
  if self._context is not None:
@@ -121,7 +170,6 @@ class SocketInterface:
121
170
  self._process = None
122
171
  self._socket = None
123
172
  self._context = None
124
- return result
125
173
 
126
174
  def __del__(self):
127
175
  """
@@ -137,6 +185,7 @@ def interface_bootup(
137
185
  hostname_localhost: Optional[bool] = None,
138
186
  log_obj_size: bool = False,
139
187
  worker_id: Optional[int] = None,
188
+ stop_function: Optional[Callable] = None,
140
189
  ) -> SocketInterface:
141
190
  """
142
191
  Start interface for ZMQ communication
@@ -155,13 +204,12 @@ def interface_bootup(
155
204
  log_obj_size (boolean): Enable debug mode which reports the size of the communicated objects.
156
205
  worker_id (int): Communicate to the worker which ID was assigned to it for future reference and resource
157
206
  distribution.
207
+ stop_function (Callable): Function to stop the interface.
158
208
 
159
209
  Returns:
160
210
  executorlib.shared.communication.SocketInterface: socket interface for zmq communication
161
211
  """
162
- if hostname_localhost is None and sys.platform == "darwin":
163
- hostname_localhost = True
164
- elif hostname_localhost is None:
212
+ if hostname_localhost is None and sys.platform != "darwin":
165
213
  hostname_localhost = False
166
214
  if not hostname_localhost:
167
215
  command_lst += [
@@ -180,6 +228,7 @@ def interface_bootup(
180
228
  ]
181
229
  interface.bootup(
182
230
  command_lst=command_lst,
231
+ stop_function=stop_function,
183
232
  )
184
233
  return interface
185
234
 
@@ -1,7 +1,7 @@
1
1
  import os
2
2
  import subprocess
3
3
  from abc import ABC, abstractmethod
4
- from typing import Optional
4
+ from typing import Callable, Optional
5
5
 
6
6
  MPI_COMMAND = "mpiexec"
7
7
 
@@ -29,12 +29,17 @@ class BaseSpawner(ABC):
29
29
  def bootup(
30
30
  self,
31
31
  command_lst: list[str],
32
- ):
32
+ stop_function: Optional[Callable] = None,
33
+ ) -> bool:
33
34
  """
34
35
  Method to start the interface.
35
36
 
36
37
  Args:
37
38
  command_lst (list[str]): The command list to execute.
39
+ stop_function (Callable): Function to stop the interface.
40
+
41
+ Returns:
42
+ bool: Whether the interface was successfully started.
38
43
  """
39
44
  raise NotImplementedError
40
45
 
@@ -87,12 +92,17 @@ class SubprocessSpawner(BaseSpawner):
87
92
  def bootup(
88
93
  self,
89
94
  command_lst: list[str],
90
- ):
95
+ stop_function: Optional[Callable] = None,
96
+ ) -> bool:
91
97
  """
92
98
  Method to start the subprocess interface.
93
99
 
94
100
  Args:
95
101
  command_lst (list[str]): The command list to execute.
102
+ stop_function (Callable): Function to stop the interface.
103
+
104
+ Returns:
105
+ bool: Whether the interface was successfully started.
96
106
  """
97
107
  if self._cwd is not None:
98
108
  os.makedirs(self._cwd, exist_ok=True)
@@ -101,6 +111,7 @@ class SubprocessSpawner(BaseSpawner):
101
111
  cwd=self._cwd,
102
112
  stdin=subprocess.DEVNULL,
103
113
  )
114
+ return self.poll()
104
115
 
105
116
  def generate_command(self, command_lst: list[str]) -> list[str]:
106
117
  """
@@ -7,7 +7,7 @@ from typing import Any, Callable, Optional
7
7
  from executorlib.standalone.command import get_cache_execute_command
8
8
  from executorlib.standalone.hdf import get_cache_files, get_output
9
9
  from executorlib.standalone.serialize import serialize_funct
10
- from executorlib.task_scheduler.file.subprocess_spawner import terminate_subprocess
10
+ from executorlib.task_scheduler.file.spawner_subprocess import terminate_subprocess
11
11
 
12
12
 
13
13
  class FutureItem:
@@ -11,14 +11,14 @@ from executorlib.standalone.inputcheck import (
11
11
  )
12
12
  from executorlib.task_scheduler.base import TaskSchedulerBase
13
13
  from executorlib.task_scheduler.file.shared import execute_tasks_h5
14
- from executorlib.task_scheduler.file.subprocess_spawner import (
14
+ from executorlib.task_scheduler.file.spawner_subprocess import (
15
15
  execute_in_subprocess,
16
16
  terminate_subprocess,
17
17
  )
18
18
 
19
19
  try:
20
20
  from executorlib.standalone.scheduler import terminate_with_pysqa
21
- from executorlib.task_scheduler.file.queue_spawner import execute_with_pysqa
21
+ from executorlib.task_scheduler.file.spawner_pysqa import execute_with_pysqa
22
22
  except ImportError:
23
23
  # If pysqa is not available fall back to executing tasks in a subprocess
24
24
  execute_with_pysqa = execute_in_subprocess # type: ignore