executorlib 1.7.4__tar.gz → 1.8.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. {executorlib-1.7.4 → executorlib-1.8.1}/PKG-INFO +3 -3
  2. {executorlib-1.7.4 → executorlib-1.8.1}/pyproject.toml +2 -2
  3. {executorlib-1.7.4 → executorlib-1.8.1}/src/executorlib/_version.py +2 -2
  4. {executorlib-1.7.4 → executorlib-1.8.1}/src/executorlib/executor/flux.py +13 -0
  5. {executorlib-1.7.4 → executorlib-1.8.1}/src/executorlib/executor/single.py +13 -0
  6. {executorlib-1.7.4 → executorlib-1.8.1}/src/executorlib/executor/slurm.py +13 -0
  7. {executorlib-1.7.4 → executorlib-1.8.1}/src/executorlib/standalone/hdf.py +7 -0
  8. {executorlib-1.7.4 → executorlib-1.8.1}/src/executorlib/standalone/inputcheck.py +12 -0
  9. {executorlib-1.7.4 → executorlib-1.8.1}/src/executorlib/standalone/interactive/spawner.py +19 -0
  10. {executorlib-1.7.4 → executorlib-1.8.1}/src/executorlib/task_scheduler/base.py +3 -1
  11. {executorlib-1.7.4 → executorlib-1.8.1}/src/executorlib/task_scheduler/file/shared.py +105 -30
  12. {executorlib-1.7.4 → executorlib-1.8.1}/src/executorlib/task_scheduler/file/spawner_pysqa.py +4 -0
  13. {executorlib-1.7.4 → executorlib-1.8.1}/src/executorlib/task_scheduler/file/spawner_subprocess.py +18 -3
  14. {executorlib-1.7.4 → executorlib-1.8.1}/src/executorlib/task_scheduler/file/task_scheduler.py +5 -0
  15. {executorlib-1.7.4 → executorlib-1.8.1}/src/executorlib/task_scheduler/interactive/blockallocation.py +2 -2
  16. {executorlib-1.7.4 → executorlib-1.8.1}/src/executorlib/task_scheduler/interactive/dependency_plot.py +35 -2
  17. {executorlib-1.7.4 → executorlib-1.8.1}/src/executorlib/task_scheduler/interactive/spawner_flux.py +13 -5
  18. {executorlib-1.7.4 → executorlib-1.8.1}/src/executorlib/task_scheduler/interactive/spawner_pysqa.py +8 -1
  19. {executorlib-1.7.4 → executorlib-1.8.1}/src/executorlib/task_scheduler/interactive/spawner_slurm.py +3 -0
  20. {executorlib-1.7.4 → executorlib-1.8.1}/.gitignore +0 -0
  21. {executorlib-1.7.4 → executorlib-1.8.1}/LICENSE +0 -0
  22. {executorlib-1.7.4 → executorlib-1.8.1}/README.md +0 -0
  23. {executorlib-1.7.4 → executorlib-1.8.1}/src/executorlib/__init__.py +0 -0
  24. {executorlib-1.7.4 → executorlib-1.8.1}/src/executorlib/api.py +0 -0
  25. {executorlib-1.7.4 → executorlib-1.8.1}/src/executorlib/backend/__init__.py +0 -0
  26. {executorlib-1.7.4 → executorlib-1.8.1}/src/executorlib/backend/cache_parallel.py +0 -0
  27. {executorlib-1.7.4 → executorlib-1.8.1}/src/executorlib/backend/cache_serial.py +0 -0
  28. {executorlib-1.7.4 → executorlib-1.8.1}/src/executorlib/backend/interactive_parallel.py +0 -0
  29. {executorlib-1.7.4 → executorlib-1.8.1}/src/executorlib/backend/interactive_serial.py +0 -0
  30. {executorlib-1.7.4 → executorlib-1.8.1}/src/executorlib/executor/__init__.py +0 -0
  31. {executorlib-1.7.4 → executorlib-1.8.1}/src/executorlib/executor/base.py +0 -0
  32. {executorlib-1.7.4 → executorlib-1.8.1}/src/executorlib/standalone/__init__.py +0 -0
  33. {executorlib-1.7.4 → executorlib-1.8.1}/src/executorlib/standalone/batched.py +0 -0
  34. {executorlib-1.7.4 → executorlib-1.8.1}/src/executorlib/standalone/command.py +0 -0
  35. {executorlib-1.7.4 → executorlib-1.8.1}/src/executorlib/standalone/error.py +0 -0
  36. {executorlib-1.7.4 → executorlib-1.8.1}/src/executorlib/standalone/interactive/__init__.py +0 -0
  37. {executorlib-1.7.4 → executorlib-1.8.1}/src/executorlib/standalone/interactive/arguments.py +0 -0
  38. {executorlib-1.7.4 → executorlib-1.8.1}/src/executorlib/standalone/interactive/backend.py +0 -0
  39. {executorlib-1.7.4 → executorlib-1.8.1}/src/executorlib/standalone/interactive/communication.py +0 -0
  40. {executorlib-1.7.4 → executorlib-1.8.1}/src/executorlib/standalone/queue.py +0 -0
  41. {executorlib-1.7.4 → executorlib-1.8.1}/src/executorlib/standalone/scheduler.py +0 -0
  42. {executorlib-1.7.4 → executorlib-1.8.1}/src/executorlib/standalone/select.py +0 -0
  43. {executorlib-1.7.4 → executorlib-1.8.1}/src/executorlib/standalone/serialize.py +0 -0
  44. {executorlib-1.7.4 → executorlib-1.8.1}/src/executorlib/task_scheduler/__init__.py +0 -0
  45. {executorlib-1.7.4 → executorlib-1.8.1}/src/executorlib/task_scheduler/file/__init__.py +0 -0
  46. {executorlib-1.7.4 → executorlib-1.8.1}/src/executorlib/task_scheduler/file/backend.py +0 -0
  47. {executorlib-1.7.4 → executorlib-1.8.1}/src/executorlib/task_scheduler/interactive/__init__.py +0 -0
  48. {executorlib-1.7.4 → executorlib-1.8.1}/src/executorlib/task_scheduler/interactive/dependency.py +0 -0
  49. {executorlib-1.7.4 → executorlib-1.8.1}/src/executorlib/task_scheduler/interactive/onetoone.py +0 -0
  50. {executorlib-1.7.4 → executorlib-1.8.1}/src/executorlib/task_scheduler/interactive/shared.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: executorlib
3
- Version: 1.7.4
3
+ Version: 1.8.1
4
4
  Summary: Up-scale python functions for high performance computing (HPC) with executorlib.
5
5
  Project-URL: Homepage, https://github.com/pyiron/executorlib
6
6
  Project-URL: Documentation, https://executorlib.readthedocs.io
@@ -55,12 +55,12 @@ Requires-Dist: ipython<=9.9.0,>=7.33.0; extra == 'all'
55
55
  Requires-Dist: mpi4py<=4.1.1,>=3.1.4; extra == 'all'
56
56
  Requires-Dist: networkx<=3.6.1,>=2.8.8; extra == 'all'
57
57
  Requires-Dist: pygraphviz<=1.14,>=1.10; extra == 'all'
58
- Requires-Dist: pysqa==0.3.3; extra == 'all'
58
+ Requires-Dist: pysqa==0.3.4; extra == 'all'
59
59
  Provides-Extra: cache
60
60
  Requires-Dist: h5py<=3.15.1,>=3.6.0; extra == 'cache'
61
61
  Provides-Extra: cluster
62
62
  Requires-Dist: h5py<=3.15.1,>=3.6.0; extra == 'cluster'
63
- Requires-Dist: pysqa==0.3.3; extra == 'cluster'
63
+ Requires-Dist: pysqa==0.3.4; extra == 'cluster'
64
64
  Provides-Extra: graph
65
65
  Requires-Dist: networkx<=3.6.1,>=2.8.8; extra == 'graph'
66
66
  Requires-Dist: pygraphviz<=1.14,>=1.10; extra == 'graph'
@@ -52,12 +52,12 @@ graphnotebook = [
52
52
  ]
53
53
  mpi = ["mpi4py>=3.1.4,<=4.1.1"]
54
54
  cluster = [
55
- "pysqa==0.3.3",
55
+ "pysqa==0.3.4",
56
56
  "h5py>=3.6.0,<=3.15.1",
57
57
  ]
58
58
  all = [
59
59
  "mpi4py>=3.1.4,<=4.1.1",
60
- "pysqa==0.3.3",
60
+ "pysqa==0.3.4",
61
61
  "h5py>=3.6.0,<=3.15.1",
62
62
  "pygraphviz>=1.10,<=1.14",
63
63
  "networkx>=2.8.8,<=3.6.1",
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
28
28
  commit_id: COMMIT_ID
29
29
  __commit_id__: COMMIT_ID
30
30
 
31
- __version__ = version = '1.7.4'
32
- __version_tuple__ = version_tuple = (1, 7, 4)
31
+ __version__ = version = '1.8.1'
32
+ __version_tuple__ = version_tuple = (1, 8, 1)
33
33
 
34
34
  __commit_id__ = commit_id = None
@@ -9,6 +9,7 @@ from executorlib.standalone.inputcheck import (
9
9
  check_plot_dependency_graph,
10
10
  check_pmi,
11
11
  check_refresh_rate,
12
+ check_wait_on_shutdown,
12
13
  validate_number_of_cores,
13
14
  )
14
15
  from executorlib.task_scheduler.interactive.blockallocation import (
@@ -67,6 +68,7 @@ class FluxJobExecutor(BaseExecutor):
67
68
  plot_dependency_graph_filename (str): Name of the file to store the plotted graph in.
68
69
  export_workflow_filename (str): Name of the file to store the exported workflow graph in.
69
70
  log_obj_size (bool): Enable debug mode which reports the size of the communicated objects.
71
+ wait (bool): Whether to wait for the completion of all tasks before shutting down the executor.
70
72
 
71
73
  Examples:
72
74
  ```
@@ -108,6 +110,7 @@ class FluxJobExecutor(BaseExecutor):
108
110
  plot_dependency_graph_filename: Optional[str] = None,
109
111
  export_workflow_filename: Optional[str] = None,
110
112
  log_obj_size: bool = False,
113
+ wait: bool = True,
111
114
  ):
112
115
  """
113
116
  The executorlib.FluxJobExecutor leverages either the message passing interface (MPI), the SLURM workload manager
@@ -156,6 +159,7 @@ class FluxJobExecutor(BaseExecutor):
156
159
  plot_dependency_graph_filename (str): Name of the file to store the plotted graph in.
157
160
  export_workflow_filename (str): Name of the file to store the exported workflow graph in.
158
161
  log_obj_size (bool): Enable debug mode which reports the size of the communicated objects.
162
+ wait (bool): Whether to wait for the completion of all tasks before shutting down the executor.
159
163
 
160
164
  """
161
165
  default_resource_dict: dict = {
@@ -187,6 +191,7 @@ class FluxJobExecutor(BaseExecutor):
187
191
  block_allocation=block_allocation,
188
192
  init_function=init_function,
189
193
  log_obj_size=log_obj_size,
194
+ wait=wait,
190
195
  ),
191
196
  max_cores=max_cores,
192
197
  refresh_rate=refresh_rate,
@@ -212,6 +217,7 @@ class FluxJobExecutor(BaseExecutor):
212
217
  block_allocation=block_allocation,
213
218
  init_function=init_function,
214
219
  log_obj_size=log_obj_size,
220
+ wait=wait,
215
221
  )
216
222
  )
217
223
 
@@ -261,6 +267,7 @@ class FluxClusterExecutor(BaseExecutor):
261
267
  plot_dependency_graph_filename (str): Name of the file to store the plotted graph in.
262
268
  export_workflow_filename (str): Name of the file to store the exported workflow graph in.
263
269
  log_obj_size (bool): Enable debug mode which reports the size of the communicated objects.
270
+ wait (bool): Whether to wait for the completion of all tasks before shutting down the executor.
264
271
 
265
272
  Examples:
266
273
  ```
@@ -300,6 +307,7 @@ class FluxClusterExecutor(BaseExecutor):
300
307
  plot_dependency_graph_filename: Optional[str] = None,
301
308
  export_workflow_filename: Optional[str] = None,
302
309
  log_obj_size: bool = False,
310
+ wait: bool = True,
303
311
  ):
304
312
  """
305
313
  The executorlib.FluxClusterExecutor leverages either the message passing interface (MPI), the SLURM workload
@@ -346,6 +354,7 @@ class FluxClusterExecutor(BaseExecutor):
346
354
  plot_dependency_graph_filename (str): Name of the file to store the plotted graph in.
347
355
  export_workflow_filename (str): Name of the file to store the exported workflow graph in.
348
356
  log_obj_size (bool): Enable debug mode which reports the size of the communicated objects.
357
+ wait (bool): Whether to wait for the completion of all tasks before shutting down the executor.
349
358
 
350
359
  """
351
360
  default_resource_dict: dict = {
@@ -405,6 +414,7 @@ class FluxClusterExecutor(BaseExecutor):
405
414
  block_allocation=block_allocation,
406
415
  init_function=init_function,
407
416
  disable_dependencies=disable_dependencies,
417
+ wait=wait,
408
418
  )
409
419
  )
410
420
  else:
@@ -445,6 +455,7 @@ def create_flux_executor(
445
455
  block_allocation: bool = False,
446
456
  init_function: Optional[Callable] = None,
447
457
  log_obj_size: bool = False,
458
+ wait: bool = True,
448
459
  ) -> Union[OneProcessTaskScheduler, BlockAllocationTaskScheduler]:
449
460
  """
450
461
  Create a flux executor
@@ -483,6 +494,7 @@ def create_flux_executor(
483
494
  of the individual function.
484
495
  init_function (None): optional function to preset arguments for functions which are submitted later
485
496
  log_obj_size (bool): Enable debug mode which reports the size of the communicated objects.
497
+ wait (bool): Whether to wait for the completion of all tasks before shutting down the executor.
486
498
 
487
499
  Returns:
488
500
  InteractiveStepExecutor/ InteractiveExecutor
@@ -504,6 +516,7 @@ def create_flux_executor(
504
516
  check_command_line_argument_lst(
505
517
  command_line_argument_lst=resource_dict.get("slurm_cmd_args", [])
506
518
  )
519
+ check_wait_on_shutdown(wait_on_shutdown=wait)
507
520
  if "openmpi_oversubscribe" in resource_dict:
508
521
  del resource_dict["openmpi_oversubscribe"]
509
522
  if "slurm_cmd_args" in resource_dict:
@@ -7,6 +7,7 @@ from executorlib.standalone.inputcheck import (
7
7
  check_init_function,
8
8
  check_plot_dependency_graph,
9
9
  check_refresh_rate,
10
+ check_wait_on_shutdown,
10
11
  validate_number_of_cores,
11
12
  )
12
13
  from executorlib.standalone.interactive.spawner import MpiExecSpawner
@@ -60,6 +61,7 @@ class SingleNodeExecutor(BaseExecutor):
60
61
  plot_dependency_graph_filename (str): Name of the file to store the plotted graph in.
61
62
  export_workflow_filename (str): Name of the file to store the exported workflow graph in.
62
63
  log_obj_size (bool): Enable debug mode which reports the size of the communicated objects.
64
+ wait (bool): Whether to wait for the completion of all tasks before shutting down the executor.
63
65
 
64
66
  Examples:
65
67
  ```
@@ -97,6 +99,7 @@ class SingleNodeExecutor(BaseExecutor):
97
99
  plot_dependency_graph_filename: Optional[str] = None,
98
100
  export_workflow_filename: Optional[str] = None,
99
101
  log_obj_size: bool = False,
102
+ wait: bool = True,
100
103
  ):
101
104
  """
102
105
  The executorlib.SingleNodeExecutor leverages either the message passing interface (MPI), the SLURM workload
@@ -142,6 +145,7 @@ class SingleNodeExecutor(BaseExecutor):
142
145
  plot_dependency_graph_filename (str): Name of the file to store the plotted graph in.
143
146
  export_workflow_filename (str): Name of the file to store the exported workflow graph in.
144
147
  log_obj_size (bool): Enable debug mode which reports the size of the communicated objects.
148
+ wait (bool): Whether to wait for the completion of all tasks before shutting down the executor.
145
149
 
146
150
  """
147
151
  default_resource_dict: dict = {
@@ -169,6 +173,7 @@ class SingleNodeExecutor(BaseExecutor):
169
173
  block_allocation=block_allocation,
170
174
  init_function=init_function,
171
175
  log_obj_size=log_obj_size,
176
+ wait=wait,
172
177
  ),
173
178
  max_cores=max_cores,
174
179
  refresh_rate=refresh_rate,
@@ -190,6 +195,7 @@ class SingleNodeExecutor(BaseExecutor):
190
195
  block_allocation=block_allocation,
191
196
  init_function=init_function,
192
197
  log_obj_size=log_obj_size,
198
+ wait=wait,
193
199
  )
194
200
  )
195
201
 
@@ -232,6 +238,7 @@ class TestClusterExecutor(BaseExecutor):
232
238
  plot_dependency_graph_filename (str): Name of the file to store the plotted graph in.
233
239
  export_workflow_filename (str): Name of the file to store the exported workflow graph in.
234
240
  log_obj_size (bool): Enable debug mode which reports the size of the communicated objects.
241
+ wait (bool): Whether to wait for the completion of all tasks before shutting down the executor.
235
242
 
236
243
  Examples:
237
244
  ```
@@ -269,6 +276,7 @@ class TestClusterExecutor(BaseExecutor):
269
276
  plot_dependency_graph_filename: Optional[str] = None,
270
277
  export_workflow_filename: Optional[str] = None,
271
278
  log_obj_size: bool = False,
279
+ wait: bool = True,
272
280
  ):
273
281
  """
274
282
  The executorlib.api.TestClusterExecutor is designed to test the file based communication used in the
@@ -307,6 +315,7 @@ class TestClusterExecutor(BaseExecutor):
307
315
  plot_dependency_graph_filename (str): Name of the file to store the plotted graph in.
308
316
  export_workflow_filename (str): Name of the file to store the exported workflow graph in.
309
317
  log_obj_size (bool): Enable debug mode which reports the size of the communicated objects.
318
+ wait (bool): Whether to wait for the completion of all tasks before shutting down the executor.
310
319
 
311
320
  """
312
321
  default_resource_dict: dict = {
@@ -346,6 +355,7 @@ class TestClusterExecutor(BaseExecutor):
346
355
  init_function=init_function,
347
356
  disable_dependencies=disable_dependencies,
348
357
  execute_function=execute_in_subprocess,
358
+ wait=wait,
349
359
  )
350
360
  )
351
361
  else:
@@ -379,6 +389,7 @@ def create_single_node_executor(
379
389
  block_allocation: bool = False,
380
390
  init_function: Optional[Callable] = None,
381
391
  log_obj_size: bool = False,
392
+ wait: bool = True,
382
393
  ) -> Union[OneProcessTaskScheduler, BlockAllocationTaskScheduler]:
383
394
  """
384
395
  Create a single node executor
@@ -413,6 +424,7 @@ def create_single_node_executor(
413
424
  of the individual function.
414
425
  init_function (None): optional function to preset arguments for functions which are submitted later
415
426
  log_obj_size (bool): Enable debug mode which reports the size of the communicated objects.
427
+ wait (bool): Whether to wait for the completion of all tasks before shutting down the executor.
416
428
 
417
429
  Returns:
418
430
  InteractiveStepExecutor/ InteractiveExecutor
@@ -429,6 +441,7 @@ def create_single_node_executor(
429
441
  check_command_line_argument_lst(
430
442
  command_line_argument_lst=resource_dict.get("slurm_cmd_args", [])
431
443
  )
444
+ check_wait_on_shutdown(wait_on_shutdown=wait)
432
445
  if "threads_per_core" in resource_dict:
433
446
  del resource_dict["threads_per_core"]
434
447
  if "gpus_per_core" in resource_dict:
@@ -6,6 +6,7 @@ from executorlib.standalone.inputcheck import (
6
6
  check_log_obj_size,
7
7
  check_plot_dependency_graph,
8
8
  check_refresh_rate,
9
+ check_wait_on_shutdown,
9
10
  validate_number_of_cores,
10
11
  )
11
12
  from executorlib.task_scheduler.interactive.blockallocation import (
@@ -65,6 +66,7 @@ class SlurmClusterExecutor(BaseExecutor):
65
66
  plot_dependency_graph_filename (str): Name of the file to store the plotted graph in.
66
67
  export_workflow_filename (str): Name of the file to store the exported workflow graph in.
67
68
  log_obj_size (bool): Enable debug mode which reports the size of the communicated objects.
69
+ wait (bool): Whether to wait for the completion of all tasks before shutting down the executor.
68
70
 
69
71
  Examples:
70
72
  ```
@@ -104,6 +106,7 @@ class SlurmClusterExecutor(BaseExecutor):
104
106
  plot_dependency_graph_filename: Optional[str] = None,
105
107
  export_workflow_filename: Optional[str] = None,
106
108
  log_obj_size: bool = False,
109
+ wait: bool = True,
107
110
  ):
108
111
  """
109
112
  The executorlib.SlurmClusterExecutor leverages either the message passing interface (MPI), the SLURM workload
@@ -150,6 +153,7 @@ class SlurmClusterExecutor(BaseExecutor):
150
153
  plot_dependency_graph_filename (str): Name of the file to store the plotted graph in.
151
154
  export_workflow_filename (str): Name of the file to store the exported workflow graph in.
152
155
  log_obj_size (bool): Enable debug mode which reports the size of the communicated objects.
156
+ wait (bool): Whether to wait for the completion of all tasks before shutting down the executor.
153
157
 
154
158
  """
155
159
  default_resource_dict: dict = {
@@ -210,6 +214,7 @@ class SlurmClusterExecutor(BaseExecutor):
210
214
  block_allocation=block_allocation,
211
215
  init_function=init_function,
212
216
  disable_dependencies=disable_dependencies,
217
+ wait=wait,
213
218
  )
214
219
  )
215
220
  else:
@@ -281,6 +286,7 @@ class SlurmJobExecutor(BaseExecutor):
281
286
  plot_dependency_graph_filename (str): Name of the file to store the plotted graph in.
282
287
  export_workflow_filename (str): Name of the file to store the exported workflow graph in.
283
288
  log_obj_size (bool): Enable debug mode which reports the size of the communicated objects.
289
+ wait (bool): Whether to wait for the completion of all tasks before shutting down the executor.
284
290
 
285
291
  Examples:
286
292
  ```
@@ -319,6 +325,7 @@ class SlurmJobExecutor(BaseExecutor):
319
325
  plot_dependency_graph_filename: Optional[str] = None,
320
326
  export_workflow_filename: Optional[str] = None,
321
327
  log_obj_size: bool = False,
328
+ wait: bool = True,
322
329
  ):
323
330
  """
324
331
  The executorlib.SlurmJobExecutor leverages either the message passing interface (MPI), the SLURM workload
@@ -368,6 +375,7 @@ class SlurmJobExecutor(BaseExecutor):
368
375
  plot_dependency_graph_filename (str): Name of the file to store the plotted graph in.
369
376
  export_workflow_filename (str): Name of the file to store the exported workflow graph in.
370
377
  log_obj_size (bool): Enable debug mode which reports the size of the communicated objects.
378
+ wait (bool): Whether to wait for the completion of all tasks before shutting down the executor.
371
379
 
372
380
  """
373
381
  default_resource_dict: dict = {
@@ -396,6 +404,7 @@ class SlurmJobExecutor(BaseExecutor):
396
404
  block_allocation=block_allocation,
397
405
  init_function=init_function,
398
406
  log_obj_size=log_obj_size,
407
+ wait=wait,
399
408
  ),
400
409
  max_cores=max_cores,
401
410
  refresh_rate=refresh_rate,
@@ -418,6 +427,7 @@ class SlurmJobExecutor(BaseExecutor):
418
427
  block_allocation=block_allocation,
419
428
  init_function=init_function,
420
429
  log_obj_size=log_obj_size,
430
+ wait=wait,
421
431
  )
422
432
  )
423
433
 
@@ -432,6 +442,7 @@ def create_slurm_executor(
432
442
  block_allocation: bool = False,
433
443
  init_function: Optional[Callable] = None,
434
444
  log_obj_size: bool = False,
445
+ wait: bool = True,
435
446
  ) -> Union[OneProcessTaskScheduler, BlockAllocationTaskScheduler]:
436
447
  """
437
448
  Create a SLURM executor
@@ -471,6 +482,7 @@ def create_slurm_executor(
471
482
  of the individual function.
472
483
  init_function (None): optional function to preset arguments for functions which are submitted later
473
484
  log_obj_size (bool): Enable debug mode which reports the size of the communicated objects.
485
+ wait (bool): Whether to wait for the completion of all tasks before shutting down the executor.
474
486
 
475
487
  Returns:
476
488
  InteractiveStepExecutor/ InteractiveExecutor
@@ -483,6 +495,7 @@ def create_slurm_executor(
483
495
  resource_dict["log_obj_size"] = log_obj_size
484
496
  resource_dict["pmi_mode"] = pmi_mode
485
497
  check_init_function(block_allocation=block_allocation, init_function=init_function)
498
+ check_wait_on_shutdown(wait_on_shutdown=wait)
486
499
  if block_allocation:
487
500
  resource_dict["init_function"] = init_function
488
501
  max_workers = validate_number_of_cores(
@@ -11,6 +11,7 @@ group_dict = {
11
11
  "kwargs": "input_kwargs",
12
12
  "output": "output",
13
13
  "error": "error",
14
+ "resource_dict": "resource_dict",
14
15
  "runtime": "runtime",
15
16
  "queue_id": "queue_id",
16
17
  "error_log_file": "error_log_file",
@@ -61,6 +62,12 @@ def load(file_name: str) -> dict:
61
62
  data_dict["kwargs"] = cloudpickle.loads(np.void(hdf["/input_kwargs"]))
62
63
  else:
63
64
  data_dict["kwargs"] = {}
65
+ if "resource_dict" in hdf:
66
+ data_dict["resource_dict"] = cloudpickle.loads(
67
+ np.void(hdf["/resource_dict"])
68
+ )
69
+ else:
70
+ data_dict["resource_dict"] = {}
64
71
  if "error_log_file" in hdf:
65
72
  data_dict["error_log_file"] = cloudpickle.loads(
66
73
  np.void(hdf["/error_log_file"])
@@ -17,6 +17,18 @@ def check_oversubscribe(oversubscribe: bool) -> None:
17
17
  )
18
18
 
19
19
 
20
def check_wait_on_shutdown(
    wait_on_shutdown: bool,
) -> None:
    """
    Validate the wait_on_shutdown flag for executors which always wait on shutdown.

    Args:
        wait_on_shutdown (bool): Whether to wait for the completion of all tasks before shutting down the executor.

    Raises:
        ValueError: If wait_on_shutdown evaluates to False.
    """
    # Nothing to validate when waiting on shutdown is requested.
    if wait_on_shutdown:
        return
    message = "The wait_on_shutdown parameter is only supported for the executorlib.FluxClusterExecutor and executorlib.SlurmClusterExecutor."
    raise ValueError(message)
30
+
31
+
20
32
  def check_command_line_argument_lst(command_line_argument_lst: list[str]) -> None:
21
33
  """
22
34
  Check if command_line_argument_lst is not empty and raise a ValueError if it is.
@@ -11,6 +11,7 @@ class BaseSpawner(ABC):
11
11
  self,
12
12
  cwd: Optional[str] = None,
13
13
  cores: int = 1,
14
+ worker_id: int = 0,
14
15
  openmpi_oversubscribe: bool = False,
15
16
  ):
16
17
  """
@@ -20,9 +21,11 @@ class BaseSpawner(ABC):
20
21
  cwd (str): The current working directory.
21
22
  cores (int, optional): The number of cores to use. Defaults to 1.
22
23
  openmpi_oversubscribe (bool, optional): Whether to oversubscribe the cores. Defaults to False.
24
+ worker_id (int): The worker ID. Defaults to 0.
23
25
  """
24
26
  self._cwd = cwd
25
27
  self._cores = cores
28
+ self._worker_id = worker_id
26
29
  self._openmpi_oversubscribe = openmpi_oversubscribe
27
30
 
28
31
  @abstractmethod
@@ -69,6 +72,7 @@ class SubprocessSpawner(BaseSpawner):
69
72
  self,
70
73
  cwd: Optional[str] = None,
71
74
  cores: int = 1,
75
+ worker_id: int = 0,
72
76
  openmpi_oversubscribe: bool = False,
73
77
  threads_per_core: int = 1,
74
78
  ):
@@ -79,11 +83,13 @@ class SubprocessSpawner(BaseSpawner):
79
83
  cwd (str, optional): The current working directory. Defaults to None.
80
84
  cores (int, optional): The number of cores to use. Defaults to 1.
81
85
  threads_per_core (int, optional): The number of threads per core. Defaults to 1.
86
+ worker_id (int): The worker ID. Defaults to 0.
82
87
  openmpi_oversubscribe (bool, optional): Whether to oversubscribe the cores. Defaults to False.
83
88
  """
84
89
  super().__init__(
85
90
  cwd=cwd,
86
91
  cores=cores,
92
+ worker_id=worker_id,
87
93
  openmpi_oversubscribe=openmpi_oversubscribe,
88
94
  )
89
95
  self._process: Optional[subprocess.Popen] = None
@@ -106,6 +112,7 @@ class SubprocessSpawner(BaseSpawner):
106
112
  """
107
113
  if self._cwd is not None:
108
114
  os.makedirs(self._cwd, exist_ok=True)
115
+ set_current_directory_in_environment()
109
116
  self._process = subprocess.Popen(
110
117
  args=self.generate_command(command_lst=command_lst),
111
118
  cwd=self._cwd,
@@ -189,3 +196,15 @@ def generate_mpiexec_command(
189
196
  if openmpi_oversubscribe:
190
197
  command_prepend_lst += ["--oversubscribe"]
191
198
  return command_prepend_lst
199
+
200
+
201
def set_current_directory_in_environment():
    """
    Add the current directory to the PYTHONPATH to be able to access local Python modules.

    The current working directory is prepended to the PYTHONPATH entry of os.environ unless it is already one of
    the listed directories. When PYTHONPATH is unset or empty it is set to the current working directory only.
    """
    current_path = os.getcwd()
    existing = os.environ.get("PYTHONPATH", "")
    # Compare against the individual entries instead of the raw string: a substring test would treat
    # "/home/user" as already present when only "/home/user/project" is listed.
    if current_path in existing.split(os.pathsep):
        return
    # os.pathsep is ":" on POSIX and ";" on Windows; an empty existing value must not leave a dangling separator.
    os.environ["PYTHONPATH"] = (
        current_path + os.pathsep + existing if existing else current_path
    )
@@ -198,7 +198,9 @@ class TaskSchedulerBase(FutureExecutor):
198
198
  if cancel_futures and self._future_queue is not None:
199
199
  cancel_items_in_queue(que=self._future_queue)
200
200
  if self._process is not None and self._future_queue is not None:
201
- self._future_queue.put({"shutdown": True, "wait": wait})
201
+ self._future_queue.put(
202
+ {"shutdown": True, "wait": wait, "cancel_futures": cancel_futures}
203
+ )
202
204
  if wait and isinstance(self._process, Thread):
203
205
  self._process.join()
204
206
  self._future_queue.join()
@@ -57,6 +57,7 @@ def execute_tasks_h5(
57
57
  backend: Optional[str] = None,
58
58
  disable_dependencies: bool = False,
59
59
  pmi_mode: Optional[str] = None,
60
+ wait: bool = True,
60
61
  ) -> None:
61
62
  """
62
63
  Execute tasks stored in a queue using HDF5 files.
@@ -72,6 +73,7 @@ def execute_tasks_h5(
72
73
  backend (str, optional): name of the backend used to spawn tasks.
73
74
  disable_dependencies (boolean): Disable resolving future objects during the submission.
74
75
  pmi_mode (str): PMI interface to use (OpenMPI v5 requires pmix) default is None (Flux only)
76
+ wait (bool): Whether to wait for the completion of all tasks before shutting down the executor.
75
77
 
76
78
  Returns:
77
79
  None
@@ -86,30 +88,35 @@ def execute_tasks_h5(
86
88
  with contextlib.suppress(queue.Empty):
87
89
  task_dict = future_queue.get_nowait()
88
90
  if task_dict is not None and "shutdown" in task_dict and task_dict["shutdown"]:
89
- if task_dict["wait"]:
91
+ if task_dict["wait"] and wait:
90
92
  while len(memory_dict) > 0:
91
- memory_dict = {
92
- key: _check_task_output(
93
- task_key=key,
94
- future_obj=value,
95
- cache_directory=cache_dir_dict[key],
96
- )
97
- for key, value in memory_dict.items()
98
- if not value.done()
99
- }
100
- if (
101
- terminate_function is not None
102
- and terminate_function == terminate_subprocess
103
- ):
104
- for task in process_dict.values():
105
- terminate_function(task=task)
106
- elif terminate_function is not None:
107
- for queue_id in process_dict.values():
108
- terminate_function(
109
- queue_id=queue_id,
110
- config_directory=pysqa_config_directory,
93
+ memory_dict = _refresh_memory_dict(
94
+ memory_dict=memory_dict,
95
+ cache_dir_dict=cache_dir_dict,
96
+ process_dict=process_dict,
97
+ terminate_function=terminate_function,
98
+ pysqa_config_directory=pysqa_config_directory,
111
99
  backend=backend,
112
100
  )
101
+ if not task_dict["cancel_futures"] and wait:
102
+ _cancel_processes(
103
+ process_dict=process_dict,
104
+ terminate_function=terminate_function,
105
+ pysqa_config_directory=pysqa_config_directory,
106
+ backend=backend,
107
+ )
108
+ else:
109
+ memory_dict = _refresh_memory_dict(
110
+ memory_dict=memory_dict,
111
+ cache_dir_dict=cache_dir_dict,
112
+ process_dict=process_dict,
113
+ terminate_function=terminate_function,
114
+ pysqa_config_directory=pysqa_config_directory,
115
+ backend=backend,
116
+ )
117
+ for value in memory_dict.values():
118
+ if not value.done():
119
+ value.cancel()
113
120
  future_queue.task_done()
114
121
  future_queue.join()
115
122
  break
@@ -177,15 +184,14 @@ def execute_tasks_h5(
177
184
  cache_dir_dict[task_key] = cache_directory
178
185
  future_queue.task_done()
179
186
  else:
180
- memory_dict = {
181
- key: _check_task_output(
182
- task_key=key,
183
- future_obj=value,
184
- cache_directory=cache_dir_dict[key],
185
- )
186
- for key, value in memory_dict.items()
187
- if not value.done()
188
- }
187
+ memory_dict = _refresh_memory_dict(
188
+ memory_dict=memory_dict,
189
+ cache_dir_dict=cache_dir_dict,
190
+ process_dict=process_dict,
191
+ terminate_function=terminate_function,
192
+ pysqa_config_directory=pysqa_config_directory,
193
+ backend=backend,
194
+ )
189
195
 
190
196
 
191
197
  def _check_task_output(
@@ -259,3 +265,72 @@ def _convert_args_and_kwargs(
259
265
  else:
260
266
  task_kwargs[key] = arg
261
267
  return task_args, task_kwargs, future_wait_key_lst
268
+
269
+
270
def _refresh_memory_dict(
    memory_dict: dict,
    cache_dir_dict: dict,
    process_dict: dict,
    terminate_function: Optional[Callable] = None,
    pysqa_config_directory: Optional[str] = None,
    backend: Optional[str] = None,
) -> dict:
    """
    Refresh memory dictionary

    The processes of tasks whose future objects were cancelled are handed to _cancel_processes() first. Afterwards
    _check_task_output() is called for every future object which is not done yet and only these tasks are kept.

    Args:
        memory_dict (dict): dictionary with task keys and future objects
        cache_dir_dict (dict): dictionary with task keys and cache directories
        process_dict (dict): dictionary with task keys and process reference.
        terminate_function (callable): The function to terminate the tasks.
        pysqa_config_directory (str): path to the pysqa config directory (only for pysqa based backend).
        backend (str): name of the backend used to spawn tasks.

    Returns:
        dict: Updated memory dictionary, restricted to the task keys whose future objects were not done, each mapped
              to the return value of _check_task_output().
    """
    # A set keeps the membership test in the filter below constant-time; iterating over process_dict preserves
    # the order in which the processes are handed to _cancel_processes().
    cancelled_keys = {
        key for key, value in memory_dict.items() if value.done() and value.cancelled()
    }
    _cancel_processes(
        process_dict={k: v for k, v in process_dict.items() if k in cancelled_keys},
        terminate_function=terminate_function,
        pysqa_config_directory=pysqa_config_directory,
        backend=backend,
    )
    return {
        key: _check_task_output(
            task_key=key,
            future_obj=value,
            cache_directory=cache_dir_dict[key],
        )
        for key, value in memory_dict.items()
        if not value.done()
    }
310
+
311
+
312
+ def _cancel_processes(
313
+ process_dict: dict,
314
+ terminate_function: Optional[Callable] = None,
315
+ pysqa_config_directory: Optional[str] = None,
316
+ backend: Optional[str] = None,
317
+ ):
318
+ """
319
+ Cancel processes
320
+
321
+ Args:
322
+ process_dict (dict): dictionary with task keys and process reference.
323
+ terminate_function (callable): The function to terminate the tasks.
324
+ pysqa_config_directory (str): path to the pysqa config directory (only for pysqa based backend).
325
+ backend (str): name of the backend used to spawn tasks.
326
+ """
327
+ if terminate_function is not None and terminate_function == terminate_subprocess:
328
+ for task in process_dict.values():
329
+ terminate_function(task=task)
330
+ elif terminate_function is not None and backend is not None:
331
+ for queue_id in process_dict.values():
332
+ terminate_function(
333
+ queue_id=queue_id,
334
+ config_directory=pysqa_config_directory,
335
+ backend=backend,
336
+ )
@@ -5,6 +5,9 @@ from pysqa import QueueAdapter
5
5
 
6
6
  from executorlib.standalone.hdf import dump, get_queue_id
7
7
  from executorlib.standalone.inputcheck import check_file_exists
8
+ from executorlib.standalone.interactive.spawner import (
9
+ set_current_directory_in_environment,
10
+ )
8
11
  from executorlib.standalone.scheduler import pysqa_execute_command, terminate_with_pysqa
9
12
 
10
13
 
@@ -85,6 +88,7 @@ def execute_with_pysqa(
85
88
  os.path.dirname(os.path.abspath(cwd))
86
89
  )
87
90
  submit_kwargs.update(resource_dict)
91
+ set_current_directory_in_environment()
88
92
  queue_id = qa.submit_job(**submit_kwargs)
89
93
  dump(file_name=file_name, data_dict={"queue_id": queue_id})
90
94
  return queue_id
@@ -5,6 +5,9 @@ from typing import Optional
5
5
 
6
6
  from executorlib.standalone.hdf import dump
7
7
  from executorlib.standalone.inputcheck import check_file_exists
8
+ from executorlib.standalone.interactive.spawner import (
9
+ set_current_directory_in_environment,
10
+ )
8
11
 
9
12
 
10
13
  def execute_in_subprocess(
@@ -53,11 +56,12 @@ def execute_in_subprocess(
53
56
  )
54
57
  if backend is not None:
55
58
  raise ValueError("backend parameter is not supported for subprocess spawner.")
56
- if resource_dict is None:
57
- resource_dict = {}
58
- cwd = resource_dict.get("cwd", cache_directory)
59
+ cwd = _get_working_directory(
60
+ cache_directory=cache_directory, resource_dict=resource_dict
61
+ )
59
62
  if cwd is not None:
60
63
  os.makedirs(cwd, exist_ok=True)
64
+ set_current_directory_in_environment()
61
65
  return subprocess.Popen(command, universal_newlines=True, cwd=cwd)
62
66
 
63
67
 
@@ -71,3 +75,14 @@ def terminate_subprocess(task):
71
75
  task.terminate()
72
76
  while task.poll() is None:
73
77
  time.sleep(0.1)
78
+
79
+
80
+ def _get_working_directory(
81
+ cache_directory: Optional[str] = None, resource_dict: Optional[dict] = None
82
+ ):
83
+ if resource_dict is None:
84
+ resource_dict = {}
85
+ if "cwd" in resource_dict and resource_dict["cwd"] is not None:
86
+ return resource_dict["cwd"]
87
+ else:
88
+ return cache_directory
@@ -35,6 +35,7 @@ class FileTaskScheduler(TaskSchedulerBase):
35
35
  backend: Optional[str] = None,
36
36
  disable_dependencies: bool = False,
37
37
  pmi_mode: Optional[str] = None,
38
+ wait: bool = True,
38
39
  ):
39
40
  """
40
41
  Initialize the FileExecutor.
@@ -50,6 +51,7 @@ class FileTaskScheduler(TaskSchedulerBase):
50
51
  backend (str, optional): name of the backend used to spawn tasks.
51
52
  disable_dependencies (boolean): Disable resolving future objects during the submission.
52
53
  pmi_mode (str): PMI interface to use (OpenMPI v5 requires pmix) default is None
54
+ wait (bool): Whether to wait for the completion of all tasks before shutting down the executor.
53
55
  """
54
56
  super().__init__(max_cores=None)
55
57
  default_resource_dict = {
@@ -73,6 +75,7 @@ class FileTaskScheduler(TaskSchedulerBase):
73
75
  "backend": backend,
74
76
  "disable_dependencies": disable_dependencies,
75
77
  "pmi_mode": pmi_mode,
78
+ "wait": wait,
76
79
  }
77
80
  self._set_process(
78
81
  Thread(
@@ -98,6 +101,7 @@ def create_file_executor(
98
101
  init_function: Optional[Callable] = None,
99
102
  disable_dependencies: bool = False,
100
103
  execute_function: Callable = execute_with_pysqa,
104
+ wait: bool = True,
101
105
  ):
102
106
  if block_allocation:
103
107
  raise ValueError(
@@ -128,4 +132,5 @@ def create_file_executor(
128
132
  execute_function=execute_function,
129
133
  terminate_function=terminate_function,
130
134
  pmi_mode=pmi_mode,
135
+ wait=wait,
131
136
  )
@@ -208,7 +208,7 @@ def _execute_multiple_tasks(
208
208
  queue_join_on_shutdown: bool = True,
209
209
  log_obj_size: bool = False,
210
210
  error_log_file: Optional[str] = None,
211
- worker_id: Optional[int] = None,
211
+ worker_id: int = 0,
212
212
  stop_function: Optional[Callable] = None,
213
213
  restart_limit: int = 0,
214
214
  **kwargs,
@@ -244,7 +244,7 @@ def _execute_multiple_tasks(
244
244
  command_lst=get_interactive_execute_command(
245
245
  cores=cores,
246
246
  ),
247
- connections=spawner(cores=cores, **kwargs),
247
+ connections=spawner(cores=cores, worker_id=worker_id, **kwargs),
248
248
  hostname_localhost=hostname_localhost,
249
249
  log_obj_size=log_obj_size,
250
250
  worker_id=worker_id,
@@ -5,7 +5,6 @@ from concurrent.futures import Future
5
5
  from typing import Optional
6
6
 
7
7
  import cloudpickle
8
- import numpy as np
9
8
 
10
9
  from executorlib.standalone.select import FutureSelector
11
10
 
@@ -219,7 +218,11 @@ def plot_dependency_graph_function(
219
218
  graph = nx.DiGraph()
220
219
  for node in node_lst:
221
220
  if node["type"] == "input":
222
- graph.add_node(node["id"], label=str(node["value"]), shape=node["shape"])
221
+ graph.add_node(
222
+ node["id"],
223
+ label=_short_object_name(node=node["value"]),
224
+ shape=node["shape"],
225
+ )
223
226
  else:
224
227
  graph.add_node(node["id"], label=str(node["name"]), shape=node["shape"])
225
228
  for edge in edge_lst:
@@ -245,6 +248,8 @@ def export_dependency_graph_function(
245
248
  edge_lst (list): List of edges.
246
249
  file_name (str): Name of the file to store the exported graph in.
247
250
  """
251
+ import numpy as np
252
+
248
253
  pwd_nodes_lst = []
249
254
  for n in node_lst:
250
255
  if n["type"] == "function":
@@ -305,3 +310,31 @@ def export_dependency_graph_function(
305
310
  }
306
311
  with open(file_name, "w") as f:
307
312
  json.dump(pwd_dict, f, indent=4)
313
+
314
+
315
+ def _short_object_name(node):
316
+ node_value_str = str(node)
317
+ if isinstance(node, tuple):
318
+ short_name = str(tuple(_short_object_name(node=el) for el in node))
319
+ elif isinstance(node, list):
320
+ short_name = str([_short_object_name(node=el) for el in node])
321
+ elif isinstance(node, dict):
322
+ short_name = str(
323
+ {
324
+ _short_object_name(node=key): _short_object_name(node=value)
325
+ for key, value in node.items()
326
+ }
327
+ )
328
+ elif "object at" in node_value_str:
329
+ short_name = node_value_str[1:-1].split(maxsplit=1)[0].split(".")[-1] + "()"
330
+ elif "<function" in node_value_str:
331
+ short_name = node_value_str.split()[1] + "()"
332
+ elif "\n" in node_value_str:
333
+ short_name = str(type(node)).split("'")[1].split(".")[-1] + "()"
334
+ elif "(" in node_value_str and ")" in node_value_str:
335
+ short_name = node_value_str.split("(", maxsplit=1)[0] + "()"
336
+ elif len(node_value_str) > 20:
337
+ short_name = node_value_str[:21] + "..."
338
+ else:
339
+ short_name = node_value_str
340
+ return short_name
@@ -5,7 +5,10 @@ from typing import Callable, Optional
5
5
  import flux
6
6
  import flux.job
7
7
 
8
- from executorlib.standalone.interactive.spawner import BaseSpawner
8
+ from executorlib.standalone.interactive.spawner import (
9
+ BaseSpawner,
10
+ set_current_directory_in_environment,
11
+ )
9
12
 
10
13
 
11
14
  def validate_max_workers(max_workers: int, cores: int, threads_per_core: int):
@@ -31,6 +34,7 @@ class FluxPythonSpawner(BaseSpawner):
31
34
  threads_per_core (int, optional): The number of threads per base. Defaults to 1.
32
35
  gpus_per_core (int, optional): The number of GPUs per base. Defaults to 0.
33
36
  num_nodes (int, optional): The number of compute nodes to use for executing the task. Defaults to None.
37
+ worker_id (int): The worker ID. Defaults to 0.
34
38
  exclusive (bool): Whether to exclusively reserve the compute nodes, or allow sharing compute notes. Defaults to
35
39
  False.
36
40
  openmpi_oversubscribe (bool, optional): Whether to oversubscribe. Defaults to False.
@@ -49,6 +53,7 @@ class FluxPythonSpawner(BaseSpawner):
49
53
  threads_per_core: int = 1,
50
54
  gpus_per_core: int = 0,
51
55
  num_nodes: Optional[int] = None,
56
+ worker_id: int = 0,
52
57
  exclusive: bool = False,
53
58
  priority: Optional[int] = None,
54
59
  openmpi_oversubscribe: bool = False,
@@ -60,6 +65,7 @@ class FluxPythonSpawner(BaseSpawner):
60
65
  super().__init__(
61
66
  cwd=cwd,
62
67
  cores=cores,
68
+ worker_id=worker_id,
63
69
  openmpi_oversubscribe=openmpi_oversubscribe,
64
70
  )
65
71
  self._threads_per_core = threads_per_core
@@ -115,18 +121,20 @@ class FluxPythonSpawner(BaseSpawner):
115
121
  num_nodes=self._num_nodes,
116
122
  exclusive=self._exclusive,
117
123
  )
124
+ set_current_directory_in_environment()
118
125
  jobspec.environment = dict(os.environ)
119
126
  if self._pmi_mode is not None:
120
127
  jobspec.setattr_shell_option("pmi", self._pmi_mode)
121
128
  if self._cwd is not None:
122
129
  jobspec.cwd = self._cwd
123
130
  os.makedirs(self._cwd, exist_ok=True)
131
+ file_prefix = "flux_" + str(self._worker_id)
124
132
  if self._flux_log_files and self._cwd is not None:
125
- jobspec.stderr = os.path.join(self._cwd, "flux.err")
126
- jobspec.stdout = os.path.join(self._cwd, "flux.out")
133
+ jobspec.stderr = os.path.join(self._cwd, file_prefix + ".err")
134
+ jobspec.stdout = os.path.join(self._cwd, file_prefix + ".out")
127
135
  elif self._flux_log_files:
128
- jobspec.stderr = os.path.abspath("flux.err")
129
- jobspec.stdout = os.path.abspath("flux.out")
136
+ jobspec.stderr = os.path.abspath(file_prefix + ".err")
137
+ jobspec.stdout = os.path.abspath(file_prefix + ".out")
130
138
  if self._priority is not None:
131
139
  self._future = self._flux_executor.submit(
132
140
  jobspec=jobspec, urgency=self._priority
@@ -6,7 +6,10 @@ from typing import Callable, Optional
6
6
  from pysqa import QueueAdapter
7
7
 
8
8
  from executorlib.standalone.inputcheck import validate_number_of_cores
9
- from executorlib.standalone.interactive.spawner import BaseSpawner
9
+ from executorlib.standalone.interactive.spawner import (
10
+ BaseSpawner,
11
+ set_current_directory_in_environment,
12
+ )
10
13
  from executorlib.standalone.scheduler import pysqa_execute_command, terminate_with_pysqa
11
14
  from executorlib.task_scheduler.interactive.blockallocation import (
12
15
  BlockAllocationTaskScheduler,
@@ -21,6 +24,7 @@ class PysqaSpawner(BaseSpawner):
21
24
  threads_per_core: int = 1,
22
25
  gpus_per_core: int = 0,
23
26
  num_nodes: Optional[int] = None,
27
+ worker_id: int = 0,
24
28
  exclusive: bool = False,
25
29
  openmpi_oversubscribe: bool = False,
26
30
  slurm_cmd_args: Optional[list[str]] = None,
@@ -38,6 +42,7 @@ class PysqaSpawner(BaseSpawner):
38
42
  threads_per_core (int): The number of threads per core. Defaults to 1.
39
43
  gpus_per_core (int): number of GPUs per worker - defaults to 0
40
44
  num_nodes (int, optional): The number of compute nodes to use for executing the task. Defaults to None.
45
+ worker_id (int): The worker ID. Defaults to 0.
41
46
  exclusive (bool): Whether to exclusively reserve the compute nodes, or allow sharing compute notes. Defaults
42
47
  to False.
43
48
  openmpi_oversubscribe (bool): Whether to oversubscribe the cores. Defaults to False.
@@ -49,6 +54,7 @@ class PysqaSpawner(BaseSpawner):
49
54
  super().__init__(
50
55
  cwd=cwd,
51
56
  cores=cores,
57
+ worker_id=worker_id,
52
58
  openmpi_oversubscribe=openmpi_oversubscribe,
53
59
  )
54
60
  self._threads_per_core = threads_per_core
@@ -180,6 +186,7 @@ class PysqaSpawner(BaseSpawner):
180
186
  working_directory = os.path.join(self._cwd, hash)
181
187
  else:
182
188
  working_directory = os.path.abspath(hash)
189
+ set_current_directory_in_environment()
183
190
  return queue_adapter.submit_job(
184
191
  command=" ".join(self.generate_command(command_lst=command_lst)),
185
192
  working_directory=working_directory,
@@ -27,6 +27,7 @@ class SrunSpawner(SubprocessSpawner):
27
27
  threads_per_core: int = 1,
28
28
  gpus_per_core: int = 0,
29
29
  num_nodes: Optional[int] = None,
30
+ worker_id: int = 0,
30
31
  exclusive: bool = False,
31
32
  openmpi_oversubscribe: bool = False,
32
33
  slurm_cmd_args: Optional[list[str]] = None,
@@ -41,6 +42,7 @@ class SrunSpawner(SubprocessSpawner):
41
42
  threads_per_core (int, optional): The number of threads per core. Defaults to 1.
42
43
  gpus_per_core (int, optional): The number of GPUs per core. Defaults to 0.
43
44
  num_nodes (int, optional): The number of compute nodes to use for executing the task. Defaults to None.
45
+ worker_id (int): The worker ID. Defaults to 0.
44
46
  exclusive (bool): Whether to exclusively reserve the compute nodes, or allow sharing compute notes. Defaults to False.
45
47
  openmpi_oversubscribe (bool, optional): Whether to oversubscribe the cores. Defaults to False.
46
48
  slurm_cmd_args (list[str], optional): Additional command line arguments. Defaults to [].
@@ -49,6 +51,7 @@ class SrunSpawner(SubprocessSpawner):
49
51
  super().__init__(
50
52
  cwd=cwd,
51
53
  cores=cores,
54
+ worker_id=worker_id,
52
55
  openmpi_oversubscribe=openmpi_oversubscribe,
53
56
  threads_per_core=threads_per_core,
54
57
  )
File without changes
File without changes
File without changes