executorlib 0.3.0__tar.gz → 0.4.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {executorlib-0.3.0/executorlib.egg-info → executorlib-0.4.1}/PKG-INFO +4 -4
- {executorlib-0.3.0 → executorlib-0.4.1}/executorlib/_version.py +3 -3
- {executorlib-0.3.0 → executorlib-0.4.1}/executorlib/base/executor.py +12 -6
- {executorlib-0.3.0 → executorlib-0.4.1}/executorlib/cache/executor.py +11 -10
- executorlib-0.4.1/executorlib/interactive/blockallocation.py +176 -0
- executorlib-0.4.1/executorlib/interactive/dependency.py +287 -0
- executorlib-0.3.0/executorlib/interactive/flux.py → executorlib-0.4.1/executorlib/interactive/fluxspawner.py +12 -2
- executorlib-0.4.1/executorlib/interactive/onetoone.py +222 -0
- executorlib-0.4.1/executorlib/interactive/shared.py +179 -0
- {executorlib-0.3.0 → executorlib-0.4.1}/executorlib/interfaces/flux.py +14 -17
- {executorlib-0.3.0 → executorlib-0.4.1}/executorlib/interfaces/single.py +7 -9
- {executorlib-0.3.0 → executorlib-0.4.1}/executorlib/interfaces/slurm.py +10 -15
- {executorlib-0.3.0 → executorlib-0.4.1}/executorlib/standalone/__init__.py +5 -0
- executorlib-0.4.1/executorlib/standalone/interactive/arguments.py +93 -0
- {executorlib-0.3.0 → executorlib-0.4.1/executorlib.egg-info}/PKG-INFO +4 -4
- {executorlib-0.3.0 → executorlib-0.4.1}/executorlib.egg-info/SOURCES.txt +8 -3
- {executorlib-0.3.0 → executorlib-0.4.1}/executorlib.egg-info/requires.txt +3 -3
- {executorlib-0.3.0 → executorlib-0.4.1}/pyproject.toml +3 -3
- {executorlib-0.3.0 → executorlib-0.4.1}/tests/test_dependencies_executor.py +38 -5
- {executorlib-0.3.0 → executorlib-0.4.1}/tests/test_executor_backend_flux.py +1 -1
- {executorlib-0.3.0 → executorlib-0.4.1}/tests/test_flux_executor.py +11 -11
- executorlib-0.4.1/tests/test_interactive_future_arguments.py +69 -0
- {executorlib-0.3.0 → executorlib-0.4.1}/tests/test_local_executor.py +47 -33
- {executorlib-0.3.0 → executorlib-0.4.1}/tests/test_local_executor_future.py +5 -5
- executorlib-0.4.1/tests/test_local_executor_resize.py +80 -0
- {executorlib-0.3.0 → executorlib-0.4.1}/tests/test_plot_dependency_flux.py +1 -1
- {executorlib-0.3.0 → executorlib-0.4.1}/tests/test_pysqa_subprocess.py +1 -1
- {executorlib-0.3.0 → executorlib-0.4.1}/tests/test_shared_backend.py +1 -1
- {executorlib-0.3.0 → executorlib-0.4.1}/tests/test_shell_executor.py +4 -4
- {executorlib-0.3.0 → executorlib-0.4.1}/tests/test_shell_interactive.py +2 -2
- executorlib-0.3.0/executorlib/interactive/executor.py +0 -134
- executorlib-0.3.0/executorlib/interactive/shared.py +0 -684
- {executorlib-0.3.0 → executorlib-0.4.1}/LICENSE +0 -0
- {executorlib-0.3.0 → executorlib-0.4.1}/MANIFEST.in +0 -0
- {executorlib-0.3.0 → executorlib-0.4.1}/README.md +0 -0
- {executorlib-0.3.0 → executorlib-0.4.1}/executorlib/__init__.py +0 -0
- {executorlib-0.3.0 → executorlib-0.4.1}/executorlib/backend/__init__.py +0 -0
- {executorlib-0.3.0 → executorlib-0.4.1}/executorlib/backend/cache_parallel.py +0 -0
- {executorlib-0.3.0 → executorlib-0.4.1}/executorlib/backend/cache_serial.py +0 -0
- {executorlib-0.3.0 → executorlib-0.4.1}/executorlib/backend/interactive_parallel.py +0 -0
- {executorlib-0.3.0 → executorlib-0.4.1}/executorlib/backend/interactive_serial.py +0 -0
- {executorlib-0.3.0 → executorlib-0.4.1}/executorlib/base/__init__.py +0 -0
- {executorlib-0.3.0 → executorlib-0.4.1}/executorlib/cache/__init__.py +0 -0
- {executorlib-0.3.0 → executorlib-0.4.1}/executorlib/cache/backend.py +0 -0
- {executorlib-0.3.0 → executorlib-0.4.1}/executorlib/cache/queue_spawner.py +0 -0
- {executorlib-0.3.0 → executorlib-0.4.1}/executorlib/cache/shared.py +0 -0
- {executorlib-0.3.0 → executorlib-0.4.1}/executorlib/cache/subprocess_spawner.py +0 -0
- {executorlib-0.3.0 → executorlib-0.4.1}/executorlib/interactive/__init__.py +0 -0
- /executorlib-0.3.0/executorlib/interactive/slurm.py → /executorlib-0.4.1/executorlib/interactive/slurmspawner.py +0 -0
- {executorlib-0.3.0 → executorlib-0.4.1}/executorlib/interfaces/__init__.py +0 -0
- {executorlib-0.3.0 → executorlib-0.4.1}/executorlib/standalone/command.py +0 -0
- {executorlib-0.3.0 → executorlib-0.4.1}/executorlib/standalone/hdf.py +0 -0
- {executorlib-0.3.0 → executorlib-0.4.1}/executorlib/standalone/inputcheck.py +0 -0
- {executorlib-0.3.0 → executorlib-0.4.1}/executorlib/standalone/interactive/__init__.py +0 -0
- {executorlib-0.3.0 → executorlib-0.4.1}/executorlib/standalone/interactive/backend.py +0 -0
- {executorlib-0.3.0 → executorlib-0.4.1}/executorlib/standalone/interactive/communication.py +0 -0
- {executorlib-0.3.0 → executorlib-0.4.1}/executorlib/standalone/interactive/spawner.py +0 -0
- {executorlib-0.3.0 → executorlib-0.4.1}/executorlib/standalone/plot.py +0 -0
- {executorlib-0.3.0 → executorlib-0.4.1}/executorlib/standalone/queue.py +0 -0
- {executorlib-0.3.0 → executorlib-0.4.1}/executorlib/standalone/serialize.py +0 -0
- {executorlib-0.3.0 → executorlib-0.4.1}/executorlib.egg-info/dependency_links.txt +0 -0
- {executorlib-0.3.0 → executorlib-0.4.1}/executorlib.egg-info/top_level.txt +0 -0
- {executorlib-0.3.0 → executorlib-0.4.1}/setup.cfg +0 -0
- {executorlib-0.3.0 → executorlib-0.4.1}/setup.py +0 -0
- {executorlib-0.3.0 → executorlib-0.4.1}/tests/test_backend_serial.py +0 -0
- {executorlib-0.3.0 → executorlib-0.4.1}/tests/test_cache_executor_interactive.py +0 -0
- {executorlib-0.3.0 → executorlib-0.4.1}/tests/test_cache_executor_mpi.py +0 -0
- {executorlib-0.3.0 → executorlib-0.4.1}/tests/test_cache_executor_pysqa_flux.py +0 -0
- {executorlib-0.3.0 → executorlib-0.4.1}/tests/test_cache_executor_serial.py +0 -0
- {executorlib-0.3.0 → executorlib-0.4.1}/tests/test_cache_hdf.py +0 -0
- {executorlib-0.3.0 → executorlib-0.4.1}/tests/test_cache_shared.py +0 -0
- {executorlib-0.3.0 → executorlib-0.4.1}/tests/test_executor_backend_mpi.py +0 -0
- {executorlib-0.3.0 → executorlib-0.4.1}/tests/test_executor_backend_mpi_noblock.py +0 -0
- {executorlib-0.3.0 → executorlib-0.4.1}/tests/test_integration_pyiron_workflow.py +0 -0
- {executorlib-0.3.0 → executorlib-0.4.1}/tests/test_plot_dependency.py +0 -0
- {executorlib-0.3.0 → executorlib-0.4.1}/tests/test_shared_communication.py +0 -0
- {executorlib-0.3.0 → executorlib-0.4.1}/tests/test_shared_executorbase.py +0 -0
- {executorlib-0.3.0 → executorlib-0.4.1}/tests/test_shared_input_check.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: executorlib
|
|
3
|
-
Version: 0.3.0
|
|
3
|
+
Version: 0.4.1
|
|
4
4
|
Summary: Up-scale python functions for high performance computing (HPC) with executorlib.
|
|
5
5
|
Author-email: Jan Janssen <janssen@lanl.gov>
|
|
6
6
|
License: BSD 3-Clause License
|
|
@@ -53,7 +53,7 @@ License-File: LICENSE
|
|
|
53
53
|
Requires-Dist: cloudpickle<=3.1.1,>=2.0.0
|
|
54
54
|
Requires-Dist: pyzmq<=26.2.1,>=25.0.0
|
|
55
55
|
Provides-Extra: cache
|
|
56
|
-
Requires-Dist: h5py<=3.
|
|
56
|
+
Requires-Dist: h5py<=3.13.0,>=3.6.0; extra == "cache"
|
|
57
57
|
Provides-Extra: graph
|
|
58
58
|
Requires-Dist: pygraphviz<=1.14,>=1.10; extra == "graph"
|
|
59
59
|
Requires-Dist: networkx<=3.4.2,>=2.8.8; extra == "graph"
|
|
@@ -65,11 +65,11 @@ Provides-Extra: mpi
|
|
|
65
65
|
Requires-Dist: mpi4py<=4.0.1,>=3.1.4; extra == "mpi"
|
|
66
66
|
Provides-Extra: cluster
|
|
67
67
|
Requires-Dist: pysqa==0.2.3; extra == "cluster"
|
|
68
|
-
Requires-Dist: h5py<=3.
|
|
68
|
+
Requires-Dist: h5py<=3.13.0,>=3.6.0; extra == "cluster"
|
|
69
69
|
Provides-Extra: all
|
|
70
70
|
Requires-Dist: mpi4py<=4.0.1,>=3.1.4; extra == "all"
|
|
71
71
|
Requires-Dist: pysqa==0.2.3; extra == "all"
|
|
72
|
-
Requires-Dist: h5py<=3.
|
|
72
|
+
Requires-Dist: h5py<=3.13.0,>=3.6.0; extra == "all"
|
|
73
73
|
Requires-Dist: pygraphviz<=1.14,>=1.10; extra == "all"
|
|
74
74
|
Requires-Dist: networkx<=3.4.2,>=2.8.8; extra == "all"
|
|
75
75
|
Requires-Dist: ipython<=8.32.0,>=7.33.0; extra == "all"
|
|
@@ -8,11 +8,11 @@ import json
|
|
|
8
8
|
|
|
9
9
|
version_json = '''
|
|
10
10
|
{
|
|
11
|
-
"date": "2025-02-
|
|
11
|
+
"date": "2025-02-19T13:21:35+0100",
|
|
12
12
|
"dirty": true,
|
|
13
13
|
"error": null,
|
|
14
|
-
"full-revisionid": "
|
|
15
|
-
"version": "0.
|
|
14
|
+
"full-revisionid": "4f9ef5fb9005188f0b6cc87ef7c3e7fc6006f3e0",
|
|
15
|
+
"version": "0.4.1"
|
|
16
16
|
}
|
|
17
17
|
''' # END VERSION_JSON
|
|
18
18
|
|
|
@@ -27,10 +27,19 @@ class ExecutorBase(FutureExecutor):
|
|
|
27
27
|
Initialize the ExecutorBase class.
|
|
28
28
|
"""
|
|
29
29
|
cloudpickle_register(ind=3)
|
|
30
|
+
self._process_kwargs: dict = {}
|
|
30
31
|
self._max_cores = max_cores
|
|
31
32
|
self._future_queue: Optional[queue.Queue] = queue.Queue()
|
|
32
33
|
self._process: Optional[Union[Thread, list[Thread]]] = None
|
|
33
34
|
|
|
35
|
+
@property
|
|
36
|
+
def max_workers(self) -> Optional[int]:
|
|
37
|
+
return self._process_kwargs.get("max_workers")
|
|
38
|
+
|
|
39
|
+
@max_workers.setter
|
|
40
|
+
def max_workers(self, max_workers: int):
|
|
41
|
+
raise NotImplementedError("The max_workers setter is not implemented.")
|
|
42
|
+
|
|
34
43
|
@property
|
|
35
44
|
def info(self) -> Optional[dict]:
|
|
36
45
|
"""
|
|
@@ -39,16 +48,13 @@ class ExecutorBase(FutureExecutor):
|
|
|
39
48
|
Returns:
|
|
40
49
|
Optional[dict]: Information about the executor.
|
|
41
50
|
"""
|
|
51
|
+
meta_data_dict = self._process_kwargs.copy()
|
|
52
|
+
if "future_queue" in meta_data_dict:
|
|
53
|
+
del meta_data_dict["future_queue"]
|
|
42
54
|
if self._process is not None and isinstance(self._process, list):
|
|
43
|
-
meta_data_dict = self._process[0]._kwargs.copy() # type: ignore
|
|
44
|
-
if "future_queue" in meta_data_dict:
|
|
45
|
-
del meta_data_dict["future_queue"]
|
|
46
55
|
meta_data_dict["max_workers"] = len(self._process)
|
|
47
56
|
return meta_data_dict
|
|
48
57
|
elif self._process is not None:
|
|
49
|
-
meta_data_dict = self._process._kwargs.copy() # type: ignore
|
|
50
|
-
if "future_queue" in meta_data_dict:
|
|
51
|
-
del meta_data_dict["future_queue"]
|
|
52
58
|
return meta_data_dict
|
|
53
59
|
else:
|
|
54
60
|
return None
|
|
@@ -63,19 +63,20 @@ class FileExecutor(ExecutorBase):
|
|
|
63
63
|
terminate_function = terminate_subprocess
|
|
64
64
|
cache_directory_path = os.path.abspath(cache_directory)
|
|
65
65
|
os.makedirs(cache_directory_path, exist_ok=True)
|
|
66
|
+
self._process_kwargs = {
|
|
67
|
+
"future_queue": self._future_queue,
|
|
68
|
+
"execute_function": execute_function,
|
|
69
|
+
"cache_directory": cache_directory_path,
|
|
70
|
+
"resource_dict": resource_dict,
|
|
71
|
+
"terminate_function": terminate_function,
|
|
72
|
+
"pysqa_config_directory": pysqa_config_directory,
|
|
73
|
+
"backend": backend,
|
|
74
|
+
"disable_dependencies": disable_dependencies,
|
|
75
|
+
}
|
|
66
76
|
self._set_process(
|
|
67
77
|
Thread(
|
|
68
78
|
target=execute_tasks_h5,
|
|
69
|
-
kwargs={
|
|
70
|
-
"future_queue": self._future_queue,
|
|
71
|
-
"execute_function": execute_function,
|
|
72
|
-
"cache_directory": cache_directory_path,
|
|
73
|
-
"resource_dict": resource_dict,
|
|
74
|
-
"terminate_function": terminate_function,
|
|
75
|
-
"pysqa_config_directory": pysqa_config_directory,
|
|
76
|
-
"backend": backend,
|
|
77
|
-
"disable_dependencies": disable_dependencies,
|
|
78
|
-
},
|
|
79
|
+
kwargs=self._process_kwargs,
|
|
79
80
|
)
|
|
80
81
|
)
|
|
81
82
|
|
|
@@ -0,0 +1,176 @@
|
|
|
1
|
+
import queue
|
|
2
|
+
from concurrent.futures import Future
|
|
3
|
+
from threading import Thread
|
|
4
|
+
from typing import Callable, Optional
|
|
5
|
+
|
|
6
|
+
from executorlib.base.executor import ExecutorBase, cancel_items_in_queue
|
|
7
|
+
from executorlib.interactive.shared import execute_tasks
|
|
8
|
+
from executorlib.standalone.inputcheck import (
|
|
9
|
+
check_resource_dict,
|
|
10
|
+
check_resource_dict_is_empty,
|
|
11
|
+
)
|
|
12
|
+
from executorlib.standalone.interactive.spawner import BaseSpawner, MpiExecSpawner
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class BlockAllocationExecutor(ExecutorBase):
|
|
16
|
+
"""
|
|
17
|
+
The executorlib.interactive.blockallocation.BlockAllocationExecutor leverages the executorlib interfaces to distribute python
|
|
18
|
+
tasks on a workstation or inside a queuing system allocation. In contrast to the mpi4py.futures.MPIPoolExecutor the
|
|
19
|
+
executorlib.interactive.blockallocation.BlockAllocationExecutor can be executed in a serial python process and does not require
|
|
20
|
+
the python script to be executed with MPI. Consequently, it is primarily an abstraction of its functionality to
|
|
21
|
+
improve the usability in particular when used in combination with Jupyter notebooks.
|
|
22
|
+
|
|
23
|
+
Args:
|
|
24
|
+
max_workers (int): defines the number of workers which can execute functions in parallel
|
|
25
|
+
executor_kwargs (dict): keyword arguments for the executor
|
|
26
|
+
spawner (BaseSpawner): interface class to initiate python processes
|
|
27
|
+
|
|
28
|
+
Examples:
|
|
29
|
+
|
|
30
|
+
>>> import numpy as np
|
|
31
|
+
>>> from executorlib.interactive.blockallocation import BlockAllocationExecutor
|
|
32
|
+
>>>
|
|
33
|
+
>>> def calc(i, j, k):
|
|
34
|
+
>>> from mpi4py import MPI
|
|
35
|
+
>>> size = MPI.COMM_WORLD.Get_size()
|
|
36
|
+
>>> rank = MPI.COMM_WORLD.Get_rank()
|
|
37
|
+
>>> return np.array([i, j, k]), size, rank
|
|
38
|
+
>>>
|
|
39
|
+
>>> def init_k():
|
|
40
|
+
>>> return {"k": 3}
|
|
41
|
+
>>>
|
|
42
|
+
>>> with BlockAllocationExecutor(max_workers=2, executor_kwargs={"init_function": init_k}) as p:
|
|
43
|
+
>>> fs = p.submit(calc, 2, j=4)
|
|
44
|
+
>>> print(fs.result())
|
|
45
|
+
[(array([2, 4, 3]), 2, 0), (array([2, 4, 3]), 2, 1)]
|
|
46
|
+
|
|
47
|
+
"""
|
|
48
|
+
|
|
49
|
+
def __init__(
|
|
50
|
+
self,
|
|
51
|
+
max_workers: int = 1,
|
|
52
|
+
executor_kwargs: Optional[dict] = None,
|
|
53
|
+
spawner: type[BaseSpawner] = MpiExecSpawner,
|
|
54
|
+
):
|
|
55
|
+
if executor_kwargs is None:
|
|
56
|
+
executor_kwargs = {}
|
|
57
|
+
super().__init__(max_cores=executor_kwargs.get("max_cores"))
|
|
58
|
+
executor_kwargs["future_queue"] = self._future_queue
|
|
59
|
+
executor_kwargs["spawner"] = spawner
|
|
60
|
+
executor_kwargs["queue_join_on_shutdown"] = False
|
|
61
|
+
self._process_kwargs = executor_kwargs
|
|
62
|
+
self._max_workers = max_workers
|
|
63
|
+
self._set_process(
|
|
64
|
+
process=[
|
|
65
|
+
Thread(
|
|
66
|
+
target=execute_tasks,
|
|
67
|
+
kwargs=executor_kwargs,
|
|
68
|
+
)
|
|
69
|
+
for _ in range(self._max_workers)
|
|
70
|
+
],
|
|
71
|
+
)
|
|
72
|
+
|
|
73
|
+
@property
|
|
74
|
+
def max_workers(self) -> int:
|
|
75
|
+
return self._max_workers
|
|
76
|
+
|
|
77
|
+
@max_workers.setter
|
|
78
|
+
def max_workers(self, max_workers: int):
|
|
79
|
+
if isinstance(self._future_queue, queue.Queue) and isinstance(
|
|
80
|
+
self._process, list
|
|
81
|
+
):
|
|
82
|
+
if self._max_workers > max_workers:
|
|
83
|
+
for _ in range(self._max_workers - max_workers):
|
|
84
|
+
self._future_queue.queue.insert(0, {"shutdown": True, "wait": True})
|
|
85
|
+
while len(self._process) > max_workers:
|
|
86
|
+
self._process = [
|
|
87
|
+
process for process in self._process if process.is_alive()
|
|
88
|
+
]
|
|
89
|
+
elif self._max_workers < max_workers:
|
|
90
|
+
new_process_lst = [
|
|
91
|
+
Thread(
|
|
92
|
+
target=execute_tasks,
|
|
93
|
+
kwargs=self._process_kwargs,
|
|
94
|
+
)
|
|
95
|
+
for _ in range(max_workers - self._max_workers)
|
|
96
|
+
]
|
|
97
|
+
for process_instance in new_process_lst:
|
|
98
|
+
process_instance.start()
|
|
99
|
+
self._process += new_process_lst
|
|
100
|
+
self._max_workers = max_workers
|
|
101
|
+
|
|
102
|
+
def submit( # type: ignore
|
|
103
|
+
self, fn: Callable, *args, resource_dict: Optional[dict] = None, **kwargs
|
|
104
|
+
) -> Future:
|
|
105
|
+
"""
|
|
106
|
+
Submits a callable to be executed with the given arguments.
|
|
107
|
+
|
|
108
|
+
Schedules the callable to be executed as fn(*args, **kwargs) and returns
|
|
109
|
+
a Future instance representing the execution of the callable.
|
|
110
|
+
|
|
111
|
+
Args:
|
|
112
|
+
fn (Callable): function to submit for execution
|
|
113
|
+
args: arguments for the submitted function
|
|
114
|
+
kwargs: keyword arguments for the submitted function
|
|
115
|
+
resource_dict (dict): resource dictionary, which defines the resources used for the execution of the
|
|
116
|
+
function. Example resource dictionary: {
|
|
117
|
+
cores: 1,
|
|
118
|
+
threads_per_core: 1,
|
|
119
|
+
gpus_per_worker: 0,
|
|
120
|
+
oversubscribe: False,
|
|
121
|
+
cwd: None,
|
|
122
|
+
executor: None,
|
|
123
|
+
hostname_localhost: False,
|
|
124
|
+
}
|
|
125
|
+
|
|
126
|
+
Returns:
|
|
127
|
+
Future: A Future representing the given call.
|
|
128
|
+
"""
|
|
129
|
+
if resource_dict is None:
|
|
130
|
+
resource_dict = {}
|
|
131
|
+
check_resource_dict_is_empty(resource_dict=resource_dict)
|
|
132
|
+
check_resource_dict(function=fn)
|
|
133
|
+
f: Future = Future()
|
|
134
|
+
if self._future_queue is not None:
|
|
135
|
+
self._future_queue.put(
|
|
136
|
+
{"fn": fn, "args": args, "kwargs": kwargs, "future": f}
|
|
137
|
+
)
|
|
138
|
+
return f
|
|
139
|
+
|
|
140
|
+
def shutdown(self, wait: bool = True, *, cancel_futures: bool = False):
|
|
141
|
+
"""Clean-up the resources associated with the Executor.
|
|
142
|
+
|
|
143
|
+
It is safe to call this method several times. Otherwise, no other
|
|
144
|
+
methods can be called after this one.
|
|
145
|
+
|
|
146
|
+
Args:
|
|
147
|
+
wait: If True then shutdown will not return until all running
|
|
148
|
+
futures have finished executing and the resources used by the
|
|
149
|
+
parallel_executors have been reclaimed.
|
|
150
|
+
cancel_futures: If True then shutdown will cancel all pending
|
|
151
|
+
futures. Futures that are completed or running will not be
|
|
152
|
+
cancelled.
|
|
153
|
+
"""
|
|
154
|
+
if self._future_queue is not None:
|
|
155
|
+
if cancel_futures:
|
|
156
|
+
cancel_items_in_queue(que=self._future_queue)
|
|
157
|
+
if isinstance(self._process, list):
|
|
158
|
+
for _ in range(len(self._process)):
|
|
159
|
+
self._future_queue.put({"shutdown": True, "wait": wait})
|
|
160
|
+
if wait:
|
|
161
|
+
for process in self._process:
|
|
162
|
+
process.join()
|
|
163
|
+
self._future_queue.join()
|
|
164
|
+
self._process = None
|
|
165
|
+
self._future_queue = None
|
|
166
|
+
|
|
167
|
+
def _set_process(self, process: list[Thread]): # type: ignore
|
|
168
|
+
"""
|
|
169
|
+
Set the process for the executor.
|
|
170
|
+
|
|
171
|
+
Args:
|
|
172
|
+
process (list[Thread]): The list of threads to start for the executor.
|
|
173
|
+
"""
|
|
174
|
+
self._process = process
|
|
175
|
+
for process_instance in self._process:
|
|
176
|
+
process_instance.start()
|
|
@@ -0,0 +1,287 @@
|
|
|
1
|
+
import queue
|
|
2
|
+
from concurrent.futures import Future
|
|
3
|
+
from threading import Thread
|
|
4
|
+
from time import sleep
|
|
5
|
+
from typing import Any, Callable, Optional
|
|
6
|
+
|
|
7
|
+
from executorlib.base.executor import ExecutorBase
|
|
8
|
+
from executorlib.standalone.interactive.arguments import (
|
|
9
|
+
check_exception_was_raised,
|
|
10
|
+
get_exception_lst,
|
|
11
|
+
get_future_objects_from_input,
|
|
12
|
+
update_futures_in_input,
|
|
13
|
+
)
|
|
14
|
+
from executorlib.standalone.plot import (
|
|
15
|
+
draw,
|
|
16
|
+
generate_nodes_and_edges,
|
|
17
|
+
generate_task_hash,
|
|
18
|
+
)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class DependencyExecutor(ExecutorBase):
|
|
22
|
+
"""
|
|
23
|
+
DependencyExecutor is a class that extends ExecutorBase and provides functionality for executing tasks with
|
|
24
|
+
dependencies.
|
|
25
|
+
|
|
26
|
+
Args:
|
|
27
|
+
refresh_rate (float, optional): The refresh rate for updating the executor queue. Defaults to 0.01.
|
|
28
|
+
plot_dependency_graph (bool, optional): Whether to generate and plot the dependency graph. Defaults to False.
|
|
29
|
+
plot_dependency_graph_filename (str): Name of the file to store the plotted graph in.
|
|
30
|
+
|
|
31
|
+
Attributes:
|
|
32
|
+
_future_hash_dict (Dict[str, Future]): A dictionary mapping task hash to future object.
|
|
33
|
+
_task_hash_dict (Dict[str, Dict]): A dictionary mapping task hash to task dictionary.
|
|
34
|
+
_generate_dependency_graph (bool): Whether to generate the dependency graph.
|
|
35
|
+
_plot_dependency_graph_filename (str): Name of the file to store the plotted graph in.
|
|
36
|
+
|
|
37
|
+
"""
|
|
38
|
+
|
|
39
|
+
def __init__(
|
|
40
|
+
self,
|
|
41
|
+
executor: ExecutorBase,
|
|
42
|
+
max_cores: Optional[int] = None,
|
|
43
|
+
refresh_rate: float = 0.01,
|
|
44
|
+
plot_dependency_graph: bool = False,
|
|
45
|
+
plot_dependency_graph_filename: Optional[str] = None,
|
|
46
|
+
) -> None:
|
|
47
|
+
super().__init__(max_cores=max_cores)
|
|
48
|
+
self._process_kwargs = {
|
|
49
|
+
"future_queue": self._future_queue,
|
|
50
|
+
"executor_queue": executor._future_queue,
|
|
51
|
+
"executor": executor,
|
|
52
|
+
"refresh_rate": refresh_rate,
|
|
53
|
+
}
|
|
54
|
+
self._set_process(
|
|
55
|
+
Thread(
|
|
56
|
+
target=_execute_tasks_with_dependencies,
|
|
57
|
+
kwargs=self._process_kwargs,
|
|
58
|
+
)
|
|
59
|
+
)
|
|
60
|
+
self._future_hash_dict: dict = {}
|
|
61
|
+
self._task_hash_dict: dict = {}
|
|
62
|
+
self._plot_dependency_graph_filename = plot_dependency_graph_filename
|
|
63
|
+
if plot_dependency_graph_filename is None:
|
|
64
|
+
self._generate_dependency_graph = plot_dependency_graph
|
|
65
|
+
else:
|
|
66
|
+
self._generate_dependency_graph = True
|
|
67
|
+
|
|
68
|
+
@property
|
|
69
|
+
def info(self) -> Optional[dict]:
|
|
70
|
+
"""
|
|
71
|
+
Get the information about the executor.
|
|
72
|
+
|
|
73
|
+
Returns:
|
|
74
|
+
Optional[dict]: Information about the executor.
|
|
75
|
+
"""
|
|
76
|
+
if isinstance(self._future_queue, queue.Queue):
|
|
77
|
+
f: Future = Future()
|
|
78
|
+
self._future_queue.queue.insert(
|
|
79
|
+
0, {"internal": True, "task": "get_info", "future": f}
|
|
80
|
+
)
|
|
81
|
+
return f.result()
|
|
82
|
+
else:
|
|
83
|
+
return None
|
|
84
|
+
|
|
85
|
+
@property
|
|
86
|
+
def max_workers(self) -> Optional[int]:
|
|
87
|
+
if isinstance(self._future_queue, queue.Queue):
|
|
88
|
+
f: Future = Future()
|
|
89
|
+
self._future_queue.queue.insert(
|
|
90
|
+
0, {"internal": True, "task": "get_max_workers", "future": f}
|
|
91
|
+
)
|
|
92
|
+
return f.result()
|
|
93
|
+
else:
|
|
94
|
+
return None
|
|
95
|
+
|
|
96
|
+
@max_workers.setter
|
|
97
|
+
def max_workers(self, max_workers: int):
|
|
98
|
+
if isinstance(self._future_queue, queue.Queue):
|
|
99
|
+
f: Future = Future()
|
|
100
|
+
self._future_queue.queue.insert(
|
|
101
|
+
0,
|
|
102
|
+
{
|
|
103
|
+
"internal": True,
|
|
104
|
+
"task": "set_max_workers",
|
|
105
|
+
"max_workers": max_workers,
|
|
106
|
+
"future": f,
|
|
107
|
+
},
|
|
108
|
+
)
|
|
109
|
+
if not f.result():
|
|
110
|
+
raise NotImplementedError("The max_workers setter is not implemented.")
|
|
111
|
+
|
|
112
|
+
def submit( # type: ignore
|
|
113
|
+
self,
|
|
114
|
+
fn: Callable[..., Any],
|
|
115
|
+
*args: Any,
|
|
116
|
+
resource_dict: Optional[dict[str, Any]] = None,
|
|
117
|
+
**kwargs: Any,
|
|
118
|
+
) -> Future:
|
|
119
|
+
"""
|
|
120
|
+
Submits a task to the executor.
|
|
121
|
+
|
|
122
|
+
Args:
|
|
123
|
+
fn (Callable): The function to be executed.
|
|
124
|
+
*args: Variable length argument list.
|
|
125
|
+
resource_dict (dict, optional): A dictionary of resources required by the task. Defaults to {}.
|
|
126
|
+
**kwargs: Arbitrary keyword arguments.
|
|
127
|
+
|
|
128
|
+
Returns:
|
|
129
|
+
Future: A future object representing the result of the task.
|
|
130
|
+
|
|
131
|
+
"""
|
|
132
|
+
if resource_dict is None:
|
|
133
|
+
resource_dict = {}
|
|
134
|
+
if not self._generate_dependency_graph:
|
|
135
|
+
f = super().submit(fn, *args, resource_dict=resource_dict, **kwargs)
|
|
136
|
+
else:
|
|
137
|
+
f = Future()
|
|
138
|
+
f.set_result(None)
|
|
139
|
+
task_dict = {
|
|
140
|
+
"fn": fn,
|
|
141
|
+
"args": args,
|
|
142
|
+
"kwargs": kwargs,
|
|
143
|
+
"future": f,
|
|
144
|
+
"resource_dict": resource_dict,
|
|
145
|
+
}
|
|
146
|
+
task_hash = generate_task_hash(
|
|
147
|
+
task_dict=task_dict,
|
|
148
|
+
future_hash_inverse_dict={
|
|
149
|
+
v: k for k, v in self._future_hash_dict.items()
|
|
150
|
+
},
|
|
151
|
+
)
|
|
152
|
+
self._future_hash_dict[task_hash] = f
|
|
153
|
+
self._task_hash_dict[task_hash] = task_dict
|
|
154
|
+
return f
|
|
155
|
+
|
|
156
|
+
def __exit__(
|
|
157
|
+
self,
|
|
158
|
+
exc_type: Any,
|
|
159
|
+
exc_val: Any,
|
|
160
|
+
exc_tb: Any,
|
|
161
|
+
) -> None:
|
|
162
|
+
"""
|
|
163
|
+
Exit method called when exiting the context manager.
|
|
164
|
+
|
|
165
|
+
Args:
|
|
166
|
+
exc_type: The type of the exception.
|
|
167
|
+
exc_val: The exception instance.
|
|
168
|
+
exc_tb: The traceback object.
|
|
169
|
+
|
|
170
|
+
"""
|
|
171
|
+
super().__exit__(exc_type=exc_type, exc_val=exc_val, exc_tb=exc_tb) # type: ignore
|
|
172
|
+
if self._generate_dependency_graph:
|
|
173
|
+
node_lst, edge_lst = generate_nodes_and_edges(
|
|
174
|
+
task_hash_dict=self._task_hash_dict,
|
|
175
|
+
future_hash_inverse_dict={
|
|
176
|
+
v: k for k, v in self._future_hash_dict.items()
|
|
177
|
+
},
|
|
178
|
+
)
|
|
179
|
+
return draw(
|
|
180
|
+
node_lst=node_lst,
|
|
181
|
+
edge_lst=edge_lst,
|
|
182
|
+
filename=self._plot_dependency_graph_filename,
|
|
183
|
+
)
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
def _execute_tasks_with_dependencies(
|
|
187
|
+
future_queue: queue.Queue,
|
|
188
|
+
executor_queue: queue.Queue,
|
|
189
|
+
executor: ExecutorBase,
|
|
190
|
+
refresh_rate: float = 0.01,
|
|
191
|
+
):
|
|
192
|
+
"""
|
|
193
|
+
Resolve the dependencies of multiple tasks, by analysing which task requires concurrent.futures.Future objects from
|
|
194
|
+
other tasks.
|
|
195
|
+
|
|
196
|
+
Args:
|
|
197
|
+
future_queue (Queue): Queue for receiving new tasks.
|
|
198
|
+
executor_queue (Queue): Queue for the internal executor.
|
|
199
|
+
executor (ExecutorBase): Executor to execute the tasks with after the dependencies are resolved.
|
|
200
|
+
refresh_rate (float): Set the refresh rate in seconds, how frequently the input queue is checked.
|
|
201
|
+
"""
|
|
202
|
+
wait_lst = []
|
|
203
|
+
while True:
|
|
204
|
+
try:
|
|
205
|
+
task_dict = future_queue.get_nowait()
|
|
206
|
+
except queue.Empty:
|
|
207
|
+
task_dict = None
|
|
208
|
+
if ( # shutdown the executor
|
|
209
|
+
task_dict is not None and "shutdown" in task_dict and task_dict["shutdown"]
|
|
210
|
+
):
|
|
211
|
+
executor.shutdown(wait=task_dict["wait"])
|
|
212
|
+
future_queue.task_done()
|
|
213
|
+
future_queue.join()
|
|
214
|
+
break
|
|
215
|
+
if ( # handle internal requests (get_info, get_max_workers, set_max_workers)
|
|
216
|
+
task_dict is not None and "internal" in task_dict and task_dict["internal"]
|
|
217
|
+
):
|
|
218
|
+
if task_dict["task"] == "get_info":
|
|
219
|
+
task_dict["future"].set_result(executor.info)
|
|
220
|
+
elif task_dict["task"] == "get_max_workers":
|
|
221
|
+
task_dict["future"].set_result(executor.max_workers)
|
|
222
|
+
elif task_dict["task"] == "set_max_workers":
|
|
223
|
+
try:
|
|
224
|
+
executor.max_workers = task_dict["max_workers"]
|
|
225
|
+
except NotImplementedError:
|
|
226
|
+
task_dict["future"].set_result(False)
|
|
227
|
+
else:
|
|
228
|
+
task_dict["future"].set_result(True)
|
|
229
|
+
elif ( # handle function submitted to the executor
|
|
230
|
+
task_dict is not None and "fn" in task_dict and "future" in task_dict
|
|
231
|
+
):
|
|
232
|
+
future_lst, ready_flag = get_future_objects_from_input(
|
|
233
|
+
args=task_dict["args"], kwargs=task_dict["kwargs"]
|
|
234
|
+
)
|
|
235
|
+
exception_lst = get_exception_lst(future_lst=future_lst)
|
|
236
|
+
if not check_exception_was_raised(future_obj=task_dict["future"]):
|
|
237
|
+
if len(exception_lst) > 0:
|
|
238
|
+
task_dict["future"].set_exception(exception_lst[0])
|
|
239
|
+
elif len(future_lst) == 0 or ready_flag:
|
|
240
|
+
# No future objects are used in the input or all future objects are already done
|
|
241
|
+
task_dict["args"], task_dict["kwargs"] = update_futures_in_input(
|
|
242
|
+
args=task_dict["args"], kwargs=task_dict["kwargs"]
|
|
243
|
+
)
|
|
244
|
+
executor_queue.put(task_dict)
|
|
245
|
+
else: # Otherwise add the function to the wait list
|
|
246
|
+
task_dict["future_lst"] = future_lst
|
|
247
|
+
wait_lst.append(task_dict)
|
|
248
|
+
future_queue.task_done()
|
|
249
|
+
elif len(wait_lst) > 0:
|
|
250
|
+
number_waiting = len(wait_lst)
|
|
251
|
+
# Check functions in the wait list and execute them if all future objects are now ready
|
|
252
|
+
wait_lst = _update_waiting_task(
|
|
253
|
+
wait_lst=wait_lst, executor_queue=executor_queue
|
|
254
|
+
)
|
|
255
|
+
# if no job is ready, sleep for a moment
|
|
256
|
+
if len(wait_lst) == number_waiting:
|
|
257
|
+
sleep(refresh_rate)
|
|
258
|
+
else:
|
|
259
|
+
# If there is nothing else to do, sleep for a moment
|
|
260
|
+
sleep(refresh_rate)
|
|
261
|
+
|
|
262
|
+
|
|
263
|
+
def _update_waiting_task(wait_lst: list[dict], executor_queue: queue.Queue) -> list:
|
|
264
|
+
"""
|
|
265
|
+
Submit the waiting tasks whose future inputs have been completed to the executor
|
|
266
|
+
|
|
267
|
+
Args:
|
|
268
|
+
wait_lst (list): List of waiting tasks
|
|
269
|
+
executor_queue (Queue): Queue of the internal executor
|
|
270
|
+
|
|
271
|
+
Returns:
|
|
272
|
+
list: list of tasks whose future inputs have not been completed
|
|
273
|
+
"""
|
|
274
|
+
wait_tmp_lst = []
|
|
275
|
+
for task_wait_dict in wait_lst:
|
|
276
|
+
exception_lst = get_exception_lst(future_lst=task_wait_dict["future_lst"])
|
|
277
|
+
if len(exception_lst) > 0:
|
|
278
|
+
task_wait_dict["future"].set_exception(exception_lst[0])
|
|
279
|
+
elif all(future.done() for future in task_wait_dict["future_lst"]):
|
|
280
|
+
del task_wait_dict["future_lst"]
|
|
281
|
+
task_wait_dict["args"], task_wait_dict["kwargs"] = update_futures_in_input(
|
|
282
|
+
args=task_wait_dict["args"], kwargs=task_wait_dict["kwargs"]
|
|
283
|
+
)
|
|
284
|
+
executor_queue.put(task_wait_dict)
|
|
285
|
+
else:
|
|
286
|
+
wait_tmp_lst.append(task_wait_dict)
|
|
287
|
+
return wait_tmp_lst
|
|
@@ -30,8 +30,11 @@ class FluxPythonSpawner(BaseSpawner):
|
|
|
30
30
|
threads_per_core (int, optional): The number of threads per base. Defaults to 1.
|
|
31
31
|
gpus_per_core (int, optional): The number of GPUs per base. Defaults to 0.
|
|
32
32
|
num_nodes (int, optional): The number of compute nodes to use for executing the task. Defaults to None.
|
|
33
|
-
exclusive (bool): Whether to exclusively reserve the compute nodes, or allow sharing compute nodes. Defaults to
|
|
33
|
+
exclusive (bool): Whether to exclusively reserve the compute nodes, or allow sharing compute nodes. Defaults to
|
|
34
|
+
False.
|
|
34
35
|
openmpi_oversubscribe (bool, optional): Whether to oversubscribe. Defaults to False.
|
|
36
|
+
priority (int, optional): job urgency 0 (lowest) through 31 (highest) (default is 16). Priorities 0 through 15
|
|
37
|
+
are restricted to the instance owner.
|
|
35
38
|
flux_executor (flux.job.FluxExecutor, optional): The FluxExecutor instance. Defaults to None.
|
|
36
39
|
flux_executor_pmi_mode (str, optional): The PMI option. Defaults to None.
|
|
37
40
|
flux_executor_nesting (bool, optional): Whether to use nested FluxExecutor. Defaults to False.
|
|
@@ -46,6 +49,7 @@ class FluxPythonSpawner(BaseSpawner):
|
|
|
46
49
|
gpus_per_core: int = 0,
|
|
47
50
|
num_nodes: Optional[int] = None,
|
|
48
51
|
exclusive: bool = False,
|
|
52
|
+
priority: Optional[int] = None,
|
|
49
53
|
openmpi_oversubscribe: bool = False,
|
|
50
54
|
flux_executor: Optional[flux.job.FluxExecutor] = None,
|
|
51
55
|
flux_executor_pmi_mode: Optional[str] = None,
|
|
@@ -65,6 +69,7 @@ class FluxPythonSpawner(BaseSpawner):
|
|
|
65
69
|
self._flux_executor_pmi_mode = flux_executor_pmi_mode
|
|
66
70
|
self._flux_executor_nesting = flux_executor_nesting
|
|
67
71
|
self._flux_log_files = flux_log_files
|
|
72
|
+
self._priority = priority
|
|
68
73
|
self._future = None
|
|
69
74
|
|
|
70
75
|
def bootup(
|
|
@@ -114,7 +119,12 @@ class FluxPythonSpawner(BaseSpawner):
|
|
|
114
119
|
elif self._flux_log_files:
|
|
115
120
|
jobspec.stderr = os.path.abspath("flux.err")
|
|
116
121
|
jobspec.stdout = os.path.abspath("flux.out")
|
|
117
|
-
self.
|
|
122
|
+
if self._priority is not None:
|
|
123
|
+
self._future = self._flux_executor.submit(
|
|
124
|
+
jobspec=jobspec, urgency=self._priority
|
|
125
|
+
)
|
|
126
|
+
else:
|
|
127
|
+
self._future = self._flux_executor.submit(jobspec=jobspec)
|
|
118
128
|
|
|
119
129
|
def shutdown(self, wait: bool = True):
|
|
120
130
|
"""
|