gpustack-runtime 0.1.40.post1__py3-none-any.whl → 0.1.41.post1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gpustack_runtime/__init__.py +1 -1
- gpustack_runtime/__main__.py +5 -3
- gpustack_runtime/_version.py +2 -2
- gpustack_runtime/_version_appendix.py +1 -1
- gpustack_runtime/cmds/__init__.py +5 -3
- gpustack_runtime/cmds/__types__.py +1 -1
- gpustack_runtime/cmds/deployer.py +140 -18
- gpustack_runtime/cmds/detector.py +1 -1
- gpustack_runtime/cmds/images.py +1 -1
- gpustack_runtime/deployer/__init__.py +28 -2
- gpustack_runtime/deployer/__patches__.py +1 -1
- gpustack_runtime/deployer/__types__.py +2 -1
- gpustack_runtime/deployer/__utils__.py +2 -2
- gpustack_runtime/deployer/cdi/__init__.py +86 -5
- gpustack_runtime/deployer/cdi/__types__.py +92 -29
- gpustack_runtime/deployer/cdi/__utils__.py +180 -0
- gpustack_runtime/deployer/cdi/amd.py +146 -0
- gpustack_runtime/deployer/cdi/ascend.py +164 -0
- gpustack_runtime/deployer/cdi/hygon.py +147 -0
- gpustack_runtime/deployer/cdi/iluvatar.py +136 -0
- gpustack_runtime/deployer/cdi/metax.py +148 -0
- gpustack_runtime/deployer/cdi/thead.py +57 -23
- gpustack_runtime/deployer/docker.py +9 -8
- gpustack_runtime/deployer/k8s/deviceplugin/__init__.py +325 -0
- gpustack_runtime/deployer/k8s/deviceplugin/__types__.py +131 -0
- gpustack_runtime/deployer/k8s/deviceplugin/plugin.py +590 -0
- gpustack_runtime/deployer/k8s/types/kubelet/deviceplugin/v1beta1/__init__.py +3 -0
- gpustack_runtime/deployer/k8s/types/kubelet/deviceplugin/v1beta1/api.proto +212 -0
- gpustack_runtime/deployer/k8s/types/kubelet/deviceplugin/v1beta1/api_pb2.py +86 -0
- gpustack_runtime/deployer/k8s/types/kubelet/deviceplugin/v1beta1/api_pb2.pyi +168 -0
- gpustack_runtime/deployer/k8s/types/kubelet/deviceplugin/v1beta1/api_pb2_grpc.py +358 -0
- gpustack_runtime/deployer/k8s/types/kubelet/deviceplugin/v1beta1/constants.py +34 -0
- gpustack_runtime/deployer/kuberentes.py +50 -4
- gpustack_runtime/deployer/podman.py +9 -8
- gpustack_runtime/detector/__init__.py +42 -5
- gpustack_runtime/detector/__types__.py +8 -24
- gpustack_runtime/detector/__utils__.py +46 -39
- gpustack_runtime/detector/amd.py +55 -66
- gpustack_runtime/detector/ascend.py +29 -41
- gpustack_runtime/detector/cambricon.py +3 -3
- gpustack_runtime/detector/hygon.py +21 -49
- gpustack_runtime/detector/iluvatar.py +44 -60
- gpustack_runtime/detector/metax.py +54 -37
- gpustack_runtime/detector/mthreads.py +74 -36
- gpustack_runtime/detector/nvidia.py +130 -93
- gpustack_runtime/detector/pyacl/__init__.py +1 -1
- gpustack_runtime/detector/pyamdgpu/__init__.py +1 -1
- gpustack_runtime/detector/pyamdsmi/__init__.py +1 -1
- gpustack_runtime/detector/pycuda/__init__.py +1 -1
- gpustack_runtime/detector/pydcmi/__init__.py +1 -1
- gpustack_runtime/detector/pyhsa/__init__.py +1 -1
- gpustack_runtime/detector/pymxsml/__init__.py +1553 -1
- gpustack_runtime/detector/pyrocmcore/__init__.py +1 -1
- gpustack_runtime/detector/pyrocmsmi/__init__.py +1 -1
- gpustack_runtime/detector/thead.py +41 -60
- gpustack_runtime/envs.py +106 -12
- gpustack_runtime/logging.py +6 -2
- {gpustack_runtime-0.1.40.post1.dist-info → gpustack_runtime-0.1.41.post1.dist-info}/METADATA +6 -1
- gpustack_runtime-0.1.41.post1.dist-info/RECORD +67 -0
- gpustack_runtime/detector/pymxsml/mxsml.py +0 -1580
- gpustack_runtime/detector/pymxsml/mxsml_extension.py +0 -816
- gpustack_runtime/detector/pymxsml/mxsml_mcm.py +0 -476
- gpustack_runtime-0.1.40.post1.dist-info/RECORD +0 -55
- {gpustack_runtime-0.1.40.post1.dist-info → gpustack_runtime-0.1.41.post1.dist-info}/WHEEL +0 -0
- {gpustack_runtime-0.1.40.post1.dist-info → gpustack_runtime-0.1.41.post1.dist-info}/entry_points.txt +0 -0
- {gpustack_runtime-0.1.40.post1.dist-info → gpustack_runtime-0.1.41.post1.dist-info}/licenses/LICENSE +0 -0
gpustack_runtime/__init__.py
CHANGED
gpustack_runtime/__main__.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
# PYTHON_ARGCOMPLETE_OK
|
|
2
|
-
from __future__ import annotations
|
|
2
|
+
from __future__ import annotations as __future_annotations__
|
|
3
3
|
|
|
4
4
|
import contextlib
|
|
5
5
|
import sys
|
|
@@ -12,7 +12,6 @@ from gpustack_runner.cmds import LoadImagesSubCommand
|
|
|
12
12
|
from . import deployer, detector
|
|
13
13
|
from ._version import commit_id, version
|
|
14
14
|
from .cmds import (
|
|
15
|
-
CDIGenerateSubCommand,
|
|
16
15
|
CopyImagesSubCommand,
|
|
17
16
|
CreateWorkloadSubCommand,
|
|
18
17
|
DeleteWorkloadsSubCommand,
|
|
@@ -20,6 +19,7 @@ from .cmds import (
|
|
|
20
19
|
DetectDevicesSubCommand,
|
|
21
20
|
ExecSelfSubCommand,
|
|
22
21
|
ExecWorkloadSubCommand,
|
|
22
|
+
GenerateCDIConfigSubCommand,
|
|
23
23
|
GetDevicesTopologySubCommand,
|
|
24
24
|
GetWorkloadSubCommand,
|
|
25
25
|
InspectSelfSubCommand,
|
|
@@ -29,6 +29,7 @@ from .cmds import (
|
|
|
29
29
|
LogsSelfSubCommand,
|
|
30
30
|
LogsWorkloadSubCommand,
|
|
31
31
|
SaveImagesSubCommand,
|
|
32
|
+
ServeDevicePluginSubCommand,
|
|
32
33
|
)
|
|
33
34
|
from .logging import setup_logging
|
|
34
35
|
|
|
@@ -76,7 +77,8 @@ def main():
|
|
|
76
77
|
LogsSelfSubCommand.register(subcommand_parser)
|
|
77
78
|
ExecSelfSubCommand.register(subcommand_parser)
|
|
78
79
|
InspectSelfSubCommand.register(subcommand_parser)
|
|
79
|
-
|
|
80
|
+
GenerateCDIConfigSubCommand.register(subcommand_parser)
|
|
81
|
+
ServeDevicePluginSubCommand.register(subcommand_parser)
|
|
80
82
|
ListImagesSubCommand.register(subcommand_parser)
|
|
81
83
|
SaveImagesSubCommand.register(subcommand_parser)
|
|
82
84
|
LoadImagesSubCommand.register(subcommand_parser)
|
gpustack_runtime/_version.py
CHANGED
|
@@ -27,8 +27,8 @@ version_tuple: VERSION_TUPLE
|
|
|
27
27
|
__commit_id__: COMMIT_ID
|
|
28
28
|
commit_id: COMMIT_ID
|
|
29
29
|
|
|
30
|
-
__version__ = version = '0.1.40.post1'
|
|
31
|
-
__version_tuple__ = version_tuple = (0, 1, 40, 'post1')
|
|
30
|
+
__version__ = version = '0.1.41.post1'
|
|
31
|
+
__version_tuple__ = version_tuple = (0, 1, 41, 'post1')
|
|
32
32
|
try:
|
|
33
33
|
from ._version_appendix import git_commit
|
|
34
34
|
__commit_id__ = commit_id = git_commit
|
|
@@ -1 +1 @@
|
|
|
1
|
-
git_commit = "
|
|
1
|
+
git_commit = "8671a00"
|
|
@@ -1,18 +1,19 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
1
|
+
from __future__ import annotations as __future_annotations__
|
|
2
2
|
|
|
3
3
|
from .deployer import (
|
|
4
|
-
CDIGenerateSubCommand,
|
|
5
4
|
CreateWorkloadSubCommand,
|
|
6
5
|
DeleteWorkloadsSubCommand,
|
|
7
6
|
DeleteWorkloadSubCommand,
|
|
8
7
|
ExecSelfSubCommand,
|
|
9
8
|
ExecWorkloadSubCommand,
|
|
9
|
+
GenerateCDIConfigSubCommand,
|
|
10
10
|
GetWorkloadSubCommand,
|
|
11
11
|
InspectSelfSubCommand,
|
|
12
12
|
InspectWorkloadSubCommand,
|
|
13
13
|
ListWorkloadsSubCommand,
|
|
14
14
|
LogsSelfSubCommand,
|
|
15
15
|
LogsWorkloadSubCommand,
|
|
16
|
+
ServeDevicePluginSubCommand,
|
|
16
17
|
)
|
|
17
18
|
from .detector import DetectDevicesSubCommand, GetDevicesTopologySubCommand
|
|
18
19
|
from .images import (
|
|
@@ -26,7 +27,6 @@ from .images import (
|
|
|
26
27
|
)
|
|
27
28
|
|
|
28
29
|
__all__ = [
|
|
29
|
-
"CDIGenerateSubCommand",
|
|
30
30
|
"CopyImagesSubCommand",
|
|
31
31
|
"CreateWorkloadSubCommand",
|
|
32
32
|
"DeleteWorkloadSubCommand",
|
|
@@ -34,6 +34,7 @@ __all__ = [
|
|
|
34
34
|
"DetectDevicesSubCommand",
|
|
35
35
|
"ExecSelfSubCommand",
|
|
36
36
|
"ExecWorkloadSubCommand",
|
|
37
|
+
"GenerateCDIConfigSubCommand",
|
|
37
38
|
"GetDevicesTopologySubCommand",
|
|
38
39
|
"GetWorkloadSubCommand",
|
|
39
40
|
"InspectSelfSubCommand",
|
|
@@ -45,6 +46,7 @@ __all__ = [
|
|
|
45
46
|
"LogsWorkloadSubCommand",
|
|
46
47
|
"PlatformedImage",
|
|
47
48
|
"SaveImagesSubCommand",
|
|
49
|
+
"ServeDevicePluginSubCommand",
|
|
48
50
|
"append_images",
|
|
49
51
|
"list_images",
|
|
50
52
|
]
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
1
|
+
from __future__ import annotations as __future_annotations__
|
|
2
2
|
|
|
3
3
|
import asyncio
|
|
4
4
|
import json
|
|
@@ -25,7 +25,10 @@ from ..deployer import (
|
|
|
25
25
|
WorkloadStatusStateEnum,
|
|
26
26
|
async_logs_self,
|
|
27
27
|
async_logs_workload,
|
|
28
|
-
|
|
28
|
+
cdi_available_backends,
|
|
29
|
+
cdi_available_manufacturers,
|
|
30
|
+
cdi_dump_config,
|
|
31
|
+
cdi_supported_manufacturers,
|
|
29
32
|
create_workload,
|
|
30
33
|
delete_workload,
|
|
31
34
|
exec_self,
|
|
@@ -33,9 +36,16 @@ from ..deployer import (
|
|
|
33
36
|
get_workload,
|
|
34
37
|
inspect_self,
|
|
35
38
|
inspect_workload,
|
|
39
|
+
k8s_deviceplugin_serve,
|
|
36
40
|
list_workloads,
|
|
37
41
|
)
|
|
38
|
-
from ..detector import
|
|
42
|
+
from ..detector import (
|
|
43
|
+
ManufacturerEnum,
|
|
44
|
+
available_backends,
|
|
45
|
+
available_manufacturers,
|
|
46
|
+
backend_to_manufacturer,
|
|
47
|
+
manufacturer_to_backend,
|
|
48
|
+
)
|
|
39
49
|
from .__types__ import SubCommand
|
|
40
50
|
|
|
41
51
|
if TYPE_CHECKING:
|
|
@@ -72,6 +82,11 @@ _IGNORE_ENVS_PREFIX = (
|
|
|
72
82
|
"COMMAND_MODE",
|
|
73
83
|
"TMPDIR",
|
|
74
84
|
"GPUSTACK_",
|
|
85
|
+
"SUDO_",
|
|
86
|
+
"HOSTNAME",
|
|
87
|
+
"KUBECONFIG",
|
|
88
|
+
"MAIL",
|
|
89
|
+
"HIS",
|
|
75
90
|
)
|
|
76
91
|
|
|
77
92
|
_IGNORE_ENVS_SUFFIX = (
|
|
@@ -88,7 +103,7 @@ class CreateWorkloadSubCommand(SubCommand):
|
|
|
88
103
|
Command to create a workload deployment.
|
|
89
104
|
"""
|
|
90
105
|
|
|
91
|
-
|
|
106
|
+
manufacturer: ManufacturerEnum
|
|
92
107
|
device: str
|
|
93
108
|
command_script: str | None
|
|
94
109
|
port: int
|
|
@@ -108,11 +123,19 @@ class CreateWorkloadSubCommand(SubCommand):
|
|
|
108
123
|
help="Create a workload deployment",
|
|
109
124
|
)
|
|
110
125
|
|
|
126
|
+
deploy_parser.add_argument(
|
|
127
|
+
"--manufacturer",
|
|
128
|
+
"--manu",
|
|
129
|
+
type=ManufacturerEnum,
|
|
130
|
+
help="Manufacturer to use (default: detect from current environment)",
|
|
131
|
+
choices=list(map(str, available_manufacturers())),
|
|
132
|
+
)
|
|
133
|
+
|
|
111
134
|
deploy_parser.add_argument(
|
|
112
135
|
"--backend",
|
|
113
136
|
type=str,
|
|
114
137
|
help="Backend to use (default: detect from current environment)",
|
|
115
|
-
choices=
|
|
138
|
+
choices=available_backends(),
|
|
116
139
|
)
|
|
117
140
|
|
|
118
141
|
deploy_parser.add_argument(
|
|
@@ -188,7 +211,7 @@ class CreateWorkloadSubCommand(SubCommand):
|
|
|
188
211
|
deploy_parser.set_defaults(func=CreateWorkloadSubCommand)
|
|
189
212
|
|
|
190
213
|
def __init__(self, args: Namespace):
|
|
191
|
-
self.
|
|
214
|
+
self.manufacturer = args.manufacturer
|
|
192
215
|
self.device = args.device
|
|
193
216
|
self.command_script = None
|
|
194
217
|
self.port = args.port
|
|
@@ -201,6 +224,16 @@ class CreateWorkloadSubCommand(SubCommand):
|
|
|
201
224
|
self.volume = args.volume
|
|
202
225
|
self.extra_args = args.extra_args
|
|
203
226
|
|
|
227
|
+
if args.backend:
|
|
228
|
+
if not self.manufacturer:
|
|
229
|
+
self.manufacturer = backend_to_manufacturer(args.backend)
|
|
230
|
+
elif args.backend != manufacturer_to_backend(self.manufacturer):
|
|
231
|
+
msg = (
|
|
232
|
+
f"The backend '{args.backend}' is not compatible with "
|
|
233
|
+
f"the manufacturer '{args.manufacturer}'."
|
|
234
|
+
)
|
|
235
|
+
raise ValueError(msg)
|
|
236
|
+
|
|
204
237
|
if not self.name or not self.image or not self.volume:
|
|
205
238
|
msg = "The name, image, and volume arguments are required."
|
|
206
239
|
raise ValueError(msg)
|
|
@@ -226,12 +259,13 @@ class CreateWorkloadSubCommand(SubCommand):
|
|
|
226
259
|
]
|
|
227
260
|
resources = None
|
|
228
261
|
if self.device:
|
|
229
|
-
if self.
|
|
262
|
+
if self.manufacturer:
|
|
263
|
+
backend = manufacturer_to_backend(self.manufacturer)
|
|
230
264
|
resources = ContainerResources(
|
|
231
265
|
**{
|
|
232
266
|
v: self.device
|
|
233
267
|
for k, v in envs.GPUSTACK_RUNTIME_DETECT_BACKEND_MAP_RESOURCE_KEY.items()
|
|
234
|
-
if k ==
|
|
268
|
+
if k == backend
|
|
235
269
|
},
|
|
236
270
|
)
|
|
237
271
|
else:
|
|
@@ -277,6 +311,7 @@ class CreateWorkloadSubCommand(SubCommand):
|
|
|
277
311
|
name=self.name,
|
|
278
312
|
namespace=self.namespace,
|
|
279
313
|
host_network=self.host_network,
|
|
314
|
+
shm_size=10 * 1 << 30, # 10 GiB
|
|
280
315
|
containers=[
|
|
281
316
|
Container(
|
|
282
317
|
restart_policy=(
|
|
@@ -957,23 +992,39 @@ class InspectSelfSubCommand(SubCommand):
|
|
|
957
992
|
print(inspect_self())
|
|
958
993
|
|
|
959
994
|
|
|
960
|
-
class
|
|
995
|
+
class GenerateCDIConfigSubCommand(SubCommand):
|
|
961
996
|
"""
|
|
962
997
|
Command to generate CDI configurations.
|
|
963
998
|
"""
|
|
964
999
|
|
|
1000
|
+
manufacturer: ManufacturerEnum
|
|
965
1001
|
format: str
|
|
966
1002
|
output: Path | None
|
|
967
1003
|
|
|
968
1004
|
@staticmethod
|
|
969
1005
|
def register(parser: _SubParsersAction):
|
|
970
|
-
|
|
971
|
-
"cdi
|
|
972
|
-
help="Generate CDI configurations according to the current environment",
|
|
973
|
-
aliases=["cdi
|
|
1006
|
+
generate_parser = parser.add_parser(
|
|
1007
|
+
"generate-cdi",
|
|
1008
|
+
help="Generate Container Device Interface(CDI) configurations according to the current environment",
|
|
1009
|
+
aliases=["gen-cdi"],
|
|
1010
|
+
)
|
|
1011
|
+
|
|
1012
|
+
generate_parser.add_argument(
|
|
1013
|
+
"--manufacturer",
|
|
1014
|
+
"--manu",
|
|
1015
|
+
type=ManufacturerEnum,
|
|
1016
|
+
help="Manufacturer to generate (default: all)",
|
|
1017
|
+
choices=list(map(str, cdi_available_manufacturers())),
|
|
974
1018
|
)
|
|
975
1019
|
|
|
976
|
-
|
|
1020
|
+
generate_parser.add_argument(
|
|
1021
|
+
"--backend",
|
|
1022
|
+
type=str,
|
|
1023
|
+
help="Backend to generate (default: all)",
|
|
1024
|
+
choices=cdi_available_backends(),
|
|
1025
|
+
)
|
|
1026
|
+
|
|
1027
|
+
generate_parser.add_argument(
|
|
977
1028
|
"--format",
|
|
978
1029
|
type=str,
|
|
979
1030
|
choices=["yaml", "json"],
|
|
@@ -981,18 +1032,29 @@ class CDIGenerateSubCommand(SubCommand):
|
|
|
981
1032
|
help="Format of the CDI configurations",
|
|
982
1033
|
)
|
|
983
1034
|
|
|
984
|
-
|
|
1035
|
+
generate_parser.add_argument(
|
|
985
1036
|
"output",
|
|
986
1037
|
nargs=OPTIONAL,
|
|
987
1038
|
help="Output directory to save CDI configurations (default: current directory)",
|
|
988
1039
|
)
|
|
989
1040
|
|
|
990
|
-
|
|
1041
|
+
generate_parser.set_defaults(func=GenerateCDIConfigSubCommand)
|
|
991
1042
|
|
|
992
1043
|
def __init__(self, args: Namespace):
|
|
1044
|
+
self.manufacturer = args.manufacturer
|
|
993
1045
|
self.format = args.format
|
|
994
1046
|
self.output = Path(args.output) if args.output else None
|
|
995
1047
|
|
|
1048
|
+
if args.backend:
|
|
1049
|
+
if not self.manufacturer:
|
|
1050
|
+
self.manufacturer = backend_to_manufacturer(args.backend)
|
|
1051
|
+
elif args.backend != manufacturer_to_backend(self.manufacturer):
|
|
1052
|
+
msg = (
|
|
1053
|
+
f"The backend '{args.backend}' is not compatible with "
|
|
1054
|
+
f"the manufacturer '{args.manufacturer}'."
|
|
1055
|
+
)
|
|
1056
|
+
raise ValueError(msg)
|
|
1057
|
+
|
|
996
1058
|
if self.output:
|
|
997
1059
|
try:
|
|
998
1060
|
if not self.output.exists():
|
|
@@ -1009,8 +1071,10 @@ class CDIGenerateSubCommand(SubCommand):
|
|
|
1009
1071
|
print("\033[2J\033[H", end="")
|
|
1010
1072
|
|
|
1011
1073
|
generated = False
|
|
1012
|
-
for manu in
|
|
1013
|
-
|
|
1074
|
+
for manu in cdi_supported_manufacturers():
|
|
1075
|
+
if self.manufacturer and self.manufacturer != manu:
|
|
1076
|
+
continue
|
|
1077
|
+
content, path = cdi_dump_config(
|
|
1014
1078
|
manufacturer=manu,
|
|
1015
1079
|
output=self.output,
|
|
1016
1080
|
)
|
|
@@ -1027,6 +1091,64 @@ class CDIGenerateSubCommand(SubCommand):
|
|
|
1027
1091
|
print("No CDI configurations were generated.")
|
|
1028
1092
|
|
|
1029
1093
|
|
|
1094
|
+
class ServeDevicePluginSubCommand(SubCommand):
|
|
1095
|
+
"""
|
|
1096
|
+
Command to serve Device Plugin.
|
|
1097
|
+
"""
|
|
1098
|
+
|
|
1099
|
+
manufacturer: ManufacturerEnum
|
|
1100
|
+
|
|
1101
|
+
@staticmethod
|
|
1102
|
+
def register(parser: _SubParsersAction):
|
|
1103
|
+
serve_parser = parser.add_parser(
|
|
1104
|
+
"serve-device-plugin",
|
|
1105
|
+
help="Serve Device Plugin according to the current environment",
|
|
1106
|
+
aliases=[
|
|
1107
|
+
"start-device-plugin",
|
|
1108
|
+
"serve-dp",
|
|
1109
|
+
"start-dp",
|
|
1110
|
+
],
|
|
1111
|
+
)
|
|
1112
|
+
|
|
1113
|
+
serve_parser.add_argument(
|
|
1114
|
+
"--manufacturer",
|
|
1115
|
+
"--manu",
|
|
1116
|
+
type=ManufacturerEnum,
|
|
1117
|
+
help="Manufacturer to generate (default: all)",
|
|
1118
|
+
choices=list(map(str, cdi_available_manufacturers())),
|
|
1119
|
+
)
|
|
1120
|
+
|
|
1121
|
+
serve_parser.add_argument(
|
|
1122
|
+
"--backend",
|
|
1123
|
+
type=str,
|
|
1124
|
+
help="Backend to generate (default: all)",
|
|
1125
|
+
choices=cdi_available_backends(),
|
|
1126
|
+
)
|
|
1127
|
+
|
|
1128
|
+
serve_parser.set_defaults(func=ServeDevicePluginSubCommand)
|
|
1129
|
+
|
|
1130
|
+
def __init__(self, args: Namespace):
|
|
1131
|
+
self.manufacturer = args.manufacturer
|
|
1132
|
+
|
|
1133
|
+
if args.backend:
|
|
1134
|
+
if not self.manufacturer:
|
|
1135
|
+
self.manufacturer = backend_to_manufacturer(args.backend)
|
|
1136
|
+
elif args.backend != manufacturer_to_backend(self.manufacturer):
|
|
1137
|
+
msg = (
|
|
1138
|
+
f"The backend '{args.backend}' is not compatible with "
|
|
1139
|
+
f"the manufacturer '{args.manufacturer}'."
|
|
1140
|
+
)
|
|
1141
|
+
raise ValueError(msg)
|
|
1142
|
+
|
|
1143
|
+
def run(self):
|
|
1144
|
+
print("\033[2J\033[H", end="")
|
|
1145
|
+
|
|
1146
|
+
k8s_deviceplugin_serve(
|
|
1147
|
+
manufacturer=self.manufacturer,
|
|
1148
|
+
cdi_generation_output=envs.GPUSTACK_RUNTIME_DEPLOY_CDI_SPECS_DIRECTORY,
|
|
1149
|
+
)
|
|
1150
|
+
|
|
1151
|
+
|
|
1030
1152
|
def format_workloads_json(sts: list[WorkloadStatus]) -> str:
|
|
1031
1153
|
return json.dumps([st.to_dict() for st in sts], indent=2)
|
|
1032
1154
|
|
gpustack_runtime/cmds/images.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
1
|
+
from __future__ import annotations as __future_annotations__
|
|
2
2
|
|
|
3
3
|
from typing import TYPE_CHECKING
|
|
4
4
|
|
|
@@ -31,12 +31,32 @@ from .__types__ import (
|
|
|
31
31
|
WorkloadStatus,
|
|
32
32
|
WorkloadStatusStateEnum,
|
|
33
33
|
)
|
|
34
|
-
from .cdi import
|
|
34
|
+
from .cdi import (
|
|
35
|
+
available_backends as cdi_available_backends,
|
|
36
|
+
)
|
|
37
|
+
from .cdi import (
|
|
38
|
+
available_manufacturers as cdi_available_manufacturers,
|
|
39
|
+
)
|
|
40
|
+
from .cdi import (
|
|
41
|
+
dump_config as cdi_dump_config,
|
|
42
|
+
)
|
|
43
|
+
from .cdi import (
|
|
44
|
+
generate_config as cdi_generate_config,
|
|
45
|
+
)
|
|
46
|
+
from .cdi import (
|
|
47
|
+
supported_manufacturers as cdi_supported_manufacturers,
|
|
48
|
+
)
|
|
35
49
|
from .docker import (
|
|
36
50
|
DockerDeployer,
|
|
37
51
|
DockerWorkloadPlan,
|
|
38
52
|
DockerWorkloadStatus,
|
|
39
53
|
)
|
|
54
|
+
from .k8s.deviceplugin import (
|
|
55
|
+
serve as k8s_deviceplugin_serve,
|
|
56
|
+
)
|
|
57
|
+
from .k8s.deviceplugin import (
|
|
58
|
+
serve_async as k8s_deviceplugin_serve_async,
|
|
59
|
+
)
|
|
40
60
|
from .kuberentes import (
|
|
41
61
|
KubernetesDeployer,
|
|
42
62
|
KubernetesWorkloadPlan,
|
|
@@ -603,7 +623,11 @@ __all__ = [
|
|
|
603
623
|
"WorkloadStatusStateEnum",
|
|
604
624
|
"async_logs_self",
|
|
605
625
|
"async_logs_workload",
|
|
626
|
+
"cdi_available_backends",
|
|
627
|
+
"cdi_available_manufacturers",
|
|
628
|
+
"cdi_dump_config",
|
|
606
629
|
"cdi_generate_config",
|
|
630
|
+
"cdi_supported_manufacturers",
|
|
607
631
|
"create_workload",
|
|
608
632
|
"delete_workload",
|
|
609
633
|
"exec_self",
|
|
@@ -611,6 +635,8 @@ __all__ = [
|
|
|
611
635
|
"get_workload",
|
|
612
636
|
"inspect_self",
|
|
613
637
|
"inspect_workload",
|
|
638
|
+
"k8s_deviceplugin_serve",
|
|
639
|
+
"k8s_deviceplugin_serve_async",
|
|
614
640
|
"list_workloads",
|
|
615
641
|
"logs_self",
|
|
616
642
|
"logs_workload",
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
1
|
+
from __future__ import annotations as __future_annotations__
|
|
2
2
|
|
|
3
3
|
import asyncio
|
|
4
4
|
import atexit
|
|
@@ -1400,6 +1400,7 @@ class Deployer(ABC):
|
|
|
1400
1400
|
valued_uuid = (
|
|
1401
1401
|
ren
|
|
1402
1402
|
in envs.GPUSTACK_RUNTIME_DEPLOY_RUNTIME_VISIBLE_DEVICES_VALUE_UUID
|
|
1403
|
+
and manu != ManufacturerEnum.ASCEND
|
|
1403
1404
|
)
|
|
1404
1405
|
dev_uuids: list[str] = []
|
|
1405
1406
|
dev_indexes: list[str] = []
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
1
|
+
from __future__ import annotations as __future_annotations__
|
|
2
2
|
|
|
3
3
|
import base64
|
|
4
4
|
import enum
|
|
@@ -176,7 +176,7 @@ def correct_runner_image(
|
|
|
176
176
|
return corrected_image, True
|
|
177
177
|
|
|
178
178
|
|
|
179
|
-
@lru_cache
|
|
179
|
+
@lru_cache(maxsize=1)
|
|
180
180
|
def _get_backend() -> str:
|
|
181
181
|
"""
|
|
182
182
|
Get the first detected backend name.
|
|
@@ -1,7 +1,15 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
1
|
+
from __future__ import annotations as __future_annotations__
|
|
2
2
|
|
|
3
3
|
from typing import TYPE_CHECKING, Literal
|
|
4
4
|
|
|
5
|
+
from ...detector import Device, manufacturer_to_backend
|
|
6
|
+
from ...detector import supported_manufacturers as detector_supported_manufacturers
|
|
7
|
+
from .__types__ import Config, manufacturer_to_cdi_kind, manufacturer_to_runtime_env
|
|
8
|
+
from .amd import AMDGenerator
|
|
9
|
+
from .ascend import AscendGenerator
|
|
10
|
+
from .hygon import HygonGenerator
|
|
11
|
+
from .iluvatar import IluvatarGenerator
|
|
12
|
+
from .metax import MetaXGenerator
|
|
5
13
|
from .thead import THeadGenerator
|
|
6
14
|
|
|
7
15
|
if TYPE_CHECKING:
|
|
@@ -10,8 +18,12 @@ if TYPE_CHECKING:
|
|
|
10
18
|
from ...detector import ManufacturerEnum
|
|
11
19
|
from .__types__ import Generator
|
|
12
20
|
|
|
13
|
-
|
|
14
21
|
_GENERATORS: list[Generator] = [
|
|
22
|
+
AMDGenerator(),
|
|
23
|
+
AscendGenerator(),
|
|
24
|
+
HygonGenerator(),
|
|
25
|
+
IluvatarGenerator(),
|
|
26
|
+
MetaXGenerator(),
|
|
15
27
|
THeadGenerator(),
|
|
16
28
|
]
|
|
17
29
|
"""
|
|
@@ -26,13 +38,13 @@ Mapping from manufacturer to CDI generator.
|
|
|
26
38
|
"""
|
|
27
39
|
|
|
28
40
|
|
|
29
|
-
def
|
|
30
|
-
manufacturer: ManufacturerEnum
|
|
41
|
+
def dump_config(
|
|
42
|
+
manufacturer: ManufacturerEnum,
|
|
31
43
|
output: Path | None = None,
|
|
32
44
|
_format: Literal["yaml", "json"] = "yaml",
|
|
33
45
|
) -> tuple[str | None, str | None]:
|
|
34
46
|
"""
|
|
35
|
-
|
|
47
|
+
Dump the CDI configuration.
|
|
36
48
|
|
|
37
49
|
Args:
|
|
38
50
|
manufacturer:
|
|
@@ -76,6 +88,75 @@ def generate_config(
|
|
|
76
88
|
return expected, str(cdi_path)
|
|
77
89
|
|
|
78
90
|
|
|
91
|
+
def generate_config(
|
|
92
|
+
device: Device,
|
|
93
|
+
) -> Config | None:
|
|
94
|
+
"""
|
|
95
|
+
Generate the CDI configuration for the given devices.
|
|
96
|
+
|
|
97
|
+
Args:
|
|
98
|
+
device:
|
|
99
|
+
The detected device.
|
|
100
|
+
|
|
101
|
+
Returns:
|
|
102
|
+
The Config object, or None if not supported.
|
|
103
|
+
|
|
104
|
+
"""
|
|
105
|
+
gen = _GENERATORS_MAP.get(device.manufacturer)
|
|
106
|
+
if not gen:
|
|
107
|
+
return None
|
|
108
|
+
|
|
109
|
+
cfg = gen.generate(devices=[device])
|
|
110
|
+
return cfg
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def available_manufacturers() -> list[ManufacturerEnum]:
|
|
114
|
+
"""
|
|
115
|
+
Get a list of available manufacturers,
|
|
116
|
+
which allow CDI generation,
|
|
117
|
+
regardless of whether they are supported or not.
|
|
118
|
+
|
|
119
|
+
Returns:
|
|
120
|
+
A list of available manufacturers.
|
|
121
|
+
|
|
122
|
+
"""
|
|
123
|
+
return list(_GENERATORS_MAP.keys())
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def supported_manufacturers() -> list[ManufacturerEnum]:
|
|
127
|
+
"""
|
|
128
|
+
Get a list of supported manufacturers,
|
|
129
|
+
which allow CDI generation,
|
|
130
|
+
and must be supported in the current environment.
|
|
131
|
+
|
|
132
|
+
Returns:
|
|
133
|
+
A list of supported manufacturers.
|
|
134
|
+
|
|
135
|
+
"""
|
|
136
|
+
manus = detector_supported_manufacturers()
|
|
137
|
+
return [manu for manu in manus if manu in _GENERATORS_MAP]
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def available_backends() -> list[str]:
|
|
141
|
+
"""
|
|
142
|
+
Get a list of available backends,
|
|
143
|
+
which allow CDI generation,
|
|
144
|
+
regardless of whether they are supported or not.
|
|
145
|
+
|
|
146
|
+
Returns:
|
|
147
|
+
A list of available backends.
|
|
148
|
+
|
|
149
|
+
"""
|
|
150
|
+
return [manufacturer_to_backend(manu) for manu in _GENERATORS_MAP]
|
|
151
|
+
|
|
152
|
+
|
|
79
153
|
__all__ = [
|
|
154
|
+
"Config",
|
|
155
|
+
"available_backends",
|
|
156
|
+
"available_manufacturers",
|
|
157
|
+
"dump_config",
|
|
80
158
|
"generate_config",
|
|
159
|
+
"manufacturer_to_cdi_kind",
|
|
160
|
+
"manufacturer_to_runtime_env",
|
|
161
|
+
"supported_manufacturers",
|
|
81
162
|
]
|