gpustack-runtime 0.1.41__py3-none-any.whl → 0.1.41.post2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gpustack_runtime/_version.py +2 -2
- gpustack_runtime/_version_appendix.py +1 -1
- gpustack_runtime/deployer/cdi/__init__.py +1 -0
- gpustack_runtime/deployer/cdi/__utils__.py +2 -0
- gpustack_runtime/deployer/k8s/deviceplugin/__init__.py +88 -2
- gpustack_runtime/deployer/k8s/deviceplugin/__types__.py +1 -1
- gpustack_runtime/deployer/k8s/deviceplugin/plugin.py +12 -16
- gpustack_runtime/deployer/k8s/types/kubelet/deviceplugin/v1beta1/api_pb2.py +1115 -72
- gpustack_runtime/deployer/kuberentes.py +20 -7
- gpustack_runtime/envs.py +11 -9
- {gpustack_runtime-0.1.41.dist-info → gpustack_runtime-0.1.41.post2.dist-info}/METADATA +3 -3
- {gpustack_runtime-0.1.41.dist-info → gpustack_runtime-0.1.41.post2.dist-info}/RECORD +15 -15
- {gpustack_runtime-0.1.41.dist-info → gpustack_runtime-0.1.41.post2.dist-info}/WHEEL +0 -0
- {gpustack_runtime-0.1.41.dist-info → gpustack_runtime-0.1.41.post2.dist-info}/entry_points.txt +0 -0
- {gpustack_runtime-0.1.41.dist-info → gpustack_runtime-0.1.41.post2.dist-info}/licenses/LICENSE +0 -0
gpustack_runtime/_version.py
CHANGED
|
@@ -27,8 +27,8 @@ version_tuple: VERSION_TUPLE
|
|
|
27
27
|
__commit_id__: COMMIT_ID
|
|
28
28
|
commit_id: COMMIT_ID
|
|
29
29
|
|
|
30
|
-
__version__ = version = '0.1.41'
|
|
31
|
-
__version_tuple__ = version_tuple = (0, 1, 41)
|
|
30
|
+
__version__ = version = '0.1.41.post2'
|
|
31
|
+
__version_tuple__ = version_tuple = (0, 1, 41, 'post2')
|
|
32
32
|
try:
|
|
33
33
|
from ._version_appendix import git_commit
|
|
34
34
|
__commit_id__ = commit_id = git_commit
|
|
@@ -1 +1 @@
|
|
|
1
|
-
git_commit = "
|
|
1
|
+
git_commit = "ed331dc"
|
|
@@ -4,12 +4,16 @@ import asyncio
|
|
|
4
4
|
import contextlib
|
|
5
5
|
import logging
|
|
6
6
|
import signal
|
|
7
|
+
import stat
|
|
7
8
|
import threading
|
|
8
|
-
from
|
|
9
|
+
from functools import lru_cache
|
|
10
|
+
from typing import TYPE_CHECKING, Literal
|
|
9
11
|
|
|
10
12
|
from .... import envs
|
|
11
13
|
from ....deployer.cdi import dump_config as cdi_dump_config
|
|
12
14
|
from ....detector import ManufacturerEnum, detect_devices, supported_manufacturers
|
|
15
|
+
from ...cdi import Config, manufacturer_to_cdi_kind
|
|
16
|
+
from ..types.kubelet.deviceplugin.v1beta1 import KubeletSocket
|
|
13
17
|
from .__types__ import GroupedError, PluginServer
|
|
14
18
|
from .plugin import SharableDevicePlugin, cdi_kind_to_kdp_resource
|
|
15
19
|
|
|
@@ -61,9 +65,19 @@ async def serve_async(
|
|
|
61
65
|
if not devices:
|
|
62
66
|
continue
|
|
63
67
|
|
|
68
|
+
allocation_policy = _get_device_allocation_policy(manu)
|
|
69
|
+
logger.info(
|
|
70
|
+
"Using device allocation policy '%s' for manufacturer '%s'",
|
|
71
|
+
allocation_policy,
|
|
72
|
+
manu,
|
|
73
|
+
)
|
|
74
|
+
|
|
64
75
|
# Also works if the manufacturer does not have a CDI generator,
|
|
65
76
|
# which means we are relying on other tools to generate CDI specs.
|
|
66
|
-
if
|
|
77
|
+
if (
|
|
78
|
+
allocation_policy == "cdi"
|
|
79
|
+
and envs.GPUSTACK_RUNTIME_KUBERNETES_KDP_CDI_SPECS_GENERATE
|
|
80
|
+
):
|
|
67
81
|
generated_content, generated_path = cdi_dump_config(
|
|
68
82
|
manufacturer=manu,
|
|
69
83
|
output=cdi_generation_output,
|
|
@@ -95,6 +109,8 @@ async def serve_async(
|
|
|
95
109
|
SharableDevicePlugin(
|
|
96
110
|
device=dev,
|
|
97
111
|
id_by="index" if manu == ManufacturerEnum.ASCEND else "uuid",
|
|
112
|
+
allocation_policy=allocation_policy,
|
|
113
|
+
max_allocations=envs.GPUSTACK_RUNTIME_KUBERNETES_KDP_PER_DEVICE_MAX_ALLOCATIONS,
|
|
98
114
|
),
|
|
99
115
|
)
|
|
100
116
|
|
|
@@ -104,6 +120,9 @@ async def serve_async(
|
|
|
104
120
|
logger.info("Stop event triggered, shutting down...")
|
|
105
121
|
return
|
|
106
122
|
|
|
123
|
+
if not kubelet_endpoint:
|
|
124
|
+
kubelet_endpoint = KubeletSocket
|
|
125
|
+
|
|
107
126
|
# Create tasks to start all servers.
|
|
108
127
|
serve_tasks = [
|
|
109
128
|
asyncio.create_task(
|
|
@@ -233,8 +252,75 @@ def serve(
|
|
|
233
252
|
pass
|
|
234
253
|
|
|
235
254
|
|
|
255
|
+
def is_kubelet_socket_accessible(
|
|
256
|
+
kubelet_endpoint: Path | None = None,
|
|
257
|
+
) -> bool:
|
|
258
|
+
"""
|
|
259
|
+
Check if the kubelet socket is accessible.
|
|
260
|
+
|
|
261
|
+
Args:
|
|
262
|
+
kubelet_endpoint:
|
|
263
|
+
The path to the kubelet endpoint.
|
|
264
|
+
|
|
265
|
+
Returns:
|
|
266
|
+
True if the socket is accessible, False otherwise.
|
|
267
|
+
|
|
268
|
+
"""
|
|
269
|
+
if not kubelet_endpoint:
|
|
270
|
+
kubelet_endpoint = KubeletSocket
|
|
271
|
+
|
|
272
|
+
if kubelet_endpoint.exists():
|
|
273
|
+
path_stat = kubelet_endpoint.lstat()
|
|
274
|
+
if path_stat and stat.S_ISSOCK(path_stat.st_mode):
|
|
275
|
+
return True
|
|
276
|
+
return False
|
|
277
|
+
|
|
278
|
+
|
|
279
|
+
@lru_cache
|
|
280
|
+
def _get_device_allocation_policy(
|
|
281
|
+
manufacturer: ManufacturerEnum,
|
|
282
|
+
) -> Literal["env", "cdi", "opaque"]:
|
|
283
|
+
"""
|
|
284
|
+
Get the device allocation policy (in lowercase) for the device plugin.
|
|
285
|
+
|
|
286
|
+
Args:
|
|
287
|
+
manufacturer:
|
|
288
|
+
The manufacturer of the device.
|
|
289
|
+
|
|
290
|
+
Returns:
|
|
291
|
+
The device allocation policy.
|
|
292
|
+
|
|
293
|
+
"""
|
|
294
|
+
policy = envs.GPUSTACK_RUNTIME_KUBERNETES_KDP_DEVICE_ALLOCATION_POLICY.lower()
|
|
295
|
+
if policy != "auto":
|
|
296
|
+
return policy
|
|
297
|
+
|
|
298
|
+
cdi_kind = manufacturer_to_cdi_kind(manufacturer)
|
|
299
|
+
|
|
300
|
+
cdi_dir = envs.GPUSTACK_RUNTIME_DEPLOY_CDI_SPECS_DIRECTORY
|
|
301
|
+
for suffix in ["*.yaml", "*.yml", "*.json"]:
|
|
302
|
+
for file in cdi_dir.glob(suffix):
|
|
303
|
+
with contextlib.suppress(Exception):
|
|
304
|
+
config = Config.from_file(file)
|
|
305
|
+
if config and config.kind == cdi_kind:
|
|
306
|
+
return "cdi"
|
|
307
|
+
|
|
308
|
+
if manufacturer in [
|
|
309
|
+
ManufacturerEnum.AMD,
|
|
310
|
+
# ManufacturerEnum.ASCEND, # Prioritize using Env policy for Ascend.
|
|
311
|
+
ManufacturerEnum.HYGON,
|
|
312
|
+
ManufacturerEnum.ILUVATAR,
|
|
313
|
+
ManufacturerEnum.METAX,
|
|
314
|
+
ManufacturerEnum.THEAD,
|
|
315
|
+
]:
|
|
316
|
+
return "opaque"
|
|
317
|
+
|
|
318
|
+
return "env"
|
|
319
|
+
|
|
320
|
+
|
|
236
321
|
__all__ = [
|
|
237
322
|
"cdi_kind_to_kdp_resource",
|
|
323
|
+
"is_kubelet_socket_accessible",
|
|
238
324
|
"serve",
|
|
239
325
|
"serve_async",
|
|
240
326
|
]
|
|
@@ -11,7 +11,6 @@ import grpc
|
|
|
11
11
|
from grpc_interceptor import AsyncServerInterceptor
|
|
12
12
|
from grpc_interceptor.exceptions import GrpcException
|
|
13
13
|
|
|
14
|
-
from .... import envs
|
|
15
14
|
from ....detector import Device, str_range_to_list
|
|
16
15
|
from ...cdi import (
|
|
17
16
|
generate_config,
|
|
@@ -31,7 +30,6 @@ from ..types.kubelet.deviceplugin.v1beta1 import (
|
|
|
31
30
|
DeviceSpec,
|
|
32
31
|
Empty,
|
|
33
32
|
Healthy,
|
|
34
|
-
KubeletSocket,
|
|
35
33
|
ListAndWatchResponse,
|
|
36
34
|
Mount,
|
|
37
35
|
NUMANode,
|
|
@@ -110,6 +108,10 @@ class SharableDevicePlugin(PluginServer, DevicePluginServicer):
|
|
|
110
108
|
"""
|
|
111
109
|
Controls how the device IDs of the Kubernetes Device Plugin are generated.
|
|
112
110
|
"""
|
|
111
|
+
_allocation_policy: Literal["env", "cdi", "opaque"]
|
|
112
|
+
"""
|
|
113
|
+
Controls the device allocation policy.
|
|
114
|
+
"""
|
|
113
115
|
_max_allocations: int
|
|
114
116
|
"""
|
|
115
117
|
Controls the maximum shards per underlying device.
|
|
@@ -131,7 +133,8 @@ class SharableDevicePlugin(PluginServer, DevicePluginServicer):
|
|
|
131
133
|
self,
|
|
132
134
|
device: Device,
|
|
133
135
|
id_by: Literal["uuid", "index"] = "uuid",
|
|
134
|
-
|
|
136
|
+
allocation_policy: Literal["env", "cdi", "opaque"] = "cdi",
|
|
137
|
+
max_allocations: int | None = 10,
|
|
135
138
|
):
|
|
136
139
|
"""
|
|
137
140
|
Initializes the SharableDevicePlugin.
|
|
@@ -142,19 +145,16 @@ class SharableDevicePlugin(PluginServer, DevicePluginServicer):
|
|
|
142
145
|
id_by:
|
|
143
146
|
Controls how the device IDs of the Kubernetes Device Plugin are generated.
|
|
144
147
|
Either "uuid" or "index". Default is "uuid".
|
|
148
|
+
allocation_policy:
|
|
149
|
+
Controls the device allocation policy.
|
|
145
150
|
max_allocations:
|
|
146
151
|
Controls the maximum allocations per underlying device.
|
|
147
|
-
If None, uses the environment variable `GPUSTACK_RUNTIME_KUBERNETES_KDP_PER_DEVICE_MAX_ALLOCATIONS`.
|
|
148
152
|
|
|
149
153
|
"""
|
|
150
154
|
self._device = device
|
|
151
155
|
self._id_by = id_by
|
|
152
|
-
self.
|
|
153
|
-
|
|
154
|
-
self._max_allocations = (
|
|
155
|
-
envs.GPUSTACK_RUNTIME_KUBERNETES_KDP_PER_DEVICE_MAX_ALLOCATIONS
|
|
156
|
-
)
|
|
157
|
-
self._max_allocations = max(self._max_allocations, 1)
|
|
156
|
+
self._allocation_policy = allocation_policy
|
|
157
|
+
self._max_allocations = max(max_allocations, 1)
|
|
158
158
|
self._cdi_kind = manufacturer_to_cdi_kind(device.manufacturer)
|
|
159
159
|
self._runtime_env = manufacturer_to_runtime_env(device.manufacturer)
|
|
160
160
|
self._kdp_resource = cdi_kind_to_kdp_resource(
|
|
@@ -229,7 +229,7 @@ class SharableDevicePlugin(PluginServer, DevicePluginServicer):
|
|
|
229
229
|
async def serve(
|
|
230
230
|
self,
|
|
231
231
|
stop_event: asyncio.Event,
|
|
232
|
-
kubelet_endpoint: Path
|
|
232
|
+
kubelet_endpoint: Path,
|
|
233
233
|
start_timeout: int = 5,
|
|
234
234
|
register_timeout: int = 5,
|
|
235
235
|
):
|
|
@@ -247,9 +247,6 @@ class SharableDevicePlugin(PluginServer, DevicePluginServicer):
|
|
|
247
247
|
The timeout in seconds for registering the device plugin.
|
|
248
248
|
|
|
249
249
|
"""
|
|
250
|
-
if not kubelet_endpoint:
|
|
251
|
-
kubelet_endpoint = KubeletSocket
|
|
252
|
-
|
|
253
250
|
resource_name = self._kdp_resource
|
|
254
251
|
endpoint = kubelet_endpoint.parent / f"{resource_name.replace('/', '.')}.sock"
|
|
255
252
|
|
|
@@ -421,8 +418,7 @@ class SharableDevicePlugin(PluginServer, DevicePluginServicer):
|
|
|
421
418
|
self,
|
|
422
419
|
req: ContainerAllocateRequest,
|
|
423
420
|
) -> ContainerAllocateResponse:
|
|
424
|
-
policy =
|
|
425
|
-
|
|
421
|
+
policy = self._allocation_policy
|
|
426
422
|
request_dp_device_ids = req.devices_ids
|
|
427
423
|
|
|
428
424
|
# CDI device allocation.
|