gpustack-runtime 0.1.41.post1__py3-none-any.whl → 0.1.41.post3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -27,8 +27,8 @@ version_tuple: VERSION_TUPLE
27
27
  __commit_id__: COMMIT_ID
28
28
  commit_id: COMMIT_ID
29
29
 
30
- __version__ = version = '0.1.41.post1'
31
- __version_tuple__ = version_tuple = (0, 1, 41, 'post1')
30
+ __version__ = version = '0.1.41.post3'
31
+ __version_tuple__ = version_tuple = (0, 1, 41, 'post3')
32
32
  try:
33
33
  from ._version_appendix import git_commit
34
34
  __commit_id__ = commit_id = git_commit
@@ -1 +1 @@
1
- git_commit = "8671a00"
1
+ git_commit = "3840255"
@@ -65,7 +65,7 @@ async def serve_async(
65
65
  if not devices:
66
66
  continue
67
67
 
68
- allocation_policy = _get_device_allocation_policy(manu)
68
+ allocation_policy = get_device_allocation_policy(manu)
69
69
  logger.info(
70
70
  "Using device allocation policy '%s' for manufacturer '%s'",
71
71
  allocation_policy,
@@ -110,6 +110,7 @@ async def serve_async(
110
110
  device=dev,
111
111
  id_by="index" if manu == ManufacturerEnum.ASCEND else "uuid",
112
112
  allocation_policy=allocation_policy,
113
+ max_allocations=envs.GPUSTACK_RUNTIME_KUBERNETES_KDP_PER_DEVICE_MAX_ALLOCATIONS,
113
114
  ),
114
115
  )
115
116
 
@@ -276,7 +277,23 @@ def is_kubelet_socket_accessible(
276
277
 
277
278
 
278
279
  @lru_cache
279
- def _get_device_allocation_policy(
280
+ def get_resource_injection_policy() -> Literal["env", "kdp"]:
281
+ """
282
+ Get the resource injection policy (in lowercase) for the deployer.
283
+
284
+ Returns:
285
+ The resource injection policy.
286
+
287
+ """
288
+ policy = envs.GPUSTACK_RUNTIME_KUBERNETES_RESOURCE_INJECTION_POLICY.lower()
289
+ if policy != "auto":
290
+ return policy
291
+
292
+ return "kdp" if is_kubelet_socket_accessible() else "env"
293
+
294
+
295
+ @lru_cache
296
+ def get_device_allocation_policy(
280
297
  manufacturer: ManufacturerEnum,
281
298
  ) -> Literal["env", "cdi", "opaque"]:
282
299
  """
@@ -319,6 +336,8 @@ def _get_device_allocation_policy(
319
336
 
320
337
  __all__ = [
321
338
  "cdi_kind_to_kdp_resource",
339
+ "get_device_allocation_policy",
340
+ "get_resource_injection_policy",
322
341
  "is_kubelet_socket_accessible",
323
342
  "serve",
324
343
  "serve_async",
@@ -11,7 +11,6 @@ import grpc
11
11
  from grpc_interceptor import AsyncServerInterceptor
12
12
  from grpc_interceptor.exceptions import GrpcException
13
13
 
14
- from .... import envs
15
14
  from ....detector import Device, str_range_to_list
16
15
  from ...cdi import (
17
16
  generate_config,
@@ -135,7 +134,7 @@ class SharableDevicePlugin(PluginServer, DevicePluginServicer):
135
134
  device: Device,
136
135
  id_by: Literal["uuid", "index"] = "uuid",
137
136
  allocation_policy: Literal["env", "cdi", "opaque"] = "cdi",
138
- max_allocations: int | None = None,
137
+ max_allocations: int | None = 10,
139
138
  ):
140
139
  """
141
140
  Initializes the SharableDevicePlugin.
@@ -150,18 +149,12 @@ class SharableDevicePlugin(PluginServer, DevicePluginServicer):
150
149
  Controls the device allocation policy.
151
150
  max_allocations:
152
151
  Controls the maximum allocations per underlying device.
153
- If None, uses the environment variable `GPUSTACK_RUNTIME_KUBERNETES_KDP_PER_DEVICE_MAX_ALLOCATIONS`.
154
152
 
155
153
  """
156
154
  self._device = device
157
155
  self._id_by = id_by
158
156
  self._allocation_policy = allocation_policy
159
- self._max_allocations = max_allocations
160
- if not self._max_allocations:
161
- self._max_allocations = (
162
- envs.GPUSTACK_RUNTIME_KUBERNETES_KDP_PER_DEVICE_MAX_ALLOCATIONS
163
- )
164
- self._max_allocations = max(self._max_allocations, 1)
157
+ self._max_allocations = max(max_allocations, 1)
165
158
  self._cdi_kind = manufacturer_to_cdi_kind(device.manufacturer)
166
159
  self._runtime_env = manufacturer_to_runtime_env(device.manufacturer)
167
160
  self._kdp_resource = cdi_kind_to_kdp_resource(
@@ -425,8 +418,7 @@ class SharableDevicePlugin(PluginServer, DevicePluginServicer):
425
418
  self,
426
419
  req: ContainerAllocateRequest,
427
420
  ) -> ContainerAllocateResponse:
428
- policy = envs.GPUSTACK_RUNTIME_KUBERNETES_KDP_DEVICE_ALLOCATION_POLICY.lower()
429
-
421
+ policy = self._allocation_policy
430
422
  request_dp_device_ids = req.devices_ids
431
423
 
432
424
  # CDI device allocation.