gpustack-runtime 0.1.39.post1__py3-none-any.whl → 0.1.39.post3__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to one of the supported registries. It is provided for informational purposes only and reflects the versions exactly as they appear in those public registries.
gpustack_runtime/detector/nvidia.py CHANGED
@@ -2,6 +2,8 @@ from __future__ import annotations

  import contextlib
  import logging
+ import math
+ import time
  from _ctypes import byref
  from functools import lru_cache
  from math import ceil
@@ -76,7 +78,7 @@ class NVIDIADetector(Detector):
  def __init__(self):
  super().__init__(ManufacturerEnum.NVIDIA)

- def detect(self) -> Devices | None:
+ def detect(self) -> Devices | None: # noqa: PLR0915
  """
  Detect NVIDIA GPUs using pynvml.

@@ -125,103 +127,110 @@ class NVIDIADetector(Detector):
  for dev_idx in range(dev_count):
  dev = pynvml.nvmlDeviceGetHandleByIndex(dev_idx)

- dev_index = dev_idx
- if envs.GPUSTACK_RUNTIME_DETECT_PHYSICAL_INDEX_PRIORITY:
- if dev_files is None:
- dev_files = get_device_files(pattern=r"nvidia(?P<number>\d+)")
- if len(dev_files) >= dev_count:
- dev_file = dev_files[dev_idx]
- if dev_file.number is not None:
- dev_index = dev_file.number
- dev_uuid = pynvml.nvmlDeviceGetUUID(dev)
-
- dev_cores = None
- if not envs.GPUSTACK_RUNTIME_DETECT_NO_TOOLKIT_CALL:
- with contextlib.suppress(pycuda.CUDAError):
- dev_gpudev = pycuda.cuDeviceGet(dev_idx)
- dev_cores = pycuda.cuDeviceGetAttribute(
- dev_gpudev,
- pycuda.CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
- )
+ dev_cc_t = pynvml.nvmlDeviceGetCudaComputeCapability(dev)
+ dev_cc = ".".join(map(str, dev_cc_t))

- dev_mem = 0
- dev_mem_used = 0
+ dev_bdf = None
  with contextlib.suppress(pynvml.NVMLError):
- dev_mem_info = pynvml.nvmlDeviceGetMemoryInfo(dev)
- dev_mem = byte_to_mebibyte( # byte to MiB
- dev_mem_info.total,
- )
- dev_mem_used = byte_to_mebibyte( # byte to MiB
- dev_mem_info.used,
- )
- if dev_mem == 0:
- dev_mem, dev_mem_used = get_memory()
+ dev_pci_info = pynvml.nvmlDeviceGetPciInfo(dev)
+ dev_bdf = str(dev_pci_info.busIdLegacy).lower()

- dev_cores_util = None
- with contextlib.suppress(pynvml.NVMLError):
- dev_util_rates = pynvml.nvmlDeviceGetUtilizationRates(dev)
- dev_cores_util = dev_util_rates.gpu
- if dev_cores_util is None:
- debug_log_warning(
- logger,
- "Failed to get device %d cores utilization, setting to 0",
- dev_index,
- )
- dev_cores_util = 0
-
- dev_temp = None
+ dev_mig_mode = pynvml.NVML_DEVICE_MIG_DISABLE
  with contextlib.suppress(pynvml.NVMLError):
- dev_temp = pynvml.nvmlDeviceGetTemperature(
- dev,
- pynvml.NVML_TEMPERATURE_GPU,
- )
+ dev_mig_mode, _ = pynvml.nvmlDeviceGetMigMode(dev)

- dev_power = None
- dev_power_used = None
- with contextlib.suppress(pynvml.NVMLError):
- dev_power = pynvml.nvmlDeviceGetPowerManagementDefaultLimit(dev)
- dev_power = dev_power // 1000 # mW to W
- dev_power_used = (
- pynvml.nvmlDeviceGetPowerUsage(dev) // 1000
- ) # mW to W
+ # With MIG disabled, treat as a single device.
+ if dev_mig_mode == pynvml.NVML_DEVICE_MIG_DISABLE:
+ dev_index = dev_idx
+ if envs.GPUSTACK_RUNTIME_DETECT_PHYSICAL_INDEX_PRIORITY:
+ if dev_files is None:
+ dev_files = get_device_files(
+ pattern=r"nvidia(?P<number>\d+)",
+ )
+ if len(dev_files) >= dev_count:
+ dev_file = dev_files[dev_idx]
+ if dev_file.number is not None:
+ dev_index = dev_file.number

- dev_cc_t = pynvml.nvmlDeviceGetCudaComputeCapability(dev)
- dev_cc = ".".join(map(str, dev_cc_t))
+ dev_name = pynvml.nvmlDeviceGetName(dev)

- dev_is_vgpu = False
- dev_pci_info = pynvml.nvmlDeviceGetPciInfo(dev)
- for addr in [dev_pci_info.busIdLegacy, dev_pci_info.busId]:
- if addr in pci_devs:
- dev_is_vgpu = _is_vgpu(pci_devs[addr].config)
- break
+ dev_uuid = pynvml.nvmlDeviceGetUUID(dev)
+
+ dev_cores = None
+ if not envs.GPUSTACK_RUNTIME_DETECT_NO_TOOLKIT_CALL:
+ with contextlib.suppress(pycuda.CUDAError):
+ dev_gpudev = pycuda.cuDeviceGet(dev_idx)
+ dev_cores = pycuda.cuDeviceGetAttribute(
+ dev_gpudev,
+ pycuda.CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
+ )
+
+ dev_cores_util = _get_sm_util_from_gpm_metrics(dev)
+ if dev_cores_util is None:
+ with contextlib.suppress(pynvml.NVMLError):
+ dev_util_rates = pynvml.nvmlDeviceGetUtilizationRates(dev)
+ dev_cores_util = dev_util_rates.gpu
+ if dev_cores_util is None:
+ debug_log_warning(
+ logger,
+ "Failed to get device %d cores utilization, setting to 0",
+ dev_index,
+ )
+ dev_cores_util = 0

- dev_appendix = {
- "arch_family": _get_arch_family(dev_cc_t),
- "vgpu": dev_is_vgpu,
- "bdf": str(dev_pci_info.busIdLegacy).lower(),
- }
+ dev_mem = 0
+ dev_mem_used = 0
+ with contextlib.suppress(pynvml.NVMLError):
+ dev_mem_info = pynvml.nvmlDeviceGetMemoryInfo(dev)
+ dev_mem = byte_to_mebibyte( # byte to MiB
+ dev_mem_info.total,
+ )
+ dev_mem_used = byte_to_mebibyte( # byte to MiB
+ dev_mem_info.used,
+ )
+ if dev_mem == 0:
+ dev_mem, dev_mem_used = get_memory()

- with contextlib.suppress(pynvml.NVMLError):
- dev_fabric = pynvml.c_nvmlGpuFabricInfoV_t()
- r = pynvml.nvmlDeviceGetGpuFabricInfoV(dev, byref(dev_fabric))
- if r != pynvml.NVML_SUCCESS:
- dev_fabric = None
- if dev_fabric.state != pynvml.NVML_GPU_FABRIC_STATE_COMPLETED:
- dev_fabric = None
- if dev_fabric:
- dev_appendix["fabric_cluster_uuid"] = stringify_uuid(
- bytes(dev_fabric.clusterUuid),
+ dev_temp = None
+ with contextlib.suppress(pynvml.NVMLError):
+ dev_temp = pynvml.nvmlDeviceGetTemperature(
+ dev,
+ pynvml.NVML_TEMPERATURE_GPU,
  )
- dev_appendix["fabric_clique_id"] = dev_fabric.cliqueId

- dev_mig_mode = pynvml.NVML_DEVICE_MIG_DISABLE
- with contextlib.suppress(pynvml.NVMLError):
- dev_mig_mode, _ = pynvml.nvmlDeviceGetMigMode(dev)
+ dev_power = None
+ dev_power_used = None
+ with contextlib.suppress(pynvml.NVMLError):
+ dev_power = pynvml.nvmlDeviceGetPowerManagementDefaultLimit(dev)
+ dev_power = dev_power // 1000 # mW to W
+ dev_power_used = (
+ pynvml.nvmlDeviceGetPowerUsage(dev) // 1000
+ ) # mW to W
+
+ dev_is_vgpu = False
+ if dev_bdf and dev_bdf in pci_devs:
+ dev_is_vgpu = _is_vgpu(pci_devs[dev_bdf].config)
+
+ dev_appendix = {
+ "arch_family": _get_arch_family(dev_cc_t),
+ "vgpu": dev_is_vgpu,
+ }
+ if dev_bdf:
+ dev_appendix["bdf"] = dev_bdf

- # If MIG is not enabled, return the GPU itself.
+ with contextlib.suppress(pynvml.NVMLError):
+ dev_fabric = pynvml.c_nvmlGpuFabricInfoV_t()
+ r = pynvml.nvmlDeviceGetGpuFabricInfoV(dev, byref(dev_fabric))
+ if r != pynvml.NVML_SUCCESS:
+ dev_fabric = None
+ if dev_fabric.state != pynvml.NVML_GPU_FABRIC_STATE_COMPLETED:
+ dev_fabric = None
+ if dev_fabric:
+ dev_appendix["fabric_cluster_uuid"] = stringify_uuid(
+ bytes(dev_fabric.clusterUuid),
+ )
+ dev_appendix["fabric_clique_id"] = dev_fabric.cliqueId

- if dev_mig_mode == pynvml.NVML_DEVICE_MIG_DISABLE:
- dev_name = pynvml.nvmlDeviceGetName(dev)
  ret.append(
  Device(
  manufacturer=self.manufacturer,
@@ -283,13 +292,20 @@ class NVIDIADetector(Detector):
  pynvml.nvmlDeviceGetPowerUsage(mdev) // 1000
  ) # mW to W

- mdev_appendix = dev_appendix.copy()
+ mdev_appendix = {
+ "arch_family": _get_arch_family(dev_cc_t),
+ "vgpu": True,
+ }
+ if dev_bdf:
+ mdev_appendix["bdf"] = dev_bdf

  mdev_gi_id = pynvml.nvmlDeviceGetGpuInstanceId(mdev)
  mdev_appendix["gpu_instance_id"] = mdev_gi_id
  mdev_ci_id = pynvml.nvmlDeviceGetComputeInstanceId(mdev)
  mdev_appendix["compute_instance_id"] = mdev_ci_id

+ mdev_cores_util = _get_sm_util_from_gpm_metrics(dev, mdev_gi_id)
+
  if not mdev_name:
  mdev_attrs = pynvml.nvmlDeviceGetAttributes(mdev)

@@ -374,6 +390,7 @@ class NVIDIADetector(Detector):
  runtime_version_original=sys_runtime_ver_original,
  compute_capability=dev_cc,
  cores=mdev_cores,
+ cores_utilization=mdev_cores_util,
  memory=mdev_mem,
  memory_used=mdev_mem_used,
  memory_utilization=get_utilization(mdev_mem_used, mdev_mem),
@@ -492,6 +509,97 @@ class NVIDIADetector(Detector):
  return ret


+ def _get_gpm_metrics(
+ metrics: list[int],
+ dev: pynvml.c_nvmlDevice_t,
+ gpu_instance_id: int | None = None,
+ interval: float = 0.1,
+ ) -> list[pynvml.c_nvmlGpmMetric_t] | None:
+ """
+ Get GPM metrics for a device or a MIG GPU instance.
+
+ Args:
+ metrics:
+ A list of GPM metric IDs to query.
+ dev:
+ The NVML device handle.
+ gpu_instance_id:
+ The GPU instance ID for MIG devices.
+ interval:
+ Interval in seconds between two samples.
+
+ Returns:
+ A list of GPM metric structures, or None if failed.
+
+ """
+ try:
+ dev_gpm_support = pynvml.nvmlGpmQueryDeviceSupport(dev)
+ if not bool(dev_gpm_support.isSupportedDevice):
+ return None
+ except pynvml.NVMLError:
+ debug_log_warning(logger, "Unsupported GPM query")
+ return None
+
+ dev_gpm_metrics = pynvml.c_nvmlGpmMetricsGet_t()
+ try:
+ dev_gpm_metrics.sample1 = pynvml.nvmlGpmSampleAlloc()
+ dev_gpm_metrics.sample2 = pynvml.nvmlGpmSampleAlloc()
+ if gpu_instance_id is None:
+ pynvml.nvmlGpmSampleGet(dev, dev_gpm_metrics.sample1)
+ time.sleep(interval)
+ pynvml.nvmlGpmSampleGet(dev, dev_gpm_metrics.sample2)
+ else:
+ pynvml.nvmlGpmMigSampleGet(dev, gpu_instance_id, dev_gpm_metrics.sample1)
+ time.sleep(interval)
+ pynvml.nvmlGpmMigSampleGet(dev, gpu_instance_id, dev_gpm_metrics.sample2)
+ dev_gpm_metrics.version = pynvml.NVML_GPM_METRICS_GET_VERSION
+ dev_gpm_metrics.numMetrics = len(metrics)
+ for metric_idx, metric in enumerate(metrics):
+ dev_gpm_metrics.metrics[metric_idx].metricId = metric
+ pynvml.nvmlGpmMetricsGet(dev_gpm_metrics)
+ except pynvml.NVMLError:
+ debug_log_exception(logger, "Failed to get GPM metrics")
+ return None
+ finally:
+ if dev_gpm_metrics.sample1:
+ pynvml.nvmlGpmSampleFree(dev_gpm_metrics.sample1)
+ if dev_gpm_metrics.sample2:
+ pynvml.nvmlGpmSampleFree(dev_gpm_metrics.sample2)
+ return list(dev_gpm_metrics.metrics)
+
+
+ def _get_sm_util_from_gpm_metrics(
+ dev: pynvml.c_nvmlDevice_t,
+ gpu_instance_id: int | None = None,
+ interval: float = 0.1,
+ ) -> int | None:
+ """
+ Get SM utilization from GPM metrics.
+
+ Args:
+ dev:
+ The NVML device handle.
+ gpu_instance_id:
+ The GPU instance ID for MIG devices.
+ interval:
+ Interval in seconds between two samples.
+
+ Returns:
+ The SM utilization as an integer percentage, or None if failed.
+
+ """
+ dev_gpm_metrics = _get_gpm_metrics(
+ metrics=[pynvml.NVML_GPM_METRIC_SM_UTIL],
+ dev=dev,
+ gpu_instance_id=gpu_instance_id,
+ interval=interval,
+ )
+ if dev_gpm_metrics and not math.isnan(dev_gpm_metrics[0].value):
+ return int(dev_gpm_metrics[0].value)
+
+ return None
+
+
  def _get_arch_family(dev_cc_t: list[int]) -> str:
  """
  Get the architecture family based on the CUDA compute capability.
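The two helpers above wrap NVML's GPM (GPU Performance Monitoring) flow: allocate two samples, capture them a short interval apart, and let NVML compute the metric from the delta. For readers who want to try that flow outside gpustack-runtime, here is a minimal sketch using nvidia-ml-py directly; it assumes a GPM-capable GPU at index 0 and omits the MIG variant (nvmlGpmMigSampleGet) used for per-instance readings.

import time

import pynvml

pynvml.nvmlInit()
try:
    dev = pynvml.nvmlDeviceGetHandleByIndex(0)
    if pynvml.nvmlGpmQueryDeviceSupport(dev).isSupportedDevice:
        metrics = pynvml.c_nvmlGpmMetricsGet_t()
        metrics.version = pynvml.NVML_GPM_METRICS_GET_VERSION
        metrics.numMetrics = 1
        metrics.metrics[0].metricId = pynvml.NVML_GPM_METRIC_SM_UTIL
        metrics.sample1 = pynvml.nvmlGpmSampleAlloc()
        metrics.sample2 = pynvml.nvmlGpmSampleAlloc()
        try:
            # GPM metrics are derived from the delta between two samples.
            pynvml.nvmlGpmSampleGet(dev, metrics.sample1)
            time.sleep(0.1)
            pynvml.nvmlGpmSampleGet(dev, metrics.sample2)
            pynvml.nvmlGpmMetricsGet(metrics)
            print("SM utilization (%):", metrics.metrics[0].value)
        finally:
            pynvml.nvmlGpmSampleFree(metrics.sample1)
            pynvml.nvmlGpmSampleFree(metrics.sample2)
finally:
    pynvml.nvmlShutdown()

Not every GPU supports GPM, which is why the detector above keeps nvmlDeviceGetUtilizationRates as a fallback when _get_sm_util_from_gpm_metrics returns None.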
gpustack_runtime/detector/pyhsa/__init__.py CHANGED
@@ -446,13 +446,13 @@ def has_agent_get_asic_family_id(agent):

  @dataclass
  class Agent:
- device_type: int
- device_id: str
- uuid: str
- name: str
- compute_capability: str
- compute_units: int
- asic_family_id: int | None
+ device_type: int | None = None
+ device_id: str | None = None
+ uuid: str | None = None
+ name: str | None = None
+ compute_capability: str | None = None
+ compute_units: int | None = None
+ asic_family_id: int | None = None


  def get_agents() -> list[Agent]:
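Giving every Agent field a default of None lets callers build an agent record incrementally, supplying only the properties that were actually discovered instead of all seven values up front. A small illustration follows; the dataclass is a condensed copy of the one in this hunk and the field values are made up.

from dataclasses import dataclass


@dataclass
class Agent:
    # Condensed copy of the dataclass after this change; every field is optional.
    device_type: int | None = None
    device_id: str | None = None
    uuid: str | None = None
    name: str | None = None
    compute_capability: str | None = None
    compute_units: int | None = None
    asic_family_id: int | None = None


# Only the known properties need to be supplied; the rest stay None.
agent = Agent(device_type=2, name="gfx90a", compute_units=104)
assert agent.asic_family_id is None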
gpustack_runtime/detector/pyrocmsmi/__init__.py CHANGED
@@ -223,15 +223,9 @@ def rsmi_dev_target_graphics_version_get(device=0):
  c_version = c_uint64()
  ret = rocmsmiLib.rsmi_dev_target_graphics_version_get(device, byref(c_version))
  _rocmsmiCheckReturn(ret)
- version = str(c_version.value)
- if len(version) == 4:
- dev_name = rsmi_dev_name_get(device)
- if "Instinct MI2" in dev_name:
- hex_part = str(hex(int(version[2:]))).replace("0x", "")
- version = version[:2] + hex_part
- else:
- version = str(c_version.value // 10 + c_version.value % 10)
- return "gfx" + version
+ if c_version.value < 2000:
+ return "gfx" + str(c_version.value)
+ return "gfx" + hex(c_version.value)[2:]
  except AttributeError:
  return None

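The rewrite drops the device-name sniffing ("Instinct MI2") in favour of a single numeric rule: small version values are already decimal gfx IDs, larger values encode the gfx ID in hexadecimal. A standalone sketch of that rule, with example input values assumed purely for illustration:

def format_gfx_version(version: int) -> str:
    # Mirrors the new branch: values below 2000 are decimal gfx IDs (e.g. RDNA parts),
    # anything larger encodes a hexadecimal gfx ID (e.g. CDNA parts).
    if version < 2000:
        return "gfx" + str(version)
    return "gfx" + hex(version)[2:]


# Assumed example inputs:
assert format_gfx_version(1100) == "gfx1100"
assert format_gfx_version(0x90A) == "gfx90a"  # 2314 in decimal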
gpustack_runtime/envs.py CHANGED
@@ -119,25 +119,25 @@ if TYPE_CHECKING:
  """
  Correct the gpustack-runner image by rendering it with the host's detection.
  """
- GPUSTACK_RUNTIME_DEPLOY_DEFAULT_IMAGE_REGISTRY: str | None = None
+ GPUSTACK_RUNTIME_DEPLOY_DEFAULT_CONTAINER_REGISTRY: str | None = None
  """
  Default container registry for deployer to pull images from.
  If not set, it should be "docker.io".
  If the image name already contains a registry, this setting will be ignored.
  """
- GPUSTACK_RUNTIME_DEPLOY_DEFAULT_IMAGE_NAMESPACE: str | None = None
- """
- Namespace for default runner images.
- If not set, it should be "gpustack".
- """
- GPUSTACK_RUNTIME_DEPLOY_DEFAULT_IMAGE_REGISTRY_USERNAME: str | None = None
+ GPUSTACK_RUNTIME_DEPLOY_DEFAULT_CONTAINER_REGISTRY_USERNAME: str | None = None
  """
  Username for the default container registry.
  """
- GPUSTACK_RUNTIME_DEPLOY_DEFAULT_IMAGE_REGISTRY_PASSWORD: str | None = None
+ GPUSTACK_RUNTIME_DEPLOY_DEFAULT_CONTAINER_REGISTRY_PASSWORD: str | None = None
  """
  Password for the default container registry.
  """
+ GPUSTACK_RUNTIME_DEPLOY_DEFAULT_CONTAINER_NAMESPACE: str | None = None
+ """
+ Namespace for default runtime container images.
+ If not set, it should be "gpustack".
+ """
  GPUSTACK_RUNTIME_DEPLOY_IMAGE_PULL_POLICY: str | None = None
  """
  Image pull policy for the deployer (e.g., Always, IfNotPresent, Never).
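The rename only changes the variable names a deployment sets; the semantics stay the same. A hypothetical configuration using the new names (all values are placeholders):

import os

# Placeholder values, for illustration only.
os.environ["GPUSTACK_RUNTIME_DEPLOY_DEFAULT_CONTAINER_REGISTRY"] = "registry.example.com"
os.environ["GPUSTACK_RUNTIME_DEPLOY_DEFAULT_CONTAINER_NAMESPACE"] = "gpustack"
os.environ["GPUSTACK_RUNTIME_DEPLOY_DEFAULT_CONTAINER_REGISTRY_USERNAME"] = "ci-bot"
os.environ["GPUSTACK_RUNTIME_DEPLOY_DEFAULT_CONTAINER_REGISTRY_PASSWORD"] = "change-me"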
@@ -383,36 +383,48 @@ variables: dict[str, Callable[[], Any]] = {
  "GPUSTACK_RUNTIME_DEPLOY_CORRECT_RUNNER_IMAGE": lambda: to_bool(
  getenv("GPUSTACK_RUNTIME_DEPLOY_CORRECT_RUNNER_IMAGE", "1"),
  ),
- "GPUSTACK_RUNTIME_DEPLOY_DEFAULT_IMAGE_REGISTRY": lambda: trim_str(
+ "GPUSTACK_RUNTIME_DEPLOY_DEFAULT_CONTAINER_REGISTRY": lambda: trim_str(
  getenvs(
  keys=[
- "GPUSTACK_RUNTIME_DEPLOY_DEFAULT_IMAGE_REGISTRY",
+ "GPUSTACK_RUNTIME_DEPLOY_DEFAULT_CONTAINER_REGISTRY",
  # TODO(thxCode): Backward compatibility, remove in v0.1.45 later.
  "GPUSTACK_RUNTIME_DEPLOY_DEFAULT_REGISTRY",
  # Compatible with gpustack/gpustack.
  "GPUSTACK_SYSTEM_DEFAULT_CONTAINER_REGISTRY",
+ # Legacy compatibility.
+ "GPUSTACK_RUNTIME_DEPLOY_DEFAULT_IMAGE_REGISTRY",
  ],
  ),
  ),
- "GPUSTACK_RUNTIME_DEPLOY_DEFAULT_IMAGE_NAMESPACE": lambda: trim_str(
- getenv("GPUSTACK_RUNTIME_DEPLOY_DEFAULT_IMAGE_NAMESPACE"),
- ),
- "GPUSTACK_RUNTIME_DEPLOY_DEFAULT_IMAGE_REGISTRY_USERNAME": lambda: trim_str(
+ "GPUSTACK_RUNTIME_DEPLOY_DEFAULT_CONTAINER_REGISTRY_USERNAME": lambda: trim_str(
  getenvs(
  keys=[
- "GPUSTACK_RUNTIME_DEPLOY_DEFAULT_IMAGE_REGISTRY_USERNAME",
+ "GPUSTACK_RUNTIME_DEPLOY_DEFAULT_CONTAINER_REGISTRY_USERNAME",
  # TODO(thxCode): Backward compatibility, remove in v0.1.45 later.
  "GPUSTACK_RUNTIME_DEPLOY_DEFAULT_REGISTRY_USERNAME",
+ # Legacy compatibility.
+ "GPUSTACK_RUNTIME_DEPLOY_DEFAULT_IMAGE_REGISTRY_USERNAME",
  ],
  ),
  ),
- "GPUSTACK_RUNTIME_DEPLOY_DEFAULT_IMAGE_REGISTRY_PASSWORD": lambda: getenvs(
+ "GPUSTACK_RUNTIME_DEPLOY_DEFAULT_CONTAINER_REGISTRY_PASSWORD": lambda: getenvs(
  keys=[
- "GPUSTACK_RUNTIME_DEPLOY_DEFAULT_IMAGE_REGISTRY_PASSWORD",
+ "GPUSTACK_RUNTIME_DEPLOY_DEFAULT_CONTAINER_REGISTRY_PASSWORD",
  # TODO(thxCode): Backward compatibility, remove in v0.1.45 later.
  "GPUSTACK_RUNTIME_DEPLOY_DEFAULT_REGISTRY_PASSWORD",
+ # Legacy compatibility.
+ "GPUSTACK_RUNTIME_DEPLOY_DEFAULT_IMAGE_REGISTRY_PASSWORD",
  ],
  ),
+ "GPUSTACK_RUNTIME_DEPLOY_DEFAULT_CONTAINER_NAMESPACE": lambda: trim_str(
+ getenvs(
+ keys=[
+ "GPUSTACK_RUNTIME_DEPLOY_DEFAULT_CONTAINER_NAMESPACE",
+ # Legacy compatibility.
+ "GPUSTACK_RUNTIME_DEPLOY_DEFAULT_IMAGE_NAMESPACE",
+ ],
+ ),
+ ),
  "GPUSTACK_RUNTIME_DEPLOY_IMAGE_PULL_POLICY": lambda: choice(
  getenv(
  "GPUSTACK_RUNTIME_DEPLOY_IMAGE_PULL_POLICY",
@@ -464,7 +476,7 @@ variables: dict[str, Callable[[], Any]] = {
  "hygon.com/devices=HIP_VISIBLE_DEVICES;"
  "iluvatar.ai/devices=CUDA_VISIBLE_DEVICES;"
  "metax-tech.com/devices=CUDA_VISIBLE_DEVICES;"
- "mthreads.com/devices=CUDA_VISIBLE_DEVICES;"
+ "mthreads.com/devices=CUDA_VISIBLE_DEVICES,MUSA_VISIBLE_DEVICES;"
  "nvidia.com/devices=CUDA_VISIBLE_DEVICES;",
  ),
  list_sep=",",
gpustack_runtime-0.1.39.post3.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: gpustack-runtime
- Version: 0.1.39.post1
+ Version: 0.1.39.post3
  Summary: GPUStack Runtime is library for detecting GPU resources and launching GPU workloads.
  Project-URL: Homepage, https://github.com/gpustack/runtime
  Project-URL: Bug Tracker, https://github.com/gpustack/gpustack/issues
@@ -15,8 +15,9 @@ Classifier: Programming Language :: Python :: 3.13
  Requires-Python: >=3.10
  Requires-Dist: argcomplete>=3.6.3
  Requires-Dist: docker>=7.1.0
- Requires-Dist: gpustack-runner>=0.1.23.post2
+ Requires-Dist: gpustack-runner>=0.1.23.post5
  Requires-Dist: kubernetes>=33.1.0
+ Requires-Dist: mthreads-ml-py>=2.2.10
  Requires-Dist: nvidia-ml-py>=13.580.65
  Requires-Dist: podman==5.6.0
  Requires-Dist: pyyaml
gpustack_runtime-0.1.39.post3.dist-info/RECORD CHANGED
@@ -1,49 +1,48 @@
  gpustack_runtime/__init__.py,sha256=kKcK6DMIXOdpWhgMS7xlsNrBNvEmY0L8eyQtBIC3CU4,160
- gpustack_runtime/__main__.py,sha256=ILEyXTfxn__jFvovxjxzRDIg7QJqQ2pQrP_2BCGQZRQ,3389
- gpustack_runtime/_version.py,sha256=nC-EU-83isDNnCdVSshJtz4OPykhWYA4jFlaXf9xxAY,792
+ gpustack_runtime/__main__.py,sha256=YfxBJy7Xu87jsl1g9EWXtEeAWkLqX9nbHuty7gGUgnw,3620
+ gpustack_runtime/_version.py,sha256=j_bFBefu7NnpCEW3cuer6_aWK5fUvN8thDABLTT31c8,792
  gpustack_runtime/_version.pyi,sha256=A42NoSgcqEXVy2OeNm4LXC9CbyonbooYrSUBlPm2lGY,156
- gpustack_runtime/envs.py,sha256=624z3eettjvXY3MW2KXFvPcWogp-gmjI5SoBnyfF9Vs,31852
+ gpustack_runtime/envs.py,sha256=1tNIZIzPuIMbBPMy6gUkadlUwmP_6qpMmCZQD_qwRNI,32429
  gpustack_runtime/logging.py,sha256=h_fvD5FV7GHCo00IUDLQmkPR-H6r66IX_WSwZwl0mCw,6869
- gpustack_runtime/cmds/__init__.py,sha256=9licqBPf2qLsGmv_cL6-SSUPVYCLavcRvryFfr_ZHUk,1010
+ gpustack_runtime/cmds/__init__.py,sha256=82MqustO4mRDCytBiT4SUMDIOaa_4n81sJEXQwwfqHE,1166
  gpustack_runtime/cmds/__types__.py,sha256=7C4kQM0EHPD8WpJpTo6kh9rEdkrYALcLQ-GAzMMsqV8,789
- gpustack_runtime/cmds/deployer.py,sha256=GlQKEXlrC35M7jFmUGylVlfndIu3Y9FhYTo5SsDMjPo,23206
+ gpustack_runtime/cmds/deployer.py,sha256=7tTBf2_pKtK3_jeGcwjxg3ibzUBUKMKyGU5W6Nms7vE,26222
  gpustack_runtime/cmds/detector.py,sha256=xTuPkl0Psi6T_n7RvwCHBeubX4AsjXAhc_emrMufY58,8724
  gpustack_runtime/cmds/images.py,sha256=CjmD82JMdzftRiaeEYSKTGBXudb_qla41v-UzwZQtTo,514
- gpustack_runtime/deployer/__init__.py,sha256=OGAr302rnH8ijmqUPxaF8MjyrDcCdw2BF0wnq_-GKXc,10346
+ gpustack_runtime/deployer/__init__.py,sha256=2LQSFBO4syh7Q1rp-Rd_122Vwh-4CNDM1CGxmu3GhXY,15498
  gpustack_runtime/deployer/__patches__.py,sha256=4TCUdDzKBiOYmr5s0UkrEjBU0UjT7U36R0aQbSvrFRE,17823
- gpustack_runtime/deployer/__types__.py,sha256=UWovjd3n1avWwNm7U2z3Ipftpa9_r7ABUKu0PE1sVy4,60531
- gpustack_runtime/deployer/__utils__.py,sha256=aKoHSeodwEPuUW19bXZNNPG-5ACFn-o2EosmGxtkxS0,21054
- gpustack_runtime/deployer/docker.py,sha256=moxCOn_IkdQCiHxZKgOvGGOT3TbemJTq74vhgyBlHZY,77361
- gpustack_runtime/deployer/kuberentes.py,sha256=BSlSwsp0CK0xoSdQf9U4EnbbOei8pIk1QQi3p2lzHz4,79614
- gpustack_runtime/deployer/podman.py,sha256=GUxMTnaQuErZK55wvhK2MbgFjkYLrXD8ThXuWcM06zI,74114
+ gpustack_runtime/deployer/__types__.py,sha256=va80QrwaVJx3WJSEIlQvjZYbeuwG6PL04sTXl90q3rg,71365
+ gpustack_runtime/deployer/__utils__.py,sha256=wBnyKhR1WLcdPQlhjKeEgJ9kPvBhxVlTAqbW2iBBLIE,21710
+ gpustack_runtime/deployer/docker.py,sha256=vrqglLLANiO5P4l0j828KbdZ_-wGnbol0PnB7QUi7qs,84391
+ gpustack_runtime/deployer/kuberentes.py,sha256=JR79UfdMCo_0GCpeOFHxDQFAnYZdu7oApNkcjhr3DzA,87333
+ gpustack_runtime/deployer/podman.py,sha256=nKjsNxgeAbSuc5Lwbyx4W_P-lY7dAvl6WpjExRT-ENQ,81520
  gpustack_runtime/detector/__init__.py,sha256=kLVxZ9iud2aLwqqAOanIYNb4QSHavUPhqay-FU5ndbU,5443
  gpustack_runtime/detector/__types__.py,sha256=nVh9OV8tZdHFjHPlYul3u2zMvnpa9KOL5GxlPJhEvGk,13163
- gpustack_runtime/detector/__utils__.py,sha256=Yj4GvGvuDuscDG5OpExYdv-1VhmEHBpRrBC9rTsB-kA,24584
- gpustack_runtime/detector/amd.py,sha256=_sRBRZn9pFQ9jITCetvsoHicjXA9RSVrFvo6zOwCHU8,18356
+ gpustack_runtime/detector/__utils__.py,sha256=3_6RsBmrFJ0TpNk0KkwTju_cWhXA-rP__ZNktjP_T_Q,25269
+ gpustack_runtime/detector/amd.py,sha256=JGERVoSx1MlZP9AHs_u92I_Zz4Cl4-fxMzLMBia_F74,18693
  gpustack_runtime/detector/ascend.py,sha256=VkmNM-enbjMNVXKFfv7z6S-4BkwD6YkBmcC0J1ZGEuc,18588
  gpustack_runtime/detector/cambricon.py,sha256=bqWo2mUuf3a5kV8NC1PzFtHIUhVQkSejUSWR5Z5XaGA,3899
- gpustack_runtime/detector/hygon.py,sha256=I4SWuq5hXLRPJQyIyCoSaU1VnRMDblzQtljKy7CBVdM,13337
- gpustack_runtime/detector/iluvatar.py,sha256=1fbkfE2w_MH36YgQDv21xb76sl6GvYxVd3qSYXjET0g,10372
+ gpustack_runtime/detector/hygon.py,sha256=n_spB46BFSQtUBYEact0bQo25uOWSn0fsZ8Uwcq9DHs,13540
+ gpustack_runtime/detector/iluvatar.py,sha256=IRc1teRB21b8L0ouCT61VL3eDaWcLWwoKpwFgjHILpg,10693
  gpustack_runtime/detector/metax.py,sha256=YRpZVfz_nG7QTEMqTxqpgi9x5aPYy4gfvYhiUpbz8R8,10146
- gpustack_runtime/detector/mthreads.py,sha256=PoGirid-rboH4Nm6sKnv424ULax1eu1rEaAsWKPT02g,11105
- gpustack_runtime/detector/nvidia.py,sha256=eKVyvyvwbXOIBtWlaCMltqB_ohW9yBnw8M_KYHzv81E,26445
+ gpustack_runtime/detector/mthreads.py,sha256=XW8dyb4i8qVKxHw1Yu08f2YRhpj0Xe8E-MSNfSeOwr4,11012
+ gpustack_runtime/detector/nvidia.py,sha256=HmctG6s_03FsISeIJ-mptoQ9CUnQyE0X3bh-Qfa4dnI,30306
  gpustack_runtime/detector/pyacl/__init__.py,sha256=13_zyQxo_yql99Ex7n3HTPzZVTSt8A-TN0-u5eDW8nE,15994
  gpustack_runtime/detector/pyamdgpu/__init__.py,sha256=vFIOA072L01Nxvzgixns-wNrQJK8sJMvEs90DEEE2-0,8378
  gpustack_runtime/detector/pyamdsmi/__init__.py,sha256=BXAOVnoew6MliU86ci73Rk2yWPeXRUBicFKFWh-r2s0,929
  gpustack_runtime/detector/pycuda/__init__.py,sha256=o2Oj3l0HsXNttwnUylxerbupK-45gZWbluqfEvBDQ38,16673
  gpustack_runtime/detector/pydcmi/__init__.py,sha256=KVmem7Cd6ZoQ-irB_WOsDVOiyGr5Cv69_rm1NNBpkxo,37129
- gpustack_runtime/detector/pyhsa/__init__.py,sha256=jQ6tOPG8kn5Xp3F99-Vu6aUc4hzSUOdoGkEcl8qxQOc,15354
+ gpustack_runtime/detector/pyhsa/__init__.py,sha256=v2rWLyyqNQ24Jpy-7MoBbctd797gHN_6JNrFA84AjxM,15445
  gpustack_runtime/detector/pyixml/__init__.py,sha256=oYFAfQ8ZbPg7oMFXFfLjJuCBEV7hWQpFqMMVcN99Sxs,166564
- gpustack_runtime/detector/pymtml/__init__.py,sha256=hfaVZysit_MUTDs0qsnOBgKXq_8DKlCb0Jv00WasU0w,22008
  gpustack_runtime/detector/pymxsml/__init__.py,sha256=spHMs9t2tiV-C9kO-cV_230CBVAZ-j2DWtLfiFU5V3E,25
  gpustack_runtime/detector/pymxsml/mxsml.py,sha256=nRuKSVxIsJj2SJX-SYnE9NGZ7hGjWO9nKczC6sYmSRg,47196
  gpustack_runtime/detector/pymxsml/mxsml_extension.py,sha256=zfWFRGa9eSup336t2lPhIvCbPpa7Prn-7xCHRljg4CI,26967
  gpustack_runtime/detector/pymxsml/mxsml_mcm.py,sha256=a4rX7hJNJKTqLodKU9rYeDaAEKef6UNVThl1w-aiFsA,15363
  gpustack_runtime/detector/pyrocmcore/__init__.py,sha256=8XxKmwLX4-uoP7cfxjGoEmk9qlrGf0804pgyb74mJKs,2108
- gpustack_runtime/detector/pyrocmsmi/__init__.py,sha256=OmvfJepAtEnDKBMLrktMXlRbJEZttMDQ2R4ztD8kLGg,11806
- gpustack_runtime/_version_appendix.py,sha256=8YQIRWVgq2vdpyqBhE6QHm4WYQetOtYCK_sPA5OF-E0,23
- gpustack_runtime-0.1.39.post1.dist-info/METADATA,sha256=E4bWmJgRHV2dKcb87lEDMQYapW5xoFeFwmtmMnPPIyU,2131
- gpustack_runtime-0.1.39.post1.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
- gpustack_runtime-0.1.39.post1.dist-info/entry_points.txt,sha256=bBO_61GxP6dIT74uZwbSDgW5Vt2pTePUS3CgjUJkUgg,68
- gpustack_runtime-0.1.39.post1.dist-info/licenses/LICENSE,sha256=OiPibowBvB-NHV3TP_NOj18XNBlXcshXZFMpa3uvKVE,10362
- gpustack_runtime-0.1.39.post1.dist-info/RECORD,,
+ gpustack_runtime/detector/pyrocmsmi/__init__.py,sha256=gtyiS4vXpuc9Y9xt3cw_ObqiivbsU8_xi48TIqZA0hk,11538
+ gpustack_runtime/_version_appendix.py,sha256=8EQMM2co0WsEEaFTlEGHr7PFAY9g_8S8iAhM1Hgptog,23
+ gpustack_runtime-0.1.39.post3.dist-info/METADATA,sha256=XWfZJrvw3xPJsjfwnFtQoCNv3SR7xBNlFjppk9ErwgE,2169
+ gpustack_runtime-0.1.39.post3.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+ gpustack_runtime-0.1.39.post3.dist-info/entry_points.txt,sha256=bBO_61GxP6dIT74uZwbSDgW5Vt2pTePUS3CgjUJkUgg,68
+ gpustack_runtime-0.1.39.post3.dist-info/licenses/LICENSE,sha256=OiPibowBvB-NHV3TP_NOj18XNBlXcshXZFMpa3uvKVE,10362
+ gpustack_runtime-0.1.39.post3.dist-info/RECORD,,