gpustack-runtime 0.1.40.post1__py3-none-any.whl → 0.1.41.post1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66) hide show
  1. gpustack_runtime/__init__.py +1 -1
  2. gpustack_runtime/__main__.py +5 -3
  3. gpustack_runtime/_version.py +2 -2
  4. gpustack_runtime/_version_appendix.py +1 -1
  5. gpustack_runtime/cmds/__init__.py +5 -3
  6. gpustack_runtime/cmds/__types__.py +1 -1
  7. gpustack_runtime/cmds/deployer.py +140 -18
  8. gpustack_runtime/cmds/detector.py +1 -1
  9. gpustack_runtime/cmds/images.py +1 -1
  10. gpustack_runtime/deployer/__init__.py +28 -2
  11. gpustack_runtime/deployer/__patches__.py +1 -1
  12. gpustack_runtime/deployer/__types__.py +2 -1
  13. gpustack_runtime/deployer/__utils__.py +2 -2
  14. gpustack_runtime/deployer/cdi/__init__.py +86 -5
  15. gpustack_runtime/deployer/cdi/__types__.py +92 -29
  16. gpustack_runtime/deployer/cdi/__utils__.py +180 -0
  17. gpustack_runtime/deployer/cdi/amd.py +146 -0
  18. gpustack_runtime/deployer/cdi/ascend.py +164 -0
  19. gpustack_runtime/deployer/cdi/hygon.py +147 -0
  20. gpustack_runtime/deployer/cdi/iluvatar.py +136 -0
  21. gpustack_runtime/deployer/cdi/metax.py +148 -0
  22. gpustack_runtime/deployer/cdi/thead.py +57 -23
  23. gpustack_runtime/deployer/docker.py +9 -8
  24. gpustack_runtime/deployer/k8s/deviceplugin/__init__.py +325 -0
  25. gpustack_runtime/deployer/k8s/deviceplugin/__types__.py +131 -0
  26. gpustack_runtime/deployer/k8s/deviceplugin/plugin.py +590 -0
  27. gpustack_runtime/deployer/k8s/types/kubelet/deviceplugin/v1beta1/__init__.py +3 -0
  28. gpustack_runtime/deployer/k8s/types/kubelet/deviceplugin/v1beta1/api.proto +212 -0
  29. gpustack_runtime/deployer/k8s/types/kubelet/deviceplugin/v1beta1/api_pb2.py +86 -0
  30. gpustack_runtime/deployer/k8s/types/kubelet/deviceplugin/v1beta1/api_pb2.pyi +168 -0
  31. gpustack_runtime/deployer/k8s/types/kubelet/deviceplugin/v1beta1/api_pb2_grpc.py +358 -0
  32. gpustack_runtime/deployer/k8s/types/kubelet/deviceplugin/v1beta1/constants.py +34 -0
  33. gpustack_runtime/deployer/kuberentes.py +50 -4
  34. gpustack_runtime/deployer/podman.py +9 -8
  35. gpustack_runtime/detector/__init__.py +42 -5
  36. gpustack_runtime/detector/__types__.py +8 -24
  37. gpustack_runtime/detector/__utils__.py +46 -39
  38. gpustack_runtime/detector/amd.py +55 -66
  39. gpustack_runtime/detector/ascend.py +29 -41
  40. gpustack_runtime/detector/cambricon.py +3 -3
  41. gpustack_runtime/detector/hygon.py +21 -49
  42. gpustack_runtime/detector/iluvatar.py +44 -60
  43. gpustack_runtime/detector/metax.py +54 -37
  44. gpustack_runtime/detector/mthreads.py +74 -36
  45. gpustack_runtime/detector/nvidia.py +130 -93
  46. gpustack_runtime/detector/pyacl/__init__.py +1 -1
  47. gpustack_runtime/detector/pyamdgpu/__init__.py +1 -1
  48. gpustack_runtime/detector/pyamdsmi/__init__.py +1 -1
  49. gpustack_runtime/detector/pycuda/__init__.py +1 -1
  50. gpustack_runtime/detector/pydcmi/__init__.py +1 -1
  51. gpustack_runtime/detector/pyhsa/__init__.py +1 -1
  52. gpustack_runtime/detector/pymxsml/__init__.py +1553 -1
  53. gpustack_runtime/detector/pyrocmcore/__init__.py +1 -1
  54. gpustack_runtime/detector/pyrocmsmi/__init__.py +1 -1
  55. gpustack_runtime/detector/thead.py +41 -60
  56. gpustack_runtime/envs.py +106 -12
  57. gpustack_runtime/logging.py +6 -2
  58. {gpustack_runtime-0.1.40.post1.dist-info → gpustack_runtime-0.1.41.post1.dist-info}/METADATA +6 -1
  59. gpustack_runtime-0.1.41.post1.dist-info/RECORD +67 -0
  60. gpustack_runtime/detector/pymxsml/mxsml.py +0 -1580
  61. gpustack_runtime/detector/pymxsml/mxsml_extension.py +0 -816
  62. gpustack_runtime/detector/pymxsml/mxsml_mcm.py +0 -476
  63. gpustack_runtime-0.1.40.post1.dist-info/RECORD +0 -55
  64. {gpustack_runtime-0.1.40.post1.dist-info → gpustack_runtime-0.1.41.post1.dist-info}/WHEEL +0 -0
  65. {gpustack_runtime-0.1.40.post1.dist-info → gpustack_runtime-0.1.41.post1.dist-info}/entry_points.txt +0 -0
  66. {gpustack_runtime-0.1.40.post1.dist-info → gpustack_runtime-0.1.41.post1.dist-info}/licenses/LICENSE +0 -0
@@ -29,10 +29,11 @@ class ConfigDeviceNode(dict):
29
29
  self,
30
30
  path: str,
31
31
  host_path: str | None = None,
32
- permissions: str | None = None,
33
32
  type_: str = "c",
34
33
  major: int | None = None,
35
34
  minor: int | None = None,
35
+ file_mode: int | None = None,
36
+ permissions: str | None = None,
36
37
  uid: int | None = None,
37
38
  gid: int | None = None,
38
39
  ):
@@ -44,14 +45,22 @@ class ConfigDeviceNode(dict):
44
45
  The path inside the container.
45
46
  host_path:
46
47
  The path on the host system. Optional.
47
- permissions:
48
- The permissions for the device. Optional.
48
+ None means same as path.
49
49
  type_:
50
- The type of the device. Default is "c".
50
+ The type of the device.
51
51
  major:
52
52
  The major number of the device. Optional.
53
53
  minor:
54
54
  The minor number of the device. Optional.
55
+ file_mode:
56
+ The file mode for the device. Optional.
57
+ permissions:
58
+ The permissions for the device. Optional.
59
+ CGroup permissions of the device, candidates are one or more of
60
+ 'r' (read), 'w' (write), 'm' (mknod).
61
+ 'r': allows container to read from the device.
62
+ 'w': allows container to write to the device.
63
+ 'm': allows container to create device files that do not yet exist.
55
64
  uid:
56
65
  The user ID for the device. Optional.
57
66
  gid:
@@ -67,13 +76,15 @@ class ConfigDeviceNode(dict):
67
76
  self["path"] = path
68
77
  if host_path is not None:
69
78
  self["hostPath"] = host_path
70
- if permissions is not None:
71
- self["permissions"] = permissions
72
79
  if type_ is not None:
73
80
  self["type"] = type_
74
81
  if major is not None and minor is not None:
75
82
  self["major"] = major
76
83
  self["minor"] = minor
84
+ if file_mode is not None:
85
+ self["fileMode"] = file_mode
86
+ if permissions is not None:
87
+ self["permissions"] = permissions
77
88
  if uid is not None:
78
89
  self["uid"] = uid
79
90
  if gid is not None:
@@ -177,9 +188,9 @@ class ConfigMount(dict):
177
188
  def __init__(
178
189
  self,
179
190
  host_path: str,
180
- container_path: str,
181
- options: list[str] | None = None,
191
+ container_path: str | None = None,
182
192
  type_: str | None = None,
193
+ options: list[str] | None = None,
183
194
  ):
184
195
  """
185
196
  Initialize a CDI mount configuration.
@@ -188,28 +199,26 @@ class ConfigMount(dict):
188
199
  host_path:
189
200
  The path on the host system.
190
201
  container_path:
191
- The path inside the container.
192
- options:
193
- The mount options. Optional.
202
+ The path inside the container. Optional.
203
+ None means same as host_path.
194
204
  type_:
195
205
  The mount type. Optional.
206
+ options:
207
+ The mount options. Optional.
196
208
 
197
209
  """
198
210
  if not host_path:
199
211
  msg = "host_path cannot be empty"
200
212
  raise ValueError(msg)
201
- if not container_path:
202
- msg = "container_path cannot be empty"
203
- raise ValueError(msg)
204
213
 
205
214
  super().__init__()
206
215
 
207
216
  self["hostPath"] = host_path
208
- self["containerPath"] = container_path
209
- if options is not None:
210
- self["options"] = options
217
+ self["containerPath"] = host_path or container_path
211
218
  if type_ is not None:
212
219
  self["type"] = type_
220
+ if options is not None:
221
+ self["options"] = options
213
222
 
214
223
  @property
215
224
  def host_path(self) -> str:
@@ -333,7 +342,7 @@ class ConfigContainerEdits(dict):
333
342
  def __init__(
334
343
  self,
335
344
  env: list[str] | None = None,
336
- device_nodes: list[ConfigDeviceNode | str] | None = None,
345
+ device_nodes: list[ConfigDeviceNode] | None = None,
337
346
  mounts: list[ConfigMount] | None = None,
338
347
  hooks: list[ConfigHook] | None = None,
339
348
  ):
@@ -360,10 +369,7 @@ class ConfigContainerEdits(dict):
360
369
  if env is not None:
361
370
  self["env"] = env
362
371
  if device_nodes is not None:
363
- self["deviceNodes"] = [
364
- n if not isinstance(n, str) else ConfigDeviceNode(n)
365
- for n in device_nodes
366
- ]
372
+ self["deviceNodes"] = device_nodes
367
373
  if mounts is not None:
368
374
  self["mounts"] = mounts
369
375
  if hooks is not None:
@@ -521,6 +527,7 @@ class Config(dict):
521
527
  self,
522
528
  kind: str,
523
529
  devices: list[ConfigDevice],
530
+ container_edits: list[ConfigContainerEdits] | None = None,
524
531
  cdi_version: str = _DEFAULT_CDI_VERSION,
525
532
  annotations: dict[str, str] | None = None,
526
533
  ):
@@ -528,10 +535,17 @@ class Config(dict):
528
535
  Initialize a CDI configuration.
529
536
 
530
537
  Args:
531
- kind: The kind of the CDI configuration.
532
- devices: The list of devices in the CDI configuration.
533
- cdi_version: The CDI version. Default is "0.5.0".
534
- annotations: Optional annotations for the CDI configuration.
538
+ kind:
539
+ The kind of the CDI configuration.
540
+ devices:
541
+ The list of devices in the CDI configuration.
542
+ container_edits:
543
+ The list of container edits in the CDI configuration.
544
+ Applies to all devices. Optional.
545
+ cdi_version:
546
+ The CDI version. Default is "0.5.0".
547
+ annotations:
548
+ Optional annotations for the CDI configuration.
535
549
 
536
550
  """
537
551
  super().__init__()
@@ -539,6 +553,8 @@ class Config(dict):
539
553
  self["cdiVersion"] = cdi_version
540
554
  self["kind"] = kind
541
555
  self["devices"] = devices
556
+ if container_edits is not None:
557
+ self["containerEdits"] = container_edits
542
558
  if annotations is not None:
543
559
  self["annotations"] = annotations
544
560
 
@@ -564,6 +580,17 @@ class Config(dict):
564
580
  """
565
581
  return self["kind"]
566
582
 
583
@property
def container_edits(self) -> list[ConfigContainerEdits] | None:
    """
    Container edits of the CDI configuration that apply to all devices.

    Returns:
        The value stored under the "containerEdits" key,
        or None when that key is absent.

    """
    # dict.get already yields None for a missing key.
    return self.get("containerEdits")
593
+
567
594
  @property
568
595
  def cdi_version(self) -> str:
569
596
  """
@@ -605,12 +632,14 @@ class Config(dict):
605
632
 
606
633
 
607
634
  @lru_cache
608
- def manufacturer_to_config_kind(manufacturer: ManufacturerEnum) -> str | None:
635
+ def manufacturer_to_cdi_kind(manufacturer: ManufacturerEnum) -> str | None:
609
636
  """
610
637
  Map a manufacturer to its corresponding CDI config kind,
611
638
  based on `GPUSTACK_RUNTIME_DETECT_BACKEND_MAP_RESOURCE_KEY`
612
639
  and `GPUSTACK_RUNTIME_DEPLOY_RESOURCE_KEY_MAP_CDI` envs.
613
640
 
641
+ It is in form of `{Vendor}/{Class}`.
642
+
614
643
  Args:
615
644
  manufacturer:
616
645
  The manufacturer enum.
@@ -628,6 +657,32 @@ def manufacturer_to_config_kind(manufacturer: ManufacturerEnum) -> str | None:
628
657
  return kind
629
658
 
630
659
 
660
@lru_cache
def manufacturer_to_runtime_env(manufacturer: ManufacturerEnum) -> str | None:
    """
    Resolve the runtime visible-devices environment variable of a manufacturer.

    The manufacturer is mapped to its backend, the backend to a resource key via
    `GPUSTACK_RUNTIME_DETECT_BACKEND_MAP_RESOURCE_KEY`, and the resource key to
    the environment variable via
    `GPUSTACK_RUNTIME_DEPLOY_RESOURCE_KEY_MAP_RUNTIME_VISIBLE_DEVICES`.

    Args:
        manufacturer:
            The manufacturer enum.

    Returns:
        The mapped runtime environment variable as a string.
        None if the backend has no resource key or the key has no mapping.

    """
    resource_key = envs.GPUSTACK_RUNTIME_DETECT_BACKEND_MAP_RESOURCE_KEY.get(
        manufacturer_to_backend(manufacturer),
    )
    if resource_key:
        return envs.GPUSTACK_RUNTIME_DEPLOY_RESOURCE_KEY_MAP_RUNTIME_VISIBLE_DEVICES.get(
            resource_key,
        )
    return None
684
+
685
+
631
686
  class Generator(ABC):
632
687
  """
633
688
  Base class for all CDI generators.
@@ -653,12 +708,20 @@ class Generator(ABC):
653
708
  return str(self.manufacturer)
654
709
 
655
710
  @abstractmethod
656
- def generate(self, devices: Devices | None = None) -> Config | None:
711
+ def generate(
712
+ self,
713
+ devices: Devices | None = None,
714
+ include_all_devices: bool = True,
715
+ ) -> Config | None:
657
716
  """
658
717
  Generate the CDI specification.
659
718
 
660
719
  Args:
661
- devices: The devices to generate the CDI specification for.
720
+ devices:
721
+ The devices to generate the CDI specification for.
722
+ If None, all available devices are considered.
723
+ include_all_devices:
724
+ Whether to include a device entry that represents all devices.
662
725
 
663
726
  Returns:
664
727
  The Config object, or None if not supported.
@@ -0,0 +1,180 @@
1
+ from __future__ import annotations as __future_annotations__
2
+
3
+ import os
4
+ import stat
5
+ from dataclasses import dataclass
6
+ from pathlib import Path
7
+
8
+ from dataclasses_json import dataclass_json
9
+
10
+ from .__types__ import ConfigDeviceNode, ConfigMount
11
+
12
+
13
@dataclass_json
@dataclass
class LinuxDevice:
    """
    Linux device information.

    Attributes:
        path:
            Path to the device file.
        type:
            Device type: 'b' for block, 'c' for character, 'p' for pipe.
        major:
            Major device number.
        minor:
            Minor device number.
        file_mode:
            File mode (permissions) of the device.
        uid:
            User ID of the device owner.
        gid:
            Group ID of the device owner.

    """

    path: str
    type: str
    major: int
    minor: int
    file_mode: int | None = None
    uid: int | None = None
    gid: int | None = None
49
+
50
+
51
def linux_device_from_path(path: Path | str | None) -> LinuxDevice | None:
    """
    Get the Linux device information for a given path.

    The path is inspected with a single `lstat` call, so a symbolic link is
    reported as a link (and therefore rejected) rather than as its target.

    Args:
        path:
            The path to the device file.

    Returns:
        The LinuxDevice object.
        None if the path is empty, cannot be stat'ed (e.g. it does not exist),
        or is not a block device, character device or FIFO.

    """
    if not path:
        return None

    try:
        # One guarded lstat instead of exists() + lstat(): the device node may
        # disappear between the two calls, which would otherwise raise.
        path_stat = Path(path).lstat()
    except OSError:
        return None

    # Map the file-type bits to the single-letter device type.
    dev_type = {
        stat.S_IFBLK: "b",
        stat.S_IFCHR: "c",
        stat.S_IFIFO: "p",
    }.get(stat.S_IFMT(path_stat.st_mode))
    if dev_type is None:
        return None

    dev_number = path_stat.st_rdev

    return LinuxDevice(
        path=str(path),
        type=dev_type,
        major=os.major(dev_number),
        minor=os.minor(dev_number),
        # Permission bits only, without the file-type bits.
        file_mode=stat.S_IMODE(path_stat.st_mode),
        uid=path_stat.st_uid,
        gid=path_stat.st_gid,
    )
103
+
104
+
105
def device_to_cdi_device_node(
    path: str,
    container_path: str | None = None,
    permission: str = "rw",
    no_user: bool = False,
) -> ConfigDeviceNode | None:
    """
    Convert a device path to a ConfigDeviceNode.

    Args:
        path:
            Path to the device on the host.
        container_path:
            Path to the device inside the container.
            None means same as the host path.
        permission:
            Permissions for the device.
        no_user:
            Whether to omit user and group information.

    Returns:
        The ConfigDeviceNode object.
        None if the device does not exist.

    """
    dev = linux_device_from_path(path)
    if not dev:
        return None

    # ConfigDeviceNode.path is the path inside the container and host_path is
    # the path on the host (None meaning "same as path"). Only set host_path
    # when the container path really differs from the host path.
    if container_path and container_path != dev.path:
        node_path, node_host_path = container_path, dev.path
    else:
        node_path, node_host_path = dev.path, None

    return ConfigDeviceNode(
        path=node_path,
        host_path=node_host_path,
        type_=dev.type,
        major=dev.major,
        minor=dev.minor,
        file_mode=dev.file_mode,
        permissions=permission,
        uid=None if no_user else dev.uid,
        gid=None if no_user else dev.gid,
    )
144
+
145
+
146
def path_to_cdi_mount(
    path: str,
    container_path: str | None = None,
    options: list[str] | None = None,
) -> ConfigMount | None:
    """
    Build a ConfigMount for a file or directory on the host.

    Args:
        path:
            Path to the file or directory on the host.
        container_path:
            Path to the file or directory inside the container.
            None means same as path.
        options:
            Mount options.
            None means a read-only recursive private bind mount.

    Returns:
        The ConfigMount object.
        None if the path does not exist.

    """
    if not Path(path).exists():
        return None

    return ConfigMount(
        host_path=path,
        container_path=path if container_path is None else container_path,
        options=(
            ["ro", "nosuid", "nodev", "rbind", "rprivate"]
            if options is None
            else options
        ),
    )
@@ -0,0 +1,146 @@
1
+ from __future__ import annotations as __future_annotations__
2
+
3
+ from ...detector import (
4
+ Devices,
5
+ ManufacturerEnum,
6
+ detect_devices,
7
+ filter_devices_by_manufacturer,
8
+ )
9
+ from .__types__ import (
10
+ Config,
11
+ ConfigContainerEdits,
12
+ ConfigDevice,
13
+ Generator,
14
+ manufacturer_to_cdi_kind,
15
+ manufacturer_to_runtime_env,
16
+ )
17
+ from .__utils__ import device_to_cdi_device_node
18
+
19
+
20
class AMDGenerator(Generator):
    """
    CDI generator for AMD devices.
    """

    def __init__(self):
        super().__init__(ManufacturerEnum.AMD)

    def generate(
        self,
        devices: Devices | None = None,
        include_all_devices: bool = True,
    ) -> Config | None:
        """
        Generate the CDI configuration for AMD devices.

        Every device is published under two names, its index and its UUID,
        both sharing the same `/dev/dri/card*` and `/dev/dri/renderD*` nodes.
        `/dev/kfd` is attached at configuration level.

        Args:
            devices:
                The detected devices.
                If None, all available devices are considered.
            include_all_devices:
                Whether to include a device entry that represents all AMD devices.

        Returns:
            The Config object, or None if not supported.

        """
        if devices is None:
            devices = detect_devices(manufacturer=self.manufacturer)
        else:
            devices = filter_devices_by_manufacturer(
                devices,
                manufacturer=self.manufacturer,
            )

        if not devices:
            return None

        kind = manufacturer_to_cdi_kind(self.manufacturer)
        if not kind:
            return None

        # Device nodes shared by every AMD device; without them nothing is generated.
        common_device_nodes = [
            cdn
            for cdn in (device_to_cdi_device_node(path=p) for p in ["/dev/kfd"])
            if cdn
        ]
        if not common_device_nodes:
            return None

        cdi_devices: list[ConfigDevice] = []

        all_device_nodes = []

        for dev in devices:
            if not dev:
                continue

            container_device_nodes = []

            card_id = dev.appendix.get("card_id")
            if card_id is not None:
                cdn = device_to_cdi_device_node(
                    path=f"/dev/dri/card{card_id}",
                )
                # A device whose card node is missing is skipped entirely.
                if not cdn:
                    continue
                all_device_nodes.append(cdn)
                container_device_nodes.append(cdn)
            renderd_id = dev.appendix.get("renderd_id")
            if renderd_id is not None:
                cdn = device_to_cdi_device_node(
                    path=f"/dev/dri/renderD{renderd_id}",
                )
                # A missing render node is tolerated.
                if cdn:
                    all_device_nodes.append(cdn)
                    container_device_nodes.append(cdn)

            # Add specific container edits for each device,
            # addressable by both index and UUID.
            cdi_container_edits = ConfigContainerEdits(
                device_nodes=container_device_nodes,
            )
            cdi_devices.append(
                ConfigDevice(
                    name=str(dev.index),
                    container_edits=cdi_container_edits,
                ),
            )
            cdi_devices.append(
                ConfigDevice(
                    name=dev.uuid,
                    container_edits=cdi_container_edits,
                ),
            )

        if not cdi_devices:
            return None

        # Add an "all" entry that carries the nodes of every device.
        if include_all_devices:
            cdi_devices.append(
                ConfigDevice(
                    name="all",
                    container_edits=ConfigContainerEdits(
                        device_nodes=all_device_nodes,
                    ),
                ),
            )

        # Only emit the env entry when a variable is actually mapped,
        # otherwise the container would receive a bogus "None=void" entry.
        runtime_env = manufacturer_to_runtime_env(self.manufacturer)
        common_env = [f"{runtime_env}=void"] if runtime_env else None

        return Config(
            kind=kind,
            devices=cdi_devices,
            container_edits=[
                ConfigContainerEdits(
                    env=common_env,
                    device_nodes=common_device_nodes,
                ),
            ],
        )