gpustack-runtime 0.1.40.post1__py3-none-any.whl → 0.1.41__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66) hide show
  1. gpustack_runtime/__init__.py +1 -1
  2. gpustack_runtime/__main__.py +5 -3
  3. gpustack_runtime/_version.py +2 -2
  4. gpustack_runtime/_version_appendix.py +1 -1
  5. gpustack_runtime/cmds/__init__.py +5 -3
  6. gpustack_runtime/cmds/__types__.py +1 -1
  7. gpustack_runtime/cmds/deployer.py +140 -18
  8. gpustack_runtime/cmds/detector.py +1 -1
  9. gpustack_runtime/cmds/images.py +1 -1
  10. gpustack_runtime/deployer/__init__.py +28 -2
  11. gpustack_runtime/deployer/__patches__.py +1 -1
  12. gpustack_runtime/deployer/__types__.py +2 -1
  13. gpustack_runtime/deployer/__utils__.py +2 -2
  14. gpustack_runtime/deployer/cdi/__init__.py +85 -5
  15. gpustack_runtime/deployer/cdi/__types__.py +92 -29
  16. gpustack_runtime/deployer/cdi/__utils__.py +178 -0
  17. gpustack_runtime/deployer/cdi/amd.py +146 -0
  18. gpustack_runtime/deployer/cdi/ascend.py +164 -0
  19. gpustack_runtime/deployer/cdi/hygon.py +147 -0
  20. gpustack_runtime/deployer/cdi/iluvatar.py +136 -0
  21. gpustack_runtime/deployer/cdi/metax.py +148 -0
  22. gpustack_runtime/deployer/cdi/thead.py +57 -23
  23. gpustack_runtime/deployer/docker.py +9 -8
  24. gpustack_runtime/deployer/k8s/deviceplugin/__init__.py +240 -0
  25. gpustack_runtime/deployer/k8s/deviceplugin/__types__.py +131 -0
  26. gpustack_runtime/deployer/k8s/deviceplugin/plugin.py +586 -0
  27. gpustack_runtime/deployer/k8s/types/kubelet/deviceplugin/v1beta1/__init__.py +3 -0
  28. gpustack_runtime/deployer/k8s/types/kubelet/deviceplugin/v1beta1/api.proto +212 -0
  29. gpustack_runtime/deployer/k8s/types/kubelet/deviceplugin/v1beta1/api_pb2.py +86 -0
  30. gpustack_runtime/deployer/k8s/types/kubelet/deviceplugin/v1beta1/api_pb2.pyi +168 -0
  31. gpustack_runtime/deployer/k8s/types/kubelet/deviceplugin/v1beta1/api_pb2_grpc.py +358 -0
  32. gpustack_runtime/deployer/k8s/types/kubelet/deviceplugin/v1beta1/constants.py +34 -0
  33. gpustack_runtime/deployer/kuberentes.py +37 -4
  34. gpustack_runtime/deployer/podman.py +9 -8
  35. gpustack_runtime/detector/__init__.py +42 -5
  36. gpustack_runtime/detector/__types__.py +8 -24
  37. gpustack_runtime/detector/__utils__.py +46 -39
  38. gpustack_runtime/detector/amd.py +55 -66
  39. gpustack_runtime/detector/ascend.py +29 -41
  40. gpustack_runtime/detector/cambricon.py +3 -3
  41. gpustack_runtime/detector/hygon.py +21 -49
  42. gpustack_runtime/detector/iluvatar.py +44 -60
  43. gpustack_runtime/detector/metax.py +54 -37
  44. gpustack_runtime/detector/mthreads.py +74 -36
  45. gpustack_runtime/detector/nvidia.py +130 -93
  46. gpustack_runtime/detector/pyacl/__init__.py +1 -1
  47. gpustack_runtime/detector/pyamdgpu/__init__.py +1 -1
  48. gpustack_runtime/detector/pyamdsmi/__init__.py +1 -1
  49. gpustack_runtime/detector/pycuda/__init__.py +1 -1
  50. gpustack_runtime/detector/pydcmi/__init__.py +1 -1
  51. gpustack_runtime/detector/pyhsa/__init__.py +1 -1
  52. gpustack_runtime/detector/pymxsml/__init__.py +1553 -1
  53. gpustack_runtime/detector/pyrocmcore/__init__.py +1 -1
  54. gpustack_runtime/detector/pyrocmsmi/__init__.py +1 -1
  55. gpustack_runtime/detector/thead.py +41 -60
  56. gpustack_runtime/envs.py +104 -12
  57. gpustack_runtime/logging.py +6 -2
  58. {gpustack_runtime-0.1.40.post1.dist-info → gpustack_runtime-0.1.41.dist-info}/METADATA +6 -1
  59. gpustack_runtime-0.1.41.dist-info/RECORD +67 -0
  60. gpustack_runtime/detector/pymxsml/mxsml.py +0 -1580
  61. gpustack_runtime/detector/pymxsml/mxsml_extension.py +0 -816
  62. gpustack_runtime/detector/pymxsml/mxsml_mcm.py +0 -476
  63. gpustack_runtime-0.1.40.post1.dist-info/RECORD +0 -55
  64. {gpustack_runtime-0.1.40.post1.dist-info → gpustack_runtime-0.1.41.dist-info}/WHEEL +0 -0
  65. {gpustack_runtime-0.1.40.post1.dist-info → gpustack_runtime-0.1.41.dist-info}/entry_points.txt +0 -0
  66. {gpustack_runtime-0.1.40.post1.dist-info → gpustack_runtime-0.1.41.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,164 @@
1
+ from __future__ import annotations as __future_annotations__
2
+
3
+ from ...detector import (
4
+ Devices,
5
+ ManufacturerEnum,
6
+ detect_devices,
7
+ filter_devices_by_manufacturer,
8
+ )
9
+ from .__types__ import (
10
+ Config,
11
+ ConfigContainerEdits,
12
+ ConfigDevice,
13
+ Generator,
14
+ manufacturer_to_cdi_kind,
15
+ manufacturer_to_runtime_env,
16
+ )
17
+ from .__utils__ import device_to_cdi_device_node, path_to_cdi_mount
18
+
19
+
20
class AscendGenerator(Generator):
    """
    CDI generator for Ascend devices.
    """

    def __init__(self):
        super().__init__(ManufacturerEnum.ASCEND)

    def generate(
        self,
        devices: Devices | None = None,
        include_all_devices: bool = True,
    ) -> Config | None:
        """
        Build the CDI configuration describing the Ascend devices of this host.

        The result carries one entry per device (named by the device index),
        an optional "all" entry, and top-level edits holding the shared
        control nodes, the shared mounts and the runtime env marker.

        Args:
            devices:
                Already detected devices, narrowed to this manufacturer
                before use.
                If None, detection is performed here.
            include_all_devices:
                Whether to append an "all" entry aggregating every per-device
                node.

        Returns:
            The Config object, or None when there are no devices, no CDI kind
            for the manufacturer, no shared device node, or no per-device node.

        """
        devices = (
            detect_devices(manufacturer=self.manufacturer)
            if devices is None
            else filter_devices_by_manufacturer(
                devices,
                manufacturer=self.manufacturer,
            )
        )
        if not devices:
            return None

        kind = manufacturer_to_cdi_kind(self.manufacturer)
        if not kind:
            return None

        # The manager node is looked up under two host paths, in order; the
        # first one yielding a node wins and is always mapped to the same
        # container path. The generator is lazy, so probing stops at the
        # first hit.
        manager_candidates = (
            device_to_cdi_device_node(
                path=host_path,
                container_path="/dev/davinci_manager",
            )
            for host_path in (
                "/dev/davinci_manager_docker",
                "/dev/davinci_manager",
            )
        )
        manager_node = next((node for node in manager_candidates if node), None)
        shared_nodes = [manager_node] if manager_node else []

        # Remaining shared nodes: keep whichever ones the helper resolves.
        shared_nodes.extend(
            node
            for node in (
                device_to_cdi_device_node(path=host_path)
                for host_path in (
                    "/dev/dvpp_cmdlist",
                    "/dev/uburma",
                    "/dev/ummu",
                    "/dev/devmm_svm",
                    "/dev/hisi_hdc",
                )
            )
            if node
        )
        if not shared_nodes:
            return None

        # Shared mounts are optional: an empty list does not abort generation.
        shared_mounts = [
            mount
            for mount in (
                path_to_cdi_mount(path=host_path)
                for host_path in (
                    "/etc/hccl_rootinfo.json",
                    "/usr/local/Ascend/driver/topo",
                    "/usr/local/Ascend/driver/lib64",
                    "/usr/local/Ascend/driver/include",
                    "/usr/local/dcmi",
                    "/usr/local/bin/npu-smi",
                    "/var/queue_schedule",
                )
            )
            if mount
        ]

        entries: list[ConfigDevice] = []
        every_device_node = []
        for device in devices:
            if not device:
                continue

            # Devices flagged as "vgpu" in the appendix use the vdavinci node.
            prefix = (
                "/dev/vdavinci"
                if device.appendix.get("vgpu", False)
                else "/dev/davinci"
            )
            node = device_to_cdi_device_node(
                path=f"{prefix}{device.index}",
            )
            if not node:
                # No node for this device: leave it out entirely.
                continue

            every_device_node.append(node)
            entries.append(
                ConfigDevice(
                    name=str(device.index),
                    container_edits=ConfigContainerEdits(
                        device_nodes=[node],
                    ),
                ),
            )

        if not entries:
            return None

        if include_all_devices:
            # Aggregate entry exposing every per-device node at once.
            entries.append(
                ConfigDevice(
                    name="all",
                    container_edits=ConfigContainerEdits(
                        device_nodes=every_device_node,
                    ),
                ),
            )

        env_name = manufacturer_to_runtime_env(self.manufacturer)
        # NOTE(review): the "void" value presumably keeps a vendor runtime
        # from injecting devices on its own - confirm.
        top_level_edits = ConfigContainerEdits(
            env=[
                f"{env_name}=void",
            ],
            device_nodes=shared_nodes,
            mounts=shared_mounts,
        )

        return Config(
            kind=kind,
            devices=entries,
            container_edits=[top_level_edits],
        )
@@ -0,0 +1,147 @@
1
+ from __future__ import annotations as __future_annotations__
2
+
3
+ from ...detector import (
4
+ Devices,
5
+ ManufacturerEnum,
6
+ detect_devices,
7
+ filter_devices_by_manufacturer,
8
+ )
9
+ from .__types__ import (
10
+ Config,
11
+ ConfigContainerEdits,
12
+ ConfigDevice,
13
+ Generator,
14
+ manufacturer_to_cdi_kind,
15
+ manufacturer_to_runtime_env,
16
+ )
17
+ from .__utils__ import device_to_cdi_device_node
18
+
19
+
20
class HygonGenerator(Generator):
    """
    CDI generator for Hygon devices.
    """

    def __init__(self):
        super().__init__(ManufacturerEnum.HYGON)

    def generate(
        self,
        devices: Devices | None = None,
        include_all_devices: bool = True,
    ) -> Config | None:
        """
        Generate the CDI configuration for Hygon devices.

        Every usable device is published under two names, its index and its
        UUID, both sharing the same container edits. A device is usable when
        at least one of its DRI nodes (card / renderD, taken from the device
        appendix) resolves.

        Args:
            devices:
                The detected devices.
                If None, all available devices are considered.
            include_all_devices:
                Whether to include a device entry that represents all Hygon devices.

        Returns:
            The Config object, or None if not supported, i.e. when there are
            no devices, no CDI kind, no common device node, or no device with
            at least one resolvable node.

        """
        if devices is None:
            devices = detect_devices(manufacturer=self.manufacturer)
        else:
            devices = filter_devices_by_manufacturer(
                devices,
                manufacturer=self.manufacturer,
            )

        if not devices:
            return None

        kind = manufacturer_to_cdi_kind(self.manufacturer)
        if not kind:
            return None

        # Nodes shared by every device; at least one of them must resolve.
        common_device_nodes = []
        for p in [
            "/dev/kfd",
            "/dev/mkfd",
        ]:
            cdn = device_to_cdi_device_node(
                path=p,
            )
            if cdn:
                common_device_nodes.append(cdn)
        if not common_device_nodes:
            return None

        cdi_devices: list[ConfigDevice] = []

        all_device_nodes = []

        for dev in devices:
            if not dev:
                continue

            container_device_nodes = []

            card_id = dev.appendix.get("card_id")
            if card_id is not None:
                cdn = device_to_cdi_device_node(
                    path=f"/dev/dri/card{card_id}",
                )
                if not cdn:
                    # A declared card node that cannot be resolved
                    # disqualifies the whole device.
                    continue
                container_device_nodes.append(cdn)
            renderd_id = dev.appendix.get("renderd_id")
            if renderd_id is not None:
                cdn = device_to_cdi_device_node(
                    path=f"/dev/dri/renderD{renderd_id}",
                )
                if cdn:
                    container_device_nodes.append(cdn)

            # Skip devices that resolved no node at all: publishing an entry
            # whose edits are empty would advertise a device that injects
            # nothing into the container.
            if not container_device_nodes:
                continue

            all_device_nodes.extend(container_device_nodes)

            # Add specific container edits for each device,
            # addressable by index and by UUID.
            cdi_container_edits = ConfigContainerEdits(
                device_nodes=container_device_nodes,
            )
            cdi_devices.append(
                ConfigDevice(
                    name=str(dev.index),
                    container_edits=cdi_container_edits,
                ),
            )
            cdi_devices.append(
                ConfigDevice(
                    name=dev.uuid,
                    container_edits=cdi_container_edits,
                ),
            )

        if not cdi_devices:
            return None

        # Add common container edits for all devices.
        if include_all_devices:
            cdi_devices.append(
                ConfigDevice(
                    name="all",
                    container_edits=ConfigContainerEdits(
                        device_nodes=all_device_nodes,
                    ),
                ),
            )

        runtime_env = manufacturer_to_runtime_env(self.manufacturer)

        return Config(
            kind=kind,
            devices=cdi_devices,
            container_edits=[
                ConfigContainerEdits(
                    env=[
                        f"{runtime_env}=void",
                    ],
                    device_nodes=common_device_nodes,
                ),
            ],
        )
@@ -0,0 +1,136 @@
1
+ from __future__ import annotations as __future_annotations__
2
+
3
+ from ...detector import (
4
+ Devices,
5
+ ManufacturerEnum,
6
+ detect_devices,
7
+ filter_devices_by_manufacturer,
8
+ )
9
+ from .__types__ import (
10
+ Config,
11
+ ConfigContainerEdits,
12
+ ConfigDevice,
13
+ Generator,
14
+ manufacturer_to_cdi_kind,
15
+ manufacturer_to_runtime_env,
16
+ )
17
+ from .__utils__ import device_to_cdi_device_node
18
+
19
+
20
class IluvatarGenerator(Generator):
    """
    CDI generator for Iluvatar devices.
    """

    def __init__(self):
        super().__init__(ManufacturerEnum.ILUVATAR)

    def generate(
        self,
        devices: Devices | None = None,
        include_all_devices: bool = True,
    ) -> Config | None:
        """
        Build the CDI configuration describing the Iluvatar devices of this host.

        Each device whose node resolves is published under two names, its
        index and its UUID, both pointing at the same container edits.

        Args:
            devices:
                Already detected devices, narrowed to this manufacturer
                before use.
                If None, detection is performed here.
            include_all_devices:
                Whether to append an "all" entry aggregating every per-device
                node.

        Returns:
            The Config object, or None when there are no devices, no CDI kind
            for the manufacturer, no shared device node, or no per-device node.

        """
        devices = (
            detect_devices(manufacturer=self.manufacturer)
            if devices is None
            else filter_devices_by_manufacturer(
                devices,
                manufacturer=self.manufacturer,
            )
        )
        if not devices:
            return None

        kind = manufacturer_to_cdi_kind(self.manufacturer)
        if not kind:
            return None

        # Shared control node(s); generation is abandoned when none resolves.
        shared_nodes = [
            node
            for node in (
                device_to_cdi_device_node(path=host_path)
                for host_path in ("/dev/itrctl",)
            )
            if node
        ]
        if not shared_nodes:
            return None

        entries: list[ConfigDevice] = []
        every_device_node = []
        for device in devices:
            if not device:
                continue

            node = device_to_cdi_device_node(
                path=f"/dev/iluvatar{device.index}",
            )
            if not node:
                # No node for this device: leave it out entirely.
                continue
            every_device_node.append(node)

            # One edits object, referenced by both the index and UUID entries.
            edits = ConfigContainerEdits(
                device_nodes=[node],
            )
            entries.extend(
                ConfigDevice(
                    name=entry_name,
                    container_edits=edits,
                )
                for entry_name in (str(device.index), device.uuid)
            )

        if not entries:
            return None

        if include_all_devices:
            # Aggregate entry exposing every per-device node at once.
            entries.append(
                ConfigDevice(
                    name="all",
                    container_edits=ConfigContainerEdits(
                        device_nodes=every_device_node,
                    ),
                ),
            )

        env_name = manufacturer_to_runtime_env(self.manufacturer)
        top_level_edits = ConfigContainerEdits(
            env=[
                f"{env_name}=void",
            ],
            device_nodes=shared_nodes,
        )

        return Config(
            kind=kind,
            devices=entries,
            container_edits=[top_level_edits],
        )
@@ -0,0 +1,148 @@
1
+ from __future__ import annotations as __future_annotations__
2
+
3
+ from ...detector import (
4
+ Devices,
5
+ ManufacturerEnum,
6
+ detect_devices,
7
+ filter_devices_by_manufacturer,
8
+ )
9
+ from .__types__ import (
10
+ Config,
11
+ ConfigContainerEdits,
12
+ ConfigDevice,
13
+ Generator,
14
+ manufacturer_to_cdi_kind,
15
+ manufacturer_to_runtime_env,
16
+ )
17
+ from .__utils__ import device_to_cdi_device_node
18
+
19
+
20
class MetaXGenerator(Generator):
    """
    CDI generator for MetaX devices.
    """

    def __init__(self):
        super().__init__(ManufacturerEnum.METAX)

    def generate(
        self,
        devices: Devices | None = None,
        include_all_devices: bool = True,
    ) -> Config | None:
        """
        Generate the CDI configuration for MetaX devices.

        Every usable device is published under two names, its index and its
        UUID, both sharing the same container edits. A device is usable when
        at least one of its DRI nodes (card / renderD, taken from the device
        appendix) resolves.

        Args:
            devices:
                The detected devices.
                If None, all available devices are considered.
            include_all_devices:
                Whether to include a device entry that represents all MetaX devices.

        Returns:
            The Config object, or None if not supported, i.e. when there are
            no devices, no CDI kind, no common device node, or no device with
            at least one resolvable node.

        """
        if devices is None:
            devices = detect_devices(manufacturer=self.manufacturer)
        else:
            devices = filter_devices_by_manufacturer(
                devices,
                manufacturer=self.manufacturer,
            )

        if not devices:
            return None

        kind = manufacturer_to_cdi_kind(self.manufacturer)
        if not kind:
            return None

        cdi_devices: list[ConfigDevice] = []

        # Nodes shared by every device; at least one of them must resolve.
        common_device_nodes = []
        for p in [
            "/dev/mxcd",
            "/dev/mxnd",
            "/dev/mxgd",
        ]:
            cdn = device_to_cdi_device_node(
                path=p,
            )
            if cdn:
                common_device_nodes.append(cdn)
        if not common_device_nodes:
            return None

        all_device_nodes = []

        for dev in devices:
            if not dev:
                continue

            container_device_nodes = []

            card_id = dev.appendix.get("card_id")
            if card_id is not None:
                cdn = device_to_cdi_device_node(
                    path=f"/dev/dri/card{card_id}",
                )
                if not cdn:
                    # A declared card node that cannot be resolved
                    # disqualifies the whole device.
                    continue
                container_device_nodes.append(cdn)
            renderd_id = dev.appendix.get("renderd_id")
            if renderd_id is not None:
                cdn = device_to_cdi_device_node(
                    path=f"/dev/dri/renderD{renderd_id}",
                )
                if cdn:
                    container_device_nodes.append(cdn)

            # Skip devices that resolved no node at all: publishing an entry
            # whose edits are empty would advertise a device that injects
            # nothing into the container.
            if not container_device_nodes:
                continue

            all_device_nodes.extend(container_device_nodes)

            # Add specific container edits for each device,
            # addressable by index and by UUID.
            cdi_container_edits = ConfigContainerEdits(
                device_nodes=container_device_nodes,
            )
            cdi_devices.append(
                ConfigDevice(
                    name=str(dev.index),
                    container_edits=cdi_container_edits,
                ),
            )
            cdi_devices.append(
                ConfigDevice(
                    name=dev.uuid,
                    container_edits=cdi_container_edits,
                ),
            )

        if not cdi_devices:
            return None

        # Add common container edits for all devices.
        if include_all_devices:
            cdi_devices.append(
                ConfigDevice(
                    name="all",
                    container_edits=ConfigContainerEdits(
                        device_nodes=all_device_nodes,
                    ),
                ),
            )

        runtime_env = manufacturer_to_runtime_env(self.manufacturer)

        return Config(
            kind=kind,
            devices=cdi_devices,
            container_edits=[
                ConfigContainerEdits(
                    env=[
                        f"{runtime_env}=void",
                    ],
                    device_nodes=common_device_nodes,
                ),
            ],
        )