gpustack-runner 0.1.22.post6__py3-none-any.whl → 0.1.23__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -27,8 +27,8 @@ version_tuple: VERSION_TUPLE
27
27
  __commit_id__: COMMIT_ID
28
28
  commit_id: COMMIT_ID
29
29
 
30
- __version__ = version = '0.1.22.post6'
31
- __version_tuple__ = version_tuple = (0, 1, 22, 'post6')
30
+ __version__ = version = '0.1.23'
31
+ __version_tuple__ = version_tuple = (0, 1, 23)
32
32
  try:
33
33
  from ._version_appendix import git_commit
34
34
  __commit_id__ = commit_id = git_commit
@@ -1 +1 @@
1
- git_commit = "89e310e"
1
+ git_commit = "60fcf6e"
@@ -46,6 +46,10 @@ _AVAILABLE_PLATFORMS = [
46
46
  ]
47
47
 
48
48
 
49
+ # Disable overriding the default namespace during image operations.
50
+ os.environ["GPUSTACK_RUNNER_DEFAULT_NAMESPACE"] = "gpustack"
51
+
52
+
49
53
  class ListImagesSubCommand(SubCommand):
50
54
  """
51
55
  Command to list images.
@@ -440,14 +444,14 @@ class SaveImagesSubCommand(SubCommand):
440
444
 
441
445
  command = [
442
446
  "skopeo",
443
- "copy",
444
- "--src-tls-verify=false",
445
- "--retry-times",
446
- str(self.max_retries),
447
447
  "--override-os",
448
448
  override_os,
449
449
  "--override-arch",
450
450
  override_arch,
451
+ "copy",
452
+ "--src-tls-verify=false",
453
+ "--retry-times",
454
+ str(self.max_retries),
451
455
  ]
452
456
  if self.source_username and self.source_password:
453
457
  command.extend(
@@ -767,10 +771,6 @@ class CopyImagesSubCommand(SubCommand):
767
771
  print(f"❌ Error syncing image '{img_name}'")
768
772
  failures.append((img_name, img_err))
769
773
 
770
- override_os, override_arch = None, None
771
- if self.platform:
772
- override_os, override_arch = self.platform.split("/", maxsplit=1)
773
-
774
774
  # Submit tasks
775
775
  for img in images:
776
776
  command = [
@@ -778,20 +778,10 @@ class CopyImagesSubCommand(SubCommand):
778
778
  "copy",
779
779
  "--src-tls-verify=false",
780
780
  "--dest-tls-verify=false",
781
+ "--all",
781
782
  "--retry-times",
782
783
  str(self.max_retries),
783
784
  ]
784
- if override_os and override_arch:
785
- command.extend(
786
- [
787
- "--override-os",
788
- override_os,
789
- "--override-arch",
790
- override_arch,
791
- ],
792
- )
793
- else:
794
- command.append("--all")
795
785
  if self.source_username and self.source_password:
796
786
  command.extend(
797
787
  [
@@ -0,0 +1,112 @@
1
+ from __future__ import annotations
2
+
3
+ from functools import lru_cache
4
+ from os import getenv as sys_getenv
5
+ from typing import TYPE_CHECKING, Any
6
+
7
+ if TYPE_CHECKING:
8
+ from collections.abc import Callable
9
+
10
+ # Global
11
+
12
+ GPUSTACK_RUNNER_DEFAULT_NAMESPACE: str | None = None
13
+ """
14
+ Namespace for default runner images.
15
+ If not set, it should be "gpustack".
16
+ """
17
+
18
+ # --8<-- [start:env-vars-definition]
19
+
20
+ variables: dict[str, Callable[[], Any]] = {
21
+ # Global
22
+ "GPUSTACK_RUNNER_DEFAULT_NAMESPACE": lambda: trim_str(
23
+ getenvs(
24
+ keys=[
25
+ "GPUSTACK_RUNNER_DEFAULT_NAMESPACE",
26
+ "GPUSTACK_RUNTIME_DEPLOY_DEFAULT_NAMESPACE", ## Compatible with gpustack/gpustack_runtime.
27
+ ],
28
+ ),
29
+ ),
30
+ }
31
+
32
+
33
+ # --8<-- [end:env-vars-definition]
34
+
35
+
36
+ @lru_cache
37
+ def __getattr__(name: str):
38
+ # lazy evaluation of environment variables
39
+ if name in variables:
40
+ return variables[name]()
41
+ msg = f"module {__name__} has no attribute {name}"
42
+ raise AttributeError(msg)
43
+
44
+
45
def __dir__():
    """
    List the lazily evaluated attribute names exposed by this module.

    Returns:
        The keys of ``variables`` as a list.

    """
    return [*variables]
47
+
48
+
49
+ def trim_str(value: str | None) -> str | None:
50
+ """
51
+ Trim leading and trailing whitespace from a string.
52
+
53
+ Args:
54
+ value:
55
+ The string to trim.
56
+
57
+ Returns:
58
+ The trimmed string, or None if the input is None.
59
+
60
+ """
61
+ if value is not None:
62
+ return value.strip()
63
+ return None
64
+
65
+
66
_ENV_PREFIX = "GPUSTACK_RUNNER_"


def getenv(key: str, default: Any = None) -> Any:
    """
    Get the value of an environment variable.

    The exact key is looked up first. If it is unset and the key starts with
    "GPUSTACK_RUNNER_", the headless key (the key without that prefix) is
    tried as a fallback.

    Args:
        key:
            The environment variable key.
        default:
            The value to return if neither the key nor its headless form is set.

    Returns:
        The value of the environment variable if the key (or its headless
        form) is set, otherwise `default`.

    """
    value = sys_getenv(key)
    if value is not None:
        return value
    if key.startswith(_ENV_PREFIX):
        # Fall back to the un-prefixed name, e.g. "GPUSTACK_RUNNER_FOO" -> "FOO".
        headless_key = key.removeprefix(_ENV_PREFIX)
        return sys_getenv(headless_key, default)
    return default
91
+
92
+
93
def getenvs(keys: list[str], default: Any = None) -> Any:
    """
    Get the value of the first environment variable that is set.

    The keys are tried in order; each one is resolved with `getenv`.

    Args:
        keys:
            The environment variable keys, in priority order.
        default:
            The value to return if none of the keys are found.

    Returns:
        The value of the first key that is set, otherwise `default`.

    """
    for key in keys:
        value = getenv(key)
        if value is not None:
            return value
    return default
gpustack_runner/runner.py CHANGED
@@ -10,13 +10,15 @@ from typing import Any
10
10
 
11
11
  from dataclasses_json import dataclass_json
12
12
 
13
+ from . import envs
14
+
13
15
  _RE_DOCKER_IMAGE = re.compile(
14
- r"(?:(?P<prefix>[\w\\.\-]+(?:/[\w\\.\-]+)*)/)?gpustack/runner:(?P<backend>(Host|cann|corex|cuda|dtk|maca|rocm))(?P<backend_version>[XY\d\\.]+)(?:-(?P<backend_variant>\w+))?-(?P<service>(vllm|voxbox|mindie|sglang))(?P<service_version>[\w\\.]+)(?:-(?P<suffix>\w+))?",
16
+ r"(?:(?P<prefix>[\w\\.\-]+(?:/[\w\\.\-]+)*)/)?runner:(?P<backend>(Host|cann|corex|cuda|dtk|maca|rocm))(?P<backend_version>[XY\d\\.]+)(?:-(?P<backend_variant>\w+))?-(?P<service>(vllm|voxbox|mindie|sglang))(?P<service_version>[\w\\.]+)(?:-(?P<suffix>\w+))?",
15
17
  )
16
18
  """
17
19
  Regex for Docker image parsing,
18
20
  which captures the following named groups:
19
- - `prefix`: The optional prefix before `gpustack/runner`, e.g. a registry URL or namespace.
21
+ - `prefix`: The optional prefix before `runner`, e.g. a registry URL or namespace.
20
22
  - `backend`: The backend name, e.g. "cann", "cuda", "rocm", etc.
21
23
  - `backend_version`: The backend version, ignored patch version, e.g. "8.2", "12.4", "6.3", etc.
22
24
  - `backend_variant`: The optional backend variant, e.g. "910b", etc.
@@ -33,7 +35,7 @@ def set_re_docker_image(pattern: str):
33
35
  Args:
34
36
  pattern:
35
37
  The regex pattern to set. It should capture the following named groups:
36
- - `prefix`: The optional prefix before `gpustack/runner`, e.g. a registry URL or namespace.
38
+ - `prefix`: The optional prefix before `runner`, e.g. a registry URL or namespace.
37
39
  - `backend`: The backend name, e.g. "cann", "cuda",
38
40
  - `backend_version`: The backend version, ignored patch version, e.g. "8.2", "12.4", "6.3", etc.
39
41
  - `backend_variant`: The optional backend variant, e.g. "910b", etc
@@ -82,7 +84,7 @@ class DockerImage:
82
84
  Parse the Docker image string into a DockerImage object.
83
85
 
84
86
  The given image string must follow the below regex format:
85
- `[prefix/]gpustack/runner:{backend}{backend_version}[-backend_variant]-{service}{service_version}[-suffix]`
87
+ `[prefix/]runner:{backend}{backend_version}[-backend_variant]-{service}{service_version}[-suffix]`
86
88
 
87
89
  Args:
88
90
  image:
@@ -100,7 +102,7 @@ class DockerImage:
100
102
  def __str__(self):
101
103
  parts = [
102
104
  "",
103
- "gpustack/runner:",
105
+ "runner:",
104
106
  self.backend,
105
107
  self.backend_version,
106
108
  ]
@@ -235,7 +237,13 @@ def list_runners(**kwargs) -> Runners | list[dict]:
235
237
  data_path = Path(_data_path) if isinstance(_data_path, str) else _data_path
236
238
  with data_path.open("r", encoding="utf-8") as f:
237
239
  json_list = json.load(f)
238
- runners = [Runner.from_dict(item) for item in json_list]
240
+ runners = []
241
+ for item in json_list:
242
+ if namespace := envs.GPUSTACK_RUNNER_DEFAULT_NAMESPACE:
243
+ docker_image = item["docker_image"]
244
+ docker_image = docker_image.replace("gpustack/", f"{namespace}/")
245
+ item["docker_image"] = docker_image
246
+ runners.append(Runner.from_dict(item))
239
247
 
240
248
  todict = kwargs.pop("todict", False)
241
249
  if not kwargs:
@@ -32,6 +32,28 @@
32
32
  "docker_image": "gpustack/runner:cann8.3-a3-sglang0.5.6.post2",
33
33
  "deprecated": false
34
34
  },
35
+ {
36
+ "backend": "cann",
37
+ "backend_version": "8.3",
38
+ "original_backend_version": "8.3.rc2",
39
+ "backend_variant": "a3",
40
+ "service": "vllm",
41
+ "service_version": "0.13.0",
42
+ "platform": "linux/amd64",
43
+ "docker_image": "gpustack/runner:cann8.3-a3-vllm0.13.0",
44
+ "deprecated": false
45
+ },
46
+ {
47
+ "backend": "cann",
48
+ "backend_version": "8.3",
49
+ "original_backend_version": "8.3.rc2",
50
+ "backend_variant": "a3",
51
+ "service": "vllm",
52
+ "service_version": "0.13.0",
53
+ "platform": "linux/arm64",
54
+ "docker_image": "gpustack/runner:cann8.3-a3-vllm0.13.0",
55
+ "deprecated": false
56
+ },
35
57
  {
36
58
  "backend": "cann",
37
59
  "backend_version": "8.3",
@@ -219,6 +241,28 @@
219
241
  "docker_image": "gpustack/runner:cann8.3-910b-sglang0.5.6.post2",
220
242
  "deprecated": false
221
243
  },
244
+ {
245
+ "backend": "cann",
246
+ "backend_version": "8.3",
247
+ "original_backend_version": "8.3.rc2",
248
+ "backend_variant": "910b",
249
+ "service": "vllm",
250
+ "service_version": "0.13.0",
251
+ "platform": "linux/amd64",
252
+ "docker_image": "gpustack/runner:cann8.3-910b-vllm0.13.0",
253
+ "deprecated": false
254
+ },
255
+ {
256
+ "backend": "cann",
257
+ "backend_version": "8.3",
258
+ "original_backend_version": "8.3.rc2",
259
+ "backend_variant": "910b",
260
+ "service": "vllm",
261
+ "service_version": "0.13.0",
262
+ "platform": "linux/arm64",
263
+ "docker_image": "gpustack/runner:cann8.3-910b-vllm0.13.0",
264
+ "deprecated": false
265
+ },
222
266
  {
223
267
  "backend": "cann",
224
268
  "backend_version": "8.3",
@@ -272,7 +316,7 @@
272
316
  "service_version": "2.1.rc1",
273
317
  "platform": "linux/amd64",
274
318
  "docker_image": "gpustack/runner:cann8.2-910b-mindie2.1.rc1",
275
- "deprecated": false
319
+ "deprecated": true
276
320
  },
277
321
  {
278
322
  "backend": "cann",
@@ -294,7 +338,7 @@
294
338
  "service_version": "2.1.rc1",
295
339
  "platform": "linux/arm64",
296
340
  "docker_image": "gpustack/runner:cann8.2-910b-mindie2.1.rc1",
297
- "deprecated": false
341
+ "deprecated": true
298
342
  },
299
343
  {
300
344
  "backend": "cann",
@@ -492,7 +536,7 @@
492
536
  "service_version": "2.1.rc1",
493
537
  "platform": "linux/amd64",
494
538
  "docker_image": "gpustack/runner:cann8.2-310p-mindie2.1.rc1",
495
- "deprecated": false
539
+ "deprecated": true
496
540
  },
497
541
  {
498
542
  "backend": "cann",
@@ -514,7 +558,7 @@
514
558
  "service_version": "2.1.rc1",
515
559
  "platform": "linux/arm64",
516
560
  "docker_image": "gpustack/runner:cann8.2-310p-mindie2.1.rc1",
517
- "deprecated": false
561
+ "deprecated": true
518
562
  },
519
563
  {
520
564
  "backend": "cann",
@@ -701,7 +745,7 @@
701
745
  "service_version": "0.5.5",
702
746
  "platform": "linux/amd64",
703
747
  "docker_image": "gpustack/runner:cuda12.8-sglang0.5.5",
704
- "deprecated": false
748
+ "deprecated": true
705
749
  },
706
750
  {
707
751
  "backend": "cuda",
@@ -723,7 +767,7 @@
723
767
  "service_version": "0.5.5",
724
768
  "platform": "linux/arm64",
725
769
  "docker_image": "gpustack/runner:cuda12.8-sglang0.5.5",
726
- "deprecated": false
770
+ "deprecated": true
727
771
  },
728
772
  {
729
773
  "backend": "cuda",
@@ -822,7 +866,7 @@
822
866
  "service_version": "0.11.0",
823
867
  "platform": "linux/amd64",
824
868
  "docker_image": "gpustack/runner:cuda12.8-vllm0.11.0",
825
- "deprecated": false
869
+ "deprecated": true
826
870
  },
827
871
  {
828
872
  "backend": "cuda",
@@ -833,7 +877,7 @@
833
877
  "service_version": "0.11.0",
834
878
  "platform": "linux/arm64",
835
879
  "docker_image": "gpustack/runner:cuda12.8-vllm0.11.0",
836
- "deprecated": false
880
+ "deprecated": true
837
881
  },
838
882
  {
839
883
  "backend": "cuda",
@@ -866,7 +910,7 @@
866
910
  "service_version": "0.10.1.1",
867
911
  "platform": "linux/amd64",
868
912
  "docker_image": "gpustack/runner:cuda12.8-vllm0.10.1.1",
869
- "deprecated": false
913
+ "deprecated": true
870
914
  },
871
915
  {
872
916
  "backend": "cuda",
@@ -877,7 +921,7 @@
877
921
  "service_version": "0.10.1.1",
878
922
  "platform": "linux/arm64",
879
923
  "docker_image": "gpustack/runner:cuda12.8-vllm0.10.1.1",
880
- "deprecated": false
924
+ "deprecated": true
881
925
  },
882
926
  {
883
927
  "backend": "cuda",
@@ -888,7 +932,7 @@
888
932
  "service_version": "0.10.0",
889
933
  "platform": "linux/amd64",
890
934
  "docker_image": "gpustack/runner:cuda12.8-vllm0.10.0",
891
- "deprecated": false
935
+ "deprecated": true
892
936
  },
893
937
  {
894
938
  "backend": "cuda",
@@ -899,7 +943,7 @@
899
943
  "service_version": "0.10.0",
900
944
  "platform": "linux/arm64",
901
945
  "docker_image": "gpustack/runner:cuda12.8-vllm0.10.0",
902
- "deprecated": false
946
+ "deprecated": true
903
947
  },
904
948
  {
905
949
  "backend": "cuda",
@@ -932,7 +976,7 @@
932
976
  "service_version": "0.0.20",
933
977
  "platform": "linux/amd64",
934
978
  "docker_image": "gpustack/runner:cuda12.8-voxbox0.0.20",
935
- "deprecated": false
979
+ "deprecated": true
936
980
  },
937
981
  {
938
982
  "backend": "cuda",
@@ -943,7 +987,7 @@
943
987
  "service_version": "0.0.20",
944
988
  "platform": "linux/arm64",
945
989
  "docker_image": "gpustack/runner:cuda12.8-voxbox0.0.20",
946
- "deprecated": false
990
+ "deprecated": true
947
991
  },
948
992
  {
949
993
  "backend": "cuda",
@@ -1020,7 +1064,7 @@
1020
1064
  "service_version": "0.11.0",
1021
1065
  "platform": "linux/amd64",
1022
1066
  "docker_image": "gpustack/runner:cuda12.6-vllm0.11.0",
1023
- "deprecated": false
1067
+ "deprecated": true
1024
1068
  },
1025
1069
  {
1026
1070
  "backend": "cuda",
@@ -1031,7 +1075,7 @@
1031
1075
  "service_version": "0.11.0",
1032
1076
  "platform": "linux/arm64",
1033
1077
  "docker_image": "gpustack/runner:cuda12.6-vllm0.11.0",
1034
- "deprecated": false
1078
+ "deprecated": true
1035
1079
  },
1036
1080
  {
1037
1081
  "backend": "cuda",
@@ -1064,7 +1108,7 @@
1064
1108
  "service_version": "0.10.1.1",
1065
1109
  "platform": "linux/amd64",
1066
1110
  "docker_image": "gpustack/runner:cuda12.6-vllm0.10.1.1",
1067
- "deprecated": false
1111
+ "deprecated": true
1068
1112
  },
1069
1113
  {
1070
1114
  "backend": "cuda",
@@ -1075,7 +1119,7 @@
1075
1119
  "service_version": "0.10.1.1",
1076
1120
  "platform": "linux/arm64",
1077
1121
  "docker_image": "gpustack/runner:cuda12.6-vllm0.10.1.1",
1078
- "deprecated": false
1122
+ "deprecated": true
1079
1123
  },
1080
1124
  {
1081
1125
  "backend": "cuda",
@@ -1086,7 +1130,7 @@
1086
1130
  "service_version": "0.10.0",
1087
1131
  "platform": "linux/amd64",
1088
1132
  "docker_image": "gpustack/runner:cuda12.6-vllm0.10.0",
1089
- "deprecated": false
1133
+ "deprecated": true
1090
1134
  },
1091
1135
  {
1092
1136
  "backend": "cuda",
@@ -1097,7 +1141,7 @@
1097
1141
  "service_version": "0.10.0",
1098
1142
  "platform": "linux/arm64",
1099
1143
  "docker_image": "gpustack/runner:cuda12.6-vllm0.10.0",
1100
- "deprecated": false
1144
+ "deprecated": true
1101
1145
  },
1102
1146
  {
1103
1147
  "backend": "cuda",
@@ -1130,7 +1174,7 @@
1130
1174
  "service_version": "0.0.20",
1131
1175
  "platform": "linux/amd64",
1132
1176
  "docker_image": "gpustack/runner:cuda12.6-voxbox0.0.20",
1133
- "deprecated": false
1177
+ "deprecated": true
1134
1178
  },
1135
1179
  {
1136
1180
  "backend": "cuda",
@@ -1141,7 +1185,7 @@
1141
1185
  "service_version": "0.0.20",
1142
1186
  "platform": "linux/arm64",
1143
1187
  "docker_image": "gpustack/runner:cuda12.6-voxbox0.0.20",
1144
- "deprecated": false
1188
+ "deprecated": true
1145
1189
  },
1146
1190
  {
1147
1191
  "backend": "cuda",
@@ -1152,7 +1196,7 @@
1152
1196
  "service_version": "0.11.0",
1153
1197
  "platform": "linux/amd64",
1154
1198
  "docker_image": "gpustack/runner:cuda12.4-vllm0.11.0",
1155
- "deprecated": false
1199
+ "deprecated": true
1156
1200
  },
1157
1201
  {
1158
1202
  "backend": "cuda",
@@ -1163,7 +1207,7 @@
1163
1207
  "service_version": "0.11.0",
1164
1208
  "platform": "linux/arm64",
1165
1209
  "docker_image": "gpustack/runner:cuda12.4-vllm0.11.0",
1166
- "deprecated": false
1210
+ "deprecated": true
1167
1211
  },
1168
1212
  {
1169
1213
  "backend": "cuda",
@@ -1174,7 +1218,7 @@
1174
1218
  "service_version": "0.10.2",
1175
1219
  "platform": "linux/amd64",
1176
1220
  "docker_image": "gpustack/runner:cuda12.4-vllm0.10.2",
1177
- "deprecated": false
1221
+ "deprecated": true
1178
1222
  },
1179
1223
  {
1180
1224
  "backend": "cuda",
@@ -1185,7 +1229,7 @@
1185
1229
  "service_version": "0.10.2",
1186
1230
  "platform": "linux/arm64",
1187
1231
  "docker_image": "gpustack/runner:cuda12.4-vllm0.10.2",
1188
- "deprecated": false
1232
+ "deprecated": true
1189
1233
  },
1190
1234
  {
1191
1235
  "backend": "cuda",
@@ -1196,7 +1240,7 @@
1196
1240
  "service_version": "0.10.1.1",
1197
1241
  "platform": "linux/amd64",
1198
1242
  "docker_image": "gpustack/runner:cuda12.4-vllm0.10.1.1",
1199
- "deprecated": false
1243
+ "deprecated": true
1200
1244
  },
1201
1245
  {
1202
1246
  "backend": "cuda",
@@ -1207,7 +1251,7 @@
1207
1251
  "service_version": "0.10.1.1",
1208
1252
  "platform": "linux/arm64",
1209
1253
  "docker_image": "gpustack/runner:cuda12.4-vllm0.10.1.1",
1210
- "deprecated": false
1254
+ "deprecated": true
1211
1255
  },
1212
1256
  {
1213
1257
  "backend": "cuda",
@@ -1218,7 +1262,7 @@
1218
1262
  "service_version": "0.10.0",
1219
1263
  "platform": "linux/amd64",
1220
1264
  "docker_image": "gpustack/runner:cuda12.4-vllm0.10.0",
1221
- "deprecated": false
1265
+ "deprecated": true
1222
1266
  },
1223
1267
  {
1224
1268
  "backend": "cuda",
@@ -1229,7 +1273,7 @@
1229
1273
  "service_version": "0.10.0",
1230
1274
  "platform": "linux/arm64",
1231
1275
  "docker_image": "gpustack/runner:cuda12.4-vllm0.10.0",
1232
- "deprecated": false
1276
+ "deprecated": true
1233
1277
  },
1234
1278
  {
1235
1279
  "backend": "cuda",
@@ -1240,7 +1284,7 @@
1240
1284
  "service_version": "0.0.20",
1241
1285
  "platform": "linux/amd64",
1242
1286
  "docker_image": "gpustack/runner:cuda12.4-voxbox0.0.20",
1243
- "deprecated": false
1287
+ "deprecated": true
1244
1288
  },
1245
1289
  {
1246
1290
  "backend": "cuda",
@@ -1251,7 +1295,7 @@
1251
1295
  "service_version": "0.0.20",
1252
1296
  "platform": "linux/arm64",
1253
1297
  "docker_image": "gpustack/runner:cuda12.4-voxbox0.0.20",
1254
- "deprecated": false
1298
+ "deprecated": true
1255
1299
  },
1256
1300
  {
1257
1301
  "backend": "dtk",
@@ -1350,7 +1394,7 @@
1350
1394
  "service_version": "0.11.0",
1351
1395
  "platform": "linux/amd64",
1352
1396
  "docker_image": "gpustack/runner:rocm7.0-vllm0.11.0",
1353
- "deprecated": false
1397
+ "deprecated": true
1354
1398
  },
1355
1399
  {
1356
1400
  "backend": "rocm",
@@ -1427,7 +1471,7 @@
1427
1471
  "service_version": "0.10.1.1",
1428
1472
  "platform": "linux/amd64",
1429
1473
  "docker_image": "gpustack/runner:rocm6.3-vllm0.10.1.1",
1430
- "deprecated": false
1474
+ "deprecated": true
1431
1475
  },
1432
1476
  {
1433
1477
  "backend": "rocm",
@@ -1438,6 +1482,6 @@
1438
1482
  "service_version": "0.10.0",
1439
1483
  "platform": "linux/amd64",
1440
1484
  "docker_image": "gpustack/runner:rocm6.3-vllm0.10.0",
1441
- "deprecated": false
1485
+ "deprecated": true
1442
1486
  }
1443
1487
  ]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: gpustack-runner
3
- Version: 0.1.22.post6
3
+ Version: 0.1.23
4
4
  Summary: GPUStack Runner is a library for registering runnable accelerated backends and services in GPUStack.
5
5
  Project-URL: Homepage, https://github.com/gpustack/runner
6
6
  Project-URL: Bug Tracker, https://github.com/gpustack/gpustack/issues
@@ -46,26 +46,24 @@ The following table lists the supported accelerated backends and their correspon
46
46
 
47
47
  ### Ascend CANN
48
48
 
49
+ > [!CAUTION]
50
+ > Since v0.1.23:
51
+ > - Deprecated MindIE `2.1.rc1`.
52
+
49
53
  > [!WARNING]
50
54
  > - The Atlas 300I series is currently experimental in vLLM, only supporting eager mode and float16 data type. And there
51
55
  are some known issues for running vLLM, you can refer to
52
56
  vllm-ascend [#3316](https://github.com/vllm-project/vllm-ascend/issues/3316)
53
57
  and [#2795](https://github.com/vllm-project/vllm-ascend/issues/2795).
54
58
 
55
- > [!IMPORTANT]
56
- > - Applied [ATB model patched](https://github.com/gpustack/gpustack/issues/2016#issuecomment-3646603380) to MindIE 2.2.rc1/2.1.rc2.
57
- > - Applied [ATB config patched](https://github.com/gpustack/gpustack/issues/3551) to MindIE 2.2.rc1.
58
- > - Applied [av package](https://github.com/gpustack/gpustack/issues/2016#issuecomment-3631228085) to MindIE 2.2.rc1/2.1.rc2.
59
- > - Update vLLM 0.11.0 with stable vLLM Ascend plugin.
60
-
61
- | CANN Version <br/> (Variant) | MindIE | vLLM | SGLang |
62
- |------------------------------|--------------------------|--------------------------------------------------------------------------|------------------------|
63
- | 8.3 (A3/910C) | **`2.2.rc1`** | `0.12.0`, **`0.11.0`** | `0.5.6.post2` |
64
- | 8.3 (910B) | **`2.2.rc1`** | `0.12.0`, **`0.11.0`** | `0.5.6.post2` |
65
- | 8.3 (310P) | **`2.2.rc1`** | | |
66
- | 8.2 (A3/910C) | **`2.1.rc2`** | ~~`0.11.0`~~, `0.10.2`, <br/>`0.10.1.1` | `0.5.2`, `0.5.1.post3` |
67
- | 8.2 (910B) | **`2.1.rc2`**, `2.1.rc1` | ~~`0.11.0`~~, `0.10.2`, <br/>`0.10.1.1`, `0.10.0`, <br/>`0.9.2`, `0.9.1` | `0.5.2`, `0.5.1.post3` |
68
- | 8.2 (310P) | **`2.1.rc2`**, `2.1.rc1` | `0.10.0`, `0.9.2` | |
59
+ | CANN Version <br/> (Variant) | MindIE | vLLM | SGLang |
60
+ |------------------------------|--------------------------|------------------------------------------------------------|------------------------|
61
+ | 8.3 (A3/910C) | `2.2.rc1` | `0.13.0`, `0.12.0`, `0.11.0` | `0.5.6.post2` |
62
+ | 8.3 (910B) | `2.2.rc1` | `0.13.0`, `0.12.0`, `0.11.0` | `0.5.6.post2` |
63
+ | 8.3 (310P) | `2.2.rc1` | | |
64
+ | 8.2 (A3/910C) | `2.1.rc2` | `0.10.2`, `0.10.1.1` | `0.5.2`, `0.5.1.post3` |
65
+ | 8.2 (910B) | `2.1.rc2`, ~~`2.1.rc1`~~ | `0.10.2`, `0.10.1.1`, <br/>`0.10.0`, `0.9.2`, <br/>`0.9.1` | `0.5.2`, `0.5.1.post3` |
66
+ | 8.2 (310P) | `2.1.rc2`, ~~`2.1.rc1`~~ | `0.10.0`, `0.9.2` | |
69
67
 
70
68
  ### Iluvatar CoreX
71
69
 
@@ -75,6 +73,13 @@ The following table lists the supported accelerated backends and their correspon
75
73
 
76
74
  ### NVIDIA CUDA
77
75
 
76
+ > [!CAUTION]
77
+ > Since v0.1.23:
78
+ > - Deprecated all services for CUDA 12.4.
79
+ > - Deprecated vLLM `0.11.0`, `0.10.1.1`, `0.10.0`.
80
+ > - Deprecated SGLang `0.5.5`.
81
+ > - Deprecated VoxBox `0.0.20`.
82
+
78
83
  > [!NOTE]
79
84
  > - CUDA 12.9 supports Compute Capabilities:
80
85
  `7.5 8.0+PTX 8.9 9.0 10.0 10.3 12.0 12.1+PTX`.
@@ -83,16 +88,12 @@ The following table lists the supported accelerated backends and their correspon
83
88
  > - CUDA 12.6/12.4 supports Compute Capabilities:
84
89
  `7.5 8.0+PTX 8.9 9.0+PTX`.
85
90
 
86
- > [!IMPORTANT]
87
- > - Applied [Qwen2.5 VL patched](https://github.com/gpustack/gpustack/issues/3606) to vLLM 0.11.2.
88
- > - Applied [vLLM[audio] packages](https://github.com/vllm-project/vllm/blob/275de34170654274616082721348b7edd9741d32/setup.py#L720-L724) to vLLM 0.11.2.
89
-
90
- | CUDA Version <br/> (Variant) | vLLM | SGLang | VoxBox |
91
- |------------------------------|-------------------------------------------------------------------------------------------|-----------------------------------------------------------|--------------------|
92
- | 12.9 | `0.13.0`, `0.12.0`, <br/>**`0.11.2`** | `0.5.6.post2` | |
93
- | 12.8 | `0.13.0`, `0.12.0`, <br/>**`0.11.2`**, `0.11.0`, <br/>`0.10.2`, `0.10.1.1`, <br/>`0.10.0` | `0.5.6.post2`, `0.5.5.post3`, <br/>`0.5.5`, `0.5.4.post3` | `0.0.21`, `0.0.20` |
94
- | 12.6 | `0.13.0`, `0.12.0`, <br/>**`0.11.2`**, `0.11.0`, <br/>`0.10.2`, `0.10.1.1`, <br/>`0.10.0` | `0.5.6.post2` | `0.0.21`, `0.0.20` |
95
- | 12.4 | `0.11.0`, `0.10.2`, <br/>`0.10.1.1`, `0.10.0` | | `0.0.20` |
91
+ | CUDA Version <br/> (Variant) | vLLM | SGLang | VoxBox |
92
+ |------------------------------|---------------------------------------------------------------------------------------------------|---------------------------------------------------------------|------------------------|
93
+ | 12.9 | `0.13.0`, `0.12.0`, <br/>`0.11.2` | `0.5.6.post2` | |
94
+ | 12.8 | `0.13.0`, `0.12.0`, <br/>`0.11.2`, ~~`0.11.0`~~, <br/>`0.10.2`, ~~`0.10.1.1`~~, <br/>~~`0.10.0`~~ | `0.5.6.post2`, `0.5.5.post3`, <br/>~~`0.5.5`~~, `0.5.4.post3` | `0.0.21`, ~~`0.0.20`~~ |
95
+ | 12.6 | `0.13.0`, `0.12.0`, <br/>`0.11.2`, ~~`0.11.0`~~, <br/>`0.10.2`, ~~`0.10.1.1`~~, <br/>~~`0.10.0`~~ | `0.5.6.post2` | `0.0.21`, ~~`0.0.20`~~ |
96
+ | 12.4 | ~~`0.11.0`~~, ~~`0.10.2`~~, <br/>~~`0.10.1.1`~~, ~~`0.10.0`~~ | | ~~`0.0.20`~~ |
96
97
 
97
98
  ### Hygon DTK
98
99
 
@@ -109,6 +110,11 @@ The following table lists the supported accelerated backends and their correspon
109
110
 
110
111
  ### AMD ROCm
111
112
 
113
+ > [!CAUTION]
114
+ > Since v0.1.23:
115
+ > - Deprecated all services for ROCm 6.3.
116
+ > - Deprecated vLLM `0.11.0`.
117
+
112
118
  > [!NOTE]
113
119
  > - ROCm 7.0 supports LLVM targets:
114
120
  `gfx908 gfx90a gfx942 gfx950 gfx1030 gfx1100 gfx1101 gfx1200 gfx1201 gfx1150 gfx1151`.
@@ -118,18 +124,15 @@ The following table lists the supported accelerated backends and their correspon
118
124
  > [!WARNING]
119
125
  > - ROCm 7.0 vLLM `0.11.2/0.11.0` are reusing the official ROCm 6.4 PyTorch 2.9 wheel package rather than a ROCm
120
126
  7.0 specific PyTorch build. Although vLLM `0.11.2/0.11.0` supports ROCm 7.0, `gfx1150/gfx1151` are not supported yet.
121
- > - SGLang supports `gfx942` only.
122
127
  > - ROCm 6.4 vLLM `0.13.0` supports `gfx903 gfx90a gfx942` only.
123
-
124
- > [!IMPORTANT]
125
- > - Applied [vLLM[audio] packages](https://github.com/vllm-project/vllm/blob/275de34170654274616082721348b7edd9741d32/setup.py#L720-L724) to vLLM 0.11.2.
126
- > - Applied [petit-kernel package](https://github.com/vllm-project/vllm/blob/275de34170654274616082721348b7edd9741d32/setup.py#L728) to vLLM 0.11.2 and SGLang 0.5.5.post3.
127
-
128
- | ROCm Version <br/> (Variant) | vLLM | SGLang |
129
- |------------------------------|-------------------------------------------------|----------------------------------|
130
- | 7.0 | `0.13.0`, `0.12.0`, <br/>**`0.11.2`**, `0.11.0` | `0.5.6.post2` |
131
- | 6.4 | `0.13.0`, `0.12.0`, <br/>**`0.11.2`**, `0.10.2` | `0.5.6.post2`, **`0.5.5.post3`** |
132
- | 6.3 | `0.10.1.1`, `0.10.0` | |
128
+ > - ROCm 6.4 SGLang supports `gfx942` only.
129
+ > - ROCm 7.0 SGLang supports `gfx950` only.
130
+
131
+ | ROCm Version <br/> (Variant) | vLLM | SGLang |
132
+ |------------------------------|-------------------------------------------------|------------------------------|
133
+ | 7.0 | `0.13.0`, `0.12.0`, <br/>`0.11.2`, ~~`0.11.0`~~ | `0.5.6.post2` |
134
+ | 6.4 | `0.13.0`, `0.12.0`, <br/>`0.11.2`, `0.10.2` | `0.5.6.post2`, `0.5.5.post3` |
135
+ | 6.3 | ~~`0.10.1.1`~~, ~~`0.10.0`~~ | |
133
136
 
134
137
  ## Directory Structure
135
138
 
@@ -0,0 +1,16 @@
1
+ gpustack_runner/__init__.py,sha256=0_0jsxo1xjLtHTOIEU0_-A1qFEANzsVw-uXGjcILDwk,530
2
+ gpustack_runner/__main__.py,sha256=wtcp9lwMkaXGbQkuOY08EQhKfIHcTLSjMdnj2W3UGwk,1285
3
+ gpustack_runner/_version.py,sha256=K62qjl2rYPWyHlip9dUMtefmyJJ6D7jByOIPNGERJAo,777
4
+ gpustack_runner/_version.pyi,sha256=A42NoSgcqEXVy2OeNm4LXC9CbyonbooYrSUBlPm2lGY,156
5
+ gpustack_runner/envs.py,sha256=zxnUw42fLRZxGmcg9amVIU6SvoeXDGejcqBRggLHkVE,2630
6
+ gpustack_runner/runner.py,sha256=YOaTDFQyOfF3rza6llIM_qWsMdeXyp43oXzNM1JutyQ,26148
7
+ gpustack_runner/runner.py.json,sha256=So5CUA7iPG8TvLKRrtqD3p5c2WEKSKdifXPLmyNn3aI,41462
8
+ gpustack_runner/cmds/__init__.py,sha256=Os8FdvqNjLYiVn_jnDo7rFEtAeVLJJI1odKHEqWF-Fw,417
9
+ gpustack_runner/cmds/__types__.py,sha256=7C4kQM0EHPD8WpJpTo6kh9rEdkrYALcLQ-GAzMMsqV8,789
10
+ gpustack_runner/cmds/images.py,sha256=ZbimSOqC1DRiEOl434MDugAgZ_1b66xYbpI9AUbZDxk,38194
11
+ gpustack_runner/_version_appendix.py,sha256=qDsJqPC7KSnVxuj64wzviBnNEX3UXxL0YUJIn8v6Nts,23
12
+ gpustack_runner-0.1.23.dist-info/METADATA,sha256=vN7evuTwPXLM5VzKvky7sQ5KYtHBaPgCEIhlKUXdkDs,12858
13
+ gpustack_runner-0.1.23.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
14
+ gpustack_runner-0.1.23.dist-info/entry_points.txt,sha256=M1Dxl6cY0kIgf2I4pPsV-_kU6BAtjj93spmsXAdwW3s,66
15
+ gpustack_runner-0.1.23.dist-info/licenses/LICENSE,sha256=OiPibowBvB-NHV3TP_NOj18XNBlXcshXZFMpa3uvKVE,10362
16
+ gpustack_runner-0.1.23.dist-info/RECORD,,
@@ -1,15 +0,0 @@
1
- gpustack_runner/__init__.py,sha256=0_0jsxo1xjLtHTOIEU0_-A1qFEANzsVw-uXGjcILDwk,530
2
- gpustack_runner/__main__.py,sha256=wtcp9lwMkaXGbQkuOY08EQhKfIHcTLSjMdnj2W3UGwk,1285
3
- gpustack_runner/_version.py,sha256=pIUR8n7_4Bd9D7o3RWEFrNGX_pXd3M4slwMPehMqzpo,792
4
- gpustack_runner/_version.pyi,sha256=A42NoSgcqEXVy2OeNm4LXC9CbyonbooYrSUBlPm2lGY,156
5
- gpustack_runner/runner.py,sha256=GA7vy4xxaN63cFoc7_Ecft4mV5uYJe_DkQKkR1Htz3o,25881
6
- gpustack_runner/runner.py.json,sha256=cwWroAmmvF1H9mT1tOPmWAZESKeclUd67EqOEBlCvs0,40265
7
- gpustack_runner/cmds/__init__.py,sha256=Os8FdvqNjLYiVn_jnDo7rFEtAeVLJJI1odKHEqWF-Fw,417
8
- gpustack_runner/cmds/__types__.py,sha256=7C4kQM0EHPD8WpJpTo6kh9rEdkrYALcLQ-GAzMMsqV8,789
9
- gpustack_runner/cmds/images.py,sha256=8V2TAhXCuBge6rbCQoUKm35f1BO_YuvKBb8RyUbf1DA,38609
10
- gpustack_runner/_version_appendix.py,sha256=nZgvyWkZ4NwLqYqLz19uxgnRTsKI8k72-8S0HcpSbds,23
11
- gpustack_runner-0.1.22.post6.dist-info/METADATA,sha256=OAu8viR1pw70O7w69sug_YkBcHNpJmWQ5XJyHWQ8uto,13538
12
- gpustack_runner-0.1.22.post6.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
13
- gpustack_runner-0.1.22.post6.dist-info/entry_points.txt,sha256=M1Dxl6cY0kIgf2I4pPsV-_kU6BAtjj93spmsXAdwW3s,66
14
- gpustack_runner-0.1.22.post6.dist-info/licenses/LICENSE,sha256=OiPibowBvB-NHV3TP_NOj18XNBlXcshXZFMpa3uvKVE,10362
15
- gpustack_runner-0.1.22.post6.dist-info/RECORD,,