gpustack-runner 0.1.22.post6__tar.gz → 0.1.23__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {gpustack_runner-0.1.22.post6 → gpustack_runner-0.1.23}/.gitignore +3 -0
- {gpustack_runner-0.1.22.post6 → gpustack_runner-0.1.23}/PKG-INFO +39 -36
- {gpustack_runner-0.1.22.post6 → gpustack_runner-0.1.23}/README.md +38 -35
- {gpustack_runner-0.1.22.post6 → gpustack_runner-0.1.23}/gpustack_runner/_version.py +2 -2
- gpustack_runner-0.1.23/gpustack_runner/_version_appendix.py +1 -0
- {gpustack_runner-0.1.22.post6 → gpustack_runner-0.1.23}/gpustack_runner/cmds/images.py +9 -19
- gpustack_runner-0.1.23/gpustack_runner/envs.py +112 -0
- {gpustack_runner-0.1.22.post6 → gpustack_runner-0.1.23}/gpustack_runner/runner.py +14 -6
- {gpustack_runner-0.1.22.post6 → gpustack_runner-0.1.23}/gpustack_runner/runner.py.json +79 -35
- {gpustack_runner-0.1.22.post6 → gpustack_runner-0.1.23}/pack/cann/Dockerfile +2 -1
- {gpustack_runner-0.1.22.post6 → gpustack_runner-0.1.23}/tests/gpustack_runner/fixtures/test_docker_image.json +5 -5
- gpustack_runner-0.1.23/tests/gpustack_runner/fixtures/test_list_backend_runners.json +51 -0
- {gpustack_runner-0.1.22.post6 → gpustack_runner-0.1.23}/tests/gpustack_runner/fixtures/test_list_runners_by_backend.json +79 -35
- gpustack_runner-0.1.23/tests/gpustack_runner/fixtures/test_list_runners_by_prefix.json +68 -0
- {gpustack_runner-0.1.22.post6 → gpustack_runner-0.1.23}/tests/gpustack_runner/fixtures/test_list_service_runners.json +69 -54
- gpustack_runner-0.1.22.post6/gpustack_runner/_version_appendix.py +0 -1
- gpustack_runner-0.1.22.post6/tests/gpustack_runner/fixtures/test_list_backend_runners.json +0 -145
- gpustack_runner-0.1.22.post6/tests/gpustack_runner/fixtures/test_list_runners_by_prefix.json +0 -68
- {gpustack_runner-0.1.22.post6 → gpustack_runner-0.1.23}/.codespelldict +0 -0
- {gpustack_runner-0.1.22.post6 → gpustack_runner-0.1.23}/.codespellrc +0 -0
- {gpustack_runner-0.1.22.post6 → gpustack_runner-0.1.23}/.gitattributes +0 -0
- {gpustack_runner-0.1.22.post6 → gpustack_runner-0.1.23}/.pre-commit-config.yaml +0 -0
- {gpustack_runner-0.1.22.post6 → gpustack_runner-0.1.23}/.python-version +0 -0
- {gpustack_runner-0.1.22.post6 → gpustack_runner-0.1.23}/LICENSE +0 -0
- {gpustack_runner-0.1.22.post6 → gpustack_runner-0.1.23}/Makefile +0 -0
- {gpustack_runner-0.1.22.post6 → gpustack_runner-0.1.23}/docs/index.md +0 -0
- {gpustack_runner-0.1.22.post6 → gpustack_runner-0.1.23}/docs/modules/gpustack_runner.md +0 -0
- {gpustack_runner-0.1.22.post6 → gpustack_runner-0.1.23}/gpustack_runner/__init__.py +0 -0
- {gpustack_runner-0.1.22.post6 → gpustack_runner-0.1.23}/gpustack_runner/__main__.py +0 -0
- {gpustack_runner-0.1.22.post6 → gpustack_runner-0.1.23}/gpustack_runner/_version.pyi +0 -0
- {gpustack_runner-0.1.22.post6 → gpustack_runner-0.1.23}/gpustack_runner/cmds/__init__.py +0 -0
- {gpustack_runner-0.1.22.post6 → gpustack_runner-0.1.23}/gpustack_runner/cmds/__types__.py +0 -0
- {gpustack_runner-0.1.22.post6 → gpustack_runner-0.1.23}/hatch.toml +0 -0
- {gpustack_runner-0.1.22.post6 → gpustack_runner-0.1.23}/mkdocs.yml +0 -0
- {gpustack_runner-0.1.22.post6 → gpustack_runner-0.1.23}/pack/.post_operation/20251020_vllm_install_lmcache/cann/Dockerfile +0 -0
- {gpustack_runner-0.1.22.post6 → gpustack_runner-0.1.23}/pack/.post_operation/20251020_vllm_install_lmcache/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.22.post6 → gpustack_runner-0.1.23}/pack/.post_operation/20251020_vllm_install_lmcache/matrix.yaml +0 -0
- {gpustack_runner-0.1.22.post6 → gpustack_runner-0.1.23}/pack/.post_operation/20251020_vllm_install_lmcache/rocm/Dockerfile +0 -0
- {gpustack_runner-0.1.22.post6 → gpustack_runner-0.1.23}/pack/.post_operation/20251022_vllm_install_ray_client/cann/Dockerfile +0 -0
- {gpustack_runner-0.1.22.post6 → gpustack_runner-0.1.23}/pack/.post_operation/20251022_vllm_install_ray_client/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.22.post6 → gpustack_runner-0.1.23}/pack/.post_operation/20251022_vllm_install_ray_client/matrix.yaml +0 -0
- {gpustack_runner-0.1.22.post6 → gpustack_runner-0.1.23}/pack/.post_operation/20251022_vllm_install_ray_client/rocm/Dockerfile +0 -0
- {gpustack_runner-0.1.22.post6 → gpustack_runner-0.1.23}/pack/.post_operation/20251022_vllm_install_ray_default/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.22.post6 → gpustack_runner-0.1.23}/pack/.post_operation/20251022_vllm_install_ray_default/matrix.yaml +0 -0
- {gpustack_runner-0.1.22.post6 → gpustack_runner-0.1.23}/pack/.post_operation/20251022_vllm_install_ray_default/rocm/Dockerfile +0 -0
- {gpustack_runner-0.1.22.post6 → gpustack_runner-0.1.23}/pack/.post_operation/20251024_vllm_install_nvidia_hpcx/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.22.post6 → gpustack_runner-0.1.23}/pack/.post_operation/20251024_vllm_install_nvidia_hpcx/matrix.yaml +0 -0
- {gpustack_runner-0.1.22.post6 → gpustack_runner-0.1.23}/pack/.post_operation/20251024_vllm_reinstall_lmcache/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.22.post6 → gpustack_runner-0.1.23}/pack/.post_operation/20251024_vllm_reinstall_lmcache/matrix.yaml +0 -0
- {gpustack_runner-0.1.22.post6 → gpustack_runner-0.1.23}/pack/.post_operation/20251029_vllm_reinstall_ray/cann/Dockerfile +0 -0
- {gpustack_runner-0.1.22.post6 → gpustack_runner-0.1.23}/pack/.post_operation/20251029_vllm_reinstall_ray/matrix.yaml +0 -0
- {gpustack_runner-0.1.22.post6 → gpustack_runner-0.1.23}/pack/.post_operation/20251103_mindie_refresh_entrypoint/cann/Dockerfile +0 -0
- {gpustack_runner-0.1.22.post6 → gpustack_runner-0.1.23}/pack/.post_operation/20251103_mindie_refresh_entrypoint/matrix.yaml +0 -0
- {gpustack_runner-0.1.22.post6 → gpustack_runner-0.1.23}/pack/.post_operation/20251105_vllm_polish_nvidia_hpcx/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.22.post6 → gpustack_runner-0.1.23}/pack/.post_operation/20251105_vllm_polish_nvidia_hpcx/matrix.yaml +0 -0
- {gpustack_runner-0.1.22.post6 → gpustack_runner-0.1.23}/pack/.post_operation/20251106_vllm_install_ep_kernel/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.22.post6 → gpustack_runner-0.1.23}/pack/.post_operation/20251106_vllm_install_ep_kernel/matrix.yaml +0 -0
- {gpustack_runner-0.1.22.post6 → gpustack_runner-0.1.23}/pack/.post_operation/20251107_vllm_reinstall_lmcache/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.22.post6 → gpustack_runner-0.1.23}/pack/.post_operation/20251107_vllm_reinstall_lmcache/matrix.yaml +0 -0
- {gpustack_runner-0.1.22.post6 → gpustack_runner-0.1.23}/pack/.post_operation/20251110_sglang_install_diffusion/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.22.post6 → gpustack_runner-0.1.23}/pack/.post_operation/20251110_sglang_install_diffusion/matrix.yaml +0 -0
- {gpustack_runner-0.1.22.post6 → gpustack_runner-0.1.23}/pack/.post_operation/20251110_sglang_install_flashattn/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.22.post6 → gpustack_runner-0.1.23}/pack/.post_operation/20251110_sglang_install_flashattn/matrix.yaml +0 -0
- {gpustack_runner-0.1.22.post6 → gpustack_runner-0.1.23}/pack/.post_operation/20251125_mindie_install_posix_ipc/cann/Dockerfile +0 -0
- {gpustack_runner-0.1.22.post6 → gpustack_runner-0.1.23}/pack/.post_operation/20251125_mindie_install_posix_ipc/matrix.yaml +0 -0
- {gpustack_runner-0.1.22.post6 → gpustack_runner-0.1.23}/pack/.post_operation/20251201_vllm_patch_qwen2_5_vl/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.22.post6 → gpustack_runner-0.1.23}/pack/.post_operation/20251201_vllm_patch_qwen2_5_vl/cuda/patches/vllm_001_disable_flashatten_in_qwen2_5_vl.patch +0 -0
- {gpustack_runner-0.1.22.post6 → gpustack_runner-0.1.23}/pack/.post_operation/20251201_vllm_patch_qwen2_5_vl/matrix.yaml +0 -0
- {gpustack_runner-0.1.22.post6 → gpustack_runner-0.1.23}/pack/.post_operation/20251209_mindie_install_av/cann/Dockerfile +0 -0
- {gpustack_runner-0.1.22.post6 → gpustack_runner-0.1.23}/pack/.post_operation/20251209_mindie_install_av/matrix.yaml +0 -0
- {gpustack_runner-0.1.22.post6 → gpustack_runner-0.1.23}/pack/.post_operation/20251213_mindie_patch_minicpm_qwen2_v2/cann/Dockerfile +0 -0
- {gpustack_runner-0.1.22.post6 → gpustack_runner-0.1.23}/pack/.post_operation/20251213_mindie_patch_minicpm_qwen2_v2/cann/patches.zip +0 -0
- {gpustack_runner-0.1.22.post6 → gpustack_runner-0.1.23}/pack/.post_operation/20251213_mindie_patch_minicpm_qwen2_v2/matrix.yaml +0 -0
- {gpustack_runner-0.1.22.post6 → gpustack_runner-0.1.23}/pack/.post_operation/20251213_sglang_patch_server_args/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.22.post6 → gpustack_runner-0.1.23}/pack/.post_operation/20251213_sglang_patch_server_args/cuda/patches/sglang_001_fix_server_args.patch +0 -0
- {gpustack_runner-0.1.22.post6 → gpustack_runner-0.1.23}/pack/.post_operation/20251213_sglang_patch_server_args/matrix.yaml +0 -0
- {gpustack_runner-0.1.22.post6 → gpustack_runner-0.1.23}/pack/.post_operation/20251214_cuda_several_patches/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.22.post6 → gpustack_runner-0.1.23}/pack/.post_operation/20251214_cuda_several_patches/matrix.yaml +0 -0
- {gpustack_runner-0.1.22.post6 → gpustack_runner-0.1.23}/pack/.post_operation/20251215_cann_several_patches/cann/Dockerfile +0 -0
- {gpustack_runner-0.1.22.post6 → gpustack_runner-0.1.23}/pack/.post_operation/20251215_cann_several_patches/matrix.yaml +0 -0
- {gpustack_runner-0.1.22.post6 → gpustack_runner-0.1.23}/pack/.post_operation/20251216_sglang_uninstall_runai_model_streamer/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.22.post6 → gpustack_runner-0.1.23}/pack/.post_operation/20251216_sglang_uninstall_runai_model_streamer/matrix.yaml +0 -0
- {gpustack_runner-0.1.22.post6 → gpustack_runner-0.1.23}/pack/.post_operation/20251219_rocm_install_petit_kernel/matrix.yaml +0 -0
- {gpustack_runner-0.1.22.post6 → gpustack_runner-0.1.23}/pack/.post_operation/20251219_rocm_install_petit_kernel/rocm/Dockerfile +0 -0
- {gpustack_runner-0.1.22.post6 → gpustack_runner-0.1.23}/pack/.post_operation/20251219_vllm_install_audio_extra/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.22.post6 → gpustack_runner-0.1.23}/pack/.post_operation/20251219_vllm_install_audio_extra/matrix.yaml +0 -0
- {gpustack_runner-0.1.22.post6 → gpustack_runner-0.1.23}/pack/.post_operation/20251219_vllm_install_audio_extra/rocm/Dockerfile +0 -0
- {gpustack_runner-0.1.22.post6 → gpustack_runner-0.1.23}/pack/.post_operation/20251224_mindie_patch_atb_config/cann/Dockerfile +0 -0
- {gpustack_runner-0.1.22.post6 → gpustack_runner-0.1.23}/pack/.post_operation/20251224_mindie_patch_atb_config/matrix.yaml +0 -0
- {gpustack_runner-0.1.22.post6 → gpustack_runner-0.1.23}/pack/.post_operation/README.md +0 -0
- {gpustack_runner-0.1.22.post6 → gpustack_runner-0.1.23}/pack/cann/mindie-atb-models_2.2.rc1_linux-amd64_py3.11_torch2.1.0-abi0.tar.gz +0 -0
- {gpustack_runner-0.1.22.post6 → gpustack_runner-0.1.23}/pack/cann/mindie-atb-models_2.2.rc1_linux-arm64_py3.11_torch2.1.0-abi0.tar.gz +0 -0
- {gpustack_runner-0.1.22.post6 → gpustack_runner-0.1.23}/pack/cann/patches/mindie.zip +0 -0
- {gpustack_runner-0.1.22.post6 → gpustack_runner-0.1.23}/pack/corex/Dockerfile +0 -0
- {gpustack_runner-0.1.22.post6 → gpustack_runner-0.1.23}/pack/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.22.post6 → gpustack_runner-0.1.23}/pack/discard_runner.sh +0 -0
- {gpustack_runner-0.1.22.post6 → gpustack_runner-0.1.23}/pack/dtk/Dockerfile +0 -0
- {gpustack_runner-0.1.22.post6 → gpustack_runner-0.1.23}/pack/expand_matrix.sh +0 -0
- {gpustack_runner-0.1.22.post6 → gpustack_runner-0.1.23}/pack/maca/Dockerfile +0 -0
- {gpustack_runner-0.1.22.post6 → gpustack_runner-0.1.23}/pack/matrix.yaml +0 -0
- {gpustack_runner-0.1.22.post6 → gpustack_runner-0.1.23}/pack/merge_runner.sh +0 -0
- {gpustack_runner-0.1.22.post6 → gpustack_runner-0.1.23}/pack/prune_runner.sh +0 -0
- {gpustack_runner-0.1.22.post6 → gpustack_runner-0.1.23}/pack/rocm/Dockerfile +0 -0
- {gpustack_runner-0.1.22.post6 → gpustack_runner-0.1.23}/pack/rocm/patches/sglang_001_wrong_vram.patch +0 -0
- {gpustack_runner-0.1.22.post6 → gpustack_runner-0.1.23}/pyproject.toml +0 -0
- {gpustack_runner-0.1.22.post6 → gpustack_runner-0.1.23}/pytest.ini +0 -0
- {gpustack_runner-0.1.22.post6 → gpustack_runner-0.1.23}/ruff.toml +0 -0
- {gpustack_runner-0.1.22.post6 → gpustack_runner-0.1.23}/tests/gpustack_runner/fixtures/__init__.py +0 -0
- {gpustack_runner-0.1.22.post6 → gpustack_runner-0.1.23}/tests/gpustack_runner/test_runner.py +0 -0
- {gpustack_runner-0.1.22.post6 → gpustack_runner-0.1.23}/tools/activate +0 -0
- {gpustack_runner-0.1.22.post6 → gpustack_runner-0.1.23}/tools/chat.sh +0 -0
- {gpustack_runner-0.1.22.post6 → gpustack_runner-0.1.23}/tools/chat_tool_current_date_time.sh +0 -0
- {gpustack_runner-0.1.22.post6 → gpustack_runner-0.1.23}/tools/chat_tool_get_temperature.sh +0 -0
- {gpustack_runner-0.1.22.post6 → gpustack_runner-0.1.23}/tools/chat_tool_get_weather.sh +0 -0
- {gpustack_runner-0.1.22.post6 → gpustack_runner-0.1.23}/tools/chat_tool_square_of_number.sh +0 -0
- {gpustack_runner-0.1.22.post6 → gpustack_runner-0.1.23}/tools/chat_tool_square_root_of_number.sh +0 -0
- {gpustack_runner-0.1.22.post6 → gpustack_runner-0.1.23}/tools/chat_tool_where_am_i.sh +0 -0
- {gpustack_runner-0.1.22.post6 → gpustack_runner-0.1.23}/tools/run_runner.sh +0 -0
- {gpustack_runner-0.1.22.post6 → gpustack_runner-0.1.23}/tools/run_runner_cluster.sh +0 -0
- {gpustack_runner-0.1.22.post6 → gpustack_runner-0.1.23}/uv.lock +0 -0
- {gpustack_runner-0.1.22.post6 → gpustack_runner-0.1.23}/uv.toml +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: gpustack-runner
|
|
3
|
-
Version: 0.1.22.post6
|
|
3
|
+
Version: 0.1.23
|
|
4
4
|
Summary: GPUStack Runner is a library for registering runnable accelerated backends and services in GPUStack.
|
|
5
5
|
Project-URL: Homepage, https://github.com/gpustack/runner
|
|
6
6
|
Project-URL: Bug Tracker, https://github.com/gpustack/gpustack/issues
|
|
@@ -46,26 +46,24 @@ The following table lists the supported accelerated backends and their correspon
|
|
|
46
46
|
|
|
47
47
|
### Ascend CANN
|
|
48
48
|
|
|
49
|
+
> [!CAUTION]
|
|
50
|
+
> Since v0.1.23:
|
|
51
|
+
> - Deprecated MindIE `2.1.rc1`.
|
|
52
|
+
|
|
49
53
|
> [!WARNING]
|
|
50
54
|
> - The Atlas 300I series is currently experimental in vLLM, only supporting eager mode and float16 data type. And there
|
|
51
55
|
are some known issues for running vLLM, you can refer to
|
|
52
56
|
vllm-ascend [#3316](https://github.com/vllm-project/vllm-ascend/issues/3316)
|
|
53
57
|
and [#2795](https://github.com/vllm-project/vllm-ascend/issues/2795).
|
|
54
58
|
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
|
62
|
-
|
|
63
|
-
| 8.3 (A3/910C) | **`2.2.rc1`** | `0.12.0`, **`0.11.0`** | `0.5.6.post2` |
|
|
64
|
-
| 8.3 (910B) | **`2.2.rc1`** | `0.12.0`, **`0.11.0`** | `0.5.6.post2` |
|
|
65
|
-
| 8.3 (310P) | **`2.2.rc1`** | | |
|
|
66
|
-
| 8.2 (A3/910C) | **`2.1.rc2`** | ~~`0.11.0`~~, `0.10.2`, <br/>`0.10.1.1` | `0.5.2`, `0.5.1.post3` |
|
|
67
|
-
| 8.2 (910B) | **`2.1.rc2`**, `2.1.rc1` | ~~`0.11.0`~~, `0.10.2`, <br/>`0.10.1.1`, `0.10.0`, <br/>`0.9.2`, `0.9.1` | `0.5.2`, `0.5.1.post3` |
|
|
68
|
-
| 8.2 (310P) | **`2.1.rc2`**, `2.1.rc1` | `0.10.0`, `0.9.2` | |
|
|
59
|
+
| CANN Version <br/> (Variant) | MindIE | vLLM | SGLang |
|
|
60
|
+
|------------------------------|--------------------------|------------------------------------------------------------|------------------------|
|
|
61
|
+
| 8.3 (A3/910C) | `2.2.rc1` | `0.13.0`, `0.12.0`, `0.11.0` | `0.5.6.post2` |
|
|
62
|
+
| 8.3 (910B) | `2.2.rc1` | `0.13.0`, `0.12.0`, `0.11.0` | `0.5.6.post2` |
|
|
63
|
+
| 8.3 (310P) | `2.2.rc1` | | |
|
|
64
|
+
| 8.2 (A3/910C) | `2.1.rc2` | `0.10.2`, `0.10.1.1` | `0.5.2`, `0.5.1.post3` |
|
|
65
|
+
| 8.2 (910B) | `2.1.rc2`, ~~`2.1.rc1`~~ | `0.10.2`, `0.10.1.1`, <br/>`0.10.0`, `0.9.2`, <br/>`0.9.1` | `0.5.2`, `0.5.1.post3` |
|
|
66
|
+
| 8.2 (310P) | `2.1.rc2`, ~~`2.1.rc1`~~ | `0.10.0`, `0.9.2` | |
|
|
69
67
|
|
|
70
68
|
### Iluvatar CoreX
|
|
71
69
|
|
|
@@ -75,6 +73,13 @@ The following table lists the supported accelerated backends and their correspon
|
|
|
75
73
|
|
|
76
74
|
### NVIDIA CUDA
|
|
77
75
|
|
|
76
|
+
> [!CAUTION]
|
|
77
|
+
> Since v0.1.23:
|
|
78
|
+
> - Deprecated all services for CUDA 12.4.
|
|
79
|
+
> - Deprecated vLLM `0.11.0`, `0.10.1.1`, `0.10.0`.
|
|
80
|
+
> - Deprecated SGLang `0.5.5`.
|
|
81
|
+
> - Deprecated VoxBox `0.0.20`.
|
|
82
|
+
|
|
78
83
|
> [!NOTE]
|
|
79
84
|
> - CUDA 12.9 supports Compute Capabilities:
|
|
80
85
|
`7.5 8.0+PTX 8.9 9.0 10.0 10.3 12.0 12.1+PTX`.
|
|
@@ -83,16 +88,12 @@ The following table lists the supported accelerated backends and their correspon
|
|
|
83
88
|
> - CUDA 12.6/12.4 supports Compute Capabilities:
|
|
84
89
|
`7.5 8.0+PTX 8.9 9.0+PTX`.
|
|
85
90
|
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
|
91
|
-
|
|
92
|
-
| 12.9 | `0.13.0`, `0.12.0`, <br/>**`0.11.2`** | `0.5.6.post2` | |
|
|
93
|
-
| 12.8 | `0.13.0`, `0.12.0`, <br/>**`0.11.2`**, `0.11.0`, <br/>`0.10.2`, `0.10.1.1`, <br/>`0.10.0` | `0.5.6.post2`, `0.5.5.post3`, <br/>`0.5.5`, `0.5.4.post3` | `0.0.21`, `0.0.20` |
|
|
94
|
-
| 12.6 | `0.13.0`, `0.12.0`, <br/>**`0.11.2`**, `0.11.0`, <br/>`0.10.2`, `0.10.1.1`, <br/>`0.10.0` | `0.5.6.post2` | `0.0.21`, `0.0.20` |
|
|
95
|
-
| 12.4 | `0.11.0`, `0.10.2`, <br/>`0.10.1.1`, `0.10.0` | | `0.0.20` |
|
|
91
|
+
| CUDA Version <br/> (Variant) | vLLM | SGLang | VoxBox |
|
|
92
|
+
|------------------------------|---------------------------------------------------------------------------------------------------|---------------------------------------------------------------|------------------------|
|
|
93
|
+
| 12.9 | `0.13.0`, `0.12.0`, <br/>`0.11.2` | `0.5.6.post2` | |
|
|
94
|
+
| 12.8 | `0.13.0`, `0.12.0`, <br/>`0.11.2`, ~~`0.11.0`~~, <br/>`0.10.2`, ~~`0.10.1.1`~~, <br/>~~`0.10.0`~~ | `0.5.6.post2`, `0.5.5.post3`, <br/>~~`0.5.5`~~, `0.5.4.post3` | `0.0.21`, ~~`0.0.20`~~ |
|
|
95
|
+
| 12.6 | `0.13.0`, `0.12.0`, <br/>`0.11.2`, ~~`0.11.0`~~, <br/>`0.10.2`, ~~`0.10.1.1`~~, <br/>~~`0.10.0`~~ | `0.5.6.post2` | `0.0.21`, ~~`0.0.20`~~ |
|
|
96
|
+
| 12.4 | ~~`0.11.0`~~, ~~`0.10.2`~~, <br/>~~`0.10.1.1`~~, ~~`0.10.0`~~ | | ~~`0.0.20`~~ |
|
|
96
97
|
|
|
97
98
|
### Hygon DTK
|
|
98
99
|
|
|
@@ -109,6 +110,11 @@ The following table lists the supported accelerated backends and their correspon
|
|
|
109
110
|
|
|
110
111
|
### AMD ROCm
|
|
111
112
|
|
|
113
|
+
> [!CAUTION]
|
|
114
|
+
> Since v0.1.23:
|
|
115
|
+
> - Deprecated all services for ROCm 6.3.
|
|
116
|
+
> - Deprecated vLLM `0.11.0`.
|
|
117
|
+
|
|
112
118
|
> [!NOTE]
|
|
113
119
|
> - ROCm 7.0 supports LLVM targets:
|
|
114
120
|
`gfx908 gfx90a gfx942 gfx950 gfx1030 gfx1100 gfx1101 gfx1200 gfx1201 gfx1150 gfx1151`.
|
|
@@ -118,18 +124,15 @@ The following table lists the supported accelerated backends and their correspon
|
|
|
118
124
|
> [!WARNING]
|
|
119
125
|
> - ROCm 7.0 vLLM `0.11.2/0.11.0` are reusing the official ROCm 6.4 PyTorch 2.9 wheel package rather than a ROCm
|
|
120
126
|
7.0 specific PyTorch build. Although ROCm 7.0 is supported in vLLM `0.11.2/0.11.0`, `gfx1150/gfx1151` are not supported yet.
|
|
121
|
-
> - SGLang supports `gfx942` only.
|
|
122
127
|
> - ROCm 6.4 vLLM `0.13.0` supports `gfx903 gfx90a gfx942` only.
|
|
123
|
-
|
|
124
|
-
>
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
|
129
|
-
|
|
130
|
-
|
|
|
131
|
-
| 6.4 | `0.13.0`, `0.12.0`, <br/>**`0.11.2`**, `0.10.2` | `0.5.6.post2`, **`0.5.5.post3`** |
|
|
132
|
-
| 6.3 | `0.10.1.1`, `0.10.0` | |
|
|
128
|
+
> - ROCm 6.4 SGLang supports `gfx942` only.
|
|
129
|
+
> - ROCm 7.0 SGLang supports `gfx950` only.
|
|
130
|
+
|
|
131
|
+
| ROCm Version <br/> (Variant) | vLLM | SGLang |
|
|
132
|
+
|------------------------------|-------------------------------------------------|------------------------------|
|
|
133
|
+
| 7.0 | `0.13.0`, `0.12.0`, <br/>`0.11.2`, ~~`0.11.0`~~ | `0.5.6.post2` |
|
|
134
|
+
| 6.4 | `0.13.0`, `0.12.0`, <br/>`0.11.2`, `0.10.2` | `0.5.6.post2`, `0.5.5.post3` |
|
|
135
|
+
| 6.3 | ~~`0.10.1.1`~~, ~~`0.10.0`~~ | |
|
|
133
136
|
|
|
134
137
|
## Directory Structure
|
|
135
138
|
|
|
@@ -26,26 +26,24 @@ The following table lists the supported accelerated backends and their correspon
|
|
|
26
26
|
|
|
27
27
|
### Ascend CANN
|
|
28
28
|
|
|
29
|
+
> [!CAUTION]
|
|
30
|
+
> Since v0.1.23:
|
|
31
|
+
> - Deprecated MindIE `2.1.rc1`.
|
|
32
|
+
|
|
29
33
|
> [!WARNING]
|
|
30
34
|
> - The Atlas 300I series is currently experimental in vLLM, only supporting eager mode and float16 data type. And there
|
|
31
35
|
are some known issues for running vLLM, you can refer to
|
|
32
36
|
vllm-ascend [#3316](https://github.com/vllm-project/vllm-ascend/issues/3316)
|
|
33
37
|
and [#2795](https://github.com/vllm-project/vllm-ascend/issues/2795).
|
|
34
38
|
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
|
42
|
-
|
|
43
|
-
| 8.3 (A3/910C) | **`2.2.rc1`** | `0.12.0`, **`0.11.0`** | `0.5.6.post2` |
|
|
44
|
-
| 8.3 (910B) | **`2.2.rc1`** | `0.12.0`, **`0.11.0`** | `0.5.6.post2` |
|
|
45
|
-
| 8.3 (310P) | **`2.2.rc1`** | | |
|
|
46
|
-
| 8.2 (A3/910C) | **`2.1.rc2`** | ~~`0.11.0`~~, `0.10.2`, <br/>`0.10.1.1` | `0.5.2`, `0.5.1.post3` |
|
|
47
|
-
| 8.2 (910B) | **`2.1.rc2`**, `2.1.rc1` | ~~`0.11.0`~~, `0.10.2`, <br/>`0.10.1.1`, `0.10.0`, <br/>`0.9.2`, `0.9.1` | `0.5.2`, `0.5.1.post3` |
|
|
48
|
-
| 8.2 (310P) | **`2.1.rc2`**, `2.1.rc1` | `0.10.0`, `0.9.2` | |
|
|
39
|
+
| CANN Version <br/> (Variant) | MindIE | vLLM | SGLang |
|
|
40
|
+
|------------------------------|--------------------------|------------------------------------------------------------|------------------------|
|
|
41
|
+
| 8.3 (A3/910C) | `2.2.rc1` | `0.13.0`, `0.12.0`, `0.11.0` | `0.5.6.post2` |
|
|
42
|
+
| 8.3 (910B) | `2.2.rc1` | `0.13.0`, `0.12.0`, `0.11.0` | `0.5.6.post2` |
|
|
43
|
+
| 8.3 (310P) | `2.2.rc1` | | |
|
|
44
|
+
| 8.2 (A3/910C) | `2.1.rc2` | `0.10.2`, `0.10.1.1` | `0.5.2`, `0.5.1.post3` |
|
|
45
|
+
| 8.2 (910B) | `2.1.rc2`, ~~`2.1.rc1`~~ | `0.10.2`, `0.10.1.1`, <br/>`0.10.0`, `0.9.2`, <br/>`0.9.1` | `0.5.2`, `0.5.1.post3` |
|
|
46
|
+
| 8.2 (310P) | `2.1.rc2`, ~~`2.1.rc1`~~ | `0.10.0`, `0.9.2` | |
|
|
49
47
|
|
|
50
48
|
### Iluvatar CoreX
|
|
51
49
|
|
|
@@ -55,6 +53,13 @@ The following table lists the supported accelerated backends and their correspon
|
|
|
55
53
|
|
|
56
54
|
### NVIDIA CUDA
|
|
57
55
|
|
|
56
|
+
> [!CAUTION]
|
|
57
|
+
> Since v0.1.23:
|
|
58
|
+
> - Deprecated all services for CUDA 12.4.
|
|
59
|
+
> - Deprecated vLLM `0.11.0`, `0.10.1.1`, `0.10.0`.
|
|
60
|
+
> - Deprecated SGLang `0.5.5`.
|
|
61
|
+
> - Deprecated VoxBox `0.0.20`.
|
|
62
|
+
|
|
58
63
|
> [!NOTE]
|
|
59
64
|
> - CUDA 12.9 supports Compute Capabilities:
|
|
60
65
|
`7.5 8.0+PTX 8.9 9.0 10.0 10.3 12.0 12.1+PTX`.
|
|
@@ -63,16 +68,12 @@ The following table lists the supported accelerated backends and their correspon
|
|
|
63
68
|
> - CUDA 12.6/12.4 supports Compute Capabilities:
|
|
64
69
|
`7.5 8.0+PTX 8.9 9.0+PTX`.
|
|
65
70
|
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
|
71
|
-
|
|
72
|
-
| 12.9 | `0.13.0`, `0.12.0`, <br/>**`0.11.2`** | `0.5.6.post2` | |
|
|
73
|
-
| 12.8 | `0.13.0`, `0.12.0`, <br/>**`0.11.2`**, `0.11.0`, <br/>`0.10.2`, `0.10.1.1`, <br/>`0.10.0` | `0.5.6.post2`, `0.5.5.post3`, <br/>`0.5.5`, `0.5.4.post3` | `0.0.21`, `0.0.20` |
|
|
74
|
-
| 12.6 | `0.13.0`, `0.12.0`, <br/>**`0.11.2`**, `0.11.0`, <br/>`0.10.2`, `0.10.1.1`, <br/>`0.10.0` | `0.5.6.post2` | `0.0.21`, `0.0.20` |
|
|
75
|
-
| 12.4 | `0.11.0`, `0.10.2`, <br/>`0.10.1.1`, `0.10.0` | | `0.0.20` |
|
|
71
|
+
| CUDA Version <br/> (Variant) | vLLM | SGLang | VoxBox |
|
|
72
|
+
|------------------------------|---------------------------------------------------------------------------------------------------|---------------------------------------------------------------|------------------------|
|
|
73
|
+
| 12.9 | `0.13.0`, `0.12.0`, <br/>`0.11.2` | `0.5.6.post2` | |
|
|
74
|
+
| 12.8 | `0.13.0`, `0.12.0`, <br/>`0.11.2`, ~~`0.11.0`~~, <br/>`0.10.2`, ~~`0.10.1.1`~~, <br/>~~`0.10.0`~~ | `0.5.6.post2`, `0.5.5.post3`, <br/>~~`0.5.5`~~, `0.5.4.post3` | `0.0.21`, ~~`0.0.20`~~ |
|
|
75
|
+
| 12.6 | `0.13.0`, `0.12.0`, <br/>`0.11.2`, ~~`0.11.0`~~, <br/>`0.10.2`, ~~`0.10.1.1`~~, <br/>~~`0.10.0`~~ | `0.5.6.post2` | `0.0.21`, ~~`0.0.20`~~ |
|
|
76
|
+
| 12.4 | ~~`0.11.0`~~, ~~`0.10.2`~~, <br/>~~`0.10.1.1`~~, ~~`0.10.0`~~ | | ~~`0.0.20`~~ |
|
|
76
77
|
|
|
77
78
|
### Hygon DTK
|
|
78
79
|
|
|
@@ -89,6 +90,11 @@ The following table lists the supported accelerated backends and their correspon
|
|
|
89
90
|
|
|
90
91
|
### AMD ROCm
|
|
91
92
|
|
|
93
|
+
> [!CAUTION]
|
|
94
|
+
> Since v0.1.23:
|
|
95
|
+
> - Deprecated all services for ROCm 6.3.
|
|
96
|
+
> - Deprecated vLLM `0.11.0`.
|
|
97
|
+
|
|
92
98
|
> [!NOTE]
|
|
93
99
|
> - ROCm 7.0 supports LLVM targets:
|
|
94
100
|
`gfx908 gfx90a gfx942 gfx950 gfx1030 gfx1100 gfx1101 gfx1200 gfx1201 gfx1150 gfx1151`.
|
|
@@ -98,18 +104,15 @@ The following table lists the supported accelerated backends and their correspon
|
|
|
98
104
|
> [!WARNING]
|
|
99
105
|
> - ROCm 7.0 vLLM `0.11.2/0.11.0` are reusing the official ROCm 6.4 PyTorch 2.9 wheel package rather than a ROCm
|
|
100
106
|
7.0 specific PyTorch build. Although ROCm 7.0 is supported in vLLM `0.11.2/0.11.0`, `gfx1150/gfx1151` are not supported yet.
|
|
101
|
-
> - SGLang supports `gfx942` only.
|
|
102
107
|
> - ROCm 6.4 vLLM `0.13.0` supports `gfx903 gfx90a gfx942` only.
|
|
103
|
-
|
|
104
|
-
>
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
|
109
|
-
|
|
110
|
-
|
|
|
111
|
-
| 6.4 | `0.13.0`, `0.12.0`, <br/>**`0.11.2`**, `0.10.2` | `0.5.6.post2`, **`0.5.5.post3`** |
|
|
112
|
-
| 6.3 | `0.10.1.1`, `0.10.0` | |
|
|
108
|
+
> - ROCm 6.4 SGLang supports `gfx942` only.
|
|
109
|
+
> - ROCm 7.0 SGLang supports `gfx950` only.
|
|
110
|
+
|
|
111
|
+
| ROCm Version <br/> (Variant) | vLLM | SGLang |
|
|
112
|
+
|------------------------------|-------------------------------------------------|------------------------------|
|
|
113
|
+
| 7.0 | `0.13.0`, `0.12.0`, <br/>`0.11.2`, ~~`0.11.0`~~ | `0.5.6.post2` |
|
|
114
|
+
| 6.4 | `0.13.0`, `0.12.0`, <br/>`0.11.2`, `0.10.2` | `0.5.6.post2`, `0.5.5.post3` |
|
|
115
|
+
| 6.3 | ~~`0.10.1.1`~~, ~~`0.10.0`~~ | |
|
|
113
116
|
|
|
114
117
|
## Directory Structure
|
|
115
118
|
|
|
@@ -27,8 +27,8 @@ version_tuple: VERSION_TUPLE
|
|
|
27
27
|
__commit_id__: COMMIT_ID
|
|
28
28
|
commit_id: COMMIT_ID
|
|
29
29
|
|
|
30
|
-
__version__ = version = '0.1.22.post6'
|
|
31
|
-
__version_tuple__ = version_tuple = (0, 1, 22, 'post6')
|
|
30
|
+
__version__ = version = '0.1.23'
|
|
31
|
+
__version_tuple__ = version_tuple = (0, 1, 23)
|
|
32
32
|
try:
|
|
33
33
|
from ._version_appendix import git_commit
|
|
34
34
|
__commit_id__ = commit_id = git_commit
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
git_commit = "60fcf6e"
|
|
@@ -46,6 +46,10 @@ _AVAILABLE_PLATFORMS = [
|
|
|
46
46
|
]
|
|
47
47
|
|
|
48
48
|
|
|
49
|
+
# Disable overriding default namespace at images operations.
|
|
50
|
+
os.environ["GPUSTACK_RUNNER_DEFAULT_NAMESPACE"] = "gpustack"
|
|
51
|
+
|
|
52
|
+
|
|
49
53
|
class ListImagesSubCommand(SubCommand):
|
|
50
54
|
"""
|
|
51
55
|
Command to list images.
|
|
@@ -440,14 +444,14 @@ class SaveImagesSubCommand(SubCommand):
|
|
|
440
444
|
|
|
441
445
|
command = [
|
|
442
446
|
"skopeo",
|
|
443
|
-
"copy",
|
|
444
|
-
"--src-tls-verify=false",
|
|
445
|
-
"--retry-times",
|
|
446
|
-
str(self.max_retries),
|
|
447
447
|
"--override-os",
|
|
448
448
|
override_os,
|
|
449
449
|
"--override-arch",
|
|
450
450
|
override_arch,
|
|
451
|
+
"copy",
|
|
452
|
+
"--src-tls-verify=false",
|
|
453
|
+
"--retry-times",
|
|
454
|
+
str(self.max_retries),
|
|
451
455
|
]
|
|
452
456
|
if self.source_username and self.source_password:
|
|
453
457
|
command.extend(
|
|
@@ -767,10 +771,6 @@ class CopyImagesSubCommand(SubCommand):
|
|
|
767
771
|
print(f"❌ Error syncing image '{img_name}'")
|
|
768
772
|
failures.append((img_name, img_err))
|
|
769
773
|
|
|
770
|
-
override_os, override_arch = None, None
|
|
771
|
-
if self.platform:
|
|
772
|
-
override_os, override_arch = self.platform.split("/", maxsplit=1)
|
|
773
|
-
|
|
774
774
|
# Submit tasks
|
|
775
775
|
for img in images:
|
|
776
776
|
command = [
|
|
@@ -778,20 +778,10 @@ class CopyImagesSubCommand(SubCommand):
|
|
|
778
778
|
"copy",
|
|
779
779
|
"--src-tls-verify=false",
|
|
780
780
|
"--dest-tls-verify=false",
|
|
781
|
+
"--all",
|
|
781
782
|
"--retry-times",
|
|
782
783
|
str(self.max_retries),
|
|
783
784
|
]
|
|
784
|
-
if override_os and override_arch:
|
|
785
|
-
command.extend(
|
|
786
|
-
[
|
|
787
|
-
"--override-os",
|
|
788
|
-
override_os,
|
|
789
|
-
"--override-arch",
|
|
790
|
-
override_arch,
|
|
791
|
-
],
|
|
792
|
-
)
|
|
793
|
-
else:
|
|
794
|
-
command.append("--all")
|
|
795
785
|
if self.source_username and self.source_password:
|
|
796
786
|
command.extend(
|
|
797
787
|
[
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from functools import lru_cache
|
|
4
|
+
from os import getenv as sys_getenv
|
|
5
|
+
from typing import TYPE_CHECKING, Any
|
|
6
|
+
|
|
7
|
+
if TYPE_CHECKING:
|
|
8
|
+
from collections.abc import Callable
|
|
9
|
+
|
|
10
|
+
# Global
|
|
11
|
+
|
|
12
|
+
GPUSTACK_RUNNER_DEFAULT_NAMESPACE: str | None = None
|
|
13
|
+
"""
|
|
14
|
+
Namespace for default runner images.
|
|
15
|
+
If not set, it should be "gpustack".
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
# --8<-- [start:env-vars-definition]
|
|
19
|
+
|
|
20
|
+
# Registry of supported settings: each name maps to a zero-argument
# callable, so the environment is only consulted when the callable runs.
variables: dict[str, Callable[[], Any]] = {
    # Global
    "GPUSTACK_RUNNER_DEFAULT_NAMESPACE": lambda: trim_str(
        getenvs(
            [
                "GPUSTACK_RUNNER_DEFAULT_NAMESPACE",
                # Second key kept for compatibility with gpustack/gpustack_runtime.
                "GPUSTACK_RUNTIME_DEPLOY_DEFAULT_NAMESPACE",
            ],
        ),
    ),
}
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
# --8<-- [end:env-vars-definition]
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
@lru_cache
def __getattr__(name: str):
    """
    Resolve a module attribute through the `variables` table.

    Results are memoized by `lru_cache`, so the callable registered for
    a given name is invoked once and later lookups reuse its result.

    Args:
        name:
            The attribute name being looked up on this module.

    Returns:
        The result of calling the resolver registered under `name`.

    Raises:
        AttributeError:
            If `name` is not a key of `variables`.

    """
    # Guard clause: anything not registered is not an attribute of this module.
    if name not in variables:
        msg = f"module {__name__} has no attribute {name}"
        raise AttributeError(msg)
    return variables[name]()
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def __dir__():
    """
    List the names registered in `variables`.

    Returns:
        A new list holding the keys of `variables`, in insertion order.

    """
    return list(variables)
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def trim_str(value: str | None) -> str | None:
|
|
50
|
+
"""
|
|
51
|
+
Trim leading and trailing whitespace from a string.
|
|
52
|
+
|
|
53
|
+
Args:
|
|
54
|
+
value:
|
|
55
|
+
The string to trim.
|
|
56
|
+
|
|
57
|
+
Returns:
|
|
58
|
+
The trimmed string, or None if the input is None.
|
|
59
|
+
|
|
60
|
+
"""
|
|
61
|
+
if value is not None:
|
|
62
|
+
return value.strip()
|
|
63
|
+
return None
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
_ENV_PREFIX = "GPUSTACK_RUNNER_"
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def getenv(key: str, default=None) -> any | None:
|
|
70
|
+
"""
|
|
71
|
+
Get the value of an environment variable.
|
|
72
|
+
Try headless module variable if the key starts with "GPUSTACK_RUNNER_".
|
|
73
|
+
|
|
74
|
+
Args:
|
|
75
|
+
key:
|
|
76
|
+
The environment variable key.
|
|
77
|
+
default:
|
|
78
|
+
The default value if the key is not found.
|
|
79
|
+
|
|
80
|
+
Returns:
|
|
81
|
+
The value of the environment variable if it exists, otherwise None.
|
|
82
|
+
|
|
83
|
+
"""
|
|
84
|
+
value = sys_getenv(key)
|
|
85
|
+
if value is not None:
|
|
86
|
+
return value
|
|
87
|
+
if key.startswith(_ENV_PREFIX):
|
|
88
|
+
headless_key = key.removeprefix(_ENV_PREFIX)
|
|
89
|
+
return sys_getenv(headless_key, default)
|
|
90
|
+
return default
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def getenvs(keys: list[str], default: Any = None) -> Any | None:
    """
    Get the value of the first environment variable that is set.

    Each key is resolved with `getenv`, in the order given, and the
    first value that is not None is returned.

    Args:
        keys:
            The environment variable key(s), in priority order.
        default:
            The value to return if none of the keys are found.

    Returns:
        The value of the first key that is set, otherwise the default.

    """
    for key in keys:
        value = getenv(key)
        if value is not None:
            return value
    return default
|
|
@@ -10,13 +10,15 @@ from typing import Any
|
|
|
10
10
|
|
|
11
11
|
from dataclasses_json import dataclass_json
|
|
12
12
|
|
|
13
|
+
from . import envs
|
|
14
|
+
|
|
13
15
|
_RE_DOCKER_IMAGE = re.compile(
|
|
14
|
-
r"(?:(?P<prefix>[\w\\.\-]+(?:/[\w\\.\-]+)*)/)?
|
|
16
|
+
r"(?:(?P<prefix>[\w\\.\-]+(?:/[\w\\.\-]+)*)/)?runner:(?P<backend>(Host|cann|corex|cuda|dtk|maca|rocm))(?P<backend_version>[XY\d\\.]+)(?:-(?P<backend_variant>\w+))?-(?P<service>(vllm|voxbox|mindie|sglang))(?P<service_version>[\w\\.]+)(?:-(?P<suffix>\w+))?",
|
|
15
17
|
)
|
|
16
18
|
"""
|
|
17
19
|
Regex for Docker image parsing,
|
|
18
20
|
which captures the following named groups:
|
|
19
|
-
- `prefix`: The optional prefix before `
|
|
21
|
+
- `prefix`: The optional prefix before `runner`, e.g. a registry URL or namespace.
|
|
20
22
|
- `backend`: The backend name, e.g. "cann", "cuda", "rocm", etc.
|
|
21
23
|
- `backend_version`: The backend version, ignored patch version, e.g. "8.2", "12.4", "6.3", etc.
|
|
22
24
|
- `backend_variant`: The optional backend variant, e.g. "910b", etc.
|
|
@@ -33,7 +35,7 @@ def set_re_docker_image(pattern: str):
|
|
|
33
35
|
Args:
|
|
34
36
|
pattern:
|
|
35
37
|
The regex pattern to set. It should capture the following named groups:
|
|
36
|
-
- `prefix`: The optional prefix before `
|
|
38
|
+
- `prefix`: The optional prefix before `runner`, e.g. a registry URL or namespace.
|
|
37
39
|
- `backend`: The backend name, e.g. "cann", "cuda",
|
|
38
40
|
- `backend_version`: The backend version, ignored patch version, e.g. "8.2", "12.4", "6.3", etc.
|
|
39
41
|
- `backend_variant`: The optional backend variant, e.g. "910b", etc
|
|
@@ -82,7 +84,7 @@ class DockerImage:
|
|
|
82
84
|
Parse the Docker image string into a DockerImage object.
|
|
83
85
|
|
|
84
86
|
The given image string must follow the below regex format:
|
|
85
|
-
`[prefix/]
|
|
87
|
+
`[prefix/]runner:{backend}{backend_version}[-backend_variant]-{service}{service_version}[-suffix]`
|
|
86
88
|
|
|
87
89
|
Args:
|
|
88
90
|
image:
|
|
@@ -100,7 +102,7 @@ class DockerImage:
|
|
|
100
102
|
def __str__(self):
|
|
101
103
|
parts = [
|
|
102
104
|
"",
|
|
103
|
-
"
|
|
105
|
+
"runner:",
|
|
104
106
|
self.backend,
|
|
105
107
|
self.backend_version,
|
|
106
108
|
]
|
|
@@ -235,7 +237,13 @@ def list_runners(**kwargs) -> Runners | list[dict]:
|
|
|
235
237
|
data_path = Path(_data_path) if isinstance(_data_path, str) else _data_path
|
|
236
238
|
with data_path.open("r", encoding="utf-8") as f:
|
|
237
239
|
json_list = json.load(f)
|
|
238
|
-
runners = [
|
|
240
|
+
runners = []
|
|
241
|
+
for item in json_list:
|
|
242
|
+
if namespace := envs.GPUSTACK_RUNNER_DEFAULT_NAMESPACE:
|
|
243
|
+
docker_image = item["docker_image"]
|
|
244
|
+
docker_image = docker_image.replace("gpustack/", f"{namespace}/")
|
|
245
|
+
item["docker_image"] = docker_image
|
|
246
|
+
runners.append(Runner.from_dict(item))
|
|
239
247
|
|
|
240
248
|
todict = kwargs.pop("todict", False)
|
|
241
249
|
if not kwargs:
|