gpustack-runner 0.1.23.post1__tar.gz → 0.1.23.post2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/PKG-INFO +16 -16
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/README.md +15 -15
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/gpustack_runner/_version.py +2 -2
- gpustack_runner-0.1.23.post2/gpustack_runner/_version_appendix.py +1 -0
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/gpustack_runner/cmds/images.py +4 -1
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/gpustack_runner/runner.py.json +88 -0
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/pack/cann/Dockerfile +3 -3
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/pack/cuda/Dockerfile +2 -7
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/pack/matrix.yaml +8 -8
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/pack/rocm/Dockerfile +12 -2
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/tests/gpustack_runner/fixtures/test_list_runners_by_backend.json +88 -0
- gpustack_runner-0.1.23.post1/gpustack_runner/_version_appendix.py +0 -1
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/.codespelldict +0 -0
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/.codespellrc +0 -0
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/.gitattributes +0 -0
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/.gitignore +0 -0
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/.pre-commit-config.yaml +0 -0
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/.python-version +0 -0
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/LICENSE +0 -0
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/Makefile +0 -0
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/docs/index.md +0 -0
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/docs/modules/gpustack_runner.md +0 -0
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/gpustack_runner/__init__.py +0 -0
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/gpustack_runner/__main__.py +0 -0
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/gpustack_runner/_version.pyi +0 -0
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/gpustack_runner/cmds/__init__.py +0 -0
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/gpustack_runner/cmds/__types__.py +0 -0
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/gpustack_runner/envs.py +0 -0
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/gpustack_runner/runner.py +0 -0
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/hatch.toml +0 -0
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/mkdocs.yml +0 -0
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/pack/.post_operation/20251020_vllm_install_lmcache/cann/Dockerfile +0 -0
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/pack/.post_operation/20251020_vllm_install_lmcache/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/pack/.post_operation/20251020_vllm_install_lmcache/matrix.yaml +0 -0
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/pack/.post_operation/20251020_vllm_install_lmcache/rocm/Dockerfile +0 -0
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/pack/.post_operation/20251022_vllm_install_ray_client/cann/Dockerfile +0 -0
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/pack/.post_operation/20251022_vllm_install_ray_client/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/pack/.post_operation/20251022_vllm_install_ray_client/matrix.yaml +0 -0
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/pack/.post_operation/20251022_vllm_install_ray_client/rocm/Dockerfile +0 -0
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/pack/.post_operation/20251022_vllm_install_ray_default/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/pack/.post_operation/20251022_vllm_install_ray_default/matrix.yaml +0 -0
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/pack/.post_operation/20251022_vllm_install_ray_default/rocm/Dockerfile +0 -0
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/pack/.post_operation/20251024_vllm_install_nvidia_hpcx/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/pack/.post_operation/20251024_vllm_install_nvidia_hpcx/matrix.yaml +0 -0
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/pack/.post_operation/20251024_vllm_reinstall_lmcache/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/pack/.post_operation/20251024_vllm_reinstall_lmcache/matrix.yaml +0 -0
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/pack/.post_operation/20251029_vllm_reinstall_ray/cann/Dockerfile +0 -0
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/pack/.post_operation/20251029_vllm_reinstall_ray/matrix.yaml +0 -0
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/pack/.post_operation/20251103_mindie_refresh_entrypoint/cann/Dockerfile +0 -0
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/pack/.post_operation/20251103_mindie_refresh_entrypoint/matrix.yaml +0 -0
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/pack/.post_operation/20251105_vllm_polish_nvidia_hpcx/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/pack/.post_operation/20251105_vllm_polish_nvidia_hpcx/matrix.yaml +0 -0
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/pack/.post_operation/20251106_vllm_install_ep_kernel/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/pack/.post_operation/20251106_vllm_install_ep_kernel/matrix.yaml +0 -0
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/pack/.post_operation/20251107_vllm_reinstall_lmcache/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/pack/.post_operation/20251107_vllm_reinstall_lmcache/matrix.yaml +0 -0
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/pack/.post_operation/20251110_sglang_install_diffusion/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/pack/.post_operation/20251110_sglang_install_diffusion/matrix.yaml +0 -0
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/pack/.post_operation/20251110_sglang_install_flashattn/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/pack/.post_operation/20251110_sglang_install_flashattn/matrix.yaml +0 -0
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/pack/.post_operation/20251125_mindie_install_posix_ipc/cann/Dockerfile +0 -0
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/pack/.post_operation/20251125_mindie_install_posix_ipc/matrix.yaml +0 -0
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/pack/.post_operation/20251201_vllm_patch_qwen2_5_vl/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/pack/.post_operation/20251201_vllm_patch_qwen2_5_vl/cuda/patches/vllm_001_disable_flashatten_in_qwen2_5_vl.patch +0 -0
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/pack/.post_operation/20251201_vllm_patch_qwen2_5_vl/matrix.yaml +0 -0
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/pack/.post_operation/20251209_mindie_install_av/cann/Dockerfile +0 -0
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/pack/.post_operation/20251209_mindie_install_av/matrix.yaml +0 -0
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/pack/.post_operation/20251213_mindie_patch_minicpm_qwen2_v2/cann/Dockerfile +0 -0
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/pack/.post_operation/20251213_mindie_patch_minicpm_qwen2_v2/cann/patches.zip +0 -0
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/pack/.post_operation/20251213_mindie_patch_minicpm_qwen2_v2/matrix.yaml +0 -0
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/pack/.post_operation/20251213_sglang_patch_server_args/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/pack/.post_operation/20251213_sglang_patch_server_args/cuda/patches/sglang_001_fix_server_args.patch +0 -0
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/pack/.post_operation/20251213_sglang_patch_server_args/matrix.yaml +0 -0
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/pack/.post_operation/20251214_cuda_several_patches/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/pack/.post_operation/20251214_cuda_several_patches/matrix.yaml +0 -0
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/pack/.post_operation/20251215_cann_several_patches/cann/Dockerfile +0 -0
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/pack/.post_operation/20251215_cann_several_patches/matrix.yaml +0 -0
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/pack/.post_operation/20251216_sglang_uninstall_runai_model_streamer/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/pack/.post_operation/20251216_sglang_uninstall_runai_model_streamer/matrix.yaml +0 -0
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/pack/.post_operation/20251219_rocm_install_petit_kernel/matrix.yaml +0 -0
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/pack/.post_operation/20251219_rocm_install_petit_kernel/rocm/Dockerfile +0 -0
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/pack/.post_operation/20251219_vllm_install_audio_extra/cuda/Dockerfile +0 -0
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/pack/.post_operation/20251219_vllm_install_audio_extra/matrix.yaml +0 -0
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/pack/.post_operation/20251219_vllm_install_audio_extra/rocm/Dockerfile +0 -0
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/pack/.post_operation/20251224_mindie_patch_atb_config/cann/Dockerfile +0 -0
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/pack/.post_operation/20251224_mindie_patch_atb_config/matrix.yaml +0 -0
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/pack/.post_operation/README.md +0 -0
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/pack/cann/mindie-atb-models_2.2.rc1_linux-amd64_py3.11_torch2.1.0-abi0.tar.gz +0 -0
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/pack/cann/mindie-atb-models_2.2.rc1_linux-arm64_py3.11_torch2.1.0-abi0.tar.gz +0 -0
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/pack/cann/patches/mindie.zip +0 -0
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/pack/corex/Dockerfile +0 -0
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/pack/discard_runner.sh +0 -0
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/pack/dtk/Dockerfile +0 -0
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/pack/expand_matrix.sh +0 -0
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/pack/maca/Dockerfile +0 -0
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/pack/merge_runner.sh +0 -0
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/pack/prune_runner.sh +0 -0
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/pack/rocm/patches/sglang_001_wrong_vram.patch +0 -0
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/pyproject.toml +0 -0
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/pytest.ini +0 -0
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/ruff.toml +0 -0
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/tests/gpustack_runner/fixtures/__init__.py +0 -0
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/tests/gpustack_runner/fixtures/test_docker_image.json +0 -0
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/tests/gpustack_runner/fixtures/test_list_backend_runners.json +0 -0
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/tests/gpustack_runner/fixtures/test_list_runners_by_prefix.json +0 -0
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/tests/gpustack_runner/fixtures/test_list_service_runners.json +0 -0
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/tests/gpustack_runner/test_runner.py +0 -0
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/tools/activate +0 -0
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/tools/chat.sh +0 -0
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/tools/chat_tool_current_date_time.sh +0 -0
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/tools/chat_tool_get_temperature.sh +0 -0
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/tools/chat_tool_get_weather.sh +0 -0
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/tools/chat_tool_square_of_number.sh +0 -0
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/tools/chat_tool_square_root_of_number.sh +0 -0
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/tools/chat_tool_where_am_i.sh +0 -0
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/tools/run_runner.sh +0 -0
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/tools/run_runner_cluster.sh +0 -0
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/uv.lock +0 -0
- {gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/uv.toml +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: gpustack-runner
|
|
3
|
-
Version: 0.1.23.post1
|
|
3
|
+
Version: 0.1.23.post2
|
|
4
4
|
Summary: GPUStack Runner is library for registering runnable accelerated backends and services in GPUStack.
|
|
5
5
|
Project-URL: Homepage, https://github.com/gpustack/runner
|
|
6
6
|
Project-URL: Bug Tracker, https://github.com/gpustack/gpustack/issues
|
|
@@ -58,8 +58,8 @@ The following table lists the supported accelerated backends and their correspon
|
|
|
58
58
|
|
|
59
59
|
| CANN Version <br/> (Variant) | MindIE | vLLM | SGLang |
|
|
60
60
|
|------------------------------|--------------------------|------------------------------------------------------------|------------------------|
|
|
61
|
-
| 8.3 (A3/910C) | `2.2.rc1` | `0.13.0`, `0.12.0`, `0.11.0` | `0.5.6.post2` |
|
|
62
|
-
| 8.3 (910B) | `2.2.rc1` | `0.13.0`, `0.12.0`, `0.11.0` | `0.5.6.post2` |
|
|
61
|
+
| 8.3 (A3/910C) | `2.2.rc1` | `0.13.0`, `0.12.0`, `0.11.0` | `0.5.7`, `0.5.6.post2` |
|
|
62
|
+
| 8.3 (910B) | `2.2.rc1` | `0.13.0`, `0.12.0`, `0.11.0` | `0.5.7`, `0.5.6.post2` |
|
|
63
63
|
| 8.3 (310P) | `2.2.rc1` | | |
|
|
64
64
|
| 8.2 (A3/910C) | `2.1.rc2` | `0.10.2`, `0.10.1.1` | `0.5.2`, `0.5.1.post3` |
|
|
65
65
|
| 8.2 (910B) | `2.1.rc2`, ~~`2.1.rc1`~~ | `0.10.2`, `0.10.1.1`, <br/>`0.10.0`, `0.9.2`, <br/>`0.9.1` | `0.5.2`, `0.5.1.post3` |
|
|
@@ -88,12 +88,12 @@ The following table lists the supported accelerated backends and their correspon
|
|
|
88
88
|
> - CUDA 12.6/12.4 supports Compute Capabilities:
|
|
89
89
|
`7.5 8.0+PTX 8.9 9.0+PTX`.
|
|
90
90
|
|
|
91
|
-
| CUDA Version <br/> (Variant) | vLLM | SGLang | VoxBox |
|
|
92
|
-
|
|
93
|
-
| 12.9 | `0.13.0`, `0.12.0`, <br/>`0.11.2` | `0.5.6.post2` | |
|
|
94
|
-
| 12.8 | `0.13.0`, `0.12.0`, <br/>`0.11.2`, ~~`0.11.0`~~, <br/>`0.10.2`, ~~`0.10.1.1`~~, <br/>~~`0.10.0`~~ | `0.5.6.post2`, `0.5.5.post3`, <br/>~~`0.5.5`~~, `0.5.4.post3` | `0.0.21`, ~~`0.0.20`~~ |
|
|
95
|
-
| 12.6 | `0.13.0`, `0.12.0`, <br/>`0.11.2`, ~~`0.11.0`~~, <br/>`0.10.2`, ~~`0.10.1.1`~~, <br/>~~`0.10.0`~~ | | `0.0.21`, ~~`0.0.20`~~ |
|
|
96
|
-
| 12.4 | ~~`0.11.0`~~, ~~`0.10.2`~~, <br/>~~`0.10.1.1`~~, ~~`0.10.0`~~ | | ~~`0.0.20`~~ |
|
|
91
|
+
| CUDA Version <br/> (Variant) | vLLM | SGLang | VoxBox |
|
|
92
|
+
|------------------------------|---------------------------------------------------------------------------------------------------|------------------------------------------------------------------------|------------------------|
|
|
93
|
+
| 12.9 | `0.13.0`, `0.12.0`, <br/>`0.11.2` | `0.5.7`, `0.5.6.post2` | |
|
|
94
|
+
| 12.8 | `0.13.0`, `0.12.0`, <br/>`0.11.2`, ~~`0.11.0`~~, <br/>`0.10.2`, ~~`0.10.1.1`~~, <br/>~~`0.10.0`~~ | `0.5.7`, `0.5.6.post2`, `0.5.5.post3`, <br/>~~`0.5.5`~~, `0.5.4.post3` | `0.0.21`, ~~`0.0.20`~~ |
|
|
95
|
+
| 12.6 | `0.13.0`, `0.12.0`, <br/>`0.11.2`, ~~`0.11.0`~~, <br/>`0.10.2`, ~~`0.10.1.1`~~, <br/>~~`0.10.0`~~ | | `0.0.21`, ~~`0.0.20`~~ |
|
|
96
|
+
| 12.4 | ~~`0.11.0`~~, ~~`0.10.2`~~, <br/>~~`0.10.1.1`~~, ~~`0.10.0`~~ | | ~~`0.0.20`~~ |
|
|
97
97
|
|
|
98
98
|
### Hygon DTK
|
|
99
99
|
|
|
@@ -112,8 +112,8 @@ The following table lists the supported accelerated backends and their correspon
|
|
|
112
112
|
|
|
113
113
|
> [!CAUTION]
|
|
114
114
|
> Since v0.1.23:
|
|
115
|
-
> Deprecated all services for ROCm 6.3.
|
|
116
|
-
> Deprecated vLLM `0.11.0`.
|
|
115
|
+
> - Deprecated all services for ROCm 6.3.
|
|
116
|
+
> - Deprecated vLLM `0.11.0`.
|
|
117
117
|
|
|
118
118
|
> [!NOTE]
|
|
119
119
|
> - ROCm 7.0 supports LLVM targets:
|
|
@@ -128,11 +128,11 @@ The following table lists the supported accelerated backends and their correspon
|
|
|
128
128
|
> - ROCm 6.4 SGLang supports `gfx942` only.
|
|
129
129
|
> - ROCm 7.0 SGLang supports `gfx950` only.
|
|
130
130
|
|
|
131
|
-
| ROCm Version <br/> (Variant) | vLLM | SGLang |
|
|
132
|
-
|
|
133
|
-
| 7.0 | `0.13.0`, `0.12.0`, <br/>`0.11.2`, ~~`0.11.0`~~ | `0.5.6.post2` |
|
|
134
|
-
| 6.4 | `0.13.0`, `0.12.0`, <br/>`0.11.2`, `0.10.2` | `0.5.6.post2`, `0.5.5.post3` |
|
|
135
|
-
| 6.3 | ~~`0.10.1.1`~~, ~~`0.10.0`~~ | |
|
|
131
|
+
| ROCm Version <br/> (Variant) | vLLM | SGLang |
|
|
132
|
+
|------------------------------|-------------------------------------------------|---------------------------------------|
|
|
133
|
+
| 7.0 | `0.13.0`, `0.12.0`, <br/>`0.11.2`, ~~`0.11.0`~~ | `0.5.7`, `0.5.6.post2` |
|
|
134
|
+
| 6.4 | `0.13.0`, `0.12.0`, <br/>`0.11.2`, `0.10.2` | `0.5.7`, `0.5.6.post2`, `0.5.5.post3` |
|
|
135
|
+
| 6.3 | ~~`0.10.1.1`~~, ~~`0.10.0`~~ | |
|
|
136
136
|
|
|
137
137
|
## Directory Structure
|
|
138
138
|
|
|
@@ -38,8 +38,8 @@ The following table lists the supported accelerated backends and their correspon
|
|
|
38
38
|
|
|
39
39
|
| CANN Version <br/> (Variant) | MindIE | vLLM | SGLang |
|
|
40
40
|
|------------------------------|--------------------------|------------------------------------------------------------|------------------------|
|
|
41
|
-
| 8.3 (A3/910C) | `2.2.rc1` | `0.13.0`, `0.12.0`, `0.11.0` | `0.5.6.post2` |
|
|
42
|
-
| 8.3 (910B) | `2.2.rc1` | `0.13.0`, `0.12.0`, `0.11.0` | `0.5.6.post2` |
|
|
41
|
+
| 8.3 (A3/910C) | `2.2.rc1` | `0.13.0`, `0.12.0`, `0.11.0` | `0.5.7`, `0.5.6.post2` |
|
|
42
|
+
| 8.3 (910B) | `2.2.rc1` | `0.13.0`, `0.12.0`, `0.11.0` | `0.5.7`, `0.5.6.post2` |
|
|
43
43
|
| 8.3 (310P) | `2.2.rc1` | | |
|
|
44
44
|
| 8.2 (A3/910C) | `2.1.rc2` | `0.10.2`, `0.10.1.1` | `0.5.2`, `0.5.1.post3` |
|
|
45
45
|
| 8.2 (910B) | `2.1.rc2`, ~~`2.1.rc1`~~ | `0.10.2`, `0.10.1.1`, <br/>`0.10.0`, `0.9.2`, <br/>`0.9.1` | `0.5.2`, `0.5.1.post3` |
|
|
@@ -68,12 +68,12 @@ The following table lists the supported accelerated backends and their correspon
|
|
|
68
68
|
> - CUDA 12.6/12.4 supports Compute Capabilities:
|
|
69
69
|
`7.5 8.0+PTX 8.9 9.0+PTX`.
|
|
70
70
|
|
|
71
|
-
| CUDA Version <br/> (Variant) | vLLM | SGLang | VoxBox |
|
|
72
|
-
|
|
73
|
-
| 12.9 | `0.13.0`, `0.12.0`, <br/>`0.11.2` | `0.5.6.post2` | |
|
|
74
|
-
| 12.8 | `0.13.0`, `0.12.0`, <br/>`0.11.2`, ~~`0.11.0`~~, <br/>`0.10.2`, ~~`0.10.1.1`~~, <br/>~~`0.10.0`~~ | `0.5.6.post2`, `0.5.5.post3`, <br/>~~`0.5.5`~~, `0.5.4.post3` | `0.0.21`, ~~`0.0.20`~~ |
|
|
75
|
-
| 12.6 | `0.13.0`, `0.12.0`, <br/>`0.11.2`, ~~`0.11.0`~~, <br/>`0.10.2`, ~~`0.10.1.1`~~, <br/>~~`0.10.0`~~ | | `0.0.21`, ~~`0.0.20`~~ |
|
|
76
|
-
| 12.4 | ~~`0.11.0`~~, ~~`0.10.2`~~, <br/>~~`0.10.1.1`~~, ~~`0.10.0`~~ | | ~~`0.0.20`~~ |
|
|
71
|
+
| CUDA Version <br/> (Variant) | vLLM | SGLang | VoxBox |
|
|
72
|
+
|------------------------------|---------------------------------------------------------------------------------------------------|------------------------------------------------------------------------|------------------------|
|
|
73
|
+
| 12.9 | `0.13.0`, `0.12.0`, <br/>`0.11.2` | `0.5.7`, `0.5.6.post2` | |
|
|
74
|
+
| 12.8 | `0.13.0`, `0.12.0`, <br/>`0.11.2`, ~~`0.11.0`~~, <br/>`0.10.2`, ~~`0.10.1.1`~~, <br/>~~`0.10.0`~~ | `0.5.7`, `0.5.6.post2`, `0.5.5.post3`, <br/>~~`0.5.5`~~, `0.5.4.post3` | `0.0.21`, ~~`0.0.20`~~ |
|
|
75
|
+
| 12.6 | `0.13.0`, `0.12.0`, <br/>`0.11.2`, ~~`0.11.0`~~, <br/>`0.10.2`, ~~`0.10.1.1`~~, <br/>~~`0.10.0`~~ | | `0.0.21`, ~~`0.0.20`~~ |
|
|
76
|
+
| 12.4 | ~~`0.11.0`~~, ~~`0.10.2`~~, <br/>~~`0.10.1.1`~~, ~~`0.10.0`~~ | | ~~`0.0.20`~~ |
|
|
77
77
|
|
|
78
78
|
### Hygon DTK
|
|
79
79
|
|
|
@@ -92,8 +92,8 @@ The following table lists the supported accelerated backends and their correspon
|
|
|
92
92
|
|
|
93
93
|
> [!CAUTION]
|
|
94
94
|
> Since v0.1.23:
|
|
95
|
-
> Deprecated all services for ROCm 6.3.
|
|
96
|
-
> Deprecated vLLM `0.11.0`.
|
|
95
|
+
> - Deprecated all services for ROCm 6.3.
|
|
96
|
+
> - Deprecated vLLM `0.11.0`.
|
|
97
97
|
|
|
98
98
|
> [!NOTE]
|
|
99
99
|
> - ROCm 7.0 supports LLVM targets:
|
|
@@ -108,11 +108,11 @@ The following table lists the supported accelerated backends and their correspon
|
|
|
108
108
|
> - ROCm 6.4 SGLang supports `gfx942` only.
|
|
109
109
|
> - ROCm 7.0 SGLang supports `gfx950` only.
|
|
110
110
|
|
|
111
|
-
| ROCm Version <br/> (Variant) | vLLM | SGLang |
|
|
112
|
-
|
|
113
|
-
| 7.0 | `0.13.0`, `0.12.0`, <br/>`0.11.2`, ~~`0.11.0`~~ | `0.5.6.post2` |
|
|
114
|
-
| 6.4 | `0.13.0`, `0.12.0`, <br/>`0.11.2`, `0.10.2` | `0.5.6.post2`, `0.5.5.post3` |
|
|
115
|
-
| 6.3 | ~~`0.10.1.1`~~, ~~`0.10.0`~~ | |
|
|
111
|
+
| ROCm Version <br/> (Variant) | vLLM | SGLang |
|
|
112
|
+
|------------------------------|-------------------------------------------------|---------------------------------------|
|
|
113
|
+
| 7.0 | `0.13.0`, `0.12.0`, <br/>`0.11.2`, ~~`0.11.0`~~ | `0.5.7`, `0.5.6.post2` |
|
|
114
|
+
| 6.4 | `0.13.0`, `0.12.0`, <br/>`0.11.2`, `0.10.2` | `0.5.7`, `0.5.6.post2`, `0.5.5.post3` |
|
|
115
|
+
| 6.3 | ~~`0.10.1.1`~~, ~~`0.10.0`~~ | |
|
|
116
116
|
|
|
117
117
|
## Directory Structure
|
|
118
118
|
|
|
@@ -27,8 +27,8 @@ version_tuple: VERSION_TUPLE
|
|
|
27
27
|
__commit_id__: COMMIT_ID
|
|
28
28
|
commit_id: COMMIT_ID
|
|
29
29
|
|
|
30
|
-
__version__ = version = '0.1.23.post1'
|
|
31
|
-
__version_tuple__ = version_tuple = (0, 1, 23, 'post1')
|
|
30
|
+
__version__ = version = '0.1.23.post2'
|
|
31
|
+
__version_tuple__ = version_tuple = (0, 1, 23, 'post2')
|
|
32
32
|
try:
|
|
33
33
|
from ._version_appendix import git_commit
|
|
34
34
|
__commit_id__ = commit_id = git_commit
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
git_commit = "6d771e2"
|
{gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/gpustack_runner/cmds/images.py
RENAMED
|
@@ -17,7 +17,7 @@ from typing import TYPE_CHECKING
|
|
|
17
17
|
import requests
|
|
18
18
|
from dataclasses_json import dataclass_json
|
|
19
19
|
|
|
20
|
-
from gpustack_runner import BackendRunners, list_backend_runners
|
|
20
|
+
from gpustack_runner import BackendRunners, envs, list_backend_runners
|
|
21
21
|
|
|
22
22
|
from .__types__ import SubCommand
|
|
23
23
|
|
|
@@ -1094,6 +1094,9 @@ def list_images(**kwargs) -> list[PlatformedImage]:
|
|
|
1094
1094
|
name = img.name
|
|
1095
1095
|
if not name:
|
|
1096
1096
|
continue
|
|
1097
|
+
if namespace := envs.GPUSTACK_RUNNER_DEFAULT_IMAGE_NAMESPACE:
|
|
1098
|
+
name = name.replace("gpustack/", f"{namespace}/")
|
|
1099
|
+
img.name = name
|
|
1097
1100
|
if name not in image_names_index:
|
|
1098
1101
|
image_names_index[name] = len(images)
|
|
1099
1102
|
images.append(img)
|
{gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/gpustack_runner/runner.py.json
RENAMED
|
@@ -21,6 +21,17 @@
|
|
|
21
21
|
"docker_image": "gpustack/runner:cann8.3-a3-mindie2.2.rc1",
|
|
22
22
|
"deprecated": false
|
|
23
23
|
},
|
|
24
|
+
{
|
|
25
|
+
"backend": "cann",
|
|
26
|
+
"backend_version": "8.3",
|
|
27
|
+
"original_backend_version": "8.3.rc2",
|
|
28
|
+
"backend_variant": "a3",
|
|
29
|
+
"service": "sglang",
|
|
30
|
+
"service_version": "0.5.7",
|
|
31
|
+
"platform": "linux/arm64",
|
|
32
|
+
"docker_image": "gpustack/runner:cann8.3-a3-sglang0.5.7",
|
|
33
|
+
"deprecated": false
|
|
34
|
+
},
|
|
24
35
|
{
|
|
25
36
|
"backend": "cann",
|
|
26
37
|
"backend_version": "8.3",
|
|
@@ -230,6 +241,17 @@
|
|
|
230
241
|
"docker_image": "gpustack/runner:cann8.3-910b-mindie2.2.rc1",
|
|
231
242
|
"deprecated": false
|
|
232
243
|
},
|
|
244
|
+
{
|
|
245
|
+
"backend": "cann",
|
|
246
|
+
"backend_version": "8.3",
|
|
247
|
+
"original_backend_version": "8.3.rc2",
|
|
248
|
+
"backend_variant": "910b",
|
|
249
|
+
"service": "sglang",
|
|
250
|
+
"service_version": "0.5.7",
|
|
251
|
+
"platform": "linux/arm64",
|
|
252
|
+
"docker_image": "gpustack/runner:cann8.3-910b-sglang0.5.7",
|
|
253
|
+
"deprecated": false
|
|
254
|
+
},
|
|
233
255
|
{
|
|
234
256
|
"backend": "cann",
|
|
235
257
|
"backend_version": "8.3",
|
|
@@ -626,6 +648,28 @@
|
|
|
626
648
|
"docker_image": "gpustack/runner:corex4.2-vllm0.8.3",
|
|
627
649
|
"deprecated": false
|
|
628
650
|
},
|
|
651
|
+
{
|
|
652
|
+
"backend": "cuda",
|
|
653
|
+
"backend_version": "12.9",
|
|
654
|
+
"original_backend_version": "12.9.1",
|
|
655
|
+
"backend_variant": "",
|
|
656
|
+
"service": "sglang",
|
|
657
|
+
"service_version": "0.5.7",
|
|
658
|
+
"platform": "linux/amd64",
|
|
659
|
+
"docker_image": "gpustack/runner:cuda12.9-sglang0.5.7",
|
|
660
|
+
"deprecated": false
|
|
661
|
+
},
|
|
662
|
+
{
|
|
663
|
+
"backend": "cuda",
|
|
664
|
+
"backend_version": "12.9",
|
|
665
|
+
"original_backend_version": "12.9.1",
|
|
666
|
+
"backend_variant": "",
|
|
667
|
+
"service": "sglang",
|
|
668
|
+
"service_version": "0.5.7",
|
|
669
|
+
"platform": "linux/arm64",
|
|
670
|
+
"docker_image": "gpustack/runner:cuda12.9-sglang0.5.7",
|
|
671
|
+
"deprecated": false
|
|
672
|
+
},
|
|
629
673
|
{
|
|
630
674
|
"backend": "cuda",
|
|
631
675
|
"backend_version": "12.9",
|
|
@@ -714,6 +758,28 @@
|
|
|
714
758
|
"docker_image": "gpustack/runner:cuda12.9-vllm0.11.2",
|
|
715
759
|
"deprecated": false
|
|
716
760
|
},
|
|
761
|
+
{
|
|
762
|
+
"backend": "cuda",
|
|
763
|
+
"backend_version": "12.8",
|
|
764
|
+
"original_backend_version": "12.8.1",
|
|
765
|
+
"backend_variant": "",
|
|
766
|
+
"service": "sglang",
|
|
767
|
+
"service_version": "0.5.7",
|
|
768
|
+
"platform": "linux/amd64",
|
|
769
|
+
"docker_image": "gpustack/runner:cuda12.8-sglang0.5.7",
|
|
770
|
+
"deprecated": false
|
|
771
|
+
},
|
|
772
|
+
{
|
|
773
|
+
"backend": "cuda",
|
|
774
|
+
"backend_version": "12.8",
|
|
775
|
+
"original_backend_version": "12.8.1",
|
|
776
|
+
"backend_variant": "",
|
|
777
|
+
"service": "sglang",
|
|
778
|
+
"service_version": "0.5.7",
|
|
779
|
+
"platform": "linux/arm64",
|
|
780
|
+
"docker_image": "gpustack/runner:cuda12.8-sglang0.5.7",
|
|
781
|
+
"deprecated": false
|
|
782
|
+
},
|
|
717
783
|
{
|
|
718
784
|
"backend": "cuda",
|
|
719
785
|
"backend_version": "12.8",
|
|
@@ -1341,6 +1407,17 @@
|
|
|
1341
1407
|
"docker_image": "gpustack/runner:maca3.0-vllm0.9.1",
|
|
1342
1408
|
"deprecated": false
|
|
1343
1409
|
},
|
|
1410
|
+
{
|
|
1411
|
+
"backend": "rocm",
|
|
1412
|
+
"backend_version": "7.0",
|
|
1413
|
+
"original_backend_version": "7.0.2",
|
|
1414
|
+
"backend_variant": "",
|
|
1415
|
+
"service": "sglang",
|
|
1416
|
+
"service_version": "0.5.7",
|
|
1417
|
+
"platform": "linux/amd64",
|
|
1418
|
+
"docker_image": "gpustack/runner:rocm7.0-sglang0.5.7",
|
|
1419
|
+
"deprecated": false
|
|
1420
|
+
},
|
|
1344
1421
|
{
|
|
1345
1422
|
"backend": "rocm",
|
|
1346
1423
|
"backend_version": "7.0",
|
|
@@ -1396,6 +1473,17 @@
|
|
|
1396
1473
|
"docker_image": "gpustack/runner:rocm7.0-vllm0.11.0",
|
|
1397
1474
|
"deprecated": true
|
|
1398
1475
|
},
|
|
1476
|
+
{
|
|
1477
|
+
"backend": "rocm",
|
|
1478
|
+
"backend_version": "6.4",
|
|
1479
|
+
"original_backend_version": "6.4.4",
|
|
1480
|
+
"backend_variant": "",
|
|
1481
|
+
"service": "sglang",
|
|
1482
|
+
"service_version": "0.5.7",
|
|
1483
|
+
"platform": "linux/amd64",
|
|
1484
|
+
"docker_image": "gpustack/runner:rocm6.4-sglang0.5.7",
|
|
1485
|
+
"deprecated": false
|
|
1486
|
+
},
|
|
1399
1487
|
{
|
|
1400
1488
|
"backend": "rocm",
|
|
1401
1489
|
"backend_version": "6.4",
|
|
@@ -58,9 +58,9 @@ ARG VLLM_ASCEND_VERSION
|
|
|
58
58
|
ARG VLLM_TORCH_VERSION=2.8.0
|
|
59
59
|
ARG VLLM_MOONCAKE_VERSION=0.3.7.post2
|
|
60
60
|
ARG SGLANG_BASE_IMAGE=gpustack/runner:cann${CANN_VERSION}-${CANN_ARCHS}-python${PYTHON_VERSION}
|
|
61
|
-
ARG SGLANG_VERSION=0.5.6.post2
|
|
61
|
+
ARG SGLANG_VERSION=0.5.7
|
|
62
62
|
ARG SGLANG_TORCH_VERSION=2.8.0
|
|
63
|
-
ARG SGLANG_KERNEL_VERSION=
|
|
63
|
+
ARG SGLANG_KERNEL_VERSION=2025.12.31
|
|
64
64
|
ARG SGLANG_VLLM_VERSION=0.9.2
|
|
65
65
|
|
|
66
66
|
#
|
|
@@ -1023,7 +1023,7 @@ EOT
|
|
|
1023
1023
|
-r /tmp/requirements.txt
|
|
1024
1024
|
fi
|
|
1025
1025
|
uv pip install --verbose \
|
|
1026
|
-
https://sglang-ascend.obs.cn-east-3.myhuaweicloud.com/sglang/torch_npu/torch_npu-2.8.0.post2.
|
|
1026
|
+
https://sglang-ascend.obs.cn-east-3.myhuaweicloud.com/sglang/torch_npu/torch_npu-2.8.0.post2.dev20251224-cp311-cp311-manylinux_2_28_$(uname -m).whl
|
|
1027
1027
|
uv pip install \
|
|
1028
1028
|
numpy==1.26.4 scipy==1.13.1
|
|
1029
1029
|
|
|
@@ -115,9 +115,9 @@ ARG VLLM_FLASHATTENTION_VERSION=2.8.3
|
|
|
115
115
|
ARG VLLM_LMCACHE_VERSION=0.3.11
|
|
116
116
|
ARG VLLM_MOONCAKE_VERSION=0.3.7.post2
|
|
117
117
|
ARG SGLANG_BASE_IMAGE=vllm
|
|
118
|
-
ARG SGLANG_VERSION=0.5.6.post2
|
|
118
|
+
ARG SGLANG_VERSION=0.5.7
|
|
119
119
|
ARG SGLANG_BUILD_BASE_IMAGE=vllm-build
|
|
120
|
-
ARG SGLANG_KERNEL_VERSION=0.3.
|
|
120
|
+
ARG SGLANG_KERNEL_VERSION=0.3.20
|
|
121
121
|
|
|
122
122
|
#
|
|
123
123
|
# Stage Bake Runtime
|
|
@@ -1264,11 +1264,6 @@ RUN --mount=type=bind,from=vllm-build-vllm,source=/,target=/vllm,rw <<EOF
|
|
|
1264
1264
|
uv pip install \
|
|
1265
1265
|
/vllm/workspace/*.whl
|
|
1266
1266
|
|
|
1267
|
-
# Uninstall
|
|
1268
|
-
## Remove Run:AI Model Streamer to avoid peak memory usage in model loading.
|
|
1269
|
-
uv pip uninstall \
|
|
1270
|
-
runai-model-streamer || true
|
|
1271
|
-
|
|
1272
1267
|
# Cleanup
|
|
1273
1268
|
rm -rf /var/tmp/* \
|
|
1274
1269
|
&& rm -rf /tmp/*
|
|
@@ -96,8 +96,8 @@ rules:
|
|
|
96
96
|
- "sglang"
|
|
97
97
|
args:
|
|
98
98
|
- "CUDA_VERSION=12.8.1"
|
|
99
|
-
- "SGLANG_BASE_IMAGE=gpustack/runner:cuda12.8-vllm0.
|
|
100
|
-
- "SGLANG_BUILD_BASE_IMAGE=gpustack/runner:cuda12.8-vllm0.
|
|
99
|
+
- "SGLANG_BASE_IMAGE=gpustack/runner:cuda12.8-vllm0.13.0"
|
|
100
|
+
- "SGLANG_BUILD_BASE_IMAGE=gpustack/runner:cuda12.8-vllm0.13.0"
|
|
101
101
|
## NVIDIA CUDA 12.9.1, using PyTorch +cu129.
|
|
102
102
|
##
|
|
103
103
|
- backend: "cuda"
|
|
@@ -106,8 +106,8 @@ rules:
|
|
|
106
106
|
- "sglang"
|
|
107
107
|
args:
|
|
108
108
|
- "CUDA_VERSION=12.9.1"
|
|
109
|
-
- "SGLANG_BASE_IMAGE=gpustack/runner:cuda12.9-vllm0.
|
|
110
|
-
- "SGLANG_BUILD_BASE_IMAGE=gpustack/runner:cuda12.9-vllm0.
|
|
109
|
+
- "SGLANG_BASE_IMAGE=gpustack/runner:cuda12.9-vllm0.13.0"
|
|
110
|
+
- "SGLANG_BUILD_BASE_IMAGE=gpustack/runner:cuda12.9-vllm0.13.0"
|
|
111
111
|
|
|
112
112
|
#
|
|
113
113
|
# Hygon DTK
|
|
@@ -152,8 +152,8 @@ rules:
|
|
|
152
152
|
args:
|
|
153
153
|
- "ROCM_VERSION=7.0.2"
|
|
154
154
|
- "VLLM_TORCH_SOURCE=radeon"
|
|
155
|
-
- "SGLANG_BASE_IMAGE=gpustack/runner:rocm7.0-vllm0.
|
|
156
|
-
- "SGLANG_BUILD_BASE_IMAGE=gpustack/runner:rocm7.0-vllm0.
|
|
155
|
+
- "SGLANG_BASE_IMAGE=gpustack/runner:rocm7.0-vllm0.13.0"
|
|
156
|
+
- "SGLANG_BUILD_BASE_IMAGE=gpustack/runner:rocm7.0-vllm0.13.0"
|
|
157
157
|
##
|
|
158
158
|
- backend: "rocm"
|
|
159
159
|
services:
|
|
@@ -164,5 +164,5 @@ rules:
|
|
|
164
164
|
args:
|
|
165
165
|
- "ROCM_VERSION=6.4.4"
|
|
166
166
|
- "VLLM_AITER_VERSION=0.1.7.post2"
|
|
167
|
-
- "SGLANG_BASE_IMAGE=gpustack/runner:rocm6.4-vllm0.
|
|
168
|
-
- "SGLANG_BUILD_BASE_IMAGE=gpustack/runner:rocm6.4-vllm0.
|
|
167
|
+
- "SGLANG_BASE_IMAGE=gpustack/runner:rocm6.4-vllm0.13.0"
|
|
168
|
+
- "SGLANG_BUILD_BASE_IMAGE=gpustack/runner:rocm6.4-vllm0.13.0"
|
|
@@ -83,7 +83,7 @@ ARG VLLM_AITER_VERSION=0.1.7.post5
|
|
|
83
83
|
ARG VLLM_LMCACHE_VERSION=0.3.11
|
|
84
84
|
ARG VLLM_MOONCAKE_VERSION=0.3.7.post2
|
|
85
85
|
ARG SGLANG_BASE_IMAGE=vllm
|
|
86
|
-
ARG SGLANG_VERSION=0.5.
|
|
86
|
+
ARG SGLANG_VERSION=0.5.7
|
|
87
87
|
ARG SGLANG_BUILD_BASE_IMAGE=vllm-build
|
|
88
88
|
|
|
89
89
|
# Stage Bake Runtime
|
|
@@ -1349,12 +1349,22 @@ RUN --mount=type=bind,from=sglang-build-sglang,source=/,target=/sglang,rw \
|
|
|
1349
1349
|
|
|
1350
1350
|
# Install
|
|
1351
1351
|
uv pip install \
|
|
1352
|
-
"$(ls /sglang/workspace/sglang-*.whl)[
|
|
1352
|
+
"$(ls /sglang/workspace/sglang-*.whl)[srt_hip]"
|
|
1353
|
+
if [[ "${TARGETARCH}" == "amd64" ]]; then
|
|
1354
|
+
# Install SGLang Diffusion Extension
|
|
1355
|
+
uv pip install \
|
|
1356
|
+
"$(ls /sglang/workspace/sglang-*.whl)[diffusion]"
|
|
1357
|
+
fi
|
|
1353
1358
|
uv pip install \
|
|
1354
1359
|
"$(ls /sglang/workspace/sgl_kernel-*.whl)"
|
|
1355
1360
|
uv pip install --force-reinstall \
|
|
1356
1361
|
/sglangrouter/workspace/*.whl
|
|
1357
1362
|
|
|
1363
|
+
# Uninstall
|
|
1364
|
+
## Remove Run:AI Model Streamer to avoid peak memory usage in model loading.
|
|
1365
|
+
uv pip uninstall \
|
|
1366
|
+
runai-model-streamer || true
|
|
1367
|
+
|
|
1358
1368
|
# Cleanup
|
|
1359
1369
|
rm -rf /var/tmp/* \
|
|
1360
1370
|
&& rm -rf /tmp/*
|
|
@@ -27,6 +27,17 @@
|
|
|
27
27
|
"docker_image": "gpustack/runner:cann8.3-a3-mindie2.2.rc1",
|
|
28
28
|
"deprecated": false
|
|
29
29
|
},
|
|
30
|
+
{
|
|
31
|
+
"backend": "cann",
|
|
32
|
+
"backend_version": "8.3",
|
|
33
|
+
"original_backend_version": "8.3.rc2",
|
|
34
|
+
"backend_variant": "a3",
|
|
35
|
+
"service": "sglang",
|
|
36
|
+
"service_version": "0.5.7",
|
|
37
|
+
"platform": "linux/arm64",
|
|
38
|
+
"docker_image": "gpustack/runner:cann8.3-a3-sglang0.5.7",
|
|
39
|
+
"deprecated": false
|
|
40
|
+
},
|
|
30
41
|
{
|
|
31
42
|
"backend": "cann",
|
|
32
43
|
"backend_version": "8.3",
|
|
@@ -236,6 +247,17 @@
|
|
|
236
247
|
"docker_image": "gpustack/runner:cann8.3-910b-mindie2.2.rc1",
|
|
237
248
|
"deprecated": false
|
|
238
249
|
},
|
|
250
|
+
{
|
|
251
|
+
"backend": "cann",
|
|
252
|
+
"backend_version": "8.3",
|
|
253
|
+
"original_backend_version": "8.3.rc2",
|
|
254
|
+
"backend_variant": "910b",
|
|
255
|
+
"service": "sglang",
|
|
256
|
+
"service_version": "0.5.7",
|
|
257
|
+
"platform": "linux/arm64",
|
|
258
|
+
"docker_image": "gpustack/runner:cann8.3-910b-sglang0.5.7",
|
|
259
|
+
"deprecated": false
|
|
260
|
+
},
|
|
239
261
|
{
|
|
240
262
|
"backend": "cann",
|
|
241
263
|
"backend_version": "8.3",
|
|
@@ -648,6 +670,28 @@
|
|
|
648
670
|
"backend": "cuda"
|
|
649
671
|
},
|
|
650
672
|
[
|
|
673
|
+
{
|
|
674
|
+
"backend": "cuda",
|
|
675
|
+
"backend_version": "12.9",
|
|
676
|
+
"original_backend_version": "12.9.1",
|
|
677
|
+
"backend_variant": "",
|
|
678
|
+
"service": "sglang",
|
|
679
|
+
"service_version": "0.5.7",
|
|
680
|
+
"platform": "linux/amd64",
|
|
681
|
+
"docker_image": "gpustack/runner:cuda12.9-sglang0.5.7",
|
|
682
|
+
"deprecated": false
|
|
683
|
+
},
|
|
684
|
+
{
|
|
685
|
+
"backend": "cuda",
|
|
686
|
+
"backend_version": "12.9",
|
|
687
|
+
"original_backend_version": "12.9.1",
|
|
688
|
+
"backend_variant": "",
|
|
689
|
+
"service": "sglang",
|
|
690
|
+
"service_version": "0.5.7",
|
|
691
|
+
"platform": "linux/arm64",
|
|
692
|
+
"docker_image": "gpustack/runner:cuda12.9-sglang0.5.7",
|
|
693
|
+
"deprecated": false
|
|
694
|
+
},
|
|
651
695
|
{
|
|
652
696
|
"backend": "cuda",
|
|
653
697
|
"backend_version": "12.9",
|
|
@@ -736,6 +780,28 @@
|
|
|
736
780
|
"docker_image": "gpustack/runner:cuda12.9-vllm0.11.2",
|
|
737
781
|
"deprecated": false
|
|
738
782
|
},
|
|
783
|
+
{
|
|
784
|
+
"backend": "cuda",
|
|
785
|
+
"backend_version": "12.8",
|
|
786
|
+
"original_backend_version": "12.8.1",
|
|
787
|
+
"backend_variant": "",
|
|
788
|
+
"service": "sglang",
|
|
789
|
+
"service_version": "0.5.7",
|
|
790
|
+
"platform": "linux/amd64",
|
|
791
|
+
"docker_image": "gpustack/runner:cuda12.8-sglang0.5.7",
|
|
792
|
+
"deprecated": false
|
|
793
|
+
},
|
|
794
|
+
{
|
|
795
|
+
"backend": "cuda",
|
|
796
|
+
"backend_version": "12.8",
|
|
797
|
+
"original_backend_version": "12.8.1",
|
|
798
|
+
"backend_variant": "",
|
|
799
|
+
"service": "sglang",
|
|
800
|
+
"service_version": "0.5.7",
|
|
801
|
+
"platform": "linux/arm64",
|
|
802
|
+
"docker_image": "gpustack/runner:cuda12.8-sglang0.5.7",
|
|
803
|
+
"deprecated": false
|
|
804
|
+
},
|
|
739
805
|
{
|
|
740
806
|
"backend": "cuda",
|
|
741
807
|
"backend_version": "12.8",
|
|
@@ -1387,6 +1453,17 @@
|
|
|
1387
1453
|
"backend": "rocm"
|
|
1388
1454
|
},
|
|
1389
1455
|
[
|
|
1456
|
+
{
|
|
1457
|
+
"backend": "rocm",
|
|
1458
|
+
"backend_version": "7.0",
|
|
1459
|
+
"original_backend_version": "7.0.2",
|
|
1460
|
+
"backend_variant": "",
|
|
1461
|
+
"service": "sglang",
|
|
1462
|
+
"service_version": "0.5.7",
|
|
1463
|
+
"platform": "linux/amd64",
|
|
1464
|
+
"docker_image": "gpustack/runner:rocm7.0-sglang0.5.7",
|
|
1465
|
+
"deprecated": false
|
|
1466
|
+
},
|
|
1390
1467
|
{
|
|
1391
1468
|
"backend": "rocm",
|
|
1392
1469
|
"backend_version": "7.0",
|
|
@@ -1442,6 +1519,17 @@
|
|
|
1442
1519
|
"docker_image": "gpustack/runner:rocm7.0-vllm0.11.0",
|
|
1443
1520
|
"deprecated": true
|
|
1444
1521
|
},
|
|
1522
|
+
{
|
|
1523
|
+
"backend": "rocm",
|
|
1524
|
+
"backend_version": "6.4",
|
|
1525
|
+
"original_backend_version": "6.4.4",
|
|
1526
|
+
"backend_variant": "",
|
|
1527
|
+
"service": "sglang",
|
|
1528
|
+
"service_version": "0.5.7",
|
|
1529
|
+
"platform": "linux/amd64",
|
|
1530
|
+
"docker_image": "gpustack/runner:rocm6.4-sglang0.5.7",
|
|
1531
|
+
"deprecated": false
|
|
1532
|
+
},
|
|
1445
1533
|
{
|
|
1446
1534
|
"backend": "rocm",
|
|
1447
1535
|
"backend_version": "6.4",
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
git_commit = "b4fa7a6"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/docs/modules/gpustack_runner.md
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/gpustack_runner/cmds/__init__.py
RENAMED
|
File without changes
|
{gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/gpustack_runner/cmds/__types__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/pack/.post_operation/README.md
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/tests/gpustack_runner/test_runner.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/tools/chat_tool_current_date_time.sh
RENAMED
|
File without changes
|
{gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/tools/chat_tool_get_temperature.sh
RENAMED
|
File without changes
|
{gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/tools/chat_tool_get_weather.sh
RENAMED
|
File without changes
|
{gpustack_runner-0.1.23.post1 → gpustack_runner-0.1.23.post2}/tools/chat_tool_square_of_number.sh
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|